本文整理汇总了C++中dA函数的典型用法代码示例。如果您正苦于以下问题：C++ dA函数的具体用法？C++ dA怎么用？C++ dA使用的例子？那么恭喜您，这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了dA函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: magma_zdtohpo
/**
 * Walks the blocks of a matrix that is spread over several GPUs and issues
 * one magma_zgetmatrix_async() per block, with dA(...) as the first matrix
 * argument and A(...) as the second (presumably a device-to-host copy --
 * confirm against the MAGMA API). Every transfer is synchronized right
 * after it is queued.
 *
 * For a block starting at global index p (column index when uplo is "U",
 * row index otherwise), with rel = p - off_j (resp. p - off_i):
 *   - the device is      (rel / nb) % num_gpus
 *   - the local block is  rel / (nb * num_gpus)
 *
 * @param num_gpus  number of devices the blocks are distributed over
 * @param uplo      "U" selects the column sweep; anything else the row sweep
 * @param m, n      extents used as loop bound / clipping value (see body)
 * @param off_i     row offset into the host matrix
 * @param off_j     column offset into the host matrix
 * @param nb        block size (loop stride)
 * @param NB        size of the leading part that is skipped: the sweep
 *                  starts at off_j + NB (resp. off_i + NB)
 * @param a, lda    host matrix and its leading dimension (used through A())
 * @param dwork     per-device arrays (presumably used through dA() -- the
 *                  macro is defined outside this block)
 * @param ldda      leading dimension of the device arrays
 * @param stream    per-device queues; only stream[dev][0] is used
 * @param info      never written here; the function returns *info as it
 *                  was passed in
 * @return          *info
 */
extern "C" magma_int_t
magma_zdtohpo(magma_int_t num_gpus, char *uplo, magma_int_t m, magma_int_t n,
              magma_int_t off_i, magma_int_t off_j, magma_int_t nb, magma_int_t NB,
              magmaDoubleComplex *a, magma_int_t lda,
              magmaDoubleComplex *dwork[], magma_int_t ldda,
              magma_queue_t stream[][3], magma_int_t *info)
{
    if (lapackf77_lsame(uplo, "U")) {
        /* column sweep: one block column per iteration */
        for (magma_int_t col = off_j + NB; col < n; col += nb) {
            const magma_int_t blk   = (col - off_j) / (nb * num_gpus);
            const magma_int_t dev   = ((col - off_j) / nb) % num_gpus;
            const magma_int_t width = min(nb, (n - col));
            /* number of rows to move, clipped to m */
            const magma_int_t rows  = (col + width < off_j + m)
                                      ? (col - off_i) + width
                                      : m;

            magma_setdevice(dev);
            magma_zgetmatrix_async( rows, width,
                                    dA(dev, 0, blk*nb), ldda,
                                    A(off_i, col),      lda,
                                    stream[dev][0] );
            magma_queue_sync( stream[dev][0] );
        }
    }
    else {
        /* row sweep: one block row per iteration */
        for (magma_int_t row = off_i + NB; row < m; row += nb) {
            const magma_int_t blk    = (row - off_i) / (nb * num_gpus);
            const magma_int_t dev    = ((row - off_i) / nb) % num_gpus;
            const magma_int_t height = min(nb, (m - row));
            /* number of columns to move, clipped to n */
            const magma_int_t cols   = (row + height < off_i + n)
                                       ? (row - off_i) + height
                                       : n;

            magma_setdevice(dev);
            magma_zgetmatrix_async( height, cols,
                                    dA(dev, blk*nb, 0), ldda,
                                    A(row, off_j),      lda,
                                    stream[dev][0] );
            magma_queue_sync( stream[dev][0] );
        }
    }

    /* Each transfer was already synchronized inside the loops, so no final
       per-device synchronization pass is performed. */
    magma_setdevice(0);
    return *info;
}
开发者ID:soulsheng,项目名称:magma,代码行数:59,代码来源:zpotrf3_mgpu.cpp
示例2: magma_zhtodhe
// ----------------------------------------------------------------------
// TODO info is unused
/**
 * Queues one magma_zsetmatrix_async() per block column of A, sending the
 * stored triangle of each block column to the device that owns it, then
 * waits on queue 0 of every device and restores the device that was
 * current on entry.
 *
 * Block column starting at global column j goes to device (j/nb) % ngpu,
 * at local column (j/(nb*ngpu))*nb.
 *   - uplo == MagmaLower: rows j..n-1 are sent (n-j rows, starting at row j)
 *   - otherwise:          rows 0..j+jb-1 are sent (j+jb rows, starting at row 0)
 *
 * @param ngpu    number of devices
 * @param uplo    MagmaLower selects the lower-triangle layout; any other
 *                value takes the second branch
 * @param n       order of the matrix (loop bound)
 * @param nb      block-column width
 * @param A, lda  source matrix (accessed through A()) and leading dimension
 * @param dA      per-device destination arrays (accessed through dA())
 * @param ldda    leading dimension of the device arrays
 * @param queues  per-device queues; only queues[dev][0] is used
 * @param info    never written here; the function returns *info as it was
 *                passed in
 * @return        *info
 */
extern "C" magma_int_t
magma_zhtodhe(
    magma_int_t ngpu,
    magma_uplo_t uplo, magma_int_t n, magma_int_t nb,
    magmaDoubleComplex *A, magma_int_t lda,
    magmaDoubleComplex_ptr dA[], magma_int_t ldda,
    magma_queue_t queues[][10],
    magma_int_t *info)
{
    magma_device_t entry_dev;
    magma_getdevice( &entry_dev );

    const bool lower = (uplo == MagmaLower);

    /* go through each block-column */
    for (magma_int_t col = 0; col < n; col += nb) {
        const magma_int_t blk   = col / (nb*ngpu);
        const magma_int_t dev   = (col / nb) % ngpu;
        const magma_int_t width = min(nb, (n-col));
        /* first row and row count of the stored part of this block column */
        const magma_int_t row0  = lower ? col     : 0;
        const magma_int_t rows  = lower ? n - col : col + width;

        magma_setdevice( dev );
        magma_zsetmatrix_async( rows, width,
                                A(row0, col),          lda,
                                dA(dev, row0, blk*nb), ldda,
                                queues[dev][0] );
    }

    /* wait for every device's transfers to finish */
    for (magma_int_t dev = 0; dev < ngpu; ++dev) {
        magma_setdevice( dev );
        magma_queue_sync( queues[dev][0] );
    }

    magma_setdevice( entry_dev );
    return *info;
}
开发者ID:cjy7117,项目名称:FT-MAGMA,代码行数:57,代码来源:zhetrd_mgpu.cpp
示例3: magma_shtodhe
/**
 * Single-precision block-column upload: queues one magma_ssetmatrix_async()
 * per block column of A to the device that owns it, then synchronizes
 * queue 0 of every device and restores the device current on entry.
 *
 * Block column starting at global column j is sent to device
 * (j/nb) % num_gpus, local column (j/(nb*num_gpus))*nb.
 *   - uplo == MagmaLower: n-j rows starting at row j
 *   - otherwise:          j+jb rows starting at row 0
 *
 * @param num_gpus  number of devices
 * @param uplo      MagmaLower selects the first layout; any other value
 *                  the second
 * @param n         order of the matrix (loop bound)
 * @param nb        block-column width
 * @param A, lda    source matrix (accessed through A()) and leading dimension
 * @param dA        per-device destination arrays (accessed through dA())
 * @param ldda      leading dimension of the device arrays
 * @param stream    per-device queues; only stream[dev][0] is used
 * @param info      never written here; the function returns *info as it
 *                  was passed in
 * @return          *info
 */
extern "C" magma_int_t
magma_shtodhe(magma_int_t num_gpus, magma_uplo_t uplo, magma_int_t n, magma_int_t nb,
              float *A, magma_int_t lda,
              float **dA, magma_int_t ldda,
              magma_queue_t stream[][10], magma_int_t *info)
{
    magma_device_t entry_dev;
    magma_getdevice( &entry_dev );

    const bool lower = (uplo == MagmaLower);

    /* go through each block-column */
    for (magma_int_t col = 0; col < n; col += nb) {
        const magma_int_t blk   = col / (nb*num_gpus);
        const magma_int_t dev   = (col / nb) % num_gpus;
        const magma_int_t width = min(nb, (n-col));
        /* first row and row count of the stored part of this block column */
        const magma_int_t row0  = lower ? col     : 0;
        const magma_int_t rows  = lower ? n - col : col + width;

        magma_setdevice(dev);
        magma_ssetmatrix_async( rows, width,
                                A(row0, col),          lda,
                                dA(dev, row0, blk*nb), ldda,
                                stream[dev][0] );
    }

    /* wait for every device's transfers to finish */
    for (magma_int_t dev = 0; dev < num_gpus; ++dev) {
        magma_setdevice(dev);
        magma_queue_sync(stream[dev][0]);
    }

    magma_setdevice( entry_dev );
    return *info;
}
开发者ID:cjy7117,项目名称:DVFS-MAGMA,代码行数:52,代码来源:ssytrd_mgpu.cpp
示例4: ParFESpace
/// Rebuild pGrad for the state @a x and return the operator it holds.
/** Steps, in order:
    1. clear pGrad and let NonlinearForm::GetGradient(x) reassemble Grad;
    2. wrap Grad as a square block-diagonal operator over the parallel
       space's dof offsets (aborts if fnfi is non-empty);
    3. convert Dof_TrueDof_Matrix() to pGrad's type and form PtAP into pGrad;
    4. call EliminateRowsCols on pGrad with ess_tdof_list; the handle that
       receives the result is local and is discarded on return. */
Operator &ParNonlinearForm::GetGradient(const Vector &x) const
{
   ParFiniteElementSpace *par_space = ParFESpace();

   pGrad.Clear();

   NonlinearForm::GetGradient(x); // (re)assemble Grad, no b.c.

   OperatorHandle local_grad(pGrad.Type());
   OperatorHandle prolongation(pGrad.Type());

   if (fnfi.Size() != 0)
   {
      MFEM_ABORT("TODO: assemble contributions from shared face terms");
   }
   else
   {
      local_grad.MakeSquareBlockDiag(par_space->GetComm(),
                                     par_space->GlobalVSize(),
                                     par_space->GetDofOffsets(),
                                     Grad);
   }

   // TODO - construct Dof_TrueDof_Matrix directly in the pGrad format
   prolongation.ConvertFrom(par_space->Dof_TrueDof_Matrix());
   pGrad.MakePtAP(local_grad, prolongation);

   // Impose b.c. on pGrad
   OperatorHandle eliminated_part;
   eliminated_part.EliminateRowsCols(pGrad, ess_tdof_list);

   return *pGrad.Ptr();
}
开发者ID:LLNL,项目名称:mfem,代码行数:30,代码来源:pnonlinearform.cpp
示例5: cmaxent_fortran
/**
 * Thin forwarding wrapper around cmaxent().
 *
 * Passes every argument through unchanged and supplies six fixed strings
 * plus a null pointer in the slot of the commented-out `u` parameter.
 * The strings are presumably output file names -- confirm against the
 * declaration of cmaxent().
 */
void cmaxent_fortran( double* xqmc, double* xtau, int32_t len, double xmom1, double ( *xker ) ( const double&, double&, double& ),
                      double ( *backtrans ) ( double&, double&, double& ), double beta, double* alpha_tot, int32_t n_alpha, int32_t ngamma, double omega_start, double omega_end,
                      int32_t omega_points, int32_t nsweeps, int32_t nbins, int32_t nwarmup,/* double* u,*/ double* sigma)
{
    // Fixed strings handed to cmaxent(), in the order it receives them.
    std::string name_aom      = "Aom";
    std::string name_dump_aom = "dump_Aom";
    std::string name_log      = "max_stoch_log";
    std::string name_energies = "energies";
    std::string name_best_fit = "best_fit";
    std::string name_dump     = "dump";

    cmaxent(xqmc, xtau, len, xmom1, xker, backtrans, beta, alpha_tot, n_alpha, ngamma,
            omega_start, omega_end, omega_points, nsweeps, nbins, nwarmup,
            name_aom, name_dump_aom, name_log, name_energies, name_best_fit, name_dump,
            /*u*/NULL, sigma);
}
开发者ID:CaptainSifff,项目名称:cmaxent,代码行数:8,代码来源:cmaxent_fortran_interface.cpp
示例6: testDeviceVector
void testDeviceVector()
{
const int aSize = 64;
std::vector<int> hA(aSize), hB(aSize);
bolt::cl::device_vector<int> dA(aSize), dB(aSize);
for(int i=0; i<aSize; i++) {
hA[i] = hB[i] = dB[i] = dA[i] = i;
};
int hSum = std::inner_product(hA.begin(), hA.end(), hB.begin(), 1);
int sum = bolt::cl::inner_product( dA.begin(), dA.end(),
dB.begin(), 1, bolt::cl::plus<int>(), bolt::cl::multiplies<int>() );
};
开发者ID:mdlh,项目名称:Bolt,代码行数:15,代码来源:InnerProductDeviceVector.cpp
示例7: testDeviceVector
void testDeviceVector()
{
const int aSize = 1000;
std::vector<int> hA(aSize);
bolt::cl::device_vector<int> dA(aSize);
for(int i=0; i<aSize; i++) {
hA[i] = i;
dA[i] = i;
};
std::vector<int>::iterator smaxdex = std::max_element(hA.begin(), hA.end());
bolt::cl::device_vector<int>::iterator bmaxdex = bolt::cl::max_element(dA.begin(), dA.end(),bolt::cl::greater<int>());
};
开发者ID:K-Vamshi-Krishna,项目名称:Bolt,代码行数:15,代码来源:MaxElementDeviceVector.cpp
示例8: R
void GLTorus::draw()
{
GLfloat R(m_majorRadius);
GLfloat r(m_minorRadius);
GLfloat dA(m_angle/m_majorSegments);
GLfloat da(TwoPi/m_minorSegments);
GLfloat cosTheta[2], sinTheta[2], cosPhi, sinPhi;
Vec v, n;
for (int i = 0; i < m_majorSegments; ++i) {
cosTheta[0] = cos( i *dA);
cosTheta[1] = cos((i+1)*dA);
sinTheta[0] = sin( i *dA);
sinTheta[1] = sin((i+1)*dA);
glBegin(GL_QUAD_STRIP);
for (int j = 0; j <= m_minorSegments; ++j) {
cosPhi = cos(j*da);
sinPhi = sin(j*da);
for (int k = 0; k <= 1; ++k) {
v.x = (R+r*cosPhi) * cosTheta[k];
v.y = (R+r*cosPhi) * sinTheta[k];
v.z = r*sinPhi;
n.x = R*cosTheta[k];
n.y = R*sinTheta[k];
n.z = 0;
n = (v-n).unit();
glNormal3f(n.x, n.y, n.z);
glVertex3f(v.x, v.y, v.z);
}
}
glEnd();
}
}
开发者ID:Tyf0n,项目名称:IQmol,代码行数:37,代码来源:GLShape.C
示例9: main
int main( int argc, char** argv )
{
TESTING_INIT();
real_Double_t gflops, t1, t2;
double c_neg_one = MAGMA_D_NEG_ONE;
magma_int_t ione = 1;
const char trans[] = { 'N', 'C', 'T' };
const char uplo[] = { 'L', 'U' };
const char diag[] = { 'U', 'N' };
const char side[] = { 'L', 'R' };
double *A, *B, *C, *C2, *LU;
double *dA, *dB, *dC1, *dC2;
double alpha = MAGMA_D_MAKE( 0.5, 0.1 );
double beta = MAGMA_D_MAKE( 0.7, 0.2 );
double dalpha = 0.6;
double dbeta = 0.8;
double work[1], error, total_error;
magma_int_t ISEED[4] = {0,0,0,1};
magma_int_t m, n, k, size, maxn, ld, info;
magma_int_t *piv;
magma_err_t err;
magma_opts opts;
parse_opts( argc, argv, &opts );
printf( "Compares magma wrapper function to cublas function; all diffs should be exactly 0.\n\n" );
total_error = 0.;
for( int i = 0; i < opts.ntest; ++i ) {
m = opts.msize[i];
n = opts.nsize[i];
k = opts.ksize[i];
printf("=========================================================================\n");
printf( "M %d, N %d, K %d\n", (int) m, (int) n, (int) k );
// allocate matrices
// over-allocate so they can be any combination of {m,n,k} x {m,n,k}.
maxn = max( max( m, n ), k );
ld = maxn;
size = maxn*maxn;
err = magma_malloc_cpu( (void**) &piv, maxn*sizeof(magma_int_t) ); assert( err == 0 );
err = magma_dmalloc_pinned( &A, size ); assert( err == 0 );
err = magma_dmalloc_pinned( &B, size ); assert( err == 0 );
err = magma_dmalloc_pinned( &C, size ); assert( err == 0 );
err = magma_dmalloc_pinned( &C2, size ); assert( err == 0 );
err = magma_dmalloc_pinned( &LU, size ); assert( err == 0 );
err = magma_dmalloc( &dA, size ); assert( err == 0 );
err = magma_dmalloc( &dB, size ); assert( err == 0 );
err = magma_dmalloc( &dC1, size ); assert( err == 0 );
err = magma_dmalloc( &dC2, size ); assert( err == 0 );
// initialize matrices
size = maxn*maxn;
lapackf77_dlarnv( &ione, ISEED, &size, A );
lapackf77_dlarnv( &ione, ISEED, &size, B );
lapackf77_dlarnv( &ione, ISEED, &size, C );
printf( "========== Level 1 BLAS ==========\n" );
// ----- test DSWAP
// swap 2nd and 3rd columns of dA, then copy to C2 and compare with A
assert( n >= 4 );
magma_dsetmatrix( m, n, A, ld, dA, ld );
magma_dsetmatrix( m, n, A, ld, dB, ld );
magma_dswap( m, dA(0,1), 1, dA(0,2), 1 );
magma_dswap( m, dB(0,1), 1, dB(0,2), 1 );
// check results, storing diff between magma and cuda calls in C2
cublasDaxpy( ld*n, c_neg_one, dA, 1, dB, 1 );
magma_dgetmatrix( m, n, dB, ld, C2, ld );
error = lapackf77_dlange( "F", &m, &k, C2, &ld, work );
total_error += error;
printf( "dswap diff %.2g\n", error );
// ----- test IDAMAX
// get argmax of column of A
magma_dsetmatrix( m, k, A, ld, dA, ld );
error = 0;
for( int j = 0; j < k; ++j ) {
magma_int_t i1 = magma_idamax( m, dA(0,j), 1 );
magma_int_t i2 = cublasIdamax( m, dA(0,j), 1 );
assert( i1 == i2 );
error += abs( i1 - i2 );
}
total_error += error;
gflops = (double)m * k / 1e9;
printf( "idamax diff %.2g\n", error );
printf( "\n" );
printf( "========== Level 2 BLAS ==========\n" );
// ----- test DGEMV
// c = alpha*A*b + beta*c, with A m*n; b,c m or n-vectors
// try no-trans/trans
for( int ia = 0; ia < 3; ++ia ) {
magma_dsetmatrix( m, n, A, ld, dA, ld );
magma_dsetvector( maxn, B, 1, dB, 1 );
magma_dsetvector( maxn, C, 1, dC1, 1 );
//.........这里部分代码省略.........
开发者ID:soulsheng,项目名称:magma,代码行数:101,代码来源:testing_dblas.cpp
示例10: dimension
//.........这里部分代码省略.........
H(i) = I - tau * v * v'
where tau is a complex scalar, and v is a complex vector with
v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
and tau in TAU(i).
The elements of the vectors v together form the n-by-nb matrix V
which is needed, with W, to apply the transformation to the unreduced
part of the matrix, using a Hermitian rank-2k update of the form:
A := A - V*W' - W*V'.
The contents of A on exit are illustrated by the following examples
with n = 5 and nb = 2:
if UPLO = MagmaUpper: if UPLO = MagmaLower:
( a a a v4 v5 ) ( d )
( a a v4 v5 ) ( 1 d )
( a 1 v5 ) ( v1 1 a )
( d 1 ) ( v1 v2 a a )
( d ) ( v1 v2 a a a )
where d denotes a diagonal element of the reduced matrix, a denotes
an element of the original matrix that is unchanged, and vi denotes
an element of the vector defining H(i).
@ingroup magma_cheev_aux
********************************************************************/
extern "C" magma_int_t
magma_clatrd2(magma_uplo_t uplo, magma_int_t n, magma_int_t nb,
magmaFloatComplex *A, magma_int_t lda,
float *e, magmaFloatComplex *tau,
magmaFloatComplex *W, magma_int_t ldw,
magmaFloatComplex *dA, magma_int_t ldda,
magmaFloatComplex *dW, magma_int_t lddw,
magmaFloatComplex *dwork, magma_int_t ldwork)
{
#define A(i, j) (A + (j)*lda + (i))
#define W(i, j) (W + (j)*ldw + (i))
#define dA(i, j) (dA + (j)*ldda + (i))
#define dW(i, j) (dW + (j)*lddw + (i))
magma_int_t i;
magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
magmaFloatComplex c_one = MAGMA_C_ONE;
magmaFloatComplex c_zero = MAGMA_C_ZERO;
magmaFloatComplex value = MAGMA_C_ZERO;
magma_int_t ione = 1;
magma_int_t i_n, i_1, iw;
magmaFloatComplex alpha;
magmaFloatComplex *f;
if (n <= 0) {
return 0;
}
magma_queue_t stream;
magma_queue_create( &stream );
magma_cmalloc_cpu( &f, n );
assert( f != NULL ); // TODO return error, or allocate outside clatrd
开发者ID:EmergentOrder,项目名称:magma,代码行数:67,代码来源:clatrd2.cpp
示例11: magmablas_ssymm_mgpu_com
void magmablas_ssymm_mgpu_com(
magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n,
float alpha,
float *dA[], magma_int_t ldda, magma_int_t offset,
float *dB[], magma_int_t lddb,
float beta, float *dC[], magma_int_t lddc,
float *dwork[], magma_int_t dworksiz,
float *C, magma_int_t ldc,
float *work[], magma_int_t worksiz,
magma_int_t ngpu, magma_int_t nb,
magma_queue_t streams[][20], magma_int_t nstream,
magma_event_t redevents[][MagmaMaxGPUs*MagmaMaxGPUs+10], magma_int_t nbevents,
magma_int_t gnode[MagmaMaxGPUs][MagmaMaxGPUs+2], magma_int_t nbcmplx )
{
#define dA(dev, i, j) (dA[dev] + (i) + (j)*ldda)
#define dB(dev, i, j) (dB[dev] + (i) + (j)*lddb)
#define dC(dev, i, j) (dC[dev] + (i) + (j)*lddc)
#define dwork(dev, i, j) (dwork[dev] + (i) + (j)*lddwork)
#define C(i, j) (C + (i) + (j)*ldc)
//printf("####################################################\n");
//printf(" start ssymm \n");
//printf("####################################################\n");
if ( side != MagmaLeft || uplo != MagmaLower ) {
fprintf( stderr, "%s: only Left Lower implemented\n", __func__ );
}
assert( ldda >= m );
assert( lddb >= m );
assert( lddc >= m );
assert( nstream >= ngpu );
assert( nbevents >= ngpu*ngpu );
float c_one = MAGMA_S_ONE;
float *dwork1[MagmaMaxGPUs];
float *dwork2[MagmaMaxGPUs];
magma_int_t maxgsize = n*m;
magma_int_t lddwork = lddc;
magma_int_t ldwork = m;
for( magma_int_t dev = 0; dev < ngpu; ++dev ) {
dwork1[dev] = dwork[dev]; // size of dwork1 is n*lddwork
dwork2[dev] = dwork[dev]+n*lddwork; // size of dwork2 is maxgsize*ngpu
}
assert( dworksiz >= (n*lddwork+maxgsize*ngpu) );
assert( worksiz >= (n*ldwork) );
magma_device_t cdev;
magma_getdevice( &cdev );
magma_queue_t cstream;
magmablasGetKernelStream(&cstream);
magma_int_t dev, devperm, myblk, mycolsize, myblkoffst;
magma_int_t gmaster;
magma_int_t masterdev, lcdev, lccolsize, myngpu;
magma_int_t stdev = (offset/nb)%ngpu;
magma_int_t blockoffset = offset % nb;
magma_int_t fstblksiz = 0;
if(blockoffset>0){
fstblksiz = min(m, (nb - blockoffset));
}
//magma_int_t nbblk = magma_ceildiv(m, nb);
magma_int_t nbblk = magma_ceildiv((m+blockoffset), nb);
magma_int_t remm = m- fstblksiz;
magma_int_t nbblkoffst = offset/nb;
magma_int_t nblstblks = -1;
magma_int_t devlstblk = -1;
magma_int_t lstblksiz = remm%nb;
if(lstblksiz>0){
nblstblks = nbblk%ngpu;
devlstblk = (nblstblks-1+ngpu)%ngpu;
}
magma_int_t nbcmplxactive = 0;
magma_int_t cmplxisactive[MagmaMaxGPUs];
magma_int_t gpuisactive[MagmaMaxGPUs];
memset(gpuisactive, 0, MagmaMaxGPUs*sizeof(magma_int_t));
memset(cmplxisactive, 0, MagmaMaxGPUs*sizeof(magma_int_t));
for( magma_int_t dev = 0; dev < ngpu; ++dev ) {
magma_setdevice( dev );
magmablasSetKernelStream( streams[ dev ][ 0 ] );
cudaMemset(dwork(dev,0,0), 0, (lddwork)*(n)*sizeof(float) );
// put all dC on all dev to 0 except the one which
// hold i==0 because this one has to multiply by beta.
if(dev!=stdev){
cudaMemset(dC(dev,0,0), 0, (lddc)*(n)*sizeof(float) );
}
}
magma_int_t newoffset = offset;
//.........这里部分代码省略.........
开发者ID:XapaJIaMnu,项目名称:magma,代码行数:101,代码来源:ssymm_mgpu.cpp
示例12: H
/**
Purpose
-------
SORGQR generates an M-by-N REAL matrix Q with orthonormal columns,
which is defined as the first N columns of a product of K elementary
reflectors of order M
Q = H(1) H(2) . . . H(k)
as returned by SGEQRF_GPU.
Arguments
---------
@param[in]
m INTEGER
The number of rows of the matrix Q. M >= 0.
@param[in]
n INTEGER
The number of columns of the matrix Q. M >= N >= 0.
@param[in]
k INTEGER
The number of elementary reflectors whose product defines the
matrix Q. N >= K >= 0.
@param[in,out]
dA REAL array A on the GPU device,
dimension (LDDA,N). On entry, the i-th column must contain
the vector which defines the elementary reflector H(i), for
i = 1,2,...,k, as returned by SGEQRF_GPU in the first k
columns of its array argument A.
On exit, the M-by-N matrix Q.
@param[in]
ldda INTEGER
The first dimension of the array A. LDDA >= max(1,M).
@param[in]
tau REAL array, dimension (K)
TAU(i) must contain the scalar factor of the elementary
reflector H(i), as returned by SGEQRF_GPU.
@param[in]
dT REAL work space array on the GPU device,
dimension (MIN(M, N) )*NB.
This must be the 6th argument of magma_sgeqrf_gpu
[ note that if N here is bigger than N in magma_sgeqrf_gpu,
the workspace requirement DT in magma_sgeqrf_gpu must be
as specified in this routine ].
@param[in]
nb INTEGER
This is the block size used in SGEQRF_GPU, and correspondingly
the size of the T matrices, used in the factorization, and
stored in DT.
@param[out]
info INTEGER
- = 0: successful exit
- < 0: if INFO = -i, the i-th argument has an illegal value
@ingroup magma_ssyev_2stage
********************************************************************/
extern "C" magma_int_t
magma_sorgqr_2stage_gpu(magma_int_t m, magma_int_t n, magma_int_t k,
float *dA, magma_int_t ldda,
float *tau, float *dT,
magma_int_t nb, magma_int_t *info)
{
#define dA(a_1,a_2) (dA + (a_2)*(ldda) + (a_1))
#define dT(a_1) (dT + (a_1)*nb)
float c_zero = MAGMA_S_ZERO;
float c_one = MAGMA_S_ONE;
magma_int_t i__1, i__2, i__3;
//magma_int_t lwork;
magma_int_t i, ib, ki, kk; //, iinfo;
//magma_int_t lddwork = min(m, n);
//float *work, *panel;
float *dwork;
//magma_queue_t stream[2];
magma_int_t ldt=nb; // need to be an input parameter
*info = 0;
if (m < 0) {
*info = -1;
} else if ((n < 0) || (n > m)) {
*info = -2;
} else if ((k < 0) || (k > n)) {
*info = -3;
} else if (ldda < max(1,m)) {
*info = -5;
}
if (*info != 0) {
magma_xerbla( __func__, -(*info) );
return *info;
}
//.........这里部分代码省略.........
开发者ID:XapaJIaMnu,项目名称:magma,代码行数:101,代码来源:sorgqr_2stage_gpu.cpp
示例13: elements
/**
Purpose
-------
SGETRF computes an LU factorization of a general M-by-N matrix A
using partial pivoting with row interchanges. This version does not
require work space on the GPU passed as input. GPU memory is allocated
in the routine.
The factorization has the form
A = P * L * U
where P is a permutation matrix, L is lower triangular with unit
diagonal elements (lower trapezoidal if m > n), and U is upper
triangular (upper trapezoidal if m < n).
This is the right-looking Level 3 BLAS version of the algorithm.
It uses 2 queues to overlap communication and computation.
Arguments
---------
@param[in]
m INTEGER
The number of rows of the matrix A. M >= 0.
@param[in]
n INTEGER
The number of columns of the matrix A. N >= 0.
@param[in,out]
A REAL array, dimension (LDA,N)
On entry, the M-by-N matrix to be factored.
On exit, the factors L and U from the factorization
A = P*L*U; the unit diagonal elements of L are not stored.
\n
Higher performance is achieved if A is in pinned memory, e.g.
allocated using magma_malloc_pinned.
@param[in]
lda INTEGER
The leading dimension of the array A. LDA >= max(1,M).
@param[out]
ipiv INTEGER array, dimension (min(M,N))
The pivot indices; for 1 <= i <= min(M,N), row i of the
matrix was interchanged with row IPIV(i).
@param[out]
info INTEGER
- = 0: successful exit
- < 0: if INFO = -i, the i-th argument had an illegal value
or another error occurred, such as memory allocation failed.
- > 0: if INFO = i, U(i,i) is exactly zero. The factorization
has been completed, but the factor U is exactly
singular, and division by zero will occur if it is used
to solve a system of equations.
@ingroup magma_sgesv_comp
********************************************************************/
extern "C" magma_int_t
magma_sgetrf(
magma_int_t m, magma_int_t n,
float *A, magma_int_t lda,
magma_int_t *ipiv,
magma_int_t *info)
{
#ifdef HAVE_clBLAS
#define dA(i_, j_) dA, ((i_)*nb + (j_)*nb*ldda + dA_offset)
#define dAT(i_, j_) dAT, ((i_)*nb*lddat + (j_)*nb + dAT_offset)
#define dwork(i_) dwork, (i_)
#else
#define dA(i_, j_) ( dA + (i_)*nb + (j_)*nb*ldda)
#define dAT(i_, j_) ( dAT + (i_)*nb*lddat + (j_)*nb)
#define dwork(i_) (dwork + (i_))
#endif
// Constants
const float c_one = MAGMA_S_ONE;
const float c_neg_one = MAGMA_S_NEG_ONE;
// Local variables
float *work;
magmaFloat_ptr dA, dAT, dwork;
magma_int_t iinfo, nb;
/* Check arguments */
*info = 0;
if (m < 0)
*info = -1;
else if (n < 0)
*info = -2;
else if (lda < max(1,m))
*info = -4;
if (*info != 0) {
magma_xerbla( __func__, -(*info) );
return *info;
}
/* Quick return if possible */
if (m == 0 || n == 0)
//.........这里部分代码省略.........
开发者ID:xulunfan,项目名称:magma,代码行数:101,代码来源:sgetrf.cpp
示例14: BlockOperator
/// Rebuild every block of the parallel gradient for @a x and return it.
/** Steps, in order:
    1. allocate pBlockGrad over block_trueOffsets on first use;
    2. collect the parallel space of each block and clear every
       phBlockGrad(i,j) handle;
    3. GetLocalGradient(x) fills 'Grads' (aborts afterwards if fnfi is
       non-empty);
    4. for every (i,j): wrap Grads(i,j) as a block-diagonal operator and
       combine it with the Dof_TrueDof_Matrix() of space i (and of space j
       for off-diagonal blocks) -- PtAP on the diagonal, RAP elsewhere --
       then register the result in pBlockGrad. */
BlockOperator & ParBlockNonlinearForm::GetGradient(const Vector &x) const
{
   if (pBlockGrad == NULL)
   {
      pBlockGrad = new BlockOperator(block_trueOffsets);
   }

   Array<const ParFiniteElementSpace *> spaces(fes.Size());

   for (int i = 0; i < fes.Size(); ++i)
   {
      spaces[i] = ParFESpace(i);

      for (int j = 0; j < fes.Size(); ++j)
      {
         phBlockGrad(i,j)->Clear();
      }
   }

   GetLocalGradient(x); // gradients are stored in 'Grads'

   if (fnfi.Size() > 0)
   {
      MFEM_ABORT("TODO: assemble contributions from shared face terms");
   }

   for (int i = 0; i < fes.Size(); ++i)
   {
      const ParFiniteElementSpace *row_space = spaces[i];

      for (int j = 0; j < fes.Size(); ++j)
      {
         const ParFiniteElementSpace *col_space = spaces[j];
         OperatorHandle *block = phBlockGrad(i,j);

         OperatorHandle local_grad(block->Type());
         OperatorHandle Ph(block->Type());
         OperatorHandle Rh(block->Type());

         if (i != j)
         {
            // off-diagonal block: rectangular, rows from space i, columns from space j
            local_grad.MakeRectangularBlockDiag(row_space->GetComm(),
                                                row_space->GlobalVSize(),
                                                col_space->GlobalVSize(),
                                                row_space->GetDofOffsets(),
                                                col_space->GetDofOffsets(),
                                                Grads(i,j));
            Rh.ConvertFrom(row_space->Dof_TrueDof_Matrix());
            Ph.ConvertFrom(col_space->Dof_TrueDof_Matrix());
            block->MakeRAP(Rh, local_grad, Ph);
         }
         else
         {
            // diagonal block: square, a single space on both sides
            local_grad.MakeSquareBlockDiag(row_space->GetComm(),
                                           row_space->GlobalVSize(),
                                           row_space->GetDofOffsets(),
                                           Grads(i,i));
            Ph.ConvertFrom(row_space->Dof_TrueDof_Matrix());
            block->MakePtAP(local_grad, Ph);
         }

         pBlockGrad->SetBlock(i, j, block->Ptr());
      }
   }

   return *pBlockGrad;
}
开发者ID:LLNL,项目名称:mfem,代码行数:61,代码来源:pnonlinearform.cpp
示例15: dimension
//.........这里部分代码省略.........
int upper = (uplo == MagmaUpper);
lquery = (lwork == -1);
if (! upper && uplo != MagmaLower) {
*info = -1;
} else if (n < 0) {
*info = -2;
} else if (lda < max(1,n)) {
*info = -4;
} else if (lwork < nb*n && ! lquery) {
*info = -9;
}
/* Determine the block size. */
ldw = n;
lddw = ldda;
lwkopt = n * nb;
if (*info == 0) {
work[0] = MAGMA_Z_MAKE( lwkopt, 0 );
}
if (*info != 0) {
magma_xerbla( __func__, -(*info) );
return *info;
}
else if (lquery)
return *info;
/* Quick return if possible */
if (n == 0) {
work[0] = c_one;
return *info;
}
magmaDoubleComplex *dA;
#ifdef FAST_HEMV
magma_int_t ldwork2 = ldda*ceildiv(n,64);
#else
magma_int_t ldwork2 = 0;
#endif
if (MAGMA_SUCCESS != magma_zmalloc( &dA, ldda*n + 2*lddw*nb + ldwork2 )) {
*info = MAGMA_ERR_DEVICE_ALLOC;
return *info;
}
magmaDoubleComplex *dwork = dA + ldda*n;
#ifdef FAST_HEMV
magmaDoubleComplex *dwork2 = dwork + 2*lddw*nb;
#endif
//if (n < 2048)
// nx = n;
//else
// nx = 512;
nx = min( 128, n ); // nx <= n is required
// clear out dwork in case it has NANs (used as y in zhemv)
// rest of dwork (used as work in magmablas_zhemv) doesn't need to be cleared
magmablas_zlaset( MagmaFull, n, nb, c_zero, c_zero, dwork, lddw );
if (upper) {
/* Copy the matrix to the GPU */
magma_zsetmatrix( n, n, A(0, 0), lda, dA(0, 0), ldda );
/* Reduce the upper triangle of A.
Columns 1:kk are handled by the unblocked method. */
kk = n - (n - nx + nb - 1) / nb * nb;
开发者ID:cjy7117,项目名称:FT-MAGMA,代码行数:67,代码来源:zhetrd.cpp
示例16: magma_zhegst
//.........这里部分代码省略.........
/* Test the input parameters. */
*info = 0;
if (itype<1 || itype>3){
*info = -1;
}else if ((! upper) && (! lapackf77_lsame(uplo_, "L"))) {
*info = -2;
} else if (n < 0) {
*info = -3;
} else if (lda < max(1,n)) {
*info = -5;
}else if (ldb < max(1,n)) {
*info = -7;
}
if (*info != 0) {
magma_xerbla( __func__, -(*info) );
return *info;
}
/* Quick return */
if ( n == 0 )
return *info;
if (MAGMA_SUCCESS != magma_zmalloc( &dw, 2*n*n )) {
*info = MAGMA_ERR_DEVICE_ALLOC;
return *info;
}
nb = magma_get_zhegst_nb(n);
magma_queue_t stream[2];
magma_queue_create( &stream[0] );
magma_queue_create( &stream[1] );
magma_zsetmatrix( n, n, A(0, 0), lda, dA(0, 0), ldda );
magma_zsetmatrix( n, n, B(0, 0), ldb, dB(0, 0), lddb );
/* Use hybrid blocked code */
if (itype==1) {
if (upper) {
/* Compute inv(U')*A*inv(U) */
for(k = 0; k<n; k+=nb){
kb = min(n-k,nb);
kb2= min(n-k-nb,nb);
/* Update the upper triangle of A(k:n,k:n) */
lapackf77_zhegst( &itype, uplo_, &kb, A(k,k), &lda, B(k,k), &ldb, info);
magma_zsetmatrix_async( kb, kb,
A(k, k), lda,
dA(k, k), ldda, stream[0] );
if(k+kb<n){
magma_ztrsm(MagmaLeft, MagmaUpper, MagmaConjTrans, MagmaNonUnit,
kb, n-k-kb,
c_one, dB(k,k), lddb,
dA(k,k+kb), ldda);
magma_queue_sync( stream[0] );
magma_zhemm(MagmaLeft, MagmaUpper,
kb, n-k-kb,
c_neg_half, dA(k,k), ldda,
dB(k,k+kb), lddb,
开发者ID:soulsheng,项目名称:magma,代码行数:67,代码来源:zhegst.cpp
示例17: dimension
/**
Purpose
=======
SSYTRF_nopiv computes the LDLt factorization of a real symmetric
matrix A. This version does not require work space on the GPU passed
as input. GPU memory is allocated in the routine.
The factorization has the form
A = U^H * D * U, if UPLO = MagmaUpper, or
A = L * D * L^H, if UPLO = MagmaLower,
where U is an upper triangular matrix, L is lower triangular, and
D is a diagonal matrix.
This is the block version of the algorithm, calling Level 3 BLAS.
Arguments
---------
@param[in]
uplo magma_uplo_t
- = MagmaUpper: Upper triangle of A is stored;
- = MagmaLower: Lower triangle of A is stored.
@param[in]
n INTEGER
The order of the matrix A. N >= 0.
@param[in,out]
A REAL array, dimension (LDA,N)
On entry, the symmetric matrix A. If UPLO = MagmaUpper, the leading
N-by-N upper triangular part of A contains the upper
triangular part of the matrix A, and the strictly lower
triangular part of A is not referenced. If UPLO = MagmaLower, the
leading N-by-N lower triangular part of A contains the lower
triangular part of the matrix A, and the strictly upper
triangular part of A is not referenced.
\n
On exit, if INFO = 0, the factor U or L from the Cholesky
factorization A = U^H D U or A = L D L^H.
\n
Higher performance is achieved if A is in pinned memory.
@param[in]
lda INTEGER
The leading dimension of the array A. LDA >= max(1,N).
@param[out]
info INTEGER
- = 0: successful exit
- < 0: if INFO = -i, the i-th argument had an illegal value
if INFO = -6, the GPU memory allocation failed
- > 0: if INFO = i, the leading minor of order i is not
positive definite, and the factorization could not be
completed.
@ingroup magma_ssysv_comp
******************************************************************* */
extern "C" magma_int_t
magma_ssytrf_nopiv(
magma_uplo_t uplo, magma_int_t n,
float *A, magma_int_t lda,
magma_int_t *info)
{
#define A(i, j) ( A +(j)*lda + (i))
#define dA(i, j) (dA +(j)*ldda + (i))
#define dW(i, j) (dW +(j)*ldda + (i))
#define dWt(i, j) (dW +(j)*nb + (i))
/* Constants */
const float c_one = MAGMA_S_ONE;
const float c_neg_one = MAGMA_S_NEG_ONE;
/* Local variables */
bool upper = (uplo == MagmaUpper);
magma_int_t j, k, jb, ldda, nb, ib, iinfo;
magmaFloat_ptr dA;
magmaFloat_ptr dW;
*info = 0;
if (! upper && uplo != MagmaLower) {
*info = -1;
} else if (n < 0) {
*info = -2;
} else if (lda < max(1,n)) {
*info = -4;
}
if (*info != 0) {
magma_xerbla( __func__, -(*info) );
return MAGMA_ERR_ILLEGAL_VALUE;
}
/* Quick return */
if ( n == 0 )
return MAGMA_SUCCESS;
ldda = magma_roundup( n, 32 );
nb = magma_get_ssytrf_nopiv_nb(n);
ib = min(32, nb); // inner-block for diagonal factorization
if ((MAGMA_SUCCESS != magma_smalloc(&dA, n *ldda)) ||
//.........这里部分代码省略.........
开发者ID:xulunfan,项目名称:magma,代码行数:101,代码来源:ssytrf_nopiv.cpp
示例18: dimension
//.........这里部分代码省略.........
The contents of A on exit are illustrated by the following examples:
@verbatim
m = 6 and n = 5 (m > n): m = 5 and n = 6 (m < n):
( d e u1 u1 u1 ) ( d u1 u1 u1 u1 u1 )
( v1 d e u2 u2 ) ( e d u2 u2 u2 u2 )
( v1 v2 d e u3 ) ( v1 e d u3 u3 u3 )
( v1 v2 v3 d e ) ( v1 v2 e d u4 u4 )
( v1 v2 v3 v4 d ) ( v1 v2 v3 e d u5 )
( v1 v2 v3 v4 v5 )
@endverbatim
where d and e denote diagonal and off-diagonal elements of B, vi
denotes an element of the vector defining H(i), and ui an element of
the vector defining G(i).
@ingroup magma_sgesvd_comp
********************************************************************/
extern "C" magma_int_t
magma_sgebrd(
magma_int_t m, magma_int_t n,
float *A, magma_int_t lda, float *d, float *e,
float *tauq, float *taup,
float *work, magma_int_t lwork,
magma_int_t *info)
{
#define A(i, j) (A + (j)*lda + (i))
#define dA(i, j) (dA + (j)*ldda + (i))
float c_neg_one = MAGMA_S_NEG_ONE;
float c_one = MAGMA_S_ONE;
float *dA, *dwork;
magma_int_t ncol, nrow, jmax, nb, ldda;
magma_int_t i, j, nx;
magma_int_t iinfo;
magma_int_t minmn;
magma_int_t ldwrkx, ldwrky, lwkopt;
magma_int_t lquery;
nb = magma_get_sgebrd_nb(n);
ldda = m;
lwkopt = (m + n) * nb;
work[0] = MAGMA_S_MAKE( lwkopt, 0. );
lquery = (lwork == -1);
/* Check arguments */
*info = 0;
if (m < 0) {
*info = -1;
} else if (n < 0) {
*info = -2;
} else if (lda < max(1,m)) {
*info = -4;
} else if (lwork < lwkopt && (! lquery) ) {
*info = -10;
}
if (*info < 0) {
magma_xerbla( __func__, -(*info) );
return *info;
}
开发者ID:cjy7117,项目名称:FT-MAGMA,代码行数:67,代码来源:sgebrd.cpp
示例19: magma_cgeqrf
extern "C" magma_err_t
magma_cgeqrf(magma_int_t m, magma_int_t n,
magmaFloatComplex *A, magma_int_t lda, magmaFloatComplex *tau,
magmaFloatComplex *work, magma_int_t lwork,
magma_int_t *info,
magma_queue_t* queue )
{
/* -- clMAGMA (version 1.1.0) --
Univ. of Tennessee, Knoxville
Univ. of California, Berkeley
Univ. of Colorado, Denver
@date January 2014
Purpose
=======
CGEQRF computes a QR factorization of a COMPLEX M-by-N matrix A:
A = Q * R. This version does not require work space on the GPU
passed as input. GPU memory is allocated in the routine.
If the current stream is NULL, this version replaces it with user defined
stream to overlap computation with communication.
Arguments
=========
M (input) INTEGER
The number of rows of the matrix A. M >= 0.
N (input) INTEGER
The number of columns of the matrix A. N >= 0.
A (input/output) COMPLEX array, dimension (LDA,N)
On entry, the M-by-N matrix A.
On exit, the elements on and above the diagonal of the array
contain th
|
请发表评论