C++ dA函数代码示例

OGeek|极客世界-中国程序员成长平台 › 门户 › 编程› C++›C++教程

原作者: [db:作者] 来自: [db:来源] 收藏邀请

本文整理汇总了C++中dA函数的典型用法代码示例。如果您正苦于以下问题：C++ dA函数的具体用法？C++ dA怎么用？C++ dA使用的例子？那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。

在下文中一共展示了dA函数的20个代码示例，这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞，您的评价将有助于我们的系统推荐出更棒的C++代码示例。

示例1: magma_zdtohpo

extern "C" magma_int_t
magma_zdtohpo(magma_int_t num_gpus, char *uplo, magma_int_t m, magma_int_t n,
              magma_int_t off_i, magma_int_t off_j, magma_int_t nb, magma_int_t NB,
              magmaDoubleComplex *a,       magma_int_t lda,
              magmaDoubleComplex *dwork[], magma_int_t ldda,
              magma_queue_t stream[][3], magma_int_t *info)
{
    magma_int_t k;
    if( lapackf77_lsame(uplo, "U") ) {
        magma_int_t j, jj, jb, mj;

        /* go through each column */
        for (j=off_j+NB; j<n; j+=nb) {
            jj =  (j-off_j)/(nb*num_gpus);
            k  = ((j-off_j)/nb)%num_gpus;

            jb = min(nb, (n-j));
            if(j+jb < off_j+m)
                mj = (j-off_i)+jb;
            else
                mj = m;

            magma_setdevice(k);
            magma_zgetmatrix_async( mj, jb,
                                    dA(k, 0, jj*nb), ldda,
                                    A(off_i, j),     lda,
                                    stream[k][0] );
            magma_queue_sync( stream[k][0] );
        }
    } else {
        magma_int_t i, ii, ib, ni;

        /* go through each row */
        for(i=off_i+NB; i<m; i+=nb) {
            ii = (i-off_i)/(nb*num_gpus);
            k  = ((i-off_i)/nb)%num_gpus;

            ib = min(nb, (m-i));
            if(i+ib < off_i+n)
                ni = (i-off_i)+ib;
            else
                ni = n;

            magma_setdevice(k);
            magma_zgetmatrix_async( ib, ni,
                                    dA(k, ii*nb, 0), ldda,
                                    A(i, off_j),     lda,
                                    stream[k][0] );
            magma_queue_sync( stream[k][0] );
        }
    }
    /*for( k=0; k<num_gpus; k++ ) {
        magma_setdevice(k);
        magma_queue_sync( stream[k][0] );
    }*/
    magma_setdevice(0);

    return *info;
}

开发者ID:soulsheng，项目名称:magma，代码行数:59，代码来源:zpotrf3_mgpu.cpp

示例2: magma_zhtodhe

// ----------------------------------------------------------------------
// TODO info is unused
extern "C" magma_int_t
magma_zhtodhe(
    magma_int_t ngpu,
    magma_uplo_t uplo, magma_int_t n, magma_int_t nb,
    magmaDoubleComplex     *A,   magma_int_t lda,
    magmaDoubleComplex_ptr dA[], magma_int_t ldda,
    magma_queue_t queues[][10],
    magma_int_t *info)
{
    magma_device_t orig_dev;
    magma_getdevice( &orig_dev );
    
    magma_int_t k;
    if (uplo == MagmaLower) {
        /* go through each block-column */
        magma_int_t j, jj, jb, mj;
        for (j=0; j < n; j += nb) {
            jj =  j/(nb*ngpu);
            k  = (j/nb)%ngpu;
            
            jb = min(nb, (n-j));
            mj = n-j;
            
            magma_setdevice( k );
            magma_zsetmatrix_async( mj, jb,
                                     A(j,j),         lda,
                                    dA(k, j, jj*nb), ldda,
                                    queues[k][0] );
        }
    }
    else {
        /* go through each block-column */
        magma_int_t j, jj, jb, mj;
        for (j=0; j < n; j += nb) {
            jj =  j/(nb*ngpu);
            k  = (j/nb)%ngpu;
            
            jb = min(nb, (n-j));
            mj = j+jb;
            
            magma_setdevice( k );
            magma_zsetmatrix_async( mj, jb,
                                     A(0, j),        lda,
                                    dA(k, 0, jj*nb), ldda,
                                    queues[k][0] );
        }
    }
    for( k=0; k < ngpu; k++ ) {
        magma_setdevice( k );
        magma_queue_sync( queues[k][0] );
    }
    magma_setdevice( orig_dev );
    
    return *info;
}

开发者ID:cjy7117，项目名称:FT-MAGMA，代码行数:57，代码来源:zhetrd_mgpu.cpp

示例3: magma_shtodhe

extern "C" magma_int_t
magma_shtodhe(magma_int_t num_gpus, magma_uplo_t uplo, magma_int_t n, magma_int_t nb,
              float *A, magma_int_t lda,
              float **dA, magma_int_t ldda,
              magma_queue_t stream[][10], magma_int_t *info)
{
    magma_device_t orig_dev;
    magma_getdevice( &orig_dev );
    
    magma_int_t k;
    if (uplo == MagmaLower) {
        /* go through each block-column */
        magma_int_t j, jj, jb, mj;
        for (j=0; j < n; j += nb) {
            jj =  j/(nb*num_gpus);
            k  = (j/nb)%num_gpus;
            
            jb = min(nb, (n-j));
            mj = n-j;
            
            magma_setdevice(k);
            magma_ssetmatrix_async( mj, jb,
                                     A(j,j),         lda,
                                    dA(k, j, jj*nb), ldda,
                                    stream[k][0] );
        }
    }
    else {
        /* go through each block-column */
        magma_int_t j, jj, jb, mj;
        for (j=0; j < n; j += nb) {
            jj =  j/(nb*num_gpus);
            k  = (j/nb)%num_gpus;
            
            jb = min(nb, (n-j));
            mj = j+jb;
            
            magma_setdevice(k);
            magma_ssetmatrix_async( mj, jb,
                                     A(0, j),        lda,
                                    dA(k, 0, jj*nb), ldda,
                                    stream[k][0] );
        }
    }
    for( k=0; k < num_gpus; k++ ) {
        magma_setdevice(k);
        magma_queue_sync(stream[k][0]);
    }
    magma_setdevice( orig_dev );
    
    return *info;
}

开发者ID:cjy7117，项目名称:DVFS-MAGMA，代码行数:52，代码来源:ssytrd_mgpu.cpp

示例4: ParFESpace

Operator &ParNonlinearForm::GetGradient(const Vector &x) const
{
   ParFiniteElementSpace *pfes = ParFESpace();

   pGrad.Clear();

   NonlinearForm::GetGradient(x); // (re)assemble Grad, no b.c.

   OperatorHandle dA(pGrad.Type()), Ph(pGrad.Type());

   if (fnfi.Size() == 0)
   {
      dA.MakeSquareBlockDiag(pfes->GetComm(), pfes->GlobalVSize(),
                             pfes->GetDofOffsets(), Grad);
   }
   else
   {
      MFEM_ABORT("TODO: assemble contributions from shared face terms");
   }

   // TODO - construct Dof_TrueDof_Matrix directly in the pGrad format
   Ph.ConvertFrom(pfes->Dof_TrueDof_Matrix());
   pGrad.MakePtAP(dA, Ph);

   // Impose b.c. on pGrad
   OperatorHandle pGrad_e;
   pGrad_e.EliminateRowsCols(pGrad, ess_tdof_list);

   return *pGrad.Ptr();
}

开发者ID:LLNL，项目名称:mfem，代码行数:30，代码来源:pnonlinearform.cpp

示例5: cmaxent_fortran

void cmaxent_fortran( double* xqmc, double* xtau, int32_t len, double xmom1, double ( *xker ) ( const double&, double&, double& ),
		      double ( *backtrans ) ( double&, double&, double& ), double beta, double* alpha_tot, int32_t n_alpha, int32_t ngamma, double omega_start, double omega_end,
		      int32_t omega_points, int32_t nsweeps, int32_t nbins, int32_t nwarmup,/* double* u,*/ double* sigma)
{
  std::string fr("Aom"); std::string dA("dump_Aom"); std::string ml("max_stoch_log"); std::string energies("energies"); std::string bf("best_fit"); std::string dump("dump");
  cmaxent(xqmc, xtau, len, xmom1, xker, backtrans, beta, alpha_tot, n_alpha, ngamma, omega_start, omega_end, omega_points, nsweeps, nbins, nwarmup,
	  fr, dA, ml, energies, bf, dump, /*u*/NULL, sigma);
}

开发者ID:CaptainSifff，项目名称:cmaxent，代码行数:8，代码来源:cmaxent_fortran_interface.cpp

示例6: testDeviceVector

void testDeviceVector()
{
    const int aSize = 64;
    std::vector<int> hA(aSize), hB(aSize);
    bolt::cl::device_vector<int> dA(aSize), dB(aSize);

    for(int i=0; i<aSize; i++) {
        hA[i] = hB[i] = dB[i] = dA[i] = i;
    };

    int hSum = std::inner_product(hA.begin(), hA.end(), hB.begin(), 1);

	int sum = bolt::cl::inner_product(  dA.begin(), dA.end(),
                                        dB.begin(), 1, bolt::cl::plus<int>(), bolt::cl::multiplies<int>()  );
};

开发者ID:mdlh，项目名称:Bolt，代码行数:15，代码来源:InnerProductDeviceVector.cpp

示例7: testDeviceVector

void testDeviceVector()
{
    const int aSize = 1000;
    std::vector<int> hA(aSize);
    bolt::cl::device_vector<int> dA(aSize);

    for(int i=0; i<aSize; i++) {
        hA[i] = i;
        dA[i] = i;
    };

    std::vector<int>::iterator smaxdex = std::max_element(hA.begin(), hA.end());
     bolt::cl::device_vector<int>::iterator bmaxdex = bolt::cl::max_element(dA.begin(), dA.end(),bolt::cl::greater<int>());

};

开发者ID:K-Vamshi-Krishna，项目名称:Bolt，代码行数:15，代码来源:MaxElementDeviceVector.cpp

示例8: R

void GLTorus::draw()
{
   GLfloat R(m_majorRadius);
   GLfloat r(m_minorRadius);
   GLfloat dA(m_angle/m_majorSegments);
   GLfloat da(TwoPi/m_minorSegments);
   GLfloat cosTheta[2], sinTheta[2], cosPhi, sinPhi;
   Vec v, n;
   
   for (int i = 0; i < m_majorSegments; ++i) {
       cosTheta[0] = cos( i   *dA);
       cosTheta[1] = cos((i+1)*dA);
       sinTheta[0] = sin( i   *dA);
       sinTheta[1] = sin((i+1)*dA);

       glBegin(GL_QUAD_STRIP);
       for (int j = 0; j <= m_minorSegments; ++j) {
           cosPhi = cos(j*da);
           sinPhi = sin(j*da);

           for (int k = 0; k <= 1; ++k) {
               v.x = (R+r*cosPhi) * cosTheta[k];
               v.y = (R+r*cosPhi) * sinTheta[k];
               v.z = r*sinPhi;

               n.x = R*cosTheta[k];
               n.y = R*sinTheta[k];
               n.z = 0;
               n = (v-n).unit();

               glNormal3f(n.x, n.y, n.z); 
               glVertex3f(v.x, v.y, v.z);
           }
       }
       glEnd();
   }     
}

开发者ID:Tyf0n，项目名称:IQmol，代码行数:37，代码来源:GLShape.C

示例9: main

int main( int argc, char** argv )
{
    TESTING_INIT();
    
    real_Double_t   gflops, t1, t2;
    double c_neg_one = MAGMA_D_NEG_ONE;
    magma_int_t ione = 1;
    const char trans[] = { 'N', 'C', 'T' };
    const char uplo[]  = { 'L', 'U' };
    const char diag[]  = { 'U', 'N' };
    const char side[]  = { 'L', 'R' };
    
    double  *A,  *B,  *C,   *C2, *LU;
    double *dA, *dB, *dC1, *dC2;
    double alpha = MAGMA_D_MAKE( 0.5, 0.1 );
    double beta  = MAGMA_D_MAKE( 0.7, 0.2 );
    double dalpha = 0.6;
    double dbeta  = 0.8;
    double work[1], error, total_error;
    magma_int_t ISEED[4] = {0,0,0,1};
    magma_int_t m, n, k, size, maxn, ld, info;
    magma_int_t *piv;
    magma_err_t err;
    
    magma_opts opts;
    parse_opts( argc, argv, &opts );
    
    printf( "Compares magma wrapper function to cublas function; all diffs should be exactly 0.\n\n" );
    
    total_error = 0.;
    for( int i = 0; i < opts.ntest; ++i ) {
        m = opts.msize[i];
        n = opts.nsize[i];
        k = opts.ksize[i];
        printf("=========================================================================\n");
        printf( "M %d, N %d, K %d\n", (int) m, (int) n, (int) k );
        
        // allocate matrices
        // over-allocate so they can be any combination of {m,n,k} x {m,n,k}.
        maxn = max( max( m, n ), k );
        ld = maxn;
        size = maxn*maxn;
        err = magma_malloc_cpu( (void**) &piv, maxn*sizeof(magma_int_t) );  assert( err == 0 );
        err = magma_dmalloc_pinned( &A,  size );  assert( err == 0 );
        err = magma_dmalloc_pinned( &B,  size );  assert( err == 0 );
        err = magma_dmalloc_pinned( &C,  size );  assert( err == 0 );
        err = magma_dmalloc_pinned( &C2, size );  assert( err == 0 );
        err = magma_dmalloc_pinned( &LU, size );  assert( err == 0 );
        err = magma_dmalloc( &dA,  size );        assert( err == 0 );
        err = magma_dmalloc( &dB,  size );        assert( err == 0 );
        err = magma_dmalloc( &dC1, size );        assert( err == 0 );
        err = magma_dmalloc( &dC2, size );        assert( err == 0 );
        
        // initialize matrices
        size = maxn*maxn;
        lapackf77_dlarnv( &ione, ISEED, &size, A  );
        lapackf77_dlarnv( &ione, ISEED, &size, B  );
        lapackf77_dlarnv( &ione, ISEED, &size, C  );
        
        printf( "========== Level 1 BLAS ==========\n" );
        
        // ----- test DSWAP
        // swap 2nd and 3rd columns of dA, then copy to C2 and compare with A
        assert( n >= 4 );
        magma_dsetmatrix( m, n, A, ld, dA, ld );
        magma_dsetmatrix( m, n, A, ld, dB, ld );
        magma_dswap( m, dA(0,1), 1, dA(0,2), 1 );
        magma_dswap( m, dB(0,1), 1, dB(0,2), 1 );
        
        // check results, storing diff between magma and cuda calls in C2
        cublasDaxpy( ld*n, c_neg_one, dA, 1, dB, 1 );
        magma_dgetmatrix( m, n, dB, ld, C2, ld );
        error = lapackf77_dlange( "F", &m, &k, C2, &ld, work );
        total_error += error;
        printf( "dswap             diff %.2g\n", error );
        
        // ----- test IDAMAX
        // get argmax of column of A
        magma_dsetmatrix( m, k, A, ld, dA, ld );
        error = 0;
        for( int j = 0; j < k; ++j ) {
            magma_int_t i1 = magma_idamax( m, dA(0,j), 1 );
            magma_int_t i2 = cublasIdamax( m, dA(0,j), 1 );
            assert( i1 == i2 );
            error += abs( i1 - i2 );
        }
        total_error += error;
        gflops = (double)m * k / 1e9;
        printf( "idamax            diff %.2g\n", error );
        printf( "\n" );
        
        printf( "========== Level 2 BLAS ==========\n" );
        
        // ----- test DGEMV
        // c = alpha*A*b + beta*c,  with A m*n; b,c m or n-vectors
        // try no-trans/trans
        for( int ia = 0; ia < 3; ++ia ) {
            magma_dsetmatrix( m, n, A,  ld, dA,  ld );
            magma_dsetvector( maxn, B, 1, dB,  1 );
            magma_dsetvector( maxn, C, 1, dC1, 1 );
//.........这里部分代码省略.........

开发者ID:soulsheng，项目名称:magma，代码行数:101，代码来源:testing_dblas.cpp

示例10: dimension


//.........这里部分代码省略.........
        H(i) = I - tau * v * v'

    where tau is a complex scalar, and v is a complex vector with
    v(1:i) = 0 and v(i+1) = 1; v(i+1:n) is stored on exit in A(i+1:n,i),
    and tau in TAU(i).

    The elements of the vectors v together form the n-by-nb matrix V
    which is needed, with W, to apply the transformation to the unreduced
    part of the matrix, using a Hermitian rank-2k update of the form:
    A := A - V*W' - W*V'.

    The contents of A on exit are illustrated by the following examples
    with n = 5 and nb = 2:

    if UPLO = MagmaUpper:                       if UPLO = MagmaLower:

        (  a   a   a   v4  v5 )              (  d                  )
        (      a   a   v4  v5 )              (  1   d              )
        (          a   1   v5 )              (  v1  1   a          )
        (              d   1  )              (  v1  v2  a   a      )
        (                  d  )              (  v1  v2  a   a   a  )

    where d denotes a diagonal element of the reduced matrix, a denotes
    an element of the original matrix that is unchanged, and vi denotes
    an element of the vector defining H(i).

    @ingroup magma_cheev_aux
    ********************************************************************/
extern "C" magma_int_t
magma_clatrd2(magma_uplo_t uplo, magma_int_t n, magma_int_t nb,
              magmaFloatComplex *A,  magma_int_t lda,
              float *e, magmaFloatComplex *tau,
              magmaFloatComplex *W,  magma_int_t ldw,
              magmaFloatComplex *dA, magma_int_t ldda,
              magmaFloatComplex *dW, magma_int_t lddw,
              magmaFloatComplex *dwork, magma_int_t ldwork)
{
#define A(i, j) (A + (j)*lda + (i))
#define W(i, j) (W + (j)*ldw + (i))

#define dA(i, j) (dA + (j)*ldda + (i))
#define dW(i, j) (dW + (j)*lddw + (i))

    magma_int_t i;

    magmaFloatComplex c_neg_one = MAGMA_C_NEG_ONE;
    magmaFloatComplex c_one     = MAGMA_C_ONE;
    magmaFloatComplex c_zero    = MAGMA_C_ZERO;

    magmaFloatComplex value = MAGMA_C_ZERO;

    magma_int_t ione = 1;

    magma_int_t i_n, i_1, iw;

    magmaFloatComplex alpha;
    magmaFloatComplex *f;

    if (n <= 0) {
        return 0;
    }

    magma_queue_t stream;
    magma_queue_create( &stream );
    magma_cmalloc_cpu( &f, n );
    assert( f != NULL );  // TODO return error, or allocate outside clatrd

开发者ID:EmergentOrder，项目名称:magma，代码行数:67，代码来源:clatrd2.cpp

示例11: magmablas_ssymm_mgpu_com

void magmablas_ssymm_mgpu_com(
    magma_side_t side, magma_uplo_t uplo, magma_int_t m, magma_int_t n,
    float alpha,
    float *dA[],    magma_int_t ldda,  magma_int_t offset,
    float *dB[],    magma_int_t lddb,
    float beta,     float *dC[], magma_int_t lddc,
    float *dwork[], magma_int_t dworksiz,
    float *C,       magma_int_t ldc,
    float *work[],  magma_int_t worksiz,
    magma_int_t ngpu, magma_int_t nb, 
    magma_queue_t streams[][20], magma_int_t nstream, 
    magma_event_t redevents[][MagmaMaxGPUs*MagmaMaxGPUs+10], magma_int_t nbevents, 
    magma_int_t gnode[MagmaMaxGPUs][MagmaMaxGPUs+2], magma_int_t nbcmplx )
{
    #define dA(dev, i, j) (dA[dev] + (i) + (j)*ldda)
    #define dB(dev, i, j) (dB[dev] + (i) + (j)*lddb)
    #define dC(dev, i, j) (dC[dev] + (i) + (j)*lddc)
    #define dwork(dev, i, j) (dwork[dev] + (i) + (j)*lddwork)
    #define C(i, j) (C + (i) + (j)*ldc)
    //printf("####################################################\n");
    //printf("                      start ssymm                   \n");
    //printf("####################################################\n");
   
    if ( side != MagmaLeft || uplo != MagmaLower ) {
        fprintf( stderr, "%s: only Left Lower implemented\n", __func__ );
    }
    
    assert( ldda >= m );
    assert( lddb >= m );
    assert( lddc >= m );
    assert( nstream >= ngpu );
    assert( nbevents >= ngpu*ngpu );
   
    
    float c_one  = MAGMA_S_ONE;

    float *dwork1[MagmaMaxGPUs];
    float *dwork2[MagmaMaxGPUs];


    magma_int_t maxgsize    = n*m;
    magma_int_t lddwork = lddc;
    magma_int_t ldwork  = m;
    for( magma_int_t dev = 0; dev < ngpu; ++dev ) {
        dwork1[dev] = dwork[dev];  // size of dwork1 is n*lddwork
        dwork2[dev] = dwork[dev]+n*lddwork;  // size of dwork2 is maxgsize*ngpu
    }
    assert( dworksiz >= (n*lddwork+maxgsize*ngpu) );
    assert( worksiz  >= (n*ldwork) );

        
    magma_device_t cdev;
    magma_getdevice( &cdev );
    magma_queue_t cstream;
    magmablasGetKernelStream(&cstream);


    magma_int_t dev, devperm, myblk, mycolsize, myblkoffst;
    magma_int_t gmaster;
    magma_int_t masterdev, lcdev, lccolsize, myngpu;

    magma_int_t stdev       = (offset/nb)%ngpu;  
    magma_int_t blockoffset = offset % nb;  
    magma_int_t fstblksiz   = 0;
    if(blockoffset>0){
        fstblksiz   = min(m, (nb - blockoffset));
    }
    //magma_int_t nbblk       = magma_ceildiv(m, nb);
    magma_int_t nbblk       = magma_ceildiv((m+blockoffset), nb);
    magma_int_t remm        = m- fstblksiz;
    magma_int_t nbblkoffst  = offset/nb;


    magma_int_t nblstblks = -1;
    magma_int_t devlstblk = -1;
    magma_int_t lstblksiz = remm%nb;
    if(lstblksiz>0){
        nblstblks = nbblk%ngpu;
        devlstblk = (nblstblks-1+ngpu)%ngpu;
    }

    magma_int_t nbcmplxactive =  0;
    magma_int_t cmplxisactive[MagmaMaxGPUs];
    magma_int_t gpuisactive[MagmaMaxGPUs];
    memset(gpuisactive, 0, MagmaMaxGPUs*sizeof(magma_int_t));
    memset(cmplxisactive, 0, MagmaMaxGPUs*sizeof(magma_int_t));


    for( magma_int_t dev = 0; dev < ngpu; ++dev ) {
        magma_setdevice( dev );
        magmablasSetKernelStream( streams[ dev ][ 0 ] );
        cudaMemset(dwork(dev,0,0), 0, (lddwork)*(n)*sizeof(float) );
        // put all dC on all dev to 0 except the one which
        // hold i==0 because this one has to multiply by beta.
        if(dev!=stdev){
           cudaMemset(dC(dev,0,0), 0, (lddc)*(n)*sizeof(float) );
        }
    }

    magma_int_t newoffset = offset;
//.........这里部分代码省略.........

开发者ID:XapaJIaMnu，项目名称:magma，代码行数:101，代码来源:ssymm_mgpu.cpp

示例12: H

/**
    Purpose
    -------
    SORGQR generates an M-by-N REAL matrix Q with orthonormal columns,
    which is defined as the first N columns of a product of K elementary
    reflectors of order M

          Q  =  H(1) H(2) . . . H(k)

    as returned by SGEQRF_GPU.

    Arguments
    ---------
    @param[in]
    m       INTEGER
            The number of rows of the matrix Q. M >= 0.

    @param[in]
    n       INTEGER
            The number of columns of the matrix Q. M >= N >= 0.

    @param[in]
    k       INTEGER
            The number of elementary reflectors whose product defines the
            matrix Q. N >= K >= 0.

    @param[in,out]
    dA      REAL array A on the GPU device,
            dimension (LDDA,N). On entry, the i-th column must contain
            the vector which defines the elementary reflector H(i), for
            i = 1,2,...,k, as returned by SGEQRF_GPU in the first k
            columns of its array argument A.
            On exit, the M-by-N matrix Q.

    @param[in]
    ldda    INTEGER
            The first dimension of the array A. LDDA >= max(1,M).

    @param[in]
    tau     REAL array, dimension (K)
            TAU(i) must contain the scalar factor of the elementary
            reflector H(i), as returned by SGEQRF_GPU.

    @param[in]
    dT      REAL work space array on the GPU device,
            dimension (MIN(M, N) )*NB.
            This must be the 6th argument of magma_sgeqrf_gpu
            [ note that if N here is bigger than N in magma_sgeqrf_gpu,
              the workspace requirement DT in magma_sgeqrf_gpu must be
              as specified in this routine ].

    @param[in]
    nb      INTEGER
            This is the block size used in SGEQRF_GPU, and correspondingly
            the size of the T matrices, used in the factorization, and
            stored in DT.

    @param[out]
    info    INTEGER
      -     = 0:  successful exit
      -     < 0:  if INFO = -i, the i-th argument has an illegal value

    @ingroup magma_ssyev_2stage
    ********************************************************************/
extern "C" magma_int_t
magma_sorgqr_2stage_gpu(magma_int_t m, magma_int_t n, magma_int_t k,
                 float *dA, magma_int_t ldda,
                 float *tau, float *dT,
                 magma_int_t nb, magma_int_t *info)
{
    #define dA(a_1,a_2) (dA + (a_2)*(ldda) + (a_1))
    #define dT(a_1)     (dT + (a_1)*nb)

    float c_zero = MAGMA_S_ZERO;
    float c_one  = MAGMA_S_ONE;
    
    magma_int_t  i__1, i__2, i__3;
    //magma_int_t lwork;
    magma_int_t i, ib, ki, kk;  //, iinfo;
    //magma_int_t lddwork = min(m, n);
    //float *work, *panel;
    float *dwork;
    //magma_queue_t stream[2];
    magma_int_t ldt=nb; // need to be an input parameter

    *info = 0;
    if (m < 0) {
        *info = -1;
    } else if ((n < 0) || (n > m)) {
        *info = -2;
    } else if ((k < 0) || (k > n)) {
        *info = -3;
    } else if (ldda < max(1,m)) {
        *info = -5;
    }
    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

//.........这里部分代码省略.........

开发者ID:XapaJIaMnu，项目名称:magma，代码行数:101，代码来源:sorgqr_2stage_gpu.cpp

示例13: elements

/**
    Purpose
    -------
    SGETRF computes an LU factorization of a general M-by-N matrix A
    using partial pivoting with row interchanges.  This version does not
    require work space on the GPU passed as input. GPU memory is allocated
    in the routine.

    The factorization has the form
        A = P * L * U
    where P is a permutation matrix, L is lower triangular with unit
    diagonal elements (lower trapezoidal if m > n), and U is upper
    triangular (upper trapezoidal if m < n).

    This is the right-looking Level 3 BLAS version of the algorithm.

    It uses 2 queues to overlap communication and computation.

    Arguments
    ---------
    @param[in]
    m       INTEGER
            The number of rows of the matrix A.  M >= 0.

    @param[in]
    n       INTEGER
            The number of columns of the matrix A.  N >= 0.

    @param[in,out]
    A       REAL array, dimension (LDA,N)
            On entry, the M-by-N matrix to be factored.
            On exit, the factors L and U from the factorization
            A = P*L*U; the unit diagonal elements of L are not stored.
    \n
            Higher performance is achieved if A is in pinned memory, e.g.
            allocated using magma_malloc_pinned.

    @param[in]
    lda     INTEGER
            The leading dimension of the array A.  LDA >= max(1,M).

    @param[out]
    ipiv    INTEGER array, dimension (min(M,N))
            The pivot indices; for 1 <= i <= min(M,N), row i of the
            matrix was interchanged with row IPIV(i).

    @param[out]
    info    INTEGER
      -     = 0:  successful exit
      -     < 0:  if INFO = -i, the i-th argument had an illegal value
                  or another error occured, such as memory allocation failed.
      -     > 0:  if INFO = i, U(i,i) is exactly zero. The factorization
                  has been completed, but the factor U is exactly
                  singular, and division by zero will occur if it is used
                  to solve a system of equations.

    @ingroup magma_sgesv_comp
    ********************************************************************/
extern "C" magma_int_t
magma_sgetrf(
    magma_int_t m, magma_int_t n,
    float *A, magma_int_t lda,
    magma_int_t *ipiv,
    magma_int_t *info)
{
    #ifdef HAVE_clBLAS
    #define  dA(i_, j_)     dA, ((i_)*nb  + (j_)*nb*ldda + dA_offset)
    #define dAT(i_, j_)    dAT, ((i_)*nb*lddat + (j_)*nb + dAT_offset)
    #define dwork(i_)    dwork, (i_)
    #else
    #define  dA(i_, j_) (   dA + (i_)*nb  + (j_)*nb*ldda)
    #define dAT(i_, j_) (  dAT + (i_)*nb*lddat + (j_)*nb)
    #define dwork(i_)   (dwork + (i_))
    #endif
    
    // Constants
    const float c_one     = MAGMA_S_ONE;
    const float c_neg_one = MAGMA_S_NEG_ONE;
    
    // Local variables
    float *work;
    magmaFloat_ptr dA, dAT, dwork;
    magma_int_t iinfo, nb;

    /* Check arguments */
    *info = 0;
    if (m < 0)
        *info = -1;
    else if (n < 0)
        *info = -2;
    else if (lda < max(1,m))
        *info = -4;

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

    /* Quick return if possible */
    if (m == 0 || n == 0)
//.........这里部分代码省略.........

开发者ID:xulunfan，项目名称:magma，代码行数:101，代码来源:sgetrf.cpp

示例14: BlockOperator

BlockOperator & ParBlockNonlinearForm::GetGradient(const Vector &x) const
{
   if (pBlockGrad == NULL)
   {
      pBlockGrad = new BlockOperator(block_trueOffsets);
   }

   Array<const ParFiniteElementSpace *> pfes(fes.Size());

   for (int s1=0; s1<fes.Size(); ++s1)
   {
      pfes[s1] = ParFESpace(s1);

      for (int s2=0; s2<fes.Size(); ++s2)
      {
         phBlockGrad(s1,s2)->Clear();
      }
   }

   GetLocalGradient(x); // gradients are stored in 'Grads'

   if (fnfi.Size() > 0)
   {
      MFEM_ABORT("TODO: assemble contributions from shared face terms");
   }

   for (int s1=0; s1<fes.Size(); ++s1)
   {
      for (int s2=0; s2<fes.Size(); ++s2)
      {
         OperatorHandle dA(phBlockGrad(s1,s2)->Type()),
                        Ph(phBlockGrad(s1,s2)->Type()),
                        Rh(phBlockGrad(s1,s2)->Type());

         if (s1 == s2)
         {
            dA.MakeSquareBlockDiag(pfes[s1]->GetComm(), pfes[s1]->GlobalVSize(),
                                   pfes[s1]->GetDofOffsets(), Grads(s1,s1));
            Ph.ConvertFrom(pfes[s1]->Dof_TrueDof_Matrix());
            phBlockGrad(s1,s1)->MakePtAP(dA, Ph);
         }
         else
         {
            dA.MakeRectangularBlockDiag(pfes[s1]->GetComm(),
                                        pfes[s1]->GlobalVSize(),
                                        pfes[s2]->GlobalVSize(),
                                        pfes[s1]->GetDofOffsets(),
                                        pfes[s2]->GetDofOffsets(),
                                        Grads(s1,s2));
            Rh.ConvertFrom(pfes[s1]->Dof_TrueDof_Matrix());
            Ph.ConvertFrom(pfes[s2]->Dof_TrueDof_Matrix());

            phBlockGrad(s1,s2)->MakeRAP(Rh, dA, Ph);
         }

         pBlockGrad->SetBlock(s1, s2, phBlockGrad(s1,s2)->Ptr());
      }
   }

   return *pBlockGrad;
}

开发者ID:LLNL，项目名称:mfem，代码行数:61，代码来源:pnonlinearform.cpp

示例15: dimension


//.........这里部分代码省略.........
    int upper = (uplo == MagmaUpper);
    lquery = (lwork == -1);
    if (! upper && uplo != MagmaLower) {
        *info = -1;
    } else if (n < 0) {
        *info = -2;
    } else if (lda < max(1,n)) {
        *info = -4;
    } else if (lwork < nb*n && ! lquery) {
        *info = -9;
    }

    /* Determine the block size. */
    ldw = n;
    lddw = ldda;
    lwkopt = n * nb;
    if (*info == 0) {
        work[0] = MAGMA_Z_MAKE( lwkopt, 0 );
    }

    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    else if (lquery)
        return *info;

    /* Quick return if possible */
    if (n == 0) {
        work[0] = c_one;
        return *info;
    }

    magmaDoubleComplex *dA;
    #ifdef FAST_HEMV
    magma_int_t ldwork2 = ldda*ceildiv(n,64);
    #else
    magma_int_t ldwork2 = 0;
    #endif
    if (MAGMA_SUCCESS != magma_zmalloc( &dA, ldda*n + 2*lddw*nb + ldwork2 )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }

    magmaDoubleComplex *dwork  = dA + ldda*n;
    #ifdef FAST_HEMV
    magmaDoubleComplex *dwork2 = dwork + 2*lddw*nb;
    #endif

    //if (n < 2048)
    //    nx = n;
    //else
    //    nx = 512;
    nx = min( 128, n );  // nx <= n is required

    // clear out dwork in case it has NANs (used as y in zhemv)
    // rest of dwork (used as work in magmablas_zhemv) doesn't need to be cleared
    magmablas_zlaset( MagmaFull, n, nb, c_zero, c_zero, dwork, lddw );

    if (upper) {
        /* Copy the matrix to the GPU */
        magma_zsetmatrix( n, n, A(0, 0), lda, dA(0, 0), ldda );

        /* Reduce the upper triangle of A.
           Columns 1:kk are handled by the unblocked method. */
        kk = n - (n - nx + nb - 1) / nb * nb;

开发者ID:cjy7117，项目名称:FT-MAGMA，代码行数:67，代码来源:zhetrd.cpp

示例16: magma_zhegst


//.........这里部分代码省略.........
    /* Test the input parameters. */
    *info = 0;
    if (itype<1 || itype>3){
        *info = -1;
    }else if ((! upper) && (! lapackf77_lsame(uplo_, "L"))) {
        *info = -2;
    } else if (n < 0) {
        *info = -3;
    } else if (lda < max(1,n)) {
        *info = -5;
    }else if (ldb < max(1,n)) {
        *info = -7;
    }
    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }
    
    /* Quick return */
    if ( n == 0 )
        return *info;
    
    if (MAGMA_SUCCESS != magma_zmalloc( &dw, 2*n*n )) {
        *info = MAGMA_ERR_DEVICE_ALLOC;
        return *info;
    }
    
    nb = magma_get_zhegst_nb(n);
    
    magma_queue_t stream[2];
    magma_queue_create( &stream[0] );
    magma_queue_create( &stream[1] );
    
    magma_zsetmatrix( n, n, A(0, 0), lda, dA(0, 0), ldda );
    magma_zsetmatrix( n, n, B(0, 0), ldb, dB(0, 0), lddb );
    
    /* Use hybrid blocked code */
    
    if (itype==1) {
        if (upper) {
            /* Compute inv(U')*A*inv(U) */
            
            for(k = 0; k<n; k+=nb){
                kb = min(n-k,nb);
                kb2= min(n-k-nb,nb);
                
                /* Update the upper triangle of A(k:n,k:n) */
                
                lapackf77_zhegst( &itype, uplo_, &kb, A(k,k), &lda, B(k,k), &ldb, info);
                
                magma_zsetmatrix_async( kb, kb,
                                        A(k, k),  lda,
                                        dA(k, k), ldda, stream[0] );
                
                if(k+kb<n){
                    magma_ztrsm(MagmaLeft, MagmaUpper, MagmaConjTrans, MagmaNonUnit,
                                kb, n-k-kb,
                                c_one, dB(k,k), lddb,
                                dA(k,k+kb), ldda);
                    
                    magma_queue_sync( stream[0] );
                    
                    magma_zhemm(MagmaLeft, MagmaUpper,
                                kb, n-k-kb,
                                c_neg_half, dA(k,k), ldda,
                                dB(k,k+kb), lddb,

开发者ID:soulsheng，项目名称:magma，代码行数:67，代码来源:zhegst.cpp

示例17: dimension

/**
    Purpose
    =======

    SSYTRF_nopiv computes the LDLt factorization of a real symmetric
    matrix A. This version does not require work space on the GPU passed
    as input. GPU memory is allocated in the routine.

    The factorization has the form
       A = U^H * D * U,   if UPLO = MagmaUpper, or
       A = L   * D * L^H, if UPLO = MagmaLower,
    where U is an upper triangular matrix, L is lower triangular, and
    D is a diagonal matrix.

    This is the block version of the algorithm, calling Level 3 BLAS.

    Arguments
    ---------
    @param[in]
    uplo    magma_uplo_t
      -     = MagmaUpper:  Upper triangle of A is stored;
      -     = MagmaLower:  Lower triangle of A is stored.

    @param[in]
    n       INTEGER
            The order of the matrix A.  N >= 0.

    @param[in,out]
    A       REAL array, dimension (LDA,N)
            On entry, the symmetric matrix A.  If UPLO = MagmaUpper, the leading
            N-by-N upper triangular part of A contains the upper
            triangular part of the matrix A, and the strictly lower
            triangular part of A is not referenced.  If UPLO = MagmaLower, the
            leading N-by-N lower triangular part of A contains the lower
            triangular part of the matrix A, and the strictly upper
            triangular part of A is not referenced.
    \n
            On exit, if INFO = 0, the factor U or L from the Cholesky
            factorization A = U^H D U or A = L D L^H.
    \n
            Higher performance is achieved if A is in pinned memory.

    @param[in]
    lda     INTEGER
            The leading dimension of the array A.  LDA >= max(1,N).

    @param[out]
    info    INTEGER
      -     = 0:  successful exit
      -     < 0:  if INFO = -i, the i-th argument had an illegal value
                  if INFO = -6, the GPU memory allocation failed
      -     > 0:  if INFO = i, the leading minor of order i is not
                  positive definite, and the factorization could not be
                  completed.

    @ingroup magma_ssysv_comp
    ******************************************************************* */
extern "C" magma_int_t
magma_ssytrf_nopiv(
    magma_uplo_t uplo, magma_int_t n,
    float *A, magma_int_t lda,
    magma_int_t *info)
{
    #define  A(i, j)  ( A +(j)*lda  + (i))
    #define dA(i, j)  (dA +(j)*ldda + (i))
    #define dW(i, j)  (dW +(j)*ldda + (i))
    #define dWt(i, j) (dW +(j)*nb   + (i))

    /* Constants */
    const float c_one     = MAGMA_S_ONE;
    const float c_neg_one = MAGMA_S_NEG_ONE;
    
    /* Local variables */
    bool upper = (uplo == MagmaUpper);
    magma_int_t j, k, jb, ldda, nb, ib, iinfo;
    magmaFloat_ptr dA;
    magmaFloat_ptr dW;

    *info = 0;
    if (! upper && uplo != MagmaLower) {
        *info = -1;
    } else if (n < 0) {
        *info = -2;
    } else if (lda < max(1,n)) {
        *info = -4;
    }
    if (*info != 0) {
        magma_xerbla( __func__, -(*info) );
        return MAGMA_ERR_ILLEGAL_VALUE;
    }

    /* Quick return */
    if ( n == 0 )
      return MAGMA_SUCCESS;

    ldda = magma_roundup( n, 32 );
    nb = magma_get_ssytrf_nopiv_nb(n);
    ib = min(32, nb); // inner-block for diagonal factorization

    if ((MAGMA_SUCCESS != magma_smalloc(&dA, n *ldda)) ||
//.........这里部分代码省略.........

开发者ID:xulunfan，项目名称:magma，代码行数:101，代码来源:ssytrf_nopiv.cpp

示例18: dimension


//.........这里部分代码省略.........

    The contents of A on exit are illustrated by the following examples:

    @verbatim
    m = 6 and n = 5 (m > n):          m = 5 and n = 6 (m < n):

      (  d   e   u1  u1  u1 )           (  d   u1  u1  u1  u1  u1 )
      (  v1  d   e   u2  u2 )           (  e   d   u2  u2  u2  u2 )
      (  v1  v2  d   e   u3 )           (  v1  e   d   u3  u3  u3 )
      (  v1  v2  v3  d   e  )           (  v1  v2  e   d   u4  u4 )
      (  v1  v2  v3  v4  d  )           (  v1  v2  v3  e   d   u5 )
      (  v1  v2  v3  v4  v5 )
    @endverbatim

    where d and e denote diagonal and off-diagonal elements of B, vi
    denotes an element of the vector defining H(i), and ui an element of
    the vector defining G(i).

    @ingroup magma_sgesvd_comp
    ********************************************************************/
extern "C" magma_int_t
magma_sgebrd(
    magma_int_t m, magma_int_t n,
    float *A, magma_int_t lda, float *d, float *e,
    float *tauq, float *taup,
    float *work, magma_int_t lwork,
    magma_int_t *info)
{
#define  A(i, j) (A  + (j)*lda  + (i))
#define dA(i, j) (dA + (j)*ldda + (i))

    float c_neg_one = MAGMA_S_NEG_ONE;
    float c_one     = MAGMA_S_ONE;
    float *dA, *dwork;

    magma_int_t ncol, nrow, jmax, nb, ldda;

    magma_int_t i, j, nx;
    magma_int_t iinfo;

    magma_int_t minmn;
    magma_int_t ldwrkx, ldwrky, lwkopt;
    magma_int_t lquery;

    nb   = magma_get_sgebrd_nb(n);
    ldda = m;

    lwkopt = (m + n) * nb;
    work[0] = MAGMA_S_MAKE( lwkopt, 0. );
    lquery = (lwork == -1);
    
    /* Check arguments */
    *info = 0;
    if (m < 0) {
        *info = -1;
    } else if (n < 0) {
        *info = -2;
    } else if (lda < max(1,m)) {
        *info = -4;
    } else if (lwork < lwkopt && (! lquery) ) {
        *info = -10;
    }
    if (*info < 0) {
        magma_xerbla( __func__, -(*info) );
        return *info;
    }

开发者ID:cjy7117，项目名称:FT-MAGMA，代码行数:67，代码来源:sgebrd.cpp

示例19: magma_cgeqrf

extern "C" magma_err_t
magma_cgeqrf(magma_int_t m, magma_int_t n,
             magmaFloatComplex *A,    magma_int_t lda, magmaFloatComplex *tau,
             magmaFloatComplex *work, magma_int_t lwork,
             magma_int_t *info,
             magma_queue_t* queue )
{
    /*  -- clMAGMA (version 1.1.0) --
           Univ. of Tennessee, Knoxville
           Univ. of California, Berkeley
           Univ. of Colorado, Denver
           @date January 2014

        Purpose
        =======
        CGEQRF computes a QR factorization of a COMPLEX M-by-N matrix A:
        A = Q * R. This version does not require work space on the GPU
        passed as input. GPU memory is allocated in the routine.

        If the current stream is NULL, this version replaces it with user defined
        stream to overlap computation with communication.

        Arguments
        =========
        M       (input) INTEGER
                The number of rows of the matrix A.  M >= 0.

        N       (input) INTEGER
                The number of columns of the matrix A.  N >= 0.

        A       (input/output) COMPLEX array, dimension (LDA,N)
                On entry, the M-by-N matrix A.
                On exit, the elements on and above the diagonal of the array
                contain th

鲜花

握手

雷人

路过

鸡蛋

该文章已有0人参与评论

请发表评论

全部评论

专题导读

More+

10-27 六六分期app的软件客服如何联系？(六六分期

11-06 可心卡盟:win10系统火狐flash插件崩溃怎么

11-06 亲亲特价:怎么删除回收站图标

11-06 济南大学虚拟社区:鲁大师节能降温的具体办

11-06 xlueops.exe:无线网络安装向导

11-06 女斗合众国:win7系统cf与主机连接不稳定怎

11-06 0xc000022-[cf烟雾头]cf怎么调烟雾头

11-06 qizideyouhuo:应用程序无法正常启动0xc0000

11-06 ipz-185:win7系统vcf文件怎么打开

11-06 傻哥蹦迪:win10系统s4怎么打开usb调试

11-06 八神浩树gtaste:回收站清空了怎么恢复

11-06 妖尾之黑色守护:win10系统电脑没有1440x900

11-06 校园至尊魔王小说:win7系统浏览网页时字体

11-06 女斗合众国:win10系统访问共享文件夹提示请

11-06 tokyo hot n0654:恢复win7系统默认字体一招

11-06 雨酷仙境:设置win7系统转移临时文件夹腾出

11-06 阿穆纳伊之杖:win7系统开始菜单在右边还原

11-06 tunespotting:win10系统火狐flash插件总是

11-06 甘尔葛分析师：计谋网站seo关键词暴涨有什

11-06 蔡贵霖: 计谋网站seo关键词暴涨有什么秘密

11-06 博益网首页:ao3网页版进入不了解决方法

11-06 漏斗子专栏: 网站数据分析小白易懂精华篇

11-06 见证双虹怎么做:win7系统开启telnet命令的

11-06 颾狐蝶蜋:系统资源不足无法完成请求的服务

11-06 国光中学校歌:提交网站到alexa查询详细步骤

11-06 西安有情天:静态网页和动态网页的区别

11-06 红木雅尚斋:外部链接构造对网站的好处

11-06 前官礼遇：防止域名劫持–增强域安全性的10

11-06 密传二转答案: 中文分词算法有哪些

11-06 金泉家园邮编:百度快照劫持的表现及应对方

C++ dAASSERT函数代码示例发布时间：2022-05-30

C++ d4_assert函数代码示例发布时间：2022-05-30

剪的笔顺,诠释剪的笔画,认识剪的部首

六六分期app的软件客服如何联系？(六六分期

2023-10-27

florent37/ViewAnimator: A fluent Android

2022-08-15

florent37/Shrine-MaterialDesign2: implem

2022-08-17

CVE-2020-36276

2022-09-23

SimpleSoftwareIO/simple-sms: Send and re

2022-08-13

阅读排行榜

1 六六分期app的软件客服如何联系？(六六分期

六六分期app的软件客服如何联系？不知道吗？加qq群【895510560】即可！标题：六六分期

阅读：18088|2023-10-27

2 可心卡盟:win10系统火狐flash插件崩溃怎么

今天小编告诉大家如何处理win10系统火狐flash插件总是崩溃的问题，可能很多用户都不知

阅读：9616|2022-11-06

3 亲亲特价:怎么删除回收站图标

今天小编告诉大家如何对win10系统删除桌面回收站图标进行设置，可能很多用户都不知道

阅读：8149|2022-11-06

4 济南大学虚拟社区:鲁大师节能降温的具体办

今天小编告诉大家如何对win10系统电脑设置节能降温的设置方法，想必大家都遇到过需要

阅读：8530|2022-11-06

5 xlueops.exe:无线网络安装向导

我们在使用xp系统的过程中,经常需要对xp系统无线网络安装向导设置进行设置，可能很多

阅读：8432|2022-11-06

6 女斗合众国:win7系统cf与主机连接不稳定怎

今天小编告诉大家如何处理win7系统玩cf老是与主机连接不稳定的问题，可能很多用户都不

阅读：9345|2022-11-06

7 0xc000022-[cf烟雾头]cf怎么调烟雾头

电脑对日常生活的重要性小编就不多说了，可是一旦碰到win7系统设置cf烟雾头的问题，很

阅读：8397|2022-11-06

8 qizideyouhuo:应用程序无法正常启动0xc0000

我们在日常使用电脑的时候，有的小伙伴们可能在打开应用的时候会遇见提示应用程序无法

阅读：7833|2022-11-06

9 ipz-185:win7系统vcf文件怎么打开

今天小编告诉大家如何对win7系统打开vcf文件进行设置，可能很多用户都不知道怎么对win

阅读：8387|2022-11-06

10 傻哥蹦迪:win10系统s4怎么打开usb调试

今天小编告诉大家如何对win10系统s4开启USB调试模式进行设置，可能很多用户都不知道怎

阅读：7380|2022-11-06

客服电话

电子邮件