本文整理汇总了C++中ATL_AlignPtr函数的典型用法代码示例。如果您正苦于以下问题:C++ ATL_AlignPtr函数的具体用法?C++ ATL_AlignPtr怎么用?C++ ATL_AlignPtr使用的例子?那么恭喜您,这里精选的函数代码示例或许可以为您提供帮助。
在下文中一共展示了ATL_AlignPtr函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: cblas_cher2
/*
 * CBLAS wrapper around ATL_cher2 for single-precision complex data.
 *
 * Order     : CblasColMajor or CblasRowMajor
 * Uplo      : CblasUpper or CblasLower
 * N         : problem size, must be >= 0
 * alpha     : pointer to the scalar, read as two floats
 * X, incX   : first vector and its stride (stride must be non-zero)
 * Y, incY   : second vector and its stride (stride must be non-zero)
 * A, lda    : matrix that is updated; lda must be >= MAX(N,1)
 *
 * Unless NoCblasErrorChecks is defined, every argument is validated first;
 * on any failure the error is reported through cblas_xerbla and the routine
 * returns without calling the kernel.
 *
 * Column-major input is forwarded directly.  Row-major input is handled by
 * calling ATL_cher2 with the opposite Uplo and with X and Y exchanged; when
 * alpha is non-zero the vectors are first conjugated into aligned temporary
 * buffers (Y additionally scaled by alpha) and the kernel is called with a
 * unit scalar.
 */
void cblas_cher2(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
                 const int N, const void *alpha,
                 const void *X, const int incX,
                 const void *Y, const int incY, void *A, const int lda)
{
   int info = 2000;
   void *vx, *vy;
   float *x0, *y0;
   const float *x=X, *y=Y, *alp=alpha;
   const float one[2]={ATL_rone, ATL_rzero};

#ifndef NoCblasErrorChecks
   if (Order != CblasColMajor && Order != CblasRowMajor)
      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",
                          CblasRowMajor, CblasColMajor, Order);
   if (Uplo != CblasUpper && Uplo != CblasLower)
      info = cblas_errprn(2, info, "UPLO must be %d or %d, but is set to %d",
                          CblasUpper, CblasLower, Uplo);
   if (N < 0) info = cblas_errprn(3, info,
                        "N cannot be less than zero; is set to %d.", N);
   if (!incX) info = cblas_errprn(6, info,
                        "incX cannot be zero; is set to %d.", incX);
   if (!incY) info = cblas_errprn(8, info,
                        "incY cannot be zero; is set to %d.", incY);
   if (lda < N || lda < 1)
      info = cblas_errprn(10, info, "lda must be >= MAX(N,1): lda=%d N=%d",
                          lda, N);
   if (info != 2000)
   {
      cblas_xerbla(info, "cblas_cher2", "");
      return;
   }
#endif

/*
 * For negative strides, move to the element the kernel treats as the start.
 * The factor 2 accounts for the two floats per complex element.  A multiply
 * is used instead of "<<1" because (1-N)*inc is negative when N == 0 and
 * left-shifting a negative value is undefined behaviour.
 */
   if (incX < 0) x += (1-N)*incX*2;
   if (incY < 0) y += (1-N)*incY*2;

   if (Order == CblasColMajor)
      ATL_cher2(Uplo, N, alpha, x, incX, y, incY, A, lda);
   else if (alp[0] != ATL_rzero || alp[1] != ATL_rzero)
   {
      vx = malloc(ATL_Cachelen + ATL_MulBySize(N));
      vy = malloc(ATL_Cachelen + ATL_MulBySize(N));
      ATL_assert(vx != NULL && vy != NULL);
      x0 = ATL_AlignPtr(vx);
      y0 = ATL_AlignPtr(vy);
      /* y0 <- alpha * conj(y), x0 <- conj(x); alpha is folded into y0 */
      ATL_cmoveConj(N, alpha, y, incY, y0, 1);
      ATL_ccopyConj(N, x, incX, x0, 1);
      ATL_cher2(( (Uplo == CblasUpper) ? CblasLower : CblasUpper ),
                N, one, y0, 1, x0, 1, A, lda);
      free(vx);
      free(vy);
   }
   else  /* alpha == 0: no temporaries, just forward with swapped roles */
      ATL_cher2(( (Uplo == CblasUpper) ? CblasLower : CblasUpper ),
                N, alpha, y, incY, x, incX, A, lda);
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:56,代码来源:cblas_cher2.c
示例2: Mjoin
/*
 * herk driver for the NoTrans case.
 *
 * When K exceeds HERK_Xover the full N x N product is formed with CgemmNC
 * into an aligned scratch matrix and then merged into C by the her_put
 * variant matching beta (1, 0, or anything else).  For smaller K the
 * reference implementation is called directly.
 *
 * Only the first TYPE value behind valpha and vbeta is read here.
 */
void Mjoin(Mjoin(Mjoin(PATL,herk),UploNM),N)
   (const int N, const int K, const void *valpha, const void *A, const int lda,
    const void *vbeta, void *C, const int ldc)
{
   const TYPE zero[2] = {0.0, 0.0};
   const TYPE beta = *( (const TYPE *)vbeta );
   TYPE alpha[2];
   void *vc;
   TYPE *c;

   alpha[0] = *( (const TYPE *)valpha );

   /* small K: the copy/put overhead is not worth it, use the reference */
   if (!(K > HERK_Xover))
   {
      Mjoin(PATL,refherk)(Uplo_, AtlasNoTrans, N, K, *alpha, A, lda,
                          beta, C, ldc);
      return;
   }

   alpha[1] = 0.0;
   vc = malloc(ATL_Cachelen+ATL_MulBySize(N)*N);
   ATL_assert(vc);
   c = ATL_AlignPtr(vc);

   /* scratch <- alpha * (product of A with itself), beta applied by put */
   CgemmNC(N, N, K, alpha, A, lda, A, lda, zero, c, N);

   if ( beta == 1.0 )
      Mjoin(her_put,_b1)(N, c, vbeta, C, ldc);
   else if ( beta == 0.0 )
      Mjoin(her_put,_b0)(N, c, vbeta, C, ldc);
   else
      Mjoin(her_put,_bXi0)(N, c, vbeta, C, ldc);

   free(vc);
}
开发者ID:certik,项目名称:vendor,代码行数:26,代码来源:ATL_herk_N.c
示例3: Mjoin
/*
 * her2k helper: forms an N x N product of A and B with ATL_ammm in an
 * aligned temporary and then merges it into C using the her2k_put variant
 * selected by beta.
 *
 * Returns 1 if the temporary is larger than ATL_MaxMalloc or cannot be
 * allocated (nothing has been written to C in that case), 0 on success.
 *
 * NOTE(review): the two #endif lines after the function name close
 * conditionals that open before this excerpt; they presumably select among
 * several alternative function names -- confirm against the full file.
 */
int Mjoin(PATL,her2kLN)
#endif
#endif
(const int N, const int K, const void *valpha, const void *A, const int lda,
const void *B, const int ldb, const void *vbeta, void *C, const int ldc)
{
int i;
void *vc=NULL;
TYPE *c;
/* only the first TYPE value behind vbeta is inspected here */
const TYPE beta =*( (const TYPE *)vbeta );
const TYPE zero[2]={0.0, 0.0};
/* size of the N x N temporary as computed by ATL_MulBySize */
i = ATL_MulBySize(N)*N;
/* vc stays NULL when the request exceeds ATL_MaxMalloc, so both failure
   modes are caught by the single NULL test below */
if (i <= ATL_MaxMalloc) vc = malloc(ATL_Cachelen+i);
if (vc == NULL) return(1);
c = ATL_AlignPtr(vc);
/* Transpose_ only changes which operand carries the ConjTrans flag; the
   trailing arguments of the call are shared after the #endif */
#ifdef Transpose_
ATL_ammm(AtlasConjTrans, AtlasNoTrans, N, N, K, valpha, A, lda, B, ldb,
#else
ATL_ammm(AtlasNoTrans, AtlasConjTrans, N, N, K, valpha, A, lda, B, ldb,
#endif
zero, c, N);
/* pick the put routine specialised for beta == 1, beta == 0, or other */
if ( beta == 1.0 ) Mjoin(her2k_put,_b1)(N, c, vbeta, C, ldc);
else if ( beta == 0.0 ) Mjoin(her2k_put,_b0)(N, c, vbeta, C, ldc);
else Mjoin(her2k_put,_bXi0)(N, c, vbeta, C, ldc);
free(vc);
return(0);
}
开发者ID:kevinoid,项目名称:atlas-debian,代码行数:28,代码来源:ATL_her2k.c
示例4: Mjoin
/*
 * Left-side symm driver.
 *
 * When N exceeds SYMM_Xover, A is expanded by a sycopy routine into an
 * aligned M x M scratch matrix and the product is computed with CgemmTN;
 * otherwise the reference symm is called.
 *
 * In the real (TREAL) build the scalars are loaded by value and alpha is
 * applied during the copy, so the gemm call uses one.  In the other build
 * alpha/beta are macro aliases for the incoming pointers.
 */
void Mjoin(Mjoin(PATL,symmL),UploNM)
   (const int M, const int N, const void *valpha, const void *A, const int lda,
    const void *B, const int ldb, const void *vbeta, void *C, const int ldc)
{
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR beta =*( (const SCALAR *)vbeta );
      const SCALAR one=1.0;
   #else
      #define alpha valpha
      #define beta vbeta
   #endif
   void *va;
   TYPE *a;

   /* at or below the crossover the reference code is used unchanged */
   if (!(N > SYMM_Xover))
   {
      Mjoin(PATL,refsymm)(AtlasLeft, Uplo_, M, N, alpha, A, lda, B, ldb,
                          beta, C, ldc);
      return;
   }

   va = malloc(ATL_Cachelen + (ATL_MulBySize(M)*M));
   ATL_assert(va);
   a = ATL_AlignPtr(va);
   #ifdef TREAL
      /* choose the copy variant specialised for alpha == 1 when possible */
      if ( SCALAR_IS_ONE(alpha) )
         Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_a1)(M, alpha, A, lda, a);
      else
         Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_aX)(M, alpha, A, lda, a);
      CgemmTN(M, N, M, one, a, M, B, ldb, beta, C, ldc);
   #else
      Mjoin(Mjoin(PATL,sycopy),UploNM)(M, A, lda, a);
      CgemmTN(M, N, M, valpha, a, M, B, ldb, vbeta, C, ldc);
   #endif
   free(va);
}
开发者ID:Leobin7,项目名称:Kaldi,代码行数:34,代码来源:ATL_symmL.c
示例5: Mjoin
/*
 * syr2k helper: forms an N x N product of A and B with CgemmTN in an
 * aligned temporary and then merges it into C using the syr2k_put variant
 * selected by beta.
 *
 * Returns 1 if the temporary is larger than ATL_MaxMalloc or cannot be
 * allocated (nothing has been written to C in that case), 0 on success.
 *
 * NOTE(review): the #endif after the function name closes a conditional
 * that opens before this excerpt; it presumably selects among alternative
 * function names -- confirm against the full file.
 */
int Mjoin(PATL,syr2kLT)
#endif
(const int N, const int K, const void *valpha, const void *A, const int lda,
const void *B, const int ldb, const void *vbeta, void *C, const int ldc)
{
int i;
void *vc=NULL;
TYPE *c;
/* real build: scalars are loaded by value; otherwise alpha aliases the
   incoming pointer and beta is viewed as a TYPE array */
#ifdef TREAL
const SCALAR alpha=*( (const SCALAR *)valpha );
const SCALAR beta =*( (const SCALAR *)vbeta );
const SCALAR one=1.0, zero=0.0;
#else
#define alpha valpha
const TYPE *beta=vbeta;
const TYPE one[2]={1.0,0.0}, zero[2]={0.0,0.0};
#endif
/* size of the N x N temporary as computed by ATL_MulBySize */
i = ATL_MulBySize(N)*N;
/* vc stays NULL when the request exceeds ATL_MaxMalloc, so both failure
   modes are caught by the single NULL test below */
if (i <= ATL_MaxMalloc) vc = malloc(ATL_Cachelen+i);
if (vc == NULL) return(1);
c = ATL_AlignPtr(vc);
/* the product is written with a zero beta; the real beta is applied by
   the put routine chosen below */
CgemmTN(N, N, K, alpha, A, lda, B, ldb, zero, c, N);
if ( SCALAR_IS_ONE(beta) ) Mjoin(syr2k_put,_b1)(N, c, beta, C, ldc);
else if ( SCALAR_IS_ZERO(beta) ) Mjoin(syr2k_put,_b0)(N, c, beta, C, ldc);
/* extra specialisations only compiled for TCPLX: beta == -1 and beta with
   a zero second component */
#ifdef TCPLX
else if (SCALAR_IS_NONE(beta)) Mjoin(syr2k_put,_bn1)(N, c, beta, C, ldc);
else if (beta[1] == *zero) Mjoin(syr2k_put,_bXi0)(N, c, beta, C, ldc);
#endif
else Mjoin(syr2k_put,_bX)(N, c, beta, C, ldc);
free(vc);
return(0);
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:33,代码来源:ATL_syr2k_T.c
示例6: Mjoin
/*
 * syrk driver for the Trans case.
 *
 * When K exceeds SYRK_Xover the full N x N product is formed with CgemmTN
 * into an aligned scratch matrix (using a zero beta) and then merged into C
 * by the syr_put variant matching beta.  For smaller K the reference
 * implementation is called directly.
 */
void Mjoin(Mjoin(Mjoin(PATL,syrk),UploNM),T)
   (const int N, const int K, const void *valpha, const void *A, const int lda,
    const void *vbeta, void *C, const int ldc)
{
   void *vc;
   TYPE *c;
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR beta =*( (const SCALAR *)vbeta );
      const SCALAR one=1.0, zero=0.0;
   #else
      #define alpha valpha
      const TYPE *beta=vbeta;
      const TYPE one[2]={1.0,0.0}, zero[2]={0.0,0.0};
   #endif

   /* small K: hand the whole problem to the reference routine */
   if (!(K > SYRK_Xover))
   {
      Mjoin(PATL,refsyrk)(Uplo_, AtlasTrans, N, K, alpha, A, lda,
                          beta, C, ldc);
      return;
   }

   vc = malloc(ATL_Cachelen+ATL_MulBySize(N)*N);
   ATL_assert(vc);
   c = ATL_AlignPtr(vc);
   CgemmTN(N, N, K, alpha, A, lda, A, lda, zero, c, N);

   /* dispatch on beta so the cheapest merge routine is used */
   if ( SCALAR_IS_ONE(beta) )
      Mjoin(syr_put,_b1)(N, c, beta, C, ldc);
   else if ( SCALAR_IS_ZERO(beta) )
      Mjoin(syr_put,_b0)(N, c, beta, C, ldc);
   #ifdef TCPLX
   else if ( SCALAR_IS_NONE(beta) )
      Mjoin(syr_put,_bn1)(N, c, beta, C, ldc);
   else if (beta[1] == *zero)
      Mjoin(syr_put,_bXi0)(N, c, beta, C, ldc);
   #endif
   else
      Mjoin(syr_put,_bX)(N, c, beta, C, ldc);

   free(vc);
}
开发者ID:certik,项目名称:vendor,代码行数:35,代码来源:ATL_syrk_T.c
示例7: Mjoin
/*
 * Right-side symm driver.
 *
 * When M exceeds SYMM_Xover, A is expanded by a sycopy routine into an
 * aligned N x N scratch matrix and the product is computed with ATL_ammm
 * (B times the scratch copy); otherwise the reference symm is called.
 *
 * In the real (TREAL) build alpha is applied during the copy, so the
 * multiply uses one.  In the other build alpha/beta are macro aliases for
 * the incoming pointers.
 */
void Mjoin(Mjoin(PATL,symmR),UploNM)
   (const int M, const int N, const void *valpha, const void *A, const int lda,
    const void *B, const int ldb, const void *vbeta, void *C, const int ldc)
{
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR beta =*( (const SCALAR *)vbeta );
      const SCALAR one=1.0;
   #else
      #define alpha valpha
      #define beta vbeta
   #endif
   TYPE *a;
   void *va;

   /* at or below the crossover the reference code is used unchanged */
   if (!(M > SYMM_Xover))
   {
      Mjoin(PATL,refsymm)(AtlasRight, Uplo_, M, N, alpha, A, lda, B, ldb,
                          beta, C, ldc);
      return;
   }

   va = malloc(ATL_Cachelen + ATL_MulBySize(N)*N);
   ATL_assert(va);
   a = ATL_AlignPtr(va);
   #ifdef TREAL
      /* choose the copy variant specialised for alpha == 1 when possible */
      if ( SCALAR_IS_ONE(alpha) )
         Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_a1)(N, alpha, A, lda, a);
      else
         Mjoin(Mjoin(Mjoin(PATL,sycopy),UploNM),_aX)(N, alpha, A, lda, a);
      ATL_ammm(AtlasNoTrans, AtlasNoTrans, M, N, N, one, B, ldb, a, N, beta, C, ldc);
   #else
      Mjoin(Mjoin(PATL,sycopy),UploNM)(N, A, lda, a);
      ATL_ammm(AtlasNoTrans, AtlasNoTrans, M, N, N, valpha, B, ldb, a, N, vbeta, C, ldc);
   #endif
   free(va);
}
开发者ID:kevinoid,项目名称:atlas-debian,代码行数:34,代码来源:ATL_symmR.c
示例8: Mjoin
/*
 * Left-side trmm driver.
 *
 * When N exceeds TRMM_Xover, A is copied by ATL_trcopy into an aligned
 * M x M scratch matrix and C is overwritten by a CAgemmTN call that reads
 * C as its second operand and writes C with a zero beta.  Otherwise the
 * reference trmm is called.
 *
 * In the real (TREAL) build alpha is applied during the copy, so the
 * multiply uses one; otherwise alpha is a macro alias for valpha.
 */
void Mjoin(Mjoin(PATL,trmmL),ATLP)
   (const int M, const int N, const void *valpha, const void *A, const int lda,
    void *C, const int ldc)
{
   #ifdef TREAL
      const SCALAR alpha=*( (const SCALAR *)valpha );
      const SCALAR one=1.0, zero=0.0;
   #else
      const TYPE zero[2]={0.0,0.0};
      #define alpha valpha
   #endif
   TYPE *a;
   void *va;

   /* at or below the crossover the reference code is used unchanged */
   if (!(N > TRMM_Xover))
   {
      Mjoin(PATL,reftrmm)(AtlasLeft, Uplo_, Trans_, Unit_, M, N, alpha,
                          A, lda, C, ldc);
      return;
   }

   va = malloc(ATL_Cachelen + ATL_MulBySize(M)*M);
   ATL_assert(va);
   a = ATL_AlignPtr(va);
   #ifdef TREAL
      /* choose the copy variant specialised for alpha == 1 when possible */
      if ( SCALAR_IS_ONE(alpha) )
         Mjoin(ATL_trcopy,_a1)(M, alpha, A, lda, a);
      else
         Mjoin(ATL_trcopy,_aX)(M, alpha, A, lda, a);
      CAgemmTN(M, N, M, one, a, M, C, ldc, zero, C, ldc);
   #else
      ATL_trcopy(M, A, lda, a);
      CAgemmTN(M, N, M, valpha, a, M, C, ldc, zero, C, ldc);
   #endif
   free(va);
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:32,代码来源:ATL_trmmL.c
示例9: cblas_zgerc
/*
 * CBLAS wrapper around ATL_zgerc for double-precision complex data.
 *
 * Order     : CblasColMajor or CblasRowMajor
 * M, N      : matrix dimensions, both must be >= 0
 * alpha     : pointer to the scalar
 * X, incX   : vector of M elements and its stride (non-zero)
 * Y, incY   : vector of N elements and its stride (non-zero)
 * A, lda    : matrix that is updated; lda must be >= MAX(M,1) for
 *             column-major and >= MAX(N,1) for row-major
 *
 * Unless NoCblasErrorChecks is defined, every argument is validated first;
 * on any failure the error is reported through cblas_xerbla and the routine
 * returns without calling the kernel.
 *
 * Row-major input is handled by copying alpha*conj(Y) into an aligned
 * temporary and calling ATL_zgeru with M and N (and the vectors) exchanged.
 */
void cblas_zgerc(const enum CBLAS_ORDER Order, const int M, const int N,
                 const void *alpha, const void *X, const int incX,
                 const void *Y, const int incY, void *A, const int lda)
{
   int info = 2000;
   const double *x = X, *y = Y;
   void *vy;
   double *y0;
   double one[2] = {ATL_rone, ATL_rzero};

#ifndef NoCblasErrorChecks
   if (M < 0) info = cblas_errprn(2, info,
                        "M cannot be less than zero; is set to %d.", M);
   if (N < 0) info = cblas_errprn(3, info,
                        "N cannot be less than zero; is set to %d.", N);
   if (!incX) info = cblas_errprn(6, info,
                        "incX cannot be zero; is set to %d.", incX);
   if (!incY) info = cblas_errprn(8, info,
                        "incY cannot be zero; is set to %d.", incY);
   if (Order == CblasColMajor)
   {
      if (lda < M || lda < 1)
         info = cblas_errprn(10, info, "lda must be >= MAX(M,1): lda=%d M=%d",
                             lda, M);
   }
   else if (Order == CblasRowMajor)
   {
      /* the value printed is N, so the label must say N (was "M=%d") */
      if (lda < N || lda < 1)
         info = cblas_errprn(10, info, "lda must be >= MAX(N,1): lda=%d N=%d",
                             lda, N);
   }
   else
      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",
                          CblasRowMajor, CblasColMajor, Order);
   if (info != 2000)
   {
      cblas_xerbla(info, "cblas_zgerc", "");
      return;
   }
#endif

/*
 * For negative strides, move to the element the kernel treats as the start.
 * The factor 2 accounts for the two doubles per complex element.  A
 * multiply is used instead of "<<1" because (1-len)*inc is negative when
 * the length is 0 and left-shifting a negative value is undefined.
 */
   if (incX < 0) x += (1-M)*incX*2;
   if (incY < 0) y += (1-N)*incY*2;

   if (Order == CblasColMajor)
      ATL_zgerc(M, N, alpha, x, incX, y, incY, A, lda);
   else
   {
      vy = malloc(ATL_Cachelen + ATL_MulBySize(N));
      ATL_assert(vy);
      y0 = ATL_AlignPtr(vy);
      /* y0 <- alpha * conj(y); the kernel is then called with a unit scalar */
      ATL_zmoveConj(N, alpha, y, incY, y0, 1);
      ATL_zgeru(N, M, one, y0, 1, x, incX, A, lda);
      free(vy);
   }
}
开发者ID:GorgonCryoEM,项目名称:Gorgon-CVS,代码行数:56,代码来源:cblas_zgerc.c
示例10: cblas_zher
/*
 * CBLAS wrapper around ATL_zher for double-precision complex data.
 *
 * Order     : CblasColMajor or CblasRowMajor
 * Uplo      : CblasUpper or CblasLower
 * N         : problem size, must be >= 0
 * alpha     : real scalar
 * X, incX   : vector and its stride (stride must be non-zero)
 * A, lda    : matrix that is updated; lda must be >= MAX(N,1)
 *
 * Unless NoCblasErrorChecks is defined, every argument is validated first;
 * on any failure the error is reported through cblas_xerbla and the routine
 * returns without calling the kernel.
 *
 * Row-major input is handled by calling ATL_zher with the opposite Uplo;
 * when alpha is non-zero X is first conjugated into an aligned temporary.
 */
void cblas_zher(const enum CBLAS_ORDER Order, const enum CBLAS_UPLO Uplo,
                const int N, const double alpha,
                const void *X, const int incX, void *A, const int lda)
{
   int info = 2000;
   void *vx;
   double one[2] = {ATL_rone, ATL_rzero};
   double *x0;
   const double *x=X;

#ifndef NoCblasErrorChecks
   if (Order != CblasColMajor && Order != CblasRowMajor)
      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",
                          CblasRowMajor, CblasColMajor, Order);
   if (Uplo != CblasUpper && Uplo != CblasLower)
      info = cblas_errprn(2, info, "UPLO must be %d or %d, but is set to %d",
                          CblasUpper, CblasLower, Uplo);
   if (N < 0) info = cblas_errprn(3, info,
                        "N cannot be less than zero; is set to %d.", N);
   if (!incX) info = cblas_errprn(6, info,
                        "incX cannot be zero; is set to %d.", incX);
   if (lda < N || lda < 1)
      info = cblas_errprn(8, info, "lda must be >= MAX(N,1): lda=%d N=%d",
                          lda, N);
   if (info != 2000)
   {
      cblas_xerbla(info, "cblas_zher", "");
      return;
   }
#endif

/*
 * For a negative stride, move to the element the kernel treats as the
 * start.  The factor 2 accounts for the two doubles per complex element.
 * A multiply is used instead of "<<1" because (1-N)*incX is negative when
 * N == 0 and left-shifting a negative value is undefined behaviour.
 */
   if (incX < 0) x += (1-N)*incX*2;

   if (Order == CblasColMajor)
      ATL_zher(Uplo, N, alpha, x, incX, A, lda);
   else if (alpha != ATL_rzero)
   {
      vx = malloc(ATL_Cachelen + ATL_MulBySize(N));
      ATL_assert(vx);
      x0 = ATL_AlignPtr(vx);
      /* x0 <- conj(x) (scaled by one), stored contiguously */
      ATL_zmoveConj(N, one, x, incX, x0, 1);
      ATL_zher(( (Uplo == CblasUpper) ? CblasLower : CblasUpper ),
               N, alpha, x0, 1, A, lda);
      free(vx);
   }
   else  /* alpha == 0: no temporary is needed */
      ATL_zher(( (Uplo == CblasUpper) ? CblasLower : CblasUpper ),
               N, ATL_rzero, x, incX, A, lda);
}
开发者ID:apollos,项目名称:atlas,代码行数:49,代码来源:cblas_zher.c
示例11: ATL_flushcache
/*
 * Cache flusher with three modes selected by the sign of size:
 *   size > 0  : (re)initialize -- allocate a zeroed buffer of size bytes
 *   size < 0  : flush -- read the whole buffer and return the sum
 *   size == 0 : release the buffer
 * The buffer is kept in function-local static state between calls.
 * Returns the accumulated sum in flush mode and 0.0 otherwise.
 */
double ATL_flushcache(long long size)
/*
 * flush cache by reading enough mem; note that if the compiler gets
 * really smart, may be necessary to make vp a global variable so it
 * can't figure out it's not being modified other than during setup;
 * the fact that ATL_dzero is external will confuse most compilers
 */
{
   static void *vp=NULL;
   static long long N = 0;
   double *cache;
   double dret=0.0;
   size_t i;

   if (size < 0) /* flush cache */
   {
      ATL_assert(vp);
      cache = ATL_AlignPtr(vp);
      if (N > 0) for (i=0; i != N; i++) dret += cache[i];
   }
   else if (size > 0) /* initialize */
   {
      /* drop any buffer from an earlier initialization so that calling
         with size > 0 twice in a row does not leak the first allocation */
      free(vp);
      vp = malloc(ATL_Cachelen + size);
      ATL_assert(vp);
      N = size / sizeof(double);
      cache = ATL_AlignPtr(vp);
      ATL_dzero(N, cache, 1);
   }
   else /* size == 0: free cache */
   {
      free(vp);   /* free(NULL) is a no-op, no guard needed */
      vp = NULL;
      N = 0;
   }
   return(dret);
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:36,代码来源:ATL_flushcache.c
示例12: Mjoin
/*
 * Left-side hemm driver.
 *
 * When N exceeds HEMM_Xover, A is expanded by the hecopy routine into an
 * aligned M x M scratch matrix and the product with B is computed by
 * ATL_ammm; otherwise the reference hemm is called.
 */
void Mjoin(Mjoin(PATL,hemmL),UploNM)
   (const int M, const int N, const void *alpha, const void *A, const int lda,
    const void *B, const int ldb, const void *beta, void *C, const int ldc)
{
   void *va;
   TYPE *a;

   /* at or below the crossover the reference code is used unchanged */
   if (!(N > HEMM_Xover))
   {
      Mjoin(PATL,refhemm)(AtlasLeft, Uplo_, M, N, alpha, A, lda, B, ldb,
                          beta, C, ldc);
      return;
   }

   va = malloc(ATL_Cachelen + (ATL_MulBySize(M)*M));
   ATL_assert(va);
   a = ATL_AlignPtr(va);
   Mjoin(Mjoin(PATL,hecopy),UploNM)(M, A, lda, a);
   ATL_ammm(AtlasNoTrans, AtlasNoTrans, M, N, M, alpha, a, M, B, ldb,
            beta, C, ldc);
   free(va);
}
开发者ID:kevinoid,项目名称:atlas-debian,代码行数:20,代码来源:ATL_hemmL.c
示例13: ATL_ptflushcache
/*
 * Threaded cache flusher with three modes selected by the sign of size:
 *   size > 0  : (re)initialize -- allocate a zeroed buffer holding
 *               size bytes for each of ATL_NTHREADS workers
 *   size < 0  : flush -- give each worker its own slice of the buffer and
 *               run ATL_DoWorkFC on all of them via ATL_goparallel
 *   size == 0 : release the buffer
 * The buffer is kept in function-local static state between calls.
 * The return value is always 0.0.
 */
double ATL_ptflushcache(long long size)
/*
 * flush cache by reading enough mem; note that if the compiler gets
 * really smart, may be necessary to make vp a global variable so it
 * can't figure out it's not being modified other than during setup;
 * the fact that ATL_dzero is external will confuse most compilers
 */
{
   static void *vp=NULL;
   static double *cache=NULL;
   static long long N = 0;
   double dret=0.0;
   long long i;   /* plain loop index; it carries no state between calls */
   ATL_FC fct[ATL_NTHREADS];

   if (size < 0) /* flush cache */
   {
      ATL_assert(cache);
      for (i=0; i < ATL_NTHREADS; i++)
      {
         fct[i].N = N;
         fct[i].dp = cache+i*N;   /* worker i owns elements [i*N, (i+1)*N) */
      }
      ATL_goparallel(ATL_NTHREADS, ATL_DoWorkFC, fct, NULL);
   }
   else if (size > 0) /* initialize */
   {
      /* drop any buffer from an earlier initialization so that calling
         with size > 0 twice in a row does not leak the first allocation */
      free(vp);
      vp = malloc(ATL_Cachelen + (size * ATL_NTHREADS));
      ATL_assert(vp);
      cache = ATL_AlignPtr(vp);
      N = size / sizeof(double);
      ATL_dzero(N*ATL_NTHREADS, cache, 1);
   }
   else /* size == 0: free cache */
   {
      free(vp);   /* free(NULL) is a no-op, no guard needed */
      vp = cache = NULL;
      N = 0;
   }
   return(dret);
}
开发者ID:kevinoid,项目名称:atlas-debian,代码行数:40,代码来源:ATL_ptflushcache.c
示例14: Mjoin
/*
 * Right-side trsm driver.
 *
 * Real (TREAL) build:
 *   - with Transpose_ or ConjTrans_ defined: when M > 4*N, A is copied by
 *     ATL_trcopy_a1 into an aligned N x N scratch matrix and the trsmKR
 *     kernel (ATLPt variant) is run on the copy; otherwise the reference
 *     trsm is called;
 *   - otherwise the trsmKR kernel is called directly on A.
 * Other builds:
 *   - CtrsmKR is used when M > 4*N and N <= 4, else the reference trsm.
 *
 * The copy step used to be wrapped in a nested "#ifdef TREAL / #else";
 * since it already sits inside the outer "#ifdef TREAL", the #else arm
 * could never be compiled and has been removed.
 */
void Mjoin(Mjoin(PATL,trsmR),ATLP)
   (const int M, const int N, const void *valpha, const void *A, const int lda,
    void *C, const int ldc)
{
   const TYPE *alpha=valpha;
#ifdef TREAL
   #if defined(Transpose_) || defined(ConjTrans_)
      if ( M > (N<<2) )
      {
         void *va;
         TYPE *a;

         va = malloc(ATL_Cachelen + (ATL_MulBySize(N*N)));
         ATL_assert(va);
         a = ATL_AlignPtr(va);
         /* copy with a unit scalar; alpha is applied by the kernel below */
         Mjoin(ATL_trcopy,_a1)(N, ATL_rone, A, lda, a);
         Mjoin(Mjoin(PATL,trsmKR),ATLPt)(M, N, *alpha, a, N, C, ldc);
         free(va);
      }
      else Mjoin(PATL,reftrsm)(AtlasRight, Uplo_, Trans_, Unit_, M, N, *alpha,
                               A, lda, C, ldc);
   #else
      Mjoin(Mjoin(PATL,trsmKR),ATLP)(M, N, *alpha, A, lda, C, ldc);
   #endif
#else
   if (M > (N<<2) && N <= 4)
      Mjoin(PATL,CtrsmKR)(Uplo_, Trans_, Unit_, M, N, valpha, A, lda, C, ldc);
   else
      Mjoin(PATL,reftrsm)(AtlasRight, Uplo_, Trans_, Unit_, M, N, alpha,
                          A, lda, C, ldc);
#endif
}
开发者ID:onenoc,项目名称:QuantSoftwareToolkit,代码行数:36,代码来源:ATL_trsmR.c
示例15: Mjoin
/*
 * Convenience front end for ormrq_wrk that manages the workspace itself.
 *
 * The _wrk routine is first called with a workspace length of -1 so that it
 * reports the size it wants in work[0]; a buffer of that many elements is
 * then allocated, aligned, passed to the real call, and released.
 *
 * Returns whatever the second ormrq_wrk call returns.  Allocation failure
 * and a non-zero return from the query both trip ATL_assert.
 */
int Mjoin(PC2F,ormrq)
   (const enum CBLAS_SIDE Side, const enum CBLAS_TRANSPOSE TA,
    ATL_CINT M, ATL_CINT N, ATL_CINT K, TYPE *A, ATL_CINT lda, TYPE *TAU,
    TYPE *C, ATL_CINT ldc)
{
   TYPE work[2];
   TYPE *wrk;
   ATL_INT lwrk;
   void *vp;
   int iret;

   /* step 1: workspace query (length -1 means "tell me what you need") */
   ATL_assert(!Mjoin(PC2F,ormrq_wrk)(Side, TA, M, N, K, A, lda, TAU, C, ldc,
                                     work, -1));
   lwrk = work[0];

   /* step 2: allocate with room for cache-line alignment */
   vp = malloc(ATL_MulBySize(lwrk) + ATL_Cachelen);
   ATL_assert(vp);
   wrk = ATL_AlignPtr(vp);

   /* step 3: do the actual work, then give the buffer back */
   iret = Mjoin(PC2F,ormrq_wrk)(Side, TA, M, N, K, A, lda, TAU, C, ldc,
                                wrk, lwrk);
   free(vp);
   return(iret);
}
开发者ID:apollos,项目名称:atlas,代码行数:24,代码来源:ATL_C2Formrq.c
示例16: clapack_sgetri
/*
 * C LAPACK wrapper that runs ATL_getri with an internally managed
 * workspace.
 *
 * The workspace length is NB*N when the blocking factor returned by GetNB
 * is <= N, and N*N otherwise.
 *
 * Returns the value of ATL_getri, or -7 (after reporting through
 * cblas_xerbla) when the workspace cannot be allocated.
 */
int clapack_sgetri(const enum CBLAS_ORDER Order, const int N, float *A,
                   const int lda, const int *ipiv)
{
   int Mjoin(PATL,GetNB)();
   int ierr=0, lwrk;
   void *vp;

   /* workspace length: min(NB, N) * N */
   lwrk = Mjoin(PATL,GetNB)();
   lwrk = (lwrk <= N) ? lwrk*N : N*N;

   vp = malloc(ATL_Cachelen + ATL_MulBySize(lwrk));
   if (!vp)
   {
      cblas_xerbla(7, "clapack_sgetri",
                   "Cannot allocate workspace of %d\n", lwrk);
      return(-7);
   }

   ierr = ATL_getri(Order, N, A, lda, ipiv, ATL_AlignPtr(vp), &lwrk);
   free(vp);
   return(ierr);
}
开发者ID:apollos,项目名称:atlas,代码行数:24,代码来源:clapack_sgetri.c
示例17: cblas_cger2c
/*
 * CBLAS wrapper around ATL_cger2c for single-precision complex data: two
 * conjugated rank-1 style updates of A, one built from (alpha, X, Y) and
 * one from (beta, W, Z).
 *
 * Order       : CblasColMajor or CblasRowMajor
 * M, N        : matrix dimensions, both must be >= 0
 * alpha, beta : pointers to the two scalars
 * X, W        : vectors of M elements, strides incX / incW (non-zero)
 * Y, Z        : vectors of N elements, strides incY / incZ (non-zero)
 * A, lda      : matrix that is updated; lda must be >= MAX(M,1) for
 *               column-major and >= MAX(N,1) for row-major
 *
 * Unless NoCblasErrorChecks is defined, every argument is validated first;
 * on any failure the error is reported through cblas_xerbla and the routine
 * returns without calling the kernel.
 *
 * Row-major input is handled the same way the single-update wrappers do it:
 * the conjugated, pre-scaled copies alpha*conj(Y) and beta*conj(Z) are
 * placed in one aligned scratch allocation and the unconjugated kernel
 * ATL_cger2u is called with M and N exchanged, the scratch vectors in the
 * length-N slots, X and W in the length-M slots, and unit scalars.
 *
 * Fixes relative to the previous revision of the row-major path:
 *   - z0 started at y0 + N floats, i.e. in the middle of the N complex
 *     (2N float) y0 vector, so the two scratch vectors overlapped;
 *   - Z was scaled by alpha instead of beta;
 *   - the kernel was handed (beta, w, z) in column-major order, so the
 *     scratch copy z0 was never used and a length-M vector was passed
 *     where a length-N one is expected.
 * NOTE(review): the argument order of ATL_cger2u is assumed to mirror the
 * ATL_cger2c call below (M, N, alpha, X, incX, Y, incY, beta, W, incW,
 * Z, incZ, A, lda) -- confirm against the kernel prototype.
 */
void cblas_cger2c(const enum CBLAS_ORDER Order, ATL_CINT M, ATL_CINT N,
                  const void *alpha, const void *X, ATL_CINT incX,
                  const void *Y, ATL_CINT incY, const void *beta,
                  const void *W, ATL_CINT incW,
                  const void *Z, ATL_CINT incZ, void *A, ATL_CINT lda)
{
   int info = 2000;
   const float *x = X, *y = Y, *w = W, *z = Z;
   void *vy;
   float *y0, *z0;
   float one[2] = {ATL_rone, ATL_rzero};

#ifndef NoCblasErrorChecks
   if (M < 0) info = cblas_errprn(2, info,
                        "M cannot be less than zero; is set to %d.", M);
   if (N < 0) info = cblas_errprn(3, info,
                        "N cannot be less than zero; is set to %d.", N);
   if (!incX) info = cblas_errprn(6, info,
                        "incX cannot be zero; is set to %d.", incX);
   if (!incY) info = cblas_errprn(8, info,
                        "incY cannot be zero; is set to %d.", incY);
   if (!incW) info = cblas_errprn(11, info,
                        "incW cannot be zero; is set to %d.", incW);
   if (!incZ) info = cblas_errprn(13, info,
                        "incZ cannot be zero; is set to %d.", incZ);
   if (Order == CblasColMajor)
   {
      if (lda < M || lda < 1)
         info = cblas_errprn(15, info, "lda must be >= MAX(M,1): lda=%d M=%d",
                             lda, M);
   }
   else if (Order == CblasRowMajor)
   {
      /* the value printed is N, so the label must say N (was "M=%d") */
      if (lda < N || lda < 1)
         info = cblas_errprn(15, info, "lda must be >= MAX(N,1): lda=%d N=%d",
                             lda, N);
   }
   else
      info = cblas_errprn(1, info, "Order must be %d or %d, but is set to %d",
                          CblasRowMajor, CblasColMajor, Order);
   if (info != 2000)
   {
      cblas_xerbla(info, "cblas_cger2c", "");
      return;
   }
#endif

/*
 * For negative strides, move to the element the kernel treats as the start.
 * The factor 2 accounts for the two floats per complex element.  A multiply
 * is used instead of "<<1" because (1-len)*inc is negative when the length
 * is 0 and left-shifting a negative value is undefined behaviour.
 */
   if (incX < 0) x += (1-M)*incX*2;
   if (incY < 0) y += (1-N)*incY*2;
   if (incW < 0) w += (1-M)*incW*2;
   if (incZ < 0) z += (1-N)*incZ*2;

   if (Order == CblasColMajor)
      ATL_cger2c(M, N, alpha, x, incX, y, incY, beta, w, incW, z, incZ, A, lda);
   else
   {
      /* one allocation for both scratch vectors, each separately aligned */
      vy = malloc(ATL_Cachelen+ATL_Cachelen + ATL_MulBySize(N+N));
      ATL_assert(vy);
      y0 = ATL_AlignPtr(vy);
      z0 = y0 + N + N;           /* skip N complex elements = 2N floats */
      z0 = ATL_AlignPtr(z0);
      ATL_cmoveConj(N, alpha, y, incY, y0, 1);   /* y0 <- alpha * conj(y) */
      ATL_cmoveConj(N, beta, z, incZ, z0, 1);    /* z0 <- beta  * conj(z) */
      ATL_cger2u(N, M, one, y0, 1, x, incX, one, z0, 1, w, incW, A, lda);
      free(vy);
   }
}
开发者ID:apollos,项目名称:atlas,代码行数:67,代码来源:cblas_cger2c.c
示例18: Mmin
//.........这里部分代码省略.........
nb = clapack_ilaenv(LAIS_OPT_NB, LAormqr, MYOPT+LARight+LAUpper, M, N, K,-1);
/*
* If it is a workspace query, return the size of work required.
* wrksz = wrksz of ATL_larfb + ATL_larft + ATL_geqr2
*/
if (LWORK < 0)
{
if(SIDE == CblasLeft)
{
*WORK = ( N*nb + nb*nb + maxMN ) ;
}
else
{
*WORK = ( M*nb + nb*nb + maxMN ) ;
}
return(0);
}
else if (M < 1 || N < 1) /* quick return if no work to do */
return(0);
/*
* If the user gives us too little space, see if we can allocate it ourselves
*/
else
{
if(SIDE == CblasLeft)
{
if (LWORK < (N*nb + nb*nb + maxMN))
{
vp = malloc(ATL_MulBySize(N*nb + nb*nb + maxMN) + ATL_Cachelen);
if (!vp)
return(-7);
WORK = ATL_AlignPtr(vp);
}
}
else
{
if (LWORK < (M*nb + nb*nb + maxMN))
{
vp = malloc(ATL_MulBySize(M*nb + nb*nb + maxMN) + ATL_Cachelen);
if (!vp)
return(-7);
WORK = ATL_AlignPtr(vp);
}
} /* if CblasRight */
}
/*
* Assign workspace areas for ATL_larft, ATL_geqr2, ATL_larfb
*/
ws_T = WORK; /* T at begining of work */
ws_QR2 = WORK +(nb SHIFT)*nb; /* After T Work space */
ws_larfb = ws_QR2 + (maxMN SHIFT); /* After workspace for T and QR2 */
if (SIDE == CblasLeft)
{
if ( TRANS == CblasNoTrans )
{
j = (K/nb)*nb;
if (j == K)
{
j=K -nb;
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:67,代码来源:ATL_ormqr.c
示例19: Mjoin
/*
 * Matrix multiply that walks the rows of the problem in partitions of at
 * most Mp rows and calls mm_axpy on each partition.
 *
 * When TA is not AtlasNoTrans, each partition of A is first copied (and
 * scaled by alpha) into an aligned workspace laid out with leading
 * dimension Mp, and mm_axpy is then called with a unit alpha (CONE).
 *
 * Returns -1 if that workspace cannot be allocated, 0 otherwise.
 *
 * NOTE(review): the doubled offsets (k+k, mp+mp, <<1) suggest two TYPE
 * values per matrix element, i.e. this is the complex variant -- confirm.
 */
int Mjoin(PATL,mmJKI)(const enum ATLAS_TRANS TA, const enum ATLAS_TRANS TB,
const int M, const int N, const int K,
const SCALAR alpha, const TYPE *A, const int lda,
const TYPE *B, const int ldb, const SCALAR beta,
TYPE *C, const int ldc)
/*
* This gemm is for small K, so we build gemm out of AXPY (outer product)
* rather than dot (inner product).
*/
{
int Mp, mp, m, k, ldaa=lda;
void *vA=NULL;
TYPE *pA;
const TYPE CONE[2]={ATL_rone, ATL_rzero}, CNONE[2]={ATL_rnone, ATL_rzero};
/* alp is the scalar actually handed to mm_axpy; it is switched to CONE
   below when alpha has already been applied during the copy of A */
const SCALAR alp=alpha;
/*
* Compute M partition necessary to promote reuse in the L1 cache. Check
* NB^2 in addition to L1elts, to catch machines where L1 is not used by FPU.
* If this gives a small Mp, use CacheEdge instead (reuse in L2 instead of L1).
*/
Mp = NB*NB;
m = ATL_L1elts >> 1;
Mp = (m > Mp) ? m : Mp;
Mp /= ((K+2)<<1);
if (Mp < 128)
{
#if !defined(CacheEdge) || CacheEdge == 0
Mp = M;
#else
Mp = (CacheEdge) / ((K+2)*ATL_sizeof);
if (Mp < 128)
Mp = M;
#endif
}
if (Mp > M)
Mp = M;
/*
* Change Mp if remainder is very small
*/
else
{
Mp -= 16; /* small safety margin on filling cache */
mp = M / Mp;
m = M - mp*Mp;
/* spread a short remainder (< 32 rows) over the full partitions */
if (m && m < 32)
Mp += (m+mp-1)/mp;
}
/*
* If A not in NoTrans format, need to copy so it can use axpy wt stride=1.
* NOTE: this routine should not be called when you can't afford this copy
*/
if (TA != AtlasNoTrans)
{
vA = malloc(ATL_Cachelen + Mp*ATL_MulBySize(K));
if (!vA) return(-1);
pA = ATL_AlignPtr(vA);
alp = CONE;
ldaa = Mp;
/* pre-advance so that the unconditional "pA -= (Mp+Mp)" at the top of the
   first loop iteration lands back on the aligned start of the workspace */
pA += Mp+Mp;
}
else
pA = (TYPE *) A;
for (m=0; m < M; m += Mp)
{
/* mp = number of rows in this partition (the last one may be short) */
mp = M - m;
if (mp > Mp)
mp = Mp;
/*
* If the thing is in Trans format, copy to NoTrans
*/
if (vA)
{
/* undo the advance made at the end of the previous iteration (or the
   pre-advance above) so every partition reuses the same workspace */
pA -= (Mp+Mp);
if (TA == AtlasConjTrans)
{
for (k=0; k < K; k++)
{
/* copy one column, negate every second TYPE value of it (stride 2,
   starting at offset 1), then scale by alpha unless alpha is one */
Mjoin(PATL,copy)(mp, A+k+k, lda, pA+((k*ldaa)<<1), 1);
Mjoin(PATLU,scal)(mp, ATL_rnone, pA+1+((k*ldaa)<<1), 2);
if (!SCALAR_IS_ONE(alpha))
Mjoin(PATL,scal)(mp, alpha, pA+((k*ldaa)<<1), 1);
}
}
else
{
/* plain transpose: copy and scale by alpha in a single cpsc call */
for (k=0; k < K; k++)
Mjoin(PATL,cpsc)(mp, alpha, A+k+k, lda, pA+((k*ldaa)<<1), 1);
}
/* step the source pointer past the mp entries just consumed */
A += mp*(lda+lda);
}
Mjoin(PATL,mm_axpy)(AtlasNoTrans, TB, mp, N, K, alp, pA, ldaa, B, ldb,
beta, C, ldc);
/* advance both operands by mp rows; for the workspace case this is
   rewound at the top of the next iteration */
pA += mp+mp;
C += mp+mp;
}
if (vA) free(vA);
return(0);
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:99,代码来源:ATL_cmmJKI.c
示例20: ATL_her
/*
 * her driver: picks a rank-1 kernel with ATL_GetR1Kern, prepares a
 * conjugated (and, unless alpha is one, alpha-scaled) copy of X in aligned
 * scratch memory, optionally copies X itself when its stride or alignment
 * does not suit the kernel, and then runs her_kU or her_kL depending on
 * Uplo.
 *
 * Returns immediately when N < 1 or alpha is zero.  Falls back to the
 * reference her for N < 50 and when the scratch memory cannot be allocated.
 */
void ATL_her(const enum ATLAS_UPLO Uplo, ATL_CINT N, const TYPE alpha,
             const TYPE *X, ATL_CINT incX, TYPE *A, ATL_CINT lda)
{
   const TYPE calpha[2] = {alpha, ATL_rzero};
   const int ALP1 = (alpha == ATL_rone);
   void *vp=NULL;
   TYPE *x, *xt;
   ATL_r1kern_t gerk;
   ATL_INT CacheElts;
   int COPYX, COPYXt;
   int mu, nu, minM, minN, alignX, alignXt, FNU, ALIGNX2A;

   /* nothing to do */
   if (N < 1 || (alpha == ATL_rzero))
      return;
/*
 * For very small problems, avoid overhead of func calls & data copy
 */
   if (N < 50)
   {
      Mjoin(PATL,refher)(Uplo, N, alpha, X, incX, A, lda);
      return;
   }
/*
 * Determine the GER kernel to use, and its parameters
 */
   gerk = ATL_GetR1Kern(N-ATL_s1L_NU, ATL_s1L_NU, A, lda, &mu, &nu,
                        &minM, &minN, &alignX, &ALIGNX2A, &alignXt,
                        &FNU, &CacheElts);
/*
 * Decide whether X must be copied: always for non-unit stride, otherwise
 * only if the kernel's alignment requirement is not met.
 * ATL_Cachelen is the highest alignment that can be requested, so when the
 * kernel wants X aligned like A we compare their offsets within a
 * cache line.
 */
   if (incX != 1)
      COPYX = 1;
   else if (ALIGNX2A)
   {
      size_t t1 = (size_t) A, t2 = (size_t) X;
      COPYX = (t1 - ATL_MulByCachelen(ATL_DivByCachelen(t1))) !=
              (t2 - ATL_MulByCachelen(ATL_DivByCachelen(t2)));
   }
   else if (alignX)
   {
      size_t t1 = (size_t) X;
      COPYX = ((t1/alignX)*alignX != t1);
   }
   else
      COPYX = 0;

   /* one vector's worth of scratch for xt, a second one if X is copied */
   vp = malloc((ATL_Cachelen+ATL_MulBySize(N))*(1+COPYX));
   if (!vp)
   {
      Mjoin(PATL,refher)(Uplo, N, alpha, X, incX, A, lda);
      return;
   }
   xt = ATL_AlignPtr(vp);

   /* x defaults to the caller's vector and is redirected when copying */
   x = (TYPE*) X;
   if (COPYX)
   {
      x = xt + N+N;
      x = ALIGNX2A ? ATL_Align2Ptr(x, A) : ATL_AlignPtr(x);
      Mjoin(PATL,copy)(N, X, incX, x, 1);
   }

   /* xt <- conj(X), scaled by alpha unless alpha is one */
   if (ALP1)
      Mjoin(PATL,copyConj)(N, X, incX, xt, 1);
   else
      Mjoin(PATL,moveConj)(N, calpha, X, incX, xt, 1);

   if (Uplo == AtlasUpper)
      Mjoin(PATL,her_kU)(gerk, N, alpha, x, xt, A, lda);
   else
      Mjoin(PATL,her_kL)(gerk, N, alpha, x, xt, A, lda);

   free(vp);
}
开发者ID:AIDman,项目名称:Kaldi,代码行数:76,代码来源:ATL_her.c
注:本文中的ATL_AlignPtr函数示例整理自GitHub/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。
请发表评论