本文整理汇总了C++中teuchos::BLAS类的典型用法代码示例。如果您正苦于以下问题:C++ BLAS类的具体用法?C++ BLAS怎么用?C++ BLAS使用的例子?那么恭喜您, 这里精选的类代码示例或许可以为您提供帮助。
在下文中一共展示了BLAS类的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的C++代码示例。
示例1: main
int main(int argc, char* argv[])
{
#ifdef HAVE_MPI
MPI_Init(&argc,&argv);
#endif
// Creating an instance of the BLAS class for double-precision kernels looks like:
Teuchos::BLAS<int, double> blas;
// This instance provides the access to all the BLAS kernels listed in Figure \ref{blas_kernels}:
const int n = 10;
double alpha = 2.0;
double x[ n ];
for ( int i=0; i<n; i++ ) { x[i] = i; }
blas.SCAL( n, alpha, x, 1 );
int max_idx = blas.IAMAX( n, x, 1 );
cout<< "The index of the maximum magnitude entry of x[] is the "
<< max_idx <<"-th and x[ " << max_idx-1 << " ] = "<< x[max_idx-1]
<< endl;
#ifdef HAVE_MPI
MPI_Finalize();
#endif
return 0;
}
开发者ID:EllieGong,项目名称:trilinos,代码行数:25,代码来源:ex2.cpp
示例2: main
int main(int argc, char* argv[])
{
std::cout << Teuchos::Teuchos_Version() << std::endl << std::endl;
// Creating an instance of the BLAS class for double-precision kernels looks like:
Teuchos::BLAS<int, double> blas;
// This instance provides the access to all the BLAS kernels like _SCAL.
const int n = 10;
double alpha = 2.0;
double x[ n ];
for ( int i=0; i<n; i++ ) { x[i] = i; }
blas.SCAL( n, alpha, x, 1 );
int max_idx = blas.IAMAX( n, x, 1 );
std::cout<< "The index of the maximum magnitude entry of x[] is the "
<< max_idx <<"-th and x[ " << max_idx-1 << " ] = "<< x[max_idx-1]
<< std::endl;
return 0;
}
开发者ID:Blevs,项目名称:Trilinos_tutorial,代码行数:26,代码来源:Teuchos_BLAS.cpp
示例3: timer
double
do_time_teuchos_fad_dot(unsigned int m, unsigned int ndot, unsigned int nloop)
{
Sacado::Random<double> urand(0.0, 1.0);
Teuchos::BLAS<int,FadType> blas;
std::vector<FadType> X(m), Y(m);
for (unsigned int i=0; i<m; i++) {
X[i] = FadType(ndot, urand.number());
Y[i] = FadType(ndot, urand.number());
for (unsigned int k=0; k<ndot; k++) {
X[i].fastAccessDx(k) = urand.number();
Y[i].fastAccessDx(k) = urand.number();
}
}
Teuchos::Time timer("Teuchos Fad DOT", false);
timer.start(true);
for (unsigned int j=0; j<nloop; j++) {
FadType z = blas.DOT(m, &X[0], 1, &Y[0], 1);
}
timer.stop();
return timer.totalElapsedTime() / nloop;
}
开发者ID:00liujj,项目名称:trilinos,代码行数:25,代码来源:fad_blas.cpp
示例4: computeIntegral
/*
Computes integrals of monomials over a given reference cell.
*/
double computeIntegral(shards::CellTopology & cellTopology, int cubDegree, int xDeg, int yDeg, int zDeg) {
DefaultCubatureFactory<double> cubFactory; // create factory
Teuchos::RCP<Cubature<double> > myCub = cubFactory.create(cellTopology, cubDegree); // create default cubature
double val = 0.0;
int cubDim = myCub->getDimension();
int numCubPoints = myCub->getNumPoints();
FieldContainer<double> point(cubDim);
FieldContainer<double> cubPoints(numCubPoints, cubDim);
FieldContainer<double> cubWeights(numCubPoints);
FieldContainer<double> functValues(numCubPoints);
myCub->getCubature(cubPoints, cubWeights);
for (int i=0; i<numCubPoints; i++) {
for (int j=0; j<cubDim; j++) {
point(j) = cubPoints(i,j);
}
functValues(i) = computeMonomial(point, xDeg, yDeg, zDeg);
}
Teuchos::BLAS<int, double> myblas;
int inc = 1;
val = myblas.DOT(numCubPoints, &functValues[0], inc, &cubWeights[0], inc);
return val;
}
开发者ID:00liujj,项目名称:trilinos,代码行数:32,代码来源:test_05.cpp
示例5: GEMM
static void
GEMM (const Teuchos::ETransp transA,
const Teuchos::ETransp transB,
const Scalar& alpha,
const View<const Scalar**, LayoutLeft, DeviceType>& A,
const View<const Scalar**, LayoutLeft, DeviceType>& B,
const Scalar& beta,
const View<Scalar**, LayoutLeft, DeviceType>& C)
{
const int n = static_cast<int> (C.dimension_1 ());
const int lda = static_cast<int> (Impl::getStride2DView (A));
Teuchos::BLAS<int,Scalar> blas;
// For some BLAS implementations (e.g., MKL), GEMM when B has
// one column may be signficantly less efficient than GEMV.
if (n == 1 && transB == Teuchos::NO_TRANS) {
blas.GEMV (transA, A.dimension_0 (), A.dimension_1 (),
alpha, A.ptr_on_device (), lda,
B.ptr_on_device (), static_cast<int> (1),
beta, C.ptr_on_device (), static_cast<int> (1));
}
else {
const int m = static_cast<int> (C.dimension_0 ());
const int k = static_cast<int> (transA == Teuchos::NO_TRANS ?
A.dimension_1 () : A.dimension_0 ());
const int ldb = static_cast<int> (Impl::getStride2DView (B));
const int ldc = static_cast<int> (Impl::getStride2DView (C));
blas.GEMM (transA, transB, m, n, k, alpha,
A.ptr_on_device(), lda,
B.ptr_on_device(), ldb,
beta, C.ptr_on_device(), ldc);
}
}
开发者ID:cihanuq,项目名称:Trilinos,代码行数:34,代码来源:Kokkos_MV_GEMM.hpp
示例6:
void
Stokhos::GramSchmidtBasis<ordinal_type, value_type>::
transformCoeffs(const value_type *in, value_type *out) const
{
Teuchos::BLAS<ordinal_type, value_type> blas;
for (ordinal_type i=0; i<sz; i++)
out[i] = in[i];
blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::LOWER_TRI, Teuchos::TRANS,
Teuchos::UNIT_DIAG, sz, 1, 1.0, gs_mat.values(), sz, out, sz);
}
开发者ID:haripandey,项目名称:trilinos,代码行数:10,代码来源:Stokhos_GramSchmidtBasisImp.hpp
示例7:
void
Stokhos::MonoProjPCEBasis<ordinal_type, value_type>::
transformCoeffs(const value_type *in, value_type *out) const
{
// Transform coefficients to normalized basis
Teuchos::BLAS<ordinal_type, value_type> blas;
blas.GEMV(Teuchos::NO_TRANS, pce_sz, this->p+1,
value_type(1.0), basis_vecs.values(), pce_sz,
in, ordinal_type(1), value_type(0.0), out, ordinal_type(1));
// Transform from normalized to original
for (ordinal_type i=0; i<pce_sz; i++)
out[i] /= pce_norms[i];
}
开发者ID:00liujj,项目名称:trilinos,代码行数:14,代码来源:Stokhos_MonoProjPCEBasisImp.hpp
示例8: _Orthogonalize
void Arnoldi::_Orthogonalize(cSDV& q){
Teuchos::BLAS<int, complex<double> > blas;
complex<double> OrthogFactor;
for( int j = 0; j <= _k; ++j ){
OrthogFactor = blas.DOT(_length, q.values(), 1, _Q[j], 1);
_H[_k][j] = OrthogFactor;
complex<double>* iterQ = _Q[j];
complex<double>* iterq = q.values();
for( int i = 0; i < _length; ++i, ++iterQ, ++iterq ){
*iterq -= OrthogFactor*(*iterQ);
}
}
}
开发者ID:jlconlin,项目名称:PhDThesis,代码行数:14,代码来源:Arnoldi.cpp
示例9: defined
KOKKOS_INLINE_FUNCTION
int
Gemm<Trans::ConjTranspose,Trans::NoTranspose,
AlgoGemm::ExternalBlas,Variant::One>
::invoke(PolicyType &policy,
MemberType &member,
const ScalarType alpha,
DenseExecViewTypeA &A,
DenseExecViewTypeB &B,
const ScalarType beta,
DenseExecViewTypeC &C) {
// static_assert( Kokkos::Impl::is_same<
// typename DenseMatrixTypeA::space_type,
// typename DenseMatrixTypeB::space_type
// >::value &&
// Kokkos::Impl::is_same<
// typename DenseMatrixTypeB::space_type,
// typename DenseMatrixTypeC::space_type
// >::value,
// "Space type of input matrices does not match" );
if (member.team_rank() == 0) {
#if \
defined( HAVE_SHYLUTACHO_TEUCHOS ) && \
defined( KOKKOS_ACTIVE_EXECUTION_MEMORY_SPACE_HOST )
typedef typename DenseExecViewTypeA::ordinal_type ordinal_type;
typedef typename DenseExecViewTypeA::value_type value_type;
Teuchos::BLAS<ordinal_type,value_type> blas;
const ordinal_type m = C.NumRows();
const ordinal_type n = C.NumCols();
const ordinal_type k = B.NumRows();
if (m > 0 && n > 0 && k > 0)
blas.GEMM(Teuchos::CONJ_TRANS, Teuchos::NO_TRANS,
m, n, k,
alpha,
A.ValuePtr(), A.BaseObject().ColStride(),
B.ValuePtr(), B.BaseObject().ColStride(),
beta,
C.ValuePtr(), C.BaseObject().ColStride());
#else
TACHO_TEST_FOR_ABORT( true, MSG_NOT_HAVE_PACKAGE("Teuchos") );
#endif
}
return 0;
}
开发者ID:uppatispr,项目名称:trilinos-official,代码行数:49,代码来源:Tacho_Gemm_ConjTrans_NoTrans_ExternalBlas.hpp
示例10: alpha
double
do_time_teuchos_fad_gemm(unsigned int m, unsigned int n, unsigned int k,
unsigned int ndot, unsigned int nloop)
{
Sacado::Random<double> urand(0.0, 1.0);
Teuchos::BLAS<int,FadType> blas;
std::vector<FadType> A(m*k), B(k*n), C(m*n);
for (unsigned int j=0; j<k; j++) {
for (unsigned int i=0; i<m; i++) {
A[i+j*m] = FadType(ndot, urand.number());
for (unsigned int l=0; l<ndot; l++)
A[i+j*m].fastAccessDx(l) = urand.number();
}
}
for (unsigned int j=0; j<n; j++) {
for (unsigned int i=0; i<k; i++) {
B[i+j*k] = FadType(ndot, urand.number());
for (unsigned int l=0; l<ndot; l++)
B[i+j*k].fastAccessDx(l) = urand.number();
}
}
for (unsigned int j=0; j<n; j++) {
for (unsigned int i=0; i<m; i++) {
C[i+j*m] = FadType(ndot, urand.number());
for (unsigned int l=0; l<ndot; l++)
C[i+j*m].fastAccessDx(l) = urand.number();
}
}
FadType alpha(ndot, urand.number());
FadType beta(ndot, urand.number());
for (unsigned int l=0; l<ndot; l++) {
alpha.fastAccessDx(l) = urand.number();
beta.fastAccessDx(l) = urand.number();
}
Teuchos::Time timer("Teuchos Fad GEMM", false);
timer.start(true);
for (unsigned int j=0; j<nloop; j++) {
blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, m, n, k, alpha, &A[0], m,
&B[0], k, beta, &C[0], m);
}
timer.stop();
return timer.totalElapsedTime() / nloop;
}
开发者ID:00liujj,项目名称:trilinos,代码行数:46,代码来源:fad_blas.cpp
示例11:
KOKKOS_INLINE_FUNCTION
int
Trsm<Side::Left,Uplo::Upper,Trans::NoTranspose,
AlgoTrsm::ExternalBlas,Variant::One>
::invoke(PolicyType &policy,
const MemberType &member,
const int diagA,
const ScalarType alpha,
DenseExecViewTypeA &A,
DenseExecViewTypeB &B) {
// static_assert( Kokkos::Impl::is_same<
// typename DenseMatrixTypeA::space_type,
// Kokkos::Cuda
// >::value,
// "Cuda space is not available for calling external BLAS" );
// static_assert( Kokkos::Impl::is_same<
// typename DenseMatrixTypeA::space_type,
// typename DenseMatrixTypeB::space_type
// >::value,
// "Space type of input matrices does not match" );
//typedef typename DenseExecViewTypeA::space_type space_type;
typedef typename DenseExecViewTypeA::ordinal_type ordinal_type;
typedef typename DenseExecViewTypeA::value_type value_type;
if (member.team_rank() == 0) {
#ifdef HAVE_SHYLUTACHO_TEUCHOS
Teuchos::BLAS<ordinal_type,value_type> blas;
const ordinal_type m = A.NumRows();
const ordinal_type n = B.NumCols();
blas.TRSM(Teuchos::LEFT_SIDE, Teuchos::UPPER_TRI, Teuchos::NO_TRANS,
(diagA == Diag::Unit ? Teuchos::UNIT_DIAG : Teuchos::NON_UNIT_DIAG),
m, n,
alpha,
A.ValuePtr(), A.BaseObject().ColStride(),
B.ValuePtr(), B.BaseObject().ColStride());
#else
TACHO_TEST_FOR_ABORT( true, MSG_NOT_HAVE_PACKAGE("Teuchos") );
#endif
}
return 0;
}
开发者ID:agrippa,项目名称:Trilinos,代码行数:46,代码来源:Tacho_Trsm_Left_Upper_NoTrans_ExternalBlas.hpp
示例12: if
static void
GEMM (const Teuchos::ETransp transA,
const Teuchos::ETransp transB,
const double& alpha,
const View<const double**, LayoutLeft, DeviceType>& A,
const View<const double**, LayoutLeft, DeviceType>& B,
const double& beta,
const View<double**, LayoutLeft, DeviceType>& C)
{
const int n = static_cast<int> (C.dimension_1 ());
// For some BLAS implementations (e.g., MKL), GEMM when B has
// one column may be signficantly less efficient than GEMV.
if (n == 1 && transB == Teuchos::NO_TRANS) {
char trans = 'N';
if (transA == Teuchos::TRANS) {
trans = 'T';
}
else if (transA == Teuchos::CONJ_TRANS) {
trans = 'C';
}
auto B_0 = Kokkos::subview (B, Kokkos::ALL (), 0);
auto C_0 = Kokkos::subview (C, Kokkos::ALL (), 0);
KokkosBlas::gemv (&trans, alpha, A, B_0, beta, C_0);
}
else {
const int m = static_cast<int> (C.dimension_0 ());
const int k = static_cast<int> (transA == Teuchos::NO_TRANS ? A.dimension_1 () : A.dimension_0 ());
const int lda = static_cast<int> (Impl::getStride2DView (A));
const int ldb = static_cast<int> (Impl::getStride2DView (B));
const int ldc = static_cast<int> (Impl::getStride2DView (C));
Teuchos::BLAS<int,double> blas;
blas.GEMM (transA, transB, m, n, k, alpha,
A.ptr_on_device(), lda,
B.ptr_on_device(), ldb,
beta, C.ptr_on_device(), ldc);
}
}
开发者ID:cihanuq,项目名称:Trilinos,代码行数:39,代码来源:Kokkos_MV_GEMM.hpp
示例13: computeIntegral
/*
Computes integrals of monomials over a given reference cell.
*/
void computeIntegral(Teuchos::Array<double>& testIntFixDeg, shards::CellTopology & cellTopology, int cubDegree) {
DefaultCubatureFactory<double> cubFactory; // create factory
Teuchos::RCP<Cubature<double> > myCub = cubFactory.create(cellTopology, cubDegree); // create default cubature
int cubDim = myCub->getDimension();
int numCubPoints = myCub->getNumPoints();
int numPolys = (cubDegree+1)*(cubDegree+2)*(cubDegree+3)/6;
FieldContainer<double> point(cubDim);
FieldContainer<double> cubPoints(numCubPoints, cubDim);
FieldContainer<double> cubWeights(numCubPoints);
FieldContainer<double> functValues(numCubPoints, numPolys);
myCub->getCubature(cubPoints, cubWeights);
int polyCt = 0;
for (int xDeg=0; xDeg <= cubDegree; xDeg++) {
for (int yDeg=0; yDeg <= cubDegree-xDeg; yDeg++) {
for (int zDeg=0; zDeg <= cubDegree-xDeg-yDeg; zDeg++) {
for (int i=0; i<numCubPoints; i++) {
for (int j=0; j<cubDim; j++) {
point(j) = cubPoints(i,j);
}
functValues(i,polyCt) = computeMonomial(point, xDeg, yDeg, zDeg);
}
polyCt++;
}
}
}
Teuchos::BLAS<int, double> myblas;
int inc = 1;
double alpha = 1.0;
double beta = 0.0;
myblas.GEMV(Teuchos::NO_TRANS, numPolys, numCubPoints, alpha, &functValues(0,0), numPolys,
&cubWeights(0), inc, beta, &testIntFixDeg[0], inc);
}
开发者ID:KineticTheory,项目名称:Trilinos,代码行数:41,代码来源:test_06.cpp
示例14: computeIntegral
/*
Computes integrals of monomials over a given reference cell.
*/
void computeIntegral(Teuchos::Array<double>& testIntFixDeg, int cubDegree) {
CubatureGenSparse<double,3> myCub(cubDegree);
int cubDim = myCub.getDimension();
int numCubPoints = myCub.getNumPoints();
int numPolys = (cubDegree+1)*(cubDegree+2)*(cubDegree+3)/6;
FieldContainer<double> point(cubDim);
FieldContainer<double> cubPoints(numCubPoints, cubDim);
FieldContainer<double> cubWeights(numCubPoints);
FieldContainer<double> functValues(numCubPoints, numPolys);
myCub.getCubature(cubPoints, cubWeights);
int polyCt = 0;
for (int xDeg=0; xDeg <= cubDegree; xDeg++) {
for (int yDeg=0; yDeg <= cubDegree-xDeg; yDeg++) {
for (int zDeg=0; zDeg <= cubDegree-xDeg-yDeg; zDeg++) {
for (int i=0; i<numCubPoints; i++) {
for (int j=0; j<cubDim; j++) {
point(j) = cubPoints(i,j);
}
functValues(i,polyCt) = computeMonomial(point, xDeg, yDeg, zDeg);
}
polyCt++;
}
}
}
Teuchos::BLAS<int, double> myblas;
int inc = 1;
double alpha = 1.0;
double beta = 0.0;
myblas.GEMV(Teuchos::NO_TRANS, numPolys, numCubPoints, alpha, &functValues(0,0), numPolys,
&cubWeights(0), inc, beta, &testIntFixDeg[0], inc);
}
开发者ID:KineticTheory,项目名称:Trilinos,代码行数:40,代码来源:test_09.cpp
示例15: main
int main(int argc, char **argv)
{
const unsigned int n = 5;
Sacado::Fad::Vector<unsigned int, FadType> A(n*n,0),B(n,n), C(n,n);
for (unsigned int i=0; i<n; i++) {
for (unsigned int j=0; j<n; j++)
A[i+j*n] = FadType(Teuchos::ScalarTraits<double>::random());
B[i] = FadType(n, Teuchos::ScalarTraits<double>::random());
for (unsigned int j=0; j<n; j++)
B[i].fastAccessDx(j) = Teuchos::ScalarTraits<double>::random();
C[i] = 0.0;
}
double *a = A.vals();
double *b = B.vals();
double *bdx = B.dx();
std::vector<double> c(n), cdx(n*n);
Teuchos::BLAS<int,double> blas;
blas.GEMV(Teuchos::NO_TRANS, n, n, 1.0, &a[0], n, &b[0], 1, 0.0, &c[0], 1);
blas.GEMM(Teuchos::NO_TRANS, Teuchos::NO_TRANS, n, n, n, 1.0, &a[0], n, &bdx[0], n, 0.0, &cdx[0], n);
// Teuchos::BLAS<int,FadType> blas_fad;
// blas_fad.GEMV(Teuchos::NO_TRANS, n, n, 1.0, &A[0], n, &B[0], 1, 0.0, &C[0], 1);
Teuchos::BLAS<int,FadType> sacado_fad_blas(false);
sacado_fad_blas.GEMV(Teuchos::NO_TRANS, n, n, 1.0, &A[0], n, &B[0], 1, 0.0, &C[0], 1);
// Print the results
int p = 4;
int w = p+7;
std::cout.setf(std::ios::scientific);
std::cout.precision(p);
std::cout << "BLAS GEMV calculation:" << std::endl;
std::cout << "a = " << std::endl;
for (unsigned int i=0; i<n; i++) {
for (unsigned int j=0; j<n; j++)
std::cout << " " << std::setw(w) << a[i+j*n];
std::cout << std::endl;
}
std::cout << "b = " << std::endl;
for (unsigned int i=0; i<n; i++) {
std::cout << " " << std::setw(w) << b[i];
}
std::cout << std::endl;
std::cout << "bdot = " << std::endl;
for (unsigned int i=0; i<n; i++) {
for (unsigned int j=0; j<n; j++)
std::cout << " " << std::setw(w) << bdx[i+j*n];
std::cout << std::endl;
}
std::cout << "c = " << std::endl;
for (unsigned int i=0; i<n; i++) {
std::cout << " " << std::setw(w) << c[i];
}
std::cout << std::endl;
std::cout << "cdot = " << std::endl;
for (unsigned int i=0; i<n; i++) {
for (unsigned int j=0; j<n; j++)
std::cout << " " << std::setw(w) << cdx[i+j*n];
std::cout << std::endl;
}
std::cout << std::endl << std::endl;
std::cout << "FAD BLAS GEMV calculation:" << std::endl;
std::cout << "A.val() (should = a) = " << std::endl;
for (unsigned int i=0; i<n; i++) {
for (unsigned int j=0; j<n; j++)
std::cout << " " << std::setw(w) << A[i+j*n].val();
std::cout << std::endl;
}
std::cout << "B.val() (should = b) = " << std::endl;
for (unsigned int i=0; i<n; i++) {
std::cout << " " << std::setw(w) << B[i].val();
}
std::cout << std::endl;
std::cout << "B.dx() (should = bdot) = " << std::endl;
double *Bdx = B.dx();
for (unsigned int i=0; i<n; i++) {
for (unsigned int j=0; j<n; j++)
std::cout << " " << std::setw(w) << Bdx[i+j*n];
std::cout << std::endl;
}
std::cout << "C.val() (should = c) = " << std::endl;
for (unsigned int i=0; i<n; i++) {
std::cout << " " << std::setw(w) << C[i].val();
}
std::cout << std::endl;
std::cout << "C.dx() (should = cdot) = " << std::endl;
double *Cdx = C.dx();
for (unsigned int i=0; i<n; i++) {
for (unsigned int j=0; j<n; j++)
std::cout << " " << std::setw(w) << Cdx[i+j*n];
std::cout << std::endl;
}
double tol = 1.0e-14;
bool failed = false;
for (unsigned int i=0; i<n; i++) {
//.........这里部分代码省略.........
开发者ID:00liujj,项目名称:trilinos,代码行数:101,代码来源:vector_blas_example.cpp
示例16: matGen
void
randomGlobalMatrix (Generator* const pGenerator,
MatrixViewType& A_local,
const typename Teuchos::ScalarTraits< typename MatrixViewType::scalar_type >::magnitudeType singular_values[],
MessengerBase< typename MatrixViewType::ordinal_type >* const ordinalMessenger,
MessengerBase< typename MatrixViewType::scalar_type >* const scalarMessenger)
{
using Teuchos::NO_TRANS;
using std::vector;
typedef typename MatrixViewType::ordinal_type ordinal_type;
typedef typename MatrixViewType::scalar_type scalar_type;
const bool b_local_debug = false;
const int rootProc = 0;
const int nprocs = ordinalMessenger->size();
const int myRank = ordinalMessenger->rank();
Teuchos::BLAS<ordinal_type, scalar_type> blas;
const ordinal_type nrowsLocal = A_local.nrows();
const ordinal_type ncols = A_local.ncols();
// Theory: Suppose there are P processors. Proc q wants an m_q by n
// component of the matrix A, which we write as A_q. On Proc 0, we
// generate random m_q by n orthogonal matrices Q_q (in explicit
// form), and send Q_q to Proc q. The m by n matrix [Q_0; Q_1; ...;
// Q_{P-1}] is not itself orthogonal. However, the m by n matrix
// Q = [Q_0 / P; Q_1 / P; ...; Q_{P-1} / P] is orthogonal:
//
// \sum_{q = 0}^{P-1} (Q_q^T * Q_q) / P = I.
if (myRank == rootProc)
{
typedef Random::MatrixGenerator< ordinal_type, scalar_type, Generator > matgen_type;
matgen_type matGen (*pGenerator);
// Generate a random ncols by ncols upper triangular matrix
// R with the given singular values.
Matrix< ordinal_type, scalar_type > R (ncols, ncols, scalar_type(0));
matGen.fill_random_R (ncols, R.get(), R.lda(), singular_values);
// Broadcast R to all the processors.
scalarMessenger->broadcast (R.get(), ncols*ncols, rootProc);
// Generate (for myself) a random nrowsLocal x ncols
// orthogonal matrix, stored in explicit form.
Matrix< ordinal_type, scalar_type > Q_local (nrowsLocal, ncols);
matGen.explicit_Q (nrowsLocal, ncols, Q_local.get(), Q_local.lda());
// Scale the (local) orthogonal matrix by the number of
// processors P, to make the columns of the global matrix Q
// orthogonal. (Otherwise the norm of each column will be P
// instead of 1.)
const scalar_type P = static_cast< scalar_type > (nprocs);
// Do overflow check. If casting P back to scalar_type
// doesn't produce the same value as nprocs, the cast
// overflowed. We take the real part, because scalar_type
// might be complex.
if (nprocs != static_cast<int> (Teuchos::ScalarTraits<scalar_type>::real (P)))
throw std::runtime_error ("Casting nprocs to Scalar failed");
scaleMatrix (Q_local, P);
// A_local := Q_local * R
blas.GEMM (NO_TRANS, NO_TRANS, nrowsLocal, ncols, ncols,
scalar_type(1), Q_local.get(), Q_local.lda(),
R.get(), R.lda(),
scalar_type(0), A_local.get(), A_local.lda());
for (int recvProc = 1; recvProc < nprocs; ++recvProc)
{
// Ask the receiving processor how big (i.e., how many rows)
// its local component of the matrix is.
ordinal_type nrowsRemote = 0;
ordinalMessenger->recv (&nrowsRemote, 1, recvProc, 0);
if (b_local_debug)
{
std::ostringstream os;
os << "For Proc " << recvProc << ": local block is "
<< nrowsRemote << " by " << ncols << std::endl;
std::cerr << os.str();
}
// Make sure Q_local is big enough to hold the data for
// the current receiver proc.
Q_local.reshape (nrowsRemote, ncols);
// Compute a random nrowsRemote * ncols orthogonal
// matrix Q_local, for the current receiving processor.
matGen.explicit_Q (nrowsRemote, ncols, Q_local.get(), Q_local.lda());
// Send Q_local to the current receiving processor.
scalarMessenger->send (Q_local.get(), nrowsRemote*ncols, recvProc, 0);
}
}
else
{
// Receive the R factor from Proc 0. There's only 1 R
//.........这里部分代码省略.........
开发者ID:agrippa,项目名称:Trilinos,代码行数:101,代码来源:Tsqr_Random_GlobalMatrix.hpp
示例17: main
//.........这里部分代码省略.........
Epetra_FEVector u(globalMapG);
Epetra_FEVector Ku(globalMapG);
u.Random();
std::cout << "About to start ref element matrix\n";
// ************************** Compute element HGrad stiffness matrices *******************************
refQuadNodes(0,0,0) = 0.0;
refQuadNodes(0,0,1) = 0.0;
refQuadNodes(0,1,0) = hx;
refQuadNodes(0,1,1) = 0.0;
refQuadNodes(0,2,0) = hx;
refQuadNodes(0,2,1) = hy;
refQuadNodes(0,3,0) = 0.0;
refQuadNodes(0,3,1) = hy;
// Compute cell Jacobians, their inverses and their determinants
CellTools::setJacobian(refQuadJacobian, cubPoints, refQuadNodes, quad_4);
CellTools::setJacobianInv(refQuadJacobInv, refQuadJacobian );
CellTools::setJacobianDet(refQuadJacobDet, refQuadJacobian );
// transform from [-1,1]^2 to [0,hx]x[0,hy]
fst::HGRADtransformGRAD<double>(quadGradsTransformed, refQuadJacobInv, quadGrads);
// compute weighted measure
fst::computeCellMeasure<double>(weightedMeasure, refQuadJacobDet, cubWeights);
// multiply values with weighted measure
fst::multiplyMeasure<double>(quadGradsTransformedWeighted,
weightedMeasure, quadGradsTransformed);
// integrate to compute element stiffness matrix
fst::integrate<double>(localStiffMatrix,
quadGradsTransformed, quadGradsTransformedWeighted, COMP_BLAS);
std::cout << "Finished with reference element matrix\n";
// now we will scatter global degrees of freedom, apply the local stiffness matrix
// with BLAS, and then gather the results
FieldContainer<double> uScattered(numElems,numFieldsG);
FieldContainer<double> KuScattered(numElems,numFieldsG);
// to extract info from u
u.GlobalAssemble();
Epetra_Time multTimer(Comm);
Ku.PutScalar(0.0);
Ku.GlobalAssemble();
double *uVals = u[0];
double *KuVals = Ku[0];
Teuchos::BLAS<int,double> blas;
Epetra_Time scatterTime(Comm);
std::cout << "Scattering\n";
// Scatter
for (int k=0; k<numElems; k++)
{
for (int i=0;i<numFieldsG;i++)
{
uScattered(k,i) = uVals[ltgMapping(k,i)];
}
}
开发者ID:crtrott,项目名称:Trilinos,代码行数:67,代码来源:example_06.cpp
示例18: exampleDenseCholByBlocks
KOKKOS_INLINE_FUNCTION
int exampleDenseCholByBlocks(const OrdinalType mmin,
const OrdinalType mmax,
const OrdinalType minc,
const OrdinalType mb,
const int max_concurrency,
const int max_task_dependence,
const int team_size,
const bool check,
const bool verbose) {
typedef ValueType value_type;
typedef OrdinalType ordinal_type;
typedef SizeType size_type;
typedef TaskFactory<Kokkos::Experimental::TaskPolicy<SpaceType>,
Kokkos::Experimental::Future<int,SpaceType> > TaskFactoryType;
typedef DenseMatrixBase<value_type,ordinal_type,size_type,SpaceType,MemoryTraits> DenseMatrixBaseType;
typedef DenseMatrixView<DenseMatrixBaseType> DenseMatrixViewType;
typedef TaskView<DenseMatrixViewType,TaskFactoryType> DenseTaskViewType;
typedef DenseMatrixBase<DenseTaskViewType,ordinal_type,size_type,SpaceType,MemoryTraits> DenseHierMatrixBaseType;
typedef DenseMatrixView<DenseHierMatrixBaseType> DenseHierMatrixViewType;
typedef TaskView<DenseHierMatrixViewType,TaskFactoryType> DenseHierTaskViewType;
int r_val = 0;
Kokkos::Impl::Timer timer;
double t = 0.0;
cout << "DenseCholByBlocks:: test matrices "
<<":: mmin = " << mmin << " , mmax = " << mmax << " , minc = " << minc << endl;
const size_t max_task_size = (3*sizeof(DenseTaskViewType)+196); // when 128 error
//cout << "max task size = "<< max_task_size << endl;
typename TaskFactoryType::policy_type policy(max_concurrency,
max_task_size,
max_task_dependence,
team_size);
TaskFactoryType::setMaxTaskDependence(max_task_dependence);
TaskFactoryType::setPolicy(&policy);
for (ordinal_type m=mmin;m<=mmax;m+=minc) {
DenseMatrixBaseType TT("TT", m, m), AA("AA", m, m), AB("AB", m, m);
// random T matrix
for (ordinal_type j=0;j<AA.NumCols();++j) {
for (ordinal_type i=0;i<AA.NumRows();++i)
TT.Value(i,j) = 2.0*((value_type)rand()/(RAND_MAX)) - 1.0;
TT.Value(j,j) = abs(TT.Value(j,j));
}
{
Teuchos::BLAS<ordinal_type,value_type> blas;
// should be square
const ordinal_type nn = AA.NumRows();
const ordinal_type kk = TT.NumRows();
blas.HERK(Teuchos::UPPER_TRI, Teuchos::CONJ_TRANS,
nn, kk,
1.0,
TT.ValuePtr(), TT.ColStride(),
0.0,
AA.ValuePtr(), AA.ColStride());
AB.copy(AA);
}
const double flop = get_flop_chol<value_type>(m);
#ifdef HAVE_SHYLUTACHO_MKL
mkl_set_num_threads(1);
#endif
int ierr = 0;
if (check) {
timer.reset();
DenseTaskViewType A(&AB);
ierr = Chol<Uplo::Upper,AlgoChol::ExternalLapack>::invoke
(TaskFactoryType::Policy(),
TaskFactoryType::Policy().member_single(),
A);
t = timer.seconds();
if (ierr)
ERROR(">> Fail to perform Cholesky (serial) : no reference solution -> no numeric error information");
cout << "DenseCholByBlocks:: Serial Performance = " << (flop/t/1.0e9) << " [GFLOPs]" << endl;
}
{
DenseHierMatrixBaseType HA;
DenseMatrixHelper::flat2hier(AA, HA, mb, mb);
DenseHierTaskViewType TA(&HA);
timer.reset();
auto future = TaskFactoryType::Policy().create_team
(Chol<Uplo::Upper,AlgoChol::DenseByBlocks>
::TaskFunctor<DenseHierTaskViewType>(TA), 0);
TaskFactoryType::Policy().spawn(future);
//.........这里部分代码省略.........
开发者ID:agrippa,项目名称:Trilinos,代码行数:101,代码来源:example_dense_chol_by_blocks.hpp
示例19: exampleDenseCholByBlocks
int exampleDenseCholByBlocks(const ordinal_type mmin,
const ordinal_type mmax,
const ordinal_type minc,
const ordinal_type mb,
const int max_concurrency,
const int max_task_dependence,
const int team_size,
const int mkl_nthreads,
const bool check,
const bool verbose) {
typedef typename
Kokkos::Impl::is_space<DeviceSpaceType>::host_mirror_space::execution_space HostSpaceType ;
const bool detail = false;
std::cout << "DeviceSpace:: "; DeviceSpaceType::print_configuration(std::cout, detail);
std::cout << "HostSpace:: "; HostSpaceType::print_configuration(std::cout, detail);
typedef Kokkos::Experimental::TaskPolicy<DeviceSpaceType> PolicyType;
typedef DenseMatrixBase<value_type,ordinal_type,size_type,HostSpaceType> DenseMatrixBaseHostType;
typedef DenseMatrixView<DenseMatrixBaseHostType> DenseMatrixViewHostType;
typedef DenseMatrixBase<value_type,ordinal_type,size_type,DeviceSpaceType> DenseMatrixBaseDeviceType;
typedef DenseMatrixView<DenseMatrixBaseDeviceType> DenseMatrixViewDeviceType;
typedef TaskView<DenseMatrixViewDeviceType> DenseTaskViewDeviceType;
typedef DenseMatrixBase<DenseTaskViewDeviceType,ordinal_type,size_type,DeviceSpaceType> DenseHierMatrixBaseDeviceType;
typedef DenseMatrixView<DenseHierMatrixBaseDeviceType> DenseHierMatrixViewDeviceType;
typedef TaskView<DenseHierMatrixViewDeviceType> DenseHierTaskViewDeviceType;
int r_val = 0;
Kokkos::Impl::Timer timer;
double t = 0.0;
std::cout << "DenseCholByBlocks:: test matrices "
<<":: mmin = " << mmin << " , mmax = " << mmax << " , minc = " << minc
<< " , mb = " << mb << std::endl;
const size_t max_task_size = (3*sizeof(DenseTaskViewDeviceType)+sizeof(PolicyType)+128);
PolicyType policy(max_concurrency,
max_task_size,
max_task_dependence,
team_size);
std::ostringstream os;
os.precision(3);
os << std::scientific;
for (ordinal_type m=mmin;m<=mmax;m+=minc) {
os.str("");
// host matrices
DenseMatrixBaseHostType AA_host("AA_host", m, m), AB_host("AB_host"), TT_host("TT_host");
// random T matrix
{
TT_host.createConfTo(AA_host);
for (ordinal_type j=0;j<TT_host.NumCols();++j) {
for (ordinal_type i=0;i<TT_host.NumRows();++i)
TT_host.Value(i,j) = 2.0*((value_type)rand()/(RAND_MAX)) - 1.0;
TT_host.Value(j,j) = std::fabs(TT_host.Value(j,j));
}
}
// create SPD matrix
{
Teuchos::BLAS<ordinal_type,value_type> blas;
blas.HERK(ArgUplo == Uplo::Upper ? Teuchos::UPPER_TRI : Teuchos::LOWER_TRI,
Teuchos::CONJ_TRANS,
m, m,
1.0,
TT_host.ValuePtr(), TT_host.ColStride(),
0.0,
AA_host.ValuePtr(), AA_host.ColStride());
// preserve a copy of A
AB_host.createConfTo(AA_host);
DenseMatrixTools::copy(AB_host, AA_host);
}
const double flop = DenseFlopCount<value_type>::Chol(m);
#ifdef HAVE_SHYLUTACHO_MKL
mkl_set_num_threads(mkl_nthreads);
#endif
os << "DenseCholByBlocks:: m = " << m << " ";
int ierr = 0;
if (check) {
timer.reset();
DenseMatrixViewHostType A_host(AB_host);
ierr = Chol<ArgUplo,AlgoChol::ExternalLapack,Variant::One>::invoke
(policy, policy.member_single(),
A_host);
t = timer.seconds();
TACHO_TEST_FOR_ABORT( ierr, "Fail to perform Cholesky (serial)");
os << ":: Serial Performance = " << (flop/t/1.0e9) << " [GFLOPs] ";
}
//.........这里部分代码省略.........
开发者ID:agrippa,项目名称:Trilinos,代码行数:101,代码来源:Tacho_ExampleDenseCholByBlocks.hpp
示例20: Xval
void Constraint<Scalar, LocalOrdinal, GlobalOrdinal, Node>::Setup(const MultiVector& B, const MultiVector& Bc, RCP<const CrsGraph> Ppattern) {
const size_t NSDim = Bc.getNumVectors();
Ppattern_ = Ppattern;
size_t numRows = Ppattern_->getNodeNumRows();
XXtInv_.resize(numRows);
RCP<const Import> importer = Ppattern_->getImporter();
X_ = MultiVectorFactory::Build(Ppattern_->getColMap(), NSDim);
if (!importer.is_null())
X_->doImport(Bc, *importer, Xpetra::INSERT);
else
*X_ = Bc;
std::vector<const SC*> Xval(NSDim);
for (size_t j = 0; j < NSDim; j++)
Xval[j] = X_->getData(j).get();
SC zero = Teuchos::ScalarTraits<SC>::zero
|
请发表评论