
C++ MPI_Allreduce Function Code Examples


This article collects typical usage examples of the MPI_Allreduce function in C++. If you have been wondering how MPI_Allreduce is used in practice, how to call it, or what real-world uses look like, the curated examples below should help.



The following presents 20 code examples of the MPI_Allreduce function, ordered by popularity by default. You can upvote the examples you like or find useful; your feedback helps the site recommend better C++ code examples.
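
Before turning to the collected examples, here is a minimal sketch of the basic call pattern; it is an illustrative assumption and is not taken from any of the projects below. Each rank supplies a local value, the values are combined with the chosen reduction operation (MPI_SUM here), and, unlike MPI_Reduce, every rank receives the combined result.

/* Minimal MPI_Allreduce sketch (illustrative only, not from the projects below):
 * every rank contributes its rank number and all ranks receive the sum. */
#include <mpi.h>
#include <stdio.h>

int main( int argc, char **argv )
{
  int rank, nprocs, local_val, global_sum;

  MPI_Init( &argc, &argv );
  MPI_Comm_rank( MPI_COMM_WORLD, &rank );
  MPI_Comm_size( MPI_COMM_WORLD, &nprocs );

  local_val = rank;   /* each rank's local contribution */

  /* combine the local values with MPI_SUM; the result is delivered to every rank */
  MPI_Allreduce( &local_val, &global_sum, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );

  printf( "rank %d of %d: sum of ranks = %d\n", rank, nprocs, global_sum );

  MPI_Finalize();
  return 0;
}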

Example 1: main


//......... part of the code omitted here .........
  MPI_Bcast( &nprows, 1, MPI_INT, 0, MPI_COMM_WORLD );
  MPI_Bcast( &npcols, 1, MPI_INT, 0, MPI_COMM_WORLD );

  if ( nprows * npcols != nprocs ){
    printf( "mesh not of right size\n" );
    exit( 0 );
  }
  
  /* Figure out what my index is */
  mycol = me / nprows;
  myrow = me % nprows;

  /* create a communicator for the row of which I am part */
  MPI_Comm_split( MPI_COMM_WORLD, myrow, mycol, &comm_row );

  /* create a communicator for the column of which I am part */
  MPI_Comm_split( MPI_COMM_WORLD, mycol, myrow, &comm_col );

  /* create buffers to hold the global A, B, C (everyone will have a copy) */
  global_A = ( double * ) malloc ( sizeof( double ) * n * n );
  global_B = ( double * ) malloc ( sizeof( double ) * n * n );
  global_C = ( double * ) malloc ( sizeof( double ) * n * n );

  /* create random matrices on node zero and share with all nodes */
  if ( me == 0 ){
    random_matrix( n, n, global_A, n );
    random_matrix( n, n, global_B, n );
    random_matrix( n, n, global_C, n );
  }
  MPI_Bcast( global_A, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD );
  MPI_Bcast( global_B, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD );
  MPI_Bcast( global_C, n*n, MPI_DOUBLE, 0, MPI_COMM_WORLD );

  /* compute local matrix sizes */
  local_m   = n / nprows + ( myrow < n % nprows ? 1 : 0 );
  local_n   = n / npcols + ( mycol < n % npcols ? 1 : 0 );

  /* create buffers to hold the local A, B, C */
  local_A = ( double * ) malloc ( sizeof( double ) * local_m * local_n );
  local_B = ( double * ) malloc ( sizeof( double ) * local_m * local_n );
  local_C = ( double * ) malloc ( sizeof( double ) * local_m * local_n );

  /* copy the local parts */
  CopyMatrixGlobalToLocal( n, n, 
			   global_A, n, 
			   local_A, local_m, 
			   comm_row, comm_col );
  CopyMatrixGlobalToLocal( n, n, 
			   global_B, n, 
			   local_B, local_m, 
			   comm_row, comm_col );
  CopyMatrixGlobalToLocal( n, n, 
			   global_C, n, 
			   local_C, local_m, 
			   comm_row, comm_col );

  /* Compute parallel matrix-matrix multiply */
  ParallelMMult( n, n, n, 
		 local_A, local_m, 
		 local_B, local_m, 
		 local_C, local_m, 
		 comm_row, comm_col );

  /* Compute sequential matrix-matrix multiply on all nodes */
  dgemm_( "N", "N", &n, &n, &n,
  	  &d_one, global_A, &n, global_B, &n,
  	  &d_one, global_C, &n );

  local_C_ref = ( double * ) malloc ( sizeof( double ) * local_m * local_n );

  CopyMatrixGlobalToLocal( n, n, 
			   global_C, n, 
			   local_C_ref, local_m, 
			   comm_row, comm_col );

  local_diff = compare_matrices( local_m, local_n, 
				 local_C, local_m, 
				 local_C_ref, local_m );

  MPI_Allreduce( &local_diff, &diff, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD );

  if ( me == 0 )
    printf("\ndiff = %le\n", diff );

  free( global_A );
  free( global_B );
  free( global_C );
  free( local_A );
  free( local_B );
  free( local_C );
  free( local_C_ref);

  MPI_Comm_free( &comm_row );
  MPI_Comm_free( &comm_col );

  /* Clean up the MPI environment */
  MPI_Finalize();

  exit( 0 );
}
Developer ID: scottenriquez, Project: parallel-matrix-multiply, Lines of code: 101, Source file: driver.c


Example 2: triangulateAndFormProl

static PetscErrorCode triangulateAndFormProl(IS selected_2,PetscInt data_stride,PetscReal coords[],PetscInt nselected_1,const PetscInt clid_lid_1[],const PetscCoarsenData *agg_lists_1,
                                             const PetscInt crsGID[],PetscInt bs,Mat a_Prol,PetscReal *a_worst_best)
{
#if defined(PETSC_HAVE_TRIANGLE)
  PetscErrorCode       ierr;
  PetscInt             jj,tid,tt,idx,nselected_2;
  struct triangulateio in,mid;
  const PetscInt       *selected_idx_2;
  PetscMPIInt          rank;
  PetscInt             Istart,Iend,nFineLoc,myFine0;
  int                  kk,nPlotPts,sid;
  MPI_Comm             comm;
  PetscReal            tm;

  PetscFunctionBegin;
  ierr = PetscObjectGetComm((PetscObject)a_Prol,&comm);CHKERRQ(ierr);
  ierr = MPI_Comm_rank(comm,&rank);CHKERRQ(ierr);
  ierr = ISGetSize(selected_2, &nselected_2);CHKERRQ(ierr);
  if (nselected_2 == 1 || nselected_2 == 2) { /* 0 happens on idle processors */
    *a_worst_best = 100.0; /* this will cause a stop, but not globalized (should not happen) */
  } else *a_worst_best = 0.0;
  ierr = MPI_Allreduce(a_worst_best, &tm, 1, MPIU_REAL, MPIU_MAX, comm);CHKERRQ(ierr);
  if (tm > 0.0) {
    *a_worst_best = 100.0;
    PetscFunctionReturn(0);
  }
  ierr     = MatGetOwnershipRange(a_Prol, &Istart, &Iend);CHKERRQ(ierr);
  nFineLoc = (Iend-Istart)/bs; myFine0 = Istart/bs;
  nPlotPts = nFineLoc; /* locals */
  /* triangle */
  /* Define input points - in*/
  in.numberofpoints          = nselected_2;
  in.numberofpointattributes = 0;
  /* get nselected points */
  ierr = PetscMalloc1(2*nselected_2, &in.pointlist);CHKERRQ(ierr);
  ierr = ISGetIndices(selected_2, &selected_idx_2);CHKERRQ(ierr);

  for (kk=0,sid=0; kk<nselected_2; kk++,sid += 2) {
    PetscInt lid = selected_idx_2[kk];
    in.pointlist[sid]   = coords[lid];
    in.pointlist[sid+1] = coords[data_stride + lid];
    if (lid>=nFineLoc) nPlotPts++;
  }
  if (sid != 2*nselected_2) SETERRQ2(PETSC_COMM_SELF,PETSC_ERR_PLIB,"sid %D != 2*nselected_2 %D",sid,nselected_2);

  in.numberofsegments      = 0;
  in.numberofedges         = 0;
  in.numberofholes         = 0;
  in.numberofregions       = 0;
  in.trianglelist          = 0;
  in.segmentmarkerlist     = 0;
  in.pointattributelist    = 0;
  in.pointmarkerlist       = 0;
  in.triangleattributelist = 0;
  in.trianglearealist      = 0;
  in.segmentlist           = 0;
  in.holelist              = 0;
  in.regionlist            = 0;
  in.edgelist              = 0;
  in.edgemarkerlist        = 0;
  in.normlist              = 0;

  /* triangulate */
  mid.pointlist = 0;            /* Not needed if -N switch used. */
  /* Not needed if -N switch used or number of point attributes is zero: */
  mid.pointattributelist = 0;
  mid.pointmarkerlist    = 0; /* Not needed if -N or -B switch used. */
  mid.trianglelist       = 0;    /* Not needed if -E switch used. */
  /* Not needed if -E switch used or number of triangle attributes is zero: */
  mid.triangleattributelist = 0;
  mid.neighborlist          = 0; /* Needed only if -n switch used. */
  /* Needed only if segments are output (-p or -c) and -P not used: */
  mid.segmentlist = 0;
  /* Needed only if segments are output (-p or -c) and -P and -B not used: */
  mid.segmentmarkerlist = 0;
  mid.edgelist          = 0;    /* Needed only if -e switch used. */
  mid.edgemarkerlist    = 0; /* Needed if -e used and -B not used. */
  mid.numberoftriangles = 0;

  /* Triangulate the points.  Switches are chosen to read and write a  */
  /*   PSLG (p), preserve the convex hull (c), number everything from  */
  /*   zero (z), assign a regional attribute to each element (A), and  */
  /*   produce an edge list (e), a Voronoi diagram (v), and a triangle */
  /*   neighbor list (n).                                            */
  if (nselected_2 != 0) { /* inactive processor */
    char args[] = "npczQ"; /* c is needed ? */
    triangulate(args, &in, &mid, (struct triangulateio*) NULL);
    /* output .poly files for 'showme' */
    if (!PETSC_TRUE) {
      static int level = 1;
      FILE       *file; char fname[32];

      sprintf(fname,"C%d_%d.poly",level,rank); file = fopen(fname, "w");
      /*First line: <# of vertices> <dimension (must be 2)> <# of attributes> <# of boundary markers (0 or 1)>*/
      fprintf(file, "%d  %d  %d  %d\n",in.numberofpoints,2,0,0);
      /*Following lines: <vertex #> <x> <y> */
      for (kk=0,sid=0; kk<in.numberofpoints; kk++,sid += 2) {
        fprintf(file, "%d %e %e\n",kk,in.pointlist[sid],in.pointlist[sid+1]);
      }
      /*One line: <# of segments> <# of boundary markers (0 or 1)> */
//......... part of the code omitted here .........
Developer ID: pombredanne, Project: petsc, Lines of code: 101, Source file: geo.c


Example 3: isFinished

		bool isFinished() {
			int ret;
			/* sum the per-rank 'active' flags; a global sum of zero means no rank is still active */
			MPI_Allreduce(&active, &ret, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);
			return ret == 0;
		}
Developer ID: qinglan233, Project: PDC-2016-project, Lines of code: 5, Source file: Communicator.hpp


Example 4: poisson_main


//......... part of the code omitted here .........

  double solve_time = fei::utils::cpu_time() - start_solve_time;

  if (localProc == 0) {
    FEI_COUT << "FEI cpu-times:" << FEI_ENDL
	 << "    init. phase: " << iTime << FEI_ENDL
	 << "    load  phase: " << lTime << FEI_ENDL
	 << "    solve  time: " << sTime << FEI_ENDL;
  }

  double norm = 0.0;
  FEI_COUT.setf(IOS_SCIENTIFIC, IOS_FLOATFIELD);
  CHK_ERR( fei->residualNorm(1, 1, &fieldIDs[0], &norm) );
  if (localProc == 0) {
    FEI_COUT << "returned residual norm: " << norm << FEI_ENDL;
  }

  int itersTaken = 0;
  CHK_ERR( fei->iterations(itersTaken) );

  //
  //We oughtta make sure the solution we just computed is correct...
  //

  int numNodes = 0;
  CHK_ERR( fei->getNumLocalNodes(numNodes) );

  double maxErr = 0.0;
  if (numNodes > 0) {
    int lenNodeIDs = numNodes;
    GlobalID* nodeIDs = new GlobalID[lenNodeIDs];
    double* soln = new double[lenNodeIDs];
    if (nodeIDs != NULL && soln != NULL) {
      CHK_ERR( fei->getLocalNodeIDList(numNodes, nodeIDs, lenNodeIDs) );

      int fieldID = 1;
      CHK_ERR( fei->getNodalFieldSolution(fieldID, numNodes, nodeIDs, soln));

      for(int i=0; i<numNodes; i++) {
	int nID = (int)nodeIDs[i];
	double x = (1.0* ((nID-1)%(L+1)))/L;
	double y = (1.0* ((nID-1)/(L+1)))/L;

	double exactSoln = x*x + y*y;
	double error = std::abs(exactSoln - soln[i]);
	if (maxErr < error) maxErr = error;
      }

      delete [] nodeIDs;
      delete [] soln;
    }
    else {
      fei::console_out() << "allocation of nodeIDs or soln failed." << FEI_ENDL; 
    }

  }

#ifndef FEI_SER
  double globalMaxErr = 0.0;
  MPI_Allreduce(&maxErr, &globalMaxErr, 1, MPI_DOUBLE, MPI_MAX, comm);
  maxErr = globalMaxErr;
#endif
  bool testPassed = true;
  if (maxErr > 1.e-6) testPassed = false;

  if (testPassed && localProc == 0) {
    FEI_COUT << "poisson: TEST PASSED, maxErr = " << maxErr << ", iterations: "
	 << itersTaken << FEI_ENDL;
    //This is something the SIERRA runtest tool looks for in test output...
    FEI_COUT << "SIERRA execution successful" << FEI_ENDL;
  }
  if (testPassed == false && localProc == 0) {
    FEI_COUT << "maxErr = " << maxErr << ", TEST FAILED" << FEI_ENDL;
    FEI_COUT << "(Test is deemed to have passed if the maximum difference"
	 << " between the exact and computed solutions is 1.e-6 or less.)"
	 << FEI_ENDL;
  }

  double elapsed_cpu_time = fei::utils::cpu_time() - start_time;

  //The following IOS_... macros are defined in base/fei_macros.h
  FEI_COUT.setf(IOS_FIXED, IOS_FLOATFIELD);
  if (localProc==0) {
    FEI_COUT << "Proc0 cpu times (seconds):" << FEI_ENDL
	 << "   FEI initialize:  " << fei_init_time << FEI_ENDL
         << "   FEI load:        " << fei_load_time << FEI_ENDL
         << "      solve:        " << solve_time << FEI_ENDL
         << "Total program time: " << elapsed_cpu_time << FEI_ENDL;
  }

  wrapper.reset();
  fei.reset();
  factory.reset();
  //If Prometheus is being used, we need to make sure that the
  //LibraryWrapper inside factory is deleted before MPI_Finalize() is called.
  //(That's why we call the 'reset' methods on these shared-pointers rather
  //than just letting them destroy themselves when they go out of scope.)

  return(0);
}
Developer ID: cakeisalie, Project: oomphlib_003, Lines of code: 101, Source file: poisson_main.cpp


Example 5: cell_mindt

double cell_mindt( struct Cell *** theCells, struct Sim * theSim, struct GravMass * theGravMasses ){
    int i_m,j_m,k_m;
    double dt_m = 1.e100;//HUGE_VAL;
    double a_m,r_m,dx_m;
    double mag_vel_m;
    int i,j,k;
    for( k=sim_Nghost_min(theSim,Z_DIR) ; k<sim_N(theSim,Z_DIR)-sim_Nghost_max(theSim,Z_DIR) ; ++k ){
        double zm = sim_FacePos(theSim,k-1,Z_DIR);
        double zp = sim_FacePos(theSim,k,Z_DIR);
        double dz = zp-zm;
        for( i=sim_Nghost_min(theSim,R_DIR) ; i<sim_N(theSim,R_DIR)-sim_Nghost_max(theSim,R_DIR) ; ++i ){
            double rm = sim_FacePos(theSim,i-1,R_DIR);
            double rp = sim_FacePos(theSim,i,R_DIR);
            double dr = rp-rm;
            double r = .5*(rp+rm);
            for( j=0 ; j<sim_N_p(theSim,i) ; ++j ){
                int jm = j-1;
                if( j==0 ) jm = sim_N_p(theSim,i)-1;
                double w = .5*(theCells[k][i][j].wiph+theCells[k][i][jm].wiph);
                double dx = dr;
                double rdphi = .5*(rp+rm)*theCells[k][i][j].dphi;
                if( rdphi<dr ) {
                    dx = rdphi;
                }
                if( dx>dz ) {
                    dx = dz;
                }
                double a  = maxvel( theCells[k][i][j].prim , w , r ,theSim);
                double rho = theCells[k][i][j].prim[RHO]; 
                double Pp  = theCells[k][i][j].prim[PPP]; 


                double dt = sim_CFL(theSim)*dx/a;
                if( sim_EXPLICIT_VISCOSITY(theSim)>0.0 ){
                    double nu;
                    if (sim_VISC_CONST(theSim)==1){
                        nu = sim_EXPLICIT_VISCOSITY(theSim);
                    } else{
                        double tiph = theCells[k][i][j].tiph - 0.5*theCells[k][i][j].dphi;
                        if (sim_InitialDataType(theSim)==SHEAR){
                            double HoR = 0.1;
                            //nu = sim_EXPLICIT_VISCOSITY(theSim)*HoR*HoR*pow(fabs((r*cos(tiph))),1.5);
                            nu = sim_EXPLICIT_VISCOSITY(theSim)*sim_GAMMALAW(theSim)*Pp/rho*pow(fabs(r*cos(tiph)),2.0);
                            if (r*cos(tiph)>20.) nu=0.000000001;
                        } else{
                            double M0 = gravMass_M(theGravMasses,0);
                            double M1 = gravMass_M(theGravMasses,1);
                            double dist_bh0 = gravMass_dist(theGravMasses,0,r,tiph,0.);
                            double dist_bh1 = gravMass_dist(theGravMasses,1,r,tiph,0.);
                            double alpha = sim_EXPLICIT_VISCOSITY(theSim);
                            double eps = sim_G_EPS(theSim);
                            //nu = sim_EXPLICIT_VISCOSITY(theSim)*sim_GAMMALAW(theSim)*Pp/rho*pow(r,1.5);
                            nu = alpha*Pp/rho/sqrt(pow(dist_bh0*dist_bh0+eps*eps,-1.5)*M0+pow(dist_bh1*dist_bh1+eps*eps,-1.5)*M1);
                        }
                    }
                    double dt_visc = .25*dx*dx/nu;
                    dt = dt/( 1. + dt/dt_visc );
                }
                if( dt_m > dt ) {
                    dt_m = dt;
                }
            } 
        }
    }
    double dt2;
    MPI_Allreduce( &dt_m , &dt2 , 1 , MPI_DOUBLE , MPI_MIN , sim_comm );
    return( dt2 );

}
Developer ID: brianfarris, Project: Disco2, Lines of code: 69, Source file: cell_mindt.c


Example 6: gmx_check_thread_affinity_set

/* Check the process affinity mask and, if it is found to be non-zero,
 * honor it and disable mdrun's internal affinity setting.
 * Note that this will only work on Linux as we use a GNU feature.
 */
void
gmx_check_thread_affinity_set(FILE            *fplog,
                              const t_commrec *cr,
                              gmx_hw_opt_t    *hw_opt,
                              int  gmx_unused  nthreads_hw_avail,
                              gmx_bool         bAfterOpenmpInit)
{
    GMX_RELEASE_ASSERT(hw_opt, "hw_opt must be a non-NULL pointer");

    if (!bAfterOpenmpInit)
    {
        /* Check for externally set OpenMP affinity and turn off internal
         * pinning if any is found. We need to do this check early to tell
         * thread-MPI whether it should do pinning when spawning threads.
         * TODO: the above no longer holds, we should move these checks later
         */
        if (hw_opt->thread_affinity != threadaffOFF)
        {
            char *message;
            if (!gmx_omp_check_thread_affinity(&message))
            {
                /* TODO: with -pin auto we should only warn when using all cores */
                md_print_warn(cr, fplog, "%s", message);
                sfree(message);
                hw_opt->thread_affinity = threadaffOFF;
            }
        }

        /* With thread-MPI this is needed as pinning might get turned off,
         * which needs to be known before starting thread-MPI.
         * With thread-MPI hw_opt is processed here on the master rank
         * and passed to the other ranks later, so we only do this on master.
         */
        if (!SIMMASTER(cr))
        {
            return;
        }
#ifndef GMX_THREAD_MPI
        return;
#endif
    }

#ifdef HAVE_SCHED_AFFINITY
    int       ret;
    cpu_set_t mask_current;

    if (hw_opt->thread_affinity == threadaffOFF)
    {
        /* internal affinity setting is off, don't bother checking process affinity */
        return;
    }

    CPU_ZERO(&mask_current);
    if ((ret = sched_getaffinity(0, sizeof(cpu_set_t), &mask_current)) != 0)
    {
        /* failed to query affinity mask, will just return */
        if (debug)
        {
            fprintf(debug, "Failed to query affinity mask (error %d)", ret);
        }
        return;
    }

    /* Before proceeding with the actual check, make sure that the number of
     * detected CPUs is >= the CPUs in the current set.
     * We need to check for CPU_COUNT as it was added only in glibc 2.6. */
#ifdef CPU_COUNT
    if (nthreads_hw_avail < CPU_COUNT(&mask_current))
    {
        if (debug)
        {
            fprintf(debug, "%d hardware threads detected, but %d was returned by CPU_COUNT",
                    nthreads_hw_avail, CPU_COUNT(&mask_current));
        }
        return;
    }
#endif /* CPU_COUNT */

    gmx_bool bAllSet = TRUE;
    for (int i = 0; (i < nthreads_hw_avail && i < CPU_SETSIZE); i++)
    {
        bAllSet = bAllSet && (CPU_ISSET(i, &mask_current) != 0);
    }

#ifdef GMX_LIB_MPI
    gmx_bool  bAllSet_All;

    MPI_Allreduce(&bAllSet, &bAllSet_All, 1, MPI_INT, MPI_LAND, MPI_COMM_WORLD);
    bAllSet = bAllSet_All;
#endif

    if (!bAllSet)
    {
        if (hw_opt->thread_affinity == threadaffAUTO)
        {
            if (!bAfterOpenmpInit)
//......... part of the code omitted here .........
Developer ID: pjohansson, Project: gromacs, Lines of code: 101, Source file: thread_affinity.cpp


Example 7: hunt_problem


//......... part of the code omitted here .........
#endif
	}

	/*
	 * PRINT OUT VALUES OF EXTRA UNKNOWNS 
	 * FROM AUGMENTING CONDITIONS 
	 */

	if (nAC > 0) 
          {
	    
	    DPRINTF(stderr, "\n------------------------------\n");
	    DPRINTF(stderr, "Augmenting Conditions:    %4d\n", nAC);
	    DPRINTF(stderr, "Number of extra unknowns: %4d\n\n", nAC);

            for (iAC = 0; iAC < nAC; iAC++)
             {
              if (augc[iAC].Type == AC_USERBC)
               {
                DPRINTF(stderr, "\tAC[%4d] DF[%4d] = %10.6e\n",
                        augc[iAC].BCID, augc[iAC].DFID, x_AC[iAC]);
               }
              else if (augc[iAC].Type == AC_USERMAT  ||
                       augc[iAC].Type == AC_FLUX_MAT )
               {
                DPRINTF(stderr, "\n MT[%4d] MP[%4d] = %10.6e\n",
                        augc[iAC].MTID, augc[iAC].MPID, x_AC[iAC]);
               }
              else if(augc[iAC].Type == AC_VOLUME)
               {
                evol_local = augc[iAC].evol;
#ifdef PARALLEL
                if( Num_Proc > 1 ) {
                     MPI_Allreduce( &evol_local, &evol_global, 1,
                                    MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
                }
                evol_local = evol_global;
#endif
                DPRINTF(stderr, "\tMT[%4d] VC[%4d]=%10.6e Param=%10.6e\n",
                        augc[iAC].MTID, augc[iAC].VOLID, evol_local,
                        x_AC[iAC]);
               }
	      else if(augc[iAC].Type == AC_POSITION)
               {
                evol_local = augc[iAC].evol;
#ifdef PARALLEL
                if( Num_Proc > 1 ) {
                     MPI_Allreduce( &evol_local, &evol_global, 1,
                                    MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
                }
                evol_local = evol_global;
#endif
                DPRINTF(stderr, "\tMT[%4d] XY[%4d]=%10.6e Param=%10.6e\n",
                        augc[iAC].MTID, augc[iAC].VOLID, evol_local,
                        x_AC[iAC]);
               }
               else if(augc[iAC].Type == AC_FLUX)
               {
                DPRINTF(stderr, "\tBC[%4d] DF[%4d]=%10.6e\n",
                        augc[iAC].BCID, augc[iAC].DFID, x_AC[iAC]);
               }
             }
	  }

      /* Check element quality */
      good_mesh = element_quality(exo, x, ams[0]->proc_config);
Developer ID: gomatestbot, Project: goma, Lines of code: 67, Source file: ac_hunt.c


Example 8: main

int main( int argc, char **argv )
{

    MPI_Comm comm;
    int      *sbuf, *rbuf;
    int      rank, size;
    int      *sendcounts, *recvcounts, *rdispls, *sdispls;
    int      i, j, *p, err, toterr;
    
    MPI_Init( &argc, &argv );
    err = 0;
    
    comm = MPI_COMM_WORLD;

    /* Create the buffer */
    MPI_Comm_size( comm, &size );
    MPI_Comm_rank( comm, &rank );
    sbuf = (int *)malloc( size * size * sizeof(int) );
    rbuf = (int *)malloc( size * size * sizeof(int) );
    if (!sbuf || !rbuf) {
	fprintf( stderr, "Could not allocated buffers!\n" );
	MPI_Abort( comm, 1 );
    }
    
    /* Load up the buffers */
    for (i=0; i<size*size; i++) {
	sbuf[i] = i + 100*rank;
	rbuf[i] = -i;
    }

    /* Create and load the arguments to alltoallv */
    sendcounts = (int *)malloc( size * sizeof(int) );
    recvcounts = (int *)malloc( size * sizeof(int) );
    rdispls    = (int *)malloc( size * sizeof(int) );
    sdispls    = (int *)malloc( size * sizeof(int) );
    if (!sendcounts || !recvcounts || !rdispls || !sdispls) {
	fprintf( stderr, "Could not allocate arg items!\n" );
	MPI_Abort( comm, 1 );
    }
    for (i=0; i<size; i++) {
	sendcounts[i] = i;
	recvcounts[i] = rank;
	rdispls[i]    = i * rank;
	sdispls[i]    = (i * (i+1))/2;
    }
    MPI_Alltoallv( sbuf, sendcounts, sdispls, MPI_INT,
		   rbuf, recvcounts, rdispls, MPI_INT, comm );

    /* Check rbuf */
    for (i=0; i<size; i++) {
	p = rbuf + rdispls[i];
	for (j=0; j<rank; j++) {
	    if (p[j] != i * 100 + (rank*(rank+1))/2 + j) {
		fprintf( stderr, "[%d] got %d expected %d for %dth\n",
			 rank, p[j],(i*(i+1))/2 + j, j );
		err++;
	    }
	}
    }

    free( sdispls );
    free( rdispls );
    free( recvcounts );
    free( sendcounts );
    free( rbuf );
    free( sbuf );

    MPI_Allreduce( &err, &toterr, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD );
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    if (rank == 0) {
	if (toterr > 0) 
	    fprintf( stderr, "Test FAILED with %d errors\n", toterr );
	else
	    fprintf( stderr, "Test passed\n" );
    }
	
    MPI_Finalize();
    return 0;
}
Developer ID: MartinLidh, Project: tddc78, Lines of code: 79, Source file: alltoallv.c


Example 9: LU_decomp

void LU_decomp(struct problem *info, struct fmatrix *X, int *reorder, MPI_Datatype pivot_type, MPI_Op best_pivot_op)
{
	MPI_Request req_spiv, req_sa, req_sm;
	MPI_Status status;
	
	number_type *m = malloc(info->blksz * sizeof(*m));
	int diag;
	for (diag = 0; diag < info->n; diag++) {
		/* we do partial pivoting, so the proc with the pivot is on this column: */
		int pivot_h = diag / info->blksz;
		int r, c, i;
		
		double start_time = MPI_Wtime();
		double start_time2;

		struct pivot pivot = { -1, 0. };
		/* choose pivot across the column */
		if (info->coords[HDIM] == pivot_h) {
			/* column with pivot in block */
			int pivot_c = diag % info->blksz;
			/* Argo doesn't want aliasing in allreduce */
			struct pivot pivot_cand = { -1, 0. };
			for (i = 0; i < info->blksz; i++) {
				if (reorder[i] > diag && fabs(CELL(X, i, pivot_c)) > fabs(pivot_cand.value)) {
					pivot_cand.row = info->blksz*info->coords[VDIM] + i;
					pivot_cand.value = CELL(X, i, pivot_c);
				}
			}
			start_time2 = MPI_Wtime();
			MPI_Allreduce(&pivot_cand, &pivot, 1, pivot_type, best_pivot_op, info->vcomm);
			pivot_allr_time += MPI_Wtime() - start_time2;
		}
		/* broadcast pivot choice across row towards the right */
		start_time2 = MPI_Wtime();
		pipeline_right(info, pivot_h, &pivot, 1, pivot_type, 45, &req_spiv);
		pivot_bcast_time += MPI_Wtime() - start_time2;
		
		pivot_time += MPI_Wtime() - start_time;
		
		/* find rank of proc with pivot on the vertical communicator */
		int pivot_v = pivot.row / info->blksz;
		
		/* fill in reorder */
		if (info->coords[VDIM] == pivot_v) {
			reorder[pivot.row % info->blksz] = diag;
		}
		
		/* calculate and distribute the ms */
		for (r = 0; r < info->blksz; r++) {
			if (reorder[r] > diag) {
				if (info->coords[HDIM] == pivot_h) {
					int pivot_c = diag % info->blksz;
					m[r] = CELL(X, r, pivot_c) / pivot.value;
					CELL(X, r, pivot_c) = m[r];
				}
				/* broadcast m towards right */
				start_time = MPI_Wtime();
				pipeline_right(info, pivot_h, &m[r], 1, MPI_number_type, 64, &req_sm);
				m_bcast_time += MPI_Wtime() - start_time;
			}
		}
		
		/* distribute the pivot row and eliminate */
		int startc = 0;
		if (info->coords[HDIM] == pivot_h) startc = (diag+1) % info->blksz;
		if (info->coords[HDIM] < pivot_h) startc = info->blksz;
		/* elimination */
		for (c = startc; c < info->blksz; c++) {
			number_type a;
			if (info->coords[VDIM] == pivot_v) {
				a = CELL(X, pivot.row % info->blksz, c);
			}
			
			start_time = MPI_Wtime();
			
			int up = (info->coords[VDIM]+info->sqp-1)%info->sqp;
			int down = (info->coords[VDIM]+1)%info->sqp;
			if (info->coords[VDIM] != pivot_v) {
				MPI_Recv(&a, 1, MPI_number_type, up, 78, info->vcomm, &status);
			}
			if (down != pivot_v) {
				MPI_Isend(&a, 1, MPI_number_type, down, 78, info->vcomm, &req_sa);
			}
 			
			a_bcast_time += MPI_Wtime() - start_time;
			
			for (r = 0; r < info->blksz; r++) {
				if (reorder[r] > diag) {
					CELL(X, r,c) -= m[r]*a;
				}
			}
			if (down != pivot_v) MPI_Wait(&req_sa, &status);
		}
	}
}
Developer ID: cstroe, Project: PP-MM-A03, Lines of code: 95, Source file: lu2d.c


Example 10: DMPlexPreallocateOperator

PetscErrorCode DMPlexPreallocateOperator(DM dm, PetscInt bs, PetscSection section, PetscSection sectionGlobal, PetscInt dnz[], PetscInt onz[], PetscInt dnzu[], PetscInt onzu[], Mat A, PetscBool fillMatrix)
{
  MPI_Comm           comm;
  MatType            mtype;
  PetscSF            sf, sfDof, sfAdj;
  PetscSection       leafSectionAdj, rootSectionAdj, sectionAdj, anchorSectionAdj;
  PetscInt           nroots, nleaves, l, p;
  const PetscInt    *leaves;
  const PetscSFNode *remotes;
  PetscInt           dim, pStart, pEnd, numDof, globalOffStart, globalOffEnd, numCols;
  PetscInt          *tmpAdj = NULL, *adj, *rootAdj, *anchorAdj = NULL, *cols, *remoteOffsets;
  PetscInt           adjSize;
  PetscLayout        rLayout;
  PetscInt           locRows, rStart, rEnd, r;
  PetscMPIInt        size;
  PetscBool          doCommLocal, doComm, debug = PETSC_FALSE, isSymBlock, isSymSeqBlock, isSymMPIBlock;
  PetscBool          useAnchors;
  PetscErrorCode     ierr;

  PetscFunctionBegin;
  PetscValidHeaderSpecific(dm, DM_CLASSID, 1);
  PetscValidHeaderSpecific(section, PETSC_SECTION_CLASSID, 3);
  PetscValidHeaderSpecific(sectionGlobal, PETSC_SECTION_CLASSID, 4);
  PetscValidHeaderSpecific(A, MAT_CLASSID, 9);
  if (dnz)  PetscValidPointer(dnz,5);
  if (onz)  PetscValidPointer(onz,6);
  if (dnzu) PetscValidPointer(dnzu,7);
  if (onzu) PetscValidPointer(onzu,8);
  ierr = PetscLogEventBegin(DMPLEX_Preallocate,dm,0,0,0);CHKERRQ(ierr);
  ierr = PetscObjectGetComm((PetscObject)dm,&comm);CHKERRQ(ierr);
  ierr = PetscOptionsGetBool(NULL, "-dm_view_preallocation", &debug, NULL);CHKERRQ(ierr);
  ierr = MPI_Comm_size(comm, &size);CHKERRQ(ierr);
  ierr = DMGetDimension(dm, &dim);CHKERRQ(ierr);
  ierr = DMGetPointSF(dm, &sf);CHKERRQ(ierr);
  ierr = PetscSFGetGraph(sf, &nroots, NULL, NULL, NULL);CHKERRQ(ierr);
  doCommLocal = (size > 1) && (nroots >= 0) ? PETSC_TRUE : PETSC_FALSE;
  ierr = MPI_Allreduce(&doCommLocal, &doComm, 1, MPIU_BOOL, MPI_LAND, comm);CHKERRQ(ierr);
  /* Create dof SF based on point SF */
  if (debug) {
    ierr = PetscPrintf(comm, "Input Section for Preallocation:\n");CHKERRQ(ierr);
    ierr = PetscSectionView(section, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
    ierr = PetscPrintf(comm, "Input Global Section for Preallocation:\n");CHKERRQ(ierr);
    ierr = PetscSectionView(sectionGlobal, PETSC_VIEWER_STDOUT_WORLD);CHKERRQ(ierr);
    ierr = PetscPrintf(comm, "Input SF for Preallocation:\n");CHKERRQ(ierr);
    ierr = PetscSFView(sf, NULL);CHKERRQ(ierr);
  }
  ierr = PetscSFCreateRemoteOffsets(sf, section, section, &remoteOffsets);CHKERRQ(ierr);
  ierr = PetscSFCreateSectionSF(sf, section, remoteOffsets, section, &sfDof);CHKERRQ(ierr);
  if (debug) {
    ierr = PetscPrintf(comm, "Dof SF for Preallocation:\n");CHKERRQ(ierr);
    ierr = PetscSFView(sfDof, NULL);CHKERRQ(ierr);
  }
  /* Create section for dof adjacency (dof ==> # adj dof) */
  ierr = PetscSectionGetChart(section, &pStart, &pEnd);CHKERRQ(ierr);
  ierr = PetscSectionGetStorageSize(section, &numDof);CHKERRQ(ierr);
  ierr = PetscSectionCreate(comm, &leafSectionAdj);CHKERRQ(ierr);
  ierr = PetscSectionSetChart(leafSectionAdj, 0, numDof);CHKERRQ(ierr);
  ierr = PetscSectionCreate(comm, &rootSectionAdj);CHKERRQ(ierr);
  ierr = PetscSectionSetChart(rootSectionAdj, 0, numDof);CHKERRQ(ierr);
  /*   Fill in the ghost dofs on the interface */
  ierr = PetscSFGetGraph(sf, NULL, &nleaves, &leaves, &remotes);CHKERRQ(ierr);
  /* use constraints in finding adjacency in this routine */
  ierr = DMPlexGetAdjacencyUseAnchors(dm,&useAnchors);CHKERRQ(ierr);
  ierr = DMPlexSetAdjacencyUseAnchors(dm,PETSC_TRUE);CHKERRQ(ierr);

  /*
   section        - maps points to (# dofs, local dofs)
   sectionGlobal  - maps points to (# dofs, global dofs)
   leafSectionAdj - maps unowned local dofs to # adj dofs
   rootSectionAdj - maps   owned local dofs to # adj dofs
   adj            - adj global dofs indexed by leafSectionAdj
   rootAdj        - adj global dofs indexed by rootSectionAdj
   sf    - describes shared points across procs
   sfDof - describes shared dofs across procs
   sfAdj - describes shared adjacent dofs across procs
   ** The bootstrapping process involves six rounds with similar structure of visiting neighbors of each point.
  (0). If there are point-to-point constraints, add the adjacencies of constrained points to anchors in anchorAdj
       (This is done in DMPlexComputeAnchorAdjacencies())
    1. Visit unowned points on interface, count adjacencies placing in leafSectionAdj
       Reduce those counts to rootSectionAdj (now redundantly counting some interface points)
    2. Visit owned points on interface, count adjacencies placing in rootSectionAdj
       Create sfAdj connecting rootSectionAdj and leafSectionAdj
    3. Visit unowned points on interface, write adjacencies to adj
       Gather adj to rootAdj (note that there is redundancy in rootAdj when multiple procs find the same adjacencies)
    4. Visit owned points on interface, write adjacencies to rootAdj
       Remove redundancy in rootAdj
   ** The last two traversals use transitive closure
    5. Visit all owned points in the subdomain, count dofs for each point (sectionAdj)
       Allocate memory addressed by sectionAdj (cols)
    6. Visit all owned points in the subdomain, insert dof adjacencies into cols
   ** Knowing all the column adjacencies, check ownership and sum into dnz and onz
  */

  ierr = DMPlexComputeAnchorAdjacencies(dm,section,sectionGlobal,&anchorSectionAdj,&anchorAdj);CHKERRQ(ierr);

  for (l = 0; l < nleaves; ++l) {
    PetscInt dof, off, d, q, anDof;
    PetscInt p = leaves[l], numAdj = PETSC_DETERMINE;

    if ((p < pStart) || (p >= pEnd)) continue;
//......... part of the code omitted here .........
Developer ID: PeiLiu90, Project: petsc, Lines of code: 101, Source file: plexpreallocate.c


Example 11: MPI_Comm_rank

struct fft_plan_3d *fft_3d_create_plan(
       MPI_Comm comm, int nfast, int nmid, int nslow,
       int in_ilo, int in_ihi, int in_jlo, int in_jhi,
       int in_klo, int in_khi,
       int out_ilo, int out_ihi, int out_jlo, int out_jhi,
       int out_klo, int out_khi,
       int scaled, int permute, int *nbuf)

{
  struct fft_plan_3d *plan;
  int me,nprocs;
  int /*i,num,*/flag,remapflag;//,fftflag;
  int first_ilo,first_ihi,first_jlo,first_jhi,first_klo,first_khi;
  int second_ilo,second_ihi,second_jlo,second_jhi,second_klo,second_khi;
  int third_ilo,third_ihi,third_jlo,third_jhi,third_klo,third_khi;
  int out_size,first_size,second_size,third_size,copy_size,scratch_size;
  int np1,np2,ip1,ip2;
  //  int list[50];

/* system specific variables */

#ifdef FFT_INTEL
  FFT_DATA dummy;
#endif
#ifdef FFT_T3E
  FFT_DATA dummy[5];
  int isign,isys;
  double scalef;
#endif

/* query MPI info */

  MPI_Comm_rank(comm,&me);
  MPI_Comm_size(comm,&nprocs);

/* compute division of procs in 2 dimensions not on-processor */

  bifactor(nprocs,&np1,&np2);
  ip1 = me % np1;
  ip2 = me/np1;

/* allocate memory for plan data struct */

  plan = (struct fft_plan_3d *) malloc(sizeof(struct fft_plan_3d));
  if (plan == NULL) return NULL;

/* remap from initial distribution to layout needed for 1st set of 1d FFTs
   not needed if all procs own entire fast axis initially
   first indices = distribution after 1st set of FFTs */

  if (in_ilo == 0 && in_ihi == nfast-1)
    flag = 0;
  else
    flag = 1;

  MPI_Allreduce(&flag,&remapflag,1,MPI_INT,MPI_MAX,comm);

  if (remapflag == 0) {
    first_ilo = in_ilo;
    first_ihi = in_ihi;
    first_jlo = in_jlo;
    first_jhi = in_jhi;
    first_klo = in_klo;
    first_khi = in_khi;
    plan->pre_plan = NULL;
  }
  else {
    first_ilo = 0;
    first_ihi = nfast - 1;
    first_jlo = ip1*nmid/np1;
    first_jhi = (ip1+1)*nmid/np1 - 1;
    first_klo = ip2*nslow/np2;
    first_khi = (ip2+1)*nslow/np2 - 1;
    plan->pre_plan =
      remap_3d_create_plan(comm,in_ilo,in_ihi,in_jlo,in_jhi,in_klo,in_khi,
			   first_ilo,first_ihi,first_jlo,first_jhi,
			   first_klo,first_khi,
			   FFT_PRECISION,0,0,2);
    if (plan->pre_plan == NULL) return NULL;
  }

/* 1d FFTs along fast axis */

  plan->length1 = nfast;
  plan->total1 = nfast * (first_jhi-first_jlo+1) * (first_khi-first_klo+1);

/* remap from 1st to 2nd FFT
   choose which axis is split over np1 vs np2 to minimize communication
   second indices = distribution after 2nd set of FFTs */

  second_ilo = ip1*nfast/np1;
  second_ihi = (ip1+1)*nfast/np1 - 1;
  second_jlo = 0;
  second_jhi = nmid - 1;
  second_klo = ip2*nslow/np2;
  second_khi = (ip2+1)*nslow/np2 - 1;
  plan->mid1_plan =
      remap_3d_create_plan(comm,
			   first_ilo,first_ihi,first_jlo,first_jhi,
			   first_klo,first_khi,
//......... part of the code omitted here .........
Developer ID: darien0, Project: Mara, Lines of code: 101, Source file: fft_3d.c


Example 12: run_bfs


//......... part of the code omitted here .........
  int64_t global_queue_summary_size = g_global_queue_summary_size;
  int64_t global_queue_size = g_global_queue_size;

#define SWIZZLE_VERTEX(c) (((int64_t)(VERTEX_OWNER(c)) << lg_local_queue_size) | (int64_t)(VERTEX_LOCAL(c)))
#if 0
  int64_t* restrict column_swizzled = (int64_t*)xmalloc(nlocaledges * sizeof(int64_t));
  {
    size_t i;
    for (i = 0; i < nlocaledges; ++i) {
      int64_t c = column[i];
      column_swizzled[i] = SWIZZLE_VERTEX(c);
    }
  }
#endif

  unsigned long* restrict in_queue = g_in_queue;
  memset(in_queue, 0, global_queue_size * sizeof(unsigned long));
  unsigned long* restrict in_queue_summary = g_in_queue_summary;
  memset(in_queue_summary, 0, global_queue_summary_size * sizeof(unsigned long));
  unsigned long* restrict out_queue = g_out_queue;
  unsigned long* restrict out_queue_summary = g_out_queue_summary;
  unsigned long* restrict visited = g_visited;
  memset(visited, 0, local_queue_size * sizeof(unsigned long));

#define SET_IN(v) do {int64_t vs = SWIZZLE_VERTEX(v); size_t word_idx = vs / ulong_bits; int bit_idx = vs % ulong_bits; unsigned long mask = (1UL << bit_idx); in_queue_summary[word_idx / ulong_bits] |= (1UL << (word_idx % ulong_bits)); in_queue[word_idx] |= mask;} while (0)
#define TEST_IN(vs) (((in_queue_summary[vs / ulong_bits / ulong_bits] & (1UL << ((vs / ulong_bits) % ulong_bits))) != 0) && ((in_queue[vs / ulong_bits] & (1UL << (vs % ulong_bits))) != 0))
#define TEST_VISITED_LOCAL(v) ((visited[(v) / ulong_bits] & (1UL << ((v) % ulong_bits))) != 0)
// #define SET_VISITED_LOCAL(v) do {size_t word_idx = (v) / ulong_bits; int bit_idx = (v) % ulong_bits; unsigned long mask = (1UL << bit_idx); __sync_fetch_and_or(&visited[word_idx], mask); __sync_fetch_and_or(&out_queue[word_idx], mask);} while (0)
#define SET_VISITED_LOCAL(v) do {size_t word_idx = (v) / ulong_bits; int bit_idx = (v) % ulong_bits; unsigned long mask = (1UL << bit_idx); visited[word_idx] |= mask; out_queue[word_idx] |= mask;} while (0)

  SET_IN(root);
  {ptrdiff_t i; _Pragma("omp parallel for schedule(static)") for (i = 0; i < nlocalverts; ++i) pred[i] = -1;}
  if (VERTEX_OWNER(root) == rank) {
    pred[VERTEX_LOCAL(root)] = root;
    SET_VISITED_LOCAL(VERTEX_LOCAL(root));
  }
  uint16_t cur_level = 0;
  while (1) {
    ++cur_level;
#if 0
    if (rank == 0) fprintf(stderr, "BFS level %" PRIu16 "\n", cur_level);
#endif
    memset(out_queue, 0, (nlocalverts + ulong_bits - 1) / ulong_bits * sizeof(unsigned long));
    // memset(out_queue_summary, 0, (nlocalverts + ulong_bits_squared - 1) / ulong_bits_squared * sizeof(unsigned long));
    ptrdiff_t i, ii;
#if 0
#pragma omp parallel for schedule(static)
    for (i = 0; i < global_queue_summary_size; ++i) {
      unsigned long val = 0UL;
      int j;
      unsigned long mask = 1UL;
      for (j = 0; j < ulong_bits; ++j, mask <<= 1) {
        if (in_queue[i * ulong_bits + j]) val |= mask;
      }
      in_queue_summary[i] = val;
    }
#endif
    unsigned long not_done = 0;
#pragma omp parallel for schedule(static) reduction(|:not_done)
    for (ii = 0; ii < nlocalverts; ii += ulong_bits) {
      size_t i, i_end = ii + ulong_bits;
      if (i_end > nlocalverts) i_end = nlocalverts;
      for (i = ii; i < i_end; ++i) {
        if (!TEST_VISITED_LOCAL(i)) {
          size_t j, j_end = rowstarts[i + 1];
          for (j = rowstarts[i]; j < j_end; ++j) {
            int64_t v1 = column[j];
            int64_t v1_swizzled = SWIZZLE_VERTEX(v1);
            if (TEST_IN(v1_swizzled)) {
              pred[i] = (v1 & INT64_C(0xFFFFFFFFFFFF)) | ((int64_t)cur_level << 48);
              not_done |= 1;
              SET_VISITED_LOCAL(i);
              break;
            }
          }
        }
      }
    }
#if 1
#pragma omp parallel for schedule(static)
    for (i = 0; i < local_queue_summary_size; ++i) {
      unsigned long val = 0UL;
      int j;
      unsigned long mask = 1UL;
      for (j = 0; j < ulong_bits; ++j, mask <<= 1) {
        unsigned long full_val = out_queue[i * ulong_bits + j];
        visited[i * ulong_bits + j] |= full_val;
        if (full_val) val |= mask;
      }
      out_queue_summary[i] = val;
      // not_done |= val;
    }
#endif
    MPI_Allreduce(MPI_IN_PLACE, &not_done, 1, MPI_UNSIGNED_LONG, MPI_BOR, MPI_COMM_WORLD);
    if (not_done == 0) break;
    MPI_Allgather(out_queue, local_queue_size, MPI_UNSIGNED_LONG, in_queue, local_queue_size, MPI_UNSIGNED_LONG, MPI_COMM_WORLD);
    MPI_Allgather(out_queue_summary, local_queue_summary_size, MPI_UNSIGNED_LONG, in_queue_summary, local_queue_summary_size, MPI_UNSIGNED_LONG, MPI_COMM_WORLD);
  }
  deallocate_memory();
}
Developer ID: ShiZhan, Project: graph500-win32, Lines of code: 101, Source file: bfs_replicated.c


Example 13: star_density


//......... part of the code omitted here .........
	}

      /* do local particles and prepare export list */
      for(nexport = 0; i >= 0; i = NextActiveParticle[i])
	{
	  if(P[i].Type == 4)
	    {
	      if(star_density_evaluate(i, 0, &nexport, Send_count) < 0)
		break;
	    }
	}

#ifdef MYSORT
      mysort_dataindex(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare);
#else
      qsort(DataIndexTable, nexport, sizeof(struct data_index), data_index_compare);
#endif

      MPI_Allgather(Send_count, NTask, MPI_INT, Sendcount_matrix, NTask, MPI_INT, MPI_COMM_WORLD);

      for(j = 0, nimport = 0, Recv_offset[0] = 0, Send_offset[0] = 0; j < NTask; j++)
	{
	  Recv_count[j] = Sendcount_matrix[j * NTask + ThisTask];
	  nimport += Recv_count[j];

	  if(j > 0)
	    {
	      Send_offset[j] = Send_offset[j - 1] + Send_count[j - 1];
	      Recv_offset[j] = Recv_offset[j - 1] + Recv_count[j - 1];
	    }
	}

      StarDataGet = (struct stardata_in *) mymalloc(nimport * sizeof(struct stardata_in));
      StarDataIn = (struct stardata_in *) mymalloc(nexport * sizeof(struct stardata_in));

      /* prepare particle data for export */
      for(j = 0; j < nexport; j++)
	{
	  place = DataIndexTable[j].Index;

	  StarDataIn[j].Pos[0] = P[place].Pos[0];
	  StarDataIn[j].Pos[1] = P[place].Pos[1];
	  StarDataIn[j].Pos[2] = P[place].Pos[2];
	  StarDataIn[j].Hsml = PPP[place].Hsml;
	  StarDataIn[j].Density = P[place].DensAroundStar;
	  StarDataIn[j].Mass = P[place].Mass;

	  memcpy(StarDataIn[j].NodeList,
		 DataNodeList[DataIndexTable[j].IndexGet].NodeList, NODELISTLENGTH * sizeof(int));

	}

      /* exchange particle data */
      for(ngrp = 1; ngrp < (1 << PTask); ngrp++)
	{
	  sendTask = ThisTask;
	  recvTask = ThisTask ^ ngrp;

	  if(recvTask < NTask)
	    {
	      if(Send_count[recvTask] > 0 || Recv_count[recvTask] > 0)
		{
		  /* get the particles */
		  MPI_Sendrecv(&StarDataIn[Send_offset[recvTask]],
			       Send_count[recvTask] * sizeof(struct stardata_in), MPI_BYTE,
			       recvTask, TAG_DENS_A,
			       &StarDataGet[Recv_offset[recvTask]],
			       Recv_count[recvTask] * sizeof(struct stardata_in), MPI_BYTE,
			       recvTask, TAG_DENS_A, MPI_COMM_WORLD, MPI_STATUS_IGNORE);
		}
	    }
	}

      myfree(StarDataIn);


      /* now do the particles that were sent to us */

      for(j = 0; j < nimport; j++)
	star_density_evaluate(j, 1, &dummy, &dummy);

      /* check whether this is the last iteration */
      if(i < 0)
	ndone_flag = 1;
      else
	ndone_flag = 0;

      MPI_Allreduce(&ndone_flag, &ndone, 1, MPI_INT, MPI_SUM, MPI_COMM_WORLD);

      myfree(StarDataGet);
    }
  while(ndone < NTask);

  myfree(DataNodeList);
  myfree(DataIndexTable);
  myfree(Ngblist);

#endif //for EDDINGTON_TENSOR_STARS

}
Developer ID: surftour, Project: astrotools, Lines of code: 101, Source file: get_Je.c


Example 14: heavyEdgeMatchAgg


//......... part of the code omitted here .........
            n /= 2;
            ierr = PetscMalloc((2 + 2*n + n*CHUNCK_SIZE)*sizeof(PetscInt) + 2*sizeof(MPI_Request), &sbuff);CHKERRQ(ierr);
            /* PetscMalloc4(2+2*n,PetscInt,sbuffs1[nSend1],n*CHUNCK_SIZE,PetscInt,rbuffs1[nSend1],1,MPI_Request,rreqs2[nSend1],1,MPI_Request,sreqs2[nSend1]); */
            /* save requests */
            sbuffs1[nSend1] = sbuff;
            request = (MPI_Request*)sbuff;
            sbuff = pt = (PetscInt*)(request+1);
            *pt++ = n; *pt++ = rank;

            ierr = PetscCDGetHeadPos(deleted_list,proc,&pos);CHKERRQ(ierr);
            while(pos){
              PetscInt lid0, cpid, gid;
              ierr = PetscLLNGetID(pos, &cpid);CHKERRQ(ierr);
              gid = (PetscInt)PetscRealPart(cpcol_gid[cpid]);
              ierr = PetscCDGetNextPos(deleted_list,proc,&pos);CHKERRQ(ierr);
              ierr = PetscLLNGetID(pos, &lid0);CHKERRQ(ierr);
              ierr = PetscCDGetNextPos(deleted_list,proc,&pos);CHKERRQ(ierr);
              *pt++ = gid; *pt++ = lid0;
            }
            /* send request tag1 [n, proc, n*[gid1,lid0] ] */
            ierr = MPI_Isend(sbuff, 2*n+2, MPIU_INT, proc, tag1, wcomm, request);CHKERRQ(ierr);
            /* post receive */
            request = (MPI_Request*)pt;
            rreqs2[nSend1] = request; /* cache recv request */
            pt = (PetscInt*)(request+1);
            ierr = MPI_Irecv(pt, n*CHUNCK_SIZE, MPIU_INT, proc, tag2, wcomm, request);CHKERRQ(ierr);
            /* clear list */
            ierr = PetscCDRemoveAll(deleted_list, proc);CHKERRQ(ierr);
            nSend1++;
          }
        }
        /* receive requests, send response, clear lists */
        kk = nactive_edges;
//......... the rest of the code is omitted here .........
