#include #include #include #include #include "poissn.h" /***/ #define NITER 10000 #define STEPITER 1000 #define MX 20000 static float *f[MX], *pf, *cf[MX]; static float *rhs[MX], *prhs, *crhs[MX]; static int my_number, n_of_nodes, totalmx, partmx, leftmx, mx, my; static FILE *fp; static float *pcof[2], *corhs; static int icof; /***/ int main( int argc, char **argv ) { int i, j, n; double howlong; float *pprev, *pnext; float addcoeff[10]; /***/ shmem_init( &argc, &argv ); my_number = shmem_my_pe(); n_of_nodes = shmem_n_pes(); if ( argc != 3 ) { if ( !my_number ) { fprintf( stderr, "Usage: %s \n", argv[0] ); } return (-1); } totalmx = mx = (int)atol( argv[1] ); my = (int)atol( argv[2] ); if ( my < 1 ) { if ( !my_number ) { fprintf( stderr, "Number of columns (%d) should be positive\n", my ); } return (-1); } /* Compute the number of rows per node. It should be even for all but the last: */ mx = mx/n_of_nodes; if ( mx%2 ) mx++; /* This is the number of rows for all but the last: */ partmx = mx; /* This is the number of rows for the last: */ /* It cannot be greater than partmx, but it can be non-positive: */ leftmx = totalmx - partmx*(n_of_nodes-1); if ( leftmx < 1 ) { if ( !my_number ) { fprintf( stderr, "Cannot distribute rows, too many processors\n" ); } return (-1); } if ( my_number == (n_of_nodes-1) ) mx = leftmx; /* End rows distribution. */ partmx += 2; mx += 2; my += 2; pf = (float*)emalloc( (long)(mx*my*sizeof(*pf)) ); prhs = (float*)emalloc( (long)(mx*my*sizeof(*prhs)) ); if ( (!pf) || (!prhs) ) { fprintf( stderr, "No memory in node %d\n", my_number ); return (-1); } for ( i = 0; i < mx; i++ ) { f[i] = pf + i*my; rhs[i] = prhs + i*my; } shmem_coarray_all( (void*)pf, (long)(mx*my*sizeof(*pf)), (void**)cf ); shmem_coarray_all( (void*)prhs, (long)(mx*my*sizeof(*prhs)), (void**)crhs ); if ( !my_number ) { printf( "Solving heat conduction task on %d by %d grid by %d processors\n", totalmx, my-2, n_of_nodes ); fflush( stdout ); } initval( f, rhs, mx, my, 1.0, 0.0, 0.0, 0.0 ); allocate( mx, my, pcof, &corhs ); icof = 0; upload( f, rhs, mx, my, pcof[icof], corhs ); upload( f, rhs, mx, my, pcof[1-icof], 0 ); /* Compute the addresses where to copy data: */ if ( my_number == 0 ) pprev = 0; else pprev = cf[my_number-1]+my*(partmx-1)+1; if ( my_number == (n_of_nodes-1) ) pnext = 0; else pnext = cf[my_number+1]+1; /* Iteration loop: */ howlong = shmem_time(); for ( n = 0; n < NITER; n++ ) { if ( !my_number ) { if ( !(n%STEPITER) ) printf( "Iteration %d\n", n ); } /* Do all the transfers: */ shmem_barrier_all(); if ( my_number > 0 ) { download( f+1, rhs+1, 1, my, pcof[icof]+my, 0 ); shmem_float_put( pprev, &f[1][1], my-2, my_number-1 ); } if ( my_number < (n_of_nodes-1) ) { download( f+(mx-2), rhs+(mx-2), 1, my, pcof[icof]+(mx-2)*my, 0 ); shmem_float_put( pnext, &f[mx-2][1], my-2, my_number+1 ); } shmem_barrier_all(); upload( f, rhs, 1, my, pcof[icof], 0 ); upload( f+(mx-1), rhs+(mx-1), 1, my, pcof[icof]+(mx-1)*my, 0 ); /* Step of calculation starts here: */ poissn( f, rhs, pcof, icof, corhs, mx, my, 1.0, 1.0, addcoeff ); icof = 1 - icof; } if ( !my_number ) { printf( "Elapsed time: %f sec\n", (float)(shmem_time()-howlong) ); fp = fopen( "progrev_shmem_cuda.dat", "w" ); fclose( fp ); } download( f, rhs, mx, my, pcof[icof], 0 ); for ( j = 0; j < n_of_nodes; j++ ) { shmem_barrier_all(); if ( j == my_number ) { fp = fopen( "progrev_shmem_cuda.dat", "a" ); for ( i = 1; i < (mx-1); i++ ) { fwrite( &f[i][1], my-2, sizeof(f[0][0]), fp ); // int k; // for ( k = 1; k < my-1; k++ ) fprintf( fp, "%f ", f[i][k] ); // fprintf( fp, "\n" ); } fclose( fp ); } } shmem_finalize(); return 0; }