Commit 327af640 authored by Johannes Spazier

Updated transfer of POIs from GPU to CPU. The different points are now copied individually.

parent 0a06e166
......@@ -87,7 +87,7 @@ int main( int argc, char **argv )
/* FIXME: check if Par.poiDt can be used for those purposes */
if( Par.filePOIs && Par.poiDt && ((Par.time/Par.poiDt)*Par.poiDt == Par.time) ) {
Node.copyIntermediate();
Node.copyPOIs();
ewSavePOIs();
}
......@@ -114,6 +114,7 @@ int main( int argc, char **argv )
if( Par.outDump ) {
if( (elapsed-lastDump) >= Par.outDump ) {
Node.copyIntermediate();
/* FIXME: needs tArr as well */
ewDumpPOIs();
ewDump2D();
lastDump = elapsed;
......
......@@ -86,6 +86,9 @@ int ewSavePOIs();
int ewDumpPOIs();
int ewDumpPOIsCompact( int istage );
extern int NPOIs;
extern long *idxPOI;
/* verbose printf: only executed if -verbose was set */
/* Expands to a bare `if( Par.verbose ) printf( ... );` statement that already
 * carries its own trailing semicolon. Because it is an unbraced `if`, enclose
 * the call in braces when it is the body of an if/else so that a following
 * `else` cannot attach to the macro's `if`. */
#define printf_v( Args, ... ) if( Par.verbose ) printf( Args, ##__VA_ARGS__);
......
......@@ -248,7 +248,37 @@ int CGpuNode::copyIntermediate() {
return 0;
}
clock_t cdur = 0;
/*
 * Copy the value at every point of interest (POI) from device memory into the
 * host-side array `h`, one float per POI.
 *
 * Each of the NPOIs entries of idxPOI is split into a 1-based (i, j) pair
 * using dp.nJ. The virtual GPU whose hasLine(i) is true is looked up, that
 * GPU's device is selected, and a single element of its data.h is copied to
 * h[idxPOI[n]].
 *
 * Returns 0 on success, or immediately when `copied` is already set.
 * Returns 1 if no virtual GPU reports owning the line of a POI.
 * Failures of the CUDA calls are handled by CUDA_CALL.
 */
int CGpuNode::copyPOIs() {

    Params& dp = params;

    if( copied )
        return 0;

    for( int n = 0; n < NPOIs; n++ ) {

        int i = idxPOI[n] / dp.nJ + 1;
        int j = idxPOI[n] % dp.nJ + 1;

        /* Search for the owning virtual GPU afresh for every POI, so that a
         * failed lookup can never fall back to an uninitialised pointer or to
         * the GPU selected for the previous POI. */
        int owner = -1;
        for( int g = 0; g < num_virtual_gpus; g++ ) {
            if( vgpus[g].hasLine( i ) ) {
                owner = g;
                break;
            }
        }

        /* No virtual GPU holds line i: report an error instead of reading
         * from an arbitrary device address. */
        if( owner < 0 )
            return 1;

        VGpu *vgpu = &(vgpus[owner]);

        /* Element offset of (i, j) inside this virtual GPU's local array. */
        int off = vgpu->data.idx( vgpu->getRel(i), j );

        CUDA_CALL( cudaSetDevice( vgpu->dev ) );
        /* NOTE(review): idx() appears to add the left padding already, and
         * dp.lpad is added once more here - confirm that the double offset is
         * intended for data.h. */
        CUDA_CALL( cudaMemcpy( h + idxPOI[n], vgpu->data.h + dp.lpad + off, sizeof(float), cudaMemcpyDeviceToHost ) );
    }

    return 0;
}
int CGpuNode::freeMem() {
......
......@@ -60,7 +60,7 @@ public:
/* Flat index reached from ij by advancing one params.pI stride. */
__device__ int ri( int ij ) {
    return params.pI + ij;
}
/* Flat index directly after ij. */
__device__ int up( int ij ) {
    return 1 + ij;
}
/* Flat index directly before ij. */
__device__ int dn( int ij ) {
    const int prev = ij - 1;
    return prev;
}
__device__ int idx( int i, int j ) { return (j-1) + (i-1) * params.pI + params.lpad; }
/* Flat array offset of the 1-based cell (i, j): consecutive i values are
 * params.pI elements apart and the result is shifted by params.lpad.
 * Callable from both host and device code. */
__host__ __device__ int idx( int i, int j ) {
    const int row = i - 1;
    const int col = j - 1;
    return col + row * params.pI + params.lpad;
}
};
class VGpu {
......@@ -121,6 +121,7 @@ public:
int copyToGPU();
int copyFromGPU();
int copyIntermediate();
int copyPOIs();
int freeMem();
int run();
......
......@@ -16,6 +16,7 @@ public:
virtual int copyToGPU() = 0;
virtual int copyFromGPU() = 0;
virtual int copyIntermediate() = 0;
virtual int copyPOIs() = 0;
virtual int freeMem() = 0;
virtual int run() = 0;
};
......@@ -68,7 +69,7 @@ public:
/* Transfer hooks implemented as no-ops: each one returns 0 without doing any work. */
int copyToGPU() { return 0; }
int copyFromGPU() { return 0; }
int copyIntermediate() { return 0; }
int copyPOIs() { return 0; }
};
#pragma pack(push, 1)
......@@ -154,6 +155,7 @@ public:
/* Default transfer hooks: each one returns 0 without doing any work.
 * Declared virtual, so a derived class may replace them. */
virtual int copyToGPU() { return 0; }
virtual int copyFromGPU() { return 0; }
virtual int copyIntermediate() { return 0; }
virtual int copyPOIs() { return 0; }
};
#pragma pack(pop)
......
......@@ -7,9 +7,9 @@
//#define SSHMAX_TO_SINGLE_FILE 0
static int MaxPOIs;
static int NPOIs;
/* Not static: matches the `extern int NPOIs;` declaration and is read by CGpuNode::copyPOIs(). */
int NPOIs;
static char **idPOI;
static long *idxPOI;
/* Not static: matches the `extern long *idxPOI;` declaration and is read by CGpuNode::copyPOIs(). */
long *idxPOI;
static int *flagRunupPOI;
static int NtPOI;
static int *timePOI;
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment