Commit 36782b77 authored by Johannes Spazier

Fixed double free caused by multiple GPUs.

Made number of real and virtual GPUs adjustable via command line arguments.
parent 92c192e0
......@@ -184,6 +184,8 @@ int commandLineHelp( void )
printf( "-ssh_arrival ... threshold for arrival times in [m], default- 0.001\n" );
printf( " negative value considered as relative threshold\n" );
printf( "-gpu start GPU version of EasyWave (requires a CUDA capable device)\n" );
printf( "-gpu-nreal <n> use n physical GPUs in parallel\n" );
printf( "-gpu-nvirt <m> use m virtual GPUs for each physical one\n" );
printf( "-verbose generate verbose output on stdout\n" );
printf( "\nExample:\n" );
printf( "\t easyWave -grid gebcoIndonesia.grd -source fault.inp -time 120\n\n" );
......
......@@ -79,6 +79,8 @@ struct EWPARAMS {
float sshTransparencyThreshold;
float sshArrivalThreshold;
bool gpu;
int gpu_nreal;
int gpu_nvirt;
bool adjustZtop;
bool verbose;
};
......
......@@ -46,9 +46,8 @@ CGpuNode::CGpuNode() {
pitch = 0;
copied = true;
/* TODO: make dynamic */
num_virtual_gpus = 4;
num_real_gpus = 2;
num_real_gpus = Par.gpu_nreal;
num_virtual_gpus = Par.gpu_nvirt * num_real_gpus;
vgpus = new VGpu[num_virtual_gpus];
gpus = new Gpu[num_real_gpus];
......@@ -261,7 +260,7 @@ int CGpuNode::copyToGPU() {
/* POI handling. */
int *relIdxs = (int*) malloc( NPOIs * sizeof(int) );
host_idxs = (long*) malloc( NPOIs * sizeof(long) );
poi_data.host_idxs = (long*) malloc( NPOIs * sizeof(long) );
poi_data.num_pois = 0;
poi_data.it = 0;
for(int n = 0; n < NPOIs; n++) {
......@@ -269,7 +268,7 @@ int CGpuNode::copyToGPU() {
int j = idxPOI[n] % data.params.nJ + 1;
if( vgpu.hasLine( i ) ) {
relIdxs[poi_data.num_pois] = vgpu.data.idx( vgpu.getRel(i), j );
host_idxs[poi_data.num_pois] = n;
poi_data.host_idxs[poi_data.num_pois] = n;
poi_data.num_pois++;
}
}
......@@ -346,7 +345,7 @@ int CGpuNode::copyPOIs() {
for(int n = 0; n < poi_data.num_pois; n++) {
for(int i = 0; i < poi_data.it; i++) {
int host_n = host_idxs[n];
int host_n = poi_data.host_idxs[n];
if( flagRunupPOI[host_n] )
ampFactor = pow( d[idxPOI[host_n]], 0.25 );
else
......@@ -398,7 +397,7 @@ int CGpuNode::freeMem() {
CUDA_CALL( cudaFree( poi_data.pois ) );
CUDA_CALL( cudaFree( poi_data.idxs ) );
free( host_idxs );
free( poi_data.host_idxs );
}
cudaFreeHost( extend );
......
......@@ -98,9 +98,10 @@ public:
class PoiData {
public:
float *pois;
int *idxs;
int num_pois;
int it;
int *idxs;
int num_pois;
int it;
long *host_idxs;
};
class Gpu {
......@@ -154,7 +155,6 @@ protected:
Gpu *gpus;
int4 *extend;
long *host_idxs;
/* line size in bytes */
size_t pitch;
......
......@@ -192,10 +192,19 @@ int ewParam( int argc, char **argv )
else
Par.verbose = false;
if( ( argn = utlCheckCommandLineOption( argc, argv, "gpu-nreal", 9 ) ) != 0 )
Par.gpu_nreal = atoi(argv[argn+1]);
else
Par.gpu_nreal = 1;
if( ( argn = utlCheckCommandLineOption( argc, argv, "gpu-nvirt", 9 ) ) != 0 )
Par.gpu_nvirt = atoi(argv[argn+1]);
else
Par.gpu_nvirt = 1;
return 0;
}
void ewLogParams(void)
{
Log.print("\nModel parameters for this simulation:");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment