Commit 36d192c6 authored by Johannes Spazier

- Added switch "-adjust_ztop" to enable ZTOP-patch dynamically.

- Added some time measurement and a verbose mode that can be enabled with "-verbose".
parent 6d88cc91
...@@ -11,6 +11,20 @@ ...@@ -11,6 +11,20 @@
CNode *gNode; CNode *gNode;
/* Returns the time elapsed from `start` to `end`, expressed in seconds.
 * The interval is first split into whole seconds and a non-negative
 * nanosecond remainder, then folded into a single double. */
double diff(timespec start, timespec end) {
    timespec delta;
    delta.tv_sec  = end.tv_sec  - start.tv_sec;
    delta.tv_nsec = end.tv_nsec - start.tv_nsec;

    /* Borrow one second when the nanosecond part went negative. */
    if (delta.tv_nsec < 0) {
        delta.tv_sec  -= 1;
        delta.tv_nsec += 1000000000;
    }

    const double fraction = (double)delta.tv_nsec / 1000000000.0;
    const double seconds  = (double)delta.tv_sec;
    return (double)(fraction + seconds);
}
int commandLineHelp( void ); int commandLineHelp( void );
int main( int argc, char **argv ) int main( int argc, char **argv )
...@@ -64,6 +78,9 @@ int main( int argc, char **argv ) ...@@ -64,6 +78,9 @@ int main( int argc, char **argv )
// Main loop // Main loop
Log.print("Starting main loop..."); Log.print("Starting main loop...");
timespec start, end;
clock_gettime(CLOCK_MONOTONIC, &start);
for( Par.time=0,loop=1,lastProgress=Par.outProgress,lastPropagation=Par.outPropagation,lastDump=0; for( Par.time=0,loop=1,lastProgress=Par.outProgress,lastPropagation=Par.outPropagation,lastDump=0;
Par.time<=Par.timeMax; loop++,Par.time+=Par.dt,lastProgress+=Par.dt,lastPropagation+=Par.dt ) { Par.time<=Par.timeMax; loop++,Par.time+=Par.dt,lastProgress+=Par.dt,lastPropagation+=Par.dt ) {
...@@ -103,6 +120,7 @@ int main( int argc, char **argv ) ...@@ -103,6 +120,7 @@ int main( int argc, char **argv )
} }
} // main loop } // main loop
clock_gettime(CLOCK_MONOTONIC, &end);
Log.print("Finishing main loop"); Log.print("Finishing main loop");
/* TODO: check if these calls can be combined */ /* TODO: check if these calls can be combined */
...@@ -116,6 +134,8 @@ int main( int argc, char **argv ) ...@@ -116,6 +134,8 @@ int main( int argc, char **argv )
Node.freeMem(); Node.freeMem();
printf_v("Runtime: %.3lf\n", diff(start, end) * 1000.0);
delete gNode; delete gNode;
return 0; return 0;
...@@ -148,6 +168,7 @@ int commandLineHelp( void ) ...@@ -148,6 +168,7 @@ int commandLineHelp( void )
printf( "-ssh_arrival ... threshold for arrival times in [m], default- 0.001\n" ); printf( "-ssh_arrival ... threshold for arrival times in [m], default- 0.001\n" );
printf( " negative value considered as relative threshold\n" ); printf( " negative value considered as relative threshold\n" );
printf( "-gpu start GPU version of EasyWave (requires a CUDA capable device)\n" ); printf( "-gpu start GPU version of EasyWave (requires a CUDA capable device)\n" );
printf( "-verbose generate verbose output on stdout\n" );
printf( "\nExample:\n" ); printf( "\nExample:\n" );
printf( "\t easyWave -grid gebcoIndonesia.grd -source fault.inp -time 120\n\n" ); printf( "\t easyWave -grid gebcoIndonesia.grd -source fault.inp -time 120\n\n" );
......
...@@ -47,6 +47,8 @@ struct EWPARAMS { ...@@ -47,6 +47,8 @@ struct EWPARAMS {
float sshTransparencyThreshold; float sshTransparencyThreshold;
float sshArrivalThreshold; float sshArrivalThreshold;
bool gpu; bool gpu;
bool adjustZtop;
bool verbose;
}; };
extern struct EWPARAMS Par; extern struct EWPARAMS Par;
...@@ -84,11 +86,11 @@ int ewSavePOIs(); ...@@ -84,11 +86,11 @@ int ewSavePOIs();
int ewDumpPOIs(); int ewDumpPOIs();
int ewDumpPOIsCompact( int istage ); int ewDumpPOIsCompact( int istage );
/* Verbose printf: forwards its arguments to printf only if Par.verbose is set
 * (i.e. -verbose was given).
 * The body is wrapped in do { } while( 0 ) so that the expansion is exactly one
 * statement: the caller supplies the terminating semicolon, and the macro can
 * be used as the body of an unbraced if/else without capturing the else or
 * leaving a stray empty statement behind. */
#define printf_v( Args, ... ) do { if( Par.verbose ) printf( Args, ##__VA_ARGS__ ); } while( 0 )
#include "ewNode.h" #include "ewNode.h"
extern CNode *gNode; extern CNode *gNode;
/* unset to remove ZTOP_PATCH */
#define ZTOP_PATCH
#endif /* EASYWAVE_H */ #endif /* EASYWAVE_H */
...@@ -5,6 +5,13 @@ CGpuNode::CGpuNode() { ...@@ -5,6 +5,13 @@ CGpuNode::CGpuNode() {
pitch = 0; pitch = 0;
copied = true; copied = true;
for( int i = 0; i < 5; i++ ) {
cudaEventCreate( &(evtStart[i]) );
cudaEventCreate( &(evtEnd[i]) );
dur[i] = 0.0;
}
} }
int CGpuNode::mallocMem() { int CGpuNode::mallocMem() {
...@@ -127,6 +134,13 @@ int CGpuNode::freeMem() { ...@@ -127,6 +134,13 @@ int CGpuNode::freeMem() {
CUDA_CALL( cudaFree( data.cB3 ) ); CUDA_CALL( cudaFree( data.cB3 ) );
CUDA_CALL( cudaFree( data.cB4 ) ); CUDA_CALL( cudaFree( data.cB4 ) );
float total_dur = 0.f;
for( int j = 0; j < 5; j++ ) {
printf_v("Duration %u: %.3f\n", j, dur[j]);
total_dur += dur[j];
}
printf_v("Duration total: %.3f\n",total_dur);
CArrayNode::freeMem(); CArrayNode::freeMem();
return 0; return 0;
...@@ -150,23 +164,40 @@ int CGpuNode::run() { ...@@ -150,23 +164,40 @@ int CGpuNode::run() {
dp.mTime = Par.time; dp.mTime = Par.time;
CUDA_CALL( cudaEventRecord( evtStart[0], 0 ) );
runWaveUpdateKernel<<<blocks,threads>>>( data ); runWaveUpdateKernel<<<blocks,threads>>>( data );
CUDA_CALL( cudaEventRecord( evtEnd[0], 0 ) );
CUDA_CALL( cudaEventRecord( evtStart[1], 0 ) );
runWaveBoundaryKernel<<<nBlocks,nThreads>>>( data ); runWaveBoundaryKernel<<<nBlocks,nThreads>>>( data );
CUDA_CALL( cudaEventRecord( evtEnd[1], 0 ) );
CUDA_CALL( cudaEventRecord( evtStart[2], 0 ) );
runFluxUpdateKernel<<<blocks,threads>>>( data ); runFluxUpdateKernel<<<blocks,threads>>>( data );
CUDA_CALL( cudaEventRecord( evtEnd[2], 0 ) );
CUDA_CALL( cudaEventRecord( evtStart[3], 0 ) );
runFluxBoundaryKernel<<<nBlocks,nThreads>>>( data ); runFluxBoundaryKernel<<<nBlocks,nThreads>>>( data );
CUDA_CALL( cudaEventRecord( evtEnd[3], 0 ) );
CUDA_CALL( cudaEventRecord( evtStart[4], 0 ) );
runGridExtendKernel1<<<nBlocks,nThreads>>>( data ); runGridExtendKernel1<<<nBlocks,nThreads>>>( data );
runGridExtendKernel2<<<1,1>>>( data ); runGridExtendKernel2<<<1,1>>>( data );
CUDA_CALL( cudaEventRecord( evtEnd[4], 0 ) );
void *g_mm; void *g_mm;
CUDA_CALL( cudaGetSymbolAddress( &g_mm, g_MinMax ) ); CUDA_CALL( cudaGetSymbolAddress( &g_mm, g_MinMax ) );
int4 MinMax; int4 MinMax;
CUDA_CALL( cudaMemcpy( &MinMax, g_mm, sizeof(int4), cudaMemcpyDeviceToHost ) ); CUDA_CALL( cudaMemcpy( &MinMax, g_mm, sizeof(int4), cudaMemcpyDeviceToHost ) );
cudaDeviceSynchronize();
Imin = dp.iMin = MinMax.x; Imin = dp.iMin = MinMax.x;
Imax = dp.iMax = MinMax.y; Imax = dp.iMax = MinMax.y;
Jmin = dp.jMin = MinMax.z; Jmin = dp.jMin = MinMax.z;
Jmax = dp.jMax = MinMax.w; Jmax = dp.jMax = MinMax.w;
float _dur;
for( int j = 0; j < 5; j++ ) {
cudaEventElapsedTime( &_dur, evtStart[j], evtEnd[j]);
dur[j] += _dur;
}
/* data has changed now -> copy becomes necessary */ /* data has changed now -> copy becomes necessary */
copied = false; copied = false;
......
...@@ -71,6 +71,10 @@ protected: ...@@ -71,6 +71,10 @@ protected:
/* specifies if data was already copied in the current calculation step */ /* specifies if data was already copied in the current calculation step */
bool copied; bool copied;
cudaEvent_t evtStart[5];
cudaEvent_t evtEnd[5];
float dur[5];
public: public:
CGpuNode(); CGpuNode();
int mallocMem(); int mallocMem();
......
...@@ -150,6 +150,16 @@ int ewParam( int argc, char **argv ) ...@@ -150,6 +150,16 @@ int ewParam( int argc, char **argv )
else else
Par.gpu = false; Par.gpu = false;
if( ( argn = utlCheckCommandLineOption( argc, argv, "adjust_ztop", 11 ) ) != 0 )
Par.adjustZtop = true;
else
Par.adjustZtop = false;
if( ( argn = utlCheckCommandLineOption( argc, argv, "verbose", 7 ) ) != 0 )
Par.verbose = true;
else
Par.verbose = false;
return 0; return 0;
} }
......
...@@ -49,51 +49,55 @@ int ewSource() ...@@ -49,51 +49,55 @@ int ewSource()
ierr = eq.read( Par.fileSource ); if(ierr) return ierr; ierr = eq.read( Par.fileSource ); if(ierr) return ierr;
#ifdef ZTOP_PATCH if( Par.adjustZtop ) {
// check fault parameters
Err.disable(); // check fault parameters
ierr = eq.finalizeInput(); Err.disable();
while( ierr ) { ierr = eq.finalizeInput();
i = ierr/10; while( ierr ) {
ierr = ierr - 10*i; i = ierr/10;
if( ierr == FLT_ERR_STRIKE ) { ierr = ierr - 10*i;
Log.print( "No strike on input: Employing effective symmetric source model" ); if( ierr == FLT_ERR_STRIKE ) {
if( eq.nfault > 1 ) { Err.enable(); return Err.post("Symmetric source assumes only 1 fault"); } Log.print( "No strike on input: Employing effective symmetric source model" );
eq.fault[0].strike = 0.; if( eq.nfault > 1 ) { Err.enable(); return Err.post("Symmetric source assumes only 1 fault"); }
effSymSource = 1; eq.fault[0].strike = 0.;
} effSymSource = 1;
else if( ierr == FLT_ERR_ZTOP ) { }
Log.print( "Automatic depth correction to fault top @ 10 km" ); else if( ierr == FLT_ERR_ZTOP ) {
eq.fault[i].depth = eq.fault[i].width/2 * sindeg(eq.fault[i].dip) + 10.e3; Log.print( "Automatic depth correction to fault top @ 10 km" );
} eq.fault[i].depth = eq.fault[i].width/2 * sindeg(eq.fault[i].dip) + 10.e3;
else { }
else {
Err.enable();
return ierr;
}
ierr = eq.finalizeInput();
}
Err.enable(); Err.enable();
return ierr;
} } else {
ierr = eq.finalizeInput();
} // check fault parameters
Err.enable(); Err.disable();
#else ierr = eq.finalizeInput();
// check fault parameters if( ierr ) {
Err.disable(); i = ierr/10;
ierr = eq.finalizeInput(); ierr = ierr - 10*i;
if( ierr ) { if( ierr != FLT_ERR_STRIKE ) {
i = ierr/10; Err.enable();
ierr = ierr - 10*i; ierr = eq.finalizeInput();
if( ierr != FLT_ERR_STRIKE ) { return ierr;
Err.enable(); }
ierr = eq.finalizeInput(); Log.print( "No strike on input: Employing effective symmetric source model" );
return ierr; Err.enable();
} if( eq.nfault > 1 ) return Err.post("symmetric source assumes only 1 fault");
Log.print( "No strike on input: Employing effective symmetric source model" ); eq.fault[0].strike = 0.;
Err.enable(); effSymSource = 1;
if( eq.nfault > 1 ) return Err.post("symmetric source assumes only 1 fault"); ierr = eq.finalizeInput(); if(ierr) return ierr;
eq.fault[0].strike = 0.; }
effSymSource = 1; Err.enable();
ierr = eq.finalizeInput(); if(ierr) return ierr;
} }
Err.enable();
#endif
// calculate uplift on a rectangular grid // calculate uplift on a rectangular grid
// set grid resolution, grid dimensions will be set automatically // set grid resolution, grid dimensions will be set automatically
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment