Commit 36d192c6 authored by Johannes Spazier

- Added switch "-adjust_ztop" to enable ZTOP-patch dynamically.

- Added some time measurement and a verbose mode that can be enabled with "-verbose".
parent 6d88cc91
......@@ -11,6 +11,20 @@
CNode *gNode;
/* Elapsed time from `start` to `end`, returned in seconds as a double. */
double diff(timespec start, timespec end) {
    timespec delta;
    delta.tv_sec = end.tv_sec - start.tv_sec;
    delta.tv_nsec = end.tv_nsec - start.tv_nsec;

    /* nanosecond part went negative: borrow one full second */
    if (delta.tv_nsec < 0) {
        delta.tv_sec -= 1;
        delta.tv_nsec += 1000000000;
    }

    return (double)((double)delta.tv_nsec / 1000000000.0 + (double)delta.tv_sec);
}
int commandLineHelp( void );
int main( int argc, char **argv )
......@@ -64,6 +78,9 @@ int main( int argc, char **argv )
// Main loop
Log.print("Starting main loop...");
timespec start, end;
clock_gettime(CLOCK_MONOTONIC, &start);
for( Par.time=0,loop=1,lastProgress=Par.outProgress,lastPropagation=Par.outPropagation,lastDump=0;
Par.time<=Par.timeMax; loop++,Par.time+=Par.dt,lastProgress+=Par.dt,lastPropagation+=Par.dt ) {
......@@ -103,6 +120,7 @@ int main( int argc, char **argv )
}
} // main loop
clock_gettime(CLOCK_MONOTONIC, &end);
Log.print("Finishing main loop");
/* TODO: check if these calls can be combined */
......@@ -116,6 +134,8 @@ int main( int argc, char **argv )
Node.freeMem();
printf_v("Runtime: %.3lf\n", diff(start, end) * 1000.0);
delete gNode;
return 0;
......@@ -148,6 +168,7 @@ int commandLineHelp( void )
printf( "-ssh_arrival ... threshold for arrival times in [m], default- 0.001\n" );
printf( " negative value considered as relative threshold\n" );
printf( "-gpu start GPU version of EasyWave (requires a CUDA capable device)\n" );
printf( "-verbose generate verbose output on stdout\n" );
printf( "\nExample:\n" );
printf( "\t easyWave -grid gebcoIndonesia.grd -source fault.inp -time 120\n\n" );
......
......@@ -47,6 +47,8 @@ struct EWPARAMS {
float sshTransparencyThreshold;
float sshArrivalThreshold;
bool gpu;
bool adjustZtop;
bool verbose;
};
extern struct EWPARAMS Par;
......@@ -84,11 +86,11 @@ int ewSavePOIs();
int ewDumpPOIs();
int ewDumpPOIsCompact( int istage );
/* verbose printf: only executed if -verbose was set.
 * Takes the same arguments as printf (format string plus optional values).
 * The body is wrapped in do { } while( 0 ) so the macro expands to exactly one
 * statement: it can be used in an unbraced if/else without capturing the
 * caller's else branch. Call sites terminate it with ';' like a function call. */
#define printf_v( Args, ... ) do { if( Par.verbose ) printf( Args, ##__VA_ARGS__ ); } while( 0 )
#include "ewNode.h"
extern CNode *gNode;
/* unset to remove ZTOP_PATCH */
#define ZTOP_PATCH
#endif /* EASYWAVE_H */
......@@ -5,6 +5,13 @@ CGpuNode::CGpuNode() {
pitch = 0;
copied = true;
for( int i = 0; i < 5; i++ ) {
cudaEventCreate( &(evtStart[i]) );
cudaEventCreate( &(evtEnd[i]) );
dur[i] = 0.0;
}
}
int CGpuNode::mallocMem() {
......@@ -127,6 +134,13 @@ int CGpuNode::freeMem() {
CUDA_CALL( cudaFree( data.cB3 ) );
CUDA_CALL( cudaFree( data.cB4 ) );
float total_dur = 0.f;
for( int j = 0; j < 5; j++ ) {
printf_v("Duration %u: %.3f\n", j, dur[j]);
total_dur += dur[j];
}
printf_v("Duration total: %.3f\n",total_dur);
CArrayNode::freeMem();
return 0;
......@@ -150,23 +164,40 @@ int CGpuNode::run() {
dp.mTime = Par.time;
CUDA_CALL( cudaEventRecord( evtStart[0], 0 ) );
runWaveUpdateKernel<<<blocks,threads>>>( data );
CUDA_CALL( cudaEventRecord( evtEnd[0], 0 ) );
CUDA_CALL( cudaEventRecord( evtStart[1], 0 ) );
runWaveBoundaryKernel<<<nBlocks,nThreads>>>( data );
CUDA_CALL( cudaEventRecord( evtEnd[1], 0 ) );
CUDA_CALL( cudaEventRecord( evtStart[2], 0 ) );
runFluxUpdateKernel<<<blocks,threads>>>( data );
CUDA_CALL( cudaEventRecord( evtEnd[2], 0 ) );
CUDA_CALL( cudaEventRecord( evtStart[3], 0 ) );
runFluxBoundaryKernel<<<nBlocks,nThreads>>>( data );
CUDA_CALL( cudaEventRecord( evtEnd[3], 0 ) );
CUDA_CALL( cudaEventRecord( evtStart[4], 0 ) );
runGridExtendKernel1<<<nBlocks,nThreads>>>( data );
runGridExtendKernel2<<<1,1>>>( data );
CUDA_CALL( cudaEventRecord( evtEnd[4], 0 ) );
void *g_mm;
CUDA_CALL( cudaGetSymbolAddress( &g_mm, g_MinMax ) );
int4 MinMax;
CUDA_CALL( cudaMemcpy( &MinMax, g_mm, sizeof(int4), cudaMemcpyDeviceToHost ) );
cudaDeviceSynchronize();
Imin = dp.iMin = MinMax.x;
Imax = dp.iMax = MinMax.y;
Jmin = dp.jMin = MinMax.z;
Jmax = dp.jMax = MinMax.w;
float _dur;
for( int j = 0; j < 5; j++ ) {
cudaEventElapsedTime( &_dur, evtStart[j], evtEnd[j]);
dur[j] += _dur;
}
/* data has changed now -> copy becomes necessary */
copied = false;
......
......@@ -71,6 +71,10 @@ protected:
/* specifies if data was already copied in the current calculation step */
bool copied;
cudaEvent_t evtStart[5];
cudaEvent_t evtEnd[5];
float dur[5];
public:
CGpuNode();
int mallocMem();
......
......@@ -150,6 +150,16 @@ int ewParam( int argc, char **argv )
else
Par.gpu = false;
if( ( argn = utlCheckCommandLineOption( argc, argv, "adjust_ztop", 11 ) ) != 0 )
Par.adjustZtop = true;
else
Par.adjustZtop = false;
if( ( argn = utlCheckCommandLineOption( argc, argv, "verbose", 7 ) ) != 0 )
Par.verbose = true;
else
Par.verbose = false;
return 0;
}
......
......@@ -49,51 +49,55 @@ int ewSource()
ierr = eq.read( Par.fileSource ); if(ierr) return ierr;
#ifdef ZTOP_PATCH
// check fault parameters
Err.disable();
ierr = eq.finalizeInput();
while( ierr ) {
i = ierr/10;
ierr = ierr - 10*i;
if( ierr == FLT_ERR_STRIKE ) {
Log.print( "No strike on input: Employing effective symmetric source model" );
if( eq.nfault > 1 ) { Err.enable(); return Err.post("Symmetric source assumes only 1 fault"); }
eq.fault[0].strike = 0.;
effSymSource = 1;
}
else if( ierr == FLT_ERR_ZTOP ) {
Log.print( "Automatic depth correction to fault top @ 10 km" );
eq.fault[i].depth = eq.fault[i].width/2 * sindeg(eq.fault[i].dip) + 10.e3;
}
else {
if( Par.adjustZtop ) {
// check fault parameters
Err.disable();
ierr = eq.finalizeInput();
while( ierr ) {
i = ierr/10;
ierr = ierr - 10*i;
if( ierr == FLT_ERR_STRIKE ) {
Log.print( "No strike on input: Employing effective symmetric source model" );
if( eq.nfault > 1 ) { Err.enable(); return Err.post("Symmetric source assumes only 1 fault"); }
eq.fault[0].strike = 0.;
effSymSource = 1;
}
else if( ierr == FLT_ERR_ZTOP ) {
Log.print( "Automatic depth correction to fault top @ 10 km" );
eq.fault[i].depth = eq.fault[i].width/2 * sindeg(eq.fault[i].dip) + 10.e3;
}
else {
Err.enable();
return ierr;
}
ierr = eq.finalizeInput();
}
Err.enable();
return ierr;
}
ierr = eq.finalizeInput();
}
Err.enable();
#else
// check fault parameters
Err.disable();
ierr = eq.finalizeInput();
if( ierr ) {
i = ierr/10;
ierr = ierr - 10*i;
if( ierr != FLT_ERR_STRIKE ) {
Err.enable();
ierr = eq.finalizeInput();
return ierr;
}
Log.print( "No strike on input: Employing effective symmetric source model" );
Err.enable();
if( eq.nfault > 1 ) return Err.post("symmetric source assumes only 1 fault");
eq.fault[0].strike = 0.;
effSymSource = 1;
ierr = eq.finalizeInput(); if(ierr) return ierr;
} else {
// check fault parameters
Err.disable();
ierr = eq.finalizeInput();
if( ierr ) {
i = ierr/10;
ierr = ierr - 10*i;
if( ierr != FLT_ERR_STRIKE ) {
Err.enable();
ierr = eq.finalizeInput();
return ierr;
}
Log.print( "No strike on input: Employing effective symmetric source model" );
Err.enable();
if( eq.nfault > 1 ) return Err.post("symmetric source assumes only 1 fault");
eq.fault[0].strike = 0.;
effSymSource = 1;
ierr = eq.finalizeInput(); if(ierr) return ierr;
}
Err.enable();
}
Err.enable();
#endif
// calculate uplift on a rectangular grid
// set grid resolution, grid dimensions will be set automatically
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment