diff --git a/cpu/branches/web/EasyWave.cu b/cpu/branches/web/EasyWave.cu new file mode 100644 index 0000000000000000000000000000000000000000..81c6a042e3cc321beaeb5ec86d6737d3b5b9012c --- /dev/null +++ b/cpu/branches/web/EasyWave.cu @@ -0,0 +1,178 @@ +#define HEADER "\neasyWave ver.2013-04-11\n" + +#include +#include +#include +#include +#include +#include "easywave.h" + +#include "ewGpuNode.cuh" + +CNode *gNode; + +double diff(timespec start, timespec end) { + + timespec temp; + if ((end.tv_nsec-start.tv_nsec)<0) { + temp.tv_sec = end.tv_sec-start.tv_sec-1; + temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec; + } else { + temp.tv_sec = end.tv_sec-start.tv_sec; + temp.tv_nsec = end.tv_nsec-start.tv_nsec; + } + + return (double)((double)temp.tv_nsec / 1000000000.0 + (double)temp.tv_sec); +} + +int commandLineHelp( void ); + +int main( int argc, char **argv ) +{ + char buf[1024]; + int ierr,argn,elapsed; + int lastProgress,lastPropagation,lastDump; + int loop; + + printf(HEADER); + Err.setchannel(MSG_OUTFILE); + + // Read parameters from command line and use default + ierr = ewParam( argc, argv ); if(ierr) return commandLineHelp(); + + // Log command line + /* FIXME: buffer overflow */ + sprintf( buf, "Command line: " ); + for( argn=1; argn= Par.outProgress ) { + printf( "Model time = %s, elapsed: %ld msec\n", utlTimeSplitString(Par.time), clock()/(CLOCKS_PER_SEC/1000) ); + Log.print( "Model time = %s, elapsed: %ld msec", utlTimeSplitString(Par.time), clock()/(CLOCKS_PER_SEC/1000) ); + lastProgress = 0; + } + } + + if( Par.outPropagation ) { + if( lastPropagation >= Par.outPropagation ) { + Node.copyIntermediate(); + ewOut2D(); + lastPropagation = 0; + } + } + + if( Par.outDump ) { + if( (elapsed-lastDump) >= Par.outDump ) { + Node.copyIntermediate(); + /* FIXME: needs tArr as well */ + ewDumpPOIs(); + ewDump2D(); + lastDump = elapsed; + } + } + + } // main loop + clock_gettime(CLOCK_MONOTONIC, &end); + Log.print("Finishing main loop"); + + /* TODO: check if theses calls can be combined */ + Node.copyIntermediate(); + Node.copyFromGPU(); + + // Final output + Log.print("Final dump..."); + ewDumpPOIs(); + ewDump2D(); + + Node.freeMem(); + + printf_v("Runtime: %.3lf\n", diff(start, end) * 1000.0); + + delete gNode; + + return 0; +} + + +//======================================================================== +int commandLineHelp( void ) +{ + printf( "Usage: easyWave -grid ... -source ... -time ... [optional parameters]\n" ); + printf( "-grid ... bathymetry in GoldenSoftware(C) GRD format (text or binary)\n" ); + printf( "-source ... input wave either als GRD-file or file with Okada faults\n" ); + printf( "-time ... simulation time in [min]\n" ); + printf( "Optional parameters:\n" ); + printf( "-step ... simulation time step, default- estimated from bathymetry\n" ); + printf( "-coriolis use Coriolis fource, default- no\n" ); + printf( "-poi ... POIs file\n" ); + printf( "-label ... model name, default- 'eWave'\n" ); + printf( "-progress ... show simulation progress each ... minutes, default- 10\n" ); + printf( "-propagation ... write wave propagation grid each ... minutes, default- 5\n" ); + printf( "-dump ... make solution dump each ... physical seconds, default- 0\n" ); + printf( "-nolog deactivate logging\n" ); + printf( "-poi_dt_out ... output time step for mariograms in [sec], default- 30\n" ); + printf( "-poi_search_dist ... in [km], default- 10\n" ); + printf( "-poi_min_depth ... in [m], default- 1\n" ); + printf( "-poi_max_depth ... in [m], default- 10 000\n" ); + printf( "-poi_report enable POIs loading report, default- disabled\n" ); + printf( "-ssh0_rel ... relative threshold for initial wave, default- 0.01\n" ); + printf( "-ssh0_abs ... absolute threshold for initial wave in [m], default- 0\n" ); + printf( "-ssh_arrival ... threshold for arrival times in [m], default- 0.001\n" ); + printf( " negative value considered as relative threshold\n" ); + printf( "-gpu start GPU version of EasyWave (requires a CUDA capable device)\n" ); + printf( "-verbose generate verbose output on stdout\n" ); + printf( "\nExample:\n" ); + printf( "\t easyWave -grid gebcoIndonesia.grd -source fault.inp -time 120\n\n" ); + + return -1; +} diff --git a/cpu/branches/web/Makefile b/cpu/branches/web/Makefile new file mode 100644 index 0000000000000000000000000000000000000000..11b3694ee7441d39877b2990fcd2f7edd5dc9b22 --- /dev/null +++ b/cpu/branches/web/Makefile @@ -0,0 +1,33 @@ +-include ../../../Makefile.inc + +CC=g++ +NVCC=nvcc +CFLAGS=-O3 -I. + +ARCH?=compute_20 +CODE?=sm_21 +NVFLAGS=$(CFLAGS) -gencode arch=$(ARCH),code=$(CODE) + +CPPS=$(wildcard *.cpp) +CUS=$(wildcard *.cu) + +CU_OBJS=$(patsubst %.cu,%.o,$(CUS) ) + +OBJECTS=$(patsubst %.cpp, %.o, $(CPPS) ) $(patsubst %.cu,%.o,$(CUS) ) + +all: EasyWave + +EasyWave: $(OBJECTS) link.o + $(NVCC) -o $@ $^ + +%.o: %.cpp *.h + $(CC) -c $(CFLAGS) -o $@ $< + +%.o: %.cu *.cuh *.h + $(NVCC) -dc $(NVFLAGS) -x cu -o $@ $< + +link.o: $(CU_OBJS) + $(NVCC) -dlink $(NVFLAGS) -o $@ $^ + +clean: + rm -f EasyWave *.o diff --git a/cpu/branches/web/Makefile_debug b/cpu/branches/web/Makefile_debug new file mode 100644 index 0000000000000000000000000000000000000000..201daee16fa8288a366fde03eda28e44b90e90d4 --- /dev/null +++ b/cpu/branches/web/Makefile_debug @@ -0,0 +1,31 @@ +CC=g++ +NVCC=nvcc +CFLAGS=-O3 -I. -g + +CODE=sm_35 +ARCH=compute_35 +NVFLAGS=$(CFLAGS) -gencode arch=$(ARCH),code=$(CODE) + +CPPS=$(wildcard *.cpp) +CUS=$(wildcard *.cu) + +CU_OBJS=$(patsubst %.cu,%.o,$(CUS) ) + +OBJECTS=$(patsubst %.cpp, %.o, $(CPPS) ) $(patsubst %.cu,%.o,$(CUS) ) + +all: EasyWave + +EasyWave: $(OBJECTS) link.o + $(NVCC) -g -G -o $@ $^ + +%.o: %.cpp *.h + $(CC) -c $(CFLAGS) -o $@ $< + +%.o: %.cu *.cuh *.h + $(NVCC) -G -dc $(NVFLAGS) -x cu -o $@ $< + +link.o: $(CU_OBJS) + $(NVCC) -G -dlink $(NVFLAGS) -o $@ $^ + +clean: + rm -f EasyWave *.o diff --git a/cpu/branches/web/cOgrd.cpp b/cpu/branches/web/cOgrd.cpp new file mode 100644 index 0000000000000000000000000000000000000000..93eff486a548d6371131d984792ed04e3eef50c1 --- /dev/null +++ b/cpu/branches/web/cOgrd.cpp @@ -0,0 +1,1159 @@ +#include +#include +#include +#include +#include + +#define DEFAULT_NOVAL 0.0 +#define ROWWISE 1 +#define COLWISE 2 +#define TOP2BOT -1 +#define BOT2TOP 1 + + +int iabs( int i ) +{ + return( (i<0) ? (-i) : i ); +} + + +//========================================================================= +// Zero Constructor +cOgrd::cOgrd( ) +{ + xmin = xmax = dx = 0.; + nx = 0; + ymin = ymax = dy = 0.; + ny = 0; + nnod = 0; + noval = DEFAULT_NOVAL; + val = NULL; +} + + +//========================================================================= +// Copy constructor similar to operator = +cOgrd::cOgrd( const cOgrd& grd ) +{ + xmin=grd.xmin; xmax=grd.xmax; dx=grd.dx; nx=grd.nx; + ymin=grd.ymin; ymax=grd.ymax; dy=grd.dy; ny=grd.ny; + nnod=grd.nnod; + noval=grd.noval; + + val = new double[nnod]; + memcpy( val, grd.val, nnod*sizeof(double) ); +} + + +//========================================================================= +cOgrd::cOgrd( double xmin0, double xmax0, int nx0, double ymin0, double ymax0, int ny0 ) +{ + noval = DEFAULT_NOVAL; + val = NULL; + initialize( xmin0, xmax0, nx0, ymin0, ymax0, ny0 ); +} + + +//========================================================================= +cOgrd::cOgrd( double xmin0, double xmax0, double dx0, double ymin0, double ymax0, double dy0 ) +{ + noval = DEFAULT_NOVAL; + val = NULL; + initialize( xmin0, xmax0, dx0, ymin0, ymax0, dy0 ); +} + + +//========================================================================= +// Destructor +cOgrd::~cOgrd() +{ + if(val) delete [] val; +} + + +//========================================================================= +// Initialize with number of nodes +int cOgrd::initialize( double xmin0, double xmax0, int nx0, double ymin0, double ymax0, int ny0 ) +{ + + xmin = xmin0; + xmax = xmax0; + nx = nx0; + dx = (xmax-xmin)/(nx-1); + + ymin = ymin0; + ymax = ymax0; + ny = ny0; + dy = (ymax-ymin)/(ny-1); + + nnod = nx*ny; + + if( val ) delete [] val; + + val = new double[nnod]; + for( int l=0; l xmax ) xmax = x; + if( y < ymin ) ymin = y; + if( y > ymax ) ymax = y; + } + + // Read first two lines and define if file is row- or column-wise + rewind( fp ); + line = 0; + utlReadNextDataRecord( fp, record, &line ); + sscanf( record, "%lf %lf %lf", &x0, &y0, &z ); + utlReadNextDataRecord( fp, record, &line ); + sscanf( record, "%lf %lf %lf", &x, &y, &z ); + if( x0==x && y0!=y ) + ordering = COLWISE; + else if( x0!=x && y0==y ) + ordering = ROWWISE; + else + return Err.post(Err.msgReadFile( fname, line, "Cannot recognise data ordering" )); + + // Define nx and ny + rewind( fp ); + line = 0; + utlReadNextDataRecord( fp, record, &line ); + sscanf( record, "%lf %lf %lf", &x0, &y0, &z ); + if( ordering == ROWWISE ) { + nx = 1; + while( utlReadNextDataRecord( fp, record, &line ) != EOF ) { + sscanf( record, "%lf %lf %lf", &x, &y, &z ); + if( y != y0 ) break; + nx++; + } + ny = nxny / nx; + if( y > y0 ) + ydirection = BOT2TOP; + else + ydirection = TOP2BOT; + + } + else if( ordering == COLWISE ) { + ny = 1; + while( utlReadNextDataRecord( fp, record, &line ) != EOF ) { + sscanf( record, "%lf %lf %lf", &x, &y, &z ); + if( x != x0 ) break; + if( ny == 1 ) { + if( y > y0 ) + ydirection = BOT2TOP; + else + ydirection = TOP2BOT; + } + ny++; + } + nx = nxny / ny; + } + + if( nx*ny != nxny ) + return Err.post( "cOgrd::readXYZ -> nx*ny != nxny" ); + + // Other grid parameters + dx = (xmax-xmin)/(nx-1); + dy = (ymax-ymin)/(ny-1); + + nnod = nx*ny; + + // Allocate memory for z-values + if( val ) delete [] val; + val = new double[nnod]; + for( int l=0; l=0; j-- ) { + for( i=0; i=0; j-- ) { + utlReadNextDataRecord( fp, record, &line ); + sscanf( record, "%*s %*s %lf", &val[idx(i,j)] ); + } + } + } + } + + + fclose( fp ); + + return 0; +} + + +//========================================================================= +// Read grid from ASCII-raster file +int cOgrd::readRasterStream( FILE *fp, int ordering, int ydirection ) +{ + int i,j; + double x0,x,y0,y,z; + + + if( ordering == ROWWISE ) { + if( ydirection == BOT2TOP ) { + for( j=0; j=0; j-- ) + for( i=0; i=0; j-- ) + if( fscanf( fp, "%lf", &val[idx(i,j)] ) != 1 ) return Err.post("Unexpected EOF"); + } + else return Err.post("Unexpected direction"); + } + else return Err.post("Unexpected Ordering"); + + return 0; +} + + +//========================================================================= +// Cut-off a subgrid +cOgrd* cOgrd::extract( int i1, int i2, int j1, int j2 ) +{ + cOgrd *grd; + + if( i1 < 0 || i2 > (nx-1) || j1 < 0 || j2 > (ny-1) ) { Err.post("subGrid: bad ranges"); return NULL; } + + grd = new cOgrd( getX(i1,0), getX(i2,0), (i2-i1+1), getY(0,j1), getY(0,j2), (j2-j1+1) ); + + for( int i=i1; i<=i2; i++ ) { + for( int j=j1; j<=j2; j++ ) { + (*grd).setVal( getVal(i,j), (i-i1), (j-j1) ); + } + } + + return grd; +} + + +//========================================================================= +// Cut-off a subgrid +cOgrd* cOgrd::extract( double x1, double x2, double y1, double y2 ) +{ + int i1,i2,j1,j2; + + i1 = (int)((x1-xmin)/dx); if( i1 < 0 ) i1 = 0; + i2 = (int)((x2-xmin)/dx); if( i2 > (nx-1) ) i2 = nx-1; + j1 = (int)((y1-ymin)/dy); if( j1 < 0 ) j1 = 0; + j2 = (int)((y2-ymin)/dy); if( j2 > (ny-1) ) j2 = ny-1; + + return extract( i1, i2, j1, j2 ); +} + +//========================================================================= +// Total reset +void cOgrd::reset() +{ + xmin = xmax = dx = 0.; + nx = 0; + ymin = ymax = dy = 0.; + ny = 0; + + nnod = 0; + noval = DEFAULT_NOVAL; + + if( val != NULL ) delete [] val; + val = NULL; +} + + +//========================================================================= +// Reset values to zero +void cOgrd::resetVal() +{ + for( int l=0; l nx-1 ) i2 = nx-1; + j1 = (int)((grd.ymin-ymin)/dy); + if( j1 < 0 ) j1 = 0; + j2 = (int)((grd.ymax-ymin)/dy) + 1; + if( j2 > ny-1 ) j2 = ny-1; + for( i=i1; i<=i2; i++ ) { + for( j=j1; j<=j2; j++ ) { + val[idx(i,j)] += grd.getVal( getX(i,j), getY(i,j) ); + } + } + } + + return *this; +} + + +//========================================================================= +double& cOgrd::operator() ( int i, int j ) +{ + return( val[idx(i,j)] ); +} + + +//========================================================================= +double& cOgrd::operator() ( int l ) +{ + return( val[l] ); +} + + +//========================================================================= +// Get IJ-indices from the offset +void cOgrd::getIJ( int idx, int& i, int& j ) +{ + + // J is the inner loop (increments faster than I) + i = idx/ny; + j = idx - i*ny; + +} + + +//========================================================================= +// Get IJ-indices from coordinates +int cOgrd::getIJ( double x, double y, int& i, int& j ) +{ + i = (int)( (x-xmin)/(xmax-xmin)*nx ); + if( i<0 || i>(nx-1) ) return -1; + j = (int)( (y-ymin)/(ymax-ymin)*ny ); + if( j<0 || j>(ny-1) ) return -1; + + return 0; +} + + +//========================================================================= +// Get offset from IJ-indices +int cOgrd::idx( int i, int j ) +{ + + // J is the inner loop (increments faster than I) + return( int( (int)ny*i + j ) ); + +} + + +//========================================================================= +// Get value at given node +double cOgrd::getVal( int i, int j ) +{ + + return( val[idx(i,j)] ); + +} + + +//========================================================================= +// Get value at given node +double cOgrd::getVal( int idx ) +{ + + return( val[idx] ); + +} + + +//========================================================================= +// Set value at given node +void cOgrd::setVal( double value, int i, int j ) +{ + + val[idx(i,j)] = value; + +} + + +//========================================================================= +// Set value at given node +void cOgrd::setVal( double value, int idx ) +{ + + val[idx] = value; + +} + + +//========================================================================= +// Get maximal value on a grid +double cOgrd::getMaxVal() +{ + int l; + double vmax; + + + for( vmax=-RealMax, l=0; l vmax ) + vmax = val[l]; + + return vmax; +} + + +//========================================================================= +// Get maximal value and its position on a grid +double cOgrd::getMaxVal( int& i, int& j ) +{ + int l,lmax; + double vmax; + + + for( lmax=0,vmax=-RealMax, l=0; l vmax ) { + vmax = val[l]; + lmax = l; + } + } + + getIJ( lmax, i, j ); + + return vmax; +} + + +//========================================================================= +// Get minimal value on a grid +double cOgrd::getMinVal() +{ + int l; + double vmin; + + + for( vmin=RealMax, l=0; l vmax ) + vmax = fabs(val[l]); + + return vmax; +} + + +//========================================================================= +// Get maximal absolute value and its position on a grid +double cOgrd::getMaxAbsVal( int& i, int& j ) +{ + int l,lmax; + double vmax; + + + for( lmax=0,vmax=-RealMax, l=0; l vmax ) { + vmax = fabs(val[l]); + lmax = l; + } + } + + getIJ( lmax, i, j ); + + return vmax; +} + + +//========================================================================= +// Get maximal absolute value along grid boundaries +double cOgrd::getMaxAbsValBnd() +{ + int i,j; + double vmax=-RealMax; + + for( i=0; i vmax ) vmax = fabs(val[idx(i,0)]); + if( fabs(val[idx(i,ny-1)]) > vmax ) vmax = fabs(val[idx(i,ny-1)]); + } + + for( j=0; j vmax ) vmax = fabs(val[idx(0,j)]); + if( fabs(val[idx(nx-1,j)]) > vmax ) vmax = fabs(val[idx(nx-1,j)]); + } + + return vmax; +} + + +//========================================================================= +// Get value with interpolation (bi-linear) +double cOgrd::getVal( double x, double y ) +{ + int i0,j0; + double fx,fy,val_l,val_r,result; + + + i0 = (int)((x-xmin)/dx); + if( i0<0 || (i0+1)>=nx ) return(noval); + j0 = (int)((y-ymin)/dy); + if( j0<0 || (j0+1)>=ny ) return(noval); + + fx = (x - (xmin+dx*i0))/dx; + fy = (y - (ymin+dy*j0))/dy; + + val_l = (1-fy)*getVal(i0,j0) + fy*getVal(i0,j0+1); + val_r = (1-fy)*getVal(i0+1,j0) + fy*getVal(i0+1,j0+1); + + result = (1-fx)*val_l + fx*val_r; + + return( result ); +} + + +//========================================================================= +// Get longitude from IJ-indeces +double cOgrd::getX( int i, int j ) +{ + return( xmin + i*dx ); +} + + +//========================================================================= +// Get longitude from the offset +double cOgrd::getX( int idx ) +{ + int i,j; + + getIJ( idx, i, j ); + return( xmin + i*dx ); +} + + +//========================================================================= +// Get lattitude from IJ-indeces +double cOgrd::getY( int i, int j ) +{ + return( ymin + j*dy ); +} + + +//========================================================================= +// Get lattitude from the offset +double cOgrd::getY( int idx ) +{ + int i,j; + + getIJ( idx, i, j ); + return( ymin + j*dy ); +} + + +//========================================================================= +// Check if the shapes are equal +int cOgrd::isSameShapeTo( cOgrd& grd ) +{ + if( xmin != grd.xmin ) return 0; + if( xmax != grd.xmax ) return 0; + if( nx != grd.nx ) return 0; + if( ymin != grd.ymin ) return 0; + if( ymax != grd.ymax ) return 0; + if( ny != grd.ny ) return 0; + + return 1; +} + + +//========================================================================= +// INtersection region with another grid +int cOgrd::getIntersectionRegion( const cOgrd &grd, int& imin, int& imax, int& jmin, int& jmax ) +{ + + if( xmin < grd.xmin ) { + if( xmax < grd.xmin ) return -1; + imin = (int)((grd.xmin-xmin)/dx); + } + else if( xmin <= grd.xmax ) { + imin = 0; + } + else + return -1; + + if( xmax < grd.xmin ) + return -1; + else if( xmax <= grd.xmax ) { + imax = nx-1; + } + else { + imax = (int)((grd.xmax-xmin)/dx); + } + + if( ymin < grd.ymin ) { + if( ymax < grd.ymin ) return -1; + jmin = (int)((grd.ymin-ymin)/dy); + } + else if( ymin <= grd.ymax ) { + jmin = 0; + } + else + return -1; + + if( ymax < grd.ymin ) + return -1; + else if( ymax <= grd.ymax ) { + jmax = ny-1; + } + else { + jmax = (int)((grd.ymax-ymin)/dy); + } + + return 0; +} + + +//========================================================================= +// Interpolate grid values from another O-grid (bi-linear) +int cOgrd::interpolateFrom( cOgrd &grd, int resetValues ) +{ + int ierr,imin,imax,jmin,jmax; + double value; + + if( resetValues ) resetVal(); + + ierr = getIntersectionRegion( grd, imin, imax, jmin, jmax ); + if(ierr) return 0; + + for( int i=imin; i<=imax; i++ ) { + for( int j=jmin; j<=jmax; j++ ) { + value = grd.getVal( getX(i,j), getY(i,j) ); + if( value != grd.noval ) val[idx(i,j)] = value; + } + } + + return( 0 ); +} + + +//========================================================================= +// Get nearest node +int cOgrd::getNearestIdx( double x0, double y0 ) +{ + int i0,j0; + int l,lmin; + double dist2,dist2min; + + + if( x0xmax || y0ymax ) + return -1; + + i0 = (int)((x0-xmin)/dx); + j0 = (int)((y0-ymin)/dy); + + l = idx(i0,j0); + dist2min = (x0-getX(l))*(x0-getX(l)) + (y0-getY(l))*(y0-getY(l)); + lmin = l; + + l = idx(i0+1,j0); + dist2 = (x0-getX(l))*(x0-getX(l)) + (y0-getY(l))*(y0-getY(l)); + if( dist2 < dist2min ) { dist2min = dist2; lmin = l; } + + l = idx(i0,j0+1); + dist2 = (x0-getX(l))*(x0-getX(l)) + (y0-getY(l))*(y0-getY(l)); + if( dist2 < dist2min ) { dist2min = dist2; lmin = l; } + + l = idx(i0+1,j0+1); + dist2 = (x0-getX(l))*(x0-getX(l)) + (y0-getY(l))*(y0-getY(l)); + if( dist2 < dist2min ) { dist2min = dist2; lmin = l; } + + return lmin; +} + + +//========================================================================= +// Get nearest conditioned node +int cOgrd::getNearestIdx( double x0, double y0, double rangemin, double rangemax ) +{ + int i,j,i0,j0,rad; + int l,lmin; + double dist2,dist2min; + + + lmin = getNearestIdx( x0, y0 ); + if( lmin == -1 ) + return lmin; + + if( val[lmin] >= rangemin && val[lmin] <= rangemax ) + return lmin; + + getIJ( lmin, i0,j0 ); + + for( lmin=-1,rad=1; radnx-1 || j<0 || j>ny-1 ) continue; + l = idx(i,j); + if( val[l] < rangemin || val[l] > rangemax ) continue; + dist2 = (x0-getX(l))*(x0-getX(l)) + (y0-getY(l))*(y0-getY(l)); + if( dist2 < dist2min ) { dist2min = dist2; lmin = l; } + } + + if( lmin > 0 ) break; + } + + return lmin; +} + + +//========================================================================= +// Smooth data on grid +void cOgrd::smooth( int radius ) +{ + int i,j,k,ik,jk; + int l; + double sumwt; + double wt[10] = { 1.0, 0.5, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25, 0.25 }; + + if( radius == 0 ) return; + + cOgrd tmp( *this ); + + for( i=0; i=nx ) continue; + for( jk=j-k; jk<=j+k; jk++ ) { + if( jk<0 || jk>=ny ) continue; + if( iabs(ik-i)==k || iabs(jk-j)==k ) { + tmp(l) += wt[k] * ((*this)(ik,jk)); + sumwt += wt[k]; + } + } + } + } + tmp(l) /= sumwt; + } + } + + *this = tmp; +} + + + +//========================================================================= +// write to GRD-file +int cOgrd::writeGRD( const char *fname ) +{ + FILE *fp; + int i,j,cnt; + + + fp = fopen( fname, "wt" ); + + fprintf( fp, "DSAA\n" ); + fprintf( fp, "%d %d\n", nx, ny ); + fprintf( fp, "%f %f\n", xmin, xmax ); + fprintf( fp, "%f %f\n", ymin, ymax ); + fprintf( fp, "%f %f\n", getMinVal(), getMaxVal() ); + + for( cnt=0, j=0; j +#include +#include +#include +#include "cOkadaEarthquake.h" + + + +//========================================================================= +cOkadaEarthquake::cOkadaEarthquake() +{ + finalized = 0; + m0 = 0; + nfault = 1; + fault = new cOkadaFault [1]; +} + + +//========================================================================= +cOkadaEarthquake::~cOkadaEarthquake() +{ + if( fault != NULL ) delete [] fault; +} + + +//========================================================================= +// Read fault(s) from a file +int cOkadaEarthquake::read( char *fname ) +{ + FILE *fp; + char record[256]; + int ierr; + + nfault = utlGetNumberOfRecords( fname ); + if( fault != NULL ) delete [] fault; + fault = new cOkadaFault [nfault]; + + if( (fp = fopen(fname,"rt")) == NULL ) return Err.post(Err.msgOpenFile(fname)); + + for( int n=0, line=0; n lonmax ) lonmax = xmax; + if( ymin < latmin ) latmin = ymin; if( ymax > latmax ) latmax = ymax; + } + + if( round ) { + lonmin = floor( lonmin ); lonmax = ceil( lonmax ); + latmin = floor( latmin ); latmax = ceil( latmax ); + } + + return 0; +} + + +//========================================================================= +// Calculate surface displacements by summing effect from all ruptures +int cOkadaEarthquake::calculate( double lon, double lat, double& uz, double& ulon, double &ulat ) +{ + int n,ierr; + double uzf,ulonf,ulatf; + + + if( !finalized ) return Err.post("cOkadaEarthquake::calculate: eq not finalized"); + + for( ulon=ulat=uz=0, n=0; n +#include +#include "cOkadaFault.h" + + +class cOkadaEarthquake +{ +protected: + + int finalized; + int getDeformArea( int round, double& lonmin, double& lonmax, double& latmin, double& latmax ); + int setGrid( cOgrd& u ); + +public: + + int nfault; // total nuber of Okada faults + double m0; // total earthquake moment + cOkadaFault *fault; // array of composing faults + + cOkadaEarthquake(); + ~cOkadaEarthquake(); + int read( char *fname ); + int finalizeInput(); + double getM0(); + double getMw(); + int calculate( double lon, double lat, double& uz ); + int calculate( double lon, double lat, double& uz, double& ulon, double &ulat ); + int calculate( cObsArray& arr ); + int calculate( cOgrd& uZ ); + int calculate( cOgrd& uZ, cOgrd& uLon, cOgrd& uLat ); + char *sprint(); + +}; + +#endif // OKADAEARTHQUAKE_H diff --git a/cpu/branches/web/cOkadaFault.cpp b/cpu/branches/web/cOkadaFault.cpp new file mode 100644 index 0000000000000000000000000000000000000000..192e515d7a84afa6d82e6ca9b3b03c526ea6fc73 --- /dev/null +++ b/cpu/branches/web/cOkadaFault.cpp @@ -0,0 +1,440 @@ +#include +#include +#include +#include "cOkadaFault.h" + +static const double Rearth=6384.e+3; + +//========================================================================= +// Constructor +cOkadaFault::cOkadaFault() +{ + // obligatory user parameters + lat = lon = depth = strike = dip = rake = RealMax; + // optional user parameters + mw = slip = length = width = 0.; + adjust_depth = 0; + // default values + refpos = FLT_POS_C; + mu = 3.5e+10; + // derivative parameters + zbot = sind = cosd = sins = coss = tand = coslat = wp = dslip = sslip = 0; + // flags + checked = 0; +} + + + +//========================================================================= +// Destructor +cOkadaFault::~cOkadaFault() +{ +} + + + +//========================================================================= +// read fault parameters from input string. read only. consistency check in separate method +int cOkadaFault::read( char *faultparam ) +{ + char *cp,buf[64]; + int ierr; + + + // origin location + cp = strstr( faultparam, "-location" ); + if( cp ) { + if( sscanf( cp, "%*s %lf %lf %lf", &lon, &lat, &depth ) != 3 ) return Err.post( "cOkadaFault::read: position" ); + depth *= 1000; + } + + // adjust depth if fault breaks through surface + cp = strstr( faultparam, "-adjust_depth" ); + if( cp ) adjust_depth = 1; + + // reference point + cp = strstr( faultparam, "-refpos" ); + if( cp ) { + if( sscanf( cp, "%*s %s", buf ) != 1 ) return Err.post( "cOkadaFault::read: refpos" ); + if( !strcmp( buf, "C" ) || !strcmp( buf, "c" ) ) + refpos = FLT_POS_C; + else if( !strcmp( buf, "MT" ) || !strcmp( buf, "mt" ) ) + refpos = FLT_POS_MT; + else if( !strcmp( buf, "BT" ) || !strcmp( buf, "bt" ) ) + refpos = FLT_POS_BT; + else if( !strcmp( buf, "BB" ) || !strcmp( buf, "bb" ) ) + refpos = FLT_POS_BB; + else if( !strcmp( buf, "MB" ) || !strcmp( buf, "mb" ) ) + refpos = FLT_POS_MB; + else + return Err.post( "cOkadaFault::read: refpos" ); + } + + // magnitude + cp = strstr( faultparam, "-mw" ); + if( cp ) if( sscanf( cp, "%*s %lf", &mw ) != 1 ) return Err.post( "cOkadaFault::read: mw" ); + + // slip + cp = strstr( faultparam, "-slip" ); + if( cp ) { + if( sscanf( cp, "%*s %lf", &slip ) != 1 ) return Err.post( "cOkadaFault::read: slip" ); + if( slip < 1.e-6 ) slip = 1.e-6; + } + + // strike + cp = strstr( faultparam, "-strike" ); + if( cp ) if( sscanf( cp, "%*s %lf", &strike ) != 1 ) return Err.post( "cOkadaFault::read: strike" ); + + // dip + cp = strstr( faultparam, "-dip" ); + if( cp ) if( sscanf( cp, "%*s %lf", &dip ) != 1 ) return Err.post( "cOkadaFault::read: dip" ); + + // rake + cp = strstr( faultparam, "-rake" ); + if( cp ) if( sscanf( cp, "%*s %lf", &rake ) != 1 ) return Err.post( "cOkadaFault::read: rake" ); + + // length and width + cp = strstr( faultparam, "-size" ); + if( cp ) { + if( sscanf( cp, "%*s %lf %lf", &length, &width ) != 2 ) return Err.post( "cOkadaFault::read: size" ); + length *= 1000; + width *= 1000; + } + + // rigidity + cp = strstr( faultparam, "-rigidity" ); + if( cp ) if( sscanf( cp, "%*s %lf", &mu ) != 1 ) return Err.post( "cOkadaFault::read: rigidity" ); + + + // check fault data for integrity + //ierr = check(); if(ierr) return ierr; + + return 0; +} + + + +//================================================================================ +int cOkadaFault::check() +// Check readed fault parameters for consistency and calculate secondary parameters +{ + + // check necessary parameters + if( lon == RealMax ) { Err.post( "cOkadaFault::check: lon" ); return FLT_ERR_DATA; } + if( lat == RealMax ) { Err.post( "cOkadaFault::check: lat" ); return FLT_ERR_DATA; } + if( depth == RealMax ) { Err.post( "cOkadaFault::check: depth" ); return FLT_ERR_DATA; } + if( strike == RealMax ) { Err.post( "cOkadaFault::check: strike" ); return FLT_ERR_STRIKE; } + if( rake == RealMax ) { Err.post( "cOkadaFault::check: rake" ); return FLT_ERR_DATA; } + if( dip == RealMax ) { Err.post( "cOkadaFault::check: dip" ); return FLT_ERR_DATA; } + + // cache trigonometric expressions + sind = sindeg( dip ); + cosd = cosdeg( dip ); + sins = sindeg( 90-strike ); + coss = cosdeg( 90-strike ); + tand = tandeg( dip ); + coslat = cosdeg( lat ); + + // branching through given parameters (the full solution table see end of file) + if( !mw && !slip ) { + Err.post( "cOkadaFault::check: not enough data" ); return FLT_ERR_DATA; + } + else if( !mw && slip ) { + if( !length && !width ) { + Err.post( "cOkadaFault::check: not enough data" ); return FLT_ERR_DATA; + } + else if( length && !width ) { + width = length/2; + } + else if( !length && width ) { + length = 2*width; + } + else if( length && width ) { + } + else { + Err.post( "cOkadaFault::check: internal error" ); return FLT_ERR_INTERNAL; + } + + mw = 2./3. * (log10(mu*length*width*slip) - 9.1); + } + else if( mw && !slip ) { + if( !length && !width ) { + // scaling relations used by JMA + length = pow( 10., -1.80 + 0.5*mw ) * 1000; + width = length/2; + } + else if( length && !width ) { + width = length/2; + } + else if( !length && width ) { + length = 2*width; + } + else if( length && width ) { + } + else { + Err.post( "cOkadaFault::check: internal error" ); return FLT_ERR_INTERNAL; + } + slip = mw2m0() / mu / length / width; + } + else if( mw && slip ) { + if( !length && !width ) { + double area = mw2m0() / mu / slip; + length = sqrt( 2 * area ); + width = length/2; + } + else if( length && !width ) { + width = mw2m0() / mu / slip / length; + } + else if( !length && width ) { + length = mw2m0() / mu / slip / width; + } + else if( length && width ) { + if( fabs( 1 - mu*slip*length*width/mw2m0() ) > 0.01 ) { + Err.post( "cOkadaFault::check: data inconsistency" ); return FLT_ERR_DATA; + } + } + else { + Err.post( "cOkadaFault::check: internal error" ); return FLT_ERR_INTERNAL; + } + } + + // calculate bottom of the fault + switch( refpos ) { + double ztop; + case FLT_POS_C: + ztop = depth - width/2*sind; + if( ztop < 0 ) { + if( adjust_depth ) { + ztop = 0.; + depth = ztop + width/2*sind; + } + else { + Err.post( "cOkadaFault::check: negative ztop" ); + return FLT_ERR_ZTOP; + } + } + zbot = depth + width/2*sind; + break; + case FLT_POS_MT: + case FLT_POS_BT: + zbot = depth + width*sind; + break; + case FLT_POS_BB: + case FLT_POS_MB: + ztop = depth - width*sind; + if( ztop < 0 ) { + if( adjust_depth ) { + ztop = 0.; + depth = ztop + width*sind; + } + else { + Err.post( "cOkadaFault::check: negative ztop" ); + return FLT_ERR_ZTOP; + } + } + zbot = depth; + break; + } + + // slip components + dslip = slip*sindeg(rake); + sslip = slip*cosdeg(rake); + + // surface projection of width + wp = width*cosd; + + checked = 1; + + return 0; +} + + +//========================================================================= +double cOkadaFault::mw2m0() +{ + return pow(10., 3.*mw/2 + 9.1); +} + + +//========================================================================= +double cOkadaFault::getM0() +{ + if( !checked ) { Err.post( "cOkadaFault::getM0: fault not checked" ); return -RealMax; } + + return mw2m0(); +} + +//========================================================================= +double cOkadaFault::getMw() +{ + if( !checked ) { Err.post( "cOkadaFault::getMw: fault not checked" ); return -RealMax; } + + return mw; +} + + +//========================================================================= +double cOkadaFault::getZtop() +{ + return( zbot - width*sind ); +} + + +//========================================================================= +int cOkadaFault::global2local( double glon, double glat, double& lx, double& ly ) +// from global geographical coordinates into local Okada's coordinates +{ + double x,y; + + // center coordinate system to refpos (lon/lat), convert to meters + y = Rearth * g2r(glat - lat); + x = Rearth * coslat * g2r(glon - lon); + + // rotate according to strike + lx = x*coss + y*sins; + ly = -x*sins + y*coss; + + // finally shift to Okada's origin point (BB) + switch( refpos ) { + case FLT_POS_C: lx = lx + length/2; ly = ly + wp/2; break; + case FLT_POS_MT: lx = lx + length/2; ly = ly + wp ; break; + case FLT_POS_BT: lx = lx ; ly = ly + wp ; break; + case FLT_POS_BB: lx = lx ; ly = ly ; break; + case FLT_POS_MB: lx = lx + length/2; ly = ly ; break; + } + + return 0; +} + + + +//========================================================================= +int cOkadaFault::local2global( double lx, double ly, double& glon, double& glat ) +// from local Okada's coordinates to global geographical +{ + double x,y,gx,gy; + + + // define local coordinates relative to the fault refpos + switch( refpos ) { + case FLT_POS_C: x = lx - length/2; y = ly - wp/2; break; + case FLT_POS_MT: x = lx - length/2; y = ly - wp ; break; + case FLT_POS_BT: x = lx ; y = ly - wp ; break; + case FLT_POS_BB: x = lx ; y = ly ; break; + case FLT_POS_MB: x = lx - length/2; y = ly ; break; + } + + // back-rotate to geographical axes (negative strike!). Values are still in meters! + gx = x*coss + y*(-sins); + gy = -x*(-sins) + y*coss; + + // convert meters to degrees. This is offset in degrees relative to refpos. Add refpos coordinates for absolute values + glat = r2g(gy/Rearth) + lat; + glon = r2g(gx/Rearth/cosdeg(lat)) + lon; + + return 0; +} + + + +//========================================================================= +// get ruptured area to calculate surface displacement +// parameter rand accounts for lateral enlargement at rands of rupture surface projection +int cOkadaFault::getDeformArea( double& lonmin, double& lonmax, double& latmin, double& latmax ) +{ + #define FLT_NL 2 // significant deformation area along length (in length units) + #define FLT_NW 5 // significant deformation area along width (in width units) + int ierr; + double dxC,dyC,l2,w2,glon,glat; + + + if( !checked ) { Err.post( "cOkadaFault::getDeformArea: attempt with non-checked fault" ); return FLT_ERR_INTERNAL; } + + // follow rectangle around the fault + dxC = FLT_NL * length; + l2 = length/2; + dyC = FLT_NW * wp; + w2 = wp/2; + + local2global( dxC+l2, dyC+w2, glon, glat ); + lonmin = lonmax = glon; + latmin = latmax = glat; + + local2global( -dxC+l2, dyC+w2, glon, glat ); + if( glonlonmax ) lonmax = glon; + if( glatlatmax ) latmax = glat; + + local2global( -dxC+l2, -dyC+w2, glon, glat ); + if( glonlonmax ) lonmax = glon; + if( glatlatmax ) latmax = glat; + + local2global( dxC+l2, -dyC+w2, glon, glat ); + if( glonlonmax ) lonmax = glon; + if( glatlatmax ) latmax = glat; + + return 0; +} + + + +//========================================================================= +int cOkadaFault::calculate( double lon0, double lat0, double& uz, double& ulon, double &ulat ) +{ + int ierr; + double x,y,ux,uy; + + + if( !checked ) { Err.post( "cOkadaFault::calculate: attempt with non-checked fault" ); return FLT_ERR_INTERNAL; } + + global2local( lon0, lat0, x, y ); + + // Okada model + okada( length, width, zbot, sind, cosd, sslip, dslip, x, y, 1, &ux,&uy,&uz ); + + // back-rotate horizontal deformations to global coordinates (negative strike!) + ulon = ux*coss + uy*(-sins); + ulat = -ux*(-sins) + uy*coss; + + return 0; +} + + + +//========================================================================= +int cOkadaFault::calculate( double lon0, double lat0, double &uz ) +{ + int ierr; + double x,y,ux,uy; + + + if( !checked ) { Err.post( "cOkadaFault::calculate: attempt with non-checked fault" ); return FLT_ERR_INTERNAL; } + + global2local( lon0, lat0, x, y ); + + // Okada model + okada( length, width, zbot, sind, cosd, sslip, dslip, x, y, 0, &ux,&uy,&uz ); + + return 0; +} + + +//====================================================================================== +// Input parameter selection: Solution table +// if given: +// mw slip L W action +// - - - - err_data +// - - - + err_data +// - - + - err_data +// - - + + err_data +// - + - - err_data +// - + - + L=2W, Mw +// - + + - W=L/2, Mw +// - + + + Mw +// + - - - W&C96(L,W), S +// + - - + L=2W, S +// + - + - W=L/2, S +// + - + + S +// + + - - area(Mw), L=2W +// + + - + L +// + + + - W +// + + + + check Mw=muSLW diff --git a/cpu/branches/web/cOkadaFault.h b/cpu/branches/web/cOkadaFault.h new file mode 100644 index 0000000000000000000000000000000000000000..7223dc00b3d5347ef26c4667486ed6f2f78d8d03 --- /dev/null +++ b/cpu/branches/web/cOkadaFault.h @@ -0,0 +1,64 @@ +#ifndef OKADAFAULT_H +#define OKADAFAULT_H + +// Position of the fault reference point (to which lon/lat/depth are related) +#define FLT_POS_C 0 // center of the fault, Okada's coordinates: (L/2;W/2) +#define FLT_POS_MT 1 // middle of the top edge: (L/2;W) +#define FLT_POS_BT 2 // beginning of the top edge: (0;W) +#define FLT_POS_BB 3 // beginning of the bottom edge: (0;0) +#define FLT_POS_MB 4 // middle of the bottom edge: (L/2;0) + +#define FLT_ERR_DATA 1 +#define FLT_ERR_ZTOP 3 +#define FLT_ERR_INTERNAL 4 +#define FLT_ERR_STRIKE 6 + +class cOkadaFault +{ + +protected: + + int checked; + int adjust_depth; + double sind; + double cosd; + double sins; + double coss; + double tand; + double coslat; + double zbot; + double wp; + double dslip; + double sslip; + + double mw2m0(); + +public: + + int refpos; // 0- one of POS_XX (see above definitions) + double mw; + double slip; + double lon,lat,depth; + double strike; + double dip; + double rake; + double length,width; + double mu; + + cOkadaFault(); + ~cOkadaFault(); + int read( char *faultparam ); + int check(); + double getMw(); + double getM0(); + double getZtop(); + int global2local( double glon, double glat, double& lx, double& ly ); + int local2global( double lx, double ly, double& glon, double& glat ); + int getDeformArea( double& lonmin, double& lonmax, double& latmin, double& latmax ); + int calculate( double lon, double lat, double& uz, double& ulon, double &ulat ); + int calculate( double lon, double lat, double& uz ); +}; + +int okada( double L,double W,double D,double sinD,double cosD,double U1,double U2,double x,double y,int flag_xy, double *Ux,double *Uy,double *Uz ); + +#endif // OKADAFAULT_H diff --git a/cpu/branches/web/cSphere.cpp b/cpu/branches/web/cSphere.cpp new file mode 100644 index 0000000000000000000000000000000000000000..94e768dd7f0a585d9c1b521d9ca34c2d906e9e65 --- /dev/null +++ b/cpu/branches/web/cSphere.cpp @@ -0,0 +1,328 @@ +#include +#include +#include +#include +#include + + +//========================================================================= +// Constructor +cObsArray::cObsArray() +{ + nPos = 0; + nObs = 0; + id = NULL; + lon = NULL; + lat = NULL; + obs = NULL; +} + + +//========================================================================= +// Destructor +cObsArray::~cObsArray() +{ + if( lon ) delete [] lon; + if( lat ) delete [] lat; + if( obs ) { + for( int n=0; n 0 ) { + obs = new double* [nPos]; + for( n=0; n 1 ) rad = 1; + c = 2 * asin(rad); + dist = REARTH*c; + + return dist; +} + +double GeoStrikeOnSphere( double lon1, double lat1, double lon2, double lat2 ) +{ + const double G2R = 3.14159265358979/180.; // multiplyer to convert from degrees into radians + const double R2G = 180./3.14159265358979; // multiplyer to convert from radians into degrees + const double REARTH = 6378.137; // Earth radius in km along equator + double strike, distRad; + + + if( (lat1 == lat2) && (lon1 == lon2) ) { + strike = 0.; + } + else if( lon1 == lon2 ) { + if( lat1 > lat2 ) + strike = 180.; + else + strike = 0.; + } + else { + distRad = GeoDistOnSphere( lon1,lat1,lon2,lat2 ) / REARTH; + strike = R2G * asin( cos(G2R*lat2)*sin(G2R*(lon2-lon1)) / sin(distRad) ); + + if( (lat2 > lat1) && (lon2 > lon1) ) { + } + else if( (lat2 < lat1) && (lon2 < lon1) ) { + strike = 180.0 - strike; + } + else if( (lat2 < lat1) && (lon2 > lon1) ) { + strike = 180.0 - strike; + } + else if( (lat2 > lat1) && (lon2 < lon1) ) { + strike += 360.0; + } + + } + +// if( strike > 180.0 ) strike -= 180.0; + + return strike; +} diff --git a/cpu/branches/web/cSphere.h b/cpu/branches/web/cSphere.h new file mode 100644 index 0000000000000000000000000000000000000000..d528ef9041f6e9d055b43bdd1d61328433c6e20b --- /dev/null +++ b/cpu/branches/web/cSphere.h @@ -0,0 +1,36 @@ +#ifndef ONSPHERE_H +#define ONSPHERE_H + +#define Re 6384.e+3 // Earth radius + +class cObsArray +{ + +public: + + int nPos; + int nObs; + char **id; + double *lon; + double *lat; + double **obs; + + cObsArray(); + ~cObsArray(); + int read( char *fname ); + int write( char *fname ); + int resetObs(); + int resetObs( int newnobs ); + int findById( char *id0 ); + long writeBin( FILE *fp ); + long readBin( FILE *fp ); + double residual( cObsArray& ref ); + double norm(); + +}; + + +double GeoDistOnSphere( double lon1, double lat1, double lon2, double lat2 ); +double GeoStrikeOnSphere( double lon1, double lat1, double lon2, double lat2 ); + +#endif // ONSPHERE_H diff --git a/cpu/branches/web/easywave.h b/cpu/branches/web/easywave.h new file mode 100644 index 0000000000000000000000000000000000000000..035442617edcbeb077d9a83141ccff49f7449f50 --- /dev/null +++ b/cpu/branches/web/easywave.h @@ -0,0 +1,99 @@ +#ifndef EASYWAVE_H +#define EASYWAVE_H + +#define Re 6384.e+3 // Earth radius +#define Gravity 9.81 // gravity acceleration +#define Omega 7.29e-5 // Earth rotation period [1/sec] + +#define MAX_VARS_PER_NODE 12 + +#define iD 0 +#define iH 1 +#define iHmax 2 +#define iM 3 +#define iN 4 +#define iR1 5 +#define iR2 6 +#define iR3 7 +#define iR4 8 +#define iR5 9 +#define iTime 10 +#define iTopo 11 + +// Global data +struct EWPARAMS { + char *modelName; + char *modelSubset; + char *fileBathymetry; + char *fileSource; + char *filePOIs; + int dt; + int time; + int timeMax; + int poiDt; + int poiReport; + int outDump; + int outProgress; + int outPropagation; + int coriolis; + float dmin; + float poiDistMax; + float poiDepthMin; + float poiDepthMax; + float ssh0ThresholdRel; + float ssh0ThresholdAbs; + float sshClipThreshold; + float sshZeroThreshold; + float sshTransparencyThreshold; + float sshArrivalThreshold; + bool gpu; + bool adjustZtop; + bool verbose; +}; + +extern struct EWPARAMS Par; +extern int NLon,NLat; +extern double LonMin,LonMax,LatMin,LatMax; +extern double DLon,DLat; // steps in grad +extern double Dx,Dy; // steps in m, dx must be multiplied by cos(y) before use +extern float *R6; +extern float *C1; +extern float *C2; +extern float *C3; +extern float *C4; +extern int Imin; +extern int Imax; +extern int Jmin; +extern int Jmax; + +#define idx(j,i) ((i-1)*NLat+j-1) +#define getLon(i) (LonMin+(i-1)*DLon) +#define getLat(j) (LatMin+(j-1)*DLat) + +int ewLoadBathymetry(); +int ewParam( int argc, char **argv ); +void ewLogParams(void); +int ewReset(); +int ewSource(); +int ewStep(); +int ewStepCor(); + +int ewStart2DOutput(); +int ewOut2D(); +int ewDump2D(); +int ewLoadPOIs(); +int ewSavePOIs(); +int ewDumpPOIs(); +int ewDumpPOIsCompact( int istage ); + +extern int NPOIs; +extern long *idxPOI; + +/* verbose printf: only executed if -verbose was set */ +#define printf_v( Args, ... ) if( Par.verbose ) printf( Args, ##__VA_ARGS__); + +#include "ewNode.h" + +extern CNode *gNode; + +#endif /* EASYWAVE_H */ diff --git a/cpu/branches/web/ewCudaKernels.cu b/cpu/branches/web/ewCudaKernels.cu new file mode 100644 index 0000000000000000000000000000000000000000..cce42701f62935fa054319795c3eafc0646700a2 --- /dev/null +++ b/cpu/branches/web/ewCudaKernels.cu @@ -0,0 +1,188 @@ +#include "ewGpuNode.cuh" +#include "ewCudaKernels.cuh" + +__global__ void runWaveUpdateKernel( KernelData data ) { + + Params& dp = data.params; + + int i = blockIdx.y * blockDim.y + threadIdx.y + dp.iMin; + int j = blockIdx.x * blockDim.x + threadIdx.x + dp.jMin; + int ij = data.idx(i,j); + float absH; + + /* maybe unnecessary if controlled from outside */ + if( i <= dp.iMax && j <= dp.jMax && data.d[ij] != 0 ) { + + float hh = data.h[ij] - data.cR1[ij] * ( data.fM[ij] - data.fM[data.le(ij)] + data.fN[ij] * data.cR6[j] - data.fN[data.dn(ij)]*data.cR6[j-1] ); + + absH = fabs(hh); + + if( absH < dp.sshZeroThreshold ) { + hh = 0.f; + } else if( hh > data.hMax[ij] ) { + data.hMax[ij] = hh; + //hMax[ij] = fmaxf(hMax[ij],h[ij]); + } + + if( dp.sshArrivalThreshold && data.tArr[ij] < 0 && absH > dp.sshArrivalThreshold ) + data.tArr[ij] = dp.mTime; + + data.h[ij] = hh; + + } + +} + +__global__ void runFluxUpdateKernel( KernelData data ) { + + Params& dp = data.params; + + int i = blockIdx.y * blockDim.y + threadIdx.y + dp.iMin; + int j = blockIdx.x * blockDim.x + threadIdx.x + dp.jMin; + int ij = data.idx(i,j); + + if( i <= dp.iMax && j <= dp.jMax && data.d[ij] != 0 ) { + + float hh = data.h[ij]; + + if( data.d[data.ri(ij)] != 0 ) + data.fM[ij] = data.fM[ij] - data.cR2[ij]*(data.h[data.ri(ij)] - hh); + + if( data.d[data.up(ij)] != 0 ) + data.fN[ij] = data.fN[ij] - data.cR4[ij]*(data.h[data.up(ij)] - hh); + + } + +} + +__global__ void runWaveBoundaryKernel( KernelData data ) { + + KernelData& dt = data; + Params& dp = data.params; + + int id = blockIdx.x * blockDim.x + threadIdx.x + 2; + int ij; + + if( dt.devID == 0 ) { + + if( id <= dp.nJ-1 ) { + ij = dt.idx(1,id); + dt.h[ij] = sqrtf( powf(dt.fM[ij],2.0f) + 0.25f*powf((dt.fN[ij] + dt.fN[dt.dn(ij)]),2.0f) )*dt.cB2[id-1]; + if( dt.fM[ij] > 0 ) dt.h[ij] = -dt.h[ij]; + } + + if( id == 2 ) { + ij = dt.idx(1,1); + dt.h[ij] = sqrtf( powf(dt.fM[ij],2.0f) + powf(dt.fN[ij],2.0f) )*dt.cB1[0]; + if( dt.fN[ij] > 0 ) dt.h[ij] = -dt.h[ij]; + + ij = dt.idx(1,dp.nJ); + dt.h[ij] = sqrtf( powf(dt.fM[ij],2.0f) + powf(dt.fN[dt.dn(ij)],2.0f) )*dt.cB3[0]; + if( dt.fN[dt.dn(ij)] < 0 ) dt.h[ij] = -dt.h[ij]; + } + + } else if( dt.devID == dt.devNum - 1 ) { + + if( id <= dp.nJ-1 ) { + ij = dt.idx(dp.nI,id); + dt.h[ij] = sqrtf( powf(dt.fM[dt.le(ij)],2.0f) + 0.25f*powf((dt.fN[ij] + dt.fN[dt.dn(ij)]),2.0f) )*dt.cB4[id-1]; + if( dt.fM[dt.le(ij)] < 0 ) dt.h[ij] = -dt.h[ij]; + } + + if( id == 2 ) { + ij = dt.idx(dp.nI,1); + dt.h[ij] = sqrtf( powf(dt.fM[dt.le(ij)],2.0f) + powf(dt.fN[ij],2.0f) )*dt.cB1[dp.nI-1]; + if( dt.fN[ij] > 0 ) dt.h[ij] = -dt.h[ij]; + + ij = dt.idx(dp.nI,dp.nJ); + dt.h[ij] = sqrtf( powf(dt.fM[dt.le(ij)],2.0f) + powf(dt.fN[dt.dn(ij)],2.0f) )*dt.cB3[dp.nI-1]; + if( dt.fN[dt.dn(ij)] < 0 ) dt.h[ij] = -dt.h[ij]; + } + + } + + if( id <= dp.nI - 1 ) { + ij = dt.idx(id,1); + dt.h[ij] = sqrtf( powf(dt.fN[ij],2.0f) + 0.25f*powf((dt.fM[ij] + dt.fM[dt.le(ij)]),2.0f) )*dt.cB1[id-1]; + if( dt.fN[ij] > 0 ) dt.h[ij] = -dt.h[ij]; + } + + if( id <= dp.nI - 1 ) { + ij = dt.idx(id,dp.nJ); + dt.h[ij] = sqrtf( powf(dt.fN[dt.dn(ij)],2.0f) + 0.25f*powf((dt.fM[ij] + dt.fM[dt.dn(ij)]),2.0f) )*dt.cB3[id-1]; + if( dt.fN[dt.dn(ij)] < 0 ) dt.h[ij] = -dt.h[ij]; + } + +} + +__global__ void runFluxBoundaryKernel( KernelData data ) { + + KernelData& dt = data; + Params& dp = data.params; + + int id = blockIdx.x * blockDim.x + threadIdx.x + 1; + int ij; + + if( dt.devID == 0 ) { + + if( id <= dp.nJ ) { + ij = dt.idx(1,id); + dt.fM[ij] = dt.fM[ij] - dt.cR2[ij]*(dt.h[dt.ri(ij)] - dt.h[ij]); + } + + if( id <= dp.nJ-1 ) { + ij = dt.idx(1,id); + dt.fN[ij] = dt.fN[ij] - dt.cR4[ij]*(dt.h[dt.up(ij)] - dt.h[ij]); + } + + } else if( dt.devID == dt.devNum - 1 ) { + + if( id <= dp.nJ-1 ) { + ij = dt.idx(dp.nI,id); + dt.fN[ij] = dt.fN[ij] - dt.cR4[ij]*(dt.h[dt.up(ij)] - dt.h[ij]); + } + + } + + if( id <= dp.nI - 1 ) { + ij = dt.idx(id,1); + dt.fM[ij] = dt.fM[ij] - dt.cR2[ij]*(dt.h[dt.ri(ij)] - dt.h[ij]); + } + + if( id <= dp.nI - 1 ) { + ij = dt.idx(id,dp.nJ); + dt.fM[ij] = dt.fM[ij] - dt.cR2[ij]*(dt.h[dt.ri(ij)] - dt.h[ij]); + } + + if( id <= dp.nI ) { + ij = dt.idx(id,1); + dt.fN[ij] = dt.fN[ij] - dt.cR4[ij]*(dt.h[dt.up(ij)] - dt.h[ij]); + } + +} + +__global__ void runGridExtendKernel( KernelData data ) { + + Params& dp = data.params; + + int id = blockIdx.x * blockDim.x + threadIdx.x; + + if( id >= dp.jMin && id <= dp.jMax ) { + + if( fabsf(data.h[data.idx(dp.iMin+2,id)]) > dp.sshClipThreshold ) + atomicAdd( &(data.extend->x), 1 ); + + if( fabsf(data.h[data.idx(dp.iMax-2,id)]) > dp.sshClipThreshold ) + atomicAdd( &(data.extend->y), 1 ); + } + + if( id >= dp.iMin && id <= dp.iMax ) { + + if( fabsf(data.h[data.idx(id,dp.jMin+2)]) > dp.sshClipThreshold ) + atomicAdd( &(data.extend->z), 1 ); + + if( fabsf(data.h[data.idx(id,dp.jMax-2)]) > dp.sshClipThreshold ) + atomicAdd( &(data.extend->w), 1 ); + } + +} diff --git a/cpu/branches/web/ewCudaKernels.cuh b/cpu/branches/web/ewCudaKernels.cuh new file mode 100644 index 0000000000000000000000000000000000000000..2a459281706b7e86a35c1009f17f583df71f6f24 --- /dev/null +++ b/cpu/branches/web/ewCudaKernels.cuh @@ -0,0 +1,10 @@ +#ifndef EW_KERNELS_H +#define EW_KERNELS_H + +__global__ void runWaveUpdateKernel( KernelData data ); +__global__ void runWaveBoundaryKernel( KernelData data ); +__global__ void runFluxUpdateKernel( KernelData data ); +__global__ void runFluxBoundaryKernel( KernelData data ); +__global__ void runGridExtendKernel( KernelData data ); + +#endif /* EW_KERNELS_H */ diff --git a/cpu/branches/web/ewGpuNode.cu b/cpu/branches/web/ewGpuNode.cu new file mode 100644 index 0000000000000000000000000000000000000000..a369ed49933fd94d8044bb9f04b4785db2d397fa --- /dev/null +++ b/cpu/branches/web/ewGpuNode.cu @@ -0,0 +1,604 @@ +#include "ewGpuNode.cuh" +#include "ewCudaKernels.cuh" + +CGpuNode::CGpuNode() { + + pitch = 0; + copied = true; + + /* TODO: make dynamic */ + num_virtual_gpus = 4; + num_real_gpus = 2; + + vgpus = new VGpu[num_virtual_gpus]; + gpus = new Gpu[num_real_gpus]; + + cudaMallocHost( &extend, num_virtual_gpus * sizeof(int4) ); + + for( int j = 0; j < num_real_gpus; j++ ) { + + cudaSetDevice( j ); + + cudaDeviceSetCacheConfig( cudaFuncCachePreferL1 ); + + gpus[j].id = j; + + for( int i = 0; i < gpus[j].NEVENTS; i++ ) { + cudaEventCreate( &(gpus[j].evtStart[i]) ); + cudaEventCreate( &(gpus[j].evtEnd[i]) ); + gpus[j].dur[i] = 0.0f; + } + + } + + for( int j = 0; j < num_virtual_gpus; j++ ) { + + VGpu& vgpu = vgpus[j]; + + vgpu.data.devID = j; + vgpu.data.devNum = num_virtual_gpus; + + vgpu.dev = &(gpus[j % num_real_gpus]); + + vgpu.dev->maxId = j / num_real_gpus; + vgpu.relId = j / num_real_gpus; + + cudaSetDevice( vgpu.dev->id ); + + for( int i = 0; i < vgpu.NSTREAMS; i++ ) { + cudaStreamCreate( &(vgpu.stream[i]) ); + } + + cudaEventCreate( &vgpu.evtSync ); + } + + for( int j = 0; j < num_real_gpus - 1; j++ ) { + + int peerAccess = 0; + cudaDeviceCanAccessPeer( &peerAccess, j, j + 1 ); + + printf_v("GPU #%u can access GPU #%u: %u\n", j, j + 1, peerAccess); + if( peerAccess ) { + cudaSetDevice( j ); + cudaDeviceEnablePeerAccess( j + 1, 0 ); + } + + cudaDeviceCanAccessPeer( &peerAccess, j + 1, j ); + + printf_v("GPU #%u can access GPU #%u: %u\n", j + 1, j, peerAccess); + if( peerAccess ) { + cudaSetDevice( j + 1 ); + cudaDeviceEnablePeerAccess( j, 0 ); + } + + } + + memset( extend, 0, num_virtual_gpus * sizeof(int4) ); +} + +CGpuNode::~CGpuNode() { + + for( int j = 0; j < num_virtual_gpus; j++ ) { + + VGpu& vgpu = vgpus[j]; + + cudaSetDevice( vgpu.dev->id ); + + for( int i = 0; i < vgpu.NSTREAMS; i++ ) { + cudaStreamDestroy( vgpu.stream[i] ); + } + + cudaEventDestroy( vgpu.evtSync ); + + } + + for( int j = 0; j < num_real_gpus; j++ ) { + + cudaSetDevice( j ); + + for( int i = 0; i < gpus[j].NEVENTS; i++ ) { + cudaEventDestroy( gpus[j].evtStart[i] ); + cudaEventDestroy( gpus[j].evtEnd[i] ); + } + + cudaDeviceReset(); + } + +} + +int CGpuNode::mallocMem() { + + CArrayNode::mallocMem(); + + Params& dp = params; + + /* fill in some fields here */ + dp.nI = NLon; + dp.nJ = NLat; + dp.sshArrivalThreshold = Par.sshArrivalThreshold; + dp.sshClipThreshold = Par.sshClipThreshold; + dp.sshZeroThreshold = Par.sshZeroThreshold; + dp.lpad = 0; + + size_t nJ_aligned = dp.nJ + dp.lpad; + + init_vgpus(); + + for( int i = 0; i < num_virtual_gpus; i++ ) { + + VGpu& vgpu = vgpus[i]; + KernelData& data = vgpu.data; + + int ghost = vgpu.gb + vgpu.gt; + + cudaSetDevice( vgpu.dev->id ); + + /* arrays that need ghost zones must add 2 to vgpu.size */ + /* 2-dim */ + CUDA_CALL( cudaMallocPitch( &(data.d), &pitch, nJ_aligned * sizeof(float), vgpu.size + ghost ) ); + CUDA_CALL( cudaMallocPitch( &(data.fM), &pitch, nJ_aligned * sizeof(float), vgpu.size + ghost ) ); + CUDA_CALL( cudaMallocPitch( &(data.fN), &pitch, nJ_aligned * sizeof(float), vgpu.size + ghost ) ); + CUDA_CALL( cudaMallocPitch( &(data.cR1), &pitch, nJ_aligned * sizeof(float), vgpu.size + ghost ) ); + CUDA_CALL( cudaMallocPitch( &(data.cR2), &pitch, nJ_aligned * sizeof(float), vgpu.size + ghost ) ); + CUDA_CALL( cudaMallocPitch( &(data.cR4), &pitch, nJ_aligned * sizeof(float), vgpu.size + ghost ) ); + CUDA_CALL( cudaMallocPitch( &(data.tArr), &pitch, nJ_aligned * sizeof(float), vgpu.size + ghost ) ); + /* TODO: cR3, cR5 for coriolis */ + + CUDA_CALL( cudaMallocPitch( &(data.h), &pitch, nJ_aligned * sizeof(float), vgpu.size + ghost ) ); + CUDA_CALL( cudaMallocPitch( &(data.hMax), &pitch, nJ_aligned * sizeof(float), vgpu.size + ghost ) ); + + /* 1-dim */ + CUDA_CALL( cudaMalloc( &(data.cR6), dp.nJ * sizeof(float) ) ); + CUDA_CALL( cudaMalloc( &(data.cB1), (vgpu.size + ghost) * sizeof(float) ) ); + CUDA_CALL( cudaMalloc( &(data.cB2), dp.nJ * sizeof(float) ) ); + CUDA_CALL( cudaMalloc( &(data.cB3), (vgpu.size + ghost) * sizeof(float) ) ); + CUDA_CALL( cudaMalloc( &(data.cB4), dp.nJ * sizeof(float) ) ); + + /* data field to store return values like calculated iMin and iMax values */ + CUDA_CALL( cudaMalloc( &(data.extend), sizeof(int4) ) ); + + /* assign some parameters */ + data.params.nI = vgpu.getRel( vgpu.end ); + data.params.nJ = dp.nJ; + data.params.sshArrivalThreshold = dp.sshArrivalThreshold; + data.params.sshClipThreshold = dp.sshClipThreshold; + data.params.sshZeroThreshold = dp.sshZeroThreshold; + data.params.lpad = dp.lpad; + + /* TODO: make sure that pitch is a multiple of 4 and the same for each cudaMallocPitch() call */ + data.params.pI = pitch / sizeof(float); + } + + return 0; +} + +int CGpuNode::copyToGPU() { + + Params& dp = params; + + /* fill in further fields here */ + dp.iMin = Imin; + dp.iMax = Imax; + dp.jMin = Jmin; + dp.jMax = Jmax; + + for( int i = 0; i < num_virtual_gpus; i++ ) { + + VGpu& vgpu = vgpus[i]; + KernelData& data = vgpu.data; + + /* special treatment because of needed ghost zones */ + int off = (vgpu.off - vgpu.gt - 1); + int ghost = vgpu.gb + vgpu.gt; + + cudaSetDevice( vgpu.dev->id ); + + /* FIXME: should not work correctly */ + /* add offset to data.d to guarantee alignment: data.d + LPAD */ + /* 2-dim */ + CUDA_CALL( cudaMemcpy2D( data.d + dp.lpad, pitch, d + off * dp.nJ, dp.nJ * sizeof(float), dp.nJ * sizeof(float), vgpu.size + ghost, cudaMemcpyHostToDevice ) ); + CUDA_CALL( cudaMemcpy2D( data.fM + dp.lpad, pitch, fM + off * dp.nJ, dp.nJ * sizeof(float), dp.nJ * sizeof(float), vgpu.size + ghost, cudaMemcpyHostToDevice ) ); + CUDA_CALL( cudaMemcpy2D( data.fN + dp.lpad, pitch, fN + off * dp.nJ, dp.nJ * sizeof(float), dp.nJ * sizeof(float), vgpu.size + ghost, cudaMemcpyHostToDevice ) ); + CUDA_CALL( cudaMemcpy2D( data.cR1 + dp.lpad, pitch, cR1 + off * dp.nJ, dp.nJ * sizeof(float), dp.nJ * sizeof(float), vgpu.size + ghost, cudaMemcpyHostToDevice ) ); + CUDA_CALL( cudaMemcpy2D( data.cR2 + dp.lpad, pitch, cR2 + off * dp.nJ, dp.nJ * sizeof(float), dp.nJ * sizeof(float), vgpu.size + ghost, cudaMemcpyHostToDevice ) ); + CUDA_CALL( cudaMemcpy2D( data.cR4 + dp.lpad, pitch, cR4 + off * dp.nJ, dp.nJ * sizeof(float), dp.nJ * sizeof(float), vgpu.size + ghost, cudaMemcpyHostToDevice ) ); + CUDA_CALL( cudaMemcpy2D( data.tArr + dp.lpad, pitch, tArr + off * dp.nJ, dp.nJ * sizeof(float), dp.nJ * sizeof(float), vgpu.size + ghost, cudaMemcpyHostToDevice ) ); + CUDA_CALL( cudaMemcpy2D( data.h + dp.lpad, pitch, h + off * dp.nJ, dp.nJ * sizeof(float), dp.nJ * sizeof(float), vgpu.size + ghost, cudaMemcpyHostToDevice ) ); + CUDA_CALL( cudaMemcpy2D( data.hMax + dp.lpad, pitch, hMax + off * dp.nJ, dp.nJ * sizeof(float), dp.nJ * sizeof(float), vgpu.size + ghost, cudaMemcpyHostToDevice ) ); + + /* FIXME: move global variables into data structure */ + /* 1-dim */ + CUDA_CALL( cudaMemcpy( data.cR6, R6, dp.nJ * sizeof(float), cudaMemcpyHostToDevice ) ); + CUDA_CALL( cudaMemcpy( data.cB1, C1 + off, (vgpu.size + ghost) * sizeof(float), cudaMemcpyHostToDevice ) ); + CUDA_CALL( cudaMemcpy( data.cB2, C2, dp.nJ * sizeof(float), cudaMemcpyHostToDevice ) ); + CUDA_CALL( cudaMemcpy( data.cB3, C3 + off, (vgpu.size + ghost) * sizeof(float), cudaMemcpyHostToDevice ) ); + CUDA_CALL( cudaMemcpy( data.cB4, C4, dp.nJ * sizeof(float), cudaMemcpyHostToDevice ) ); + + data.params.jMin = dp.jMin; + data.params.jMax = dp.jMax; + } + + return 0; +} +int CGpuNode::copyFromGPU() { + + Params& dp = params; + + for( int i = 0; i < num_virtual_gpus; i++ ) { + + VGpu& vgpu = vgpus[i]; + KernelData& data = vgpu.data; + + int off = (vgpu.off - 1) * dp.nJ; + + cudaSetDevice( vgpu.dev->id ); + + CUDA_CALL( cudaMemcpy2D( hMax + off, dp.nJ * sizeof(float), data.hMax + (vgpu.gt)*data.params.pI + dp.lpad, pitch, dp.nJ * sizeof(float), vgpu.size, cudaMemcpyDeviceToHost ) ); + CUDA_CALL( cudaMemcpy2D( tArr + off, dp.nJ * sizeof(float), data.tArr + (vgpu.gt)*data.params.pI + dp.lpad, pitch, dp.nJ * sizeof(float), vgpu.size, cudaMemcpyDeviceToHost ) ); + + } + + return 0; +} + +int CGpuNode::copyIntermediate() { + + /* ignore copy requests if data already present on CPU side */ + if( copied ) + return 0; + + Params& dp = params; + + for( int i = 0; i < num_virtual_gpus; i++ ) { + + VGpu& vgpu = vgpus[i]; + KernelData& data = vgpu.data; + + int off = (vgpu.off - 1) * dp.nJ; + + cudaSetDevice( vgpu.dev->id ); + + CUDA_CALL( cudaMemcpy2D( h + off, dp.nJ * sizeof(float), data.h + (vgpu.gt) * data.params.pI + dp.lpad, pitch, dp.nJ * sizeof(float), vgpu.size, cudaMemcpyDeviceToHost ) ); + + } + + /* copy finished */ + copied = true; + + return 0; +} + +int CGpuNode::copyPOIs() { + + Params& dp = params; + + if( copied ) + return 0; + + VGpu *vgpu; + + for( int n = 0; n < NPOIs; n++ ) { + + int i = idxPOI[n] / dp.nJ + 1; + int j = idxPOI[n] % dp.nJ + 1; + + for( int id = 0; id < num_virtual_gpus; id++ ) { + + if( vgpus[id].hasLine( i ) ) { + vgpu = &(vgpus[id]); + break; + } + + } + + int id = vgpu->data.idx( vgpu->getRel(i), j ); + + CUDA_CALL( cudaSetDevice( vgpu->dev->id ) ) + CUDA_CALL( cudaMemcpy( h + idxPOI[n], vgpu->data.h + dp.lpad + id, sizeof(float), cudaMemcpyDeviceToHost ) ); + } + + return 0; +} + +int CGpuNode::freeMem() { + + for( int i = 0; i < num_virtual_gpus; i++ ) { + + VGpu& vgpu = vgpus[i]; + KernelData& data = vgpu.data; + + cudaSetDevice( vgpu.dev->id ); + + /* 2-dim */ + CUDA_CALL( cudaFree( data.d ) ); + CUDA_CALL( cudaFree( data.h ) ); + CUDA_CALL( cudaFree( data.hMax ) ); + CUDA_CALL( cudaFree( data.fM ) ); + CUDA_CALL( cudaFree( data.fN ) ); + CUDA_CALL( cudaFree( data.cR1 ) ); + CUDA_CALL( cudaFree( data.cR2 ) ); + CUDA_CALL( cudaFree( data.cR4 ) ); + CUDA_CALL( cudaFree( data.tArr ) ); + + /* 1-dim */ + CUDA_CALL( cudaFree( data.cR6 ) ); + CUDA_CALL( cudaFree( data.cB1 ) ); + CUDA_CALL( cudaFree( data.cB2 ) ); + CUDA_CALL( cudaFree( data.cB3 ) ); + CUDA_CALL( cudaFree( data.cB4 ) ); + + CUDA_CALL( cudaFree( data.extend ) ); + } + + cudaFreeHost( extend ); + + for( int i = 0; i < num_real_gpus; i++ ) { + + float dur = 0.0f; + for( int j = 0; j < 7; j++ ) { + + printf_v("GPU #%u, duration %u: %.3f\n", i, j, gpus[i].dur[j]); + dur += gpus[i].dur[j]; + } + printf_v("GPU #%u, duration total: %.3f\n", i, dur); + } + + CArrayNode::freeMem(); + + return 0; +} + +int CGpuNode::run() { + + Params& dp = params; + + int nThreads = 256; + int xThreads = 32; + int yThreads = nThreads / xThreads; + + int4 glb_MinMax = {0,0,0,0}; + + dp.mTime = Par.time; + + for( int i = 0; i < num_virtual_gpus; i++ ) { + + VGpu& vgpu = vgpus[i]; + KernelData& data = vgpu.data; + + updateParams( vgpu ); + + vgpu.nBlocks = ceil( (float)max(data.params.nI,dp.nJ) / (float)nThreads ); + vgpu.threads = dim3( xThreads, yThreads ); + vgpu.blocks = dim3( ceil( (float)dp.nJ / (float)xThreads ), ceil( (float)data.params.nI / (float)yThreads ) ); + + CUDA_CALL( cudaSetDevice( vgpu.dev->id ) ); + + if( Par.verbose && vgpu.relId == 0 ) + CUDA_CALL( cudaEventRecord( vgpu.dev->evtStart[0] ) ); + + if( isActive( vgpu ) ) { + + runWaveUpdateKernel<<>>( data ); + runWaveBoundaryKernel<<>>( data ); + } + + if( Par.verbose && vgpu.relId == vgpu.dev->maxId ) + CUDA_CALL( cudaEventRecord( vgpu.dev->evtEnd[0] ) ); + } + + for( int i = 0; i < num_virtual_gpus; i++ ) { + + VGpu& vgpu = vgpus[i]; + + CUDA_CALL( cudaSetDevice( vgpu.dev->id ) ); + + if( Par.verbose && vgpu.relId == 0 ) + CUDA_CALL( cudaEventRecord( vgpu.dev->evtStart[5] ) ); + + if( isActive( vgpu ) ) { + + if( i < num_virtual_gpus - 1 ) { + int off = ( vgpu.getRel(vgpu.end) - 1 ) * vgpu.data.params.pI; + CUDA_CALL( cudaMemcpyPeerAsync( vgpus[i+1].data.h, vgpus[i+1].dev->id, vgpu.data.h + off, vgpu.dev->id, vgpu.data.params.pI * sizeof(float), vgpu.stream[0]) ); + } + + if( i > 0 ) { + int off = ( vgpus[i-1].getRel(vgpus[i-1].end) ) * vgpus[i-1].data.params.pI; + CUDA_CALL( cudaMemcpyPeerAsync( vgpus[i-1].data.h + off, vgpus[i-1].dev->id, vgpu.data.h + vgpu.data.params.pI, vgpu.dev->id, vgpu.data.params.pI * sizeof(float), vgpu.stream[0] ) ); + } + + } + + if( Par.verbose && vgpu.relId == vgpu.dev->maxId ) + CUDA_CALL( cudaEventRecord( vgpu.dev->evtEnd[5] ) ); + + cudaEventRecord( vgpu.evtSync, vgpu.stream[0] ); + } + + for( int i = 0; i < num_virtual_gpus; i++ ) { + + VGpu& vgpu = vgpus[i]; + + if( ! isActive(vgpu) ) + continue; + + CUDA_CALL( cudaSetDevice( vgpu.dev->id ) ); + + if( i < num_virtual_gpus - 1 ) + cudaStreamWaitEvent( vgpu.stream[0], vgpus[i+1].evtSync, 0 ); + + if( i > 0 ) + cudaStreamWaitEvent( vgpu.stream[0], vgpus[i-1].evtSync, 0 ); + } + + for( int i = 0; i < num_virtual_gpus; i++ ) { + + VGpu& vgpu = vgpus[i]; + KernelData& data = vgpu.data; + + CUDA_CALL( cudaSetDevice( vgpu.dev->id ) ); + + if( Par.verbose && vgpu.relId == 0 ) + CUDA_CALL( cudaEventRecord( vgpu.dev->evtStart[2] ) ); + + if( isActive( vgpu ) ) { + + runFluxUpdateKernel<<>>( data ); + runFluxBoundaryKernel<<>>( data ); + } + + if( Par.verbose && vgpu.relId == vgpu.dev->maxId ) + CUDA_CALL( cudaEventRecord( vgpu.dev->evtEnd[2] ) ); + } + + for( int i = 0; i < num_virtual_gpus; i++ ) { + + VGpu& vgpu = vgpus[i]; + + CUDA_CALL( cudaSetDevice( vgpu.dev->id ) ); + + if( Par.verbose && vgpu.relId == 0 ) + CUDA_CALL( cudaEventRecord( vgpu.dev->evtStart[6] ) ); + + if( isActive( vgpu ) ) { + + if( i < num_virtual_gpus - 1 ) { + int off = ( vgpu.getRel(vgpu.end) - 1 ) * vgpu.data.params.pI; + CUDA_CALL( cudaMemcpyPeerAsync( vgpus[i+1].data.fN, vgpus[i+1].dev->id, vgpu.data.fN + off, vgpu.dev->id, vgpu.data.params.pI * sizeof(float), vgpu.stream[0]) ); + CUDA_CALL( cudaMemcpyPeerAsync( vgpus[i+1].data.fM, vgpus[i+1].dev->id, vgpu.data.fM + off, vgpu.dev->id, vgpu.data.params.pI * sizeof(float), vgpu.stream[0]) ); + } + + if( i > 0 ) { + int off = ( vgpus[i-1].getRel(vgpus[i-1].end) ) * vgpus[i-1].data.params.pI; + CUDA_CALL( cudaMemcpyPeerAsync( vgpus[i-1].data.fN + off, vgpus[i-1].dev->id, vgpu.data.fN + vgpu.data.params.pI, vgpu.dev->id, vgpu.data.params.pI * sizeof(float), vgpu.stream[0] ) ); + CUDA_CALL( cudaMemcpyPeerAsync( vgpus[i-1].data.fM + off, vgpus[i-1].dev->id, vgpu.data.fM + vgpu.data.params.pI, vgpu.dev->id, vgpu.data.params.pI * sizeof(float), vgpu.stream[0] ) ); + } + + } + + if( Par.verbose && vgpu.relId == vgpu.dev->maxId ) + CUDA_CALL( cudaEventRecord( vgpu.dev->evtEnd[6] ) ); + } + + for( int i = 0; i < num_virtual_gpus; i++ ) { + + VGpu& vgpu = vgpus[i]; + KernelData& data = vgpu.data; + + CUDA_CALL( cudaSetDevice( vgpu.dev->id ) ); + + if( Par.verbose && vgpu.relId == 0 ) + CUDA_CALL( cudaEventRecord( vgpu.dev->evtStart[4] ) ); + + if( isActive( vgpu ) ) { + + runGridExtendKernel<<>>( data ); + CUDA_CALL( cudaMemcpyAsync( &(extend[i]), data.extend, sizeof(int4), cudaMemcpyDeviceToHost, vgpu.stream[0]) ); + } + + if( Par.verbose && vgpu.relId == vgpu.dev->maxId ) + CUDA_CALL( cudaEventRecord( vgpu.dev->evtEnd[4] ) ); + + } + + for( int i = 0; i < num_virtual_gpus; i++ ) { + + VGpu& vgpu = vgpus[i]; + KernelData& data = vgpu.data; + + cudaSetDevice( vgpu.dev->id ); + cudaDeviceSynchronize(); + CUDA_CALL( cudaMemset( data.extend, 0, sizeof(int4) ) ); + + if( vgpu.hasLine( dp.iMin + 2 ) ) + glb_MinMax.x += extend[i].x; + + if( vgpu.hasLine( dp.iMax - 2 ) ) + glb_MinMax.y += extend[i].y; + + glb_MinMax.z += extend[i].z; + glb_MinMax.w += extend[i].w; + } + + if( Par.verbose ) { + for( int i = 0; i < num_real_gpus; i++ ) { + + cudaSetDevice( i ); + + float dur; + for( int j = 0; j < 7; j++ ) { + if( cudaEventElapsedTime( &dur, gpus[i].evtStart[j], gpus[i].evtEnd[j]) == cudaSuccess ) + gpus[i].dur[j] += dur; + } + } + } + + memset( extend, 0, num_virtual_gpus * sizeof(int4) ); + + if( glb_MinMax.x > 0 ) + Imin = dp.iMin = max( dp.iMin-1, 2 ); + + if( glb_MinMax.y > 0 ) + Imax = dp.iMax = min( dp.iMax+1, dp.nI-1 ); + + if( glb_MinMax.z > 0 ) + Jmin = dp.jMin = max( dp.jMin-1, 2 ); + + if( glb_MinMax.w > 0 ) + Jmax = dp.jMax = min( dp.jMax+1, dp.nJ-1 ); + + /* data has changed now -> copy becomes necessary */ + copied = false; + + return 0; +} + +int CGpuNode::init_vgpus() { + + Params& dp = params; + + int nI_partial = dp.nI / num_virtual_gpus; + int nI_mod = dp.nI % num_virtual_gpus; + int off = 1; + + for( int i = 0; i < num_virtual_gpus; i++ ) { + + vgpus[i].size = nI_partial + (int)( i < nI_mod ); + vgpus[i].off = off; + vgpus[i].end = vgpus[i].off + vgpus[i].size - 1; + off += vgpus[i].size; + + vgpus[i].gt = 1 - (int)( i == 0 ); + vgpus[i].gb = 1 - (int)( i == num_virtual_gpus - 1 ); + + printf_v("VGPU #%u: off=%u end=%u size=%u gt=%u gb=%u\n", i, vgpus[i].off, vgpus[i].end, vgpus[i].size, vgpus[i].gt, vgpus[i].gb); + } + + return 0; +} + +int CGpuNode::updateParams( VGpu& vgpu ) { + + Params& dp = vgpu.data.params; + + dp.mTime = params.mTime; + dp.jMin = params.jMin; + dp.jMax = params.jMax; + + if( params.iMin <= vgpu.end && params.iMax >= vgpu.off ) { + + /* range of virtual GPU must be handled */ + /* calculate relative values for iMin and iMax depending on current range */ + dp.iMin = max( 2, vgpu.getRel(params.iMin) ); + dp.iMax = min( vgpu.getRel( vgpu.end ), vgpu.getRel(params.iMax) ); + + return 0; + } + + return 1; +} + +bool CGpuNode::isActive( VGpu& vgpu ) { + + return ( params.iMin <= vgpu.end && params.iMax >= vgpu.off ); +} diff --git a/cpu/branches/web/ewGpuNode.cuh b/cpu/branches/web/ewGpuNode.cuh new file mode 100644 index 0000000000000000000000000000000000000000..93837c2fbc41db47e625241bd6b09891d5616afb --- /dev/null +++ b/cpu/branches/web/ewGpuNode.cuh @@ -0,0 +1,144 @@ +#ifndef EW_GPUNODE_H +#define EW_GPUNODE_H + +/* FIXME: check header dependencies */ +#include "easywave.h" +#include "ewNode.h" +#include + +#define CUDA_CALL(x) if( x != cudaSuccess ) { fprintf( stderr, "Error in file %s on line %u: %s\n", __FILE__, __LINE__, cudaGetErrorString( cudaGetLastError() ) ); return 1; } + +#undef idx + +class Params { + +public: + int mTime; + int nI; + int nJ; + int iMin; + int iMax; + int jMin; + int jMax; + float sshArrivalThreshold; + float sshZeroThreshold; + float sshClipThreshold; + + /* pitch / sizeof(float) */ + size_t pI; + size_t lpad; +}; + +class KernelData { + +public: + /* 2-dim */ + float *d; + float *h; + float *hMax; + float *fM; + float *fN; + float *cR1; + float *cR2; + float *cR4; + float *tArr; + + /* 1-dim */ + float *cR6; + float *cB1; + float *cB2; + float *cB3; + float *cB4; + + Params params; + + int4 *extend; + int devID; + int devNum; + + __device__ int le( int ij ) { return ij - params.pI; } + __device__ int ri( int ij ) { return ij + params.pI; } + __device__ int up( int ij ) { return ij + 1; } + __device__ int dn( int ij ) { return ij - 1; } + __host__ __device__ int idx( int i, int j ) { return (j-1) + (i-1) * params.pI + params.lpad; } +}; + +class Gpu { + +public: + int id; + + int maxId; + + static const short NEVENTS = 7; + cudaEvent_t evtStart[NEVENTS]; + cudaEvent_t evtEnd[NEVENTS]; + float dur[NEVENTS]; + +}; + +class VGpu { + +public: + int off; + int end; + int size; + KernelData data; + + int gt, gb; + + static const short NSTREAMS = 2; + cudaStream_t stream[NSTREAMS]; + + cudaEvent_t evtSync; + + Gpu *dev; + int relId; + + int nBlocks; + dim3 threads; + dim3 blocks; + + bool hasLine( int i ) { return (i >= off && i <= end ); } + int getRel( int i ) { return (i - off + 1 + gt); } +}; + +/* GPU dependent */ +class CGpuNode : public CArrayNode { + +protected: + VGpu *vgpus; + Params params; + + Gpu *gpus; + + int4 *extend; + + /* line size in bytes */ + size_t pitch; + + /* specifies if data was already copied in the current calculation step */ + bool copied; + + /* multiple GPUs */ + int num_virtual_gpus; + int num_real_gpus; + +public: + CGpuNode(); + ~CGpuNode(); + int mallocMem(); + int copyToGPU(); + int copyFromGPU(); + int copyIntermediate(); + int copyPOIs(); + int freeMem(); + int run(); + +private: + int init_vgpus(); + int updateParams( VGpu& vgpu ); + bool isActive( VGpu& vgpu ); +}; + +#endif /* EW_GPUNODE_H */ diff --git a/cpu/branches/web/ewGrid.cpp b/cpu/branches/web/ewGrid.cpp new file mode 100644 index 0000000000000000000000000000000000000000..fbf705adc628b4057a5f41783852dc42da57254b --- /dev/null +++ b/cpu/branches/web/ewGrid.cpp @@ -0,0 +1,203 @@ +#include +#include +#include +#include +#include "easywave.h" + +int NLon,NLat; +double LonMin,LonMax,LatMin,LatMax; +double DLon,DLat; // steps in grad +double Dx,Dy; // steps in m, dx must be multiplied by cos(y) before use +float *R6; +float *C1; +float *C2; +float *C3; +float *C4; + + +int ewLoadBathymetry() +{ + FILE *fp; + char fileLabel[5]; + unsigned short shval; + int ierr,isBin,i,j,m,k; + float fval; + double dval; + + CNode& Node = *gNode; + + Log.print( "Loading bathymetry from %s", Par.fileBathymetry ); + + // check if bathymetry file is in ascii or binary format + if( (fp=fopen(Par.fileBathymetry,"rb")) == NULL ) return Err.post( Err.msgOpenFile(Par.fileBathymetry) ); + + memset( fileLabel, 0, 5 ); + ierr = fread( fileLabel, 4, 1, fp ); + if( !strcmp( fileLabel,"DSAA" ) ) + isBin = 0; + else if( !strcmp( fileLabel,"DSBB" ) ) + isBin = 1; + else + return Err.post( "%s: not GRD-file!", Par.fileBathymetry ); + + fclose(fp); + + if( isBin ) { + fp = fopen( Par.fileBathymetry, "rb" ); + ierr = fread( fileLabel, 4, 1, fp ); + ierr = fread( &shval, sizeof(unsigned short), 1, fp ); NLon = shval; + ierr = fread( &shval, sizeof(unsigned short), 1, fp ); NLat = shval; + } + else { + fp = fopen( Par.fileBathymetry, "rt" ); + ierr = fscanf( fp, "%s", fileLabel ); + ierr = fscanf( fp, " %d %d ", &NLon, &NLat ); + } + + // try to allocate memory for GRIDNODE structure and for caching arrays + printf_v("Size: %d %d %luMB\n", NLon, NLat, sizeof(float)*MAX_VARS_PER_NODE*NLon*NLat/1024/1024); + if( Node.mallocMem() ) return Err.post( Err.msgAllocateMem() ); + + if( isBin ) { + ierr = fread( &LonMin, sizeof(double), 1, fp ); ierr = fread( &LonMax, sizeof(double), 1, fp ); + ierr = fread( &LatMin, sizeof(double), 1, fp ); ierr = fread( &LatMax, sizeof(double), 1, fp ); + ierr = fread( &dval, sizeof(double), 1, fp ); ierr = fread( &dval, sizeof(double), 1, fp ); // zmin zmax + } + else { + ierr = fscanf( fp, " %lf %lf ", &LonMin, &LonMax ); + ierr = fscanf( fp, " %lf %lf ", &LatMin, &LatMax ); + ierr = fscanf( fp, " %*s %*s " ); // zmin, zmax + } + + DLon = (LonMax - LonMin)/(NLon - 1); // in degrees + DLat = (LatMax - LatMin)/(NLat - 1); + + Dx = Re * g2r( DLon ); // in m along the equator + Dy = Re * g2r( DLat ); + + /* NOTE: optimal would be reading everything in one step, but that does not work because rows and columns are transposed + * (only possible with binary data at all) - use temporary buffer for now (consumes additional memory!) */ + float *buf = new float[ NLat*NLon ]; + ierr = fread( buf, sizeof(float), NLat*NLon, fp ); + + for( i=1; i<=NLon; i++ ) { + for( j=1; j<=NLat; j++ ) { + + m = idx(j,i); + + if( isBin ) + fval = buf[ (j-1) * NLon + (i-1) ]; + //ierr = fread( &fval, sizeof(float), 1, fp ); + else + ierr = fscanf( fp, " %f ", &fval ); + + Node(m, iTopo) = fval; + Node(m, iTime) = -1; + Node(m, iD) = -fval; + + if( Node(m, iD) < 0 ) { + Node(m, iD) = 0.0f; + } else if( Node(m, iD) < Par.dmin ) { + Node(m, iD) = Par.dmin; + } + + } + } + + delete[] buf; + + for( k=1; k 15 ) Par.dt = 15; + else if( dtLoc > 10 ) Par.dt = 10; + else if( dtLoc > 5 ) Par.dt = 5; + else if( dtLoc > 2 ) Par.dt = 2; + else if( dtLoc > 1 ) Par.dt = 1; + else return Err.post("Bathymetry requires too small time step (<1sec)"); + } + + // Correct bathymetry for edge artefacts + for( i=1; i<=NLon; i++ ) { + if( Node(idx(1,i), iD) != 0 && Node(idx(2,i), iD) == 0 ) Node(idx(1,i), iD) = 0.; + if( Node(idx(NLat,i), iD) != 0 && Node(idx(NLat-1,i), iD) == 0 ) Node(idx(NLat,i), iD) = 0.; + } + for( j=1; j<=NLat; j++ ) { + if( Node(idx(j,1), iD) != 0 && Node(idx(j,2), iD) == 0 ) Node(idx(j,1), iD) = 0.; + if( Node(idx(j,NLon), iD) != 0 && Node(idx(j,NLon-1), iD) == 0 ) Node(idx(j,NLon), iD) = 0.; + } + + + // Calculate caching grid parameters for speedup + for( j=1; j<=NLat; j++ ) { + R6[j] = cosdeg( LatMin + (j-0.5)*DLat ); + } + + for( i=1; i<=NLon; i++ ) { + for( j=1; j<=NLat; j++ ) { + + m = idx(j,i); + + if( Node(m, iD) == 0 ) continue; + + Node(m, iR1) = Par.dt/Dy/R6[j]; + + if( i != NLon ) { + if( Node(m+NLat, iD) != 0 ) { + Node(m, iR2) = 0.5*Gravity*Par.dt/Dy/R6[j]*(Node(m, iD)+Node(m+NLat, iD)); + Node(m, iR3) = 0.5*Par.dt*Omega*sindeg( LatMin + (j-0.5)*DLat ); + } + } + else { + Node(m, iR2) = 0.5*Gravity*Par.dt/Dy/R6[j]*Node(m, iD)*2; + Node(m, iR3) = 0.5*Par.dt*Omega*sindeg( LatMin + (j-0.5)*DLat ); + } + + if( j != NLat ) { + if( Node(m+1, iD) != 0 ) { + Node(m, iR4) = 0.5*Gravity*Par.dt/Dy*(Node(m, iD)+Node(m+1, iD)); + Node(m, iR5) = 0.5*Par.dt*Omega*sindeg( LatMin + j*DLat ); + } + } + /* FIXME: Bug? */ + else { + Node(m, iR2) = 0.5*Gravity*Par.dt/Dy*Node(m, iD)*2; + Node(m, iR3) = 0.5*Par.dt*Omega*sindeg( LatMin + j*DLat ); + } + + } + } + + for( i=1; i<=NLon; i++ ) { + C1[i] = 0; + if( Node(idx(1,i), iD) != 0 ) C1[i] = 1./sqrt(Gravity*Node(idx(1,i), iD)); + C3[i] = 0; + if( Node(idx(NLat,i), iD) != 0 ) C3[i] = 1./sqrt(Gravity*Node(idx(NLat,i), iD)); + } + + for( j=1; j<=NLat; j++ ) { + C2[j] = 0; + if( Node(idx(j,1), iD) != 0 ) C2[j] = 1./sqrt(Gravity*Node(idx(j,1), iD)); + C4[j] = 0; + if( Node(idx(j,NLon), iD) != 0 ) C4[j] = 1./sqrt(Gravity*Node(idx(j,NLon), iD)); + } + + return 0; +} diff --git a/cpu/branches/web/ewNode.h b/cpu/branches/web/ewNode.h new file mode 100644 index 0000000000000000000000000000000000000000..2535cb112ea1087df03fb238642b70bc99e9edba --- /dev/null +++ b/cpu/branches/web/ewNode.h @@ -0,0 +1,187 @@ +#ifndef EW_NODE_H +#define EW_NODE_H + +#include +#include +#include "easywave.h" + +#define CHKRET( x ) if( (x) == NULL ) return 1; + +typedef float Float[MAX_VARS_PER_NODE]; + +class CNode { + +public: + virtual ~CNode() {}; + virtual float& operator()( const int idx1, const int idx2 ) = 0; + virtual int mallocMem() = 0; + virtual int copyToGPU() = 0; + virtual int copyFromGPU() = 0; + virtual int copyIntermediate() = 0; + virtual int copyPOIs() = 0; + virtual int freeMem() = 0; + virtual int run() = 0; + + virtual void initMemory( int index, int val ) = 0; +}; + +class CStructNode : public CNode { + +public: + Float *node; + +public: + inline float& operator()( const int idx1, const int idx2 ) { + + return node[idx1][idx2]; + } + + void initMemory( int index, int val ) { + + int m; + for( int i=1; i<=NLon; i++ ) { + for( int j=1; j<=NLat; j++ ) { + m = idx(j,i); + this->operator ()(m, index) = val; + } + } + } + + int mallocMem() { + + CHKRET( this->node = (Float*) malloc( sizeof(Float) * NLon * NLat) ); + + /* FIXME: remove global variables */ + CHKRET( R6 = (float*) malloc( sizeof(float) * (NLat+1) ) ); + CHKRET( C1 = (float*) malloc( sizeof(float) * (NLon+1) ) ); + CHKRET( C3 = (float*) malloc( sizeof(float) * (NLon+1) ) ); + CHKRET( C2 = (float*) malloc( sizeof(float) * (NLat+1) ) ); + CHKRET( C4 = (float*) malloc( sizeof(float) * (NLat+1) ) ); + + return 0; + } + + int freeMem() { + + free( this->node ); + free( R6 ); + free( C1 ); + free( C2 ); + free( C3 ); + free( C4 ); + + return 0; + } + + int run() { + + if( Par.coriolis ) + return ewStepCor(); + + return ewStep(); + } + + int copyToGPU() { return 0; } + int copyFromGPU() { return 0; } + int copyIntermediate() { return 0; } + int copyPOIs() { return 0; } + +}; + +#pragma pack(push, 1) +class CArrayNode : public CNode { + +protected: + float *d; + float *h; + float *hMax; + float *fM; + float *fN; + float *cR1; + float *cR2; + float *cR3; + float *cR4; + float *cR5; + float *tArr; + float *topo; + +public: + virtual float& operator()( const int idx1, const int idx2 ) { + + return ((float**)&d)[idx2][idx1]; + } + + void *getBuf( int idx ) { return ((float**)&d)[idx]; } + + virtual void initMemory( int index, int val ) { + + memset( getBuf(index), 0, NLat * NLon * sizeof(float) ); + + } + + virtual int mallocMem() { + + CHKRET( this->d = (float*) malloc( sizeof(float) * NLon * NLat ) ); + CHKRET( this->h = (float*) malloc( sizeof(float) * NLon * NLat ) ); + CHKRET( this->hMax = (float*) malloc( sizeof(float) * NLon * NLat ) ); + CHKRET( this->fM = (float*) malloc( sizeof(float) * NLon * NLat ) ); + CHKRET( this->fN = (float*) malloc( sizeof(float) * NLon * NLat ) ); + CHKRET( this->cR1 = (float*) malloc( sizeof(float) * NLon * NLat ) ); + CHKRET( this->cR2 = (float*) malloc( sizeof(float) * NLon * NLat ) ); + CHKRET( this->cR3 = (float*) malloc( sizeof(float) * NLon * NLat ) ); + CHKRET( this->cR4 = (float*) malloc( sizeof(float) * NLon * NLat ) ); + CHKRET( this->cR5 = (float*) malloc( sizeof(float) * NLon * NLat ) ); + CHKRET( this->tArr = (float*) malloc( sizeof(float) * NLon * NLat ) ); + CHKRET( this->topo = (float*) malloc( sizeof(float) * NLon * NLat ) ); + + /* FIXME: remove global variables */ + CHKRET( R6 = (float*) malloc( sizeof(float) * (NLat+1) ) ); + CHKRET( C1 = (float*) malloc( sizeof(float) * (NLon+1) ) ); + CHKRET( C3 = (float*) malloc( sizeof(float) * (NLon+1) ) ); + CHKRET( C2 = (float*) malloc( sizeof(float) * (NLat+1) ) ); + CHKRET( C4 = (float*) malloc( sizeof(float) * (NLat+1) ) ); + + return 0; + } + + virtual int freeMem() { + + free( this->d ); + free( this->h ); + free( this->hMax ); + free( this->fM ); + free( this->fN ); + free( this->cR1 ); + free( this->cR2 ); + free( this->cR3 ); + free( this->cR4 ); + free( this->cR5 ); + free( this->tArr ); + free( this->topo ); + + free( R6 ); + free( C1 ); + free( C2 ); + free( C3 ); + free( C4 ); + + return 0; + } + + virtual int run() { + + if( Par.coriolis ) + return ewStepCor(); + + return ewStep(); + } + + virtual int copyToGPU() { return 0; } + virtual int copyFromGPU() { return 0; } + virtual int copyIntermediate() { return 0; } + virtual int copyPOIs() { return 0; } + +}; +#pragma pack(pop) + +#endif /* EW_NODE_H */ diff --git a/cpu/branches/web/ewOut2D.cpp b/cpu/branches/web/ewOut2D.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f9f93813a37922d94debf778b5e4341c4bbcce99 --- /dev/null +++ b/cpu/branches/web/ewOut2D.cpp @@ -0,0 +1,142 @@ +#include +#include +#include +#include "easywave.h" + + +static char *IndexFile; +static int Nrec2DOutput; + + +int ewStart2DOutput() +{ + FILE *fp; + char buf[64]; + + + // start index file + sprintf( buf, "%s.2D.idx", Par.modelName ); + IndexFile = strdup(buf); + + fp = fopen( IndexFile, "wt" ); + + fprintf( fp, "%g %g %d %g %g %d\n", LonMin, LonMax, NLon, LatMin, LatMax, NLat ); + + fclose( fp ); + + Nrec2DOutput = 0; + + return 0; +} + + + +int ewOut2D() +{ + FILE *fp; + short nOutI,nOutJ; + int i,j,m; + float ftmp; + double dtmp,lonOutMin,lonOutMax,latOutMin,latOutMax; + char record[128]; + + CNode& Node = *gNode; + + Nrec2DOutput++; + + nOutI = Imax-Imin+1; + lonOutMin = getLon(Imin); lonOutMax = getLon(Imax); + nOutJ = Jmax-Jmin+1; + latOutMin = getLat(Jmin); latOutMax = getLat(Jmax); + + // write ssh + sprintf( record, "%s.2D.%5.5d.ssh", Par.modelName, Par.time ); + fp = fopen( record, "wb" ); + fwrite( "DSBB", 4, 1, fp ); + fwrite( &nOutI, sizeof(short), 1, fp ); + fwrite( &nOutJ, sizeof(short), 1, fp ); + fwrite( &lonOutMin, sizeof(double), 1, fp ); + fwrite( &lonOutMax, sizeof(double), 1, fp ); + fwrite( &latOutMin, sizeof(double), 1, fp ); + fwrite( &latOutMax, sizeof(double), 1, fp ); + dtmp = -1.; fwrite( &dtmp, sizeof(double), 1, fp ); + dtmp = +1.; fwrite( &dtmp, sizeof(double), 1, fp ); + for( j=Jmin; j<=Jmax; j++ ) { + for( i=Imin; i<=Imax; i++ ) { + m = idx(j,i); + if( fabs(Node(m, iH)) < Par.sshTransparencyThreshold ) + ftmp = (float)9999; + else + ftmp = (float)Node(m, iH); + fwrite( &ftmp, sizeof(float), 1, fp ); + } + } + fclose( fp ); + + // updating contents file + fp = fopen( IndexFile, "at" ); + fprintf( fp, "%3.3d %s %d %d %d %d\n", Nrec2DOutput, utlTimeSplitString(Par.time), Imin, Imax, Jmin, Jmax ); + fclose( fp ); + + return 0; +} + + +int ewDump2D() +{ + FILE *fp; + short nOutI,nOutJ; + int i,j,m; + float ftmp; + double dtmp,lonOutMin,lonOutMax,latOutMin,latOutMax; + char record[128]; + + CNode& Node = *gNode; + + nOutI = Imax-Imin+1; + lonOutMin = getLon(Imin); lonOutMax = getLon(Imax); + nOutJ = Jmax-Jmin+1; + latOutMin = getLat(Jmin); latOutMax = getLat(Jmax); + + // write ssh max + sprintf( record, "%s.2D.sshmax", Par.modelName ); + fp = fopen( record, "wb" ); + fwrite( "DSBB", 4, 1, fp ); + fwrite( &nOutI, sizeof(short), 1, fp ); + fwrite( &nOutJ, sizeof(short), 1, fp ); + fwrite( &lonOutMin, sizeof(double), 1, fp ); + fwrite( &lonOutMax, sizeof(double), 1, fp ); + fwrite( &latOutMin, sizeof(double), 1, fp ); + fwrite( &latOutMax, sizeof(double), 1, fp ); + dtmp = 0.; fwrite( &dtmp, sizeof(double), 1, fp ); + dtmp = 1.; fwrite( &dtmp, sizeof(double), 1, fp ); + for( j=Jmin; j<=Jmax; j++ ) { + for( i=Imin; i<=Imax; i++ ) { + ftmp = (float)Node(idx(j,i), iHmax); + fwrite( &ftmp, sizeof(float), 1, fp ); + } + } + fclose( fp ); + + // write arrival times + sprintf( record, "%s.2D.time", Par.modelName ); + fp = fopen( record, "wb" ); + fwrite( "DSBB", 4, 1, fp ); + fwrite( &nOutI, sizeof(short), 1, fp ); + fwrite( &nOutJ, sizeof(short), 1, fp ); + fwrite( &lonOutMin, sizeof(double), 1, fp ); + fwrite( &lonOutMax, sizeof(double), 1, fp ); + fwrite( &latOutMin, sizeof(double), 1, fp ); + fwrite( &latOutMax, sizeof(double), 1, fp ); + dtmp = 0.; fwrite( &dtmp, sizeof(double), 1, fp ); + dtmp = 1.; fwrite( &dtmp, sizeof(double), 1, fp ); + for( j=Jmin; j<=Jmax; j++ ) { + for( i=Imin; i<=Imax; i++ ) { + ftmp = (float)Node(idx(j,i), iTime) / 60; + fwrite( &ftmp, sizeof(float), 1, fp ); + } + } + fclose( fp ); + + return 0; +} diff --git a/cpu/branches/web/ewPOIs.cpp b/cpu/branches/web/ewPOIs.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ce895350dae10874d58f6f71dd6d04539d6c6962 --- /dev/null +++ b/cpu/branches/web/ewPOIs.cpp @@ -0,0 +1,245 @@ +#include +#include +#include +#include +#include "easywave.h" + +//#define SSHMAX_TO_SINGLE_FILE 0 + +static int MaxPOIs; +int NPOIs; +static char **idPOI; +long *idxPOI; +static int *flagRunupPOI; +static int NtPOI; +static int *timePOI; +static float **sshPOI; + + +int ewLoadPOIs() +{ + FILE *fp,*fpFit,*fpAcc,*fpRej; + int ierr,line; + int i,j,i0,j0,imin,imax,jmin,jmax,flag,it,n; + int rad,nmin,itype; + char record[256],buf[256],id[64]; + double lon,lat,d2,d2min,lenLon,lenLat,depth; + double POIdistMax,POIdepthMin,POIdepthMax; + + CNode& Node = *gNode; + + // if POIs file is not specified + if( Par.filePOIs == NULL ) return 0; + + Log.print("Loading POIs from %s", Par.filePOIs ); + + MaxPOIs = utlGetNumberOfRecords( Par.filePOIs ); if( !MaxPOIs ) return Err.post( "Empty POIs file" ); + + idPOI = new char*[MaxPOIs]; if( !idPOI ) return Err.post( Err.msgAllocateMem() ); + idxPOI = new long[MaxPOIs]; if( !idxPOI ) return Err.post( Err.msgAllocateMem() ); + flagRunupPOI = new int[MaxPOIs]; if( !flagRunupPOI ) return Err.post( Err.msgAllocateMem() ); + sshPOI = new float*[MaxPOIs]; if( !sshPOI ) return Err.post( Err.msgAllocateMem() ); + + // read first record and get idea about the input type + fp = fopen( Par.filePOIs, "rt" ); + line = 0; utlReadNextRecord( fp, record, &line ); + itype = sscanf( record, "%s %s %s", buf, buf, buf ); + fclose( fp ); + + if( itype == 2 ) { // poi-name and grid-index + + fp = fopen( Par.filePOIs, "rt" ); + line = NPOIs = 0; + while( utlReadNextRecord( fp, record, &line ) != EOF ) { + + i = sscanf( record, "%s %d", id, &nmin ); + if( i != 2 ) { + Log.print( "! Bad POI record: %s", record ); + continue; + } + idPOI[NPOIs] = strdup(id); + idxPOI[NPOIs] = nmin; + flagRunupPOI[NPOIs] = 1; + NPOIs++; + } + + fclose( fp ); + Log.print( "%d POIs of %d loaded successfully; %d POIs rejected", NPOIs, MaxPOIs, (MaxPOIs-NPOIs) ); + } + else if( itype == 3 ) { // poi-name and coordinates + + if( Par.poiReport ) { + fpAcc = fopen( "poi_accepted.lst", "wt" ); + fprintf( fpAcc, "ID lon lat lonIJ latIJ depthIJ dist[km]\n" ); + fpRej = fopen( "poi_rejected.lst", "wt" ); + } + + lenLat = My_PI*Re/180; + + fp = fopen( Par.filePOIs, "rt" ); + line = NPOIs = 0; + while( utlReadNextRecord( fp, record, &line ) != EOF ) { + + i = sscanf( record, "%s %lf %lf %d", id, &lon, &lat, &flag ); + if( i == 3 ) + flag = 1; + else if( i == 4 ) + ; + else { + Log.print( "! Bad POI record: %s", record ); + if( Par.poiReport ) fprintf( fpRej, "%s\n", record ); + continue; + } + + // find the closest water grid node. Local distances could be treated as cartesian (2 min cell distortion at 60 degrees is only about 2 meters or 0.2%) + i0 = (int)((lon - LonMin)/DLon) + 1; + j0 = (int)((lat - LatMin)/DLat) + 1; + if( i0<1 || i0>NLon || j0<1 || j0>NLat ) { + Log.print( "!POI out of grid: %s", record ); + if( Par.poiReport ) fprintf( fpRej, "%s\n", record ); + continue; + } + lenLon = lenLat * R6[j0]; + + for( nmin=-1,rad=0; rad NLon ) imax = NLon; + jmin = j0-rad; if( jmin < 1 ) jmin = 1; + jmax = j0+rad+1; if( jmax > NLat ) jmax = NLat; + for( i=imin; i<=imax; i++ ) + for( j=jmin; j<=jmax; j++ ) { + if( i != imin && i != imax && j != jmin && j != jmax ) continue; + n = idx(j,i); + depth = Node(n, iD); + if( depth < Par.poiDepthMin || depth > Par.poiDepthMax ) continue; + d2 = pow( lenLon*(lon-getLon(i)), 2. ) + pow( lenLat*(lat-getLat(j)), 2. ); + if( d2 < d2min ) { d2min = d2; nmin = n; } + } + + if( nmin > 0 ) break; + } + + if( sqrt(d2min) > Par.poiDistMax ) { + Log.print( "! Closest water node too far: %s", record ); + if( Par.poiReport ) fprintf( fpRej, "%s\n", record ); + continue; + } + + idPOI[NPOIs] = strdup(id); + idxPOI[NPOIs] = nmin; + flagRunupPOI[NPOIs] = flag; + NPOIs++; + i = nmin/NLat + 1; + j = nmin - (i-1)*NLat + 1; + if( Par.poiReport ) fprintf( fpAcc, "%s %.4f %.4f %.4f %.4f %.1f %.3f\n", id, lon, lat, getLon(i), getLat(j), Node(nmin, iD), sqrt(d2min)/1000 ); + } + + fclose( fp ); + Log.print( "%d POIs of %d loaded successfully; %d POIs rejected", NPOIs, MaxPOIs, (MaxPOIs-NPOIs) ); + if( Par.poiReport ) { + fclose( fpAcc ); + fclose( fpRej ); + } + } + + // if mareograms + if( Par.poiDt ) { + NtPOI = Par.timeMax/Par.poiDt + 1; + + timePOI = new int[NtPOI]; + for( it=0; it +#include +#include +#include +#include "easywave.h" + +struct EWPARAMS Par; + + +int ewParam( int argc, char **argv ) +// Process command line arguments and/or use default +{ + int argn,ierr; + + /* TODO: optimize argument handling */ + + // Obligatory command line parameters + + // Bathymetry + if( ( argn = utlCheckCommandLineOption( argc, argv, "grid", 4 ) ) != 0 ) { + /* TODO: strdup not necessary here because all arguments in argv reside until program exit -> memory leak */ + Par.fileBathymetry = strdup( argv[argn+1] ); + } + else return -1; + + // Source: Okada faults or Surfer grid + if( ( argn = utlCheckCommandLineOption( argc, argv, "source", 6 ) ) != 0 ) { + Par.fileSource = strdup( argv[argn+1] ); + } + else return -1; + + // Simulation time, [sec] + if( ( argn = utlCheckCommandLineOption( argc, argv, "time", 4 ) ) != 0 ) { + Par.timeMax = atoi( argv[argn+1] ); + Par.timeMax *= 60; + } + else return -1; + + + // Optional parameters or their default values + + // Model name + if( ( argn = utlCheckCommandLineOption( argc, argv, "label", 3 ) ) != 0 ) { + Par.modelName = strdup( argv[argn+1] ); + } + else Par.modelName = strdup( "eWave" ); + + // Deactivate logging + if( ( argn = utlCheckCommandLineOption( argc, argv, "nolog", 5 ) ) != 0 ) + ; + else { + Log.start( "easywave.log" ); + Log.timestamp_disable(); + } + + // Use Coriolis force + if( ( argn = utlCheckCommandLineOption( argc, argv, "coriolis", 3 ) ) != 0 ) + Par.coriolis = 1; + else Par.coriolis = 0; + + // Periodic dumping of mariograms and cumulative 2D-plots (wavemax, arrival times), [sec] + if( ( argn = utlCheckCommandLineOption( argc, argv, "dump", 4 ) ) != 0 ) + Par.outDump = atoi( argv[argn+1] ); + else Par.outDump = 0; + + // Reporting simulation progress, [sec model time] + if( ( argn = utlCheckCommandLineOption( argc, argv, "progress", 4 ) ) != 0 ) + Par.outProgress = (int)(atof(argv[argn+1])*60); + else Par.outProgress = 600; + + // 2D-wave propagation output, [sec model time] + if( ( argn = utlCheckCommandLineOption( argc, argv, "propagation", 4 ) ) != 0 ) + Par.outPropagation = (int)(atof(argv[argn+1])*60); + else Par.outPropagation = 300; + + // minimal calculation depth, [m] + if( ( argn = utlCheckCommandLineOption( argc, argv, "min_depth", 9 ) ) != 0 ) + Par.dmin = (float)atof(argv[argn+1]); + else Par.dmin = 10.; + + // timestep, [sec] + if( ( argn = utlCheckCommandLineOption( argc, argv, "step", 4 ) ) != 0 ) + Par.dt = atoi(argv[argn+1]); + else Par.dt = 0; // will be estimated automatically + + // Initial uplift: relative threshold + if( ( argn = utlCheckCommandLineOption( argc, argv, "ssh0_rel", 8 ) ) != 0 ) + Par.ssh0ThresholdRel = (float)atof(argv[argn+1]); + else Par.ssh0ThresholdRel = 0.01; + + // Initial uplift: absolute threshold, [m] + if( ( argn = utlCheckCommandLineOption( argc, argv, "ssh0_abs", 8 ) ) != 0 ) + Par.ssh0ThresholdAbs = (float)atof(argv[argn+1]); + else Par.ssh0ThresholdAbs = 0.0; + + // Threshold for 2-D arrival time (0 - do not calculate), [m] + if( ( argn = utlCheckCommandLineOption( argc, argv, "ssh_arrival", 9 ) ) != 0 ) + Par.sshArrivalThreshold = (float)atof(argv[argn+1]); + else Par.sshArrivalThreshold = 0.001; + + // Threshold for clipping of expanding computational area, [m] + if( ( argn = utlCheckCommandLineOption( argc, argv, "ssh_clip", 8 ) ) != 0 ) + Par.sshClipThreshold = (float)atof(argv[argn+1]); + else Par.sshClipThreshold = 1.e-4; + + // Threshold for resetting the small ssh (keep expanding area from unnesessary growing), [m] + if( ( argn = utlCheckCommandLineOption( argc, argv, "ssh_zero", 8 ) ) != 0 ) + Par.sshZeroThreshold = (float)atof(argv[argn+1]); + else Par.sshZeroThreshold = 1.e-5; + + // Threshold for transparency (for png-output), [m] + if( ( argn = utlCheckCommandLineOption( argc, argv, "ssh_transparency", 8 ) ) != 0 ) + Par.sshTransparencyThreshold = (float)atof(argv[argn+1]); + else Par.sshTransparencyThreshold = 0.0; + + // Points Of Interest (POIs) input file + if( ( argn = utlCheckCommandLineOption( argc, argv, "poi", 3 ) ) != 0 ) { + Par.filePOIs = strdup( argv[argn+1] ); + } + else Par.filePOIs = NULL; + + // POI fitting: max search distance, [km] + if( ( argn = utlCheckCommandLineOption( argc, argv, "poi_search_dist", 15 ) ) != 0 ) + Par.poiDistMax = (float)atof(argv[argn+1]); + else Par.poiDistMax = 10.0; + Par.poiDistMax *= 1000.; + + // POI fitting: min depth, [m] + if( ( argn = utlCheckCommandLineOption( argc, argv, "poi_min_depth", 13 ) ) != 0 ) + Par.poiDepthMin = (float)atof(argv[argn+1]); + else Par.poiDepthMin = 1.0; + + // POI fitting: max depth, [m] + if( ( argn = utlCheckCommandLineOption( argc, argv, "poi_max_depth", 13 ) ) != 0 ) + Par.poiDepthMax = (float)atof(argv[argn+1]); + else Par.poiDepthMax = 10000.0; + + // report of POI loading + if( ( argn = utlCheckCommandLineOption( argc, argv, "poi_report", 7 ) ) != 0 ) + Par.poiReport = 1; + else Par.poiReport = 0; + + // POI output interval, [sec] + if( ( argn = utlCheckCommandLineOption( argc, argv, "poi_dt_out", 10 ) ) != 0 ) + Par.poiDt = atoi(argv[argn+1]); + else Par.poiDt = 30; + + if( ( argn = utlCheckCommandLineOption( argc, argv, "gpu", 3 ) ) != 0 ) + Par.gpu = true; + else + Par.gpu = false; + + if( ( argn = utlCheckCommandLineOption( argc, argv, "adjust_ztop", 11 ) ) != 0 ) + Par.adjustZtop = true; + else + Par.adjustZtop = false; + + if( ( argn = utlCheckCommandLineOption( argc, argv, "verbose", 7 ) ) != 0 ) + Par.verbose = true; + else + Par.verbose = false; + + return 0; +} + + +void ewLogParams(void) +{ + Log.print("\nModel parameters for this simulation:"); + Log.print("timestep: %d sec", Par.dt); + Log.print("max time: %g min", (float)Par.timeMax/60); + Log.print("poi_dt_out: %d sec", Par.poiDt); + Log.print("poi_report: %s", (Par.poiReport ? "yes" : "no") ); + Log.print("poi_search_dist: %g km", Par.poiDistMax/1000.); + Log.print("poi_min_depth: %g m", Par.poiDepthMin); + Log.print("poi_max_depth: %g m", Par.poiDepthMax); + Log.print("coriolis: %s", (Par.coriolis ? "yes" : "no") ); + Log.print("min_depth: %g m", Par.dmin); + Log.print("ssh0_rel: %g", Par.ssh0ThresholdRel); + Log.print("ssh0_abs: %g m", Par.ssh0ThresholdAbs); + Log.print("ssh_arrival: %g m", Par.sshArrivalThreshold); + Log.print("ssh_clip: %g m", Par.sshClipThreshold); + Log.print("ssh_zero: %g m", Par.sshZeroThreshold); + Log.print("ssh_transparency: %g m\n", Par.sshTransparencyThreshold); + + return; +} diff --git a/cpu/branches/web/ewReset.cpp b/cpu/branches/web/ewReset.cpp new file mode 100644 index 0000000000000000000000000000000000000000..ed8afa6df45957ec2a1ced2f7d41225f69498240 --- /dev/null +++ b/cpu/branches/web/ewReset.cpp @@ -0,0 +1,24 @@ +#include +#include +#include "easywave.h" + + +int ewReset() +{ + int i,j,m; + + CNode& Node = *gNode; + + for( i=1; i<=NLon; i++ ) { + for( j=1; j<=NLat; j++ ) { + + m = idx(j,i); + Node(m, iH) = Node(m, iHmax) = Node(m, iM) = Node(m, iN) = 0; + Node(m, iTime) = -1; + } + } + + Imin = 1; Imax = NLon; Jmin = 1; Jmax = NLat; + + return 0; +} diff --git a/cpu/branches/web/ewSource.cpp b/cpu/branches/web/ewSource.cpp new file mode 100644 index 0000000000000000000000000000000000000000..8a65a1bf2f5771838f50ee9afb4eb1e521f77fbc --- /dev/null +++ b/cpu/branches/web/ewSource.cpp @@ -0,0 +1,187 @@ +#include +#include +#include +#include +#include "cOkadaEarthquake.h" +#include "easywave.h" + +int Imin; +int Imax; +int Jmin; +int Jmax; + +#define SRC_GRD 1 +#define SRC_FLT 2 + +//==================================================== +int ewSource() +{ + char dsaa_label[8]; + int i,j,ierr,srcType; + double lon,lat,dz,absuzmax,absuzmin; + FILE *fp; + cOkadaEarthquake eq; + cOgrd uZ; + + CNode& Node = *gNode; + + // check input file type: GRD or fault + if( (fp = fopen( Par.fileSource, "rb" )) == NULL ) return Err.post( Err.msgOpenFile(Par.fileSource) ); + memset( dsaa_label, 0, 5 ); + ierr = fread( dsaa_label, 4, 1, fp ); + if( !strcmp( dsaa_label,"DSAA" ) || !strcmp( dsaa_label,"DSBB" ) ) + srcType = SRC_GRD; + else + srcType = SRC_FLT; + fclose(fp); + + + // load GRD file + if( srcType == SRC_GRD) { + ierr = uZ.readGRD( Par.fileSource ); if(ierr) return ierr; + } + + // read fault(s) from file + if( srcType == SRC_FLT) { + int effSymSource = 0; + long l; + double dist,energy,factLat,effRad,effMax; + + ierr = eq.read( Par.fileSource ); if(ierr) return ierr; + + if( Par.adjustZtop ) { + + // check fault parameters + Err.disable(); + ierr = eq.finalizeInput(); + while( ierr ) { + i = ierr/10; + ierr = ierr - 10*i; + if( ierr == FLT_ERR_STRIKE ) { + Log.print( "No strike on input: Employing effective symmetric source model" ); + if( eq.nfault > 1 ) { Err.enable(); return Err.post("Symmetric source assumes only 1 fault"); } + eq.fault[0].strike = 0.; + effSymSource = 1; + } + else if( ierr == FLT_ERR_ZTOP ) { + Log.print( "Automatic depth correction to fault top @ 10 km" ); + eq.fault[i].depth = eq.fault[i].width/2 * sindeg(eq.fault[i].dip) + 10.e3; + } + else { + Err.enable(); + return ierr; + } + ierr = eq.finalizeInput(); + } + Err.enable(); + + } else { + + // check fault parameters + Err.disable(); + ierr = eq.finalizeInput(); + if( ierr ) { + i = ierr/10; + ierr = ierr - 10*i; + if( ierr != FLT_ERR_STRIKE ) { + Err.enable(); + ierr = eq.finalizeInput(); + return ierr; + } + Log.print( "No strike on input: Employing effective symmetric source model" ); + Err.enable(); + if( eq.nfault > 1 ) return Err.post("symmetric source assumes only 1 fault"); + eq.fault[0].strike = 0.; + effSymSource = 1; + ierr = eq.finalizeInput(); if(ierr) return ierr; + } + Err.enable(); + + } + + // calculate uplift on a rectangular grid + // set grid resolution, grid dimensions will be set automatically + uZ.dx = DLon; uZ.dy = DLat; + ierr = eq.calculate( uZ ); if(ierr) return ierr; + + if( effSymSource ) { + // integrate for tsunami energy + energy = 0.; + for( j=0; j Par.sshClipThreshold ) { + Imin = My_min( Imin, i ); + Imax = My_max( Imax, i ); + Jmin = My_min( Jmin, j ); + Jmax = My_max( Jmax, j ); + } + + } + } + + if( Imin == NLon ) return Err.post( "Zero initial displacement" ); + + Imin = My_max( Imin - 2, 2 ); + Imax = My_min( Imax + 2, NLon-1 ); + Jmin = My_max( Jmin - 2, 2 ); + Jmax = My_min( Jmax + 2, NLat-1 ); + + return 0; +} diff --git a/cpu/branches/web/ewStep.cpp b/cpu/branches/web/ewStep.cpp new file mode 100644 index 0000000000000000000000000000000000000000..6187df44de1b8e6bcde409f99e7ada4bd0721cbe --- /dev/null +++ b/cpu/branches/web/ewStep.cpp @@ -0,0 +1,344 @@ +// Time stepping +#include +#include +#include +#include "easywave.h" + +/* TODO: still not perfect */ +#define Node(idx1, idx2) Node.node[idx1][idx2] +#define CNode CStructNode +#define gNode ((CStructNode*)gNode) + +int ewStep( void ) +{ + int i,j,enlarge; + float absH,v1,v2; + int m; + + CNode& Node = *gNode; + + // sea floor topography (mass conservation) + #pragma omp parallel for default(shared) private(i,j,absH) + for( i=Imin; i<=Imax; i++ ) { + for( j=Jmin; j<=Jmax; j++ ) { + + m = idx(j,i); + + if( Node(m, iD) == 0 ) continue; + + Node(m, iH) = Node(m, iH) - Node(m, iR1)*( Node(m, iM) - Node(m-NLat, iM) + Node(m, iN)*R6[j] - Node(m-1, iN)*R6[j-1] ); + + absH = fabs(Node(m, iH)); + + if( absH < Par.sshZeroThreshold ) Node(m, iH) = 0.; + + if( Node(m, iH) > Node(m, iHmax) ) Node(m, iHmax) = Node(m, iH); + + if( Par.sshArrivalThreshold && Node(m, iTime) < 0 && absH > Par.sshArrivalThreshold ) Node(m, iTime) = (float)Par.time; + + } + } + + // open bondary conditions + if( Jmin <= 2 ) { + for( i=2; i<=(NLon-1); i++ ) { + m = idx(1,i); + Node(m, iH) = sqrt( pow(Node(m, iN),2.) + 0.25*pow((Node(m, iM)+Node(m-NLat, iM)),2.) )*C1[i]; + if( Node(m, iN) > 0 ) Node(m, iH) = - Node(m, iH); + } + } + if( Imin <= 2 ) { + for( j=2; j<=(NLat-1); j++ ) { + m = idx(j,1); + Node(m, iH) = sqrt( pow(Node(m, iM),2.) + 0.25*pow((Node(m, iN)+Node(m-1, iN)),2.) )*C2[j]; + if( Node(m, iM) > 0 ) Node(m, iH) = - Node(m, iH); + } + } + if( Jmax >= (NLat-1) ) { + for( i=2; i<=(NLon-1); i++ ) { + m = idx(NLat,i); + Node(m, iH) = sqrt( pow(Node(m-1, iN),2.) + 0.25*pow((Node(m, iM)+Node(m-1, iM)),2.) )*C3[i]; + if( Node(m-1, iN) < 0 ) Node(m, iH) = - Node(m, iH); + } + } + if( Imax >= (NLon-1) ) { + for( j=2; j<=(NLat-1); j++ ) { + m = idx(j,NLon); + Node(m, iH) = sqrt( pow(Node(m-NLat, iM),2.) + 0.25*pow((Node(m, iN)+Node(m-1, iN)),2.) )*C4[j]; + if( Node(m-NLat, iM) < 0 ) Node(m, iH) = - Node(m, iH); + } + } + if( Jmin <= 2 ) { + m = idx(1,1); + Node(m, iH) = sqrt( pow(Node(m, iM),2.) + pow(Node(m, iN),2.) )*C1[1]; + if( Node(m, iN) > 0 ) Node(m, iH) = - Node(m, iH); + m = idx(1,NLon); + Node(m, iH) = sqrt( pow(Node(m-NLat, iM),2.) + pow(Node(m, iN),2.) )*C1[NLon]; + if( Node(m, iN) > 0 ) Node(m, iH) = - Node(m, iH); + } + if( Jmin >= (NLat-1) ) { + m = idx(NLat,1); + Node(m, iH) = sqrt( pow(Node(m, iM),2.) + pow(Node(m-1, iN),2.) )*C3[1]; + if( Node(m-1, iN) < 0 ) Node(m, iH) = - Node(m, iH); + m = idx(NLat,NLon); + Node(m, iH) = sqrt( pow(Node(m-NLat, iM),2.) + pow(Node(m-1, iN),2.) )*C3[NLon]; + if( Node(m-1, iN) < 0 ) Node(m, iH) = - Node(m, iH); + } + + // moment conservation + #pragma omp parallel for default(shared) private(i,j) + for( i=Imin; i<=Imax; i++ ) { + for( j=Jmin; j<=Jmax; j++ ) { + + m = idx(j,i); + + if( (Node(m, iD)*Node(m+NLat, iD)) != 0 ) + Node(m, iM) = Node(m, iM) - Node(m, iR2)*(Node(m+NLat, iH)-Node(m, iH)); + + if( (Node(m, iD)*Node(m+1, iD)) != 0 ) + Node(m, iN) = Node(m, iN) - Node(m, iR4)*(Node(m+1, iH)-Node(m, iH)); + + } + } + // open boundaries + if( Jmin <= 2 ) { + for( i=1; i<=(NLon-1); i++ ) { + m = idx(1,i); + Node(m, iM) = Node(m, iM) - Node(m, iR2)*(Node(m+NLat, iH) - Node(m, iH)); + } + } + if( Imin <= 2 ) { + for( j=1; j<=NLat; j++ ) { + m = idx(j,1); + Node(m, iM) = Node(m, iM) - Node(m, iR2)*(Node(m+NLat, iH) - Node(m, iH)); + } + } + if( Jmax >= (NLat-1) ) { + for( i=1; i<=(NLon-1); i++ ) { + m = idx(NLat,i); + Node(m, iM) = Node(m, iM) - Node(m, iR2)*(Node(m+NLat, iH) - Node(m, iH)); + } + } + if( Imin <= 2 ) { + for( j=1; j<=(NLat-1); j++ ) { + m = idx(j,1); + Node(m, iN) = Node(m, iN) - Node(m, iR4)*(Node(m+1, iH) - Node(m, iH)); + } + } + if( Jmin <= 2 ) { + for( i=1; i<=NLon; i++ ) { + m = idx(1,i); + Node(m, iN) = Node(m, iN) - Node(m, iR4)*(Node(m+1, iH) - Node(m, iH)); + } + } + if( Imax >= (NLon-1) ) { + for( j=1; j<=(NLat-1); j++ ) { + m = idx(j,NLon); + Node(m, iN) = Node(m, iN) - Node(m, iR4)*(Node(m+1, iH) - Node(m, iH)); + } + } + + // calculation area for the next step + if( Imin > 2 ) { + for( enlarge=0, j=Jmin; j<=Jmax; j++ ) { + if( fabs(Node(idx(j,Imin+2), iH)) > Par.sshClipThreshold ) { enlarge = 1; break; } + } + if( enlarge ) { Imin--; if( Imin < 2 ) Imin = 2; } + } + if( Imax < (NLon-1) ) { + for( enlarge=0, j=Jmin; j<=Jmax; j++ ) { + if( fabs(Node(idx(j,Imax-2), iH)) > Par.sshClipThreshold ) { enlarge = 1; break; } + } + if( enlarge ) { Imax++; if( Imax > (NLon-1) ) Imax = NLon-1; } + } + if( Jmin > 2 ) { + for( enlarge=0, i=Imin; i<=Imax; i++ ) { + if( fabs(Node(idx(Jmin+2,i), iH)) > Par.sshClipThreshold ) { enlarge = 1; break; } + } + if( enlarge ) { Jmin--; if( Jmin < 2 ) Jmin = 2; } + } + if( Jmax < (NLat-1) ) { + for( enlarge=0, i=Imin; i<=Imax; i++ ) { + if( fabs(Node(idx(Jmax-2,i), iH)) > Par.sshClipThreshold ) { enlarge = 1; break; } + } + if( enlarge ) { Jmax++; if( Jmax > (NLat-1) ) Jmax = NLat-1; } + } + +return 0; +} + + + +int ewStepCor( void ) +{ + int i,j,enlarge; + float absH,v1,v2; + int m; + + CNode& Node = *gNode; + + // sea floor topography (mass conservation) + #pragma omp parallel for default(shared) private(i,j,nod,absH) + for( i=Imin; i<=Imax; i++ ) { + for( j=Jmin; j<=Jmax; j++ ) { + + m = idx(j,i); + + if( Node(m, iD) == 0 ) continue; + + Node(m, iH) = Node(m, iH) - Node(m, iR1)*( Node(m, iM) - Node(m-NLat, iM) + Node(m, iN)*R6[j] - Node(m-1, iN)*R6[j-1] ); + + absH = fabs(Node(m, iH)); + + if( absH < Par.sshZeroThreshold ) Node(m, iH) = 0.; + + if( Node(m, iH) > Node(m, iHmax) ) Node(m, iHmax) = Node(m, iH); + + if( Par.sshArrivalThreshold && Node(m, iTime) < 0 && absH > Par.sshArrivalThreshold ) Node(m, iTime) = (float)Par.time; + + } + } + + // open bondary conditions + if( Jmin <= 2 ) { + for( i=2; i<=(NLon-1); i++ ) { + m = idx(1,i); + Node(m, iH) = sqrt( pow(Node(m, iN),2.) + 0.25*pow((Node(m, iM)+Node(m-NLat, iM)),2.) )*C1[i]; + if( Node(m, iN) > 0 ) Node(m, iH) = - Node(m, iH); + } + } + if( Imin <= 2 ) { + for( j=2; j<=(NLat-1); j++ ) { + m = idx(j,1); + Node(m, iH) = sqrt( pow(Node(m, iM),2.) + 0.25*pow((Node(m, iN)+Node(m-1, iN)),2.) )*C2[j]; + if( Node(m, iM) > 0 ) Node(m, iH) = - Node(m, iH); + } + } + if( Jmax >= (NLat-1) ) { + for( i=2; i<=(NLon-1); i++ ) { + m = idx(NLat,i); + Node(m, iH) = sqrt( pow(Node(m-1, iN),2.) + 0.25*pow((Node(m, iM)+Node(m-1, iM)),2.) )*C3[i]; + if( Node(m-1, iN) < 0 ) Node(m, iH) = - Node(m, iH); + } + } + if( Imax >= (NLon-1) ) { + for( j=2; j<=(NLat-1); j++ ) { + m = idx(j,NLon); + Node(m, iH) = sqrt( pow(Node(m-NLat, iM),2.) + 0.25*pow((Node(m, iN)+Node(m-1, iN)),2.) )*C4[j]; + if( Node(m-NLat, iM) < 0 ) Node(m, iH) = - Node(m, iH); + } + } + if( Jmin <= 2 ) { + m = idx(1,1); + Node(m, iH) = sqrt( pow(Node(m, iM),2.) + pow(Node(m, iN),2.) )*C1[1]; + if( Node(m, iN) > 0 ) Node(m, iH) = - Node(m, iH); + m = idx(1,NLon); + Node(m, iH) = sqrt( pow(Node(m-NLat, iM),2.) + pow(Node(m, iN),2.) )*C1[NLon]; + if( Node(m, iN) > 0 ) Node(m, iH) = - Node(m, iH); + } + if( Jmin >= (NLat-1) ) { + m = idx(NLat,1); + Node(m, iH) = sqrt( pow(Node(m, iM),2.) + pow(Node(m-1, iN),2.) )*C3[1]; + if( Node(m-1, iN) < 0 ) Node(m, iH) = - Node(m, iH); + m = idx(NLat,NLon); + Node(m, iH) = sqrt( pow(Node(m-NLat, iM),2.) + pow(Node(m-1, iN),2.) )*C3[NLon]; + if( Node(m-1, iN) < 0 ) Node(m, iH) = - Node(m, iH); + } + + // moment conservation + // longitudial flux update + #pragma omp parallel for default(shared) private(i,j,nod,v1,v2) + for( i=Imin; i<=Imax; i++ ) { + for( j=Jmin; j<=Jmax; j++ ) { + + m = idx(j,i); + + if( (Node(m, iD)*Node(m+NLat, iD)) == 0 ) continue; + + v1 = Node(m+NLat, iH) - Node(m, iH); + v2 = Node(m-1, iN) + Node(m, iN) + Node(m+NLat, iN) + Node(m+NLat-1, iN); + Node(m, iM) = Node(m, iM) - Node(m, iR2)*v1 + Node(m, iR3)*v2; + } + } + // open boundaries + if( Jmin <= 2 ) { + for( i=1; i<=(NLon-1); i++ ) { + m = idx(1,i); + Node(m, iM) = Node(m, iM) - Node(m, iR2)*(Node(m+NLat, iH) - Node(m, iH)); + } + } + if( Imin <= 2 ) { + for( j=1; j<=NLat; j++ ) { + m = idx(j,1); + Node(m, iM) = Node(m, iM) - Node(m, iR2)*(Node(m+NLat, iH) - Node(m, iH)); + } + } + if( Jmax >= (NLat-1) ) { + for( i=1; i<=(NLon-1); i++ ) { + m = idx(NLat,i); + Node(m, iM) = Node(m, iM) - Node(m, iR2)*(Node(m+NLat, iH) - Node(m, iH)); + } + } + + // lattitudial flux update + #pragma omp parallel for default(shared) private(i,j,nod,v1,v2) + for( i=Imin; i<=Imax; i++ ) { + for( j=Jmin; j<=Jmax; j++ ) { + + m = idx(j,i); + + if( (Node(m, iD)*Node(m+1, iD)) == 0 ) continue; + + v1 = Node(m+1, iH) - Node(m, iH); + v2 = Node(m-NLat, iM) + Node(m, iM) + Node(m-NLat+1, iM) + Node(m+1, iM); + Node(m, iN) = Node(m, iN) - Node(m, iR4)*v1 - Node(m, iR5)*v2; + } + } + // open boundaries + if( Imin <= 2 ) { + for( j=1; j<=(NLat-1); j++ ) { + m = idx(j,1); + Node(m, iN) = Node(m, iN) - Node(m, iR4)*(Node(m+1, iH) - Node(m, iH)); + } + } + if( Jmin <= 2 ) { + for( i=1; i<=NLon; i++ ) { + m = idx(1,i); + Node(m, iN) = Node(m, iN) - Node(m, iR4)*(Node(m+1, iH) - Node(m, iH)); + } + } + if( Imax >= (NLon-1) ) { + for( j=1; j<=(NLat-1); j++ ) { + m = idx(j,NLon); + Node(m, iN) = Node(m, iN) - Node(m, iR4)*(Node(m+1, iH) - Node(m, iH)); + } + } + + // calculation area for the next step + if( Imin > 2 ) { + for( enlarge=0, j=Jmin; j<=Jmax; j++ ) { + if( fabs(Node(idx(j,Imin+2), iH)) > Par.sshClipThreshold ) { enlarge = 1; break; } + } + if( enlarge ) { Imin--; if( Imin < 2 ) Imin = 2; } + } + if( Imax < (NLon-1) ) { + for( enlarge=0, j=Jmin; j<=Jmax; j++ ) { + if( fabs(Node(idx(j,Imax-2), iH)) > Par.sshClipThreshold ) { enlarge = 1; break; } + } + if( enlarge ) { Imax++; if( Imax > (NLon-1) ) Imax = NLon-1; } + } + if( Jmin > 2 ) { + for( enlarge=0, i=Imin; i<=Imax; i++ ) { + if( fabs(Node(idx(Jmin+2,i), iH)) > Par.sshClipThreshold ) { enlarge = 1; break; } + } + if( enlarge ) { Jmin--; if( Jmin < 2 ) Jmin = 2; } + } + if( Jmax < (NLat-1) ) { + for( enlarge=0, i=Imin; i<=Imax; i++ ) { + if( fabs(Node(idx(Jmax-2,i), iH)) > Par.sshClipThreshold ) { enlarge = 1; break; } + } + if( enlarge ) { Jmax++; if( Jmax > (NLat-1) ) Jmax = NLat-1; } + } + +return 0; +} diff --git a/cpu/branches/web/okada.cpp b/cpu/branches/web/okada.cpp new file mode 100644 index 0000000000000000000000000000000000000000..0dfedb1982da9f8f020c726af9e1b668e3fde2a7 --- /dev/null +++ b/cpu/branches/web/okada.cpp @@ -0,0 +1,348 @@ +// Y. Okada (1985) Surface deformation due to shear and tensile faults in a half-space: +// Bull.Seism.Soc.Am., v.75, p.1135-1154. +// okada@bosai.go.jp + +#include +#define My_PI 3.14159265358979 +#define DISPLMAX 1000 + +double fun_Chinnery( double (*fun)(double ksi, double eta), double x, double y ); +double f_ssUx(double ksi, double eta); +double f_ssUy(double ksi, double eta); +double f_ssUz(double ksi, double eta); +double f_dsUx(double ksi, double eta); +double f_dsUy(double ksi, double eta); +double f_dsUz(double ksi, double eta); +double fun_R(double ksi, double eta); +double fun_X(double ksi, double eta); +double fun_yp(double ksi, double eta); +double fun_dp(double ksi, double eta); +double fun_I1(double ksi, double eta); +double fun_I2(double ksi, double eta); +double fun_I3(double ksi, double eta); +double fun_I4(double ksi, double eta); +double fun_I5(double ksi, double eta); + +static double sdip; +static double cdip; +static double p; +static double q; +static double width; +static double length; +static double elast; + + + +//============================================================================ +int okada( double L,double W,double D,double sinD,double cosD,double U1,double U2, + double x,double y, int flag_xy, double *Ux,double *Uy,double *Uz ) +{ + double U1x,U2x,U1y,U2y,U1z,U2z; + + sdip = sinD; + if( fabs(sdip)<1.e-10 ) sdip = 0; + cdip = cosD; + if( fabs(cdip)<1.e-10 ) cdip = 0; + p = y*cdip + D*sdip; + q = y*sdip - D*cdip; + width = W; + length = L; + elast = 0.5; // mu/(lambda+mu) + + + U1x=U2x=U1y=U2y=U1z=U2z=0; + + if( U1 != 0 ) { + if( flag_xy ) { + U1x = -U1/2/My_PI * fun_Chinnery( f_ssUx, x,y ); + if( fabs(U1x) > DISPLMAX ) U1x = 0; + U1y = -U1/2/My_PI * fun_Chinnery( f_ssUy, x,y ); + if( fabs(U1y) > DISPLMAX ) U1y = 0; + } + U1z = -U1/2/My_PI * fun_Chinnery( f_ssUz, x,y ); + if( fabs(U1z) > DISPLMAX ) U1z = 0; + } + + if( U2 != 0 ) { + if( flag_xy ) { + U2x = -U2/2/My_PI * fun_Chinnery( f_dsUx, x,y ); + if( fabs(U2x) > DISPLMAX ) U2x = 0; + U2y = -U2/2/My_PI * fun_Chinnery( f_dsUy, x,y ); + if( fabs(U2y) > DISPLMAX ) U2y = 0; + } + U2z = -U2/2/My_PI * fun_Chinnery( f_dsUz, x,y ); + if( fabs(U2z) > DISPLMAX ) U2z = 0; + } + + *Ux = U1x + U2x; + *Uy = U1y + U2y; + *Uz = U1z + U2z; + + return 0; +} + + +double fun_Chinnery( double (*fun)(double ksi, double eta), double x, double y ) +{ + double value; + + value = fun(x,p) - fun(x,p-width) - fun(x-length,p) + fun(x-length,p-width); + + return value; +} + + +double f_ssUx(double ksi, double eta) +{ + double val,R,I1,term2; + + R = fun_R(ksi,eta); + I1 = fun_I1(ksi,eta); + + if( q*R == 0 ) { + if( ksi*eta == 0 ) { + term2 = 0; + } else { + if( ksi*eta*q*R > 0 ) + term2 = My_PI; + else + term2 = -My_PI; + } + } else { + term2 = atan(ksi*eta/q/R); + } + + + val = ksi*q/R/(R+eta) + term2 + I1*sdip; + + return val; + +} + + +double f_ssUy(double ksi, double eta) +{ + double val,yp,R,I2; + + R = fun_R(ksi,eta); + I2 = fun_I2(ksi,eta); + yp = fun_yp(ksi,eta); + + + val = yp*q/R/(R+eta) + q*cdip/(R+eta) + I2*sdip; + + return val; + +} + + +double f_ssUz(double ksi, double eta) +{ + double val,dp,R,I4; + + R = fun_R(ksi,eta); + I4 = fun_I4(ksi,eta); + dp = fun_dp(ksi,eta); + + + val = dp*q/R/(R+eta) + q*sdip/(R+eta) + I4*sdip; + + return val; + +} + + +double f_dsUx(double ksi, double eta) +{ + double val,R,I3; + + R = fun_R(ksi,eta); + I3 = fun_I3(ksi,eta); + + + val = q/R - I3*sdip*cdip; + + return val; + +} + + +double f_dsUy(double ksi, double eta) +{ + double val,yp,R,I1,term2; + + R = fun_R(ksi,eta); + I1 = fun_I1(ksi,eta); + yp = fun_yp(ksi,eta); + + if( q*R == 0 ) { + if( ksi*eta == 0 ) { + term2 = 0; + } else { + if( ksi*eta*q*R > 0 ) + term2 = My_PI; + else + term2 = -My_PI; + } + } else { + term2 = atan(ksi*eta/q/R); + } + + val = yp*q/R/(R+ksi) + cdip*term2 - I1*sdip*cdip; + + return val; + +} + + +double f_dsUz(double ksi, double eta) +{ + double val,dp,R,I5,term2; + + R = fun_R(ksi,eta); + I5 = fun_I5(ksi,eta); + dp = fun_dp(ksi,eta); + + if( q*R == 0 ) { + if( ksi*eta == 0 ) { + term2 = 0; + } else { + if( ksi*eta*q*R > 0 ) + term2 = My_PI; + else + term2 = -My_PI; + } + } else { + term2 = atan(ksi*eta/q/R); + } + + val = dp*q/R/(R+ksi) + sdip*term2 - I5*sdip*cdip; + + return val; + +} + + +double fun_R( double ksi, double eta ) +{ + double val; + + val = sqrt(ksi*ksi + eta*eta + q*q); + + return val; +} + + +double fun_X( double ksi, double eta ) +{ + double val; + + val = sqrt(ksi*ksi + q*q); + + return val; +} + + +double fun_dp( double ksi, double eta ) +{ + double val; + + val = eta*sdip - q*cdip; + + return val; +} + + +double fun_yp( double ksi, double eta ) +{ + double val; + + val = eta*cdip + q*sdip; + + return val; +} + + +double fun_I1( double ksi, double eta ) +{ + double val,R,dp,I5; + + R = fun_R(ksi,eta); + dp = fun_dp(ksi,eta); + I5 = fun_I5(ksi,eta); + + if( cdip != 0 ) + val = elast*(-1./cdip*ksi/(R+dp)) - sdip/cdip*I5; + else + val = -elast/2*ksi*q/(R+dp)/(R+dp); + + return val; +} + + +double fun_I2( double ksi, double eta ) +{ + double val,R,I3; + + R = fun_R(ksi,eta); + I3 = fun_I3(ksi,eta); + + val = elast*(-log(R+eta)) - I3; + + return val; +} + + +double fun_I3( double ksi, double eta ) +{ + double val,R,yp,dp,I4; + + R = fun_R(ksi,eta); + yp = fun_yp(ksi,eta); + dp = fun_dp(ksi,eta); + I4 = fun_I4(ksi,eta); + + if( cdip != 0 ) + val = elast*(1./cdip*yp/(R+dp) - log(R+eta)) + sdip/cdip*I4; + else + val = elast/2*(eta/(R+dp) + yp*q/(R+dp)/(R+dp) - log(R+eta)); + + return val; +} + + +double fun_I4( double ksi, double eta ) +{ + double val,R,dp; + + R = fun_R(ksi,eta); + dp = fun_dp(ksi,eta); + + if( cdip != 0 ) + val = elast/cdip*(log(R+dp)-sdip*log(R+eta)); + else + val = -elast*q/(R+dp); + + return val; +} + + +double fun_I5( double ksi, double eta ) +{ + double val,dp,X,R; + + if( ksi == 0 ) + return (double)0; + + + R = fun_R(ksi,eta); + X = fun_X(ksi,eta); + dp = fun_dp(ksi,eta); + + if( cdip != 0 ) + val = elast*2/cdip * atan( ( eta*(X+q*cdip)+X*(R+X)*sdip ) / (ksi*(R+X)*cdip) ); + else + val = -elast*ksi*sdip/(R+dp); + + return val; +} diff --git a/cpu/branches/web/utilits.cpp b/cpu/branches/web/utilits.cpp new file mode 100644 index 0000000000000000000000000000000000000000..f3421159739f95d583548540fbf46776218cca11 --- /dev/null +++ b/cpu/branches/web/utilits.cpp @@ -0,0 +1,1039 @@ +// last modified 11.07.2012 + +#include +#include +#include +#include +#include +#include +#include +#include + +#define ERRORFILE "error.msg" + +int iscomment( int ); + + +cMsg Msg; +cErrMsg Err; +cLog Log; + +// ========================== Class Message ======================= +cMsg::cMsg() +{ + enabled = 1; + setchannel(MSG_OUTSCRN); + setfilename( "default.msg" ); +} + +cMsg::~cMsg() +{ + +} + +void cMsg::enable() +{ + enabled = 1; +} + +void cMsg::disable() +{ + enabled = 0; +} + +void cMsg::setfilename( const char* newfilename ) +{ + sprintf( fname, "%s", newfilename ); +} + +void cMsg::setchannel( int newchannel ) +{ + channel = newchannel; +} + +int cMsg::print( const char* fmt, ... ) +{ + if(!enabled) return 0; + + if( channel & MSG_OUTSCRN ) { + va_list arglst; + va_start(arglst, fmt); + vprintf( fmt, arglst ); + va_end(arglst); + } + + if( channel & MSG_OUTFILE ) { + va_list arglst; + va_start(arglst, fmt); + FILE *fp = fopen( fname, "at" ); + vfprintf( fp, fmt, arglst ); + fclose( fp ); + va_end(arglst); + } + + return 0; +} +// =================== END Class Message ================= + + +// ========================== Class Error Message ======================= +cErrMsg::cErrMsg() +{ + enabled = 1; + setfilename( "error.msg" ); + setchannel(MSG_OUTSCRN|MSG_OUTFILE); +} + +cErrMsg::~cErrMsg() +{ + +} + +int cErrMsg::post( const char* fmt, ... ) +{ + if(!enabled) return 0; + + if( channel & MSG_OUTSCRN ) { + va_list arglst; + va_start(arglst, fmt); + vprintf( fmt, arglst ); + va_end(arglst); + printf( "\n" ); + } + + if( channel & MSG_OUTFILE ) { + va_list arglst; + va_start(arglst, fmt); + FILE *fp = fopen( fname, "at" ); + vfprintf( fp, fmt, arglst ); + fprintf( fp, "\n" ); + fclose( fp ); + va_end(arglst); + } + + return -1; +} + +char* cErrMsg::msgAllocateMem( void ) +{ + char msg[256]; + + sprintf( msg, "Error allocating memory" ); + + return strdup(msg); +} + +char* cErrMsg::msgOpenFile( const char *fname ) +{ + char msg[256]; + + sprintf( msg, "Cannot open file %s", fname ); + + return strdup(msg); +} + +char* cErrMsg::msgReadFile( const char *fname, int line, const char *expected ) +{ + char msg[256]; + + sprintf( msg, "Error reading file %s, line number %-d. Expected: %s", fname, line, expected ); + + return strdup(msg); +} +// =================== END Class Message ================= + + +// ======================= Class Log ======================= +cLog::cLog() +{ + enabled = 0; + timestamp_enabled = 1; + setfilename( "default.log" ); + setchannel(MSG_OUTFILE); +} + +cLog::~cLog() +{ + +} + +void cLog::start( const char* filename ) +{ + int ifenabled=0; + + setfilename( filename ); + enabled = 1; + if( timestamp_enabled ) { + ifenabled = 1; + timestamp_disable(); + } + print(" ============= Starting new log at %s", utlCurrentTime() ); + if( ifenabled ) timestamp_enable(); +} + +void cLog::timestamp_enable() +{ + timestamp_enabled = 1; +} + +void cLog::timestamp_disable() +{ + timestamp_enabled = 0; +} + +int cLog::print( const char* fmt, ... ) +{ + if(!enabled) return 0; + + va_list arglst; + va_start(arglst, fmt); + + FILE *fp = fopen( fname, "at" ); + if( timestamp_enabled )fprintf( fp, "%s --> ", utlCurrentTime() ); + vfprintf( fp, fmt, arglst ); + fprintf( fp, "\n" ); + fclose( fp ); + + va_end(arglst); + + return 0; +} +// =================== END Class Log ================= + + + +char *utlCurrentTime() +{ + time_t timer; + char *cp; + + timer = time(NULL); + cp = asctime(localtime(&timer)); + cp[strlen(cp)-1] = '\0'; + + return cp; +} + + +int utlPostError( const char *message ) +{ + FILE *fp = fopen( ERRORFILE, "at" ); + fprintf( fp, "%s", utlCurrentTime() ); + fprintf( fp, " -> %s\n", message ); + fclose( fp ); + + return -1; +} + + +char *utlErrMsgMemory() +{ + return strdup("Cannot allocate memory"); +} + + +char *utlErrMsgOpenFile( const char *fname ) +{ + char msg[256]; + + sprintf( msg, "Cannot open file %s", fname ); + + return strdup(msg); +} + + +char *utlErrMsgEndOfFile( const char *fname ) +{ + char msg[256]; + + sprintf( msg, "Unexpected end of file: %s", fname ); + + return strdup(msg); +} + + +char *utlErrMsgReadFile( const char *fname, int line, const char *expected ) +{ + char msg[256]; + + sprintf( msg, "Error reading file %s, line number %-d. Expected: %s", fname, line, expected ); + + return strdup(msg); +} + + + +/*** Command-line processing ***/ + +int utlCheckCommandLineOption( int argc, char **argv, const char *option, int letters_to_compare ) +{ + int k; + + + for( k=1; k (string+strlen(string)) ) { word[0] = '\0'; return NULL; } + + for( cp = pos; isspace(*cp); cp++ ) ; + if( *cp == '\0' ) { word[0] = '\0'; return NULL; } + sscanf( cp, "%s", word ); + cp += strlen(word); + + return cp; +} + + +int utlCountWordsInString( char *line ) +// Count words in a string +{ + int nwords=0; + char *cp=line; + + while( 1 ) + { + while( isspace( *cp ) ) cp++; + if( *cp == '\0' || *cp == ';' ) return nwords; + nwords++; + while( !isspace( *cp ) && *cp != '\0' ) cp++; + } + +} + + +/***************************************************************************/ +/* Write sequance of words into a string */ +/***************************************************************************/ +char *utlWords2String( int nwords, char **word ) +{ + char *buf; + int k,lenstr; + + + for( lenstr=0,k=0; k < nwords; k++ ) + lenstr += strlen( word[k] ); + + lenstr += (nwords + 1); // add separators plus final null character + + buf = new char[lenstr]; + + memset( buf, 0, lenstr ); + + for( k=0; k < nwords; k++ ) { + if( k>0 ) strcat( buf, " " ); + strcat( buf, word[k] ); + } + + return buf; +} + + +int utlSubString( char *str, int p1, int p2, char *substr ) +// get substring from p1 to p2 into a buffer substr +{ + char *cp; + int k; + + + if( p1<0 || p2<0 || p1>p2 || (unsigned)p2 > (strlen(str)-1) ) return -1; + + for( k=0,cp = &str[p1]; cp <= &str[p2]; cp++ ) + substr[k++] = *cp; + substr[k] = '\0'; + + return 0; +} + + +void utlPrintToString( char *string, int position, char *insert ) +// Prints string to another starting from given position +{ + char *cp; + int i; + + for( i = 0; i < position; i++ ) + if( string[i] == '\0' ) string[i] = ' '; + for( cp = insert, i = 0; *cp != '\0'; i++, cp++ ) + string[position+i] = *cp; + +} + + + +/***************************************************************************/ +/*** ***/ +/*** F I L E H A N D L I N G ***/ +/*** ***/ +/***************************************************************************/ + +int iscomment( int ch ) +{ + if( ch == ';' || ch == '!' ) + return 1; + else + return 0; +} + + +int utlFindNextInputField( FILE *fp ) +// Search for next input field in a file skipping blanks, tabs, empty lines, comments (from ';' to EOL) +// Returns number of lines scanned until input field found +{ + int ch; + int lines = 0; + +L1: while( isspace( (ch=fgetc(fp)) ) ) + if( ch == '\n' ) lines++; + + if( iscomment(ch) ) + { + while( ( ch = fgetc( fp ) ) != '\n' && ch != EOF ) ; + if( ch == '\n' ) + { + lines++; + goto L1; + } + else if( ch == EOF ) + return EOF; + } + else if( ch == EOF ) + return EOF; + + ungetc( ch, fp ); + + return( lines ); +} + + +int utlReadNextRecord( FILE *fp, char *record, int *line ) +{ + int found = 0; + char *cp, firstchar; + + while( !found && fgets( record, MaxFileRecordLength, fp ) ) + { + for( cp = record; *cp == ' ' || *cp == '\t'; cp++ ) + ; + if( *cp != '\n' && *cp != '\r' && *cp != ';' ) + found = 1; + (*line)++; + } + + if( !found ) + return EOF; + else + { + firstchar = *cp; + while( *cp != '\n' && *cp != '\r' && *cp != '\0' ) + cp++; + *cp = '\0'; + return( firstchar ); + } +} + + +int utlReadNextDataRecord( FILE *fp, char *record, int *line ) +{ + int found = 0; + char *cp, firstchar; + + while( !found && fgets( record, MaxFileRecordLength, fp ) ) + { + for( cp = record; *cp == ' ' || *cp == '\t'; cp++ ) + ; + if( isdigit(*cp) || (*cp=='-' && isdigit(*(cp+1))) ) + found = 1; + (*line)++; + } + + if( !found ) + return EOF; + else + { + firstchar = *cp; + while( *cp != '\n' && *cp != '\r' && *cp != '\0' ) + cp++; + *cp = '\0'; + return( firstchar ); + } +} + + +char *utlFileFindRecord( char *fname, char *pattern, char *record ) +{ + char *cp; + int line; + FILE *fp; + + + if( (fp = fopen(fname,"rt")) == NULL ) { utlPostError( utlErrMsgOpenFile(fname) ); return NULL; } + + line = 0; + cp = NULL; + while( utlReadNextRecord( fp, record, &line ) != EOF ) { + + if( (cp = strstr(record, pattern) ) == NULL ) continue; + + break; + } + + fclose(fp); + + return cp; +} + + +int utlFileParceToString( FILE *fp, char *pattern ) +{ + int ch,pos=0,ierr=1; + + + while( (ch=fgetc(fp)) != EOF ) { + + if( ch != pattern[pos] ) { + pos = 0; + continue; + } + + pos++; + + if( pattern[pos] == '\0' ) { + ierr = 0; + break; + } + + } + + return ierr; +} + + +int utlGetNumberOfRecords( const char *fname ) +{ + FILE *fp; + int nrec,line=0; + char record[1024]; + + + fp = fopen( fname, "rt" ); + if( fp == NULL ) return 0; + + nrec = 0; + while( utlReadNextRecord( fp, record, &line ) != EOF ) + nrec++; + + fclose( fp ); + + return nrec; +} + + +int utlGetNumberOfDataRecords( const char *fname ) +{ + FILE *fp; + int nrec,line=0; + char record[1024]; + + + fp = fopen( fname, "rt" ); + if( fp == NULL ) return 0; + + nrec = 0; + while( utlReadNextDataRecord( fp, record, &line ) != EOF ) + nrec++; + + fclose( fp ); + + return nrec; +} + + +int utlFileReadOption( char *fname, char *option_name, char *contents ) +{ + FILE *fp; + int found,line; + char record[1024]; + + + fp = fopen( fname, "rt" ); + if( !fp ) return 0; + + for( line=0, found=0; utlReadNextRecord( fp, record, &line ) != EOF; ) { + if( utlStringReadOption( record, option_name, contents ) == 0 ) { + found = 1; + break; + } + } + fclose( fp ); + + if( found ) + return 0; + else + return -1; +} + + +char *utlFileChangeExtension( const char *fname, const char *newext ) +// Change file extension. Return new file name +{ + char buf[256], *cp; + int len,extlen; + + len = strlen(fname); + extlen = strlen(newext); + memset( buf, 0, len+extlen+1 ); + strcpy( buf, fname ); + + for( cp=&buf[len-1]; *cp!='.' && cp>=&buf[0]; cp-- ) ; + + if( *cp=='.' ) + sprintf( cp+1, "%s", newext ); + else + sprintf( &buf[len], ".%s", newext ); + + return strdup( buf ); +} + + +char *utlFileAddExtension( const char *fname, const char *addext ) +// Add new extension. Return new file name +{ + char buf[256], *cp; + int len,extlen; + + len = strlen(fname); + extlen = strlen(addext); + memset( buf, 0, len+extlen+1 ); + sprintf( buf, "%s.%s", fname, addext ); + + return strdup( buf ); +} + + +char *utlFileRemoveExtension( const char *fname ) +// Remove file extension. Return file name without extension +{ + char buf[256], *cp; + + strcpy( buf, fname ); + for( cp=&buf[strlen(fname)-1]; *cp!='.' && cp>=&buf[0]; cp-- ) ; + if( *cp=='.' ) *cp = '\0'; + + return strdup( buf ); +} + + +int utlFileRemoveExtension( char *newname, const char *fname ) +// Remove file extension +{ + char *cp; + + strcpy( newname, fname ); + for( cp=&newname[strlen(newname)-1]; *cp!='.' && cp>=&newname[0]; cp-- ) ; + if( *cp=='.' ) *cp = '\0'; + + return 0; +} + + +int utlReadXYdata( char *fname, double **x, double **y ) +// Reads XY ASCII data file. Returns number of data points read +{ + FILE *fp; + char record[256]; + int n,line=0; + double xv,yv; + + + fp = fopen( fname, "rt" ); + if( fp == NULL ) + return utlPostError( utlErrMsgOpenFile(fname) ); + + // Calculate number of data lines + for( n=0; utlReadNextDataRecord( fp, record, &line ) != EOF; ) { + if( sscanf( record, "%lf %lf", &xv, &yv ) != 2 ) + return utlPostError( utlErrMsgReadFile( fname, line, "X Y" ) ); + n++; + } + + // Allocate memory + *x = new double [n]; + *y = new double [n]; + + // Read data + rewind( fp ); + line=0; + for( int k=0; k | +// x: initial seed for random number generator (integer) | +// | +// | +// ru: normal random number | +// x: seed for next call (integer) | +// | +// --------------------------------------------------------+ +{ + + seed = seed * 65539; + if( seed < 0 ) seed = (seed + 2147483647) + 1; + + return( (double)seed * 0.4656613e-9 ); +} + + +double utlRandom( double avg, double err ) +// return value which randomly deviates from avg: value = avg +-err +// be sure to call srand() before calling this function +{ + return( avg + ( (double)rand()/RAND_MAX-0.5 )*2*err ); +} + +double utlNormal( int& seed, double avg, double stdev ) +//======================================================== +// NORMAL RANDOM NUMBER GENERATOR (BOX-MULLER METHOD) | +// | +// | +// x: initial seed for random number generator | +// av: average valuec st: standard deviation | +// | +// | +// rn: normal random numberc | +// | +// --------------------------------------------------------+ +{ + double r1, r2, w1, w2; + + r1 = utlRandom( seed ); + r2 = utlRandom( seed ); + w1 = log10(r1); + w1 = sqrt(-2*w1); + w2 = sin(2*My_PI * r2); + + return ( avg + stdev*w1*w2 ); +} + + +int utlPointInPolygon( int nvrtx, double *xvrtx, double *yvrtx, double x, double y ) +// Check if point lies inside polygon +{ + int l,ln,nsec; + double yside; + + + for( nsec=0, l=0; l= 0 ) + continue; + + yside = yvrtx[l] + (yvrtx[ln]-yvrtx[l])/(xvrtx[ln]-xvrtx[l])*(x-xvrtx[l]); + + if( yside > y ) + nsec++; + } + + if( (nsec/2)*2 != nsec ) + return 1; + else + return 0; + +} + + + +/****************************************************************/ +/*** ***/ +/*** O T H E R S ***/ +/*** ***/ +/****************************************************************/ + +void utlTimeSplit( double ctime, int& nHour, int& nMin, int& nSec, int& nMsec ) +// ctime is time in seconds with milliseconds as fraction +{ + int fullSec; + + fullSec = (int)ctime; + + nMsec = (int)((ctime - fullSec)*1000 + 0.1); + nHour = fullSec/3600; + nMin = (fullSec - nHour*3600)/60; + nSec = fullSec - nHour*3600 - nMin*60; + + return; +} + + +char *utlTimeSplitString( double ctime ) +{ + int nHour,nMin,nSec,nMsec; + static char buf[32]; + + utlTimeSplit( ctime, nHour, nMin, nSec, nMsec ); + + if( nMsec > 0 ) + sprintf( buf, "%2.2d:%2.2d:%2.2d.%3.3d", nHour,nMin,nSec,nMsec ); + else + sprintf( buf, "%2.2d:%2.2d:%2.2d", nHour,nMin,nSec ); + + return buf; +} +char *utlTimeSplitString( int ctime ) +{ + int nHour,nMin,nSec,nMsec; + static char buf[32]; + + utlTimeSplit( (double)ctime, nHour, nMin, nSec, nMsec ); + + sprintf( buf, "%2.2d:%2.2d:%2.2d", nHour,nMin,nSec ); + + return buf; +} + + +int utlTimeHour( double ctime ) +{ + int nHour,nMin,nSec,nMsec; + + utlTimeSplit( ctime, nHour, nMin, nSec, nMsec ); + + return nHour; +} +int utlTimeHour( int ctime ) +{ + int nHour,nMin,nSec,nMsec; + + utlTimeSplit( (double)ctime, nHour, nMin, nSec, nMsec ); + + return nHour; +} + + +int utlTimeMin( double ctime ) +{ + int nHour,nMin,nSec,nMsec; + + utlTimeSplit( ctime, nHour, nMin, nSec, nMsec ); + + return nMin; +} +int utlTimeMin( int ctime ) +{ + int nHour,nMin,nSec,nMsec; + + utlTimeSplit( (double)ctime, nHour, nMin, nSec, nMsec ); + + return nMin; +} + + +int utlTimeSec( double ctime ) +{ + int nHour,nMin,nSec,nMsec; + + utlTimeSplit( ctime, nHour, nMin, nSec, nMsec ); + + return nSec; +} +int utlTimeSec( int ctime ) +{ + int nHour,nMin,nSec,nMsec; + + utlTimeSplit( (double)ctime, nHour, nMin, nSec, nMsec ); + + return nSec; +} diff --git a/cpu/branches/web/utilits.h b/cpu/branches/web/utilits.h new file mode 100644 index 0000000000000000000000000000000000000000..423a310c2b0ef7feb5de7df27b4277ac758dfbfe --- /dev/null +++ b/cpu/branches/web/utilits.h @@ -0,0 +1,132 @@ +// last modified 11.07.2012 +#ifndef UTIL_H +#define UTIL_H + +#include + +#define MaxFileRecordLength 16384 +#define RealMax 1.e+30 +#define RealMin 1.e-30 + +#define My_PI 3.14159265358979 +#define g2r(x) (((double)(x))*My_PI/180) +#define r2g(x) (((double)(x))/My_PI*180) +#define cosdeg(x) cos(g2r(x)) +#define sindeg(x) sin(g2r(x)) +#define tandeg(x) tan(g2r(x)) + +#define My_max(a,b) (((a) > (b)) ? (a) : (b)) +#define My_min(a,b) (((a) < (b)) ? (a) : (b)) + +// Class Message +#define MSG_OUTSCRN 1 +#define MSG_OUTFILE 2 +class cMsg +{ +protected: + int enabled; + int channel; + char fname[64]; + +public: + cMsg(); + ~cMsg(); + void enable(); + void disable(); + void setchannel( int newchannel ); + void setfilename( const char* newfilename ); + int print( const char* fmt, ... ); + }; +extern cMsg Msg; + + +// Class Error message +class cErrMsg : public cMsg +{ + +public: + cErrMsg(); + ~cErrMsg(); + int post( const char* fmt, ... ); + char* msgAllocateMem(); + char* msgOpenFile( const char *fname ); + char* msgReadFile( const char *fname, int line, const char *expected ); + }; +extern cErrMsg Err; + + +// Class Log +class cLog : public cMsg +{ +private: + int timestamp_enabled; + +public: + cLog(); + ~cLog(); + void start( const char* filename ); + void timestamp_enable(); + void timestamp_disable(); + int print( const char* fmt, ... ); + }; +extern cLog Log; + + +/*** Error messages and logging ***/ +char *utlCurrentTime(); + +// Following error functions are outdated. Use global Err object instead. +int utlPostError( const char* message ); +char *utlErrMsgMemory(); +char *utlErrMsgOpenFile( const char *fname ); +char *utlErrMsgEndOfFile( const char *fname ); +char *utlErrMsgReadFile( const char *fname, int line, const char *expected ); + +/*** Command-line processing ***/ +int utlCheckCommandLineOption( int argc, char **argv, const char *option, int letters_to_compare ); + +/*** String handling ***/ +int utlStringReadOption( char *record, char *option_name, char *contents ); +int utlReadSequenceOfInt( char *line, int *value ); +int utlReadSequenceOfDouble( char *line, double *value ); +int utlPickUpWordFromString( char *string, int n, char *word ); +char *utlPickUpWordFromString( char *string, int n ); +char *utlPickUpWordFromString( char *string, char *pos, char *word ); +int utlCountWordsInString( char *record ); +char *utlWords2String( int nwords, char **word ); +int utlSubString( char *logstr, int p1, int p2, char *substr ); +void utlPrintToString( char *string, int position, char *insert ); + +/*** File handling ***/ +int utlFindNextInputField( FILE *fp ); +int utlReadNextRecord( FILE *fp, char *record, int *line ); +int utlReadNextDataRecord( FILE *fp, char *record, int *line ); +char *utlFileFindRecord( char *fname, char *pattern, char *record ); +int utlFileParceToString( FILE *fp, char *pattern ); +int utlGetNumberOfRecords( const char *fname ); +int utlGetNumberOfDataRecords( const char *fname ); +int utlFileReadOption( char *fname, char *option_name, char *contents ); +char *utlFileChangeExtension( const char *fname, const char *newext ); +char *utlFileRemoveExtension( const char *fname ); +int utlFileRemoveExtension( char *newname, const char *fname ); +char *utlFileAddExtension( const char *fname, const char *addext ); +int utlReadXYdata( char *fname, double **x, double **y ); + +/*** MATH & GEOMETRY ***/ +double utlRandom( int& seed ); +double utlRandom( double avg, double err ); +double utlNormal( int& seed, double avg, double stdev ); +int utlPointInPolygon( int nvrtx, double *xvrtx, double *yvrtx, double x, double y ); + +/*** O T H E R S ***/ +void utlTimeSplit( double ctime, int& nHour, int& nMin, int& nSec, int& nMsec ); +char *utlTimeSplitString( double ctime ); +char *utlTimeSplitString( int ctime ); +int utlTimeHour( double timesec ); +int utlTimeHour( int timesec ); +int utlTimeMin( double timesec ); +int utlTimeMin( int timesec ); +int utlTimeSec( double timesec ); +int utlTimeSec( int timesec ); + +#endif // UTIL_H