Commit 589c5644 authored by Hannes Fuchs's avatar Hannes Fuchs

Moved svn branches to git branches, moved trunk to master

parent 36782b77
/*
* EasyWave - A realtime tsunami simulation program with GPU support.
* Copyright (C) 2014 Andrey Babeyko, Johannes Spazier
* GFZ German Research Centre for Geosciences (http://www.gfz-potsdam.de)
*
* Parts of this program (especially the GPU extension) were developed
* within the context of the following publicly funded project:
* - TRIDEC, EU 7th Framework Programme, Grant Agreement 258723
* (http://www.tridec-online.eu)
*
* Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
* the European Commission - subsequent versions of the EUPL (the "Licence"),
* complemented with the following provision: For the scientific transparency
* and verification of results obtained and communicated to the public after
* using a modified version of the work, You (as the recipient of the source
* code and author of this modified version, used to produce the published
* results in scientific communications) commit to make this modified source
* code available in a repository that is easily and freely accessible for a
* duration of five years after the communication of the obtained results.
*
* You may not use this work except in compliance with the Licence.
*
* You may obtain a copy of the Licence at:
* https://joinup.ec.europa.eu/software/page/eupl
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the Licence is distributed on an "AS IS" basis,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the Licence for the specific language governing permissions and
* limitations under the Licence.
*/
#define HEADER "\neasyWave ver.2013-04-11\n"
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
#include <utilits.h>
#include "easywave.h"
#include "ewGpuNode.cuh"
CNode *gNode;
double diff(timespec start, timespec end) {
timespec temp;
if ((end.tv_nsec-start.tv_nsec)<0) {
temp.tv_sec = end.tv_sec-start.tv_sec-1;
temp.tv_nsec = 1000000000+end.tv_nsec-start.tv_nsec;
} else {
temp.tv_sec = end.tv_sec-start.tv_sec;
temp.tv_nsec = end.tv_nsec-start.tv_nsec;
}
return (double)((double)temp.tv_nsec / 1000000000.0 + (double)temp.tv_sec);
}
int commandLineHelp( void );
int main( int argc, char **argv )
{
char buf[1024];
int ierr,argn,elapsed;
int lastProgress,lastPropagation,lastDump;
int loop;
printf(HEADER);
Err.setchannel(MSG_OUTFILE);
// Read parameters from command line and use default
ierr = ewParam( argc, argv ); if(ierr) return commandLineHelp();
// Log command line
/* FIXME: buffer overflow */
sprintf( buf, "Command line: " );
for( argn=1; argn<argc; argn++ ) {
strcat( buf, " " );
strcat( buf, argv[argn] );
}
Log.print( "%s", buf );
if( Par.gpu ) {
gNode = new CGpuNode();
} else {
gNode = new CStructNode();
//gNode = new CArrayNode();
}
CNode& Node = *gNode;
// Read bathymetry
ierr = ewLoadBathymetry(); if(ierr) return ierr;
// Read points of interest
ierr = ewLoadPOIs(); if(ierr) return ierr;
// Init tsunami with faults or uplift-grid
ierr = ewSource(); if(ierr) return ierr;
Log.print( "Read source from %s", Par.fileSource );
// Write model parameters into the log
ewLogParams();
if( Par.outPropagation ) ewStart2DOutput();
Node.copyToGPU();
// Main loop
Log.print("Starting main loop...");
printf("Starting main loop %ld msec\n", clock()/(CLOCKS_PER_SEC/1000) );
timespec start, end;
clock_gettime(CLOCK_MONOTONIC, &start);
for( Par.time=0,loop=1,lastProgress=Par.outProgress,lastPropagation=Par.outPropagation,lastDump=0;
Par.time<=Par.timeMax; loop++,Par.time+=Par.dt,lastProgress+=Par.dt,lastPropagation+=Par.dt ) {
/* FIXME: check if Par.poiDt can be used for those purposes */
if( Par.filePOIs && Par.poiDt && ((Par.time/Par.poiDt)*Par.poiDt == Par.time) ) {
Node.copyPOIs();
ewSavePOIs();
}
Node.run();
elapsed = ((int)clock())/CLOCKS_PER_SEC;
if( Par.outProgress ) {
if( lastProgress >= Par.outProgress ) {
printf( "Model time = %s, elapsed: %ld msec\n", utlTimeSplitString(Par.time), clock()/(CLOCKS_PER_SEC/1000) );
Log.print( "Model time = %s, elapsed: %ld msec", utlTimeSplitString(Par.time), clock()/(CLOCKS_PER_SEC/1000) );
lastProgress = 0;
}
}
if( Par.outPropagation ) {
if( lastPropagation >= Par.outPropagation ) {
Node.copyIntermediate();
ewOut2D();
lastPropagation = 0;
}
}
fflush(stdout);
if( Par.outDump ) {
if( (elapsed-lastDump) >= Par.outDump ) {
Node.copyIntermediate();
/* FIXME: needs tArr as well */
ewDumpPOIs();
ewDump2D();
lastDump = elapsed;
}
}
} // main loop
clock_gettime(CLOCK_MONOTONIC, &end);
Log.print("Finishing main loop");
/* TODO: check if theses calls can be combined */
Node.copyIntermediate();
Node.copyFromGPU();
// Final output
Log.print("Final dump...");
ewDumpPOIs();
ewDump2D();
Node.freeMem();
printf_v("Runtime: %.3lf\n", diff(start, end) * 1000.0);
delete gNode;
return 0;
}
//========================================================================
int commandLineHelp( void )
{
printf( "Usage: easyWave -grid ... -source ... -time ... [optional parameters]\n" );
printf( "-grid ... bathymetry in GoldenSoftware(C) GRD format (text or binary)\n" );
printf( "-source ... input wave either als GRD-file or file with Okada faults\n" );
printf( "-time ... simulation time in [min]\n" );
printf( "Optional parameters:\n" );
printf( "-step ... simulation time step, default- estimated from bathymetry\n" );
printf( "-coriolis use Coriolis fource, default- no\n" );
printf( "-poi ... POIs file\n" );
printf( "-label ... model name, default- 'eWave'\n" );
printf( "-progress ... show simulation progress each ... minutes, default- 10\n" );
printf( "-propagation ... write wave propagation grid each ... minutes, default- 5\n" );
printf( "-dump ... make solution dump each ... physical seconds, default- 0\n" );
printf( "-nolog deactivate logging\n" );
printf( "-poi_dt_out ... output time step for mariograms in [sec], default- 30\n" );
printf( "-poi_search_dist ... in [km], default- 10\n" );
printf( "-poi_min_depth ... in [m], default- 1\n" );
printf( "-poi_max_depth ... in [m], default- 10 000\n" );
printf( "-poi_report enable POIs loading report, default- disabled\n" );
printf( "-ssh0_rel ... relative threshold for initial wave, default- 0.01\n" );
printf( "-ssh0_abs ... absolute threshold for initial wave in [m], default- 0\n" );
printf( "-ssh_arrival ... threshold for arrival times in [m], default- 0.001\n" );
printf( " negative value considered as relative threshold\n" );
printf( "-gpu start GPU version of EasyWave (requires a CUDA capable device)\n" );
printf( "-verbose generate verbose output on stdout\n" );
printf( "\nExample:\n" );
printf( "\t easyWave -grid gebcoIndonesia.grd -source fault.inp -time 120\n\n" );
return -1;
}
-include ../../../Makefile.inc
CC=g++
NVCC=nvcc
CFLAGS=-O3 -I.
ARCH?=compute_20
CODE?=sm_21
NVFLAGS=$(CFLAGS) -gencode arch=$(ARCH),code=$(CODE)
CPPS=$(wildcard *.cpp)
CUS=$(wildcard *.cu)
CU_OBJS=$(patsubst %.cu,%.o,$(CUS) )
OBJECTS=$(patsubst %.cpp, %.o, $(CPPS) ) $(patsubst %.cu,%.o,$(CUS) )
all: EasyWave
EasyWave: $(OBJECTS) link.o
$(NVCC) -o $@ $^
%.o: %.cpp *.h
$(CC) -c $(CFLAGS) -o $@ $<
%.o: %.cu *.cuh *.h
$(NVCC) -dc $(NVFLAGS) -x cu -o $@ $<
link.o: $(CU_OBJS)
$(NVCC) -dlink $(NVFLAGS) -o $@ $^
clean:
rm -f EasyWave *.o
CC=g++
NVCC=nvcc
CFLAGS=-O3 -I. -g
CODE=sm_35
ARCH=compute_35
NVFLAGS=$(CFLAGS) -gencode arch=$(ARCH),code=$(CODE)
CPPS=$(wildcard *.cpp)
CUS=$(wildcard *.cu)
CU_OBJS=$(patsubst %.cu,%.o,$(CUS) )
OBJECTS=$(patsubst %.cpp, %.o, $(CPPS) ) $(patsubst %.cu,%.o,$(CUS) )
all: EasyWave
EasyWave: $(OBJECTS) link.o
$(NVCC) -g -G -o $@ $^
%.o: %.cpp *.h
$(CC) -c $(CFLAGS) -o $@ $<
%.o: %.cu *.cuh *.h
$(NVCC) -G -dc $(NVFLAGS) -x cu -o $@ $<
link.o: $(CU_OBJS)
$(NVCC) -G -dlink $(NVFLAGS) -o $@ $^
clean:
rm -f EasyWave *.o
/*
* EasyWave - A realtime tsunami simulation program with GPU support.
* Copyright (C) 2014 Andrey Babeyko, Johannes Spazier
* GFZ German Research Centre for Geosciences (http://www.gfz-potsdam.de)
*
* Parts of this program (especially the GPU extension) were developed
* within the context of the following publicly funded project:
* - TRIDEC, EU 7th Framework Programme, Grant Agreement 258723
* (http://www.tridec-online.eu)
*
* Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
* the European Commission - subsequent versions of the EUPL (the "Licence"),
* complemented with the following provision: For the scientific transparency
* and verification of results obtained and communicated to the public after
* using a modified version of the work, You (as the recipient of the source
* code and author of this modified version, used to produce the published
* results in scientific communications) commit to make this modified source
* code available in a repository that is easily and freely accessible for a
* duration of five years after the communication of the obtained results.
*
* You may not use this work except in compliance with the Licence.
*
* You may obtain a copy of the Licence at:
* https://joinup.ec.europa.eu/software/page/eupl
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the Licence is distributed on an "AS IS" basis,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the Licence for the specific language governing permissions and
* limitations under the Licence.
*/
#include "ewGpuNode.cuh"
#include "ewCudaKernels.cuh"
__global__ void runWaveUpdateKernel( KernelData data ) {
Params& dp = data.params;
int i = blockIdx.y * blockDim.y + threadIdx.y + dp.iMin;
int j = blockIdx.x * blockDim.x + threadIdx.x + dp.jMin;
int ij = data.idx(i,j);
float absH;
/* maybe unnecessary if controlled from outside */
if( i <= dp.iMax && j <= dp.jMax && data.d[ij] != 0 ) {
float hh = data.h[ij] - data.cR1[ij] * ( data.fM[ij] - data.fM[data.le(ij)] + data.fN[ij] * data.cR6[j] - data.fN[data.dn(ij)]*data.cR6[j-1] );
absH = fabs(hh);
if( absH < dp.sshZeroThreshold ) {
hh = 0.f;
} else if( hh > data.hMax[ij] ) {
data.hMax[ij] = hh;
//hMax[ij] = fmaxf(hMax[ij],h[ij]);
}
if( dp.sshArrivalThreshold && data.tArr[ij] < 0 && absH > dp.sshArrivalThreshold )
data.tArr[ij] = dp.mTime;
data.h[ij] = hh;
}
}
__global__ void runFluxUpdateKernel( KernelData data ) {
Params& dp = data.params;
int i = blockIdx.y * blockDim.y + threadIdx.y + dp.iMin;
int j = blockIdx.x * blockDim.x + threadIdx.x + dp.jMin;
int ij = data.idx(i,j);
if( i <= dp.iMax && j <= dp.jMax && data.d[ij] != 0 ) {
float hh = data.h[ij];
if( data.d[data.ri(ij)] != 0 )
data.fM[ij] = data.fM[ij] - data.cR2[ij]*(data.h[data.ri(ij)] - hh);
if( data.d[data.up(ij)] != 0 )
data.fN[ij] = data.fN[ij] - data.cR4[ij]*(data.h[data.up(ij)] - hh);
}
}
__global__ void runWaveBoundaryKernel( KernelData data ) {
KernelData& dt = data;
Params& dp = data.params;
int id = blockIdx.x * blockDim.x + threadIdx.x + 2;
int ij;
if( dt.devID == 0 ) {
if( id <= dp.nJ-1 ) {
ij = dt.idx(1,id);
dt.h[ij] = sqrtf( powf(dt.fM[ij],2.0f) + 0.25f*powf((dt.fN[ij] + dt.fN[dt.dn(ij)]),2.0f) )*dt.cB2[id-1];
if( dt.fM[ij] > 0 ) dt.h[ij] = -dt.h[ij];
}
if( id == 2 ) {
ij = dt.idx(1,1);
dt.h[ij] = sqrtf( powf(dt.fM[ij],2.0f) + powf(dt.fN[ij],2.0f) )*dt.cB1[0];
if( dt.fN[ij] > 0 ) dt.h[ij] = -dt.h[ij];
ij = dt.idx(1,dp.nJ);
dt.h[ij] = sqrtf( powf(dt.fM[ij],2.0f) + powf(dt.fN[dt.dn(ij)],2.0f) )*dt.cB3[0];
if( dt.fN[dt.dn(ij)] < 0 ) dt.h[ij] = -dt.h[ij];
}
} else if( dt.devID == dt.devNum - 1 ) {
if( id <= dp.nJ-1 ) {
ij = dt.idx(dp.nI,id);
dt.h[ij] = sqrtf( powf(dt.fM[dt.le(ij)],2.0f) + 0.25f*powf((dt.fN[ij] + dt.fN[dt.dn(ij)]),2.0f) )*dt.cB4[id-1];
if( dt.fM[dt.le(ij)] < 0 ) dt.h[ij] = -dt.h[ij];
}
if( id == 2 ) {
ij = dt.idx(dp.nI,1);
dt.h[ij] = sqrtf( powf(dt.fM[dt.le(ij)],2.0f) + powf(dt.fN[ij],2.0f) )*dt.cB1[dp.nI-1];
if( dt.fN[ij] > 0 ) dt.h[ij] = -dt.h[ij];
ij = dt.idx(dp.nI,dp.nJ);
dt.h[ij] = sqrtf( powf(dt.fM[dt.le(ij)],2.0f) + powf(dt.fN[dt.dn(ij)],2.0f) )*dt.cB3[dp.nI-1];
if( dt.fN[dt.dn(ij)] < 0 ) dt.h[ij] = -dt.h[ij];
}
}
if( id <= dp.nI - 1 ) {
ij = dt.idx(id,1);
dt.h[ij] = sqrtf( powf(dt.fN[ij],2.0f) + 0.25f*powf((dt.fM[ij] + dt.fM[dt.le(ij)]),2.0f) )*dt.cB1[id-1];
if( dt.fN[ij] > 0 ) dt.h[ij] = -dt.h[ij];
}
if( id <= dp.nI - 1 ) {
ij = dt.idx(id,dp.nJ);
dt.h[ij] = sqrtf( powf(dt.fN[dt.dn(ij)],2.0f) + 0.25f*powf((dt.fM[ij] + dt.fM[dt.dn(ij)]),2.0f) )*dt.cB3[id-1];
if( dt.fN[dt.dn(ij)] < 0 ) dt.h[ij] = -dt.h[ij];
}
}
__global__ void runFluxBoundaryKernel( KernelData data ) {
KernelData& dt = data;
Params& dp = data.params;
int id = blockIdx.x * blockDim.x + threadIdx.x + 1;
int ij;
if( dt.devID == 0 ) {
if( id <= dp.nJ ) {
ij = dt.idx(1,id);
dt.fM[ij] = dt.fM[ij] - dt.cR2[ij]*(dt.h[dt.ri(ij)] - dt.h[ij]);
}
if( id <= dp.nJ-1 ) {
ij = dt.idx(1,id);
dt.fN[ij] = dt.fN[ij] - dt.cR4[ij]*(dt.h[dt.up(ij)] - dt.h[ij]);
}
} else if( dt.devID == dt.devNum - 1 ) {
if( id <= dp.nJ-1 ) {
ij = dt.idx(dp.nI,id);
dt.fN[ij] = dt.fN[ij] - dt.cR4[ij]*(dt.h[dt.up(ij)] - dt.h[ij]);
}
}
if( id <= dp.nI - 1 ) {
ij = dt.idx(id,1);
dt.fM[ij] = dt.fM[ij] - dt.cR2[ij]*(dt.h[dt.ri(ij)] - dt.h[ij]);
}
if( id <= dp.nI - 1 ) {
ij = dt.idx(id,dp.nJ);
dt.fM[ij] = dt.fM[ij] - dt.cR2[ij]*(dt.h[dt.ri(ij)] - dt.h[ij]);
}
if( id <= dp.nI ) {
ij = dt.idx(id,1);
dt.fN[ij] = dt.fN[ij] - dt.cR4[ij]*(dt.h[dt.up(ij)] - dt.h[ij]);
}
}
__global__ void runGridExtendKernel( KernelData data ) {
Params& dp = data.params;
int id = blockIdx.x * blockDim.x + threadIdx.x;
if( id >= dp.jMin && id <= dp.jMax ) {
if( fabsf(data.h[data.idx(dp.iMin+2,id)]) > dp.sshClipThreshold )
atomicAdd( &(data.extend->x), 1 );
if( fabsf(data.h[data.idx(dp.iMax-2,id)]) > dp.sshClipThreshold )
atomicAdd( &(data.extend->y), 1 );
}
if( id >= dp.iMin && id <= dp.iMax ) {
if( fabsf(data.h[data.idx(id,dp.jMin+2)]) > dp.sshClipThreshold )
atomicAdd( &(data.extend->z), 1 );
if( fabsf(data.h[data.idx(id,dp.jMax-2)]) > dp.sshClipThreshold )
atomicAdd( &(data.extend->w), 1 );
}
}
/*
* EasyWave - A realtime tsunami simulation program with GPU support.
* Copyright (C) 2014 Andrey Babeyko, Johannes Spazier
* GFZ German Research Centre for Geosciences (http://www.gfz-potsdam.de)
*
* Parts of this program (especially the GPU extension) were developed
* within the context of the following publicly funded project:
* - TRIDEC, EU 7th Framework Programme, Grant Agreement 258723
* (http://www.tridec-online.eu)
*
* Licensed under the EUPL, Version 1.1 or - as soon they will be approved by
* the European Commission - subsequent versions of the EUPL (the "Licence"),
* complemented with the following provision: For the scientific transparency
* and verification of results obtained and communicated to the public after
* using a modified version of the work, You (as the recipient of the source
* code and author of this modified version, used to produce the published
* results in scientific communications) commit to make this modified source
* code available in a repository that is easily and freely accessible for a
* duration of five years after the communication of the obtained results.
*
* You may not use this work except in compliance with the Licence.
*
* You may obtain a copy of the Licence at:
* https://joinup.ec.europa.eu/software/page/eupl
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the Licence is distributed on an "AS IS" basis,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the Licence for the specific language governing permissions and
* limitations under the Licence.
*/
#include "ewGpuNode.cuh"
#include "ewCudaKernels.cuh"
CGpuNode::CGpuNode() {
pitch = 0;
copied = true;
/* TODO: make dynamic */
num_virtual_gpus = 4;
num_real_gpus = 2;
vgpus = new VGpu[num_virtual_gpus];
gpus = new Gpu[num_real_gpus];
cudaMallocHost( &extend, num_virtual_gpus * sizeof(int4) );
for( int j = 0; j < num_real_gpus; j++ ) {
cudaSetDevice( j );
cudaDeviceSetCacheConfig( cudaFuncCachePreferL1 );
gpus[j].id = j;
for( int i = 0; i < gpus[j].NEVENTS; i++ ) {
cudaEventCreate( &(gpus[j].evtStart[i]) );
cudaEventCreate( &(gpus[j].evtEnd[i]) );
gpus[j].dur[i] = 0.0f;
}
}
for( int j = 0; j < num_virtual_gpus; j++ ) {
VGpu& vgpu = vgpus[j];
vgpu.data.devID = j;