/*
 * EasyWave - A realtime tsunami simulation program with GPU support.
 * Copyright (C) 2014 Andrey Babeyko, Johannes Spazier
 * GFZ German Research Centre for Geosciences (http://www.gfz-potsdam.de)
 *
 * Parts of this program (especially the GPU extension) were developed
 * within the context of the following publicly funded project:
 * - TRIDEC, EU 7th Framework Programme, Grant Agreement 258723
 * (http://www.tridec-online.eu)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ #ifndef EW_GPUNODE_H #define EW_GPUNODE_H /* FIXME: check header dependencies */ #include "easywave.h" #include "ewNode.h" #include #define CUDA_CALL(x) if( x != cudaSuccess ) { fprintf( stderr, "Error in file %s on line %u: %s\n", __FILE__, __LINE__, cudaGetErrorString( cudaGetLastError() ) ); return 1; } #undef idx class Params { public: int mTime; int nI; int nJ; int iMin; int iMax; int jMin; int jMax; float sshArrivalThreshold; float sshZeroThreshold; float sshClipThreshold; /* pitch / sizeof(float) */ size_t pI; size_t lpad; }; class KernelData { public: /* 2-dim */ float *d; float *h; float *hMax; float *fM; float *fN; float *cR1; float *cR2; float *cR4; float *tArr; /* 1-dim */ float *cR6; float *cB1; float *cB2; float *cB3; float *cB4; Params params; int4 *extend; int devID; int devNum; __device__ int le( int ij ) { return ij - params.pI; } __device__ int ri( int ij ) { return ij + params.pI; } __device__ int up( int ij ) { return ij + 1; } __device__ int dn( int ij ) { return ij - 1; } __host__ __device__ int idx( int i, int j ) { return (j-1) + (i-1) * params.pI + params.lpad; } }; class Gpu { public: int id; int maxId; static const short NEVENTS = 7; cudaEvent_t evtStart[NEVENTS]; cudaEvent_t evtEnd[NEVENTS]; float dur[NEVENTS]; }; class VGpu { public: int off; int end; int size; KernelData data; int gt, gb; static const short NSTREAMS = 2; cudaStream_t stream[NSTREAMS]; cudaEvent_t evtSync; Gpu *dev; int relId; int nBlocks; dim3 threads; dim3 blocks; bool hasLine( int i ) { return (i >= off && i <= end ); } int getRel( int i ) { return (i - off + 1 + gt); } }; /* GPU dependent */ class CGpuNode : public CArrayNode { protected: VGpu *vgpus; Params params; Gpu *gpus; int4 *extend; /* line size in bytes */ size_t pitch; /* specifies if data was already copied in the current calculation step */ bool copied; /* multiple GPUs */ int num_virtual_gpus; int num_real_gpus; public: CGpuNode(); ~CGpuNode(); int mallocMem(); int copyToGPU(); int copyFromGPU(); 
int copyIntermediate(); int copyPOIs(); int freeMem(); int run(); private: int init_vgpus(); int updateParams( VGpu& vgpu ); bool isActive( VGpu& vgpu ); }; #endif /* EW_GPUNODE_H */