/*
* EasyWave - A realtime tsunami simulation program with GPU support.
* Copyright (C) 2014 Andrey Babeyko, Johannes Spazier
* GFZ German Research Centre for Geosciences (http://www.gfz-potsdam.de)
*
* Parts of this program (especially the GPU extension) were developed
* within the context of the following publicly funded project:
* - TRIDEC, EU 7th Framework Programme, Grant Agreement 258723
* (http://www.tridec-online.eu)
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Affero General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Affero General Public License for more details.
*
* You should have received a copy of the GNU Affero General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef EW_GPUNODE_H
#define EW_GPUNODE_H
/* FIXME: check header dependencies */
#include "easywave.h"
#include "ewNode.h"
#include <stdio.h>
/*
 * Evaluates the CUDA runtime call `x` exactly once. If the returned status is
 * not cudaSuccess (which is 0, hence the plain truth test below), a message
 * with file, line and the error string of that status is written to stderr
 * and the enclosing function returns 1. The macro can therefore only be used
 * inside functions whose return type accepts the integer 1.
 *
 * - The message is built from the status returned by `x` itself instead of
 *   from cudaGetLastError(), so it always names the failure that triggered
 *   the return.
 * - cudaGetLastError() is still called on failure, so the runtime's error
 *   state is reset exactly as it was before this change.
 * - `x` is parenthesized so that compound expressions expand as one operand.
 * - The expansion deliberately stays a plain `if` statement: existing call
 *   sites keep working with or without a trailing semicolon. As before, put
 *   braces around it when it forms the body of an if/else.
 */
#define CUDA_CALL(x) \
    if( cudaError_t ewCudaStatus_ = (x) ) { \
        (void) cudaGetLastError(); \
        fprintf( stderr, "Error in file %s on line %u: %s\n", __FILE__, __LINE__, cudaGetErrorString( ewCudaStatus_ ) ); \
        return 1; \
    }
/* Drop any macro named idx that is still defined at this point, so that the
 * member function KernelData::idx() below can be declared.
 * NOTE(review): presumably such a macro comes from the headers included above - confirm. */
#undef idx
/*
 * Plain bundle of scalar settings. It is embedded by value in KernelData and
 * also kept as a member of CGpuNode. Nothing in this header initializes the
 * fields; whoever creates an instance has to fill them in.
 */
class Params {

public:
  /* NOTE(review): presumably the current model time / step - confirm. */
  int mTime;

  /* NOTE(review): presumably the grid extent in i and j - confirm. */
  int nI, nJ;

  /* NOTE(review): presumably the index bounds of the region that is
   * currently computed - confirm against the kernels. */
  int iMin, iMax;
  int jMin, jMax;

  /* NOTE(review): thresholds, presumably on sea surface height - confirm
   * meaning and units where they are set. */
  float sshArrivalThreshold, sshZeroThreshold, sshClipThreshold;

  /* pitch / sizeof(float); KernelData::idx() multiplies it by (i-1) */
  size_t pI;

  /* constant offset that KernelData::idx() adds to every index */
  size_t lpad;
};
/*
 * Collection of array pointers plus a Params copy, together with small index
 * helpers that work on flat array offsets.
 * NOTE(review): presumably this object is handed to the kernels by value and
 * the pointers refer to device memory - confirm in the implementation file.
 */
class KernelData {

public:
  /* 2-dim */
  float *d, *h, *hMax;
  float *fM, *fN;
  float *cR1, *cR2, *cR4;
  float *tArr;

  /* 1-dim */
  float *cR6;
  float *cB1, *cB2, *cB3, *cB4;

  /* settings read by the index helpers below (pI and lpad) */
  Params params;

  int4 *extend;

  /* NOTE(review): presumably index of this device and number of devices - confirm. */
  int devID, devNum;

  /* Flat offset one row stride (params.pI elements) before ij. */
  __device__ int le( int ij ) {
    return ij - params.pI;
  }

  /* Flat offset one row stride (params.pI elements) after ij. */
  __device__ int ri( int ij ) {
    return ij + params.pI;
  }

  /* Flat offset of the element directly after ij. */
  __device__ int up( int ij ) {
    return ij + 1;
  }

  /* Flat offset of the element directly before ij. */
  __device__ int dn( int ij ) {
    return ij - 1;
  }

  /* Flat offset of cell (i, j): i selects the row (stride params.pI), j the
   * position inside the row, and params.lpad is added as a constant offset,
   * so (1, 1) maps to params.lpad. */
  __host__ __device__ int idx( int i, int j ) {
    return params.lpad + (i-1) * params.pI + (j-1);
  }
};
/*
 * Per-device record: two ids plus NEVENTS start/end event pairs and one float
 * per pair.
 * NOTE(review): presumably describes one physical GPU and `dur` accumulates
 * the time measured between evtStart[k] and evtEnd[k] - confirm.
 */
class Gpu {

public:
  /* number of slots in each of the three arrays below */
  static const short NEVENTS = 7;

  /* NOTE(review): presumably the CUDA device id and the highest id in use - confirm. */
  int id, maxId;

  cudaEvent_t evtStart[NEVENTS], evtEnd[NEVENTS];
  float dur[NEVENTS];
};
/*
 * State of one virtual GPU: the range of lines [off, end] it covers, its
 * KernelData, CUDA streams, one event, launch geometry and a pointer to the
 * Gpu record it refers to.
 */
class VGpu {

public:
  /* number of entries in `stream` */
  static const short NSTREAMS = 2;

  /* first and last line covered, see hasLine(); NOTE(review): `size` is
   * presumably the number of lines in between - confirm. */
  int off, end, size;

  KernelData data;

  /* gt is added to the relative line number in getRel().
   * NOTE(review): presumably counts of ghost lines at top/bottom - confirm. */
  int gt;
  int gb;

  cudaStream_t stream[NSTREAMS];
  cudaEvent_t evtSync;

  Gpu *dev;
  int relId;

  int nBlocks;
  dim3 threads;
  dim3 blocks;

  /* True exactly when line i lies in the closed range [off, end]. */
  bool hasLine( int i ) {
    return !( i < off || i > end );
  }

  /* Converts line i into a number relative to this virtual GPU:
   * i - off + 1, shifted by gt. */
  int getRel( int i ) {
    const int local = i - off + 1;
    return ( local + gt );
  }
};
/* GPU dependent */
/*
 * Node implementation on top of CArrayNode that keeps its bookkeeping in
 * VGpu / Gpu records. Only declarations live here; the member functions are
 * defined elsewhere.
 * NOTE(review): the int-returning members presumably yield 0 on success and a
 * non-zero value on failure (CUDA_CALL returns 1) - confirm in the
 * implementation file.
 */
class CGpuNode : public CArrayNode {

protected:
  /* NOTE(review): presumably an array with num_virtual_gpus entries - confirm. */
  VGpu *vgpus;

  Params params;

  /* NOTE(review): presumably an array with num_real_gpus entries - confirm. */
  Gpu *gpus;

  int4 *extend;

  /* line size in bytes */
  size_t pitch;

  /* specifies if data was already copied in the current calculation step */
  bool copied;

  /* multiple GPUs */
  int num_virtual_gpus, num_real_gpus;

public:
  CGpuNode();
  ~CGpuNode();

  /* memory management */
  int mallocMem();
  int freeMem();

  /* data transfer */
  int copyToGPU();
  int copyFromGPU();
  int copyIntermediate();
  int copyPOIs();

  /* computation */
  int run();

private:
  int init_vgpus();
  int updateParams( VGpu& vgpu );
  bool isActive( VGpu& vgpu );
};
#endif /* EW_GPUNODE_H */