ewGpuNode.cuh 3.34 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24
/*
 * EasyWave - A realtime tsunami simulation program with GPU support.
 * Copyright (C) 2014  Andrey Babeyko, Johannes Spazier
 * GFZ German Research Centre for Geosciences (http://www.gfz-potsdam.de)
 *
 * Parts of this program (especially the GPU extension) were developed
 * within the context of the following publicly funded project:
 * - TRIDEC, EU 7th Framework Programme, Grant Agreement 258723
 *   (http://www.tridec-online.eu)
 *
 * This program is free software: you can redistribute it and/or modify
 * it under the terms of the GNU Affero General Public License as
 * published by the Free Software Foundation, either version 3 of the
 * License, or (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Affero General Public License for more details.
 *
 * You should have received a copy of the GNU Affero General Public License
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 */

25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86
#ifndef EW_GPUNODE_H
#define EW_GPUNODE_H

/* FIXME: check header dependencies */
#include "easywave.h"
#include "ewNode.h"
#include <stdio.h>

#define CUDA_CALL(x) if( x != cudaSuccess ) { fprintf( stderr, "Error in file %s on line %u: %s\n", __FILE__, __LINE__, cudaGetErrorString( cudaGetLastError() ) ); return 1; }

#undef idx

class Params {

public:
	int mTime;
	int nI;
	int nJ;
	int iMin;
	int iMax;
	int jMin;
	int jMax;
	float sshArrivalThreshold;
	float sshZeroThreshold;
	float sshClipThreshold;

	/* pitch / sizeof(float) */
	size_t pI;
	size_t lpad;
};

class KernelData {

public:
	/* 2-dim */
	float *d;
	float *h;
	float *hMax;
	float *fM;
	float *fN;
	float *cR1;
	float *cR2;
	float *cR4;
	float *tArr;

	/* 1-dim */
	float *cR6;
	float *cB1;
	float *cB2;
	float *cB3;
	float *cB4;

	Params params;

	int4 *extend;
	int devID;
	int devNum;

	__device__ int le( int ij ) { return ij - params.pI; }
	__device__ int ri( int ij ) { return ij + params.pI; }
	__device__ int up( int ij ) { return ij + 1; }
	__device__ int dn( int ij ) { return ij - 1; }
87
	__host__ __device__ int idx( int i, int j ) { return (j-1) + (i-1) * params.pI + params.lpad; }
88 89
};

90 91 92 93 94 95 96 97 98 99 100 101 102 103
class Gpu {

public:
	int id;

	int maxId;

	static const short NEVENTS = 7;
	cudaEvent_t evtStart[NEVENTS];
	cudaEvent_t evtEnd[NEVENTS];
	float dur[NEVENTS];

};

104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
class VGpu {

public:
	int off;
	int end;
	int size;
	KernelData data;

	int gt, gb;

	static const short NSTREAMS = 2;
	cudaStream_t stream[NSTREAMS];

	cudaEvent_t evtSync;

119 120
	Gpu *dev;
	int relId;
121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136

	int nBlocks;
	dim3 threads;
	dim3 blocks;

	bool hasLine( int i ) { return (i >= off && i <= end ); }
	int getRel( int i ) { return (i - off + 1 + gt); }
};

/* GPU dependent */
class CGpuNode : public CArrayNode {

protected:
	VGpu *vgpus;
	Params params;

137 138
	Gpu *gpus;

139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157
	int4 *extend;

	/* line size in bytes */
	size_t pitch;

	/* specifies if data was already copied in the current calculation step */
	bool copied;

	/* multiple GPUs */
	int num_virtual_gpus;
	int num_real_gpus;

public:
	CGpuNode();
	~CGpuNode();
	int mallocMem();
	int copyToGPU();
	int copyFromGPU();
	int copyIntermediate();
158
	int copyPOIs();
159 160 161 162 163 164 165 166 167 168
	int freeMem();
	int run();

private:
	int init_vgpus();
	int updateParams( VGpu& vgpu );
	bool isActive( VGpu& vgpu );
};

#endif /* EW_GPUNODE_H */