ewGpuNode.cuh 2.25 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62
#ifndef EW_GPUNODE_H
#define EW_GPUNODE_H

/* FIXME: check header dependencies */
#include "easywave.h"
#include "ewNode.h"
#include <stdio.h>

#define CUDA_CALL(x) if( x != cudaSuccess ) { fprintf( stderr, "Error in file %s on line %u: %s\n", __FILE__, __LINE__, cudaGetErrorString( cudaGetLastError() ) ); return 1; }

#undef idx

class Params {

public:
	int mTime;
	int nI;
	int nJ;
	int iMin;
	int iMax;
	int jMin;
	int jMax;
	float sshArrivalThreshold;
	float sshZeroThreshold;
	float sshClipThreshold;

	/* pitch / sizeof(float) */
	size_t pI;
	size_t lpad;
};

class KernelData {

public:
	/* 2-dim */
	float *d;
	float *h;
	float *hMax;
	float *fM;
	float *fN;
	float *cR1;
	float *cR2;
	float *cR4;
	float *tArr;

	/* 1-dim */
	float *cR6;
	float *cB1;
	float *cB2;
	float *cB3;
	float *cB4;

	Params params;

	int4 *extend;
	int devID;
	int devNum;

	__device__ int le( int ij ) { return ij - params.pI; }
	__device__ int ri( int ij ) { return ij + params.pI; }
	__device__ int up( int ij ) { return ij + 1; }
	__device__ int dn( int ij ) { return ij - 1; }
63
	__host__ __device__ int idx( int i, int j ) { return (j-1) + (i-1) * params.pI + params.lpad; }
64 65
};

66 67 68 69 70 71 72 73 74 75 76 77 78 79
class Gpu {

public:
	int id;

	int maxId;

	static const short NEVENTS = 7;
	cudaEvent_t evtStart[NEVENTS];
	cudaEvent_t evtEnd[NEVENTS];
	float dur[NEVENTS];

};

80 81 82 83 84 85 86 87 88 89 90 91 92 93 94
class VGpu {

public:
	int off;
	int end;
	int size;
	KernelData data;

	int gt, gb;

	static const short NSTREAMS = 2;
	cudaStream_t stream[NSTREAMS];

	cudaEvent_t evtSync;

95 96
	Gpu *dev;
	int relId;
97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112

	int nBlocks;
	dim3 threads;
	dim3 blocks;

	bool hasLine( int i ) { return (i >= off && i <= end ); }
	int getRel( int i ) { return (i - off + 1 + gt); }
};

/* GPU dependent */
class CGpuNode : public CArrayNode {

protected:
	VGpu *vgpus;
	Params params;

113 114
	Gpu *gpus;

115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133
	int4 *extend;

	/* line size in bytes */
	size_t pitch;

	/* specifies if data was already copied in the current calculation step */
	bool copied;

	/* multiple GPUs */
	int num_virtual_gpus;
	int num_real_gpus;

public:
	CGpuNode();
	~CGpuNode();
	int mallocMem();
	int copyToGPU();
	int copyFromGPU();
	int copyIntermediate();
134
	int copyPOIs();
135 136 137 138 139 140 141 142 143 144
	int freeMem();
	int run();

private:
	int init_vgpus();
	int updateParams( VGpu& vgpu );
	bool isActive( VGpu& vgpu );
};

#endif /* EW_GPUNODE_H */