Commit d3825031 authored by Johannes Spazier

All files were moved into a new subdirectory named "src/".

parent 917d9b0b
......@@ -17,17 +17,14 @@ OBJECTS=$(patsubst %.cpp, %.o, $(CPPS) ) $(patsubst %.cu,%.o,$(CUS) )
all: EasyWave
EasyWave: $(OBJECTS) link.o
EasyWave: $(OBJECTS)
$(NVCC) -o $@ $^
%.o: %.cpp *.h
$(CC) -c $(CFLAGS) -o $@ $<
%.o: %.cu *.cuh *.h
$(NVCC) -dc $(NVFLAGS) -x cu -o $@ $<
link.o: $(CU_OBJS)
$(NVCC) -dlink $(NVFLAGS) -o $@ $^
$(NVCC) $(NVFLAGS) -c -x cu -o $@ $<
clean:
rm -f EasyWave *.o
#include "ewGpuNode.cuh"
#include "ewCudaKernels.cuh"
__device__ int4 g_MinMax = {0,0,0,0};
__global__ void runWaveUpdateKernel( KernelData data ) {
Params& dp = data.params;
......@@ -209,5 +207,5 @@ __global__ void runGridExtendKernel2( KernelData data ) {
tmp.z = dp.jMin;
tmp.w = dp.jMax;
g_MinMax = tmp;
*(data.g_MinMax) = tmp;
}
#ifndef EW_KERNELS_H
#define EW_KERNELS_H
extern __device__ int4 g_MinMax;
__global__ void runWaveUpdateKernel( KernelData data );
__global__ void runWaveBoundaryKernel( KernelData data );
__global__ void runFluxUpdateKernel( KernelData data );
......
......@@ -49,6 +49,8 @@ int CGpuNode::mallocMem() {
CUDA_CALL( cudaMalloc( &(data.cB3), dp.nI * sizeof(float) ) );
CUDA_CALL( cudaMalloc( &(data.cB4), dp.nJ * sizeof(float) ) );
CUDA_CALL( cudaMalloc( &(data.g_MinMax), sizeof(int4) ) );
/* TODO: make sure that pitch is a multiple of 4 and the same for each cudaMallocPitch() call */
dp.pI = pitch / sizeof(float);
......@@ -154,6 +156,8 @@ int CGpuNode::freeMem() {
CUDA_CALL( cudaFree( data.cB3 ) );
CUDA_CALL( cudaFree( data.cB4 ) );
CUDA_CALL( cudaFree( data.g_MinMax ) );
float total_dur = 0.f;
for( int j = 0; j < 5; j++ ) {
printf_v("Duration %u: %.3f\n", j, dur[j]);
......@@ -201,11 +205,8 @@ int CGpuNode::run() {
runGridExtendKernel2<<<1,1>>>( data );
CUDA_CALL( cudaEventRecord( evtEnd[4], 0 ) );
void *g_mm;
CUDA_CALL( cudaGetSymbolAddress( &g_mm, g_MinMax ) );
int4 MinMax;
CUDA_CALL( cudaMemcpy( &MinMax, g_mm, sizeof(int4), cudaMemcpyDeviceToHost ) );
CUDA_CALL( cudaMemcpy( &MinMax, data.g_MinMax, sizeof(int4), cudaMemcpyDeviceToHost ) );
cudaDeviceSynchronize();
Imin = dp.iMin = MinMax.x;
Imax = dp.iMax = MinMax.y;
......
......@@ -52,6 +52,8 @@ public:
Params params;
int4 *g_MinMax;
__device__ int le( int ij ) { return ij - params.pI; }
__device__ int ri( int ij ) { return ij + params.pI; }
__device__ int up( int ij ) { return ij + 1; }
......
......@@ -74,36 +74,59 @@ int ewLoadBathymetry()
Dx = Re * g2r( DLon ); // in m along the equator
Dy = Re * g2r( DLat );
/* NOTE: ideally everything would be read in a single step, but that does not work because rows and columns are transposed
* (and a single read is only possible with binary data anyway) - use a temporary buffer for now (consumes additional memory!) */
float *buf = new float[ NLat*NLon ];
ierr = fread( buf, sizeof(float), NLat*NLon, fp );
if( isBin ) {
for( i=1; i<=NLon; i++ ) {
for( j=1; j<=NLat; j++ ) {
/* NOTE: ideally everything would be read in a single step, but that does not work because rows and columns are transposed
* (and a single read is only possible with binary data anyway) - use a temporary buffer for now (consumes additional memory!) */
float *buf = new float[ NLat*NLon ];
ierr = fread( buf, sizeof(float), NLat*NLon, fp );
m = idx(j,i);
for( i=1; i<=NLon; i++ ) {
for( j=1; j<=NLat; j++ ) {
m = idx(j,i);
if( isBin )
fval = buf[ (j-1) * NLon + (i-1) ];
//ierr = fread( &fval, sizeof(float), 1, fp );
else
ierr = fscanf( fp, " %f ", &fval );
if( isBin )
fval = buf[ (j-1) * NLon + (i-1) ];
//ierr = fread( &fval, sizeof(float), 1, fp );
Node(m, iTopo) = fval;
Node(m, iTime) = -1;
Node(m, iD) = -fval;
Node(m, iTopo) = fval;
Node(m, iTime) = -1;
Node(m, iD) = -fval;
if( Node(m, iD) < 0 ) {
Node(m, iD) = 0.0f;
} else if( Node(m, iD) < Par.dmin ) {
Node(m, iD) = Par.dmin;
if( Node(m, iD) < 0 ) {
Node(m, iD) = 0.0f;
} else if( Node(m, iD) < Par.dmin ) {
Node(m, iD) = Par.dmin;
}
}
}
}
}
delete[] buf;
} else {
delete[] buf;
for( j=1; j<=NLat; j++ ) {
for( i=1; i<=NLon; i++ ) {
m = idx(j,i);
ierr = fscanf( fp, " %f ", &fval );
Node(m, iTopo) = fval;
Node(m, iTime) = -1;
Node(m, iD) = -fval;
if( Node(m, iD) < 0 ) {
Node(m, iD) = 0.0f;
} else if( Node(m, iD) < Par.dmin ) {
Node(m, iD) = Par.dmin;
}
}
}
}
for( k=1; k<MAX_VARS_PER_NODE-2; k++ ) {
Node.initMemory( k, 0 );
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment