Commit 46df63b3 authored by Johannes Spazier

- Prefer the L1 cache (set the device cache configuration to cudaFuncCachePreferL1).

- Launch the *Update and *Boundary kernels in different streams so that they can run concurrently.
parent ddec049d
......@@ -19,6 +19,8 @@ CGpuNode::CGpuNode() {
cudaSetDevice( j );
cudaDeviceSetCacheConfig( cudaFuncCachePreferL1 );
gpus[j].id = j;
for( int i = 0; i < gpus[j].NEVENTS; i++ ) {
......@@ -377,31 +379,13 @@ int CGpuNode::run() {
if( isActive( vgpu ) ) {
runWaveUpdateKernel<<<vgpu.blocks,vgpu.threads,0,vgpu.stream[0]>>>( data );
runWaveBoundaryKernel<<<vgpu.nBlocks,nThreads,0,vgpu.stream[1]>>>( data );
}
if( Par.verbose && vgpu.relId == vgpu.dev->maxId )
CUDA_CALL( cudaEventRecord( vgpu.dev->evtEnd[0] ) );
}
for( int i = 0; i < num_virtual_gpus; i++ ) {
VGpu& vgpu = vgpus[i];
KernelData& data = vgpu.data;
CUDA_CALL( cudaSetDevice( vgpu.dev->id ) );
if( Par.verbose && vgpu.relId == 0 )
CUDA_CALL( cudaEventRecord( vgpu.dev->evtStart[1] ) );
if( isActive( vgpu ) ) {
runWaveBoundaryKernel<<<vgpu.nBlocks,nThreads,0,vgpu.stream[0]>>>( data );
}
if( Par.verbose && vgpu.relId == vgpu.dev->maxId )
CUDA_CALL( cudaEventRecord( vgpu.dev->evtEnd[1] ) );
}
for( int i = 0; i < num_virtual_gpus; i++ ) {
VGpu& vgpu = vgpus[i];
......@@ -460,31 +444,13 @@ int CGpuNode::run() {
if( isActive( vgpu ) ) {
runFluxUpdateKernel<<<vgpu.blocks,vgpu.threads,0,vgpu.stream[0]>>>( data );
runFluxBoundaryKernel<<<vgpu.nBlocks,nThreads,0,vgpu.stream[1]>>>( data );
}
if( Par.verbose && vgpu.relId == vgpu.dev->maxId )
CUDA_CALL( cudaEventRecord( vgpu.dev->evtEnd[2] ) );
}
for( int i = 0; i < num_virtual_gpus; i++ ) {
VGpu& vgpu = vgpus[i];
KernelData& data = vgpu.data;
CUDA_CALL( cudaSetDevice( vgpu.dev->id ) );
if( Par.verbose && vgpu.relId == 0 )
CUDA_CALL( cudaEventRecord( vgpu.dev->evtStart[3] ) );
if( isActive( vgpu ) ) {
runFluxBoundaryKernel<<<vgpu.nBlocks,nThreads,0,vgpu.stream[0]>>>( data );
}
if( Par.verbose && vgpu.relId == vgpu.dev->maxId )
CUDA_CALL( cudaEventRecord( vgpu.dev->evtEnd[3] ) );
}
for( int i = 0; i < num_virtual_gpus; i++ ) {
VGpu& vgpu = vgpus[i];
......
Markdown is supported
0% Try again or attach a new file.
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment