geoperil / easyWave

Commit 9e4d7de8
authored Feb 15, 2014 by Johannes Spazier
parent 9f6d2882

- Update of GridExtend-Kernel.
- The memory alignment was adjusted.
- The progress output is now flushed.

Showing 4 changed files with 69 additions and 88 deletions (+69 -88):

  code/trunk/src/EasyWave.cpp        +3   -1
  code/trunk/src/ewCudaKernels.cu    +40  -72
  code/trunk/src/ewCudaKernels.cuh   +1   -2
  code/trunk/src/ewGpuNode.cu        +25  -13
code/trunk/src/EasyWave.cpp

...
@@ -129,7 +129,9 @@ int main( int argc, char **argv )
 				lastProgress = 0;
 			}
 		}

+		fflush( stdout );

 		if( Par.outPropagation ) {
 			if( lastPropagation >= Par.outPropagation ) {
 				Node.copyIntermediate();
...
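For context on the flushing change: when stdout is redirected to a file or a pipe, the C runtime buffers it in blocks, so single-line progress messages can stay invisible until the buffer fills or the program exits. A minimal sketch of the pattern the commit adopts (illustrative only, not code from this repository):

#include <cstdio>

int main() {
	for( int step = 1; step <= 5; step++ ) {
		/* ... one chunk of simulation work ... */

		/* Without the flush this line may sit in the stdio buffer until the
		   program exits whenever stdout is piped or redirected. */
		printf( "progress: step %d of 5\n", step );
		fflush( stdout );
	}
	return 0;
}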
code/trunk/src/ewCudaKernels.cu

...
@@ -35,7 +35,7 @@ __global__ void runWaveUpdateKernel( KernelData data ) {
 	float absH;

 	/* maybe unnecessary if controlled from outside */
 	if( i <= dp.iMax && j <= dp.jMax && data.d[ij] != 0 ) {

 		float hh = data.h[ij] - data.cR1[ij] * ( data.fM[ij] - data.fM[data.le(ij)] + data.fN[ij] * data.cR6[j] - data.fN[data.dn(ij)] * data.cR6[j-1] );
...
@@ -49,10 +49,9 @@ __global__ void runWaveUpdateKernel( KernelData data ) {
 		}

 		if( dp.sshArrivalThreshold && data.tArr[ij] < 0 && absH > dp.sshArrivalThreshold )
 			data.tArr[ij] = dp.mTime;

 		data.h[ij] = hh;
 	}
 }
...
@@ -65,7 +64,7 @@ __global__ void runFluxUpdateKernel( KernelData data ) {
 	int j = blockIdx.x * blockDim.x + threadIdx.x + dp.jMin;
 	int ij = data.idx(i,j);

 	if( i <= dp.iMax && j <= dp.jMax && data.d[ij] != 0 ) {

 		float hh = data.h[ij];
...
@@ -171,106 +170,75 @@ __global__ void runFluxBoundaryKernel( KernelData data ) {
 }

-__device__ int4 c_MinMax = { 0, 0, 0, 0 };
-
-__global__ void runGridExtendKernel1( KernelData data ) {
-
-#if (__CUDA_ARCH__ >= 130)
-	Params& dp = data.params;
-
-	int id = blockIdx.x * blockDim.x + threadIdx.x + 1;
-
-	if( id >= dp.jMin && id <= dp.jMax ) {
-
-		if( fabsf( data.h[data.idx(dp.iMin+2,id)] ) > dp.sshClipThreshold )
-			atomicAdd( &c_MinMax.x, 1 );
-
-		if( fabsf( data.h[data.idx(dp.iMax-2,id)] ) > dp.sshClipThreshold )
-			atomicAdd( &c_MinMax.y, 1 );
-	}
-
-	if( id >= dp.iMin && id <= dp.iMax ) {
-
-		if( fabsf( data.h[data.idx(id,dp.jMin+2)] ) > dp.sshClipThreshold )
-			atomicAdd( &c_MinMax.z, 1 );
-
-		if( fabsf( data.h[data.idx(id,dp.jMax-2)] ) > dp.sshClipThreshold )
-			atomicAdd( &c_MinMax.w, 1 );
-	}
-#endif
-}
-
-__global__ void runGridExtendKernel2( KernelData data ) {
-
-	Params& dp = data.params;
-
-	int id = blockIdx.x * blockDim.x + threadIdx.x;
-
-	if( id == 0 ) {
-
-#if (__CUDA_ARCH__ >= 130)
-		if( c_MinMax.x )
-			dp.iMin = max( dp.iMin-1, 2 );
-
-		if( c_MinMax.y )
-			dp.iMax = min( dp.iMax+1, dp.nI-1 );
-
-		if( c_MinMax.z )
-			dp.jMin = max( dp.jMin-1, 2 );
-
-		if( c_MinMax.w )
-			dp.jMax = min( dp.jMax+1, dp.nJ-1 );
-
-		c_MinMax.x = 0;
-		c_MinMax.y = 0;
-		c_MinMax.z = 0;
-		c_MinMax.w = 0;
-#else
-		for( int j = dp.jMin; j <= dp.jMax; j++ ) {
-			if( fabsf( data.h[data.idx(dp.iMin+2,j)] ) > dp.sshClipThreshold ) {
-				dp.iMin = max( dp.iMin-1, 2 );
-				break;
-			}
-		}
-		for( int j = dp.jMin; j <= dp.jMax; j++ ) {
-			if( fabsf( data.h[data.idx(dp.iMax-2,j)] ) > dp.sshClipThreshold ) {
-				dp.iMax = min( dp.iMax+1, dp.nI-1 );
-				break;
-			}
-		}
-		for( int i = dp.iMin; i <= dp.iMax; i++ ) {
-			if( fabsf( data.h[data.idx(i,dp.jMin+2)] ) > dp.sshClipThreshold ) {
-				dp.jMin = max( dp.jMin-1, 2 );
-				break;
-			}
-		}
-		for( int i = dp.iMin; i <= dp.iMax; i++ ) {
-			if( fabsf( data.h[data.idx(i,dp.jMax-2)] ) > dp.sshClipThreshold ) {
-				dp.jMax = min( dp.jMax+1, dp.nJ-1 );
-				break;
-			}
-		}
-#endif
-
-		int4 tmp;
-		tmp.x = dp.iMin;
-		tmp.y = dp.iMax;
-		tmp.z = dp.jMin;
-		tmp.w = dp.jMax;
-
-		*(data.g_MinMax) = tmp;
-	}
-}
+__global__ void runGridExtendKernel( KernelData data ) {
+
+	Params& dp = data.params;
+
+	int id = blockIdx.x * blockDim.x + threadIdx.x + 1;
+
+#if (__CUDA_ARCH__ >= 130)
+	if( id >= dp.jMin && id <= dp.jMax ) {
+
+		if( fabsf( data.h[data.idx(dp.iMin+2,id)] ) > dp.sshClipThreshold )
+			atomicAdd( &(data.g_MinMax->x), 1 );
+
+		if( fabsf( data.h[data.idx(dp.iMax-2,id)] ) > dp.sshClipThreshold )
+			atomicAdd( &(data.g_MinMax->y), 1 );
+	}
+
+	if( id >= dp.iMin && id <= dp.iMax ) {
+
+		if( fabsf( data.h[data.idx(id,dp.jMin+2)] ) > dp.sshClipThreshold )
+			atomicAdd( &(data.g_MinMax->z), 1 );
+
+		if( fabsf( data.h[data.idx(id,dp.jMax-2)] ) > dp.sshClipThreshold )
+			atomicAdd( &(data.g_MinMax->w), 1 );
+	}
+#else
+	if( id == 1 ) {
+
+		for( int j = dp.jMin; j <= dp.jMax; j++ ) {
+			if( fabsf( data.h[data.idx(dp.iMin+2,j)] ) > dp.sshClipThreshold ) {
+				data.g_MinMax->x = 1;
+				break;
+			}
+		}
+		for( int j = dp.jMin; j <= dp.jMax; j++ ) {
+			if( fabsf( data.h[data.idx(dp.iMax-2,j)] ) > dp.sshClipThreshold ) {
+				data.g_MinMax->y = 1;
+				break;
+			}
+		}
+		for( int i = dp.iMin; i <= dp.iMax; i++ ) {
+			if( fabsf( data.h[data.idx(i,dp.jMin+2)] ) > dp.sshClipThreshold ) {
+				data.g_MinMax->z = 1;
+				break;
+			}
+		}
+		for( int i = dp.iMin; i <= dp.iMax; i++ ) {
+			if( fabsf( data.h[data.idx(i,dp.jMax-2)] ) > dp.sshClipThreshold ) {
+				data.g_MinMax->w = 1;
+				break;
+			}
+		}
+	}
+#endif
+}
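The merged kernel relies on a simple flag protocol: every boundary thread bumps one field of a global int4 with atomicAdd, and the host only needs to test whether that field is non-zero before widening the active window, which is why the old single-thread runGridExtendKernel2 pass is no longer needed. A rough, self-contained sketch of that protocol on an invented one-row example (names, sizes and threshold are made up; this is not repository code):

#include <cstdio>
#include <cuda_runtime.h>

#define N 256
__device__ float d_row[N];   /* toy stand-in for one boundary row of wave heights */

/* Each thread inspects one cell; if the height exceeds the threshold it bumps
   the first flag of the int4, mirroring how runGridExtendKernel reports
   activity close to a grid edge. */
__global__ void checkBoundary( int4 *flags, float threshold ) {
	int i = blockIdx.x * blockDim.x + threadIdx.x;
	if( i < N && fabsf( d_row[i] ) > threshold )
		atomicAdd( &(flags->x), 1 );
}

int main() {
	float h_row[N] = { 0.0f };
	h_row[N/2] = 0.5f;                          /* one "active" cell */
	cudaMemcpyToSymbol( d_row, h_row, sizeof(h_row) );

	int4 *flags;
	cudaMalloc( (void**)&flags, sizeof(int4) );
	cudaMemset( flags, 0, sizeof(int4) );       /* reset the flags before the launch */

	checkBoundary<<< (N+127)/128, 128 >>>( flags, 0.1f );

	int4 result;
	cudaMemcpy( &result, flags, sizeof(int4), cudaMemcpyDeviceToHost );
	printf( "extend boundary: %s\n", result.x ? "yes" : "no" );   /* host only tests non-zero */

	cudaFree( flags );
	return 0;
}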
code/trunk/src/ewCudaKernels.cuh

...
@@ -29,7 +29,6 @@ __global__ void runWaveUpdateKernel( KernelData data );
 __global__ void runWaveBoundaryKernel( KernelData data );
 __global__ void runFluxUpdateKernel( KernelData data );
 __global__ void runFluxBoundaryKernel( KernelData data );
-__global__ void runGridExtendKernel1( KernelData data );
-__global__ void runGridExtendKernel2( KernelData data );
+__global__ void runGridExtendKernel( KernelData data );

#endif /* EW_KERNELS_H */
code/trunk/src/ewGpuNode.cu

...
@@ -50,7 +50,7 @@ int CGpuNode::mallocMem() {
 	dp.sshArrivalThreshold = Par.sshArrivalThreshold;
 	dp.sshClipThreshold = Par.sshClipThreshold;
 	dp.sshZeroThreshold = Par.sshZeroThreshold;
-	dp.lpad = 0;
+	dp.lpad = 31;

 	size_t nJ_aligned = dp.nJ + dp.lpad;
...
@@ -85,13 +85,15 @@ int CGpuNode::copyToGPU() {
 	Params& dp = data.params;

+	/* align left grid boundary to a multiple of 32 with an offset 1 */
+	Jmin -= (Jmin-2) % 32;
+
 	/* fill in further fields here */
 	dp.iMin = Imin;
 	dp.iMax = Imax;
 	dp.jMin = Jmin;
 	dp.jMax = Jmax;

-	/* FIXME: should not work correctly */
 	/* add offset to data.d to guarantee alignment: data.d + LPAD */
 	/* 2-dim */
 	CUDA_CALL( cudaMemcpy2D( data.d + dp.lpad, pitch, d, dp.nJ*sizeof(float), dp.nJ*sizeof(float), dp.nI, cudaMemcpyHostToDevice ) );
...
@@ -202,8 +204,10 @@ int CGpuNode::run() {
 	int xThreads = 32;
 	int yThreads = nThreads / xThreads;
-	int xBlocks = ceil( (float)dp.nJ / (float)xThreads );
-	int yBlocks = ceil( (float)dp.nI / (float)yThreads );
+	int NJ = dp.jMax - dp.jMin + 1;
+	int NI = dp.iMax - dp.iMin + 1;
+	int xBlocks = ceil( (float)NJ / (float)xThreads );
+	int yBlocks = ceil( (float)NI / (float)yThreads );

 	dim3 threads( xThreads, yThreads );
 	dim3 blocks( xBlocks, yBlocks );
...
@@ -225,17 +229,25 @@ int CGpuNode::run() {
 	runFluxBoundaryKernel<<<nBlocks,nThreads>>>( data );
 	CUDA_CALL( cudaEventRecord( evtEnd[3], 0 ) );

 	CUDA_CALL( cudaEventRecord( evtStart[4], 0 ) );
-	runGridExtendKernel1<<<nBlocks,nThreads>>>( data );
-	runGridExtendKernel2<<<1,1>>>( data );
+	CUDA_CALL( cudaMemset( data.g_MinMax, 0, sizeof(int4) ) );
+	runGridExtendKernel<<<nBlocks,nThreads>>>( data );
 	CUDA_CALL( cudaEventRecord( evtEnd[4], 0 ) );

 	int4 MinMax;
 	CUDA_CALL( cudaMemcpy( &MinMax, data.g_MinMax, sizeof(int4), cudaMemcpyDeviceToHost ) );
 	cudaDeviceSynchronize();

-	Imin = dp.iMin = MinMax.x;
-	Imax = dp.iMax = MinMax.y;
-	Jmin = dp.jMin = MinMax.z;
-	Jmax = dp.jMax = MinMax.w;
+	if( MinMax.x )
+		Imin = dp.iMin = max( dp.iMin-1, 2 );
+
+	if( MinMax.y )
+		Imax = dp.iMax = min( dp.iMax+1, dp.nI-1 );
+
+	if( MinMax.z )
+		Jmin = dp.jMin = max( dp.jMin-32, 2 );
+
+	if( MinMax.w )
+		Jmax = dp.jMax = min( dp.jMax+1, dp.nJ-1 );

 	float _dur;
 	for( int j = 0; j < 5; j++ ) {
...
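The two alignment-related edits above work together: copyToGPU() snaps the initial left boundary so that (Jmin - 2) is a multiple of 32, and run() afterwards only moves jMin left in whole steps of 32, so the boundary stays on that raster for the rest of the simulation (presumably so that, together with the lpad = 31 row padding, per-row accesses stay warp-aligned). A tiny host-only sketch of that invariant with an arbitrary made-up starting value (not repository code):

#include <cassert>
#include <cstdio>
#include <algorithm>

int main() {
	int Jmin = 147;                      /* arbitrary, unaligned left boundary */

	/* copyToGPU(): snap so that (Jmin - 2) becomes a multiple of 32 */
	Jmin -= (Jmin - 2) % 32;             /* here: 147 -> 130 */
	assert( (Jmin - 2) % 32 == 0 );

	/* run(): later extensions mirror "jMin = max( jMin - 32, 2 )" */
	for( int step = 0; step < 10; step++ ) {
		Jmin = std::max( Jmin - 32, 2 );
		assert( (Jmin - 2) % 32 == 0 );  /* the raster is preserved, bottoming out at 2 */
	}

	printf( "final Jmin = %d\n", Jmin );
	return 0;
}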