From b1b96525344252a337d43708ea6f67ed9f5743b5 Mon Sep 17 00:00:00 2001 From: Johannes Spazier Date: Fri, 25 Oct 2013 12:49:29 +0000 Subject: [PATCH] Merged optimizations from trunk r7 into this branch. --- cpu/branches/multi-gpu/ewGrid.cpp | 58 ++++++++++++++++++------------- cpu/branches/multi-gpu/ewNode.h | 24 +++++++++++++ 2 files changed, 58 insertions(+), 24 deletions(-) diff --git a/cpu/branches/multi-gpu/ewGrid.cpp b/cpu/branches/multi-gpu/ewGrid.cpp index 610dbf3..fbf705a 100644 --- a/cpu/branches/multi-gpu/ewGrid.cpp +++ b/cpu/branches/multi-gpu/ewGrid.cpp @@ -75,44 +75,56 @@ int ewLoadBathymetry() Dx = Re * g2r( DLon ); // in m along the equator Dy = Re * g2r( DLat ); - for( j=1; j<=NLat; j++ ) { - for( i=1; i<=NLon; i++ ) { + /* NOTE: optimal would be reading everything in one step, but that does not work because rows and columns are transposed + * (only possible with binary data at all) - use temporary buffer for now (consumes additional memory!) */ + float *buf = new float[ NLat*NLon ]; + ierr = fread( buf, sizeof(float), NLat*NLon, fp ); + + for( i=1; i<=NLon; i++ ) { + for( j=1; j<=NLat; j++ ) { + m = idx(j,i); if( isBin ) - ierr = fread( &fval, sizeof(float), 1, fp ); + fval = buf[ (j-1) * NLon + (i-1) ]; + //ierr = fread( &fval, sizeof(float), 1, fp ); else ierr = fscanf( fp, " %f ", &fval ); Node(m, iTopo) = fval; - - for( k=0; k 15 ) Par.dt = 15; else if( dtLoc > 10 ) Par.dt = 10; @@ -122,7 +134,6 @@ int ewLoadBathymetry() else return Err.post("Bathymetry requires too small time step (<1sec)"); } - // Correct bathymetry for edge artefacts for( i=1; i<=NLon; i++ ) { if( Node(idx(1,i), iD) != 0 && Node(idx(2,i), iD) == 0 ) Node(idx(1,i), iD) = 0.; @@ -139,7 +150,6 @@ int ewLoadBathymetry() R6[j] = cosdeg( LatMin + (j-0.5)*DLat ); } - /* FIXME: change loops */ for( i=1; i<=NLon; i++ ) { for( j=1; j<=NLat; j++ ) { diff --git a/cpu/branches/multi-gpu/ewNode.h b/cpu/branches/multi-gpu/ewNode.h index a62ff96..2535cb1 100644 --- a/cpu/branches/multi-gpu/ewNode.h +++ b/cpu/branches/multi-gpu/ewNode.h @@ -2,6 +2,8 @@ #define EW_NODE_H #include +#include +#include "easywave.h" #define CHKRET( x ) if( (x) == NULL ) return 1; @@ -19,6 +21,8 @@ public: virtual int copyPOIs() = 0; virtual int freeMem() = 0; virtual int run() = 0; + + virtual void initMemory( int index, int val ) = 0; }; class CStructNode : public CNode { @@ -32,6 +36,17 @@ public: return node[idx1][idx2]; } + void initMemory( int index, int val ) { + + int m; + for( int i=1; i<=NLon; i++ ) { + for( int j=1; j<=NLat; j++ ) { + m = idx(j,i); + this->operator ()(m, index) = val; + } + } + } + int mallocMem() { CHKRET( this->node = (Float*) malloc( sizeof(Float) * NLon * NLat) ); @@ -70,6 +85,7 @@ public: int copyFromGPU() { return 0; } int copyIntermediate() { return 0; } int copyPOIs() { return 0; } + }; #pragma pack(push, 1) @@ -95,6 +111,14 @@ public: return ((float**)&d)[idx2][idx1]; } + void *getBuf( int idx ) { return ((float**)&d)[idx]; } + + virtual void initMemory( int index, int val ) { + + memset( getBuf(index), 0, NLat * NLon * sizeof(float) ); + + } + virtual int mallocMem() { CHKRET( this->d = (float*) malloc( sizeof(float) * NLon * NLat ) ); -- GitLab