Commit a498117a authored by Daniel Eggert

revised cloudcover update mechanism

parent a36b8e9e
@@ -12,36 +12,38 @@ import de.potsdam.gfz.gms.database.SceneDatabase;
 public class LandsatCollection1CsvUpdate {
     private static class Scene {
         long id;
         String entityId;
+        double cloudcover;

-        Scene(long id, String entityid) {
+        Scene(long id, String entityid, double cloudcover) {
             this.id = id;
             this.entityId = entityid;
+            this.cloudcover = cloudcover;
         }
     }

     // browseAvailable,browseURL,sceneID,LANDSAT_PRODUCT_ID,sensor,acquisitionDate,dateUpdated,path,row,upperLeftCornerLatitude,upperLeftCornerLongitude,upperRightCornerLatitude,upperRightCornerLongitude,lowerLeftCornerLatitude,lowerLeftCornerLongitude,lowerRightCornerLatitude,lowerRightCornerLongitude,sceneCenterLatitude,sceneCenterLongitude,cloudCover,cloudCoverFull,dayOrNight,sunElevation,sunAzimuth,receivingStation,sceneStartTime,sceneStopTime,imageQuality1,DATA_TYPE_L1,cartURL,ROLL_ANGLE,GEOMETRIC_RMSE_MODEL,GEOMETRIC_RMSE_MODEL_X,GEOMETRIC_RMSE_MODEL_Y,FULL_PARTIAL_SCENE,NADIR_OFFNADIR,PROCESSING_SOFTWARE_VERSION,CPF_NAME,RLUT_FILE_NAME,BPF_NAME_OLI,BPF_NAME_TIRS,GROUND_CONTROL_POINTS_MODEL,GROUND_CONTROL_POINTS_VERSION,DATE_L1_GENERATED,TIRS_SSM_MODEL,COLLECTION_NUMBER,COLLECTION_CATEGORY,CLOUD_COVER_LAND
     public static void main(String[] args) throws Throwable {
-        // get all entityids for L8C1 from database as well as from the csv file
+        // get all entityids from database as well as from the csv file
         SceneDatabase db = SceneDatabase.getInstance();
-        PreparedStatement updatePst = db.prepareCustomStatement("update scenes set entityid=? where id=?;");
+        PreparedStatement updatePst = db.prepareCustomStatement("update scenes set entityid=?, cloudcover=? where id=?;");
-        ResultSet rs = db.placeCustomQuery(
-                "select id, entityid from scenes where datasetid=250 and proc_level<'DOWNLOADED'::proc_level;");
+        ResultSet rs = db.placeCustomQuery("select id, entityid, cloudcover from scenes where datasetid=252 and proc_level<'DOWNLOADED'::proc_level;");
         Map<String, Scene> dbEntityIds = new HashMap<>();
         while (rs.next()) {
             long id = rs.getLong(1);
             String entityid = rs.getString(2);
-            dbEntityIds.put(entityid.substring(0, entityid.length() - 2), new Scene(id, entityid));
+            double cloudcover = rs.getDouble(3);
+            dbEntityIds.put(entityid.substring(0, entityid.length() - 2), new Scene(id, entityid, cloudcover));
         }
         System.out.println(dbEntityIds.size() + " scenes in db");

-        Scanner s = new Scanner(Paths.get("/home/eggert/Downloads/LANDSAT_8_C1.csv"));
+        Scanner s = new Scanner(Paths.get("/home/eggi/Downloads/LANDSAT_TM_C1.csv"));
         // skip header
         s.nextLine();
@@ -50,15 +52,22 @@ public class LandsatCollection1CsvUpdate {
             String[] lineSplit = s.nextLine().split(",");
             String entityid = lineSplit[2];
+            double cloudCover = Double.parseDouble(lineSplit[lineSplit.length - 1]);
             String mapKey = entityid.substring(0, entityid.length() - 2);
             if (dbEntityIds.containsKey(mapKey)) {
                 // match found - check entire entityId
                 Scene scene = dbEntityIds.remove(mapKey);
                 if (!scene.entityId.equals(entityid)) {
+                    if (scene.cloudcover < 0 || Double.isNaN(scene.cloudcover)) {
+                        scene.cloudcover = cloudCover;
+                    }
                     // entityids differ in last two digits - update db
-                    updatePst.setLong(2, scene.id);
                     updatePst.setString(1, entityid);
+                    updatePst.setDouble(2, scene.cloudcover);
+                    updatePst.setLong(3, scene.id);
                     updatePst.executeUpdate();
                     // db.commit();
                 }
             }
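Editor's note (not part of the commit): both CSV updaters index columns out of a plain split(",") and read the cloud cover from the last element, which stays correct only while the USGS export contains no quoted fields with embedded commas. The two characters stripped from entityid for the map key are, per the comment above, the version digits that may differ between export and database. A minimal quote-aware splitter, purely as a sketch:

import java.util.ArrayList;
import java.util.List;

public class CsvLineSplitter {
    // Splits one CSV line while honoring double-quoted fields, so embedded
    // commas do not shift column indices. Sketch only; the plain split(",")
    // above is fine as long as no field in the export is quoted.
    static List<String> splitCsvLine(String line) {
        List<String> fields = new ArrayList<>();
        StringBuilder cur = new StringBuilder();
        boolean inQuotes = false;
        for (int i = 0; i < line.length(); i++) {
            char c = line.charAt(i);
            if (c == '"') {
                inQuotes = !inQuotes;       // toggle quoted state
            } else if (c == ',' && !inQuotes) {
                fields.add(cur.toString()); // field boundary
                cur.setLength(0);
            } else {
                cur.append(c);
            }
        }
        fields.add(cur.toString());         // last field
        return fields;
    }

    public static void main(String[] args) {
        System.out.println(splitCsvLine("a,\"b,c\",d")); // prints [a, b,c, d]
    }
}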
@@ -6,6 +6,7 @@ package de.potsdam.gfz.gms.metadatacrawler;
 import java.sql.PreparedStatement;
 import java.sql.ResultSet;
 import java.sql.Types;
+import java.util.Arrays;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
@@ -47,7 +48,7 @@ public class MetadatacrawlerModule {
         // System.out.println(dm.name + " \t " + dm.description + " \t " + dm.imageType + " \t ");
         // }
         // }
-        printProvidedDatasets(usgs);
+        // printProvidedDatasets(usgs);

         // DatasetMetadata landsat7 = new DatasetMetadata();
         // landsat7.id = 112;
@@ -66,7 +67,7 @@ public class MetadatacrawlerModule {
         DatasetMetadata s2 = new DatasetMetadata();
         s2.id = 249;
         s2.name = "Sentinel-2";
-        // crawlerTasks.put(scihub, Arrays.asList(s2));
+        crawlerTasks.put(scihub, Arrays.asList(s2));

         final boolean queryLatestOnly = true;
new file: CloudCoverUpdateFromCSV.java
package de.potsdam.gfz.gms.metadatacrawler.cloudcover;

import java.nio.file.Paths;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.util.HashMap;
import java.util.Map;
import java.util.Scanner;

import de.potsdam.gfz.gms.database.SceneDatabase;

public class CloudCoverUpdateFromCSV {
    public static void main(String[] args) throws Throwable {
        // get all entityids from database as well as from the csv file
        SceneDatabase db = SceneDatabase.getInstance();
        PreparedStatement updatePst = db.prepareCustomStatement("update scenes set cloudcover=? where id=?;");
        ResultSet rs = db.placeCustomQuery("select entityid, id from scenes where datasetid=251 and (cloudcover='NaN' or cloudcover<0);");
        Map<String, Long> dbScenes = new HashMap<>();
        while (rs.next()) {
            dbScenes.put(rs.getString(1), rs.getLong(2));
        }
        System.out.println(dbScenes.size() + " scenes in db");

        Scanner s = new Scanner(Paths.get("/home/eggi/Downloads/LANDSAT_ETM_C1.csv"));
        // skip header
        s.nextLine();
        while (s.hasNext()) {
            String[] lineSplit = s.nextLine().split(",");
            String entityid = lineSplit[2];
            double cloudCover = Double.parseDouble(lineSplit[lineSplit.length - 1]);
            if (cloudCover >= 0 && dbScenes.containsKey(entityid)) {
                // match found
                updatePst.setDouble(1, cloudCover);
                updatePst.setLong(2, dbScenes.remove(entityid));
                updatePst.executeUpdate();
                if (dbScenes.size() % 1000 == 0) {
                    db.commit();
                    System.out.println(dbScenes.size() + " left.");
                }
            }
        }
        s.close();
        db.commit();
        System.out.println(dbScenes.size() + " unmatched scenes in db");
    }
}
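Editor's note: because every match removes exactly one entry from dbScenes, the size() % 1000 == 0 test fires once per thousand updates, so the shrinking map doubles as commit trigger and progress counter. Standard JDBC batching gives the same cadence while also cutting network round trips; a sketch under the assumption that SceneDatabase wraps an ordinary java.sql.Connection with auto-commit disabled (the names below are illustrative):

import java.sql.Connection;
import java.sql.PreparedStatement;
import java.sql.SQLException;
import java.util.Map;

public class BatchedCloudCoverUpdate {
    // Same update loop as above, but updates are queued with addBatch() and
    // sent to the server in groups of 1000 with a single executeBatch() call.
    static void applyUpdates(Connection conn, Map<String, Long> dbScenes,
                             Iterable<String[]> csvRows) throws SQLException {
        PreparedStatement pst = conn.prepareStatement(
                "update scenes set cloudcover=? where id=?;");
        int pending = 0;
        for (String[] row : csvRows) {
            String entityid = row[2];
            double cloudCover = Double.parseDouble(row[row.length - 1]);
            if (cloudCover >= 0 && dbScenes.containsKey(entityid)) {
                pst.setDouble(1, cloudCover);
                pst.setLong(2, dbScenes.remove(entityid));
                pst.addBatch();             // queue, don't execute yet
                if (++pending == 1000) {
                    pst.executeBatch();     // one round trip per 1000 updates
                    conn.commit();
                    pending = 0;
                }
            }
        }
        if (pending > 0) {                  // flush the remainder
            pst.executeBatch();
            conn.commit();
        }
    }
}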
renamed: CloudCoverUpdate.java → CloudCoverUpdateFromMetadataUrl.java

-package de.potsdam.gfz.gms.metadatacrawler;
+package de.potsdam.gfz.gms.metadatacrawler.cloudcover;

 import java.io.BufferedWriter;
 import java.io.FileWriter;
@@ -30,7 +30,7 @@ import org.w3c.dom.Document;
 import de.potsdam.gfz.gms.database.SceneDatabase;

-public class CloudCoverUpdate {
+public class CloudCoverUpdateFromMetadataUrl {
     private static class Scene {
         long id;
@@ -43,11 +43,12 @@ public class CloudCoverUpdate {
         }
     }

     private static final Scene FINAL_SCENE = new Scene(-1, -1);
+    private static final Random rand = new Random();

     public static void main(String[] args) throws Exception {
         // landsat_tm(108), landsat_mss(107), landsat_8(104), landsat_7(112)
-        short datasetids[] = new short[] { 250 }; // , 108, 107, 112 };
+        short datasetids[] = new short[] { 251 }; // , 108, 107, 112 };
         // Timestamp start = Timestamp.MIN;
         // Timestamp end = Timestamp.now();
         // Timestamp end = start.plus(20, Timestamp.TimeUnits.DAYS);
@@ -100,23 +101,6 @@ public class CloudCoverUpdate {
         PreparedStatement pst = db.prepareCustomStatement("SELECT id, metadataurl FROM scenes WHERE datasetid=? AND cloudcover<0;");
         for (short datasetid : datasetids) {
-            // buffer.clear();
-            // db.getCompactSceneMetaDataListFromIndex(datasetid, start, end, seasonCode, minCloudcover, maxCloudcover, polygon, buffer);
-            // buffer.flip();
-            /*
-             * Decode as follows: [short] datasetid [int] number of scenes list of scenes, with each scene: [int] sceneid [long] timestamp [byte] cloudcover
-             */
-            // short returnedDatasetid = buffer.getShort();
-            // if (returnedDatasetid != datasetid) {
-            // System.err.println("dataset ids dont match");
-            // System.exit(1);
-            // }
-            // int numScenes = buffer.getInt();
-            final Random rand = new Random();
             System.out.println("processing dataset " + datasetid);
             List<Future<?>> fList = new ArrayList<>();
@@ -135,88 +119,21 @@ public class CloudCoverUpdate {
                 public void run() {
                     InputStream stream = null;
                     HttpURLConnection con = null;
                     try {
-                        // random sleep
-                        Thread.sleep(50 + rand.nextInt(150));
-                        // establish connection
-                        URL url = new URL(urlStr);
-                        con = (HttpURLConnection) url.openConnection();
-                        con.setConnectTimeout(5000);
-                        int code = 0;
-                        // check response code
-                        try {
-                            con.connect();
-                            code = con.getResponseCode();
-                        } catch (Exception e) {
-                        }
+                        stream = establishConnection(urlStr, true);
-                        if (code != 200) {
-                            // connection error - backoff and retry
-                            // random sleep
-                            Thread.sleep(15000l + rand.nextInt(10000));
-                            // re-establish connection
-                            url = new URL(urlStr);
-                            con = (HttpURLConnection) url.openConnection();
-                            if (con.getResponseCode() != 200) {
-                                // connection error - again - skip scene
-                                System.err.println("Connection error: " + con.getResponseCode() + " - " + con.getResponseMessage());
-                                writer.write(id + "\t" + url + "\t" + con.getResponseMessage() + "\n");
-                                writer.flush();
-                                con.disconnect();
-                                return;
-                            }
-                        }
+                        if (stream != null) {
-                        // connection successfully established - open stream
-                        try {
-                            stream = url.openStream();
-                        } catch (Exception e) {
-                            // stream error - backoff and retry
-                            // random sleep
-                            Thread.sleep(15000l + rand.nextInt(10000));
-                            // re-establish connection
-                            url = new URL(urlStr);
-                            con = (HttpURLConnection) url.openConnection();
-                            if (con.getResponseCode() != 200) {
-                                System.err.println("Connection error after stream error: " + con.getResponseCode() + " - " + con.getResponseMessage());
-                                writer.write(id + "\t" + url + "\t" + con.getResponseMessage() + "\n");
-                                writer.flush();
-                                if (stream != null) {
-                                    stream.close();
-                                }
-                                return;
-                            }
-                            try {
-                                stream = url.openStream();
-                            } catch (Exception ex) {
-                                System.err.println("Stream error: " + con.getResponseCode() + " - " + con.getResponseMessage() + "\t" + ex.getMessage());
-                                writer.write(id + "\t" + url + "\t" + con.getResponseMessage() + "\n");
-                                writer.flush();
-                                if (stream != null) {
-                                    stream.close();
-                                }
-                                con.disconnect();
-                                return;
-                            }
-                        }
+                            final DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
+                            final XPath xpath = XPathFactory.newInstance().newXPath();
+                            final XPathExpression expr = xpath.compile("//*[contains(@name,'Cloud Cover')]/*/text()");
+                            Document doc = builder.parse(stream);
+                            double cc = (Double) expr.evaluate(doc, XPathConstants.NUMBER);
+                            if (cc < 0) {
+                                cc = Double.NaN;
+                            }
+                            sceneQueue.add(new Scene(id, cc));
+                        }
-                        final DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
-                        final XPath xpath = XPathFactory.newInstance().newXPath();
-                        final XPathExpression expr = xpath.compile("//*[contains(@name,'Cloud Cover')]/*/text()");
-                        Document doc = builder.parse(stream);
-                        double cc = (Double) expr.evaluate(doc, XPathConstants.NUMBER);
-                        if (cc < 0) {
-                            cc = Double.NaN;
-                        }
-                        sceneQueue.add(new Scene(id, cc));
                     } catch (Exception e) {
                         e.printStackTrace();
                     }
@@ -227,9 +144,9 @@ public class CloudCoverUpdate {
                         } catch (IOException e) {
                         }
                     }
-                    if (con != null) {
-                        con.disconnect();
-                    }
+                    // if (con != null) {
+                    // con.disconnect();
+                    // }
                 }
             });
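Editor's note on the retained parsing path: evaluating the expression with XPathConstants.NUMBER returns NaN for an empty node-set under XPath 1.0, so scenes whose metadata page has no cloud-cover field come out as NaN without extra handling, and the cc < 0 check additionally maps the negative "unknown" sentinel to NaN. A self-contained check of the expression; the sample XML only approximates the shape of the USGS metadata and is not its actual schema:

import java.io.ByteArrayInputStream;
import java.nio.charset.StandardCharsets;

import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;

public class CloudCoverXPathDemo {
    public static void main(String[] args) throws Exception {
        // Illustrative metadata snippet: an element whose name attribute
        // contains "Cloud Cover", with the value in a child element.
        String xml = "<metadata>"
                + "<field name='Scene Cloud Cover'><value>23.0</value></field>"
                + "</metadata>";
        DocumentBuilder builder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
        Document doc = builder.parse(new ByteArrayInputStream(xml.getBytes(StandardCharsets.UTF_8)));
        XPath xpath = XPathFactory.newInstance().newXPath();
        double cc = (Double) xpath.evaluate("//*[contains(@name,'Cloud Cover')]/*/text()",
                doc, XPathConstants.NUMBER);
        System.out.println(cc); // 23.0; NaN if no matching element exists
    }
}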
@@ -260,4 +177,60 @@ public class CloudCoverUpdate {
     }

+    private static InputStream establishConnection(String urlStr, boolean retry) {
+        // random sleep
+        try {
+            Thread.sleep(50 + rand.nextInt(150));
+        } catch (InterruptedException e1) {
+        }
+        InputStream stream = null;
+        try {
+            // establish connection
+            URL url = new URL(urlStr);
+            HttpURLConnection con = (HttpURLConnection) url.openConnection();
+            con.setConnectTimeout(5000);
+            // connect and check response code
+            try {
+                con.connect();
+            } catch (Exception e) {
+            }
+            switch (con.getResponseCode()) {
+            case HttpURLConnection.HTTP_MOVED_PERM:
+            case HttpURLConnection.HTTP_MOVED_TEMP:
+                stream = establishConnection(con.getHeaderField("Location"), true);
+                break;
+            case HttpURLConnection.HTTP_OK:
+                try {
+                    stream = con.getInputStream();
+                } catch (Throwable t) {
+                    stream = null;
+                }
+                break;
+            default:
+                break;
+            }
+        } catch (Throwable t) {
+            stream = null;
+        }
+        if (stream == null && retry) {
+            // connection error - backoff and retry
+            // random sleep
+            try {
+                Thread.sleep(15000l + rand.nextInt(10000));
+            } catch (InterruptedException e) {
+            }
+            stream = establishConnection(urlStr, false);
+        }
+        return stream;
+    }
 }
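A caveat on the new helper, as an editor's aside: HttpURLConnection already follows same-protocol redirects on its own, so the 301/302 branch mainly catches http-to-https hops, and it recurses on the Location header with no depth limit, meaning a redirect cycle would recurse until the stack gives out; 303/307/308 responses fall through to the default branch and fail outright. A bounded variant, sketched with hypothetical names (HttpURLConnection defines no constants for 307/308, hence the literals):

import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URL;

public class BoundedRedirectFetch {
    // Same idea as establishConnection, but with an explicit redirect budget
    // so a redirect loop terminates, and with 303/307/308 handled as well.
    static InputStream open(String urlStr, int redirectsLeft) {
        try {
            HttpURLConnection con = (HttpURLConnection) new URL(urlStr).openConnection();
            con.setConnectTimeout(5000);
            switch (con.getResponseCode()) {
            case HttpURLConnection.HTTP_MOVED_PERM:  // 301
            case HttpURLConnection.HTTP_MOVED_TEMP:  // 302
            case HttpURLConnection.HTTP_SEE_OTHER:   // 303
            case 307:                                // temporary redirect
            case 308:                                // permanent redirect
                if (redirectsLeft > 0) {
                    return open(con.getHeaderField("Location"), redirectsLeft - 1);
                }
                return null;
            case HttpURLConnection.HTTP_OK:
                return con.getInputStream();
            default:
                return null;
            }
        } catch (Exception e) {
            return null;                             // treat any failure as "no stream"
        }
    }

    public static void main(String[] args) {
        InputStream in = open("https://example.org/", 5);
        System.out.println(in != null ? "connected" : "failed");
    }
}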