Commit 70325b10 authored by Daniel Eggert
Browse files

bugfixes

- fixed scenes with missing bounds by skipping those scenes
- fixed duplicate S2 scenes by also comparing filenames
parent 3b186651
......@@ -85,6 +85,16 @@ public class MetadatacrawlerModule {
}
}
/**
 * Holder for the two lookup sets describing scenes that already exist in the database:
 * their entity ids and their filenames. Both sets start out empty and are filled and
 * queried directly by the enclosing class.
 */
private static class DBSceneSets {
    /** Entity ids of the known scenes; the set instance is never replaced. */
    private final Set<String> entityids = new HashSet<>();
    /** Filenames of the known scenes; the set instance is never replaced. */
    private final Set<String> filenames = new HashSet<>();

    /**
     * Removes all elements from both sets.
     */
    public void clear() {
        entityids.clear();
        filenames.clear();
    }
}
/**
* Returns the entity ids and the filenames (file extension stripped) of the existing scene data in the gms database
*
......@@ -92,11 +102,11 @@ public class MetadatacrawlerModule {
* @return
* @throws Exception
*/
private static Set<String> getExistingScenes(DatasetMetadata dataset, Timestamp latestAcquisitionDate) throws Exception {
private static DBSceneSets getExistingScenes(DatasetMetadata dataset, Timestamp latestAcquisitionDate) throws Exception {
GeoMultiSensMetadataCrawler gmsCrawler = new GeoMultiSensMetadataCrawler();
BlockingQueue<SceneMetadata> queue = new LinkedBlockingQueue<>();
Set<String> entityIdSet = new HashSet<>();
DBSceneSets dbScenes = new DBSceneSets();
gmsCrawler.startCrawling(null, null, queue, dataset);
......@@ -106,7 +116,11 @@ public class MetadatacrawlerModule {
continue;
}
entityIdSet.add(scene.entityid);
dbScenes.entityids.add(scene.entityid);
if (scene.filename != null && !scene.filename.isEmpty()) {
// ignore file extension
dbScenes.filenames.add(scene.filename.substring(0, scene.filename.lastIndexOf(".")));
}
// check date
if (latestAcquisitionDate.before(scene.acquisitiondate)) {
......@@ -114,7 +128,7 @@ public class MetadatacrawlerModule {
}
}
return entityIdSet;
return dbScenes;
}
......@@ -129,9 +143,9 @@ public class MetadatacrawlerModule {
System.out.print("getting existing scenes for dataset: ");
Timestamp latestDateInDB = new Timestamp(Timestamp.MIN);
Set<String> dbScenes = getExistingScenes(dataset, latestDateInDB);
DBSceneSets dbScenes = getExistingScenes(dataset, latestDateInDB);
System.out.println("done (number of scenes in DB:" + dbScenes.size() + ")");
System.out.println("done (number of scenes in DB:" + dbScenes.entityids.size() + ")");
System.out.println("latest scene timestamp in DB: " + latestDateInDB.toISO8601String());
BlockingQueue<SceneMetadata> queue = new LinkedBlockingQueue<>();
......@@ -161,12 +175,25 @@ public class MetadatacrawlerModule {
continue;
}
if (dbScenes.contains(scene.entityid)) {
if (dbScenes.entityids.contains(scene.entityid)) {
// we already have this scene information - skip
continue;
}
if (scene.entityid == null || scene.acquisitiondate == null) {
if (dataset.id == 249) {
// the controller will alter the entityid for downloaded scenes - so we need to check the corresponding filename
if (scene.filename != null && !scene.filename.isEmpty()) {
// check file name (ignore file extension)
if (dbScenes.filenames.contains(scene.filename.substring(0, scene.filename.lastIndexOf(".")))) {
// we already have this scene information - skip
continue;
}
}
}
if (scene.entityid == null || scene.acquisitiondate == null || scene.bounds == null) {
// skip invalid scenemetadata
continue;
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment