Commit 55f1e69f authored by Daniel Eggert's avatar Daniel Eggert
Browse files

removed refactored scihub-api lib

parent 9a072dd3
......@@ -119,7 +119,7 @@
<dependency>
<groupId>de.potsdam.gfz</groupId>
<artifactId>scihub-api</artifactId>
<version>0.0.1-SNAPSHOT</version>
<version>1.0.0</version>
<exclusions>
<exclusion>
<artifactId>slf4j-simple</artifactId>
......
<?xml version="1.0" encoding="UTF-8"?>
<!-- Maven build descriptor of the scihub-api library; the artifact is packaged as a plain jar. -->
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>
<!-- Artifact coordinates -->
<groupId>de.potsdam.gfz</groupId>
<artifactId>scihub-api</artifactId>
<version>0.0.1-SNAPSHOT</version>
<packaging>jar</packaging>
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<!-- Java language level; referenced below as both source and target of the compiler plugin. -->
<java.version>1.8</java.version>
</properties>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
<version>3.1</version>
<configuration>
<source>${java.version}</source>
<target>${java.version}</target>
</configuration>
</plugin>
</plugins>
</build>
<dependencies>
<!-- Test only. NOTE(review): RELEASE is a meta version resolved at build time, so builds are not
reproducible; consider pinning a concrete junit version. -->
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>RELEASE</version>
<scope>test</scope>
</dependency>
<!-- No scope is given, so Maven's default (compile) scope applies and the binding is passed on to consumers.
NOTE(review): a library usually leaves the choice of logging binding to the application; confirm this is intended. -->
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-simple</artifactId>
<version>1.7.25</version>
</dependency>
<!-- NOTE(review): the Java sources shown together with this pom import org.w3c.dom and org.xml.sax only;
verify that dom4j is still required. -->
<dependency>
<groupId>org.dom4j</groupId>
<artifactId>dom4j</artifactId>
<version>2.1.0</version>
</dependency>
</dependencies>
</project>
/**
*
*/
package de.potsdam.gfz.scihubapi;
import java.io.BufferedInputStream;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.net.HttpURLConnection;
import java.net.URI;
import java.net.URL;
import java.util.List;
import java.util.Map;
import java.util.Map.Entry;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
/**
 * Static helpers for opening HTTP connections, building properly encoded URLs and buffering the content of a stream
 * in memory.
 *
 * @author Daniel Eggert (daniel.eggert@gfz-potsdam.de)
 *
 */
public class UrlUtils {

    private static final Logger LOG = LoggerFactory.getLogger(UrlUtils.class);

    /** Connect and read timeout applied to every connection, in milliseconds (2 minutes). */
    private static final int TIMEOUT_MILLIS = 120000;

    /** Pause before the first retry of {@link #getStream(URL, String, int)}; doubled after every further failure. */
    private static final long INITIAL_BACKOFF_MILLIS = 500;

    /**
     * Establishes a connection to the given url, appends the provided credentials and returns the communication stream on success.
     * In case the connection attempt fails, an IOException is thrown.
     * <p>
     * Every failure, including a response code other than 200, is wrapped into an outer {@link IOException} whose
     * {@link Throwable#getCause() cause} carries the details (for a non-200 response the cause message contains the
     * response code). Callers inspect that cause, so the wrapping must be kept.
     *
     * @param url
     *            the http(s) url to connect to
     * @param base64Credentials
     *            Base64 encoded <code>user:password</code> pair sent as basic authorization header; <code>null</code>
     *            or empty to connect without credentials
     * @return the response body stream; the caller is responsible for closing it
     * @throws IOException
     *             if the connection cannot be established or the response code is not 200
     */
    public static InputStream getStreamWithException(URL url, String base64Credentials) throws IOException {
        HttpURLConnection con = null;
        try {
            // open channel to url
            con = (HttpURLConnection) url.openConnection();
            con.setConnectTimeout(TIMEOUT_MILLIS);
            con.setReadTimeout(TIMEOUT_MILLIS);

            // add credentials to header
            if (base64Credentials != null && !base64Credentials.isEmpty()) {
                con.setRequestProperty("Authorization", "Basic " + base64Credentials);
                con.setRequestProperty("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8");
            }

            // establish connection, send get request (including the credentials) and extract the code from the response
            con.connect();
            int responsecode = con.getResponseCode();
            if (responsecode != HttpURLConnection.HTTP_OK) {
                throw new IOException("Error connecting to " + url.toExternalForm() + "\nMessage (Code " + responsecode + "): " + con.getResponseMessage());
            }

            // successfully established connection - return stream
            return con.getInputStream();
        } catch (Exception e) {
            // nobody is going to read from this connection anymore - release it instead of leaking it
            if (con != null) {
                con.disconnect();
            }
            // String.valueOf() keeps the message intact even for a null url
            throw new IOException("Error establishing connection to " + String.valueOf(url), e);
        }
    }

    /**
     * Same as {@link UrlUtils#getStreamWithException(URL, String)}, but retries failed attempts with an exponentially
     * growing pause (500ms, 1s, 2s, ...) in between.<br>
     * In case all attempts fail the error message is written to the log and <code>null</code> is returned.
     * If the calling thread is interrupted while pausing, the interrupt flag is restored and no further attempt is made.
     *
     * @see UrlUtils#getStreamWithException(URL, String)
     * @param url
     *            the http(s) url to connect to
     * @param base64Credentials
     *            Base64 encoded <code>user:password</code> pair, may be <code>null</code> or empty
     * @param numRetries
     *            maximum number of connection attempts; for values smaller than 1 no attempt is made at all
     * @return the response body stream or <code>null</code> if no attempt succeeded
     */
    public static InputStream getStream(URL url, String base64Credentials, int numRetries) {
        Exception lastError = null;
        long backoffMillis = INITIAL_BACKOFF_MILLIS;

        for (int attempt = 1; attempt <= numRetries; ++attempt) {
            try {
                return getStreamWithException(url, base64Credentials);
            } catch (Exception e) {
                lastError = e;
            }

            if (attempt == numRetries) {
                // final attempt failed - pausing now would only delay the error report
                break;
            }

            try {
                Thread.sleep(backoffMillis);
            } catch (InterruptedException ie) {
                // somebody wants this thread to stop: keep the flag for the caller and give up
                Thread.currentThread().interrupt();
                break;
            }
            backoffMillis *= 2;
        }

        String errMsg = "Connection error, cause: ";
        if (lastError != null) {
            errMsg += lastError.getMessage();
            if (lastError.getCause() != null) {
                errMsg += " (cause: " + lastError.getCause().getMessage() + ")";
            }
        } else {
            errMsg += "unknown";
        }
        LOG.error(errMsg);
        return null;
    }

    /**
     * Creates a valid URL from the given parameters. Throws an {@link IllegalArgumentException} when no valid url can be generated.
     *
     * @param prot
     *            protocol scheme, e.g. <code>https</code>
     * @param host
     *            host name
     * @param path
     *            path component; characters that are illegal in a path get encoded
     * @param query
     *            query component, may be <code>null</code>
     * @return the encoded url
     * @throws IllegalArgumentException
     *             if the components do not form a valid url
     */
    public static URL getValidatedUrlWithException(String prot, String host, String path, String query) throws IllegalArgumentException {
        // make url valid by building an uri and converting to url afterwards
        try {
            URI uri = new URI(prot, null, host, -1, path, query, null);
            return uri.toURL();
        } catch (Exception e) {
            throw new IllegalArgumentException("error generating a valid url from given parameters", e);
        }
    }

    /**
     * Same as {@link UrlUtils#getValidatedUrlWithException(String, String, String, String)}.<br>
     * In case of an exception the error message is written to the log and <code>null</code> is returned.
     *
     * @see UrlUtils#getValidatedUrlWithException(String, String, String, String)
     * @param prot
     *            protocol scheme, e.g. <code>https</code>
     * @param host
     *            host name
     * @param path
     *            path component
     * @param query
     *            query component, may be <code>null</code>
     * @return the encoded url or <code>null</code> if none could be generated
     */
    public static URL getValidatedUrl(String prot, String host, String path, String query) {
        try {
            return getValidatedUrlWithException(prot, host, path, query);
        } catch (Exception e) {
            LOG.error("URL error", e);
        }
        return null;
    }

    /**
     * Creates a valid URL from the given url string. Throws an {@link IllegalArgumentException} when no valid url can be generated.
     * <p>
     * Only protocol, host, port, path and query of the given string are carried over; user info and fragment are dropped.
     *
     * @param urlString
     *            the url to validate and encode
     * @return the encoded url
     * @throws IllegalArgumentException
     *             if the string cannot be turned into a valid url
     */
    public static URL getValidatedUrlWithException(String urlString) throws IllegalArgumentException {
        // make url valid by building an uri and converting to url afterwards
        try {
            URL url = new URL(urlString);
            // encode properly
            URI uri = new URI(url.getProtocol(), null, url.getHost(), url.getPort(), url.getPath(), url.getQuery(), null);
            return uri.toURL();
        } catch (Exception e) {
            throw new IllegalArgumentException("error generating a valid url from given parameters", e);
        }
    }

    /**
     * Same as {@link UrlUtils#getValidatedUrlWithException(String)}.<br>
     * In case of an exception the error message is written to the log and <code>null</code> is returned.
     *
     * @see UrlUtils#getValidatedUrlWithException(String)
     * @param urlString
     *            the url to validate and encode
     * @return the encoded url or <code>null</code> if none could be generated
     */
    public static URL getValidatedUrl(String urlString) {
        try {
            return getValidatedUrlWithException(urlString);
        } catch (Exception e) {
            // log as documented (and as the four argument variant does) instead of failing silently
            LOG.error("URL error", e);
        }
        return null;
    }

    /**
     * Downloads the data from the given inputstream and returns a {@link ByteArrayInputStream} holding the entire data.
     * <p>
     * The given stream is read until its end and closed afterwards. Errors are written to the log; in that case the
     * returned stream holds whatever could be read before the error occurred (nothing at all for a <code>null</code>
     * stream).
     *
     * @param stream
     *            the stream to read from; closed by this method
     * @return an in-memory stream holding the downloaded bytes, never <code>null</code>
     */
    public static ByteArrayInputStream download(InputStream stream) {
        ByteArrayOutputStream baos = new ByteArrayOutputStream();
        if (stream == null) {
            LOG.error("Error downloading from stream: no stream given");
            return new ByteArrayInputStream(baos.toByteArray());
        }

        // try-with-resources releases the underlying connection even if reading fails
        try (InputStream in = stream) {
            byte[] buf = new byte[4 * 1024];
            int read;
            while ((read = in.read(buf)) > -1) {
                baos.write(buf, 0, read);
            }
        } catch (Exception e) {
            LOG.error("Error downloading from stream", e);
        }
        return new ByteArrayInputStream(baos.toByteArray());
    }

    /**
     * Prints the given header fields to standard out, one line per field: the field name followed by a colon and the
     * tab separated values.
     *
     * @param headerFieldMap
     *            header fields, e.g. as returned by {@link java.net.URLConnection#getHeaderFields()}
     */
    public static void printHeaderFields(Map<String, List<String>> headerFieldMap) {
        for (Entry<String, List<String>> e : headerFieldMap.entrySet()) {
            System.out.print(e.getKey() + ":");
            for (String value : e.getValue()) {
                System.out.print(value + "\t");
            }
            System.out.println();
        }
    }
}
/**
*
*/
package de.potsdam.gfz.scihubapi.opendata;
import java.util.ArrayList;
import java.util.List;
import org.xml.sax.Attributes;
import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
 * SAX handler collecting the text of the first non-empty <code>title</code> element of every <code>entry</code>
 * element. Elements are matched by their qualified name.
 * <p>
 * The title text is buffered until the closing <code>title</code> tag is reached, because a parser is free to report
 * the text of a single element through several {@link #characters(char[], int, int)} calls.
 *
 * @author Daniel Eggert (daniel.eggert@gfz-potsdam.de)
 *
 */
public class GranuleListResponseHandler extends DefaultHandler {

    private static final String XML_TAG_NAME_ENTRY = "entry";
    private static final String XML_TAG_NAME_TITLE = "title";

    /** Collected titles; stays <code>null</code> until the first title has been found. */
    private List<String> granules = null;
    /** <code>true</code> while the parser is inside an entry element. */
    private boolean inEntry = false;
    /** <code>true</code> while the parser is inside the title element of an entry. */
    private boolean inEntryTitle = false;
    /** <code>true</code> once the title of the current entry is recorded; the rest of the entry is skipped. */
    private boolean proceedToNextEntry = false;
    /** Text of the title element currently being read. */
    private final StringBuilder titleText = new StringBuilder();

    /*
     * (non-Javadoc)
     *
     * @see org.xml.sax.helpers.DefaultHandler#startElement(java.lang.String, java.lang.String, java.lang.String, org.xml.sax.Attributes)
     */
    @Override
    public void startElement(String uri, String localName, String qName, Attributes attributes) throws SAXException {
        if (proceedToNextEntry || inEntryTitle) {
            // title already recorded, or markup nested inside the title that is currently being read
            return;
        }
        if (inEntry) {
            // we are within an entry - check for title tag
            inEntryTitle = XML_TAG_NAME_TITLE.equals(qName);
            if (inEntryTitle) {
                titleText.setLength(0);
            }
        } else {
            // check whether we are entering an entry
            inEntry = XML_TAG_NAME_ENTRY.equals(qName);
        }
    }

    /*
     * (non-Javadoc)
     *
     * @see org.xml.sax.helpers.DefaultHandler#endElement(java.lang.String, java.lang.String, java.lang.String)
     */
    @Override
    public void endElement(String uri, String localName, String qName) throws SAXException {
        if (XML_TAG_NAME_ENTRY.equals(qName)) {
            // leaving the entry - start over for the next one
            inEntry = false;
            inEntryTitle = false;
            proceedToNextEntry = false;
            titleText.setLength(0);
        } else if (inEntryTitle && XML_TAG_NAME_TITLE.equals(qName)) {
            // title complete; resetting the flag here keeps text following the title from being picked up
            inEntryTitle = false;
            if (titleText.length() > 0) {
                if (granules == null) {
                    granules = new ArrayList<>();
                }
                granules.add(titleText.toString());
                proceedToNextEntry = true;
            }
            // an empty title records nothing; a later title of the same entry may still provide the text
        }
    }

    /*
     * (non-Javadoc)
     *
     * @see org.xml.sax.helpers.DefaultHandler#characters(char[], int, int)
     */
    @Override
    public void characters(char[] ch, int start, int length) throws SAXException {
        if (inEntryTitle) {
            // may be called several times for one title - collect all chunks
            titleText.append(ch, start, length);
        }
    }

    /**
     * @return the titles collected so far in document order, or <code>null</code> if no entry title has been found
     */
    public List<String> getGranules() {
        return granules;
    }
}
/**
*
*/
package de.potsdam.gfz.scihubapi.opendata;
import java.io.InputStream;
import java.net.URL;
import java.util.ArrayList;
import java.util.Deque;
import java.util.List;
import java.util.concurrent.ConcurrentLinkedDeque;
import javax.xml.parsers.DocumentBuilder;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.ParserConfigurationException;
import javax.xml.parsers.SAXParser;
import javax.xml.parsers.SAXParserFactory;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import org.xml.sax.XMLReader;
import de.potsdam.gfz.scihubapi.UrlUtils;
/**
* @author Daniel Eggert (daniel.eggert@gfz-potsdam.de)
*
*/
public class SciHubOpenData {
private static final Logger LOG = LoggerFactory.getLogger(SciHubOpenData.class);
// https://scihub.copernicus.eu/dhus/odata/v1/Products('283adccc-f786-42d8-a4b0-c30739a71238')/Nodes('S2A_OPER_PRD_MSIL1C_PDMC_20160114T043657_R073_V20160113T235206_20160113T235206.SAFE')/Nodes('GRANULE')/Nodes
// https://scihub.copernicus.eu/dhus/odata/v1/Products(<title>)/Nodes(<filename>)/Nodes('GRANULE')/Nodes
// URI/URL open data constants
private static final String ODATA_PROTOCOL_SCHEME = "https";
private static final String ODATA_HOST = "scihub.copernicus.eu";
private static final String ODATA_PATH_PREFIX = "/dhus/odata/v1/";
private static final String ODATA_PATH_PRODUCTS_PREFIX = "Products('";
private static final String ODATA_PATH_NODES_PREFIX = "Nodes('";
private static final String ODATA_PATH_DELIMITER = "')/";
private static final String AWS_BASE_URL = "http://sentinel-s2-l1c.s3.amazonaws.com/";
private static final String AWS_PRODUCT_PATH = "products/";
private static final String AWS_METADATA_FILE = "metadata.xml";
private final String base64Credentials;
private final Deque<SAXParser> PARSER_POOL = new ConcurrentLinkedDeque<>();
private final Deque<DocumentBuilder> DOC_BUILDER_POOL = new ConcurrentLinkedDeque<>();
// private SAXParser saxParser;
// private DocumentBuilder docBuilder;
/**
 * Creates a new instance and stores the Base64 encoded <code>username:password</code> pair, which is the form
 * expected by HTTP basic authentication.
 *
 * @param username
 *            the account name
 * @param password
 *            the account password
 */
public SciHubOpenData(String username, String password) {
    String userPass = username + ":" + password;
    // Encode with a fixed charset: the platform default differs between machines and would corrupt non-ASCII
    // credentials. java.util.Base64 produces the same output as javax.xml.bind.DatatypeConverter, which is no
    // longer part of the JDK since Java 11.
    base64Credentials = java.util.Base64.getEncoder().encodeToString(userPass.getBytes(java.nio.charset.StandardCharsets.UTF_8));
}
// init parser
// try {
// saxParser = SAXParserFactory.newInstance().newSAXParser();
// docBuilder = DocumentBuilderFactory.newInstance().newDocumentBuilder();
// } catch (Exception e) {
// e.printStackTrace();
// }
/**
 * Takes a {@link DocumentBuilder} from the pool or creates a new one if the pool is empty.
 *
 * @return a document builder, or <code>null</code> if the pool is empty and no new builder could be created
 */
private DocumentBuilder getPooledDocBuilder() {
    // pollFirst() checks and removes in one atomic step. Checking isEmpty() and calling pop() separately lets
    // another thread take the last builder in between, which makes pop() throw a NoSuchElementException.
    DocumentBuilder pooled = DOC_BUILDER_POOL.pollFirst();
    if (pooled != null) {
        return pooled;
    }
    try {
        return DocumentBuilderFactory.newInstance().newDocumentBuilder();
    } catch (ParserConfigurationException e) {
        LOG.error("Unable to create a new DocumentBuilder", e);
        return null;
    }
}
/**
 * Hands a document builder back to the pool; it becomes the next one to be taken out.
 *
 * @param docBuilder
 *            the builder to give back
 */
private void returnDocBuilderToPool(DocumentBuilder docBuilder) {
    DOC_BUILDER_POOL.addFirst(docBuilder);
}
/**
 * Takes a {@link SAXParser} from the pool or creates a new one if the pool is empty.
 *
 * @return a parser, or <code>null</code> if the pool is empty and no new parser could be created
 */
private SAXParser getPooledSaxParser() {
    // pollFirst() checks and removes in one atomic step. Checking isEmpty() and calling pop() separately lets
    // another thread take the last parser in between, which makes pop() throw a NoSuchElementException.
    SAXParser pooled = PARSER_POOL.pollFirst();
    if (pooled != null) {
        return pooled;
    }
    try {
        return SAXParserFactory.newInstance().newSAXParser();
    } catch (Exception e) {
        LOG.error("Unable to create a new SAXParser", e);
        return null;
    }
}
/**
 * Hands a SAX parser back to the pool; it becomes the next one to be taken out.
 *
 * @param parser
 *            the parser to give back
 */
private void returnSaxParserToPool(SAXParser parser) {
    PARSER_POOL.addFirst(parser);
}
/**
 * Reads the granule identifiers of a product from the <code>metadata.xml</code> published in the sentinel-s2-l1c
 * AWS bucket.
 *
 * @param filename
 *            product file name, with or without file extension, e.g.
 *            <code>S2A_OPER_PRD_MSIL1C_PDMC_20160101T045837_R030_V20160101T000037_20160101T000037.SAFE</code>
 * @return the value of the <code>granuleIdentifier</code> attribute of every <code>Granules</code> element, or
 *         <code>null</code> if the metadata could not be retrieved or parsed, or if no metadata url can be derived
 *         from the given file name
 */
public List<String> getGranuleIdsFromAWS(String filename) {
    String metadataUrl = buildAwsMetadataUrl(filename);
    if (metadataUrl == null) {
        LOG.warn("Unable to derive the AWS metadata url from filename: {}", filename);
        return null;
    }

    List<String> granules = null;
    DocumentBuilder docBuilder = null;
    // try-with-resources makes sure the http stream is released in every case
    try (InputStream httpStream = UrlUtils.getStreamWithException(new URL(metadataUrl), null)) {
        // download completely before parsing
        InputStream data = UrlUtils.download(httpStream);

        docBuilder = getPooledDocBuilder();
        if (docBuilder == null) {
            // no parser available; the failure has been reported by getPooledDocBuilder()
            return null;
        }
        Document doc = docBuilder.parse(data);

        // extract the granule information
        NodeList nl = doc.getElementsByTagName("Granules");
        final int numGranules = nl.getLength();
        granules = new ArrayList<>(numGranules);
        for (int i = 0; i < numGranules; ++i) {
            Element e = (Element) nl.item(i);
            granules.add(e.getAttribute("granuleIdentifier"));
        }
    } catch (Exception e) {
        // a 404 just means the product is not available on AWS - nothing worth reporting;
        // not every exception has a cause (or a cause with a message), so check step by step
        Throwable cause = e.getCause();
        String causeMsg = cause != null ? cause.getMessage() : null;
        boolean notFound = causeMsg != null && causeMsg.contains("404");
        if (!notFound) {
            LOG.error("Error retrieving granule ids from " + metadataUrl, e);
        }
    } finally {
        if (docBuilder != null) {
            returnDocBuilderToPool(docBuilder);
        }
    }
    return granules;
}

/**
 * Assembles the url of a product's metadata file:
 * <code>products/[year]/[month]/[day]/[product name]/metadata.xml</code> below the AWS base url.
 *
 * @param filename
 *            product file name, with or without file extension
 * @return the url as string, or <code>null</code> if the file name is <code>null</code> or too short to contain
 *         the date
 */
private static String buildAwsMetadataUrl(String filename) {
    if (filename == null) {
        return null;
    }

    // stripping off the file extension will give us the product name
    int extensionIdx = filename.lastIndexOf('.');
    String name = extensionIdx > 0 ? filename.substring(0, extensionIdx) : filename;

    // the date (yyyyMMdd) is expected at the fixed position 25..32 of the product name:
    // S2A_OPER_PRD_MSIL1C_PDMC_20160101T045837_R030_V20160101T000037_20160101T000037
    if (name.length() < 33) {
        return null;
    }
    String year = name.substring(25, 29);
    String month = name.substring(29, 31);
    String day = name.substring(31, 33);

    // remove leading zeros
    if (month.startsWith("0")) {
        month = month.substring(1);
    }
    if (day.startsWith("0")) {
        day = day.substring(1);
    }

    // now assemble the complete url
    return AWS_BASE_URL + AWS_PRODUCT_PATH + year + "/" + month + "/" + day + "/" + name + "/" + AWS_METADATA_FILE;
}
public List<String> getGranuleIds(String id, String filename) {
// try aws first
List<String> granules = getGranuleIdsFromAWS(filename);
if (granules != null) {
return granules;
}
StringBuilder sb = new StringBuilder(ODATA_PATH_PREFIX);
// set product id
sb.append(ODATA_PATH_PRODUCTS_PREFIX);
sb.append(id);
sb.append(ODATA_PATH_DELIMITER);
// set filename
sb.append(ODATA_PATH_NODES_PREFIX);
sb.append(filename);
sb.append(ODATA_PATH_DELIMITER);
// set granule node
sb.append(ODATA_PATH_NODES_PREFIX);
sb.append("GRANULE");
sb.append(ODATA_PATH_DELIMITER);
// set folder listing nodes
sb.append("Nodes");
// build valid url
URL url = UrlUtils.getValidatedUrl(ODATA_PROTOCOL_SCHEME, ODATA_HOST, sb.toString(), null);
// URL url = UrlUtils.getValidatedUrl(
// "https://scihub.copernicus.eu/dhus/odata/v1/Products('283adccc-f786-42d8-a4b0-c30739a71238')/Nodes('S2A_OPER_PRD_MSIL1C_PDMC_20160114T043657_R073_V20160113T235206_20160113T235206.SAFE')/Nodes('GRANULE')/Nodes");
long backoffMillis = 9375;
long maxBackoff = 600000;
SAXParser saxParser = null;
if (url != null) {
// valid url - download data
InputStream stream = null;