Commit 24f1486c authored by Daniel Scheffler
Browse files

Fixed FutureWarning regarding the use of GeoDataFrame and GeoSeries for data without geopandas geometry (switched to plain pandas classes).


Fixed FutureWarning regarding the use of GeoDataFrame and GeoSeries for data without geopandas geometry (switched to plain pandas classes).
Signed-off-by: Daniel Scheffler <danschef@gfz-potsdam.de>
parent 6c93afa0
Pipeline #15810 passed with stages
in 23 minutes and 8 seconds
...@@ -39,11 +39,11 @@ from pkg_resources import parse_version ...@@ -39,11 +39,11 @@ from pkg_resources import parse_version
import numpy as np import numpy as np
import pandas as pd import pandas as pd
from pandas.io.sql import pandasSQL_builder, SQLTable from pandas.io.sql import pandasSQL_builder, SQLTable, DataFrame, Series
import psycopg2 import psycopg2
from shapely.wkb import loads as wkb_loads from shapely.wkb import loads as wkb_loads
from geoalchemy2.types import Geometry as GEOMETRY from geoalchemy2.types import Geometry as GEOMETRY
from geopandas import GeoDataFrame, GeoSeries from geopandas import GeoDataFrame
from shapely.geometry import Polygon, box, MultiPolygon from shapely.geometry import Polygon, box, MultiPolygon
from sqlalchemy import create_engine from sqlalchemy import create_engine
from sqlalchemy.types import to_instance, TypeEngine from sqlalchemy.types import to_instance, TypeEngine
...@@ -672,7 +672,7 @@ class GMS_JOB(object): ...@@ -672,7 +672,7 @@ class GMS_JOB(object):
# defaults # defaults
self.conn = conn_db self.conn = conn_db
self.dataframe = GeoDataFrame() self.dataframe = DataFrame()
self.scene_counts = {} # set by self.create() self.scene_counts = {} # set by self.create()
self.exists_in_db = False self.exists_in_db = False
...@@ -710,7 +710,7 @@ class GMS_JOB(object): ...@@ -710,7 +710,7 @@ class GMS_JOB(object):
self.analysis_parameter = None self.analysis_parameter = None
def __repr__(self): def __repr__(self):
return 'GMS job:\n\n' + GeoSeries(self.db_entry).to_string() return 'GMS job:\n\n' + Series(self.db_entry).to_string()
@property @property
def virtualsensorid(self): def virtualsensorid(self):
...@@ -789,8 +789,8 @@ class GMS_JOB(object): ...@@ -789,8 +789,8 @@ class GMS_JOB(object):
else: else:
datadict['filenames'] = [datadict['filenames']] datadict['filenames'] = [datadict['filenames']]
# find all duplicates in input datadicts and build common geodataframe # find all duplicates in input datadicts and build common dataframe
all_gdfs = [] all_dfs = []
for datadict in dictlist_data2process: for datadict in dictlist_data2process:
assert isinstance(datadict, dict) assert isinstance(datadict, dict)
...@@ -799,7 +799,7 @@ class GMS_JOB(object): ...@@ -799,7 +799,7 @@ class GMS_JOB(object):
raise NotImplementedError raise NotImplementedError
else: else:
temp_gdf = GeoDataFrame(datadict, columns=['satellite', 'sensor', 'filenames']) temp_df = DataFrame(datadict, columns=['satellite', 'sensor', 'filenames'])
if re.search(r'Landsat-7', datadict['satellite'], re.I) and \ if re.search(r'Landsat-7', datadict['satellite'], re.I) and \
re.search(r'ETM+', datadict['sensor'], re.I): re.search(r'ETM+', datadict['sensor'], re.I):
...@@ -809,18 +809,18 @@ class GMS_JOB(object): ...@@ -809,18 +809,18 @@ class GMS_JOB(object):
def get_L7_sensor(fN): def get_L7_sensor(fN):
return LED(fN.split('.tar.gz')[0]).sensorIncSLC return LED(fN.split('.tar.gz')[0]).sensorIncSLC
temp_gdf['sensor'] = list(temp_gdf['filenames'].map(get_L7_sensor)) temp_df['sensor'] = list(temp_df['filenames'].map(get_L7_sensor))
all_gdfs.append(temp_gdf) all_dfs.append(temp_df)
gdf = GeoDataFrame(pd.concat(all_gdfs)).drop_duplicates() df = DataFrame(pd.concat(all_dfs)).drop_duplicates()
gdf.columns = ['satellite', 'sensor', 'filename'] df.columns = ['satellite', 'sensor', 'filename']
# run self.from_dictlist # run self.from_dictlist
sceneInfoGDF = self._get_validated_sceneInfoGDFs(gdf) sceneInfoDF = self._get_validated_sceneInfoDFs(df)
# populate attributes # populate attributes
self._populate_jobAttrs_from_sceneInfoGDF(sceneInfoGDF) self._populate_jobAttrs_from_sceneInfoDF(sceneInfoDF)
return self return self
...@@ -842,7 +842,7 @@ class GMS_JOB(object): ...@@ -842,7 +842,7 @@ class GMS_JOB(object):
list_sceneIDs = list(list_sceneIDs) list_sceneIDs = list(list_sceneIDs)
# query 'satellite', 'sensor', 'filename' from database and summarize in GeoDataFrame # query 'satellite', 'sensor', 'filename' from database and summarize in DataFrame
with psycopg2.connect(self.conn) as conn: with psycopg2.connect(self.conn) as conn:
with conn.cursor() as cursor: with conn.cursor() as cursor:
execute_pgSQL_query(cursor, execute_pgSQL_query(cursor,
...@@ -850,27 +850,27 @@ class GMS_JOB(object): ...@@ -850,27 +850,27 @@ class GMS_JOB(object):
LEFT JOIN satellites on scenes.satelliteid=satellites.id LEFT JOIN satellites on scenes.satelliteid=satellites.id
LEFT JOIN sensors on scenes.sensorid=sensors.id LEFT JOIN sensors on scenes.sensorid=sensors.id
WHERE scenes.id in (%s)""" % ','.join([str(i) for i in list_sceneIDs])) WHERE scenes.id in (%s)""" % ','.join([str(i) for i in list_sceneIDs]))
gdf = GeoDataFrame(cursor.fetchall(), columns=['sceneid', 'satellite', 'sensor', 'filename']) df = DataFrame(cursor.fetchall(), columns=['sceneid', 'satellite', 'sensor', 'filename'])
# FIXME overwriting 'ETM+_SLC_OFF' with 'ETM+' causes _get_validated_sceneInfoGDFs() to fail because the # FIXME overwriting 'ETM+_SLC_OFF' with 'ETM+' causes _get_validated_sceneInfoDFs() to fail because the
# FIXME sensorid for ETM+_SLC_OFF cannot be retrieved # FIXME sensorid for ETM+_SLC_OFF cannot be retrieved
# gdf['sensor'] = gdf['sensor'].apply(lambda senN: senN if senN != 'ETM+_SLC_OFF' else 'ETM+') # df['sensor'] = df['sensor'].apply(lambda senN: senN if senN != 'ETM+_SLC_OFF' else 'ETM+')
gdf = gdf.drop_duplicates() df = df.drop_duplicates()
if gdf.empty: if df.empty:
raise ValueError('None of the given scene IDs could be found in the GeoMultiSens database. ' raise ValueError('None of the given scene IDs could be found in the GeoMultiSens database. '
'Job creation failed.') 'Job creation failed.')
else: else:
missing_IDs = [i for i in list_sceneIDs if i not in gdf['sceneid'].values] missing_IDs = [i for i in list_sceneIDs if i not in df['sceneid'].values]
if missing_IDs: if missing_IDs:
warnings.warn('The following scene IDs could not been found in the GeoMultiSens database: \n%s' warnings.warn('The following scene IDs could not been found in the GeoMultiSens database: \n%s'
% '\n'.join([str(i) for i in missing_IDs])) % '\n'.join([str(i) for i in missing_IDs]))
# run self.from_dictlist # run self.from_dictlist
sceneInfoGDF = self._get_validated_sceneInfoGDFs(gdf) sceneInfoDF = self._get_validated_sceneInfoDFs(df)
# populate attributes # populate attributes
self._populate_jobAttrs_from_sceneInfoGDF(sceneInfoGDF) self._populate_jobAttrs_from_sceneInfoDF(sceneInfoDF)
return self return self
...@@ -920,24 +920,24 @@ class GMS_JOB(object): ...@@ -920,24 +920,24 @@ class GMS_JOB(object):
return self.from_sceneIDlist(list_sceneIDs, virtual_sensor_id, return self.from_sceneIDlist(list_sceneIDs, virtual_sensor_id,
datasetid_spatial_ref=datasetid_spatial_ref, comment=comment) datasetid_spatial_ref=datasetid_spatial_ref, comment=comment)
def _get_validated_sceneInfoGDFs(self, GDF_SatSenFname): def _get_validated_sceneInfoDFs(self, DF_SatSenFname):
# type: (GeoDataFrame) -> GeoDataFrame # type: (DataFrame) -> DataFrame
""" """
:param GDF_SatSenFname: :param DF_SatSenFname:
:return: :return:
""" """
gdf = GDF_SatSenFname df = DF_SatSenFname
# loop through all satellite-sensor combinations and get scene information from database # loop through all satellite-sensor combinations and get scene information from database
all_gdf_recs, all_gdf_miss = [], [] all_df_recs, all_df_miss = [], []
all_satellites, all_sensors = zip( all_satellites, all_sensors = zip(
*[i.split('__') for i in (np.unique(gdf['satellite'] + '__' + gdf['sensor']))]) *[i.split('__') for i in (np.unique(df['satellite'] + '__' + df['sensor']))])
for satellite, sensor in zip(all_satellites, all_sensors): for satellite, sensor in zip(all_satellites, all_sensors):
cur_gdf = gdf.loc[(gdf['satellite'] == satellite) & (gdf['sensor'] == sensor)] cur_df = df.loc[(df['satellite'] == satellite) & (df['sensor'] == sensor)]
filenames = list(cur_gdf['filename']) filenames = list(cur_df['filename'])
satID_res = get_info_from_postgreSQLdb(self.conn, 'satellites', ['id'], {'name': satellite}) satID_res = get_info_from_postgreSQLdb(self.conn, 'satellites', ['id'], {'name': satellite})
senID_res = get_info_from_postgreSQLdb(self.conn, 'sensors', ['id'], {'name': sensor}) senID_res = get_info_from_postgreSQLdb(self.conn, 'sensors', ['id'], {'name': sensor})
...@@ -945,55 +945,55 @@ class GMS_JOB(object): ...@@ -945,55 +945,55 @@ class GMS_JOB(object):
assert len(senID_res), "No sensor named '%s' found in database." % sensor assert len(senID_res), "No sensor named '%s' found in database." % sensor
# append sceneid and wkb_hex bounds # append sceneid and wkb_hex bounds
if 'sceneid' in gdf.columns: if 'sceneid' in df.columns:
sceneIDs = list(cur_gdf['sceneid']) sceneIDs = list(cur_df['sceneid'])
conddict = dict(id=sceneIDs, satelliteid=satID_res[0][0], sensorid=senID_res[0][0]) conddict = dict(id=sceneIDs, satelliteid=satID_res[0][0], sensorid=senID_res[0][0])
else: else:
conddict = dict(filename=filenames, satelliteid=satID_res[0][0], sensorid=senID_res[0][0]) conddict = dict(filename=filenames, satelliteid=satID_res[0][0], sensorid=senID_res[0][0])
records = get_info_from_postgreSQLdb( records = get_info_from_postgreSQLdb(
self.conn, 'scenes', ['filename', 'id', 'acquisitiondate', 'bounds'], conddict) self.conn, 'scenes', ['filename', 'id', 'acquisitiondate', 'bounds'], conddict)
records = GeoDataFrame(records, columns=['filename', 'sceneid', 'acquisitiondate', 'geom']) records = DataFrame(records, columns=['filename', 'sceneid', 'acquisitiondate', 'geom'])
if 'sceneid' in gdf.columns: if 'sceneid' in df.columns:
del records['sceneid'] del records['sceneid']
cur_gdf = cur_gdf.merge(records, on='filename', how="outer", copy=False) cur_df = cur_df.merge(records, on='filename', how="outer", copy=False)
# separate records with valid matches in database from invalid matches (filename not found in database) # separate records with valid matches in database from invalid matches (filename not found in database)
gdf_recs = cur_gdf[ df_recs = cur_df[
cur_gdf.sceneid.notnull()].copy() # creates a copy (needed to be able to apply maps later) cur_df.sceneid.notnull()].copy() # creates a copy (needed to be able to apply maps later)
gdf_miss = cur_gdf[cur_gdf.sceneid.isnull()] # creates a view df_miss = cur_df[cur_df.sceneid.isnull()] # creates a view
# convert scene ids from floats to integers # convert scene ids from floats to integers
gdf_recs['sceneid'] = list(gdf_recs.sceneid.map(lambda sceneid: int(sceneid))) df_recs['sceneid'] = list(df_recs.sceneid.map(lambda sceneid: int(sceneid)))
# wkb_hex bounds to shapely polygons # wkb_hex bounds to shapely polygons
gdf_recs['polygons'] = list(gdf_recs.geom.map(lambda wkb_hex: wkb_loads(wkb_hex, hex=True))) df_recs['polygons'] = list(df_recs.geom.map(lambda wkb_hex: wkb_loads(wkb_hex, hex=True)))
all_gdf_recs.append(gdf_recs) all_df_recs.append(df_recs)
all_gdf_miss.append(gdf_miss) all_df_miss.append(df_miss)
# merge all dataframes of all satellite-sensor combinations # merge all dataframes of all satellite-sensor combinations
gdf_recs_compl = GeoDataFrame(pd.concat(all_gdf_recs)) df_recs_compl = DataFrame(pd.concat(all_df_recs))
gdf_miss_compl = GeoDataFrame(pd.concat(all_gdf_miss)) df_miss_compl = DataFrame(pd.concat(all_df_miss))
# populate attributes # populate attributes
if not gdf_miss_compl.empty: if not df_miss_compl.empty:
warnings.warn('The following scene filenames could not been found in the GeoMultiSens database: \n%s' warnings.warn('The following scene filenames could not been found in the GeoMultiSens database: \n%s'
% '\n'.join(list(gdf_miss_compl['filename']))) % '\n'.join(list(df_miss_compl['filename'])))
return gdf_recs_compl return df_recs_compl
def _populate_jobAttrs_from_sceneInfoGDF(self, sceneInfoGDF): def _populate_jobAttrs_from_sceneInfoDF(self, sceneInfoDF):
# type: (GeoDataFrame) -> None # type: (DataFrame) -> None
""" """
:param sceneInfoGDF: :param sceneInfoDF:
:return: :return:
""" """
if not sceneInfoGDF.empty: if not sceneInfoDF.empty:
self.dataframe = sceneInfoGDF self.dataframe = sceneInfoDF
self.sceneids = list(self.dataframe['sceneid']) self.sceneids = list(self.dataframe['sceneid'])
self.statistics = [len(self.sceneids)] + [0] * 8 self.statistics = [len(self.sceneids)] + [0] * 8
self.bounds = box(*MultiPolygon(list(self.dataframe['polygons'])).bounds) self.bounds = box(*MultiPolygon(list(self.dataframe['polygons'])).bounds)
...@@ -1019,21 +1019,21 @@ class GMS_JOB(object): ...@@ -1019,21 +1019,21 @@ class GMS_JOB(object):
records = get_info_from_postgreSQLdb(self.conn, 'scenes', ['satelliteid', 'sensorid', 'filename', records = get_info_from_postgreSQLdb(self.conn, 'scenes', ['satelliteid', 'sensorid', 'filename',
'id', 'acquisitiondate', 'bounds'], 'id', 'acquisitiondate', 'bounds'],
{'id': self.sceneids}) {'id': self.sceneids})
gdf = GeoDataFrame(records, df = DataFrame(records,
columns=['satelliteid', 'sensorid', 'filename', 'sceneid', 'acquisitiondate', 'geom']) columns=['satelliteid', 'sensorid', 'filename', 'sceneid', 'acquisitiondate', 'geom'])
all_satIDs = gdf.satelliteid.unique().tolist() all_satIDs = df.satelliteid.unique().tolist()
all_senIDs = gdf.sensorid.unique().tolist() all_senIDs = df.sensorid.unique().tolist()
satName_res = get_info_from_postgreSQLdb(self.conn, 'satellites', ['name'], {'id': all_satIDs}) satName_res = get_info_from_postgreSQLdb(self.conn, 'satellites', ['name'], {'id': all_satIDs})
senName_res = get_info_from_postgreSQLdb(self.conn, 'sensors', ['name'], {'id': all_senIDs}) senName_res = get_info_from_postgreSQLdb(self.conn, 'sensors', ['name'], {'id': all_senIDs})
all_satNames = [i[0] for i in satName_res] all_satNames = [i[0] for i in satName_res]
all_senNames = [i[0] for i in senName_res] all_senNames = [i[0] for i in senName_res]
id_satName_dict = dict(zip(all_satIDs, all_satNames)) id_satName_dict = dict(zip(all_satIDs, all_satNames))
id_senName_dict = dict(zip(all_senIDs, all_senNames)) id_senName_dict = dict(zip(all_senIDs, all_senNames))
gdf.insert(0, 'satellite', list(gdf.satelliteid.map(lambda satID: id_satName_dict[satID]))) df.insert(0, 'satellite', list(df.satelliteid.map(lambda satID: id_satName_dict[satID])))
gdf.insert(1, 'sensor', list(gdf.sensorid.map(lambda senID: id_senName_dict[senID]))) df.insert(1, 'sensor', list(df.sensorid.map(lambda senID: id_senName_dict[senID])))
gdf['polygons'] = list(gdf.geom.map(lambda wkb_hex: wkb_loads(wkb_hex, hex=True))) df['polygons'] = list(df.geom.map(lambda wkb_hex: wkb_loads(wkb_hex, hex=True)))
self.dataframe = gdf[['satellite', 'sensor', 'filename', 'sceneid', 'acquisitiondate', 'geom', 'polygons']] self.dataframe = df[['satellite', 'sensor', 'filename', 'sceneid', 'acquisitiondate', 'geom', 'polygons']]
return self return self
...@@ -1050,8 +1050,8 @@ class GMS_JOB(object): ...@@ -1050,8 +1050,8 @@ class GMS_JOB(object):
self.update_db_entry() self.update_db_entry()
def _get_dataframe(self, datadict): # FIXME deprecated def _get_dataframe(self, datadict): # FIXME deprecated
gdf = GeoDataFrame(datadict, columns=['satellite', 'sensor', 'filenames']) df = DataFrame(datadict, columns=['satellite', 'sensor', 'filenames'])
gdf.columns = ['satellite', 'sensor', 'filename'] df.columns = ['satellite', 'sensor', 'filename']
satID_res = get_info_from_postgreSQLdb(self.conn, 'satellites', ['id'], {'name': datadict['satellite']}) satID_res = get_info_from_postgreSQLdb(self.conn, 'satellites', ['id'], {'name': datadict['satellite']})
senID_res = get_info_from_postgreSQLdb(self.conn, 'sensors', ['id'], {'name': datadict['sensor']}) senID_res = get_info_from_postgreSQLdb(self.conn, 'sensors', ['id'], {'name': datadict['sensor']})
...@@ -1062,20 +1062,20 @@ class GMS_JOB(object): ...@@ -1062,20 +1062,20 @@ class GMS_JOB(object):
records = get_info_from_postgreSQLdb(self.conn, 'scenes', ['filename', 'id', 'acquisitiondate', 'bounds'], records = get_info_from_postgreSQLdb(self.conn, 'scenes', ['filename', 'id', 'acquisitiondate', 'bounds'],
{'filename': datadict['filenames'], {'filename': datadict['filenames'],
'satelliteid': satID_res[0][0], 'sensorid': senID_res[0][0]}) 'satelliteid': satID_res[0][0], 'sensorid': senID_res[0][0]})
records = GeoDataFrame(records, columns=['filename', 'sceneid', 'acquisitiondate', 'geom']) records = DataFrame(records, columns=['filename', 'sceneid', 'acquisitiondate', 'geom'])
gdf = gdf.merge(records, on='filename', how="outer") df = df.merge(records, on='filename', how="outer")
# separate records with valid matches in database from invalid matches (filename not found in database) # separate records with valid matches in database from invalid matches (filename not found in database)
gdf_recs = gdf[gdf.sceneid.notnull()].copy() # creates a copy (needed to be able to apply maps later) df_recs = df[df.sceneid.notnull()].copy() # creates a copy (needed to be able to apply maps later)
gdf_miss = gdf[gdf.sceneid.isnull()] # creates a view df_miss = df[df.sceneid.isnull()] # creates a view
# convert scene ids from floats to integers # convert scene ids from floats to integers
gdf_recs['sceneid'] = list(gdf_recs.sceneid.map(lambda sceneid: int(sceneid))) df_recs['sceneid'] = list(df_recs.sceneid.map(lambda sceneid: int(sceneid)))
# wkb_hex bounds to shapely polygons # wkb_hex bounds to shapely polygons
gdf_recs['polygons'] = list(gdf_recs.geom.map(lambda wkb_hex: wkb_loads(wkb_hex, hex=True))) df_recs['polygons'] = list(df_recs.geom.map(lambda wkb_hex: wkb_loads(wkb_hex, hex=True)))
return gdf_recs, gdf_miss return df_recs, df_miss
def create(self): def create(self):
# type: () -> int # type: () -> int
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment