Commit 24f1486c authored by Daniel Scheffler
Browse files

Fixed FutureWarning regarding the use of GeoDataFrame and GeoSeries for data...


Fixed FutureWarning regarding the use of GeoDataFrame and GeoSeries for data without geopandas geometry (switched to plain pandas classes).
Signed-off-by: Daniel Scheffler <danschef@gfz-potsdam.de>
parent 6c93afa0
Pipeline #15810 passed with stages
in 23 minutes and 8 seconds
......@@ -39,11 +39,11 @@ from pkg_resources import parse_version
import numpy as np
import pandas as pd
from pandas.io.sql import pandasSQL_builder, SQLTable
from pandas.io.sql import pandasSQL_builder, SQLTable, DataFrame, Series
import psycopg2
from shapely.wkb import loads as wkb_loads
from geoalchemy2.types import Geometry as GEOMETRY
from geopandas import GeoDataFrame, GeoSeries
from geopandas import GeoDataFrame
from shapely.geometry import Polygon, box, MultiPolygon
from sqlalchemy import create_engine
from sqlalchemy.types import to_instance, TypeEngine
......@@ -672,7 +672,7 @@ class GMS_JOB(object):
# defaults
self.conn = conn_db
self.dataframe = GeoDataFrame()
self.dataframe = DataFrame()
self.scene_counts = {} # set by self.create()
self.exists_in_db = False
......@@ -710,7 +710,7 @@ class GMS_JOB(object):
self.analysis_parameter = None
def __repr__(self):
return 'GMS job:\n\n' + GeoSeries(self.db_entry).to_string()
return 'GMS job:\n\n' + Series(self.db_entry).to_string()
@property
def virtualsensorid(self):
......@@ -789,8 +789,8 @@ class GMS_JOB(object):
else:
datadict['filenames'] = [datadict['filenames']]
# find all duplicates in input datadicts and build common geodataframe
all_gdfs = []
# find all duplicates in input datadicts and build common dataframe
all_dfs = []
for datadict in dictlist_data2process:
assert isinstance(datadict, dict)
......@@ -799,7 +799,7 @@ class GMS_JOB(object):
raise NotImplementedError
else:
temp_gdf = GeoDataFrame(datadict, columns=['satellite', 'sensor', 'filenames'])
temp_df = DataFrame(datadict, columns=['satellite', 'sensor', 'filenames'])
if re.search(r'Landsat-7', datadict['satellite'], re.I) and \
re.search(r'ETM+', datadict['sensor'], re.I):
......@@ -809,18 +809,18 @@ class GMS_JOB(object):
def get_L7_sensor(fN):
return LED(fN.split('.tar.gz')[0]).sensorIncSLC
temp_gdf['sensor'] = list(temp_gdf['filenames'].map(get_L7_sensor))
temp_df['sensor'] = list(temp_df['filenames'].map(get_L7_sensor))
all_gdfs.append(temp_gdf)
all_dfs.append(temp_df)
gdf = GeoDataFrame(pd.concat(all_gdfs)).drop_duplicates()
gdf.columns = ['satellite', 'sensor', 'filename']
df = DataFrame(pd.concat(all_dfs)).drop_duplicates()
df.columns = ['satellite', 'sensor', 'filename']
# run self.from_dictlist
sceneInfoGDF = self._get_validated_sceneInfoGDFs(gdf)
sceneInfoDF = self._get_validated_sceneInfoDFs(df)
# populate attributes
self._populate_jobAttrs_from_sceneInfoGDF(sceneInfoGDF)
self._populate_jobAttrs_from_sceneInfoDF(sceneInfoDF)
return self
......@@ -842,7 +842,7 @@ class GMS_JOB(object):
list_sceneIDs = list(list_sceneIDs)
# query 'satellite', 'sensor', 'filename' from database and summarize in GeoDataFrame
# query 'satellite', 'sensor', 'filename' from database and summarize in DataFrame
with psycopg2.connect(self.conn) as conn:
with conn.cursor() as cursor:
execute_pgSQL_query(cursor,
......@@ -850,27 +850,27 @@ class GMS_JOB(object):
LEFT JOIN satellites on scenes.satelliteid=satellites.id
LEFT JOIN sensors on scenes.sensorid=sensors.id
WHERE scenes.id in (%s)""" % ','.join([str(i) for i in list_sceneIDs]))
gdf = GeoDataFrame(cursor.fetchall(), columns=['sceneid', 'satellite', 'sensor', 'filename'])
df = DataFrame(cursor.fetchall(), columns=['sceneid', 'satellite', 'sensor', 'filename'])
# FIXME overwriting 'ETM+_SLC_OFF' with 'ETM+' causes _get_validated_sceneInfoGDFs() to fail because the
# FIXME overwriting 'ETM+_SLC_OFF' with 'ETM+' causes _get_validated_sceneInfoDFs() to fail because the
# FIXME sensorid for ETM+_SLC_OFF cannot be retrieved
# gdf['sensor'] = gdf['sensor'].apply(lambda senN: senN if senN != 'ETM+_SLC_OFF' else 'ETM+')
gdf = gdf.drop_duplicates()
# df['sensor'] = df['sensor'].apply(lambda senN: senN if senN != 'ETM+_SLC_OFF' else 'ETM+')
df = df.drop_duplicates()
if gdf.empty:
if df.empty:
raise ValueError('None of the given scene IDs could be found in the GeoMultiSens database. '
'Job creation failed.')
else:
missing_IDs = [i for i in list_sceneIDs if i not in gdf['sceneid'].values]
missing_IDs = [i for i in list_sceneIDs if i not in df['sceneid'].values]
if missing_IDs:
warnings.warn('The following scene IDs could not been found in the GeoMultiSens database: \n%s'
% '\n'.join([str(i) for i in missing_IDs]))
# run self.from_dictlist
sceneInfoGDF = self._get_validated_sceneInfoGDFs(gdf)
sceneInfoDF = self._get_validated_sceneInfoDFs(df)
# populate attributes
self._populate_jobAttrs_from_sceneInfoGDF(sceneInfoGDF)
self._populate_jobAttrs_from_sceneInfoDF(sceneInfoDF)
return self
......@@ -920,24 +920,24 @@ class GMS_JOB(object):
return self.from_sceneIDlist(list_sceneIDs, virtual_sensor_id,
datasetid_spatial_ref=datasetid_spatial_ref, comment=comment)
def _get_validated_sceneInfoGDFs(self, GDF_SatSenFname):
# type: (GeoDataFrame) -> GeoDataFrame
def _get_validated_sceneInfoDFs(self, DF_SatSenFname):
# type: (DataFrame) -> DataFrame
"""
:param GDF_SatSenFname:
:param DF_SatSenFname:
:return:
"""
gdf = GDF_SatSenFname
df = DF_SatSenFname
# loop through all satellite-sensor combinations and get scene information from database
all_gdf_recs, all_gdf_miss = [], []
all_df_recs, all_df_miss = [], []
all_satellites, all_sensors = zip(
*[i.split('__') for i in (np.unique(gdf['satellite'] + '__' + gdf['sensor']))])
*[i.split('__') for i in (np.unique(df['satellite'] + '__' + df['sensor']))])
for satellite, sensor in zip(all_satellites, all_sensors):
cur_gdf = gdf.loc[(gdf['satellite'] == satellite) & (gdf['sensor'] == sensor)]
filenames = list(cur_gdf['filename'])
cur_df = df.loc[(df['satellite'] == satellite) & (df['sensor'] == sensor)]
filenames = list(cur_df['filename'])
satID_res = get_info_from_postgreSQLdb(self.conn, 'satellites', ['id'], {'name': satellite})
senID_res = get_info_from_postgreSQLdb(self.conn, 'sensors', ['id'], {'name': sensor})
......@@ -945,55 +945,55 @@ class GMS_JOB(object):
assert len(senID_res), "No sensor named '%s' found in database." % sensor
# append sceneid and wkb_hex bounds
if 'sceneid' in gdf.columns:
sceneIDs = list(cur_gdf['sceneid'])
if 'sceneid' in df.columns:
sceneIDs = list(cur_df['sceneid'])
conddict = dict(id=sceneIDs, satelliteid=satID_res[0][0], sensorid=senID_res[0][0])
else:
conddict = dict(filename=filenames, satelliteid=satID_res[0][0], sensorid=senID_res[0][0])
records = get_info_from_postgreSQLdb(
self.conn, 'scenes', ['filename', 'id', 'acquisitiondate', 'bounds'], conddict)
records = GeoDataFrame(records, columns=['filename', 'sceneid', 'acquisitiondate', 'geom'])
if 'sceneid' in gdf.columns:
records = DataFrame(records, columns=['filename', 'sceneid', 'acquisitiondate', 'geom'])
if 'sceneid' in df.columns:
del records['sceneid']
cur_gdf = cur_gdf.merge(records, on='filename', how="outer", copy=False)
cur_df = cur_df.merge(records, on='filename', how="outer", copy=False)
# separate records with valid matches in database from invalid matches (filename not found in database)
gdf_recs = cur_gdf[
cur_gdf.sceneid.notnull()].copy() # creates a copy (needed to be able to apply maps later)
gdf_miss = cur_gdf[cur_gdf.sceneid.isnull()] # creates a view
df_recs = cur_df[
cur_df.sceneid.notnull()].copy() # creates a copy (needed to be able to apply maps later)
df_miss = cur_df[cur_df.sceneid.isnull()] # creates a view
# convert scene ids from floats to integers
gdf_recs['sceneid'] = list(gdf_recs.sceneid.map(lambda sceneid: int(sceneid)))
df_recs['sceneid'] = list(df_recs.sceneid.map(lambda sceneid: int(sceneid)))
# wkb_hex bounds to shapely polygons
gdf_recs['polygons'] = list(gdf_recs.geom.map(lambda wkb_hex: wkb_loads(wkb_hex, hex=True)))
df_recs['polygons'] = list(df_recs.geom.map(lambda wkb_hex: wkb_loads(wkb_hex, hex=True)))
all_gdf_recs.append(gdf_recs)
all_gdf_miss.append(gdf_miss)
all_df_recs.append(df_recs)
all_df_miss.append(df_miss)
# merge all dataframes of all satellite-sensor combinations
gdf_recs_compl = GeoDataFrame(pd.concat(all_gdf_recs))
gdf_miss_compl = GeoDataFrame(pd.concat(all_gdf_miss))
df_recs_compl = DataFrame(pd.concat(all_df_recs))
df_miss_compl = DataFrame(pd.concat(all_df_miss))
# populate attributes
if not gdf_miss_compl.empty:
if not df_miss_compl.empty:
warnings.warn('The following scene filenames could not been found in the GeoMultiSens database: \n%s'
% '\n'.join(list(gdf_miss_compl['filename'])))
% '\n'.join(list(df_miss_compl['filename'])))
return gdf_recs_compl
return df_recs_compl
def _populate_jobAttrs_from_sceneInfoGDF(self, sceneInfoGDF):
# type: (GeoDataFrame) -> None
def _populate_jobAttrs_from_sceneInfoDF(self, sceneInfoDF):
# type: (DataFrame) -> None
"""
:param sceneInfoGDF:
:param sceneInfoDF:
:return:
"""
if not sceneInfoGDF.empty:
self.dataframe = sceneInfoGDF
if not sceneInfoDF.empty:
self.dataframe = sceneInfoDF
self.sceneids = list(self.dataframe['sceneid'])
self.statistics = [len(self.sceneids)] + [0] * 8
self.bounds = box(*MultiPolygon(list(self.dataframe['polygons'])).bounds)
......@@ -1019,21 +1019,21 @@ class GMS_JOB(object):
records = get_info_from_postgreSQLdb(self.conn, 'scenes', ['satelliteid', 'sensorid', 'filename',
'id', 'acquisitiondate', 'bounds'],
{'id': self.sceneids})
gdf = GeoDataFrame(records,
df = DataFrame(records,
columns=['satelliteid', 'sensorid', 'filename', 'sceneid', 'acquisitiondate', 'geom'])
all_satIDs = gdf.satelliteid.unique().tolist()
all_senIDs = gdf.sensorid.unique().tolist()
all_satIDs = df.satelliteid.unique().tolist()
all_senIDs = df.sensorid.unique().tolist()
satName_res = get_info_from_postgreSQLdb(self.conn, 'satellites', ['name'], {'id': all_satIDs})
senName_res = get_info_from_postgreSQLdb(self.conn, 'sensors', ['name'], {'id': all_senIDs})
all_satNames = [i[0] for i in satName_res]
all_senNames = [i[0] for i in senName_res]
id_satName_dict = dict(zip(all_satIDs, all_satNames))
id_senName_dict = dict(zip(all_senIDs, all_senNames))
gdf.insert(0, 'satellite', list(gdf.satelliteid.map(lambda satID: id_satName_dict[satID])))
gdf.insert(1, 'sensor', list(gdf.sensorid.map(lambda senID: id_senName_dict[senID])))
gdf['polygons'] = list(gdf.geom.map(lambda wkb_hex: wkb_loads(wkb_hex, hex=True)))
df.insert(0, 'satellite', list(df.satelliteid.map(lambda satID: id_satName_dict[satID])))
df.insert(1, 'sensor', list(df.sensorid.map(lambda senID: id_senName_dict[senID])))
df['polygons'] = list(df.geom.map(lambda wkb_hex: wkb_loads(wkb_hex, hex=True)))
self.dataframe = gdf[['satellite', 'sensor', 'filename', 'sceneid', 'acquisitiondate', 'geom', 'polygons']]
self.dataframe = df[['satellite', 'sensor', 'filename', 'sceneid', 'acquisitiondate', 'geom', 'polygons']]
return self
......@@ -1050,8 +1050,8 @@ class GMS_JOB(object):
self.update_db_entry()
def _get_dataframe(self, datadict): # FIXME deprecated
gdf = GeoDataFrame(datadict, columns=['satellite', 'sensor', 'filenames'])
gdf.columns = ['satellite', 'sensor', 'filename']
df = DataFrame(datadict, columns=['satellite', 'sensor', 'filenames'])
df.columns = ['satellite', 'sensor', 'filename']
satID_res = get_info_from_postgreSQLdb(self.conn, 'satellites', ['id'], {'name': datadict['satellite']})
senID_res = get_info_from_postgreSQLdb(self.conn, 'sensors', ['id'], {'name': datadict['sensor']})
......@@ -1062,20 +1062,20 @@ class GMS_JOB(object):
records = get_info_from_postgreSQLdb(self.conn, 'scenes', ['filename', 'id', 'acquisitiondate', 'bounds'],
{'filename': datadict['filenames'],
'satelliteid': satID_res[0][0], 'sensorid': senID_res[0][0]})
records = GeoDataFrame(records, columns=['filename', 'sceneid', 'acquisitiondate', 'geom'])
gdf = gdf.merge(records, on='filename', how="outer")
records = DataFrame(records, columns=['filename', 'sceneid', 'acquisitiondate', 'geom'])
df = df.merge(records, on='filename', how="outer")
# separate records with valid matches in database from invalid matches (filename not found in database)
gdf_recs = gdf[gdf.sceneid.notnull()].copy() # creates a copy (needed to be able to apply maps later)
gdf_miss = gdf[gdf.sceneid.isnull()] # creates a view
df_recs = df[df.sceneid.notnull()].copy() # creates a copy (needed to be able to apply maps later)
df_miss = df[df.sceneid.isnull()] # creates a view
# convert scene ids from floats to integers
gdf_recs['sceneid'] = list(gdf_recs.sceneid.map(lambda sceneid: int(sceneid)))
df_recs['sceneid'] = list(df_recs.sceneid.map(lambda sceneid: int(sceneid)))
# wkb_hex bounds to shapely polygons
gdf_recs['polygons'] = list(gdf_recs.geom.map(lambda wkb_hex: wkb_loads(wkb_hex, hex=True)))
df_recs['polygons'] = list(df_recs.geom.map(lambda wkb_hex: wkb_loads(wkb_hex, hex=True)))
return gdf_recs, gdf_miss
return df_recs, df_miss
def create(self):
# type: () -> int
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment