Commit 1613ba35 authored by Nicolas Garcia Ospina
Browse files

Improved memory usage and removed geometry storage

parent 951cea46
Pipeline #24805 passed with stage
in 4 minutes and 37 seconds
...@@ -19,7 +19,6 @@ ...@@ -19,7 +19,6 @@
import os import os
import logging import logging
import geopandas
import pandas import pandas
# Initialize log # Initialize log
...@@ -28,34 +27,7 @@ logger = logging.getLogger(__name__) ...@@ -28,34 +27,7 @@ logger = logging.getLogger(__name__)
class FileProcessor: class FileProcessor:
@staticmethod @staticmethod
def write_tiles_to_csv( def write_tiles_to_csv_no_geom(list_of_dictionaries, output_pathname):
list_of_dictionaries, output_pathname, column_geometry="built_area", crs="epsg:4326"
):
"""Write a csv file from a list of dictionaries.
Args:
list_of_dictionaries (list): List of dictionaries with built-up areas to
write.
output_pathname (str): Target path name for the csv file.
column_geometry (str): Name of the field that contains geometries.
Default = "built_area"
crs (str): EPSG code of the data projection. Default = "epsg:4326"
"""
tiles_gdf = geopandas.GeoDataFrame(
list_of_dictionaries, geometry=column_geometry, crs=crs
)
filepath_out = os.path.join(
output_pathname, "{}_{}.csv".format(tiles_gdf.quadkey.iloc[0], len(tiles_gdf.index))
)
logger.info("Creating {}".format(filepath_out))
tiles_gdf.to_csv(filepath_out, index=False)
@staticmethod
def write_obm_tiles_to_csv(list_of_dictionaries, output_pathname):
"""Write a csv file from a list of dictionaries without geometries. """Write a csv file from a list of dictionaries without geometries.
Args: Args:
...@@ -66,9 +38,10 @@ class FileProcessor: ...@@ -66,9 +38,10 @@ class FileProcessor:
""" """
tiles_df = pandas.DataFrame(list_of_dictionaries) tiles_df = pandas.DataFrame(list_of_dictionaries)
tiles_df = tiles_df.drop_duplicates(keep="first")
filepath_out = os.path.join( filepath_out = os.path.join(
output_pathname, output_pathname,
"OBM_{}_{}.csv".format(tiles_df.quadkey.iloc[0], len(tiles_df.index)), "{}_{}.csv".format(tiles_df.quadkey.iloc[0], len(tiles_df.index)),
) )
logger.info("Creating {}".format(filepath_out)) logger.info("Creating {}".format(filepath_out))
tiles_df.to_csv(filepath_out, index=False) tiles_df.to_csv(filepath_out, index=False)
...@@ -116,7 +116,8 @@ def multiprocess_built_estimation_batch(quadkey_batch): ...@@ -116,7 +116,8 @@ def multiprocess_built_estimation_batch(quadkey_batch):
if built_up_areas: if built_up_areas:
# Write output into a csv file # Write output into a csv file
FileProcessor.write_tiles_to_csv(built_up_areas, output_pathname) FileProcessor.write_tiles_to_csv_no_geom(built_up_areas, output_pathname)
del built_up_areas
roads_database.connection.close() roads_database.connection.close()
...@@ -163,10 +164,11 @@ def multiprocess_buildings_batch(quadkey_batch): ...@@ -163,10 +164,11 @@ def multiprocess_buildings_batch(quadkey_batch):
if obm_built_up_areas: if obm_built_up_areas:
# Write output into a csv file # Write output into a csv file
FileProcessor.write_obm_tiles_to_csv( FileProcessor.write_tiles_to_csv_no_geom(
list_of_dictionaries=obm_built_up_areas, list_of_dictionaries=obm_built_up_areas,
output_pathname=obm_output_pathname, output_pathname=obm_output_pathname,
) )
del obm_built_up_areas
buildings_database.connection.close() buildings_database.connection.close()
......
...@@ -278,7 +278,6 @@ class TileProcessor: ...@@ -278,7 +278,6 @@ class TileProcessor:
Contains: Contains:
quadkey (str): Tile quadkey quadkey (str): Tile quadkey
source_id (int): Integer associated to a predefined method source_id (int): Integer associated to a predefined method
built_area (str): Polygon string projected to WGS84 coordinates.
built_area_size (float): Area measured in squared meters. built_area_size (float): Area measured in squared meters.
last_update (str): Date when the pickle was generated. last_update (str): Date when the pickle was generated.
...@@ -302,7 +301,6 @@ class TileProcessor: ...@@ -302,7 +301,6 @@ class TileProcessor:
results = { results = {
"quadkey": tile.quadkey, "quadkey": tile.quadkey,
"source_id": datasource.source_id, "source_id": datasource.source_id,
"built_area": TileProcessor.reproject_polygon(built_polygon, tile.crs, "epsg:4326"),
"built_area_size": TileProcessor.albers_area_calculation(built_polygon, tile.crs), "built_area_size": TileProcessor.albers_area_calculation(built_polygon, tile.crs),
"last_update": str(date.today()), "last_update": str(date.today()),
} }
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment