Commit 1613ba35 authored by Nicolas Garcia Ospina
Browse files

Improved memory usage and removed geometry storage

parent 951cea46
Pipeline #24805 passed with stage
in 4 minutes and 37 seconds
......@@ -19,7 +19,6 @@
import os
import logging
import geopandas
import pandas
# Initialize log
......@@ -28,34 +27,7 @@ logger = logging.getLogger(__name__)
class FileProcessor:
@staticmethod
def write_tiles_to_csv(
    list_of_dictionaries, output_pathname, column_geometry="built_area", crs="epsg:4326"
):
    """Dump a list of tile dictionaries, including geometries, to a csv file.

    The records are loaded into a GeoDataFrame and written to a file named
    ``<quadkey of the first row>_<number of rows>.csv`` placed under
    ``output_pathname``.

    Args:
        list_of_dictionaries (list): Dictionaries with built-up areas to
            write; each becomes one row.

        output_pathname (str): Directory path the csv file name is joined to.

        column_geometry (str): Name of the field holding the geometries.
            Default = "built_area"

        crs (str): EPSG code of the data projection. Default = "epsg:4326"
    """
    frame = geopandas.GeoDataFrame(
        list_of_dictionaries, geometry=column_geometry, crs=crs
    )

    # The file name is derived from the first row's quadkey and the row count.
    first_quadkey = frame.quadkey.iloc[0]
    row_count = len(frame.index)
    csv_name = "{}_{}.csv".format(first_quadkey, row_count)
    filepath_out = os.path.join(output_pathname, csv_name)

    message = "Creating {}".format(filepath_out)
    logger.info(message)

    # The index is not written out as a column.
    frame.to_csv(filepath_out, index=False)
@staticmethod
def write_obm_tiles_to_csv(list_of_dictionaries, output_pathname):
def write_tiles_to_csv_no_geom(list_of_dictionaries, output_pathname):
"""Write a csv file from a list of dictionaries without geometries.
Args:
......@@ -66,9 +38,10 @@ class FileProcessor:
"""
tiles_df = pandas.DataFrame(list_of_dictionaries)
tiles_df = tiles_df.drop_duplicates(keep="first")
filepath_out = os.path.join(
output_pathname,
"OBM_{}_{}.csv".format(tiles_df.quadkey.iloc[0], len(tiles_df.index)),
"{}_{}.csv".format(tiles_df.quadkey.iloc[0], len(tiles_df.index)),
)
logger.info("Creating {}".format(filepath_out))
tiles_df.to_csv(filepath_out, index=False)
......@@ -116,7 +116,8 @@ def multiprocess_built_estimation_batch(quadkey_batch):
if built_up_areas:
# Write output into a csv file
FileProcessor.write_tiles_to_csv(built_up_areas, output_pathname)
FileProcessor.write_tiles_to_csv_no_geom(built_up_areas, output_pathname)
del built_up_areas
roads_database.connection.close()
......@@ -163,10 +164,11 @@ def multiprocess_buildings_batch(quadkey_batch):
if obm_built_up_areas:
# Write output into a csv file
FileProcessor.write_obm_tiles_to_csv(
FileProcessor.write_tiles_to_csv_no_geom(
list_of_dictionaries=obm_built_up_areas,
output_pathname=obm_output_pathname,
)
del obm_built_up_areas
buildings_database.connection.close()
......
......@@ -278,7 +278,6 @@ class TileProcessor:
Contains:
quadkey (str): Tile quadkey
source_id (int): Integer associated to a predefined method
built_area (str): Polygon string projected to WGS84 coordinates.
built_area_size (float): Area measured in squared meters.
last_update (str): Date when the pickle was generated.
......@@ -302,7 +301,6 @@ class TileProcessor:
results = {
"quadkey": tile.quadkey,
"source_id": datasource.source_id,
"built_area": TileProcessor.reproject_polygon(built_polygon, tile.crs, "epsg:4326"),
"built_area_size": TileProcessor.albers_area_calculation(built_polygon, tile.crs),
"last_update": str(date.today()),
}
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment