Commit 36b5f566 authored by Nicolas Garcia Ospina

Improved memory usage and made geometry storage optional

parent 951cea46
Pipeline #25663 passed in 5 minutes and 3 seconds
@@ -35,13 +35,26 @@ pip3 install .
## Running obmgapanalysis
Copy the config-example.yml to your working directory as config.yml and modify
- the variables regarding the data source, database credentials and tiles for input quadkeys.
+ the variables regarding the data source, database credentials, multiprocessing
+ framework and tiles for input quadkeys.
To assess built-up areas with a configured dataset:
```bash
cd /your/working/directory
obmgapanalysis
```
To assess built-up areas based on a `buildings_database`:
```bash
obmgapanalysis --obm_built_up
```
To insert entries from the `import_pathname` into the `target_database`:
```bash
obmgapanalysis --import_csv
```
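For orientation, a minimal sketch of how these two flags could be defined with `argparse`; this is an assumption for illustration, and the actual `obmgapanalysis` entry point may wire them up differently:
```python
# Hypothetical sketch: the real CLI definition in obmgapanalysis may differ.
import argparse

parser = argparse.ArgumentParser(description="OBM gap analysis")
parser.add_argument(
    "--obm_built_up",
    action="store_true",
    help="assess built-up areas based on the buildings_database",
)
parser.add_argument(
    "--import_csv",
    action="store_true",
    help="insert entries from import_pathname into the target_database",
)
args = parser.parse_args()
```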
## Copyright and copyleft
Copyright (C) 2021
......
@@ -15,6 +15,7 @@ obm_output_pathname: ./obm_results
import_pathname: ./results
number_cores: 1
batch_size: 1000
get_geometry: False
roads_database:
host: your_host.dir.request_data
......
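As a rough illustration only, these settings could be read back in Python with PyYAML (an assumption; the package's own configuration handling may differ), using the key names shown in config-example.yml:
```python
# Minimal sketch, assuming PyYAML; key names follow config-example.yml.
import yaml

with open("config.yml") as config_file:
    config = yaml.safe_load(config_file)

number_cores = config["number_cores"]   # maximum number of parallel processes
batch_size = config["batch_size"]       # maximum number of tiles per process
get_geometry = config["get_geometry"]   # store geometries in the output csv files?
roads_db_host = config["roads_database"]["host"]
```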
@@ -35,8 +35,11 @@ amount of tiles to be handled per process. Each CSV file may contain maximum thi
all of them provide built areas.
output_pathname (str): Target path name for the csv file writing and reading.
obm_output_pathname (str): Target path name for the OBM csv file writing and reading.
import_pathname (str): Target path name with csv files to import.
number_cores (int): Desired maximum number of parallel processes to execute.
- batch_size (int): Maximum amount of tiles to be handled per process
+ batch_size (int): Maximum amount of tiles to be handled per process.
+ get_geometry (bool): If True, geometries will be stored in the output csv files.
The last sections refer to database connections. `database` holds a database from which roads can be
extracted to refine built areas; it may also contain buildings if the program wants to calculate a
......
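To make the interplay of `number_cores` and `batch_size` concrete, here is a rough, self-contained sketch of splitting quadkeys into batches and dispatching them to worker processes; the quadkeys and the per-batch function are invented placeholders, not the project's own code:
```python
# Illustrative only: batch quadkeys and fan them out over worker processes.
from multiprocessing import Pool


def process_batch(quadkey_batch):
    # Placeholder for the per-batch work (e.g. built-area estimation).
    return len(quadkey_batch)


if __name__ == "__main__":
    quadkeys = ["120210233", "120210232", "120210231"]  # invented input tiles
    batch_size = 2     # maximum tiles handled per process
    number_cores = 1   # maximum parallel processes

    batches = [
        quadkeys[i : i + batch_size] for i in range(0, len(quadkeys), batch_size)
    ]
    with Pool(processes=number_cores) as pool:
        results = pool.map(process_batch, batches)
    print(results)  # -> [2, 1]
```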
@@ -19,8 +19,8 @@
import os
import logging
- import geopandas
import pandas
+ import geopandas
# Initialize log
logger = logging.getLogger(__name__)
@@ -29,7 +29,11 @@ logger = logging.getLogger(__name__)
class FileProcessor:
@staticmethod
def write_tiles_to_csv(
- list_of_dictionaries, output_pathname, column_geometry="built_area", crs="epsg:4326"
+ list_of_dictionaries,
+ output_pathname,
+ get_geometry=False,
+ column_geometry="built_area",
+ crs="epsg:4326",
):
"""Write a csv file from a list of dictionaries.
@@ -39,36 +43,30 @@ class FileProcessor:
output_pathname (str): Target path name for the csv file.
get_geometry (bool): If True, the geometry will be written. Default = False
column_geometry (str): Name of the field that contains geometries.
Default = "built_area"
crs (str): EPSG code of the data projection. Default = "epsg:4326"
"""
- tiles_gdf = geopandas.GeoDataFrame(
- list_of_dictionaries, geometry=column_geometry, crs=crs
- )
- filepath_out = os.path.join(
- output_pathname, "{}_{}.csv".format(tiles_gdf.quadkey.iloc[0], len(tiles_gdf.index))
- )
- logger.info("Creating {}".format(filepath_out))
- tiles_gdf.to_csv(filepath_out, index=False)
- @staticmethod
- def write_obm_tiles_to_csv(list_of_dictionaries, output_pathname):
- """Write a csv file from a list of dictionaries without geometries.
- Args:
- list_of_dictionaries (list): List of dictionaries with built-up areas to
- write.
- output_pathname (str): Target path name for the csv file.
- """
- tiles_df = pandas.DataFrame(list_of_dictionaries)
- filepath_out = os.path.join(
- output_pathname,
- "OBM_{}_{}.csv".format(tiles_df.quadkey.iloc[0], len(tiles_df.index)),
- )
- logger.info("Creating {}".format(filepath_out))
- tiles_df.to_csv(filepath_out, index=False)
+ if get_geometry is False:
+ tiles_df = pandas.DataFrame(list_of_dictionaries)
+ tiles_df = tiles_df.drop_duplicates(keep="first")
+ filepath_out = os.path.join(
+ output_pathname,
+ "{}_{}.csv".format(tiles_df.quadkey.iloc[0], len(tiles_df.index)),
+ )
+ logger.info("Creating {}".format(filepath_out))
+ tiles_df.to_csv(filepath_out, index=False)
+ else:
+ tiles_gdf = geopandas.GeoDataFrame(
+ list_of_dictionaries, geometry=column_geometry, crs=crs
+ )
+ filepath_out = os.path.join(
+ output_pathname,
+ "{}_{}.csv".format(tiles_gdf.quadkey.iloc[0], len(tiles_gdf.index)),
+ )
+ logger.info("Creating {}".format(filepath_out))
+ tiles_gdf.to_csv(filepath_out, index=False)
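A small usage sketch of the reworked method (not part of the commit): the dictionaries mirror the shape produced by TileProcessor.build_dictionary, the polygon is a stand-in, and the import of FileProcessor and the output directory are assumed.
```python
# Usage sketch only; assumes FileProcessor is importable and ./results exists.
from datetime import date
from shapely.geometry import box

# Without geometry: dictionaries carry only scalar attributes.
rows = [
    {
        "quadkey": "120210233",      # invented tile
        "source_id": 1,
        "built_area_size": 12345.6,  # square metres
        "last_update": str(date.today()),
    }
]
FileProcessor.write_tiles_to_csv(rows, "./results", get_geometry=False)

# With geometry: each dictionary also carries a polygon in the built_area field.
rows_with_geometry = [dict(rows[0], built_area=box(13.0, 52.0, 13.1, 52.1))]
FileProcessor.write_tiles_to_csv(
    rows_with_geometry, "./results", get_geometry=True
)
```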
@@ -65,6 +65,8 @@ if args.import_csv:
target_db_config = config["target_database"]
import_pathname = os.path.abspath(config["import_pathname"])
get_geometry = config["get_geometry"]
def multiprocess_built_estimation_batch(quadkey_batch):
"""
@@ -106,6 +108,7 @@ def multiprocess_built_estimation_batch(quadkey_batch):
database_crs_number=roads_database_crs_number,
table_config=db_config["roads_table"],
buffer_magnitude=db_config["process_buffer_magnitude"],
get_geometry=get_geometry,
)
if result is not None:
built_up_areas.append(result)
@@ -116,7 +119,10 @@ def multiprocess_built_estimation_batch(quadkey_batch):
if built_up_areas:
# Write output into a csv file
- FileProcessor.write_tiles_to_csv(built_up_areas, output_pathname)
+ FileProcessor.write_tiles_to_csv(
+ built_up_areas, output_pathname, get_geometry=get_geometry
+ )
del built_up_areas
roads_database.connection.close()
@@ -163,10 +169,11 @@ def multiprocess_buildings_batch(quadkey_batch):
if obm_built_up_areas:
# Write output into a csv file
- FileProcessor.write_obm_tiles_to_csv(
+ FileProcessor.write_tiles_to_csv(
list_of_dictionaries=obm_built_up_areas,
output_pathname=obm_output_pathname,
)
del obm_built_up_areas
buildings_database.connection.close()
......
@@ -272,12 +272,12 @@ class TileProcessor:
return polygon.area
@staticmethod
- def build_dictionary(tile, datasource, built_polygon):
+ def build_dictionary(tile, datasource, built_polygon, get_geometry=False):
"""Returns a dictionary with the built-up area related attributes
associated to the Tile and a given DataSource.
Contains:
quadkey (str): Tile quadkey
- source_id (int): Integer associated to a predefined method
+ source_id (int): Integer associated to a predefined method.
built_area (str): Polygon string projected to WGS84 coordinates.
built_area_size (float): Area measured in squared meters.
last_update (str): Date when the pickle was generated.
@@ -298,22 +298,40 @@ class TileProcessor:
if built_polygon.is_empty:
logging.info("No built area found in {}".format(tile.quadkey))
return
- results = {
- "quadkey": tile.quadkey,
- "source_id": datasource.source_id,
- "built_area": TileProcessor.reproject_polygon(built_polygon, tile.crs, "epsg:4326"),
- "built_area_size": TileProcessor.albers_area_calculation(built_polygon, tile.crs),
- "last_update": str(date.today()),
- }
+ if get_geometry is False:
+ results = {
+ "quadkey": tile.quadkey,
+ "source_id": datasource.source_id,
+ "built_area_size": TileProcessor.albers_area_calculation(
+ built_polygon, tile.crs
+ ),
+ "last_update": str(date.today()),
+ }
+ else:
+ results = {
+ "quadkey": tile.quadkey,
+ "source_id": datasource.source_id,
+ "built_area": TileProcessor.reproject_polygon(
+ built_polygon, tile.crs, "epsg:4326"
+ ),
+ "built_area_size": TileProcessor.albers_area_calculation(
+ built_polygon, tile.crs
+ ),
+ "last_update": str(date.today()),
+ }
if not results["source_id"]:
del results["source_id"]
return results
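For clarity, the two result shapes side by side, with invented placeholder values (note that build_dictionary drops source_id entirely when it is falsy):
```python
# Invented placeholder values illustrating the two shapes returned above.
result_without_geometry = {
    "quadkey": "120210233",
    "source_id": 1,
    "built_area_size": 52341.7,  # square metres
    "last_update": "2021-06-01",
}

result_with_geometry = {
    "quadkey": "120210233",
    "source_id": 1,
    # WKT shown for illustration; the actual value may be a shapely geometry.
    "built_area": "POLYGON ((13.0 52.0, 13.1 52.0, 13.1 52.1, 13.0 52.1, 13.0 52.0))",
    "built_area_size": 52341.7,
    "last_update": "2021-06-01",
}
```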
@staticmethod
def get_built_up_area(
- quadkey, datasource, database, database_crs_number, table_config, buffer_magnitude
+ quadkey,
+ datasource,
+ database,
+ database_crs_number,
+ table_config,
+ buffer_magnitude,
+ get_geometry=False,
):
"""Run the complete processing of a quadkey and returns a dictionary
created with TileProcessor.build_dictionary.
@@ -362,7 +380,9 @@
refined_built_area = TileProcessor.polygon_difference(
clip_built_geometry, roads_processed
)
- result = TileProcessor.build_dictionary(tile, datasource, refined_built_area)
+ result = TileProcessor.build_dictionary(
+ tile, datasource, refined_built_area, get_geometry=get_geometry
+ )
return result
@staticmethod
......
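The refinement step above subtracts processed (buffered) road geometries from the clipped built-up geometry before the result dictionary is built. A standalone shapely illustration of that idea, with invented names and values:
```python
# Standalone illustration only; invented geometries, not the project's own code.
from shapely.geometry import LineString, box

clip_built_geometry = box(0.0, 0.0, 100.0, 100.0)   # clipped built-up area
road = LineString([(0.0, 50.0), (100.0, 50.0)])      # one road crossing the tile
roads_processed = road.buffer(5.0)                   # buffer_magnitude of 5 units

refined_built_area = clip_built_geometry.difference(roads_processed)
print(round(refined_built_area.area, 1))  # 9000.0 -> the road corridor is removed
```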
@@ -36,6 +36,7 @@ setup(
"babelgrid",
"fiona",
"rtree",
"pandas",
"geopandas",
"rasterio",
"psycopg2-binary",
......