Commit 7f42e20a authored by Nicolas Garcia Ospina's avatar Nicolas Garcia Ospina
Browse files

Included obm_built_up program and debug

parent 41368c59
Pipeline #22584 passed with stage
in 4 minutes and 1 second
......@@ -11,10 +11,11 @@ tiles:
txt_filepath: tiles.txt
output_pathname: ./results
obm_output_pathname: ./obm_results
number_cores: 1
batch_size: 1000
database:
roads_database:
host: your_host.dir.request_data
dbname: gis
port: 5433
......@@ -25,6 +26,16 @@ database:
geometry_field: way
process_buffer_magnitude: False
buildings_database:
host: your_host.dir.request_data
dbname: gis
port: 5433
username: postgres
password: secret_pass
buildings_table:
tablename: planet_osm_roads
geometry_field: way
target_database:
host: your_host.dir.request_data
dbname: gis
......
......@@ -20,6 +20,7 @@
import os
import logging
import geopandas
import pandas
# Initialize log
logger = logging.getLogger(__name__)
......@@ -52,3 +53,22 @@ class FileProcessor:
)
logger.info("Creating {}".format(filepath_out))
tiles_gdf.to_csv(filepath_out, index=False)
@staticmethod
def write_obm_tiles_to_csv(list_of_dictionaries, output_pathname):
"""Write a csv file from a list of dictionaries without geometries.
Args:
list_of_dictionaries (list): List of dictionaries with built-up areas to
write.
output_pathname (str): Target path name for the csv file.
"""
tiles_df = pandas.DataFrame(list_of_dictionaries)
filepath_out = os.path.join(
output_pathname,
"OBM_{}_{}.csv".format(tiles_df.quadkey.iloc[0], len(tiles_df.index)),
)
logger.info("Creating {}".format(filepath_out))
tiles_df.to_csv(filepath_out, index=False)
......@@ -41,22 +41,31 @@ parser = argparse.ArgumentParser()
parser.add_argument(
"--import_csv", action="store_true", help="Import CSV files into target database"
)
parser.add_argument("--obm_built_up", action="store_true", help="Find built-up areas from OBM")
args = parser.parse_args()
# Get program configuration from config.yml
with open("config.yml", "r") as ymlfile:
config = yaml.load(ymlfile, Loader=yaml.FullLoader)
db_config = config["database"]
datasource_config = config["datasource"]
tiles_config = config["tiles"]
db_config = config["roads_database"]
datasource_config = config["datasource"]
output_pathname = os.path.abspath(config["output_pathname"])
target_db_config = config["target_database"]
if not os.path.exists(output_pathname):
os.makedirs(output_pathname)
buildings_db_config = config["buildings_database"]
obm_output_pathname = os.path.abspath(config["obm_output_pathname"])
if not os.path.exists(obm_output_pathname):
os.makedirs(obm_output_pathname)
def multiprocess_chunk(quadkey_batch):
if args.import_csv:
target_db_config = config["target_database"]
def multiprocess_built_estimation_batch(quadkey_batch):
"""
Wrapper function that writes a CSV file with built-up areas found in the
quadkey_batch. The output is based on TileProcessor.build_dictionary.
......@@ -86,10 +95,10 @@ def multiprocess_chunk(quadkey_batch):
)
# Build a list with all tiles with reported built-up areas
build_up_areas = []
built_up_areas = []
for quadkey in quadkey_batch:
try:
result = TileProcessor.get_build_up_area(
result = TileProcessor.get_built_up_area(
quadkey=quadkey,
database=roads_database,
datasource=datasource,
......@@ -98,20 +107,70 @@ def multiprocess_chunk(quadkey_batch):
buffer_magnitude=db_config["process_buffer_magnitude"],
)
if result is not None:
build_up_areas.append(result)
built_up_areas.append(result)
except Exception as e:
logger.info("Error in quadkey: {}".format(quadkey))
logger.info("Error caught: {}".format(e))
if build_up_areas:
if built_up_areas:
# Write output into a csv file
FileProcessor.write_tiles_to_csv(build_up_areas, output_pathname)
FileProcessor.write_tiles_to_csv(built_up_areas, output_pathname)
roads_database.connection.close()
def multiprocess_csv(csv_filepath):
def multiprocess_buildings_batch(quadkey_batch):
    """
    Wrapper function that writes a CSV file with OBM built-up areas found in the
    quadkey_batch.

    Output filenames are: OBM_<first quadkey>_<number of tiles>.csv

    Args:
        quadkey_batch (list): List of quadkeys to process with settlement data
    """

    # Connect to the OBM buildings database (one connection per worker process).
    buildings_database = Database(**buildings_db_config)
    buildings_database.create_connection_and_cursor()
    buildings_database_crs_number = buildings_database.get_crs_from_geometry_field(
        **buildings_db_config["buildings_table"]
    )
    logger.info(
        "Connection established to {} in {}".format(
            buildings_db_config["dbname"], buildings_db_config["host"]
        )
    )

    # Build a list with all tiles with reported obm built-up areas
    obm_built_up_areas = []
    for quadkey in quadkey_batch:
        try:
            result = TileProcessor.get_obm_built_up_area(
                quadkey=quadkey,
                database=buildings_database,
                database_crs_number=buildings_database_crs_number,
                table_config=buildings_db_config["buildings_table"],
            )
            # None means the tile contained no building footprints.
            if result is not None:
                obm_built_up_areas.append(result)
        except Exception as e:
            # Log and continue so one failing tile does not abort the batch,
            # matching the error handling of multiprocess_built_estimation_batch.
            logger.info("Error in quadkey: {}".format(quadkey))
            logger.info("Error caught: {}".format(e))

    if obm_built_up_areas:
        # Write output into a csv file
        FileProcessor.write_obm_tiles_to_csv(
            list_of_dictionaries=obm_built_up_areas,
            output_pathname=obm_output_pathname,
        )

    buildings_database.connection.close()
def multiprocess_write_csv(csv_filepath):
"""
Wrapper function that imports a CSV file into a configured database table
......@@ -159,7 +218,7 @@ def main():
logging.info("Creating multiprocessing pool")
with multiprocessing.Pool(processes=num_processes) as pool:
logging.info("Start parallel processing")
pool.map(multiprocess_csv, csv_filepaths)
pool.map(multiprocess_write_csv, csv_filepaths)
logging.info("Parallel processing finished, closing pool")
pool.close()
......@@ -183,10 +242,23 @@ def main():
tiles_list[i : i + batch_size] for i in range(0, len(tiles_list), batch_size)
]
logging.info("Creating multiprocessing pool")
if args.obm_built_up:
logging.info("Creating multiprocessing pool for obm built-up areas")
with multiprocessing.Pool(processes=num_processes) as pool:
logging.info("Start parallel processing of {} batches".format(len(quadkey_batches)))
pool.map(multiprocess_buildings_batch, quadkey_batches)
logging.info("Parallel processing finished, closing pool")
pool.close()
pool.join()
logger.info("Task finished, closing obmgapanalysis")
sys.exit()
logging.info("Creating multiprocessing pool for settlement layer built-up areas")
with multiprocessing.Pool(processes=num_processes) as pool:
logging.info("Start parallel processing of {} batches".format(len(quadkey_batches)))
pool.map(multiprocess_chunk, quadkey_batches)
pool.map(multiprocess_built_estimation_batch, quadkey_batches)
logging.info("Parallel processing finished, closing pool")
pool.close()
......
......@@ -216,7 +216,9 @@ class TileProcessor:
"""
input_dataframe = input_dataframe.to_crs(tile.crs)
input_dataframe = geopandas.clip(input_dataframe, tile.geometry)
input_dataframe = geopandas.clip(
input_dataframe, tile.geometry.buffer(buffer_magnitude)
)
geometry = input_dataframe.unary_union
if buffer_magnitude > 0.0:
geometry = geometry.buffer(buffer_magnitude)
......@@ -304,7 +306,7 @@ class TileProcessor:
return results
@staticmethod
def get_build_up_area(
def get_built_up_area(
quadkey, datasource, database, database_crs_number, table_config, buffer_magnitude
):
"""Run the complete processing of a quadkey and returns a dictionary
......@@ -356,3 +358,46 @@ class TileProcessor:
)
result = TileProcessor.build_dictionary(tile, datasource, refined_built_area)
return result
@staticmethod
def get_obm_built_up_area(quadkey, database, database_crs_number, table_config):
    """Run the processing for obm built-up area extraction of a quadkey
    and returns a dictionary created with quadkey and obm_built_area_size.

    Args:
        quadkey (str): Quadkey code associated with a Bing quadtree tile.
        database (database.Database): Database instance with credentials
            and connection ready to perform data queries.
        database_crs_number (str): SRID number of the target table.
        table_config (dict): Dictionary with table name, schema and geometry_field.
            This is part of the config.yml file.

    Returns:
        results (dictionary): Dictionary with built-up area information.
            Implicitly returns None when the tile contains no building
            footprints.
    """

    # Build the tile in the same CRS as the buildings table so the
    # spatial query and area processing agree on coordinates.
    crs = "epsg:{}".format(database_crs_number)
    tile = Tile(quadkey, crs)

    # Find building footprints within the buildings database
    buildings_in_tile = database.get_features_in_tile(
        tile=tile, crs_number=database_crs_number, **table_config
    )

    if not buildings_in_tile.empty:
        # Clip the footprints to the tile extent before measuring area.
        buildings_processed = TileProcessor.process_dataframe_with_tile(
            buildings_in_tile, tile=tile
        )
        result = {
            "quadkey": tile.quadkey,
            # NOTE(review): 0 presumably identifies OBM as the data source
            # -- confirm against the datasource id convention.
            "source_id": 0,
            "built_area_size": TileProcessor.albers_area_calculation(
                buildings_processed, tile.crs
            ),
            "last_update": str(date.today()),
        }
        return result
......@@ -126,24 +126,20 @@ def test_clip_to_tile_extent():
def test_process_dataframe_with_tile():
expected_geometry_roads = (
"POLYGON ((2550002.184577069 4629637.952911595, 2550002.438602296 "
"4629670.059528879, 2550002.454491734 4629670.34505833, 2550007.486132718 "
"4629718.894118963, 2550007.534455809 4629719.202675823, 2550007.614596096 "
"4629719.504536704, 2550020.082379065 4629758.216188851, 2550020.178233149 "
"4629758.472967537, 2550020.297122533 4629758.719926265, 2550020.438054846 "
"4629758.955003661, 2550020.599853719 4629759.176237528, 2550020.781168612 "
"4629759.381781219, 2550030.443700413 4629769.375572084, 2550030.760692946 "
"4629769.659947647, 2550049.072185669 4629783.876796038, 2550053.931105311 "
"4629790.803233128, 2550060.593091394 4629790.803233128, 2550060.594888479 "
"4629790.73518196, 2550060.573776196 4629790.441533886, 2550060.523983029 "
"4629790.151369173, 2550060.445988514 4629789.867482267, 2550060.340543782 "
"4629789.592607153, 2550060.208664322 4629789.329391029, 2550060.051620206 "
"4629789.08036881, 2550053.724702562 4629780.061286326, 2550053.541161971 "
"POLYGON ((2550002.438602296 4629670.059528879, 2550002.454491734 "
"4629670.34505833, 2550007.486132718 4629718.894118963, 2550007.534455809 "
"4629719.202675823, 2550007.614596096 4629719.504536704, 2550020.082379065 "
"4629758.216188851, 2550020.178233149 4629758.472967537, 2550020.297122533 "
"4629758.719926265, 2550020.438054846 4629758.955003661, 2550020.599853719 "
"4629759.176237528, 2550020.781168612 4629759.381781219, 2550030.443700413 "
"4629769.375572084, 2550030.760692946 4629769.659947647, 2550049.072185669 "
"4629783.876796038, 2550053.931105311 4629790.803233128, 2550061.2602155 "
"4629790.803233128, 2550053.724702562 4629780.061286326, 2550053.541161971 "
"4629779.825551415, 2550053.335084511 4629779.609241042, 2550053.108513971 "
"4629779.41450048, 2550034.608849079 4629765.051557215, 2550025.57961757 "
"4629755.712777978, 2550013.42183824 4629717.963666889, 2550008.437281965 "
"4629669.86891498, 2550008.184577072 4629637.929176555, 2550002.185554927 "
"4629637.929176555, 2550002.184577069 4629637.952911595))"
"4629669.86891498, 2550008.184577072 4629637.929176555, 2550002.184389279 "
"4629637.929176555, 2550002.438602296 4629670.059528879))"
)
roads_process = TileProcessor.process_dataframe_with_tile(
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment