Commit 0603058f authored by Nicolas Garcia Ospina

Improved config and import behaviour.

parent 36b5f566
Pipeline #26354 passed with stage
in 5 minutes and 16 seconds
@@ -12,8 +12,8 @@ tiles:
 output_pathname: ./results
 obm_output_pathname: ./obm_results
-import_pathname: ./results
 number_cores: 1
+number_cores_import: 1
 batch_size: 1000
 get_geometry: False
...
 # Configuration file
 The OpenBuildingMap (OBM) gap analysis program can be configured to fit user needs. This is done by
-changing the different parameters within the `config.yml` file. A sample file can be found in
-the package directory under the filename `config-example.yml`.
+changing the different parameters within the `config.yml` file. A sample file can be found in the
+package directory under the filename `config-example.yml`. By using the `-conf` argument, the
+configuration can be read from a `.yml` file with a custom name; otherwise, the default file is `config.yml`.
 
 ## config.yml
@@ -29,15 +30,16 @@ input the tiles. If set to `True`, Quadkeys are read from `txt_filepath` instead
 The following parameters define the processing output and can improve the performance of the program.
 First, `output_pathname` is the directory to store and read CSV files for further import in SQL. The
-`number_cores` parameter refers to the maximum number of parallel processes the system can handle. This is defined as the number of
-cores that can be dedicated to the program execution. Finally, `batch_size` sets the maximum
-amount of tiles to be handled per process. Each CSV file may contain maximum this amount of tiles if
-all of them provide built areas.
+`number_cores` parameter refers to the maximum number of parallel processes the system can handle for
+tile processing. `number_cores_import` sets the maximum number of parallel imports of CSV files into
+the database; keep this number moderate, since too many simultaneous imports can overload the
+database. Finally, `batch_size` sets the maximum number of tiles to be handled per process. Each CSV
+file contains at most this number of tiles, and fewer if some tiles in the batch provide no built areas.
 
-output_pathname (str): Target path name for the csv file writing and reading.
-obm_output_pathname (str): Target path name for the OBM csv file writing and reading.
-import_pathname (str): Target path name with csv files to import.
+output_pathname (str): Target path name for the CSV file writing and import.
+obm_output_pathname (str): Target path name for the OBM CSV file writing and import.
 number_cores (int): Desired maximum number of parallel processes to execute.
+number_cores_import (int): Desired maximum number of parallel imports to execute.
 batch_size (int): Maximum amount of tiles to be handled per process.
 get_geometry (bool): If True, geometries will be stored in the output csv files.
...
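For reference, here is a minimal sketch (not part of the commit) of reading the options documented above with PyYAML, assuming they sit at the top level of `config.yml` as the script changes below expect:

```python
# Illustrative only: load the documented configuration options.
import yaml

with open("config.yml", "r") as ymlfile:
    config = yaml.load(ymlfile, Loader=yaml.FullLoader)

output_pathname = config["output_pathname"]          # directory for the tile CSV files
obm_output_pathname = config["obm_output_pathname"]  # directory for the OBM CSV files
number_cores = config["number_cores"]                # parallel processes for tile processing
number_cores_import = config["number_cores_import"]  # parallel CSV imports into the database
batch_size = config["batch_size"]                    # maximum tiles handled per process
get_geometry = config["get_geometry"]                # store geometries in the output CSV files
```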
@@ -42,11 +42,21 @@ parser.add_argument(
     "--import_csv", action="store_true", help="Import CSV files into target database"
 )
 parser.add_argument("--obm_built_up", action="store_true", help="Find built-up areas from OBM")
+parser.add_argument(
+    "-conf",
+    nargs="?",
+    const=1,
+    type=str,
+    default="config.yml",
+    help="Config filepath. Default: config.yml",
+)
 args = parser.parse_args()
+config_filepath = args.conf
+logger.info("Reading configuration from {}".format(config_filepath))
 
 # Get program configuration from config.yml
-with open("config.yml", "r") as ymlfile:
+with open(config_filepath, "r") as ymlfile:
     config = yaml.load(ymlfile, Loader=yaml.FullLoader)
 tiles_config = config["tiles"]
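As a standalone illustration (not part of the commit), the new `-conf` option resolves a configuration path as shown below; note that `nargs="?"` with `const=1` means the flag given without a value yields `1`, so a filepath should always be supplied:

```python
# Demo of the "-conf" argument added above (illustrative only).
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "-conf",
    nargs="?",
    const=1,
    type=str,
    default="config.yml",
    help="Config filepath. Default: config.yml",
)

print(parser.parse_args([]).conf)                       # config.yml (default)
print(parser.parse_args(["-conf", "custom.yml"]).conf)  # custom.yml
print(parser.parse_args(["-conf"]).conf)                # 1 (const), so always pass a filepath
```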
@@ -63,7 +73,7 @@ if not os.path.exists(obm_output_pathname):
 if args.import_csv:
     target_db_config = config["target_database"]
-    import_pathname = os.path.abspath(config["import_pathname"])
+    import_pathname = [output_pathname, obm_output_pathname]
 
 get_geometry = config["get_geometry"]
@@ -206,14 +216,17 @@ def main():
     if args.import_csv:
         # List CSV files and insert into database
         csv_filepaths = [
-            os.path.join(import_pathname, file) for file in os.listdir(import_pathname)
+            os.path.join(basepath, file)
+            for basepath in import_pathname
+            for file in os.listdir(basepath)
         ]
         logger.info(
             "{} CSV files will be imported into the database".format(len(csv_filepaths))
         )
 
         # Generate a parallel process pool with all CSV files to be dealt with
-        num_processes = config["number_cores"]
+        num_processes = config["number_cores_import"]
         logging.info("Creating multiprocessing pool")
         with multiprocessing.Pool(processes=num_processes) as pool:
...
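Below is a hedged sketch of how the import step fans out over both output directories with the new `number_cores_import` setting; the worker `import_csv_file` is hypothetical, since the actual database-import helper is not shown in this diff:

```python
# Hedged sketch of the parallel CSV import; import_csv_file() is a hypothetical
# stand-in for the real database-import helper.
import multiprocessing
import os


def import_csv_file(filepath):
    # Hypothetical worker: push a single CSV file into the target database.
    print("importing", filepath)


if __name__ == "__main__":
    # Scan both output directories, mirroring
    # import_pathname = [output_pathname, obm_output_pathname] above.
    import_pathname = ["./results", "./obm_results"]
    csv_filepaths = [
        os.path.join(basepath, file)
        for basepath in import_pathname
        if os.path.isdir(basepath)
        for file in os.listdir(basepath)
    ]
    num_processes = 1  # number_cores_import from config.yml
    with multiprocessing.Pool(processes=num_processes) as pool:
        pool.map(import_csv_file, csv_filepaths)
```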