Commit e6450575 authored by Danijel Schorlemmer, committed by Tara Evaz Zadeh

Added database.py; modified the manual, preprocessor.py, and setup.py accordingly

parent 504a8ca7
Pipeline #26183 passed with stage in 1 minute and 30 seconds
@@ -11,3 +11,4 @@ __pycache__
build
dist
env
venv
This diff is collapsed.
@@ -10,7 +10,13 @@ setup(
version="0.1",
description="Compute damage-state probabilities for assets in scenario earthquakes",
license="AGPLv3+",
install_requires=["pandas", "numpy", "scipy", "shapely", "pyproj"],
install_requires=[
"pandas",
"numpy",
"scipy",
"shapely",
"pyproj",
],
extras_require={
"tests": tests_require,
"linters": linters_require,
This diff is collapsed.
@@ -17,274 +17,170 @@
# along with this program. If not, see http://www.gnu.org/licenses/.
"""
Returns an augmented exposure file by appending the geometries of the tiles and
buildings.
Returns an exposure model along with the asset geometries as either a CSV or a database file.
Please use either `import` to import exposure data into the database or `export` to export to
an OpenQuake-compatible exposure CSV file.
Program Inputs:
---------------
`-i`, `--exposure-input`: str, required
Defines the filepath to the input exposure model, with the format below:
`id,lon,lat,taxonomy,number,structural,night,occupancy,admin_name,admin_ID,origin_id`
`-o`, `--exposure-output`: str, required
Defines the filepath to the output exposure model (augmented exposure model) with format
as below:
`-d`, `--exposure-database`: str, required
Defines the path to an exposure model database file.
When using the import command, it points to the file where the exposure database is to be stored.
When using the export command, it points to an existing exposure database file from which a CSV
exposure file with header
`id,lon,lat,taxonomy,number,structural,night,occupancy,admin_name,admin_ID,tile_id,
tile_geometry,building_id,building_geometry`
tile_geometry,building_id,building_geometry` is exported.
`-g`, `--buildingsGeometry`: str, optional
`-g`, `--building-geometries`: str, optional
Defines the path to the file that maps the building ID (`origin_id` for a building asset
in the exposure model) from which the asset originates to the building geometry and the tile
ID in which the building centroid is located.
Only necessary if there are building assets in the input exposure model; with format as below:
Only required if there are building assets in the input exposure model; with format as below:
`origin_id;geometry;tile_id`
`-w` overwrite existing result file: optional
To overwrite an existing augmented exposure model file.
In order to use this program, please note that:
The spacing of the grid is 10 arc-seconds. The tile ID starts from North-West corner of the
world, moves East by row, and finishes at the South-East corner of the world. First tile ID
is 0, last tile ID is 8,398,079,999 (total number of cells is 8,398,080,000).
"""
import csv
import sys
import os
import argparse
import pandas as pd
import numpy as np
from functools import partial
from pyproj import transform, Proj
import shapely.wkt as wkt
import shapely.ops as ops
import shapely.geometry as shgeometry
def get_coordinates_from_cell_id(cell_id):
"""
Given a cell ID, it returns the coordinates that define the four corners of the cell.
"""
if cell_id < 8398080000 and cell_id > -1:
lon_edges = np.linspace(-180.0, 180.0, num=129601)
lat_edges_inv = np.linspace(-90.0, 90.0, num=64801)
cell_y = -(cell_id // 129600) + 64799
cell_x = cell_id - 8397950400 + 129600 * cell_y
lon_w = lon_edges[cell_x]
lon_e = lon_edges[cell_x + 1]
lat_s = lat_edges_inv[cell_y]
lat_n = lat_edges_inv[cell_y + 1]
else:
print("ERROR!! CELL ID " + str(cell_id) + " DOES NOT EXIST!!!")
lon_w = -999.9
lon_e = -999.9
lat_s = -999.9
lat_n = -999.9
return lon_w, lat_s, lon_e, lat_n
`-i`, `--import-search-pattern`: str, optional
Search pattern for the input exposure model files (e.g. `*.csv`).
These files should follow the format below:
`id,lon,lat,taxonomy,number,structural,night,occupancy,admin_name,admin_ID,origin_id`
Only required when using the import command.
def get_geometry_WKT_of_cell(cell_id):
lon_w, lat_s, lon_e, lat_n = get_coordinates_from_cell_id(cell_id)
out_wkt = (
shgeometry.Polygon([(lon_w, lat_s), (lon_e, lat_s), (lon_e, lat_n), (lon_w, lat_n)])
).wkt
return out_wkt
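A usage sketch for the two helpers above (illustrative only, not part of the commit; values shown are approximate): tile 0 is the 10-arc-second cell in the north-west corner of the grid.
lon_w, lat_s, lon_e, lat_n = get_coordinates_from_cell_id(0)
# lon_w == -180.0 and lat_n == 90.0; the cell is 10 arc-seconds (1/360 of a degree) wide and tall,
# so lat_s and lon_e are roughly 89.99722 and -179.99722.
print(get_geometry_WKT_of_cell(0))
# Approximately: POLYGON ((-180 89.99722, -179.99722 89.99722, -179.99722 90, -180 90, -180 89.99722))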
`-o`, `--export-filepath`: str, optional
Defines the filepath to the output CSV exposure model file (OpenQuake compatible) with format
as below:
`id,lon,lat,taxonomy,number,structural,night,occupancy,admin_name,admin_ID,tile_id,
tile_geometry,building_id,building_geometry`
Only required when using the export command.
`-s`, `--spatialite-extension`: str, optional
Defines the file path of the spatialite extension. (default: mod_spatialite)
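The extension path given here is presumably handed to SQLite's extension loader inside the new database.py (not expanded in this diff). A minimal, stand-alone sketch of how a Spatialite extension is typically loaded with Python's sqlite3 module; the database file name is hypothetical:
import sqlite3

conn = sqlite3.connect("augmented_exposure.db")  # hypothetical file name
conn.enable_load_extension(True)                 # requires a Python build that allows extension loading
conn.load_extension("mod_spatialite")            # the value passed via -s/--spatialite-extension
conn.enable_load_extension(False)                # lock extension loading again
cursor = conn.cursor()
cursor.execute("SELECT spatialite_version()")    # simple sanity check that SpatiaLite is available
print(cursor.fetchone()[0])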
def flip(x, y):
"""Flips the x and y coordinate values."""
return y, x
`-w`, `--overwrite`: optional
To overwrite an existing exposure database file.
In order to use this program, please note:
The spacing of the grid is 10 arc-seconds. The tile ID starts from the north-west corner of the
world, moves east per row, and finishes at the south-east corner of the world. First tile ID
is 0, last tile ID is 8,398,079,999 (total number of tiles is 8,398,080,000).
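The tile count follows directly from the 10-arc-second spacing: 360 degrees * 360 cells per degree = 129,600 columns and 180 * 360 = 64,800 rows, hence 129,600 * 64,800 = 8,398,080,000 tiles. A sketch of the implied lon/lat to tile-ID mapping (the inverse of `get_coordinates_from_cell_id` above; the behaviour exactly on cell edges is an assumption, and this helper is not part of the commit):
import math

def tile_id_from_lon_lat(lon, lat):
    """Illustrative only: row 0 is the northernmost row, column 0 starts at -180 degrees."""
    column = int(math.floor((lon + 180.0) * 360))  # 10 arc-seconds = 1/360 of a degree
    row = int(math.floor((90.0 - lat) * 360))
    return row * 129600 + column

# tile_id_from_lon_lat(-179.999, 89.999) -> 0 (north-west corner)
# tile_id_from_lon_lat(179.999, -89.999) -> 8398079999 (south-east corner)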
def reproject_and_flip(polygon, epsg_from, epsg_to):
"""Reprojects building polygons and flips coordinates."""
transformation = partial(
transform, Proj("{}".format(epsg_from)), Proj("{}".format(epsg_to))
)
return ops.transform(flip, ops.transform(transformation, polygon))
Execution examples:
---------------
Create a database exposure model file:
python3 preprocessor.py import -d ../example_preprocessor/augmented_exposure.db
-i ../example_preprocessor/data/exposure_*.csv
-g ../example_preprocessor/data/building_to_cellid_map.csv
def preprocess(exposure_out_filepath, exposure_filepath, building_to_cell_id_filepath):
"""Creates an augmented exposure file by appending the geometries of the tiles and
buildings so that the format is changed from:
`id,lon,lat,taxonomy,number,structural,night,occupancy,admin_name,admin_ID,origin_id`
to
`id,lon,lat,taxonomy,number,structural,night,occupancy,admin_name,admin_ID,tile_id,
tile_geometry,building_id,building_geometry`
"""
# Read the third input only if it is given.
Create a CSV exposure model file:
python3 preprocessor.py export -d ../example_preprocessor/augmented_exposure.db
-o ../example_preprocessor/augmented_exposure.csv
if building_to_cell_id_filepath:
if os.path.isfile(building_to_cell_id_filepath):
building_to_cell_id_source = pd.read_csv(
building_to_cell_id_filepath, delimiter=";"
)
else:
            print(
                "ERROR: The file path corresponding to the `-g`, `--buildingsGeometry` "
                + "option does not exist!"
            )
"""
# Open input exposure file
in_file = open(exposure_filepath)
reader = csv.DictReader(in_file, delimiter=",")
# Open the output exposure file (the augmented exposure file to which the
# results will be written.)
out_file = open(exposure_out_filepath, mode="w")
fieldnames = [
"id",
"lon",
"lat",
"taxonomy",
"number",
"structural",
"night",
"occupancy",
"admin_name",
"admin_ID",
"tile_id",
"tile_geometry",
"building_id",
"building_geometry",
]
writer = csv.DictWriter(
out_file,
fieldnames=fieldnames,
delimiter=",",
quotechar='"',
quoting=csv.QUOTE_NONNUMERIC,
)
writer.writeheader()
import logging
import sys
import os
import argparse
import sqlite3
from database import ExposureDatabase
for asset in reader:
# Copy the asset to the output exposure (augmented exposure model).
write_asset = asset
# Changing format of the numeric columns to float instead of string.
write_asset["lon"] = float(asset["lon"])
write_asset["lat"] = float(asset["lat"])
write_asset["number"] = float(asset["number"])
write_asset["structural"] = float(asset["structural"])
write_asset["night"] = float(asset["night"])
# `originid` defines if this asset originated from a cell or from a building.
originid = asset["origin_id"]
# Checking if the asset is a cell asset. In this case `originid` is of
# format `'cell_'+number`.
if originid.startswith("cell_"):
# Because the asset is itself a cell asset, the `origin_id` is itself the `tile_id`.
tile_id = asset["origin_id"][
5:
] # Removes the first 5 characters (removing the part 'cell_').
write_asset["tile_id"] = int(tile_id)
# Computing the geometry of the tile in EPSG:4326.
write_asset["tile_geometry"] = get_geometry_WKT_of_cell(int(tile_id))
# A cell asset (residual) contains no buildings. Thus there is neither a
# `building_id` nor a `building_geometry`.
write_asset["building_id"] = -1
write_asset["building_geometry"] = "POINT EMPTY"
# Deleting the column `origin_id` because the `cell_id` is already extracted
# from it.
del write_asset["origin_id"]
writer.writerow(write_asset)
# Checking if the asset is a building asset. In this case `originid` is of format
# `'OSM_'+number`.
else:
building_id = int(asset["origin_id"][4:]) # Removes the first 4 characters.
# `tile_id` of a `building_id` is extracted from the `cell_id` column of the
# input `building_to_cell_id`.
input_tile_id = building_to_cell_id_source.loc[
building_to_cell_id_source["origin_id"] == originid, "cell_id"
].iloc[0]
# Keeping only the numeric part (removing the part 'cell_').
tile_id = input_tile_id[5:]
# Writing the `tile_id` as an int and not string.
write_asset["tile_id"] = int(tile_id)
write_asset["tile_geometry"] = get_geometry_WKT_of_cell(int(tile_id))
write_asset["building_id"] = int(building_id)
# Extracting the polygon of a `building_id` from the `geometry` column of the file
# `building_to_cell_id`. The geometries in this file are given in EPSG:3857.
polygon_in = wkt.loads(
building_to_cell_id_source.loc[
building_to_cell_id_source["origin_id"] == asset["origin_id"],
"geometry",
].iloc[0]
)
# Reprojecting the building polygon from '3857' to '4326' to match the
# `tile_geometry` projection.
write_asset["building_geometry"] = reproject_and_flip(
polygon_in, "epsg:3857", "epsg:4326"
).wkt
# Deleting the column `origin_id` because the `building_id` is already
# extracted from it.
del write_asset["origin_id"]
writer.writerow(write_asset)
in_file.close()
out_file.close()
# Add a logger printing error, warning, info and debug messages to the screen
logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler(sys.stdout))
def main():
"""
Returns an exposure model along with the asset geometries as either a CSV or database file.
"""
parser = argparse.ArgumentParser(
description="This program creates an augmented exposure file by appending ID and"
+ "geometry of both buildings and their containing tile for each assets in the given"
+ "exposure file."
description="This program imports exposure data into an exposure database "
+ "(Spatialite) from OpenQuake-style exposure data and OBM building data."
)
help_str = "'import' to import exposure data into the database, "
help_str += "'export' to export to an OpenQuake-compatible exposure CSV file"
parser.add_argument(
"-i",
"--exposure-input",
"command",
type=str,
help=help_str,
)
parser.add_argument(
"-d",
"--exposure-database",
required=True,
type=str,
help="Path to the input exposure model (Required)",
help="File path to the exposure model database file (Required)",
)
parser.add_argument(
"-i",
"--import-search-pattern",
required=False,
type=str,
help="Search pattern for the input exposure model files for importing (e.g. '*.csv')",
)
parser.add_argument(
"-o",
"--exposure-output",
required=True,
"--export-filepath",
required=False,
type=str,
help="Path to the file to write the augmented exposure model to (Required)",
help="File path to the exposure model CSV file",
)
parser.add_argument(
"-g",
"--buildingsGeometry",
"--building-geometries",
required=False,
type=str,
help="Path to the file that maps building-ids to their geometry and tile-id with the"
+ "header `origin_id;geometry;cell_id`",
help="File path of the CSV file with building geometries (in EPSG:3857) "
+ "following the format: `building_id;geometry;cell_id`",
)
parser.add_argument(
"-w",
"--overwrite",
required=False,
action="store_true",
help="Overwrite the existing augmented exposure model file",
help="Overwrite the existing exposure database file",
)
parser.add_argument(
"-s",
"--spatialite-extension",
required=False,
type=str,
default="mod_spatialite",
help="File path of the spatialite extension (default: mod_spatialite)",
)
args = parser.parse_args()
exposure_out_filepath = args.exposure_output
exposure_filepath = args.exposure_input
building_to_cell_id_filepath = args.buildingsGeometry
overwrite_result_file = args.overwrite
if os.path.exists(exposure_out_filepath):
if not overwrite_result_file:
raise ValueError(
"Result_filepath exists. Choose another file path or use "
+ "--overwrite if you want to overwrite the augmented exposure model."
)
else:
os.remove(exposure_out_filepath)
if len(sys.argv) <= 6:
        print(
            "warning: Because you have only",
            len(sys.argv),
            "inputs, there must be no building asset in the input exposure model; otherwise "
            + "you need to include the input `-g`, `--buildingsGeometry`. "
            + "Run `python3 preprocessor.py --help` for more info.",
        )
preprocess(exposure_out_filepath, exposure_filepath, building_to_cell_id_filepath)
command = args.command
exposure_database = args.exposure_database
export_filepath = args.export_filepath
import_search_pattern = args.import_search_pattern
building_geometries_filepath = args.building_geometries
overwrite_exposure_database = args.overwrite
spatialite_filepath = args.spatialite_extension
if command == "import":
if os.path.exists(exposure_database):
if overwrite_exposure_database:
os.remove(exposure_database)
else:
logger.warning("Database exists. Exiting ...")
exit()
db = ExposureDatabase(exposure_database, spatialite_filepath)
try:
db.create_connection_and_cursor()
except sqlite3.OperationalError:
logger.warning("Spatialite extension cannot be loaded. Exiting ...")
exit()
db.import_exposure(import_search_pattern, building_geometries_filepath)
elif command == "export":
db = ExposureDatabase(exposure_database, spatialite_filepath)
try:
db.create_connection_and_cursor(init_spatial_metadata=False)
except sqlite3.OperationalError:
logger.warning("Spatialite extension cannot be loaded. Exiting ...")
exit()
db.export_exposure(export_filepath)
main()
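The new database.py added by this commit is not expanded in this diff. Judging only from the calls in main() above, its public interface looks roughly like the skeleton below; the method bodies, docstrings, and the default for init_spatial_metadata are inferred, not taken from the actual file:
class ExposureDatabase:
    """Skeleton inferred from preprocessor.py; see database.py for the real implementation."""

    def __init__(self, database_filepath, spatialite_filepath):
        self.database_filepath = database_filepath
        self.spatialite_filepath = spatialite_filepath

    def create_connection_and_cursor(self, init_spatial_metadata=True):
        """Open the SQLite connection, load the Spatialite extension, and create a cursor."""
        raise NotImplementedError  # placeholder

    def import_exposure(self, import_search_pattern, building_geometries_filepath):
        """Import the exposure CSV files (and building geometries) into the database."""
        raise NotImplementedError  # placeholder

    def export_exposure(self, export_filepath):
        """Export an OpenQuake-compatible exposure CSV file from the database."""
        raise NotImplementedError  # placeholder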