Commit 9aec54b1 authored by Tara Evaz Zadeh's avatar Tara Evaz Zadeh
Browse files

Added preprocessor.py to create an expanded version of exposure files

parent 836f70c8
Pipeline #19961 passed with stage
in 1 minute and 32 seconds
SOURCES=losscalculator tests setup.py
SOURCES=losscalculator tests utils/preprocessor.py setup.py
LENGTH=96
check: $(SOURCES)
......
......@@ -10,6 +10,8 @@ region of interest and fragility functions that model the probability of exceedi
* `pandas`
* `numpy`
* `scipy`
* `pyproj`
* `shapely`
## Usage
......
File mode changed from 100755 to 100644
......@@ -10,7 +10,7 @@ setup(
version="0.1",
description="Compute damage-state probabilities for assets in scenario earthquakes",
license="AGPLv3+",
install_requires=["pandas", "numpy", "scipy"],
install_requires=["pandas", "numpy", "scipy", "shapely", "pyproj"],
extras_require={
"tests": tests_require,
"linters": linters_require,
......
#!/usr/bin/env python3
# Copyright (C) 2020-2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
"""
Retuns an augmented exposure file by appending the geometries of the tiles and
buildings.
Program Inputs:
---------------
`-i`, `--exposure-input`: str, required
Defines the filepath to the input exposure model with input exposure model format as below:
`id,lon,lat,taxonomy,number,structural,night,occupancy,admin_name,admin_ID,origin_id`
`-o`, `--exposure-output`: str, required
Defines the filepath to the output exposure model (augmented exposure model) with format
as below:
`id,lon,lat,taxonomy,number,structural,night,occupancy,admin_name,admin_ID, tile_id,
tile_geometry, building_id ,building_geometry`
`-g`, `--buildingsGeometry`: str, optional
Defines the path to the file that maps the building ID (`origin-id` for a building asset
in the exposure model) from which the asset is originated to the building geometry and the tile
ID that the building centroid is located in.
Only necessary if there are building assets in the input exposure model; with format as below:
`origin_id;geometry;tile_id`
`-w` overwrite existing result file: optional
To overwrite an existing augmented exposure model file.
In order to use this program, please note that:
The spacing of the grid is 10 arc-seconds. The tile ID starts from North-West corner of the
world, moves East by row, and finishes at the South-East corner of the world. First tile ID
is 0, last tile ID is 8,398,079,999 (total number of cells is 8,398,080,000).
"""
import csv
import sys
import os
import argparse
import pandas as pd
import numpy as np
from functools import partial
from pyproj import transform, Proj
import shapely.wkt as wkt
import shapely.ops as ops
import shapely.geometry as shgeometry
def get_coordinates_from_cell_id(cell_id):
"""
Given a cell ID, it returns the coordinates that define the four corners of the cell.
"""
if cell_id < 8398080000 and cell_id > -1:
lon_edges = np.linspace(-180.0, 180.0, num=129601)
lat_edges_inv = np.linspace(-90.0, 90.0, num=64801)
cell_y = -(cell_id // 129600) + 64799
cell_x = cell_id - 8397950400 + 129600 * cell_y
lon_w = lon_edges[cell_x]
lon_e = lon_edges[cell_x + 1]
lat_s = lat_edges_inv[cell_y]
lat_n = lat_edges_inv[cell_y + 1]
else:
print("ERROR!! CELL ID " + str(cell_id) + " DOES NOT EXIST!!!")
lon_w = -999.9
lon_e = -999.9
lat_s = -999.9
lat_n = -999.9
return lon_w, lat_s, lon_e, lat_n
def get_geometry_WKT_of_cell(cell_id):
lon_w, lat_s, lon_e, lat_n = get_coordinates_from_cell_id(cell_id)
out_wkt = (
shgeometry.Polygon([(lon_w, lat_s), (lon_e, lat_s), (lon_e, lat_n), (lon_w, lat_n)])
).wkt
return out_wkt
def flip(x, y):
"""Flips the x and y coordinate values."""
return y, x
def reproject_and_flip(polygon, epsg_from, epsg_to):
"""Reprojects building polygons and flips coordinates."""
transformation = partial(
transform, Proj("{}".format(epsg_from)), Proj("{}".format(epsg_to))
)
return ops.transform(flip, ops.transform(transformation, polygon))
def preprocess(exposure_out_filepath, exposure_filepath, building_to_cell_id_filepath):
"""Creates an augmented exposure file by appending the geometries of the tiles and
buildings so that the format is changed from:
`id,lon,lat,taxonomy,number,structural,night,occupancy,admin_name,admin_ID,origin_id`
to
`id,lon,lat,taxonomy,number,structural,night,occupancy,admin_name,admin_ID, tile_id,
tile_geometry, building_id ,building_geometry`
"""
# Read the third input only if it is given.
if building_to_cell_id_filepath:
if os.path.isfile(building_to_cell_id_filepath):
building_to_cell_id_source = pd.read_csv(
building_to_cell_id_filepath, delimiter=";"
)
else:
print(
"ERROR: The file path corresponding to `-p`,`--buildingsGeometry`"
+ "option does not exist!"
)
# Open input exposure file
in_file = open(exposure_filepath)
reader = csv.DictReader(in_file, delimiter=",")
# Open the output exposure file (the augmented exposure file to which the
# results will be written.)
out_file = open(exposure_out_filepath, mode="w")
fieldnames = [
"id",
"lon",
"lat",
"taxonomy",
"number",
"structural",
"night",
"occupancy",
"admin_name",
"admin_ID",
"tile_id",
"tile_geometry",
"building_id",
"building_geometry",
]
writer = csv.DictWriter(
out_file,
fieldnames=fieldnames,
delimiter=",",
quotechar='"',
quoting=csv.QUOTE_NONNUMERIC,
)
writer.writeheader()
for asset in reader:
# Copy the asset to the output exposure (augmented exposure model).
write_asset = asset
# Changing format of the numeric columns to float instead of string.
write_asset["lon"] = float(asset["lon"])
write_asset["lat"] = float(asset["lat"])
write_asset["number"] = float(asset["number"])
write_asset["structural"] = float(asset["structural"])
write_asset["night"] = float(asset["night"])
# `originid` defines if this asset originated from a cell or from a building.
originid = asset["origin_id"]
# Checking if the asset is a cell asset. In this case `originid` is of
# format `'cell_'+number`.
if originid.startswith("cell_"):
# Because the asset is itself a cell asset, the `origin_id` is itself the `tile_id`.
tile_id = asset["origin_id"][
5:
] # Removes the first 5 characters (removing the part 'cell_').
write_asset["tile_id"] = int(tile_id)
# Computing the geometry of the tile in EPSG:4326.
write_asset["tile_geometry"] = get_geometry_WKT_of_cell(int(tile_id))
# A cell asset (residual) contains no buildings. Thus there is neither a
# `building_id` nor a `building_geometry`.
write_asset["building_id"] = -1
write_asset["building_geometry"] = "POINT EMPTY"
# Deleting the column `origin_id` because the `cell_id` is already extracted
# from it.
del write_asset["origin_id"]
writer.writerow(write_asset)
# Checking if the asset is a building asset. In this case `originid` is of format
# `'OSM_'+number`.
else:
building_id = int(asset["origin_id"][4:]) # Removes the first 4 characters.
# `tile_id` of a `building_id` is extracted from the `cell_id` column of the
# input `building_to_cell_id`.
input_tile_id = building_to_cell_id_source.loc[
building_to_cell_id_source["origin_id"] == originid, "cell_id"
].iloc[0]
# Keeping only the numeric part (removing the part 'cell_').
tile_id = input_tile_id[5:]
# Writing the `tile_id` as an int and not string.
write_asset["tile_id"] = int(tile_id)
write_asset["tile_geometry"] = get_geometry_WKT_of_cell(int(tile_id))
write_asset["building_id"] = int(building_id)
# Extracting the polygon of a `building_id` from the `geometry` column of the file
# `building_to_cell_id`. The geometries in this file are given in EPSG:3857.
polygon_in = wkt.loads(
building_to_cell_id_source.loc[
building_to_cell_id_source["origin_id"] == asset["origin_id"],
"geometry",
].iloc[0]
)
# Reprojecting the building polygon from '3857' to '4326' to match the
# `tile_geometry` projection.
write_asset["building_geometry"] = reproject_and_flip(
polygon_in, "epsg:3857", "epsg:4326"
).wkt
# Deleting the column `origin_id` because the `building_id` is already
# extracted from it.
del write_asset["origin_id"]
writer.writerow(write_asset)
in_file.close()
out_file.close()
def main():
parser = argparse.ArgumentParser(
description="This program creates an augmented exposure file by appending ID and"
+ "geometry of both buildings and their containing tile for each assets in the given"
+ "exposure file."
)
parser.add_argument(
"-i",
"--exposure-input",
required=True,
type=str,
help="Path to the input exposure model (Required)",
)
parser.add_argument(
"-o",
"--exposure-output",
required=True,
type=str,
help="Path to the file to write the augmented exposure model to (Required)",
)
parser.add_argument(
"-g",
"--buildingsGeometry",
required=False,
type=str,
help="Path to the file that maps building-ids to their geometry and tile-id with the"
+ "header `origin_id;geometry;cell_id`",
)
parser.add_argument(
"-w",
"--overwrite",
required=False,
action="store_true",
help="Overwrite the existing augmented exposure model file",
)
args = parser.parse_args()
exposure_out_filepath = args.exposure_output
exposure_filepath = args.exposure_input
building_to_cell_id_filepath = args.buildingsGeometry
overwrite_result_file = args.overwrite
if os.path.exists(exposure_out_filepath):
if not overwrite_result_file:
raise ValueError(
"Result_filepath exists. Choose another file path or use "
+ "--overwrite if you want to overwrite the augmented exposure model."
)
else:
os.remove(exposure_out_filepath)
if len(sys.argv) <= 6:
print(
"warning: Because you have only",
len(sys.argv),
"inputs, there must be no building asset in the input exposure model, otherwise"
+ "you need to include the input `-g`,`--buildings-geometry`."
+ "Run `python3 preprocessor.py --help for more info. ",
)
preprocess(exposure_out_filepath, exposure_filepath, building_to_cell_id_filepath)
main()
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment