Commit aed66ad5 authored by Cecilia Nievas

Added initial functions to create cells around SERA industrial points

parent 501c80ab
"""
Copyright (C) 2021
Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Global Dynamic Exposure Model
Helmholtz Centre Potsdam
GFZ German Research Centre for Geosciences
Section 2.6: Seismic Hazard and Risk Dynamics
GDE_TOOLS_create_industrial_cells
=================================
These functions are used to handle the fact that industrial exposure may be defined in 30-arcsec
cells instead of administrative units in the SERA exposure model. The SERA model only provides
the centroids of those cells with a certain decimal precision. These functions create the
geometry of the corresponding cells, making sure there are no gaps or overlaps in the resulting
cells (due to numerical imprecisions).
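Typical workflow (an illustrative sketch, not part of the original code; "sera_df" with columns
"lon" and "lat", the "IND" ID prefix and the 30-arcsec cell width of 0.00833333 degrees are
assumed example inputs):
    unique_gdf, point_ids = retrieve_unique_points(sera_df, "lon", "lat", "IND", precision=4)
    cells_gdf = define_cells_in_dataframe(unique_gdf, "lon", "lat", 0.00833333, 0.00833333)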
"""
import os
from copy import deepcopy
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
def define_corners(centr_lon, centr_lat, width_EW, width_NS):
"""This function returns the coordinates that define the borders of a square whose centroid
is given by (centr_lon, centr_lat) and the length of whose sides is given by width_EW and
width_NS.
If values width_EW or width_NS are not larger than zero, this function returns all four
output parameters equal to 999.9.
Args:
centr_lon (float): Longitude of centroid, in degrees.
centr_lat (float): Latitude of centroid, in degrees.
width_EW (float): Width of the cell in the east-west direction, in degrees, >0.
width_NS (float): Width of the cell in the north-south direction, in degrees, >0.
Returns:
lon_w (float): Longitude of the west border of the cell, in degrees.
lat_s (float): Latitude of the south border of the cell, in degrees.
lon_e (float): Longitude of the east border of the cell, in degrees.
lat_n (float): Latitude of the north border of the cell, in degrees.
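Example (illustrative; a 30-arcsec cell is approximately 0.00833333 degrees wide, and the
centroid coordinates are assumed values):
    >>> lon_w, lat_s, lon_e, lat_n = define_corners(25.0, 36.8, 0.00833333, 0.00833333)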
"""
if (width_EW <= 0.0) or (width_NS <= 0.0):
print("ERROR in define_corners: width_EW and width_NS need to be positive numbers >=0")
return 999.9, 999.9, 999.9, 999.9
lon_w = centr_lon - width_EW / 2.0
lat_s = centr_lat - width_NS / 2.0
lon_e = centr_lon + width_EW / 2.0
lat_n = centr_lat + width_NS / 2.0
return lon_w, lat_s, lon_e, lat_n
def define_cell_polygon(lon_w, lat_s, lon_e, lat_n):
"""This function defines a rectangle from the four coordinates given as input.
Args:
lon_w (float): Longitude of the west border of the cell, in degrees.
lat_s (float): Latitude of the south border of the cell, in degrees.
lon_e (float): Longitude of the east border of the cell, in degrees.
lat_n (float): Latitude of the north border of the cell, in degrees.
Returns:
Shapely polygon defined by the four coordinates given as input (rectangle).
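Example (illustrative; the corner coordinates are assumed values of the kind returned by
define_corners()):
    >>> cell = define_cell_polygon(24.9958, 36.7958, 25.0042, 36.8042)
    >>> cell.bounds
    (24.9958, 36.7958, 25.0042, 36.8042)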
"""
return Polygon([(lon_w, lat_s), (lon_e, lat_s), (lon_e, lat_n), (lon_w, lat_n)])
def define_cells_in_dataframe(in_df, col_lon, col_lat, width_EW, width_NS, in_crs="EPSG:4326"):
"""This function defines cell geometries around input centroids. These geometries are
rectangular polygons, with side lengths defined by the parameters width_EW and width_NS.
The input centroids are given in in_df.
If width_EW or width_NS is not larger than zero, this function returns the original in_df
unchanged.
Args:
in_df (Pandas DataFrame): Centroids around which the cells will be defined.
It must contain a column with longitudes and
another one with latitudes.
col_lon (str): Name of the column that contains the longitudes.
col_lat (str): Name of the column that contains the latitudes.
width_EW (float): Width of the cell in the east-west direction, in degrees, >0.
width_NS (float): Width of the cell in the north-south direction, in degrees, >0.
in_crs (str): CRS of the data (default="EPSG:4326").
Returns:
out_gdf (GeoPandas GeoDataFrame): GeoDataFrame containing the same columns as in_df, plus
the geometry of the cells defined around the centroids and the columns
lon_w, lat_s, lon_e and lat_n with the coordinates of the cell borders.
It has the same number of rows as in_df.
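Example (illustrative sketch; the column names, centroids and cell widths are assumed
values):
    >>> df = pd.DataFrame({"lon": [25.0, -12.3], "lat": [36.8, -47.2]})
    >>> cells_gdf = define_cells_in_dataframe(df, "lon", "lat", 0.00833333, 0.00833333)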
"""
# Return the input in_df if width_EW or width_NS are not valid:
if (width_EW <= 0.0) or (width_NS <= 0.0):
print("ERROR in define_corners: with_EW and width_NS need to be positive numbers >=0")
return in_df
cells_geom = [] # initialise geometry list
cells_lon_w = np.zeros([in_df.shape[0]]) # initialise array for lon_w
cells_lat_s = np.zeros([in_df.shape[0]]) # initialise array for lat_s
cells_lon_e = np.zeros([in_df.shape[0]]) # initialise array for lon_e
cells_lat_n = np.zeros([in_df.shape[0]]) # initialise array for lat_n
for i in range(in_df.shape[0]): # iterate over the rows of in_df one by one
# Define the corners of the cell around the centroid:
lon_w, lat_s, lon_e, lat_n = define_corners(
in_df[col_lon].values[i], in_df[col_lat].values[i], width_EW, width_NS
)
# Define the cell polygon from the coordinates of the corners:
cells_geom.append(define_cell_polygon(lon_w, lat_s, lon_e, lat_n))
# Keep values of the corners (lon lat that define the cell):
cells_lon_w[i] = lon_w
cells_lat_s[i] = lat_s
cells_lon_e[i] = lon_e
cells_lat_n[i] = lat_n
# Turn in_df into a GeoDataFrame with the geometry just defined:
out_gdf = gpd.GeoDataFrame(deepcopy(in_df), geometry=cells_geom, crs=in_crs)
# Incorporate values of the corners (lon lat that define the cell):
out_gdf["lon_w"] = cells_lon_w
out_gdf["lat_s"] = cells_lat_s
out_gdf["lon_e"] = cells_lon_e
out_gdf["lat_n"] = cells_lat_n
return out_gdf
def retrieve_unique_points(in_df, col_lon, col_lat, id_str, precision=4, in_crs="EPSG:4326"):
"""This function retrieves all the unique points present in in_df. in_df may contain several
rows of data for the same location (lon, lat). This function retrieves all locations that
are different from each other. The output thus contains the same or fewer rows than the
input. The decision of whether two locations are different or the same is made using the
number of decimal places indicated in precision (default=4).
Args:
in_df (Pandas DataFrame): DataFrame that contains a column with longitudes (labelled
"col_lon") and a column with latitudes (labelled "col_lat"),
both in CRS as per in_crs. All other columns of this
DataFrame get ignored.
col_lon (str): Name of the column in in_df that contains longitudes.
col_lat (str): Name of the column in in_df that contains latitudes.
id_str (str): Prefix of the ID to be used for each point.
precision (int): Number of decimal places to be used to determine unique points
(default=4).
in_crs (str): CRS of the data (default="EPSG:4326").
Returns:
out_gpd (GeoPandas GeoDataFrame): GeoDataFrame with all unique points present in in_df.
Columns:
id: ID of the point, generated with id_str and
an increasing counter.
col_lon: Longitude of the point, rounded as per precision.
col_lat: Latitude of the point, rounded as per precision.
geometry: Shapely Point of (lon, lat).
CRS: as per in_crs.
ids_of_unique (array of str): IDs of the unique points (those of out_gpd["id"])
associated with each row of in_df.
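Example (illustrative sketch; the column names and coordinates are assumed values, with
the first two rows referring to the same location):
    >>> df = pd.DataFrame({"LON": [23.7838, 23.7838, 25.1298], "LAT": [35.4677, 35.4677, 37.9262]})
    >>> unique_gdf, point_ids = retrieve_unique_points(df, "LON", "LAT", "IND", precision=4)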
"""
# Concatenate longitude and latitude as strings to identify unique points with a precision:
pr_str = "{:.%sf}" % (precision)
all_pts_str = np.array(
[
"%s_%s"
% (
pr_str.format(in_df[col_lon].values[i]),
pr_str.format(in_df[col_lat].values[i])
)
for i in range(in_df.shape[0])
]
)
unique_pts_str, unique_inv = np.unique(all_pts_str, return_inverse=True) # unique points
# Separate unique points once again into longitude and latitude:
unique_pts_x = np.array(
[float(unique_pts_str[i].split("_")[0]) for i in range(unique_pts_str.shape[0])]
)
unique_pts_y = np.array(
[float(unique_pts_str[i].split("_")[1]) for i in range(unique_pts_str.shape[0])]
)
# Assign IDs to each unique point
ids = np.array(["%s_%s" % (id_str, i) for i in range(unique_pts_str.shape[0])])
# Build output GeoPandas DataFrame:
d = {
"id": ids,
col_lon: unique_pts_x,
col_lat: unique_pts_y,
"geometry": gpd.points_from_xy(unique_pts_x, unique_pts_y),
}
out_gpd = gpd.GeoDataFrame(d, crs=in_crs)
# Array of IDs of unique points associated with each row of in_df:
ids_of_unique = ids[unique_inv]
return out_gpd, ids_of_unique
numpy >= 1.13.3
pandas >= 0.22.0
geopandas >= 0.7.0
psycopg2 >= 2.8.5
Shapely >= 1.6.4
matplotlib >= 2.1.1
h5py >= 2.7.1
iso3166 >= 1.0.1
pyproj >= 1.9.5.1
case,width_EW,width_NS,centr_lon,centr_lat,res_lon_w,res_lat_s,res_lon_e,res_lat_n
1,0.00833333,0.00833333,25,36.8,24.99583334,36.79583334,25.00416667,36.80416667
1,0.00833333,0.00833333,25,-47.2,24.99583334,-47.20416667,25.00416667,-47.19583334
1,0.00833333,0.00833333,25,0,24.99583334,-0.004166665,25.00416667,0.004166665
1,0.00833333,0.00833333,-12.3,36.8,-12.30416667,36.79583334,-12.29583334,36.80416667
1,0.00833333,0.00833333,-12.3,-47.2,-12.30416667,-47.20416667,-12.29583334,-47.19583334
1,0.00833333,0.00833333,-12.3,0,-12.30416667,-0.004166665,-12.29583334,0.004166665
1,0.00833333,0.00833333,0,36.8,-0.004166665,36.79583334,0.004166665,36.80416667
1,0.00833333,0.00833333,0,-47.2,-0.004166665,-47.20416667,0.004166665,-47.19583334
1,0.00833333,0.00833333,0,0,-0.004166665,-0.004166665,0.004166665,0.004166665
2,0.00833333,0,25,36.8,999.9,999.9,999.9,999.9
2,0.00833333,0,25,-47.2,999.9,999.9,999.9,999.9
2,0.00833333,0,25,0,999.9,999.9,999.9,999.9
2,0.00833333,0,-12.3,36.8,999.9,999.9,999.9,999.9
2,0.00833333,0,-12.3,-47.2,999.9,999.9,999.9,999.9
2,0.00833333,0,-12.3,0,999.9,999.9,999.9,999.9
2,0.00833333,0,0,36.8,999.9,999.9,999.9,999.9
2,0.00833333,0,0,-47.2,999.9,999.9,999.9,999.9
2,0.00833333,0,0,0,999.9,999.9,999.9,999.9
3,0.00416667,0.00833333,25,36.8,24.99791667,36.79583334,25.00208334,36.80416667
3,0.00416667,0.00833333,25,-47.2,24.99791667,-47.20416667,25.00208334,-47.19583334
3,0.00416667,0.00833333,25,0,24.99791667,-0.004166665,25.00208334,0.004166665
3,0.00416667,0.00833333,-12.3,36.8,-12.30208334,36.79583334,-12.29791667,36.80416667
3,0.00416667,0.00833333,-12.3,-47.2,-12.30208334,-47.20416667,-12.29791667,-47.19583334
3,0.00416667,0.00833333,-12.3,0,-12.30208334,-0.004166665,-12.29791667,0.004166665
3,0.00416667,0.00833333,0,36.8,-0.002083335,36.79583334,0.002083335,36.80416667
3,0.00416667,0.00833333,0,-47.2,-0.002083335,-47.20416667,0.002083335,-47.19583334
3,0.00416667,0.00833333,0,0,-0.002083335,-0.004166665,0.002083335,0.004166665
4,0.00416667,0,25,36.8,999.9,999.9,999.9,999.9
4,0.00416667,0,25,-47.2,999.9,999.9,999.9,999.9
4,0.00416667,0,25,0,999.9,999.9,999.9,999.9
4,0.00416667,0,-12.3,36.8,999.9,999.9,999.9,999.9
4,0.00416667,0,-12.3,-47.2,999.9,999.9,999.9,999.9
4,0.00416667,0,-12.3,0,999.9,999.9,999.9,999.9
4,0.00416667,0,0,36.8,999.9,999.9,999.9,999.9
4,0.00416667,0,0,-47.2,999.9,999.9,999.9,999.9
4,0.00416667,0,0,0,999.9,999.9,999.9,999.9
5,-0.005,0.00833333,25,36.8,999.9,999.9,999.9,999.9
5,-0.005,0.00833333,25,-47.2,999.9,999.9,999.9,999.9
5,-0.005,0.00833333,25,0,999.9,999.9,999.9,999.9
5,-0.005,0.00833333,-12.3,36.8,999.9,999.9,999.9,999.9
5,-0.005,0.00833333,-12.3,-47.2,999.9,999.9,999.9,999.9
5,-0.005,0.00833333,-12.3,0,999.9,999.9,999.9,999.9
5,-0.005,0.00833333,0,36.8,999.9,999.9,999.9,999.9
5,-0.005,0.00833333,0,-47.2,999.9,999.9,999.9,999.9
5,-0.005,0.00833333,0,0,999.9,999.9,999.9,999.9
6,-0.005,0,25,36.8,999.9,999.9,999.9,999.9
6,-0.005,0,25,-47.2,999.9,999.9,999.9,999.9
6,-0.005,0,25,0,999.9,999.9,999.9,999.9
6,-0.005,0,-12.3,36.8,999.9,999.9,999.9,999.9
6,-0.005,0,-12.3,-47.2,999.9,999.9,999.9,999.9
6,-0.005,0,-12.3,0,999.9,999.9,999.9,999.9
6,-0.005,0,0,36.8,999.9,999.9,999.9,999.9
6,-0.005,0,0,-47.2,999.9,999.9,999.9,999.9
6,-0.005,0,0,0,999.9,999.9,999.9,999.9
"""
Copyright (C) 2021
Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Global Dynamic Exposure Model
Helmholtz Centre Potsdam
GFZ German Research Centre for Geosciences
Section 2.6: Seismic Hazard and Risk Dynamics
test_GDE_TOOLS_create_industrial_cells
======================================
This module contains the tests for GDE_TOOLS_create_industrial_cells.py.
"""
import os
from copy import deepcopy
import numpy as np
import pandas as pd
import geopandas as gpd
from shapely.geometry import Polygon
import GDE_TOOLS_create_industrial_cells as gdet_cr_ind
def test_define_corners():
"""The test reads both inputs and expected outputs from a datafile that was generated using
a spreadsheet.
Tolerance used for the difference in results (subtraction) = 1e-08, which corresponds to the
number of decimal places used in the test dataset.
"""
# Read test data:
pathname = os.path.join(
os.path.dirname(__file__), "data", "GDE_TOOLS_create_industrial_cells"
)
test_data = pd.read_csv(os.path.join(pathname, "test_data_01.csv"), sep=",")
# Initialise arrays to store locally-calculated results:
local_res_lon_w = np.zeros([test_data.shape[0]])
local_res_lat_s = np.zeros([test_data.shape[0]])
local_res_lon_e = np.zeros([test_data.shape[0]])
local_res_lat_n = np.zeros([test_data.shape[0]])
# Calculate results using the function being tested:
for i in range(test_data.shape[0]):
centr_lon_i = test_data["centr_lon"].values[i]
centr_lat_i = test_data["centr_lat"].values[i]
width_EW_i = test_data["width_EW"].values[i]
width_NS_i = test_data["width_NS"].values[i]
lon_w, lat_s, lon_e, lat_n = gdet_cr_ind.define_corners(
centr_lon_i, centr_lat_i, width_EW_i, width_NS_i
)
local_res_lon_w[i] = lon_w
local_res_lat_s[i] = lat_s
local_res_lon_e[i] = lon_e
local_res_lat_n[i] = lat_n
# Compare locally-calculated results with those of the test data file:
np.testing.assert_allclose(
local_res_lon_w, test_data["res_lon_w"].values, rtol=0.0, atol=1e-08
)
np.testing.assert_allclose(
local_res_lat_s, test_data["res_lat_s"].values, rtol=0.0, atol=1e-08
)
np.testing.assert_allclose(
local_res_lon_e, test_data["res_lon_e"].values, rtol=0.0, atol=1e-08
)
np.testing.assert_allclose(
local_res_lat_n, test_data["res_lat_n"].values, rtol=0.0, atol=1e-08
)
def test_define_cells_in_dataframe():
"""The core functionality of the define_cells_in_dataframe() is calling functions
define_corners() and define_cell_polygon(). The former has its own test, the latter only
passes arguments to generate a Shapely Polygon.
This test uses the same datafile as the test for define_corners(), which was generated using
a spreadsheet.
"""
# Read test data:
pathname = os.path.join(
os.path.dirname(__file__), "data", "GDE_TOOLS_create_industrial_cells"
)
test_data = pd.read_csv(os.path.join(pathname, "test_data_01.csv"), sep=",")
# Parameters needed:
col_lon = "lon"
col_lat = "lat"
# The input data needs to be grouped into segments for this purpose, each segment with a
# particular combination of width_EW and width_NS (given by "case" column in test_data):
segments = np.unique(test_data["case"].values)
for segment in segments: # segments of the test data
# Create Pandas DataFrames with the test data, to be fed to the function to test:
which = np.where(test_data["case"].values == segment)[0]
d = {
"id": np.array(["ID_%s" % (i) for i in range(len(which))]),
col_lon: test_data["centr_lon"].values[which],
col_lat: test_data["centr_lat"].values[which],
"geometry": gpd.points_from_xy(
test_data["centr_lon"].values[which], test_data["centr_lat"].values[which]
),
}
test_df = gpd.GeoDataFrame(d, geometry="geometry", crs="EPSG:4326")
width_EW = test_data["width_EW"][which].values[0]
width_NS = test_data["width_NS"][which].values[0]
# Call function to test:
local_out = gdet_cr_ind.define_cells_in_dataframe(
test_df, col_lon, col_lat, width_EW, width_NS
)
# The testing is different depending on this condition:
if (width_EW <= 0.0) or (width_NS <= 0.0): # the output should be the same as the input
assert test_df.shape == local_out.shape
assert np.all(test_df.columns == local_out.columns)
np.testing.assert_allclose(
test_df["lon"].values, local_out["lon"].values, rtol=0.0, atol=1e-08
)
np.testing.assert_allclose(
test_df["lat"].values, local_out["lat"].values, rtol=0.0, atol=1e-08
)
else: # the output has extra columns with respect to the input test_df
np.testing.assert_allclose(
local_out["lon_w"].values,
test_data["res_lon_w"][which].values,
rtol=0.0,
atol=1e-08,
)
np.testing.assert_allclose(
local_out["lat_s"].values,
test_data["res_lat_s"][which].values,
rtol=0.0,
atol=1e-08,
)
np.testing.assert_allclose(
local_out["lon_e"].values,
test_data["res_lon_e"][which].values,
rtol=0.0,
atol=1e-08,
)
np.testing.assert_allclose(
local_out["lat_n"].values,
test_data["res_lat_n"][which].values,
rtol=0.0,
atol=1e-08,
)
for i in range(local_out.shape[0]):
res_bounds = np.array(
[
test_data["res_lon_w"][which].values[i],
test_data["res_lat_s"][which].values[i],
test_data["res_lon_e"][which].values[i],
test_data["res_lat_n"][which].values[i],
]
)
np.testing.assert_allclose(
np.asarray(local_out["geometry"].values[i].bounds),
res_bounds,
rtol=0.0,
atol=1e-08,
)
def test_retrieve_unique_points():
"""The function retrieve_unique_points() is tested herein using input data generated on the fly
when running the test. The key is to have a DataFrame with points (lon, lat) that repeat
themselves in several entries (=rows) of the DataFrame, hence the strategy of defining a
limited number of coordinates first and combining them to generate the DataFrame.
The test uses two alternative values of the precision parameter (precision_vals). The checks
are the same for both cases. The resulting length of the output DataFrame is verified
against values determined manually (expected_output_length).
The final values of longitude and latitude assigned to the unique points are compared
element by element against the original input DataFrame, which is linked to the output
DataFrame through function_ids_of_unique, the array of IDs of the unique points (those of
the output DataFrame) associated with each row of the input DataFrame.
"""
# Build a Pandas DataFrame for testing:
col_lon = "LON"
col_lat = "LAT"
lon1 = 23.78386
lon2 = 23.78382
lon3 = 25.12981
lat1 = 35.46778
lat2 = 35.46772
lat3 = 37.92617
d = {
col_lon: [lon1, lon1, lon2, lon2, lon1, lon3, lon2, lon1, lon2],
col_lat: [lat1, lat1, lat2, lat2, lat1, lat3, lat2, lat3, lat1],
}
dummy_df = pd.DataFrame(d)
# Define in_crs, id_str:
in_crs = "EPSG:4326"
id_str = "IND"
# Two cases of precision will be tested:
precision_vals = [4, 3]
expected_output_length = [5, 3] # results in correspondence with precision_vals
# Run the checks:
for k in range(len(precision_vals)):
res = gdet_cr_ind.retrieve_unique_points(
dummy_df, col_lon, col_lat, id_str, precision=precision_vals[k], in_crs=in_crs
)
function_out_gpd, function_ids_of_unique = res
assert len(function_ids_of_unique) == dummy_df.shape[0]
# Number of unique points in dummy_df (as a function of the precision):
assert function_out_gpd.shape[0] == expected_output_length[k]
# Compare the original values of (lon, lat) against those that get assigned according
# to the output, considering the precision used:
for i, cell_id in enumerate(function_ids_of_unique):
orig_lon = dummy_df[col_lon].values[i]
orig_lat = dummy_df[col_lat].values[i]
which_row = np.where(function_out_gpd["id"] == cell_id)[0][0]
function_lon = function_out_gpd[col_lon].values[which_row]
function_lat = function_out_gpd[col_lat].values[which_row]
assert round(orig_lon, precision_vals[k]) == round(function_lon, precision_vals[k])
assert round(orig_lat, precision_vals[k]) == round(function_lat, precision_vals[k])