Commit ac109b56 authored by Cecilia Nievas's avatar Cecilia Nievas
Browse files

New feature to generate GDE tiles as HDF5 files as output of...

New feature to generate GDE tiles as HDF5 files as output of GDE_gather_SERA_and_OBM, as before the output was just OpenQuake input files and two CSV files with a summary of the models to be loaded to QGIS
parent 0b89ddaf
......@@ -77,12 +77,16 @@ def make_list_country_admin_units_to_retrieve(admin_unit_type, list_subgroups, w
def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5_filename, countries_shp_path, sera_meta_path, occu_case, admin_unit_type= 'all', disagg_method= 'area', which_country= '', which_admin_ID= ''):
def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5_filename, countries_shp_path, sera_meta_path, gral_output_path, gde_tile_filename, occu_case, admin_unit_type= 'all', disagg_method= 'area', which_country= '', which_admin_ID= ''):
"""
obm_hdf5_filename: full file path to the OBM HDF5 file for a particular cell and occupancy case
bdg_classes_hdf5_filename: full file path to the HDF5 file of the SERA building classes for a particular occupancy case
countries_shp_path: full file path to the folder containing all shapefiles of admin units used in SERA
sera_meta_path: full file path to the HDF5 file with the SERA metadata
gral_output_path: path to the general output directory (gde_tile_filename will be written in gral_output_path/GDE_tiles/gde_tile_filename
gde_tile_filename: filename of the output GDE tile HDF5 file (of the kind GDE_cell_XXXXX_disagg_method.hdf5).
gde_tile_filename will only be created/written to if obm_hdf5_filename exists, i.e. if there exist OBM buildings
in this cell
occu_case: Res, Com, Ind, Oth
admin_unit_type:
"all" = all admin units within the cell
......@@ -92,7 +96,6 @@ def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5
(i.e. method used to distribute SERA to a grid)
which_country: only needed if admin_unit_type is "country" or "admin_unit"; full country name
which_admin_ID: only needed if admin_unit_type is "admin_unit"
Output:
- out_dict: dictionary with three sub-keys:
- DF: Pandas DataFrame with the OBM buildings in this cell (according to admin_unit_type). Its
......@@ -130,6 +133,7 @@ def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5
file_found= True
except:
file_found= False
gdet_gral.write_OBM_to_GDE_tiles_initialisation(obm_hdf5_filename, gral_output_path, gde_tile_filename, occu_case, disagg_method)
if file_found:
list_country_admin_units= make_list_country_admin_units_to_retrieve(admin_unit_type, list(fle), which_country=which_country, which_admin_ID=which_admin_ID)
obm_without_classes_num_bdgs_per_adm_unit= np.zeros([len(list_country_admin_units)])
......@@ -179,7 +183,7 @@ def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5
dwell_per_bdg= np.zeros([len(all_sera_classes)])
area_per_dwelling_sqm= np.zeros([len(all_sera_classes)])
cost_per_area_usd= np.zeros([len(all_sera_classes)])
ppl_per_dwell= np.zeros([len(all_sera_classes)])
ppl_per_dwell= np.zeros([len(all_sera_classes)])
for k, bdg_class in enumerate(all_sera_classes):
osm_ids.append('OSM_'+str(osm_id))
taxonomy_stars.append(bdg_class)
......@@ -194,6 +198,15 @@ def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5
area_per_dwelling_sqm[k]= parameter_vals[which_row_admin_id, int(col_names[np.where(np.array(col_contents)=='area_per_dwelling_sqm')[0][0]].split('_')[-1])]
cost_per_area_usd[k]= parameter_vals[which_row_admin_id, int(col_names[np.where(np.array(col_contents)=='cost_per_area_usd')[0][0]].split('_')[-1])]
ppl_per_dwell[k]= parameter_vals[which_row_admin_id, int(col_names[np.where(np.array(col_contents)=='ppl_per_dwell')[0][0]].split('_')[-1])]
# For adding values of people and cost per building to the GDE tile HDF5 file:
ppl_per_bdg= dwell_per_bdg * ppl_per_dwell # 1D array of len(all_sera_classes)
cost_per_bdg_usd= dwell_per_bdg * area_per_dwelling_sqm * cost_per_area_usd # 1D array of len(all_sera_classes)
weighted_av_ppl= (ppl_per_bdg * all_sera_proportions[:,0]).sum()
weighted_av_cost= (cost_per_bdg_usd * all_sera_proportions[:,0]).sum()
error_writing_gde_tile= gdet_gral.write_OBM_to_GDE_tiles_add_params(gral_output_path, gde_tile_filename, occu_case, disagg_method, osm_id, country_adminID, all_sera_classes, ppl_per_bdg, cost_per_bdg_usd, weighted_av_ppl, weighted_av_cost, 'USD')
if error_writing_gde_tile!='':
error_messages= error_messages+' '+error_writing_gde_tile
# For building the DataFrame to output:
num_bdgs= np.hstack([num_bdgs, all_sera_proportions[:,0]])
num_dwells_local= all_sera_proportions[:,0] * dwell_per_bdg
num_dwells= np.hstack([num_dwells, num_dwells_local])
......@@ -244,38 +257,6 @@ def retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5
error_messages= error_messages+ ' ERROR IN retrieve_OBM_buildings_in_cell_from_hdf5: occu_case NOT RECOGNISED'
fle.close()
return out_dict, list_country_admin_units, obm_without_classes_num_bdgs_per_adm_unit, list_country_admin_units_levels, error_messages
\ No newline at end of file
......@@ -31,7 +31,6 @@ import h5py
import numpy as np
import GDE_TOOLS_general as gdet_gral
import GDE_TOOLS_world_grid as gdet_grid
import pdb
def verify_paths_are_given(gral_path, hdf5_filename, in_case, distrib_method):
......
This diff is collapsed.
......@@ -242,6 +242,25 @@ def check_parameters(config, section_name):
raise IOError('ERROR!! countries PARAMETER MISSING FROM CONFIG FILE!!')
check_of_sera_disaggregation_to_consider_parameter(config, section_name)
_= check_of_occupancy_cases_parameter(config, section_name)
elif section_name=='GDE_check_tiles_vs_visual_CSVs':
_= check_of_occupancy_cases_parameter(config, section_name)
if not config.has_option('GDE_check_tiles_vs_visual_CSVs', 'path_GDE_tiles'):
raise IOError('ERROR!! path_GDE_tiles PARAMETER MISSING FROM CONFIG FILE!!')
if not config.has_option('GDE_check_tiles_vs_visual_CSVs', 'path_visual_csv'):
raise IOError('ERROR!! path_visual_csv PARAMETER MISSING FROM CONFIG FILE!!')
else:
if config['GDE_check_tiles_vs_visual_CSVs']['path_visual_csv'].split('.')[-1]!='csv':
raise IOError('ERROR!! path_visual_csv IN CONFIG FILE IS MEANT TO BE A .csv FILE!!')
if not config.has_option('GDE_check_tiles_vs_visual_CSVs', 'decimal_places_gral'):
raise IOError('ERROR!! decimal_places_gral PARAMETER MISSING FROM CONFIG FILE!!')
else:
if not config[section_name]['decimal_places_gral'].isnumeric():
raise IOError('ERROR!! decimal_places_gral NEEDS TO BE A POSITIVE INTEGER!!')
if not config.has_option('GDE_check_tiles_vs_visual_CSVs', 'decimal_places_costs'):
raise IOError('ERROR!! decimal_places_costs PARAMETER MISSING FROM CONFIG FILE!!')
else:
if not config[section_name]['decimal_places_costs'].isnumeric():
raise IOError('ERROR!! decimal_places_costs NEEDS TO BE A POSITIVE INTEGER!!')
else:
raise IOError('ERROR IN check_parameters(): '+section_name+' NOT RECOGNISED!!')
......
"""
Copyright (C) 2020
Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU Affero General Public License as
published by the Free Software Foundation, either version 3 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU Affero General Public License for more details.
You should have received a copy of the GNU Affero General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
Global Dynamic Exposure Model
Helmholtz Centre Potsdam
GFZ German Research Centre for Geosciences
Section 2.6: Seismic Hazard and Risk Dynamics
GDE_check_tiles_vs_visual_CSVs
==============================
This code reads the visual CSV output by cell and the corresponding GDE tiles HDF5 files
and compares the number of buildings, cost and number of people in each cell according
to each of the two. An output CSV file collects the discrepancies found, if any.
Note that the code goes through each cell ID in the CSV output and attemps to open the
corresponding HDF5 file. No check is carried out to see if there is any HDF5 file wi
"""
import datetime
import os
import sys
import h5py
import pandas as pd
import numpy as np
import GDE_TOOLS_read_config_file as gdet_conf
def run_this_file(config_dict):
####################################################
# READ CONFIGURATION PARAMETERS
####################################################
print('Processing configuration parameters...')
# Path for output:
out_path= config_dict['File Paths']['out_path']
# Path to the GDE tiles HDF5 files to consider:
path_GDE_tiles = config_dict['GDE_check_tiles_vs_visual_CSVs']['path_GDE_tiles']
# Path to the by-cell visual output CSV file to consider:
path_visual_csv = config_dict['GDE_check_tiles_vs_visual_CSVs']['path_visual_csv']
# Occupancy cases and subclassifications to consider:
occupancy_cases= config_dict['GDE_check_tiles_vs_visual_CSVs']['occupancy_cases'].split(', ')
# Decimal places to use for the comparison
decimal_places_gral= int(config_dict['GDE_check_tiles_vs_visual_CSVs']['decimal_places_gral'])
decimal_places_costs= int(config_dict['GDE_check_tiles_vs_visual_CSVs']['decimal_places_costs'])
####################################################
# START
####################################################
# String with present time to be used for naming the output TXT file:
now= datetime.datetime.now()
now_str= str(now.year)+'_'+str(now.month).zfill(2)+'_'+str(now.day).zfill(2)+'_'+str(now.hour).zfill(2)+'_'+str(now.minute).zfill(2)+'_'+str(now.second).zfill(2)
# Read IDs of all cells for which there are HDF5 files in the directory path_GDE_tiles:
list_GDE_tiles= []
for file in os.listdir(path_GDE_tiles):
if file.endswith(".hdf5"):
list_GDE_tiles.append(file.split('.')[0].split('_')[-1])
problems_found= []
# Load the visual CSV file:
visual_cells= pd.read_csv(path_visual_csv, sep=';')
for i, cell_id in enumerate(visual_cells['cell_id'].values):
print('\r Working on cell ID '+str(cell_id)+'. Cell '+str(i+1)+' of '+str(visual_cells.shape[0])+'.', end='')
# Open the GDE tile HDF5 file associated with this cell:
try:
fle= h5py.File(os.path.join(path_GDE_tiles, 'GDE_cell_'+str(cell_id).zfill(10)+'.hdf5'), "r")
file_found= True
except:
file_found= False
if file_found:
# Remove this ID from the list of GDE tiles:
list_GDE_tiles.remove(str(cell_id))
# Check completeness value:
if fle.attrs['Completeness']!=visual_cells['completeness'].values[i]:
print('PROBLEM!!!')
# Check totals of GDE buildings, LeftOver buildings and total buildings:
for occupancy in occupancy_cases:
# Check numbers of buildings (GDE buildings compared as integers because they are):
if round(visual_cells['number_'+occupancy+'_OBM_with_classes'].values[i], 0)!=round(fle[occupancy]['Total']['GDE_bdgs'].attrs['Num_Bdgs_with_Class'], 0):
problems_found.append('Cell '+str(cell_id)+', '+occupancy+': numbers of GDE buildings with classes do not match.')
if round(visual_cells['number_'+occupancy+'_OBM'].values[i], 0)!=round(fle[occupancy]['Total']['GDE_bdgs'].attrs['Num_Bdgs_Total'], 0):
problems_found.append('Cell '+str(cell_id)+', '+occupancy+': numbers of total GDE buildings do not match.')
if round(visual_cells['number_'+occupancy+'_LeftOver'].values[i], decimal_places_gral)!=round(fle[occupancy]['Total']['LeftOver'].attrs['Num_Bdgs_Total'], decimal_places_gral):
problems_found.append('Cell '+str(cell_id)+', '+occupancy+': numbers of LeftOver buildings do not match.')
if round(visual_cells['number_'+occupancy+'_Total'].values[i], decimal_places_gral)!=round(fle[occupancy]['Total'].attrs['Num_Bdgs_Total'], decimal_places_gral):
problems_found.append('Cell '+str(cell_id)+', '+occupancy+': numbers of total (GDE+LeftOver) buildings do not match.')
if occupancy!='Oth':
# Check cost:
if round(visual_cells['structural_'+occupancy+'_OBM_with_classes'].values[i], decimal_places_costs)!=round(fle[occupancy]['Total']['GDE_bdgs'].attrs['Weighted_Av_Cost'], decimal_places_costs):
problems_found.append('Cell '+str(cell_id)+', '+occupancy+': costs of GDE buildings with classes do not match.')
if round(visual_cells['structural_'+occupancy+'_LeftOver'].values[i], decimal_places_costs)!=round(fle[occupancy]['Total']['LeftOver'].attrs['Cost_Total'], decimal_places_costs):
problems_found.append('Cell '+str(cell_id)+', '+occupancy+': costs of LeftOver buildings do not match.')
if round(visual_cells['structural_'+occupancy+'_Total'].values[i], decimal_places_costs)!=round(fle[occupancy]['Total'].attrs['Cost_Total'], decimal_places_costs):
problems_found.append('Cell '+str(cell_id)+', '+occupancy+': costs of total (GDE+LeftOver) buildings do not match.')
# Check people:
if round(visual_cells['night_'+occupancy+'_OBM_with_classes'].values[i], decimal_places_gral)!=round(fle[occupancy]['Total']['GDE_bdgs'].attrs['Weighted_Av_Ppl'], decimal_places_gral):
problems_found.append('Cell '+str(cell_id)+', '+occupancy+': numbers of people of GDE buildings with classes do not match.')
if round(visual_cells['night_'+occupancy+'_LeftOver'].values[i], decimal_places_gral)!=round(fle[occupancy]['Total']['LeftOver'].attrs['Num_Ppl_Total'], decimal_places_gral):
problems_found.append('Cell '+str(cell_id)+', '+occupancy+': number of people of LeftOver buildings do not match.')
if round(visual_cells['night_'+occupancy+'_Total'].values[i], decimal_places_gral)!=round(fle[occupancy]['Total'].attrs['Num_Ppl_Total'], decimal_places_gral):
problems_found.append('Cell '+str(cell_id)+', '+occupancy+': numbers of people of total (GDE+LeftOver) buildings do not match.')
fle.close()
else:
print('\n')
print(' ERROR: GDE tile HDF5 file for cell ID '+str(cell_id)+' not found!! Moving on to next cell...')
problems_found.append('Cell '+str(cell_id)+': GDE tile HDF5 file NOT FOUND.')
if len(list_GDE_tiles)>0:
print('\n')
print(' WARNING: There are GDE tile HDF5 files in the provided path that are not in the CSV file!!')
problems_found.append('WARNING: There are GDE tile HDF5 files in the provided path that are not in the CSV file!! These are: '+', '.join(list_GDE_tiles))
print('\n')
out_txt= open(os.path.join(out_path, 'GDE_check_tiles_vs_visual_CSVs_'+now_str+'.txt'), 'w')
out_txt.write('Path to the GDE tiles analysed:\n')
out_txt.write(path_GDE_tiles+'\n')
out_txt.write('\n')
out_txt.write('Path to the by-cell visual output CSV file analysed:\n')
out_txt.write(path_visual_csv+'\n')
out_txt.write('\n')
out_txt.write('Number of decimal places used for the comparison (number of buildings and people):\n')
out_txt.write(str(decimal_places_gral)+'\n')
out_txt.write('\n')
out_txt.write('Number of decimal places used for the comparison (costs):\n')
out_txt.write(str(decimal_places_costs)+'\n')
out_txt.write('\n')
out_txt.write('================================================================\n')
out_txt.write('\n')
if len(problems_found)==0:
out_txt.write('No discrepancies found between the two kinds of output. All OK according to this check!\n')
print('No discrepancies found between the two kinds of output. All OK according to this check!')
else:
print('DISCREPANCIES FOUND BETWEEN THE TWO KINDS OF OUTPUT!! ERROR!!')
print(' Find the errors found listed in '+os.path.join(out_path, 'GDE_check_tiles_vs_visual_CSVs_'+now_str+'.txt (being stored now...)'))
out_txt.write('The following discrepancies were found between the two kinds of output. ERRORS FOUND!!:\n')
for error_message in problems_found:
out_txt.write(error_message+'\n')
out_txt.close()
print('\n')
print('Done!')
if __name__=='__main__':
# This code needs to be run from the command line as python3 namefile.py configfile.ini
config_filename= sys.argv[1] # sys.argv retrieves all the commands entered in the command line; position [0] is this code, position [1] will be the config file name
section_names_to_validate= ['File Paths', os.path.basename(__file__).split('.')[0]]
config_dict= gdet_conf.read_config_parameters(os.path.join(os.getcwd(), config_filename), section_names_to_validate)
run_this_file(config_dict)
\ No newline at end of file
......@@ -263,5 +263,14 @@ sera_disaggregation_to_consider = sat_27f
# Occupancy cases to consider:
occupancy_cases = Res, Com, Ind
[GDE_check_tiles_vs_visual_CSVs]
# Path to the GDE tiles HDF5 files to consider (directory):
path_GDE_tiles = WRITE_PATH
# Path to the by-cell visual output CSV file to consider (file):
path_visual_csv = WRITE_PATH_INCLUDING_FILENAME
# Occupancy cases to consider:
occupancy_cases = Res, Com, Ind, Oth
# Decimal places tolerance (costs tend to have discrepancies, use less decimal places):
decimal_places_gral = 4
decimal_places_costs = 0
......@@ -127,7 +127,7 @@ def run_this_file(config_dict):
bdg_classes_hdf5_filename= os.path.join(out_path,'Europe_SERA_bdg_classes_'+case+'.hdf5')
else:
bdg_classes_hdf5_filename= ''
obm_dict, obm_country_adminIDs, obm_without_classes_num_bdgs_per_adm_unit, obm_country_adminIDs_levels, error_message= gdet_obmh.retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5_filename, sera_shp_path, os.path.join(out_path, 'Europe_SERA_metadata_v_'+version_of_SERA_metadata+'.hdf5'), case, admin_unit_type= 'all', disagg_method= sera_disaggregation_to_consider)
obm_dict, obm_country_adminIDs, obm_without_classes_num_bdgs_per_adm_unit, obm_country_adminIDs_levels, error_message= gdet_obmh.retrieve_OBM_buildings_in_cell_from_hdf5(obm_hdf5_filename, bdg_classes_hdf5_filename, sera_shp_path, os.path.join(out_path, 'Europe_SERA_metadata_v_'+version_of_SERA_metadata+'.hdf5'), out_path, 'GDE_cell_'+str(grid_cell_id).zfill(10)+'.hdf5', case, admin_unit_type= 'all', disagg_method= sera_disaggregation_to_consider)
if error_message!='':
print(error_message)
log.append(error_message)
......@@ -257,8 +257,12 @@ def run_this_file(config_dict):
cell_summary_dict= gdet_gral.fill_in_cell_summary_dictionary_with_SERA_LeftOver_or_Total_bdgs_dwells_ppl(cell_summary_dict, case, 'Total', total_num_bdgs, total_num_dwells, total_num_ppl, total_cost)
admin_summary_dict= gdet_gral.fill_in_admin_summary_dictionary_with_SERA_LeftOver_or_Total_bdgs_dwells_ppl(admin_summary_dict, case, 'LeftOver', sera_country_adminIDs, sera_country_adminIDs_levels, leftover_num_bdgs_per_class_and_adm_unit, leftover_num_dwells_per_class_and_adm_unit, leftover_num_ppl_per_class_and_adm_unit, leftover_cost_per_class_and_adm_unit, sera_shp_path, os.path.join(out_path, 'Europe_SERA_metadata_v_'+version_of_SERA_metadata+'.hdf5'), input_kind='per_class_and_adm_unit')
admin_summary_dict= gdet_gral.fill_in_admin_summary_dictionary_with_SERA_LeftOver_or_Total_bdgs_dwells_ppl(admin_summary_dict, case, 'Total', total_list_country_adminIDs, total_list_country_adminIDs_levels, total_num_bdgs_all_per_adm_unit, total_num_dwells_per_adm_unit, total_num_ppl_per_adm_unit, total_cost_per_adm_unit, sera_shp_path, os.path.join(out_path, 'Europe_SERA_metadata_v_'+version_of_SERA_metadata+'.hdf5'), input_kind='per_adm_unit')
gdet_gral.write_LeftOver_to_GDE_tiles(out_path, 'GDE_cell_'+str(grid_cell_id).zfill(10)+'.hdf5', case, sera_disaggregation_to_consider, sera_classes, sera_country_adminIDs, leftover_num_bdgs_per_class_and_adm_unit, leftover_cost_per_class_and_adm_unit, leftover_num_ppl_per_class_and_adm_unit, 'USD')
gdet_gral.wrap_up_totals_in_GDE_tiles(out_path, 'GDE_cell_'+str(grid_cell_id).zfill(10)+'.hdf5', sera_disaggregation_to_consider)
# Generate visual output:
gdet_gral.write_visual_output_GDE_by_cell(grid_cell_id, cell_summary_dict, os.path.join(out_path, 'export_grids_QGIS', 'GDE_visual_'+out_name+'.csv'))
# Store completeness value of cell in GDE tile:
gdet_gral.write_completness_to_GDE_tiles(out_path, 'GDE_cell_'+str(grid_cell_id).zfill(10)+'.hdf5', sera_disaggregation_to_consider, compl_cells[i])
gdet_gral.write_visual_output_GDE_by_admin_unit(admin_summary_dict, os.path.join(out_path, 'export_grids_QGIS', 'GDE_visual_'+out_name+'_by_admin_units.csv'))
print('\n')
print('Done!')
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment