Commit 6830f645 authored by Nicolas Garcia Ospina's avatar Nicolas Garcia Ospina
Browse files

Included database with basic data retrieval

parent ddc2f206
Pipeline #19839 passed with stage
in 3 minutes and 57 seconds
......@@ -12,3 +12,4 @@ build
dist
env
.idea
config.yml
# obmgapanalysis
Automated completeness assessment based on the relationship op the dynamic states of OpenBuildingMap and static databases derived from Remote sensing data such as Global Human Settlement Layer (GHSL) and World Settlement Footprint 2015 (WSF-2015).
Automated completeness assessment based on the relationship op the dynamic
states of OpenBuildingMap and static databases derived from remote sensing
data such as Global Human Settlement Layer (GHSL) and World Settlement Footprint
2015 (WSF-2015).
## Installing obmgapanalysis
### Software dependencies
* Python >= 3.7
* GDAL
* Make sure to have `proj-bin`, `libgeos-dev`, `gdal-bin` and `libgdal-dev` (Debian/Ubuntu specific)
### Python libraries
* `pyproj>=3.0.0.post1`
* `shapely`
* `babelgrid`
* `fiona`
* `rtree`
* `geopandas`
* `rasterio`
* `psycopg2`
* `pyyaml`
### Install
```bash
git clone https://git.gfz-potsdam.de/dynamicexposure/openbuildingmap/obmgapanalysis
cd obmgapanalysis
pip3 install .
```
## Running obmgapanalysis
copy the config-example.yml to your working directory as config.yml and modify
the variables regarding the data source, database credentials and tiles for input quadkeys.
```bash
cd /your/working/directory
obmgapanalysis
```
## Copyright and copyleft
......
#!/usr/bin/env python3
# Copyright (c) 2021:
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
......
database:
host: your_host.dir
dbname: gis
port: 5433
username: postgres
password: secret_pass
roads_table:
tablename: planet_osm_roads
geometry_field: way
datasource:
crs: epsg:3857
pathname: /your/path/to/datasource
raster_files_index: GHS_TEST_INDEX.shp
built_pixel_values: [6, 5, 4, 3]
tiles:
use_txt: False
tiles_list: ['122100200320321020', '122100200320321021', '122100200320321022', '122100200320321023']
txt_file: tiles.txt
#!/usr/bin/env python3
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.
import logging
import psycopg2
import pyproj
import geopandas
from shapely.ops import transform
# Initialize log
logger = logging.getLogger(__name__)
class Database:
"""A Database object includes the necessary credentials to connect to a given postGIS
database and makes use of different methods to request information on buildings,
roads and uploads tile-based entries.
Input dataset:
The source of information are sets of raster data and a raster index file that
includes filepaths and raster footprints. These sources can either be:
Args:
host (str): Postgres Database host direction.
dbname (str): Postgres database name.
port (str): Port where the Postgres database can be found.
username (str): User to connect to the Postgres database.
password (str or getpass.getpass): Password for username argument
Attributes:
self.host (str): Postgres Database host direction.
self.dbname (str): Postgres database name.
self.port (str): Port where the Postgres database can be found.
self.username (str): User to connect to the Postgres database.
self.password (str or getpass.getpass): Password for username argument
"""
def __init__(self, host, dbname, port, username, password, **kwargs):
self.host = host
self.dbname = dbname
self.port = port
self.username = username
self.password = password
# Initialize other attributes
self.connection = None
self.cursor = None
def create_connection_and_cursor(self):
"""Create a database connection and a cursor with psycopg2 with the
given credentials and include the connection and cursor as new attributes.
Attributes:
self.connection (psycopg2.extensions.connection):
Connection object to the database.
self.cursor (psycopg2.extensions.cursor):
Cursor object to run commands on the database.
Returns:
None
"""
connection_string = "host={} dbname={} user={} password={} port={}".format(
self.host, self.dbname, self.username, self.password, self.port
)
connection = psycopg2.connect(connection_string)
setattr(self, "connection", connection)
setattr(self, "cursor", connection.cursor())
def get_crs_from_geometry_field(
self, tablename, schema="public", geometry_field="geometry"
):
"""Returns the epsg/srid number from the specified table in the database
Args:
schema (str): Database schema where the database is located. Default = "public"
tablename (str): Table name within database for searching (e.g. "obm_buildings")
geometry_field (str): Name of the column with geometries. Default = "geometry"
Returns:
crs_number (str): Number of the epsg/srid code for the specified table.
"""
crs_search_query = "SELECT Find_SRID('{}','{}','{}')".format(
schema, tablename, geometry_field
)
self.cursor.execute(crs_search_query)
crs_number = self.cursor.fetchone()[0]
return crs_number
def get_features_in_tile(
self, tile, tablename, crs_number, schema="public", geometry_field="geometry"
):
"""Returns a GeoPandas dataframe with all features that intersect the given tile.
The features are extracted from a specified table with Database.set_table().
Args:
tile (tile.Tile): Tile object with quadkey, crs and geometry attributes.
tablename (str): Table name within database for searching (e.g. "obm_buildings")
crs_number (str): Number of the epsg/srid code for the specified table.
schema (str): Database schema where the database is located. Default = "public"
geometry_field (str): Name of the column with geometries. Default = "geometry"
Returns:
output_features (geopandas.geodataframe.GeoDataFrame): GeoPandas dataframe
with all features and attributes that intersect the given quadtile.
"""
geometry = tile.geometry
print(str(geometry))
print(tile.crs)
print("epsg:{}".format(crs_number))
if tile.crs != "epsg:{}".format(crs_number):
project = pyproj.Transformer.from_proj(
pyproj.Proj(tile.crs),
pyproj.Proj("epsg:{}".format(crs_number)),
always_xy=True,
)
geometry = transform(project.transform, geometry)
print(str(geometry))
sql_query = (
"SELECT * "
"FROM {}.{} "
"WHERE ST_Intersects({},'SRID={};{}')".format(
schema, tablename, geometry_field, crs_number, geometry
)
)
output_features = geopandas.GeoDataFrame.from_postgis(
sql_query,
self.connection,
geom_col=geometry_field,
crs="epsg:{}".format(crs_number),
)
return output_features
#!/usr/bin/env python3
# Copyright (c) 2021:
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
......@@ -18,10 +18,10 @@
import logging
import os
import geopandas
# Initialize log
logger = logging.getLogger(__name__)
......
#!/usr/bin/env python3
# Copyright (c) 2021:
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
......@@ -20,9 +20,12 @@
import logging
import os
import sys
import yaml
from obmgapanalysis.datasource import DataSource
from obmgapanalysis.tile import Tile
from obmgapanalysis.processor import TileProcessor
from obmgapanalysis.database import Database
# Add a logger printing error, warning, info and debug messages to the screen
......@@ -30,39 +33,52 @@ logger = logging.getLogger()
logger.setLevel(logging.DEBUG)
logger.addHandler(logging.StreamHandler(sys.stdout))
with open("config.yml", "r") as ymlfile:
config = yaml.load(ymlfile, Loader=yaml.FullLoader)
def main():
# Check for OBMGAPANALYSIS_BASE_PATH environment variable
# and create it if necessary.
db_config = config["database"]
datasource_config = config["datasource"]
tiles_config = config["tiles"]
if "OBMGAPANALYSIS_BASE_PATH" not in os.environ:
logging.debug(
"OBMGAPANALYSIS_BASE_PATH is not a system variable \n "
"Create variable OBMGAPANALYSIS_BASE_PATH:"
)
os.environ["OBMGAPANALYSIS_BASE_PATH"] = str(input())
pass
else:
logger.info("OBMGAPANALYSIS_BASE_PATH exists")
# Read the OBMGAPANALYSIS_BASE_PATH environment variable
OBMGAPANALYSIS_BASE_PATH = os.environ["OBMGAPANALYSIS_BASE_PATH"]
def main():
# Example logging output
logger.info("obmgapanalysis has started")
# If valid instantiate the GHSL dataset as a DataSource class
if "GHSL":
datasource = DataSource(
crs="epsg:3857",
pathname=OBMGAPANALYSIS_BASE_PATH,
raster_files_index="GHS_TEST_INDEX.shp",
built_pixel_values=[6, 5, 4, 3],
datasource = DataSource(**datasource_config)
logger.info(
"DataSource raster files are registered in {}".format(
os.path.join(datasource.pathname, datasource.raster_files_index)
)
)
# Connect to the OBM database
roads_database = Database(**db_config)
roads_database.create_connection_and_cursor()
roads_table_crs_number = roads_database.get_crs_from_geometry_field(
**db_config["roads_table"]
)
logger.info(datasource.raster_files_index.head(5))
logger.info(type(datasource))
# create a tile with the same crs as the DataSource
tile = Tile("122100200320321022", datasource.crs)
logger.info(
"Connection established to {} in {}".format(db_config["dbname"], db_config["host"])
)
# List all the
if tiles_config["use_txt"]:
with open(tiles_config["txt_file"]) as fid:
tiles_list = [line.strip() for line in fid]
else:
tiles_list = tiles_config["tiles_list"]
logger.info("{} tiles will be processed".format(len(tiles_list)))
# Loop through each quadkey and retrieve information
for quadkey in tiles_list:
# Create a tile with the same crs as the DataSource
tile = Tile(quadkey, datasource.crs)
logger.debug(str(tile.geometry))
logger.info(type(tile))
......@@ -92,6 +108,18 @@ def main():
)
)
roads_in_tile = roads_database.get_features_in_tile(
tile=tile, crs_number=roads_table_crs_number, **db_config["roads_table"]
)
logger.info(
"{} road features were found for tile {}".format(
len(roads_in_tile.index), tile.quadkey
)
)
roads_database.connection.close()
# Leave the program
sys.exit()
......
#!/usr/bin/env python3
# Copyright (c) 2021:
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
......@@ -18,15 +18,14 @@
import logging
import rasterio
from rasterio.mask import mask
from shapely.geometry import mapping
from rasterio import features
import geopandas
import os
# Initialize log
logger = logging.getLogger(__name__)
......
#!/usr/bin/env python3
# Copyright (c) 2021:
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
......@@ -18,11 +18,11 @@
import logging
import pyproj
from shapely.ops import transform
from babelgrid import Babel
# Initialize log
logger = logging.getLogger(__name__)
......
......@@ -38,6 +38,7 @@ setup(
"geopandas",
"rasterio",
"psycopg2",
"pyyaml",
],
extras_require={
"tests": tests_require,
......
#!/usr/bin/env python3
# Copyright (c) 2021:
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
......
#!/usr/bin/env python3
# Copyright (c) 2021:
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
......
#!/usr/bin/env python3
# Copyright (c) 2021:
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
......@@ -19,6 +19,7 @@
import logging
logger = logging.getLogger()
......
#!/usr/bin/env python3
# Copyright (c) 2021:
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
......@@ -24,6 +24,7 @@ import os
import numpy
import affine
# Create an instance for a datasource and a tile
datasource = DataSource(
crs="epsg:3857",
......
#!/usr/bin/env python3
# Copyright (c) 2021:
# Copyright (C) 2021:
# Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment