Commit ddc2f206 authored by Nicolas Garcia Ospina's avatar Nicolas Garcia Ospina
Browse files

Included the vectorization step and polygon cropping in porcessor.py

parent 88957904
Pipeline #19640 passed with stage
in 3 minutes and 42 seconds
......@@ -70,3 +70,11 @@ The `raster_files_index` have the following minimum structure
https://ghsl.jrc.ec.europa.eu/download.php
Size: ca. 12 Gb
GHS_BUILT_LDSMT_GLOBE_R2018A_3857_30_V2_0.shp (raster_files_index)
pixel_value_legend:
0 = no data
1 = water surface
2 = land not built-up in any epoch
3 = built in epoch 2000-2014
4 = built in epoch 1990-2000
5 = built in epoch 1975-1990
6 = built before 1975
......@@ -46,13 +46,17 @@ class DataSource:
self.pathname (str): Pathname where the dataset and explanatory dataframe are located.
self.raster_files_index (geopandas.geodataframe.GeoDataFrame):
GeoPandas dataframe with raster relative filepaths and respective geometries.
self.raster_files_index (geopandas.geodataframe.GeoDataFrame): GeoPandas dataframe
with raster relative filepaths and respective geometries.
self.built_pixel_values (list): List with pixel values that represent built areas.
Hints on pixel built_pixel_values available at docs/02_Input_datasets.md
"""
def __init__(self, crs, pathname, raster_files_index):
def __init__(self, crs, pathname, raster_files_index, built_pixel_values):
self.crs = crs
self.pathname = pathname
self.raster_files_index = geopandas.read_file(
os.path.join(pathname, raster_files_index)
)
self.built_pixel_values = built_pixel_values
......@@ -22,6 +22,8 @@ import os
import sys
from obmgapanalysis.datasource import DataSource
from obmgapanalysis.tile import Tile
from obmgapanalysis.processor import TileProcessor
# Add a logger printing error, warning, info and debug messages to the screen
logger = logging.getLogger()
......@@ -54,15 +56,42 @@ def main():
datasource = DataSource(
crs="epsg:3857",
pathname=OBMGAPANALYSIS_BASE_PATH,
raster_files_index="GHS_BUILT_LDSMT_GLOBE_R2018A_3857_30_V2_0.shp",
raster_files_index="GHS_TEST_INDEX.shp",
built_pixel_values=[6, 5, 4, 3],
)
logger.info(datasource.raster_files_index.head(5))
logger.info(type(datasource))
# create a tile with the same crs as the DataSource
tile = Tile("122100203023320202", datasource.crs)
tile = Tile("122100200320321022", datasource.crs)
logger.debug(str(tile.geometry))
logger.info(type(tile))
# Find raster files with information for the given tile
raster_files = TileProcessor.intersect_datasource_with_tile(
datasource=datasource, tile=tile
)
# Load the raster files and extract only the pixels in the cells
pixel_values, pixel_georeferences = TileProcessor.get_raster_pixels_in_tile(
raster_filepaths=raster_files, tile=tile
)
# Transform the pixels into a polygon
built_geometry = TileProcessor.polygonize_array(
pixel_values, pixel_georeferences, datasource
)
logger.info(
"The built area polygon with complete pixels is: {}".format(str(built_geometry))
)
clip_built_geometry = TileProcessor.clip_to_tile_extent(built_geometry, tile)
logger.info(
"The built area polygon cropped to the extent of tile {}: {}".format(
tile.quadkey, str(clip_built_geometry)
)
)
# Leave the program
sys.exit()
......
......@@ -22,6 +22,8 @@ import logging
import rasterio
from rasterio.mask import mask
from shapely.geometry import mapping
from rasterio import features
import geopandas
import os
......@@ -32,7 +34,7 @@ logger = logging.getLogger(__name__)
class TileProcessor:
@staticmethod
def intersect_datasource_with_tile(datasource, tile):
"""Return a subset of the datasource.raster_files_index with the raster tiles
"""Returns a subset of the datasource.raster_files_index with the raster tiles
that intersect the given tile.geometry
Args:
......@@ -53,7 +55,7 @@ class TileProcessor:
)
>>> tile = Tile("122100203023320202", datasource.crs)
>>> intersecting_raster_tiles(datasource, tile)
>>> intersect_datasource_with_tile(datasource, tile)
Returns:
index_list (list): List of the filepaths of the raster files that
......@@ -70,7 +72,7 @@ class TileProcessor:
@staticmethod
def get_raster_pixels_in_tile(raster_filepaths, tile):
"""Return the subset of the raster and its affine transformation based
"""Returns the subset of the raster and its affine transformation based
on which pixels intersect with the cell's geometry
Args:
......@@ -79,7 +81,7 @@ class TileProcessor:
tile (tile.Tile): Tile object with quadkey, crs and geometry attributes
Returns:
pixel_values (list of numpy.ndarray): List of matrices with pixel values that
pixel_values (list of numpy.ndarray): List of arrays with pixel values that
intersect with the tile.geometry
pixel_georeferences (list of affine.Affine): List with affine transformation
matrices of pixel_values (allows geocoding)
......@@ -97,3 +99,63 @@ class TileProcessor:
pixel_georeferences.append(masked_band[1])
return pixel_values, pixel_georeferences
@staticmethod
def polygonize_array(pixel_values, pixel_georeferences, datasource):
"""Returns a Shapely geometry object with built area (multi)polygon for a tile
based on the key pixel_values for the given datasource
Args:
pixel_values (list of numpy.ndarray): List of arrays with pixel values that
intersect with the tile.geometry
pixel_georeferences (list of affine.Affine): List with affine transformation
matrices of pixel_values (allows geocoding)
datasource (datasource.DataSource): DataSource instance with
crs, pathname and raster_files_index attributes.
Returns:
built_area_geometry (shapely.geometry.multipolygon.MultiPolygon): Shapely
polygon or multipolygon
"""
built_pixel_values = datasource.built_pixel_values
results = []
# Run through the list of matrices and extract built up features
for i in range(len(pixel_values)):
features_in_matrix = features.shapes(
pixel_values[i], transform=pixel_georeferences[i]
)
for geometry, pixel_value in features_in_matrix:
if pixel_value in built_pixel_values:
result = {
"properties": {"pixel_value": pixel_value},
"geometry": geometry,
}
results.append(result)
# Completely dissolve all features into one geometry object
if results:
results_geodataframe = geopandas.GeoDataFrame.from_features(results)
built_area_geometry = results_geodataframe.unary_union
return built_area_geometry
@staticmethod
def clip_to_tile_extent(polygon, tile):
"""Returns a Shapely geometry object of a (multi)polygon based on the
intersection of the given geometry (built areas) with the tile.geometry
Args:
polygon (shapely.geometry.multipolygon.MultiPolygon): Shapely polygon
or multipolygon
tile (tile.Tile): Tile object with quadkey, crs and geometry attributes
Returns:
polygon (shapely.geometry.multipolygon.MultiPolygon): Shapely polygon
or multipolygon clipped to the shape of tile.geometry
"""
clipped_polygon = polygon.intersection(tile.geometry)
return clipped_polygon
......@@ -37,6 +37,7 @@ setup(
"rtree",
"geopandas",
"rasterio",
"psycopg2",
],
extras_require={
"tests": tests_require,
......
......@@ -28,7 +28,9 @@ def test_init():
crs="epsg:3857",
pathname=os.environ["OBMGAPANALYSIS_BASE_PATH"],
raster_files_index="GHS_TEST_INDEX.shp",
built_pixel_values=[6, 5, 4, 3], # Built pixels in GHSL
)
assert datasource.crs == "epsg:3857"
assert isinstance(datasource.raster_files_index, geopandas.GeoDataFrame)
assert isinstance(datasource.built_pixel_values, list)
......@@ -29,6 +29,7 @@ datasource = DataSource(
crs="epsg:3857",
pathname=os.environ["OBMGAPANALYSIS_BASE_PATH"],
raster_files_index="GHS_TEST_INDEX.shp",
built_pixel_values=[6, 5, 4, 3],
)
tile = Tile("122100200320321022", "epsg:3857")
......@@ -60,3 +61,53 @@ def test_get_raster_pixels_in_tile():
# Assert content of matrices
assert pixel_values[1].all() == sample_1.all()
assert pixel_georeferences[0][2] == 2549910.0
def test_poligonize_array():
query = TileProcessor.intersect_datasource_with_tile(datasource=datasource, tile=tile)
pixel_values, pixel_georeferences = TileProcessor.get_raster_pixels_in_tile(query, tile)
expected_geometry = (
"MULTIPOLYGON (((2549910 4629720, 2549940 4629720, "
"2549970 4629720, 2549970 4629660, 2549940 4629660, "
"2549910 4629660, 2549910 4629720)), ((2549970 4629780, "
"2550000 4629780, 2550090 4629780, 2550090 4629810, "
"2550120 4629810, 2550120 4629750, 2550090 4629750, "
"2550090 4629720, 2550120 4629720, 2550120 4629660, "
"2550090 4629660, 2550090 4629630, 2550060 4629630, "
"2550060 4629660, 2550030 4629660, 2550030 4629690, "
"2550000 4629690, 2550000 4629720, 2549970 4629720, "
"2549970 4629780)))"
)
built_geometry = TileProcessor.polygonize_array(
pixel_values, pixel_georeferences, datasource
)
assert str(built_geometry) == expected_geometry
def test_clip_to_tile_extent():
query = TileProcessor.intersect_datasource_with_tile(datasource=datasource, tile=tile)
pixel_values, pixel_georeferences = TileProcessor.get_raster_pixels_in_tile(query, tile)
expected_clipped_geometry = (
"MULTIPOLYGON (((2549939.26359348 4629720, 2549940 4629720, 2549970 4629720, "
"2549970 4629660, 2549940 4629660, 2549939.26359348 4629660, 2549939.26359348 "
"4629720)), ((2549970 4629780, 2550000 4629780, 2550090 4629780, "
"2550090 4629790.803233128, 2550092.13765005 4629790.803233128, "
"2550092.13765005 4629750, 2550090 4629750, 2550090 4629720, "
"2550092.13765005 4629720, 2550092.13765005 4629660, 2550090 4629660, "
"2550090 4629637.929176555, 2550060 4629637.929176555, 2550060 4629660, "
"2550030 4629660, 2550030 4629690, 2550000 4629690, 2550000 4629720, "
"2549970 4629720, 2549970 4629780)))"
)
built_geometry = TileProcessor.polygonize_array(
pixel_values, pixel_georeferences, datasource
)
clipped_built_geometry = TileProcessor.clip_to_tile_extent(built_geometry, tile)
assert str(clipped_built_geometry) == expected_clipped_geometry
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment