Commit ddc2f206 authored by Nicolas Garcia Ospina's avatar Nicolas Garcia Ospina
Browse files

Included the vectorization step and polygon cropping in porcessor.py

parent 88957904
Pipeline #19640 passed with stage
in 3 minutes and 42 seconds
...@@ -70,3 +70,11 @@ The `raster_files_index` have the following minimum structure ...@@ -70,3 +70,11 @@ The `raster_files_index` have the following minimum structure
https://ghsl.jrc.ec.europa.eu/download.php https://ghsl.jrc.ec.europa.eu/download.php
Size: ca. 12 Gb Size: ca. 12 Gb
GHS_BUILT_LDSMT_GLOBE_R2018A_3857_30_V2_0.shp (raster_files_index) GHS_BUILT_LDSMT_GLOBE_R2018A_3857_30_V2_0.shp (raster_files_index)
pixel_value_legend:
0 = no data
1 = water surface
2 = land not built-up in any epoch
3 = built in epoch 2000-2014
4 = built in epoch 1990-2000
5 = built in epoch 1975-1990
6 = built before 1975
...@@ -46,13 +46,17 @@ class DataSource: ...@@ -46,13 +46,17 @@ class DataSource:
self.pathname (str): Pathname where the dataset and explanatory dataframe are located. self.pathname (str): Pathname where the dataset and explanatory dataframe are located.
self.raster_files_index (geopandas.geodataframe.GeoDataFrame): self.raster_files_index (geopandas.geodataframe.GeoDataFrame): GeoPandas dataframe
GeoPandas dataframe with raster relative filepaths and respective geometries. with raster relative filepaths and respective geometries.
self.built_pixel_values (list): List with pixel values that represent built areas.
Hints on pixel built_pixel_values available at docs/02_Input_datasets.md
""" """
def __init__(self, crs, pathname, raster_files_index): def __init__(self, crs, pathname, raster_files_index, built_pixel_values):
self.crs = crs self.crs = crs
self.pathname = pathname self.pathname = pathname
self.raster_files_index = geopandas.read_file( self.raster_files_index = geopandas.read_file(
os.path.join(pathname, raster_files_index) os.path.join(pathname, raster_files_index)
) )
self.built_pixel_values = built_pixel_values
...@@ -22,6 +22,8 @@ import os ...@@ -22,6 +22,8 @@ import os
import sys import sys
from obmgapanalysis.datasource import DataSource from obmgapanalysis.datasource import DataSource
from obmgapanalysis.tile import Tile from obmgapanalysis.tile import Tile
from obmgapanalysis.processor import TileProcessor
# Add a logger printing error, warning, info and debug messages to the screen # Add a logger printing error, warning, info and debug messages to the screen
logger = logging.getLogger() logger = logging.getLogger()
...@@ -54,15 +56,42 @@ def main(): ...@@ -54,15 +56,42 @@ def main():
datasource = DataSource( datasource = DataSource(
crs="epsg:3857", crs="epsg:3857",
pathname=OBMGAPANALYSIS_BASE_PATH, pathname=OBMGAPANALYSIS_BASE_PATH,
raster_files_index="GHS_BUILT_LDSMT_GLOBE_R2018A_3857_30_V2_0.shp", raster_files_index="GHS_TEST_INDEX.shp",
built_pixel_values=[6, 5, 4, 3],
) )
logger.info(datasource.raster_files_index.head(5)) logger.info(datasource.raster_files_index.head(5))
logger.info(type(datasource)) logger.info(type(datasource))
# create a tile with the same crs as the DataSource # create a tile with the same crs as the DataSource
tile = Tile("122100203023320202", datasource.crs) tile = Tile("122100200320321022", datasource.crs)
logger.debug(str(tile.geometry)) logger.debug(str(tile.geometry))
logger.info(type(tile)) logger.info(type(tile))
# Find raster files with information for the given tile
raster_files = TileProcessor.intersect_datasource_with_tile(
datasource=datasource, tile=tile
)
# Load the raster files and extract only the pixels in the cells
pixel_values, pixel_georeferences = TileProcessor.get_raster_pixels_in_tile(
raster_filepaths=raster_files, tile=tile
)
# Transform the pixels into a polygon
built_geometry = TileProcessor.polygonize_array(
pixel_values, pixel_georeferences, datasource
)
logger.info(
"The built area polygon with complete pixels is: {}".format(str(built_geometry))
)
clip_built_geometry = TileProcessor.clip_to_tile_extent(built_geometry, tile)
logger.info(
"The built area polygon cropped to the extent of tile {}: {}".format(
tile.quadkey, str(clip_built_geometry)
)
)
# Leave the program # Leave the program
sys.exit() sys.exit()
......
...@@ -22,6 +22,8 @@ import logging ...@@ -22,6 +22,8 @@ import logging
import rasterio import rasterio
from rasterio.mask import mask from rasterio.mask import mask
from shapely.geometry import mapping from shapely.geometry import mapping
from rasterio import features
import geopandas
import os import os
...@@ -32,7 +34,7 @@ logger = logging.getLogger(__name__) ...@@ -32,7 +34,7 @@ logger = logging.getLogger(__name__)
class TileProcessor: class TileProcessor:
@staticmethod @staticmethod
def intersect_datasource_with_tile(datasource, tile): def intersect_datasource_with_tile(datasource, tile):
"""Return a subset of the datasource.raster_files_index with the raster tiles """Returns a subset of the datasource.raster_files_index with the raster tiles
that intersect the given tile.geometry that intersect the given tile.geometry
Args: Args:
...@@ -53,7 +55,7 @@ class TileProcessor: ...@@ -53,7 +55,7 @@ class TileProcessor:
) )
>>> tile = Tile("122100203023320202", datasource.crs) >>> tile = Tile("122100203023320202", datasource.crs)
>>> intersecting_raster_tiles(datasource, tile) >>> intersect_datasource_with_tile(datasource, tile)
Returns: Returns:
index_list (list): List of the filepaths of the raster files that index_list (list): List of the filepaths of the raster files that
...@@ -70,7 +72,7 @@ class TileProcessor: ...@@ -70,7 +72,7 @@ class TileProcessor:
@staticmethod @staticmethod
def get_raster_pixels_in_tile(raster_filepaths, tile): def get_raster_pixels_in_tile(raster_filepaths, tile):
"""Return the subset of the raster and its affine transformation based """Returns the subset of the raster and its affine transformation based
on which pixels intersect with the cell's geometry on which pixels intersect with the cell's geometry
Args: Args:
...@@ -79,7 +81,7 @@ class TileProcessor: ...@@ -79,7 +81,7 @@ class TileProcessor:
tile (tile.Tile): Tile object with quadkey, crs and geometry attributes tile (tile.Tile): Tile object with quadkey, crs and geometry attributes
Returns: Returns:
pixel_values (list of numpy.ndarray): List of matrices with pixel values that pixel_values (list of numpy.ndarray): List of arrays with pixel values that
intersect with the tile.geometry intersect with the tile.geometry
pixel_georeferences (list of affine.Affine): List with affine transformation pixel_georeferences (list of affine.Affine): List with affine transformation
matrices of pixel_values (allows geocoding) matrices of pixel_values (allows geocoding)
...@@ -97,3 +99,63 @@ class TileProcessor: ...@@ -97,3 +99,63 @@ class TileProcessor:
pixel_georeferences.append(masked_band[1]) pixel_georeferences.append(masked_band[1])
return pixel_values, pixel_georeferences return pixel_values, pixel_georeferences
@staticmethod
def polygonize_array(pixel_values, pixel_georeferences, datasource):
"""Returns a Shapely geometry object with built area (multi)polygon for a tile
based on the key pixel_values for the given datasource
Args:
pixel_values (list of numpy.ndarray): List of arrays with pixel values that
intersect with the tile.geometry
pixel_georeferences (list of affine.Affine): List with affine transformation
matrices of pixel_values (allows geocoding)
datasource (datasource.DataSource): DataSource instance with
crs, pathname and raster_files_index attributes.
Returns:
built_area_geometry (shapely.geometry.multipolygon.MultiPolygon): Shapely
polygon or multipolygon
"""
built_pixel_values = datasource.built_pixel_values
results = []
# Run through the list of matrices and extract built up features
for i in range(len(pixel_values)):
features_in_matrix = features.shapes(
pixel_values[i], transform=pixel_georeferences[i]
)
for geometry, pixel_value in features_in_matrix:
if pixel_value in built_pixel_values:
result = {
"properties": {"pixel_value": pixel_value},
"geometry": geometry,
}
results.append(result)
# Completely dissolve all features into one geometry object
if results:
results_geodataframe = geopandas.GeoDataFrame.from_features(results)
built_area_geometry = results_geodataframe.unary_union
return built_area_geometry
@staticmethod
def clip_to_tile_extent(polygon, tile):
"""Returns a Shapely geometry object of a (multi)polygon based on the
intersection of the given geometry (built areas) with the tile.geometry
Args:
polygon (shapely.geometry.multipolygon.MultiPolygon): Shapely polygon
or multipolygon
tile (tile.Tile): Tile object with quadkey, crs and geometry attributes
Returns:
polygon (shapely.geometry.multipolygon.MultiPolygon): Shapely polygon
or multipolygon clipped to the shape of tile.geometry
"""
clipped_polygon = polygon.intersection(tile.geometry)
return clipped_polygon
...@@ -37,6 +37,7 @@ setup( ...@@ -37,6 +37,7 @@ setup(
"rtree", "rtree",
"geopandas", "geopandas",
"rasterio", "rasterio",
"psycopg2",
], ],
extras_require={ extras_require={
"tests": tests_require, "tests": tests_require,
......
...@@ -28,7 +28,9 @@ def test_init(): ...@@ -28,7 +28,9 @@ def test_init():
crs="epsg:3857", crs="epsg:3857",
pathname=os.environ["OBMGAPANALYSIS_BASE_PATH"], pathname=os.environ["OBMGAPANALYSIS_BASE_PATH"],
raster_files_index="GHS_TEST_INDEX.shp", raster_files_index="GHS_TEST_INDEX.shp",
built_pixel_values=[6, 5, 4, 3], # Built pixels in GHSL
) )
assert datasource.crs == "epsg:3857" assert datasource.crs == "epsg:3857"
assert isinstance(datasource.raster_files_index, geopandas.GeoDataFrame) assert isinstance(datasource.raster_files_index, geopandas.GeoDataFrame)
assert isinstance(datasource.built_pixel_values, list)
...@@ -29,6 +29,7 @@ datasource = DataSource( ...@@ -29,6 +29,7 @@ datasource = DataSource(
crs="epsg:3857", crs="epsg:3857",
pathname=os.environ["OBMGAPANALYSIS_BASE_PATH"], pathname=os.environ["OBMGAPANALYSIS_BASE_PATH"],
raster_files_index="GHS_TEST_INDEX.shp", raster_files_index="GHS_TEST_INDEX.shp",
built_pixel_values=[6, 5, 4, 3],
) )
tile = Tile("122100200320321022", "epsg:3857") tile = Tile("122100200320321022", "epsg:3857")
...@@ -60,3 +61,53 @@ def test_get_raster_pixels_in_tile(): ...@@ -60,3 +61,53 @@ def test_get_raster_pixels_in_tile():
# Assert content of matrices # Assert content of matrices
assert pixel_values[1].all() == sample_1.all() assert pixel_values[1].all() == sample_1.all()
assert pixel_georeferences[0][2] == 2549910.0 assert pixel_georeferences[0][2] == 2549910.0
def test_poligonize_array():
query = TileProcessor.intersect_datasource_with_tile(datasource=datasource, tile=tile)
pixel_values, pixel_georeferences = TileProcessor.get_raster_pixels_in_tile(query, tile)
expected_geometry = (
"MULTIPOLYGON (((2549910 4629720, 2549940 4629720, "
"2549970 4629720, 2549970 4629660, 2549940 4629660, "
"2549910 4629660, 2549910 4629720)), ((2549970 4629780, "
"2550000 4629780, 2550090 4629780, 2550090 4629810, "
"2550120 4629810, 2550120 4629750, 2550090 4629750, "
"2550090 4629720, 2550120 4629720, 2550120 4629660, "
"2550090 4629660, 2550090 4629630, 2550060 4629630, "
"2550060 4629660, 2550030 4629660, 2550030 4629690, "
"2550000 4629690, 2550000 4629720, 2549970 4629720, "
"2549970 4629780)))"
)
built_geometry = TileProcessor.polygonize_array(
pixel_values, pixel_georeferences, datasource
)
assert str(built_geometry) == expected_geometry
def test_clip_to_tile_extent():
query = TileProcessor.intersect_datasource_with_tile(datasource=datasource, tile=tile)
pixel_values, pixel_georeferences = TileProcessor.get_raster_pixels_in_tile(query, tile)
expected_clipped_geometry = (
"MULTIPOLYGON (((2549939.26359348 4629720, 2549940 4629720, 2549970 4629720, "
"2549970 4629660, 2549940 4629660, 2549939.26359348 4629660, 2549939.26359348 "
"4629720)), ((2549970 4629780, 2550000 4629780, 2550090 4629780, "
"2550090 4629790.803233128, 2550092.13765005 4629790.803233128, "
"2550092.13765005 4629750, 2550090 4629750, 2550090 4629720, "
"2550092.13765005 4629720, 2550092.13765005 4629660, 2550090 4629660, "
"2550090 4629637.929176555, 2550060 4629637.929176555, 2550060 4629660, "
"2550030 4629660, 2550030 4629690, 2550000 4629690, 2550000 4629720, "
"2549970 4629720, 2549970 4629780)))"
)
built_geometry = TileProcessor.polygonize_array(
pixel_values, pixel_georeferences, datasource
)
clipped_built_geometry = TileProcessor.clip_to_tile_extent(built_geometry, tile)
assert str(clipped_built_geometry) == expected_clipped_geometry
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment