tileprocessor.py 13.9 KB
Newer Older
1
2
#!/usr/bin/env python3

3
# Copyright (C) 2021:
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
#   Helmholtz-Zentrum Potsdam Deutsches GeoForschungsZentrum GFZ
#
# This program is free software: you can redistribute it and/or modify it
# under the terms of the GNU Affero General Public License as published by
# the Free Software Foundation, either version 3 of the License, or (at
# your option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU Affero
# General Public License for more details.
#
# You should have received a copy of the GNU Affero General Public License
# along with this program. If not, see http://www.gnu.org/licenses/.


import logging
21
22
23
import os
from datetime import date

24
25
26
import rasterio
from rasterio.mask import mask
from shapely.geometry import mapping
27
from rasterio import features
28
29
30
import pyproj
from shapely.ops import transform
from functools import partial
31
import geopandas
32
from obmgapanalysis.tile import Tile
33

34
35
36
37
38
39
40
# Initialize log
logger = logging.getLogger(__name__)


class TileProcessor:
    @staticmethod
    def intersect_datasource_with_tile(datasource, tile):
41
        """Returns a subset of the datasource.raster_files_index with the raster tiles
42
43
44
        that intersect the given tile.geometry

        Args:
45
46
            datasource (datasource.DataSource): DataSource instance with crs,
                    pathname and raster_files_index attributes.
47
48
49
50
51

            tile (tile.Tile): Tile object with quadkey, crs and geometry attributes

        Returns:
            index_list (list): List of the filepaths of the raster files that
52
                    intersect with the Tile.geometry.
53
        """
54
55

        raster_files_index = datasource.raster_files_index.copy()
56
        raster_files_index = raster_files_index[raster_files_index.intersects(tile.geometry)]
57
        raster_files_index.loc[:, "location"] = raster_files_index["location"].apply(
58
59
60
61
62
63
64
            lambda x: os.path.join(datasource.pathname, x)
        )
        index_list = raster_files_index.location.tolist()
        return index_list

    @staticmethod
    def get_raster_pixels_in_tile(raster_filepaths, tile):
65
        """Returns the subset of the raster and its affine transformation based
66
67
68
69
70
71
72
73
        on which pixels intersect with the cell's geometry

        Args:
            raster_filepaths (list): List of filepaths (may be only one) to raster files

            tile (tile.Tile): Tile object with quadkey, crs and geometry attributes

        Returns:
74
            pixel_values (list of numpy.ndarray): List of arrays with pixel values that
75
                    intersect with the tile.geometry
76
            pixel_georeferences (list of affine.Affine): List with affine transformation
77
                    matrices of pixel_values (allows geocoding)
78
79
80
81
82
83
84
85
86
87
88
89
90
91

        """
        pixel_values = []
        pixel_georeferences = []

        for i, filepath in enumerate(raster_filepaths):

            with rasterio.open(filepath) as src:
                feature = [mapping(tile.geometry)]
                masked_band = mask(src, feature, crop=True, all_touched=True)
                pixel_values.append(masked_band[0].reshape(-1, masked_band[0].shape[-1]))
                pixel_georeferences.append(masked_band[1])

        return pixel_values, pixel_georeferences
92
93
94
95
96
97
98
99

    @staticmethod
    def polygonize_array(pixel_values, pixel_georeferences, datasource):
        """Returns a Shapely geometry object with built area (multi)polygon for a tile
        based on the key pixel_values for the given datasource

        Args:
            pixel_values (list of numpy.ndarray): List of arrays with pixel values that
100
                    intersect with the tile.geometry
101
102

            pixel_georeferences (list of affine.Affine): List with affine transformation
103
                    matrices of pixel_values (allows geocoding)
104
105

            datasource (datasource.DataSource): DataSource instance with
106
                    crs, pathname and raster_files_index attributes.
107
108
109

        Returns:
            built_area_geometry (shapely.geometry.multipolygon.MultiPolygon): Shapely
110
                            polygon or multipolygon.
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
        """
        built_pixel_values = datasource.built_pixel_values
        results = []
        # Run through the list of matrices and extract built up features
        for i in range(len(pixel_values)):
            features_in_matrix = features.shapes(
                pixel_values[i], transform=pixel_georeferences[i]
            )
            for geometry, pixel_value in features_in_matrix:
                if pixel_value in built_pixel_values:
                    result = {
                        "properties": {"pixel_value": pixel_value},
                        "geometry": geometry,
                    }
                    results.append(result)

        # Completely dissolve all features into one geometry object
        if results:
            results_geodataframe = geopandas.GeoDataFrame.from_features(results)
            built_area_geometry = results_geodataframe.unary_union
131
            return built_area_geometry.buffer(0)
132
133
134
135

    @staticmethod
    def clip_to_tile_extent(polygon, tile):
        """Returns a Shapely geometry object of a (multi)polygon based on the
136
        intersection of the given geometry (built areas) with the tile.geometry.
137
138
139

        Args:
            polygon (shapely.geometry.multipolygon.MultiPolygon): Shapely polygon
140
                    or multipolygon.
141

142
            tile (tile.Tile): Tile object with quadkey, crs and geometry attributes.
143
144
145

        Returns:
            polygon (shapely.geometry.multipolygon.MultiPolygon): Shapely polygon
146
                    or multipolygon clipped to the shape of tile.geometry.
147
148
149
150
151
        """

        clipped_polygon = polygon.intersection(tile.geometry)

        return clipped_polygon
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199

    @staticmethod
    def reproject_polygon(input_polygon, input_crs, target_crs):
        """
        Returns a (multi)polygon object transformed from input_crs to target_crs. if
        "aea" is specified as target crs, returns the Albers equal area transformed
        polygon.

        Args:
            input_polygon (shapely.geometry.polygon.Polygon): Polygon object to reproject.

            input_crs (str): Initial crs of the input_polygon (e.g. "epsg:4326").

            target_crs (str): Target crs for the final polygon object. If "aea" is
                    specified, process the complete Albers equal area transformation.

        Returns:
            geometry (shapely.geometry.polygon.Polygon): Polygon object reprojected to
                    target_crs.
        """
        if target_crs == "aea":
            project = pyproj.Transformer.from_proj(
                pyproj.Proj(input_crs),
                pyproj.Proj("epsg:4326"),
                always_xy=True,
            )
            input_polygon = transform(project.transform, input_polygon)

            project = partial(
                pyproj.transform,
                pyproj.Proj("epsg:4326"),
                pyproj.Proj(
                    proj="aea", lat_1=input_polygon.bounds[1], lat_2=input_polygon.bounds[3]
                ),
            )
            geometry = transform(project, input_polygon)

        else:
            project = pyproj.Transformer.from_proj(
                pyproj.Proj(input_crs),
                pyproj.Proj(target_crs),
                always_xy=True,
            )
            geometry = transform(project.transform, input_polygon)

        return geometry

    @staticmethod
200
    def process_dataframe_with_tile(input_dataframe, tile, buffer_magnitude=0.0):
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
        """
        Returns a (multi)polygon processed with a tile object and, if desired, buffered
        by a certain magnitude.

        Args:
            input_dataframe (geopandas.geodataframe.GeoDataFrame): GeoPandas dataframe to be
                    processed. May contain buildings, roads or other objects.

            tile (tile.Tile): Tile object with quadkey, crs and geometry attributes.

            buffer_magnitude (float): Numeric magnitude for the polygon buffer (units are
                    equal to the coordinate system units). Default = False
        Returns:
            geometry (shapely.geometry.polygon.Polygon): Polygon object resulting from the
                    geometric operations.
        """
217
218
219

        input_dataframe = input_dataframe.to_crs(tile.crs)
        input_dataframe = geopandas.clip(input_dataframe, tile.geometry)
220
        geometry = input_dataframe.unary_union
221
        if buffer_magnitude > 0.0:
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
            geometry = geometry.buffer(buffer_magnitude)
        geometry = TileProcessor.clip_to_tile_extent(geometry, tile)

        return geometry

    @staticmethod
    def polygon_difference(reference_polygon, secondary_polygon):
        """
        Returns a (multi)polygon object based on the geometric difference between two
        polygons.

        Args:
            reference_polygon (shapely.geometry.polygon.Polygon): Geometry of
                    reference polygon.
            secondary_polygon (shapely.geometry.polygon.Polygon): Geometry to
                    compare to the reference geometry
        Returns:
            output_polygon (shapely.geometry.polygon.Polygon): Polygon object
                    representing the difference between the two input polygons.
        """

        output_polygon = reference_polygon.difference(secondary_polygon)

        return output_polygon

    @staticmethod
    def albers_area_calculation(input_polygon, polygon_crs):
        """Return the area of a polygon in the Albers equal area projection.

        Args:
            input_polygon (shapely.geometry.polygon.Polygon): Input polygon object for
                    area calculation.

            polygon_crs (str): Initial crs associated to input_polygon.

        Returns:
            float: Area measured in squared meters for the input_polygon projected to the
                    Albers equal area system.
        """

        polygon = TileProcessor.reproject_polygon(input_polygon, polygon_crs, "aea")

        return polygon.area
265
266
267
268
269
270
271

    @staticmethod
    def build_dictionary(tile, datasource, built_polygon):
        """Returns a dictionary with the built-up area related attributes
        associated to the Tile and a given DataSource.
        Contains:
            quadkey (str): Tile quadkey
272
            source_id (int): Integer associated to a predefined method
273
            built_area (str): Polygon string projected to WGS84 coordinates.
274
            built_area_size (float): Area measured in squared meters.
275
276
277
278
279
280
            last_update (str): Date when the pickle was generated.

        Args:
            tile (tile.Tile): Tile object with quadkey, crs and geometry attributes.

            datasource (datasource.DataSource): DataSource instance with crs,
281
                    pathname, source_id and raster_files_index attributes.
282
283
284
285
286
287
288
289
290
291
292
293
294

            built_polygon (shapely.geometry.multipolygon.MultiPolygon): Shapely
                    polygon or multipolygon of the built area.

        Returns:
            results (dictionary): dictionary with buil-up area information.
        """

        if built_polygon.is_empty:
            logging.info("No built area found in {}".format(tile.quadkey))
            return

        results = {
295
            "quadkey": tile.quadkey,
296
            "source_id": datasource.source_id,
297
            "built_area": TileProcessor.reproject_polygon(built_polygon, tile.crs, "epsg:4326"),
298
            "built_area_size": TileProcessor.albers_area_calculation(built_polygon, tile.crs),
299
300
            "last_update": str(date.today()),
        }
301
302
        if not results["source_id"]:
            del results["source_id"]
303
304
305
306

        return results

    @staticmethod
307
308
309
    def get_build_up_area(
        quadkey, datasource, database, database_crs_number, table_config, buffer_magnitude
    ):
310
311
312
313
314
315
316
        """Run the complete processing of a quadkey and returns a dictionary
        created with TileProcessor.build_dictionary.

        Args:
            quadkey (str): Quadkey code associated with a Bing quadtree tile.

            datasource (datasource.DataSource): DataSource instance with crs,
317
                    pathname, source_id and raster_files_index attributes.
318
319
320
321
322
323
324
325

            database (database.Database): Database instance with credentials
                    and connection ready to perform data queries.

            database_crs_number (str): SRID number of the target table.

            table_config (dict): Dictionary with table name, schema and geometry_field.
                    This is part of the config.yml file.
326
327
328

            buffer_magnitude (float): Numeric magnitude for the polygon buffer (units are
                    equal to the coordinate system units)
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
        Returns:
            results (dictionary): Dictionary with built-up area information.
        """

        tile = Tile(quadkey, datasource.crs)
        # Find raster files and extract relevant pixels
        raster_files = TileProcessor.intersect_datasource_with_tile(
            datasource=datasource, tile=tile
        )
        pixel_values, pixel_georeferences = TileProcessor.get_raster_pixels_in_tile(
            raster_filepaths=raster_files, tile=tile
        )
        # Transform the pixels into a polygon
        built_geometry = TileProcessor.polygonize_array(
            pixel_values, pixel_georeferences, datasource
        )
        if built_geometry is not None:
            clip_built_geometry = TileProcessor.clip_to_tile_extent(built_geometry, tile)

            roads_in_tile = database.get_features_in_tile(
                tile=tile, crs_number=database_crs_number, **table_config
            )
            roads_processed = TileProcessor.process_dataframe_with_tile(
352
                roads_in_tile, tile=tile, buffer_magnitude=buffer_magnitude
353
354
355
356
357
358
            )
            refined_built_area = TileProcessor.polygon_difference(
                clip_built_geometry, roads_processed
            )
            result = TileProcessor.build_dictionary(tile, datasource, refined_built_area)
            return result