L1B_P.py 37.3 KB
Newer Older
Daniel Scheffler's avatar
Daniel Scheffler committed
1
# -*- coding: utf-8 -*-
2
3
4
5
6
7
"""
Level 1B Processor:

Detection of global/local geometric displacements.
"""

Daniel Scheffler's avatar
Daniel Scheffler committed
8

9
import collections
10
import os
11
import time
12
import warnings
13
from datetime import datetime, timedelta
14
15

import numpy as np
16
from geopandas import GeoDataFrame
17
from shapely.geometry import box
18
import pytz
19
from typing import Union  # noqa F401  # flake8 issue
Daniel Scheffler's avatar
Daniel Scheffler committed
20

21
from arosics import COREG, DESHIFTER
22
from geoarray import GeoArray
23
24
25
26
27
28
from py_tools_ds.geo.coord_grid import is_coord_grid_equal
from py_tools_ds.geo.coord_calc import corner_coord_to_minmax
from py_tools_ds.geo.coord_trafo import reproject_shapelyGeometry, transform_any_prj
from py_tools_ds.geo.projection import prj_equal, EPSG2WKT, WKT2EPSG
from py_tools_ds.geo.vector.topology import get_overlap_polygon

29
from ..options.config import GMS_config as CFG
30
from ..model.gms_object import GMS_object
31
32
33
34
from .L1A_P import L1A_object
from ..misc import database_tools as DB_T
from ..misc import helper_functions as HLP_F
from ..misc import path_generator as PG
35
from ..misc.logging import GMS_logger
36
from ..misc.spatial_index_mediator import SpatialIndexMediator
37
from ..misc.definition_dicts import get_GMS_sensorcode, get_outFillZeroSaturated
38

39
__author__ = 'Daniel Scheffler'
40
41


42
class Scene_finder(object):
    """Scene_finder class to query the postgreSQL database to find a suitable reference scene for co-registration.

    Workflow: spatial_query() fills self.GDF_ref_scenes with candidate scenes,
    then choose_ref_scene() collects metadata, applies a chain of filters and
    returns the best remaining candidate as a ref_Scene instance.
    """

    def __init__(self, src_boundsLonLat, src_AcqDate, src_prj, src_footprint_poly, sceneID_excluded=None,
                 min_overlap=20, min_cloudcov=0, max_cloudcov=20, plusminus_days=30, plusminus_years=10, logger=None):
        """Initialize Scene_finder.

        :param src_boundsLonLat:    lon/lat bounds of the target scene (used as query envelope)
        :param src_AcqDate:         acquisition date of the target scene
        :param src_prj:             projection of the target scene (passed to prj_equal / reprojection)
        :param src_footprint_poly:  footprint polygon of the target scene (lon/lat)
        :param sceneID_excluded:    scene ID to be excluded from the search (usually the target scene itself)
        :param min_overlap:         minimum overlap of reference scene in percent
        :param min_cloudcov:        minimum cloud cover of reference scene in percent
        :param max_cloudcov:        maximum cloud cover of reference scene in percent
        :param plusminus_days:      maximum time interval between target and reference scene in days
        :param plusminus_years:     maximum time interval between target and reference scene in years
        :param logger:              logger instance (a new GMS_logger is created if not given)
        """
        self.boundsLonLat = src_boundsLonLat
        self.src_AcqDate = src_AcqDate
        self.src_prj = src_prj
        self.src_footprint_poly = src_footprint_poly
        self.sceneID_excluded = sceneID_excluded
        self.min_overlap = min_overlap
        self.min_cloudcov = min_cloudcov
        self.max_cloudcov = max_cloudcov
        self.plusminus_days = plusminus_days
        self.plusminus_years = plusminus_years
        self.logger = logger or GMS_logger('ReferenceSceneFinder')

        # get temporal constraints
        # NOTE: Feb 29 has no equivalent in non-leap years -> shift to Mar 1 in that case
        def add_years(dt, years): return dt.replace(dt.year + years) \
            if not (dt.month == 2 and dt.day == 29) else dt.replace(dt.year + years, 3, 1)
        self.timeStart = add_years(self.src_AcqDate, -plusminus_years)
        timeEnd = add_years(self.src_AcqDate, +plusminus_years)
        # clip the upper search bound to "now" (UTC) — scenes from the future cannot exist
        timeNow = datetime.utcnow().replace(tzinfo=pytz.UTC)
        self.timeEnd = timeEnd if timeEnd <= timeNow else timeNow

        self.possib_ref_scenes = None  # set by self.spatial_query()
        self.GDF_ref_scenes = GeoDataFrame()  # set by self.spatial_query()
        self.ref_scene = None

    def spatial_query(self, timeout=5):
        """Query the postgreSQL database to find possible reference scenes matching the specified criteria.

        :param timeout:     maximum query duration allowed (seconds)
        """
        # delegate the spatio-temporal search to the spatial index mediator server
        SpIM = SpatialIndexMediator(host=CFG.spatial_index_server_host, port=CFG.spatial_index_server_port,
                                    timeout=timeout, retries=10)
        self.possib_ref_scenes = SpIM.getFullSceneDataForDataset(envelope=self.boundsLonLat,
                                                                 timeStart=self.timeStart,
                                                                 timeEnd=self.timeEnd,
                                                                 minCloudCover=self.min_cloudcov,
                                                                 maxCloudCover=self.max_cloudcov,
                                                                 datasetid=CFG.datasetid_spatial_ref,
                                                                 refDate=self.src_AcqDate,
                                                                 maxDaysDelta=self.plusminus_days)

        if self.possib_ref_scenes:
            # fill GeoDataFrame with possible ref scene parameters
            GDF = GeoDataFrame(self.possib_ref_scenes, columns=['object'])
            GDF['sceneid'] = list(GDF['object'].map(lambda scene: scene.sceneid))
            GDF['acquisitiondate'] = list(GDF['object'].map(lambda scene: scene.acquisitiondate))
            GDF['cloudcover'] = list(GDF['object'].map(lambda scene: scene.cloudcover))
            GDF['polyLonLat'] = list(GDF['object'].map(lambda scene: scene.polyLonLat))

            def LonLat2UTM(polyLL):
                # reproject footprint from EPSG:4326 (lon/lat) into the target scene's projection
                return reproject_shapelyGeometry(polyLL, 4326, self.src_prj)

            GDF['polyUTM'] = list(GDF['polyLonLat'].map(LonLat2UTM))
            self.GDF_ref_scenes = GDF

    def _collect_refscene_metadata(self):
        """Collect some reference scene metadata needed for later filtering."""
        GDF = self.GDF_ref_scenes

        # get overlap parameter
        def get_OL_prms(poly): return get_overlap_polygon(poly, self.src_footprint_poly)

        GDF['overlapParams'] = list(GDF['polyLonLat'].map(get_OL_prms))
        GDF['overlap area'] = list(GDF['overlapParams'].map(lambda OL_prms: OL_prms['overlap area']))
        GDF['overlap percentage'] = list(GDF['overlapParams'].map(lambda OL_prms: OL_prms['overlap percentage']))
        GDF['overlap poly'] = list(GDF['overlapParams'].map(lambda OL_prms: OL_prms['overlap poly']))
        del GDF['overlapParams']

        # get processing level of reference scenes
        procL = GeoDataFrame(
            DB_T.get_info_from_postgreSQLdb(CFG.conn_database, 'scenes_proc', ['sceneid', 'proc_level'],
                                            {'sceneid': list(GDF.sceneid)}), columns=['sceneid', 'proc_level'])
        GDF = GDF.merge(procL, on='sceneid', how='left')
        GDF = GDF.where(GDF.notnull(), None)  # replace NaN values with None

        # get path of binary file (only possible if a processing level is known)
        def get_path_binary(GDF_row):
            return PG.path_generator(scene_ID=GDF_row['sceneid'], proc_level=GDF_row['proc_level']) \
                .get_path_imagedata() if GDF_row['proc_level'] else None
        GDF['path_ref'] = GDF.apply(lambda GDF_row: get_path_binary(GDF_row), axis=1)
        GDF['refDs_exists'] = list(GDF['path_ref'].map(lambda p: os.path.exists(p) if p else False))

        # check if a proper entity ID can be gathered from database
        eID = GeoDataFrame(DB_T.get_info_from_postgreSQLdb(CFG.conn_database, 'scenes', ['id', 'entityid'],
                                                           {'id': list(GDF.sceneid)}), columns=['sceneid', 'entityid'])
        GDF = GDF.merge(eID, on='sceneid', how='left')
        self.GDF_ref_scenes = GDF.where(GDF.notnull(), None)

    def _filter_excluded_sceneID(self):
        """Filter reference scene with the same scene ID like the target scene."""
        GDF = self.GDF_ref_scenes
        if not GDF.empty:
            self.logger.info('Same ID filter:  Excluding scene with the same ID like the target scene.')
            self.GDF_ref_scenes = GDF.loc[GDF['sceneid'] != self.sceneID_excluded]
            self.logger.info('%s scenes => %s scenes' % (len(GDF), len(self.GDF_ref_scenes)))

    def _filter_by_overlap(self):
        """Filter all scenes with less spatial overlap than self.min_overlap."""
        GDF = self.GDF_ref_scenes
        if not GDF.empty:
            self.logger.info('Overlap filter:  Excluding all scenes with less than %s percent spatial overlap.'
                             % self.min_overlap)
            self.GDF_ref_scenes = GDF.loc[GDF['overlap percentage'] >= self.min_overlap]
            self.logger.info('%s scenes => %s scenes' % (len(GDF), len(self.GDF_ref_scenes)))

    def _filter_by_proc_status(self):
        """Filter all scenes that have not been processed before according to proc. status (at least L1A is needed)."""
        GDF = self.GDF_ref_scenes
        if not GDF.empty:
            self.logger.info('Processing level filter:  Exclude all scenes that have not been processed before '
                             'according to processing status (at least L1A is needed).')
            self.GDF_ref_scenes = GDF[GDF['proc_level'].notnull()]
            self.logger.info('%s scenes => %s scenes' % (len(GDF), len(self.GDF_ref_scenes)))

    def _filter_by_dataset_existance(self):
        """Filter all scenes where no processed data can be found on fileserver."""
        GDF = self.GDF_ref_scenes
        if not GDF.empty:
            self.logger.info('Existance filter:  Excluding all scenes where no processed data have been found.')
            self.GDF_ref_scenes = GDF[GDF['refDs_exists']]
            self.logger.info('%s scenes => %s scenes' % (len(GDF), len(self.GDF_ref_scenes)))

    def _filter_by_entity_ID_availability(self):
        """Filter all scenes where no proper entity ID can be found in the database (database errors)."""
        GDF = self.GDF_ref_scenes
        if not GDF.empty:
            self.logger.info('DB validity filter:  Exclude all scenes where no proper entity ID can be found in the '
                             'database (database errors).')
            self.GDF_ref_scenes = GDF[GDF['entityid'].notnull()]
            self.logger.info('%s scenes => %s scenes' % (len(GDF), len(self.GDF_ref_scenes)))

    def _filter_by_projection(self):
        """Filter all scenes that have a different projection than the target image."""
        # NOTE: only scenes with existing binary data can be opened to read their projection
        GDF = self.GDF_ref_scenes[self.GDF_ref_scenes.refDs_exists]
        if not GDF.empty:
            # compare projections of target and reference image
            GDF['prj_equal'] = \
                list(GDF['path_ref'].map(lambda path_ref: prj_equal(self.src_prj, GeoArray(path_ref).prj)))

            self.logger.info('Projection filter:  Exclude all scenes that have a different projection than the target '
                             'image.')
            self.GDF_ref_scenes = GDF[GDF['prj_equal']]
            self.logger.info('%s scenes => %s scenes' % (len(GDF), len(self.GDF_ref_scenes)))

    def choose_ref_scene(self):
        """Choose reference scene with minimum cloud cover and maximum overlap.

        :return:    a ref_Scene instance or None if no suitable scene remains after filtering
        """
        if self.possib_ref_scenes:
            # First, collect some relevant reference scene metadata
            self._collect_refscene_metadata()

            # Filter possible scenes by running all filter functions
            self._filter_excluded_sceneID()
            self._filter_by_overlap()
            self._filter_by_proc_status()
            self._filter_by_dataset_existance()
            self._filter_by_entity_ID_availability()
            self._filter_by_projection()

        # Choose the reference scene out of the filtered DataFrame
        if not self.GDF_ref_scenes.empty:
            GDF = self.GDF_ref_scenes
            # keep scenes with minimum cloud cover, then maximum overlap among those
            GDF = GDF[GDF['cloudcover'] == GDF['cloudcover'].min()]
            GDF = GDF[GDF['overlap percentage'] == GDF['overlap percentage'].max()]

            if not GDF.empty:
                GDF_res = GDF.iloc[0]
                return ref_Scene(GDF_res)
        else:
            return None
228

229

230
231
class ref_Scene:
    """Lightweight container holding the attributes of a chosen reference scene.

    Built from a single GeoDataFrame record (one row of Scene_finder.GDF_ref_scenes).
    """

    # mapping: instance attribute name -> GeoDataFrame column name
    _ATTR2COL = (
        ('entity_ID', 'entityid'),
        ('AcqDate', 'acquisitiondate'),
        ('cloudcover', 'cloudcover'),
        ('polyLonLat', 'polyLonLat'),
        ('polyUTM', 'polyUTM'),
        ('proc_level', 'proc_level'),
        ('filePath', 'path_ref'),
    )

    def __init__(self, GDF_record):
        """Copy the relevant columns of the given record onto this instance.

        :param GDF_record:  record providing item access to the reference scene columns
        """
        # the scene ID is the only value that needs a type conversion
        self.scene_ID = int(GDF_record['sceneid'])
        for attr, col in self._ATTR2COL:
            setattr(self, attr, GDF_record[col])
240
241
242
243
244


class L1B_object(L1A_object):
    def __init__(self, L1A_obj=None):
        """Initialize an L1B_object, optionally promoted from an existing L1A_object.

        :param L1A_obj:     L1A_object instance whose attributes are copied onto this object
        """
        super(L1B_object, self).__init__()

        # set defaults
        self._spatRef_available = None
        self.spatRef_scene = None  # set by self.get_spatial_reference_scene()
        self.deshift_results = collections.OrderedDict()

        if L1A_obj:
            # populate attributes from the given L1A object
            # (plain for-loop instead of a list comprehension used only for its side effects)
            for key, value in L1A_obj.__dict__.items():
                setattr(self, key, value)

        # set AFTER copying the L1A attributes so the copied values cannot overwrite them
        self.proc_level = 'L1B'
        self.proc_status = 'initialized'
258
259
260

    @property
    def spatRef_available(self):
        """Whether a suitable spatial reference scene is available.

        Lazily triggers self.get_spatial_reference_scene() on first access
        (which sets self._spatRef_available as a side effect).
        """
        if self._spatRef_available is None:
            self.get_spatial_reference_scene()
        return self._spatRef_available

    @spatRef_available.setter
    def spatRef_available(self, spatRef_available):
        # cache the availability flag so the expensive lookup is not repeated
        self._spatRef_available = spatRef_available

    def get_spatial_reference_scene(self):
        """Find a suitable spatial reference scene for co-registration via database query.

        Sets self.spatRef_scene and self.spatRef_available as side effects.
        """
        boundsLonLat = corner_coord_to_minmax(self.trueDataCornerLonLat)
        footprint_poly = HLP_F.CornerLonLat_to_shapelyPoly(self.trueDataCornerLonLat)
        # search criteria are taken from the job configuration
        RSF = Scene_finder(boundsLonLat, self.acq_datetime, self.meta_odict['coordinate system string'],
                           footprint_poly, self.scene_ID,
                           min_overlap=CFG.spatial_ref_min_overlap,
                           min_cloudcov=CFG.spatial_ref_min_cloudcov,
                           max_cloudcov=CFG.spatial_ref_max_cloudcov,
                           plusminus_days=CFG.spatial_ref_plusminus_days,
                           plusminus_years=CFG.spatial_ref_plusminus_years,
                           logger=self.logger)

        # run spatial query
        self.logger.info('Querying database in order to find a suitable reference scene for co-registration.')
        RSF.spatial_query(timeout=5)
        if RSF.possib_ref_scenes:
            self.logger.info('Query result: %s reference scenes with matching metadata.' % len(RSF.possib_ref_scenes))

            # try to get a spatial reference scene by applying some filter criteria
            self.spatRef_scene = RSF.choose_ref_scene()  # type: Union[ref_Scene, None]
            if self.spatRef_scene:
                self.spatRef_available = True
                self.logger.info('Found a suitable reference image for coregistration: scene ID %s (entity ID %s).'
                                 % (self.spatRef_scene.scene_ID, self.spatRef_scene.entity_ID))
            else:
                self.spatRef_available = False
                self.logger.warning('No scene fulfills all requirements to serve as spatial reference for scene %s '
                                    '(entity ID %s). Coregistration impossible.' % (self.scene_ID, self.entity_ID))

        else:
            self.logger.warning('Spatial query returned no matches. Coregistration impossible.')
            self.spatRef_available = False
303
304

    def _get_reference_image_params_pgSQL(self):
        # TODO implement earlier version of this function as a backup for SpatialIndexMediator
        """postgreSQL query: get IDs of overlapping scenes and select the most suitable scene_ID
        (with respect to DGM, cloud cover).

        DEPRECATED — predecessor of get_spatial_reference_scene() that queries the
        scenes tables directly instead of using the SpatialIndexMediator. Sets the
        imref_* / overlap_* attributes on success as side effects.
        """
        warnings.warn('_get_reference_image_params_pgSQL is deprecated an will not work anymore.', DeprecationWarning)

        # preliminary check using the cloud mask, which region of im2shift is usable for shift correction at all
        # -> pass that region as an argument to the postgresql query
        # scene_ID            = 14536400 # LE71510322000093SGS00 im2shift

        # set query conditions
        min_overlap = 20  # %
        max_cloudcov = 20  # %
        plusminus_days = 30
        AcqDate = self.im2shift_objDict['acquisition_date']
        date_minmax = [AcqDate - timedelta(days=plusminus_days), AcqDate + timedelta(days=plusminus_days)]
        dataset_cond = 'datasetid=%s' % CFG.datasetid_spatial_ref
        cloudcov_cond = 'cloudcover < %s' % max_cloudcov
        # FIXME cloudcover is not yet available for all scenes in proc_level METADATA
        # NOTE: month/day tuple comparison makes the date window year-independent
        dayrange_cond = "(EXTRACT(MONTH FROM scenes.acquisitiondate), EXTRACT(DAY FROM scenes.acquisitiondate)) " \
                        "BETWEEN (%s, %s) AND (%s, %s)" \
                        % (date_minmax[0].month, date_minmax[0].day, date_minmax[1].month, date_minmax[1].day)
        # TODO add further criteria!

        def query_scenes(condlist):
            # query overlapping scenes from the full 'scenes' table, best cloud cover first
            return DB_T.get_overlapping_scenes_from_postgreSQLdb(
                CFG.conn_database,
                table='scenes',
                tgt_corners_lonlat=self.trueDataCornerLonLat,
                conditions=condlist,
                add_cmds='ORDER BY scenes.cloudcover ASC',
                timeout=30000)
        # conditions ordered by descending importance; dropped one by one while relaxing the search
        conds_descImportance = [dataset_cond, cloudcov_cond, dayrange_cond]

        self.logger.info('Querying database in order to find a suitable reference scene for co-registration.')

        count, filt_overlap_scenes = 0, []
        while not filt_overlap_scenes:
            if count == 0:
                # search within already processed scenes
                # this is only the result from scenes_proc
                # -> a reference is only found there if the scene was already in the dataset list at CFG.job start
                res = DB_T.get_overlapping_scenes_from_postgreSQLdb(
                    CFG.conn_database,
                    tgt_corners_lonlat=self.trueDataCornerLonLat,
                    conditions=['datasetid=%s' % CFG.datasetid_spatial_ref],
                    add_cmds='ORDER BY scenes.cloudcover ASC',
                    timeout=25000)
                filt_overlap_scenes = self._sceneIDList_to_filt_overlap_scenes([i[0] for i in res[:50]], 20.)

            else:
                # search within the complete scenes table using fewer filter criteria with each run
                # TODO: use a proper DB index, otherwise this may become too slow as the table grows
                res = query_scenes(conds_descImportance)
                filt_overlap_scenes = self._sceneIDList_to_filt_overlap_scenes([i[0] for i in res[:50]], min_overlap)

                if len(conds_descImportance) > 1:  # FIXME different reference sensor?
                    del conds_descImportance[-1]
                else:  # reduce min_overlap to 10 percent if there are overlapping scenes
                    if res:
                        min_overlap = 10
                        filt_overlap_scenes = \
                            self._sceneIDList_to_filt_overlap_scenes([i[0] for i in res[:50]], min_overlap)

                    # raise warnings if no match found
                    if not filt_overlap_scenes:
                        if not res:
                            warnings.warn('No reference scene found for %s (scene ID %s). Coregistration of this scene '
                                          'failed.' % (self.baseN, self.scene_ID))
                        else:
                            warnings.warn('Reference scenes for %s (scene ID %s) have been found but none has more '
                                          'than %s percent overlap. Coregistration of this scene failed.'
                                          % (self.baseN, self.scene_ID, min_overlap))
                        break
            count += 1

        if filt_overlap_scenes:
            ref_available = False
            for count, sc in enumerate(filt_overlap_scenes):
                if count == 2:  # FIXME abort already at the 3rd scene?
                    warnings.warn('No reference scene for %s (scene ID %s) available. '
                                  'Coregistration of this scene failed.' % (self.baseN, self.scene_ID))
                    break

                # start download of scene data not available and start L1A processing
                def dl_cmd(scene_ID): print('%s %s %s' % (
                    CFG.java_commands['keyword'].strip(),  # FIXME CFG.java_commands is deprecated
                    CFG.java_commands["value_download"].strip(), scene_ID))

                path = PG.path_generator(scene_ID=sc['scene_ID']).get_path_imagedata()

                if not os.path.exists(path):
                    # add scene 2 download to scenes_jobs.missing_scenes

                    # print JAVA download command
                    t_dl_start = time.time()
                    dl_cmd(sc['scene_ID'])

                    # check if scene is downloading
                    download_start_timeout = 5  # seconds
                    # set timout for external processing
                    # -> DEPRECATED BECAUSE CREATION OF EXTERNAL CFG WITHIN CFG IS NOT ALLOWED
                    processing_timeout = 5  # seconds # FIXME increase timeout if processing is really started
                    proc_level = None
                    # poll the DB until the reference scene reaches L1A or a timeout is hit
                    while True:
                        proc_level_chk = DB_T.get_info_from_postgreSQLdb(
                            CFG.conn_database, 'scenes', ['proc_level'], {'id': sc['scene_ID']})[0][0]
                        if proc_level != proc_level_chk:
                            print('Reference scene %s, current processing level: %s' % (sc['scene_ID'], proc_level_chk))
                        proc_level = proc_level_chk
                        if proc_level_chk in ['SCHEDULED', 'METADATA'] and \
                           time.time() - t_dl_start >= download_start_timeout:
                            warnings.warn('Download of reference scene %s (entity ID %s) timed out. '
                                          'Coregistration of this scene failed.' % (self.baseN, self.scene_ID))
                            break
                        if proc_level_chk == 'L1A':
                            ref_available = True
                            break
                        elif proc_level_chk == 'DOWNLOADED' and time.time() - t_dl_start >= processing_timeout:
                            # proc_level='DOWNLOADED' but scene has not been processed
                            warnings.warn('L1A processing of reference scene %s (entity ID %s) timed out. '
                                          'Coregistration of this scene failed.' % (self.baseN, self.scene_ID))
                            break
                            # DB_T.set_info_in_postgreSQLdb(CFG.conn_database,'scenes',
                            #                             {'proc_level':'METADATA'},{'id':sc['scene_ID']})

                        time.sleep(5)
                else:
                    ref_available = True

                if not ref_available:
                    continue
                else:
                    # a usable reference scene was found -> store its parameters on self
                    self.path_imref = path
                    self.imref_scene_ID = sc['scene_ID']
                    self.imref_footprint_poly = sc['scene poly']
                    self.overlap_poly = sc['overlap poly']
                    self.overlap_percentage = sc['overlap percentage']
                    self.overlap_area = sc['overlap area']

                    query_res = DB_T.get_info_from_postgreSQLdb(CFG.conn_database, 'scenes', ['entityid'],
                                                                {'id': self.imref_scene_ID}, records2fetch=1)
                    assert query_res != [], 'No entity-ID found for scene number %s' % self.imref_scene_ID
                    self.imref_entity_ID = query_res[0][0]  # [('LC81510322013152LGN00',)]
                    break
        self.logger.close()
450

451
    def _sceneIDList_to_filt_overlap_scenes(self, sceneIDList, min_overlap):
        """Find reference scenes that sufficiently overlap the target scene footprint.

        :param sceneIDList:  scene IDs to evaluate (the caller passes at most the first 50)
        :param min_overlap:  minimum overlap percentage; falsy -> no filtering
        :return: list of dicts {scene_ID, scene poly, overlap poly, overlap percentage, overlap area}
        """
        tgt_footprint = self.arr.footprint_poly

        # build one record per scene ID and directly attach its overlap parameters
        candidates = []
        for scene_id in sceneIDList:
            record = {'scene_ID': scene_id,
                      'scene poly': HLP_F.scene_ID_to_shapelyPolygon(scene_id)}  # lon/lat polygon
            # adds {overlap poly, overlap percentage, overlap area}
            record.update(get_overlap_polygon(record['scene poly'], tgt_footprint))
            candidates.append(record)

        if not min_overlap:
            return candidates

        # drop every scene whose overlap percentage is below the threshold
        return [rec for rec in candidates if rec['overlap percentage'] >= min_overlap]
472

473
    def get_opt_bands4matching(self, target_cwlPos_nm=550):
        """Automatically determine the optimal bands used for fourier shift theorem matching.

        :param target_cwlPos_nm:    the desired wavelength used for matching
        :return:                    tuple (ref_band4match, shift_band4match) as 1-based band numbers
        """
        # get GMS_object for reference scene
        path_gmsFile = PG.path_generator(scene_ID=self.spatRef_scene.scene_ID).get_path_gmsfile()
        ref_obj = GMS_object.from_disk((path_gmsFile, ['cube', None]))

        # get spectral characteristics (center wavelengths / bandwidths) of both images
        ref_cwl, shift_cwl = [[float(i) for i in GMS_obj.meta_odict['wavelength']] for GMS_obj in [ref_obj, self]]
        ref_fwhm, shift_fwhm = [[float(i) for i in GMS_obj.meta_odict['bandwidths']] for GMS_obj in [ref_obj, self]]

        # exclude cirrus/oxygen band of Landsat-8/Sentinel-2
        shift_bbl, ref_bbl = [False] * len(shift_cwl), [False] * len(ref_cwl)  # bad band lists
        for GMS_obj, bbl in zip([self, ref_obj], [shift_bbl, ref_bbl]):
            GMS_obj.GMS_identifier['logger'] = None  # set a dummy value in order to avoid Exception
            sensorcode = get_GMS_sensorcode(GMS_obj.GMS_identifier)
            if sensorcode in ['LDCM', 'S2A', 'S2B'] and '9' in GMS_obj.LayerBandsAssignment:
                bbl[GMS_obj.LayerBandsAssignment.index('9')] = True  # cirrus band
            if sensorcode in ['S2A', 'S2B'] and '10' in GMS_obj.LayerBandsAssignment:
                bbl[GMS_obj.LayerBandsAssignment.index('10')] = True  # oxygen band

        # helper hoisted out of the loop: is r_cwl inside the FWHM interval around s_cwl?
        def is_inside(r_cwl, s_cwl, s_fwhm):
            return s_cwl - s_fwhm / 2 < r_cwl < s_cwl + s_fwhm / 2

        # find matching band of reference image for each band of image to be shifted
        match_dic = collections.OrderedDict()
        for idx, (cwl, fwhm) in enumerate(zip(shift_cwl, shift_fwhm)):
            if shift_bbl[idx]:
                continue  # skip cwl if it is declared as bad band above

            matching_r_cwls = [r_cwl for i, r_cwl in enumerate(ref_cwl)
                               if is_inside(r_cwl, cwl, fwhm) and not ref_bbl[i]]
            if matching_r_cwls:
                # BUGFIX: the previous 'matching_r_cwls[0] if len(matching_r_cwls) else <argmin>'
                # always took the first match because the else branch was unreachable here;
                # now the spectrally closest reference band is chosen (identical for one match)
                match_dic[cwl] = matching_r_cwls[np.abs(np.array(matching_r_cwls) - cwl).argmin()]

        # set bands4match based on the above results
        poss_cwls = [cwl for cwl in shift_cwl if cwl in match_dic]
        if poss_cwls:
            if not target_cwlPos_nm:
                shift_band4match = shift_cwl.index(poss_cwls[0]) + 1  # first possible shift band
                ref_band4match = ref_cwl.index(match_dic[poss_cwls[0]]) + 1  # matching reference band
            else:  # target_cwlPos is given
                closestWvl_to_target = poss_cwls[np.abs(np.array(poss_cwls) - target_cwlPos_nm).argmin()]
                shift_band4match = shift_cwl.index(closestWvl_to_target) + 1  # the shift band closest to target
                ref_band4match = ref_cwl.index(match_dic[closestWvl_to_target]) + 1  # matching ref
        else:  # all reference bands are outside of shift-cwl +- fwhm/2
            self.logger.warning('Optimal bands for matching could not be automatically determined. '
                                'Choosing first band of each image.')
            shift_band4match = 1
            ref_band4match = 1

        self.logger.info(
            'Target band for matching:    %s (%snm)' % (shift_band4match, shift_cwl[shift_band4match - 1]))
        self.logger.info(
            'Reference band for matching: %s (%snm)' % (ref_band4match, ref_cwl[ref_band4match - 1]))

        return ref_band4match, shift_band4match

534
    def compute_global_shifts(self):
        """Compute global X/Y map shifts between this scene and its spatial reference scene.

        Runs AROSICS COREG on automatically chosen matching bands (see
        self.get_opt_bands4matching) and stores the results in self.coreg_info.
        Co-registration is skipped with a log message if the Spatial Index Mediator
        Server is unavailable, if it is disabled by user configuration, if no suitable
        reference scene is available, or if it is not needed at all.
        """
        # 'GMS_SPAT_IDX_SRV_STATUS' is set to 'unavailable' if the Spatial Index Mediator
        # Server is down; if unset, assume the server is reachable.
        spatIdxSrv_status = os.environ.get('GMS_SPAT_IDX_SRV_STATUS', True)

        if spatIdxSrv_status == 'unavailable':
            self.logger.warning('Coregistration skipped due to unavailable Spatial Index Mediator Server!')

        elif CFG.skip_coreg:
            self.logger.warning('Coregistration skipped according to user configuration.')

        elif self.coreg_needed and self.spatRef_available:
            geoArr_ref = GeoArray(self.spatRef_scene.filePath)
            geoArr_shift = GeoArray(self.arr)
            r_b4match, s_b4match = self.get_opt_bands4matching(target_cwlPos_nm=CFG.coreg_band_wavelength_for_matching)
            coreg_kwargs = dict(
                r_b4match=r_b4match,
                s_b4match=s_b4match,
                align_grids=True,  # FIXME not needed here
                match_gsd=True,  # FIXME not needed here
                max_shift=CFG.coreg_max_shift_allowed,
                ws=CFG.coreg_window_size,
                data_corners_ref=[[x, y] for x, y in self.spatRef_scene.polyUTM.convex_hull.exterior.coords],
                data_corners_tgt=[transform_any_prj(EPSG2WKT(4326), self.meta_odict['coordinate system string'], x, y)
                                  for x, y in HLP_F.reorder_CornerLonLat(self.trueDataCornerLonLat)],
                nodata=(get_outFillZeroSaturated(geoArr_ref.dtype)[0],
                        get_outFillZeroSaturated(geoArr_shift.dtype)[0]),
                ignore_errors=True,
                v=False,
                q=True
            )

            COREG_obj = COREG(geoArr_ref, geoArr_shift, **coreg_kwargs)
            COREG_obj.calculate_spatial_shifts()

            # no clipping to trueCornerLonLat until here -> only shift correction
            self.coreg_info.update(COREG_obj.coreg_info)
            self.coreg_info.update({'reference scene ID': self.spatRef_scene.scene_ID})
            self.coreg_info.update({'reference entity ID': self.spatRef_scene.entity_ID})
            self.coreg_info.update({'shift_reliability': COREG_obj.shift_reliability})

            if COREG_obj.success:
                self.coreg_info['success'] = True
                self.logger.info("Calculated map shifts (X,Y): %s / %s"
                                 % (COREG_obj.x_shift_map,
                                    COREG_obj.y_shift_map))  # FIXME log this directly within calculate_spatial_shifts
                self.logger.info("Reliability of calculated shift: %.1f percent" % COREG_obj.shift_reliability)

            else:
                # TODO add database entry with error hint
                # plain loop instead of a side-effect list comprehension
                for err in COREG_obj.tracked_errors:
                    self.logger.error('ERROR during coregistration of scene %s (entity ID %s):\n%s'
                                      % (self.scene_ID, self.entity_ID, err))

        else:
            if self.coreg_needed:
                self.logger.warning('Coregistration skipped because no suitable reference scene is available or '
                                    'spatial query failed.')
            else:
                self.logger.info('Coregistration of scene %s (entity ID %s) skipped because target dataset ID equals '
                                 'reference dataset ID.' % (self.scene_ID, self.entity_ID))
    def correct_spatial_shifts(self, cliptoextent=True, clipextent=None, clipextent_prj=None, v=False):
        # type: (bool, list, any, bool) -> None
        """Correct the spatial shifts calculated by self.compute_global_shifts().

        All deshiftable array attributes ('arr', 'masks', 'dem', 'ac_errors',
        'mask_clouds_confidence') are warped to the target grid and optionally clipped
        to the given extent. Geo-metadata ('map info', projection, array dimensions)
        is updated accordingly.

        :param cliptoextent:    whether to clip the output to the given extent
        :param clipextent:      list of XY-coordinate tuples giving the target extent (if not given and cliptoextent is
                                True, the 'trueDataCornerLonLat' attribute of the GMS object is used)
        :param clipextent_prj:  WKT projection string or EPSG code of the projection for the coordinates in clipextent
        :param v:               enable verbose mode
        :return:
        """
        # cliptoextent is automatically True if an extent is given
        cliptoextent = cliptoextent if not clipextent else True

        if cliptoextent or self.resamp_needed or (self.coreg_needed and self.coreg_info['success']):

            # get target bounds # TODO implement boxObj call instead here
            if not clipextent:
                # derive the clipping extent from the true data corners, reprojected to the scene projection
                trueDataCornerUTM = [transform_any_prj(EPSG2WKT(4326), self.MetaObj.projection, x, y)
                                     for x, y in self.trueDataCornerLonLat]
                xmin, xmax, ymin, ymax = corner_coord_to_minmax(trueDataCornerUTM)
                mapBounds = box(xmin, ymin, xmax, ymax).bounds
            else:
                assert clipextent_prj, \
                    "'clipextent_prj' must be given together with 'clipextent'. Received only 'clipextent'."
                # reproject the given extent to the scene projection if needed
                clipextent_UTM = clipextent if prj_equal(self.MetaObj.projection, clipextent_prj) else \
                    [transform_any_prj(clipextent_prj, self.MetaObj.projection, x, y) for x, y in clipextent]
                xmin, xmax, ymin, ymax = corner_coord_to_minmax(clipextent_UTM)
                mapBounds = box(xmin, ymin, xmax, ymax).bounds

            # correct shifts and clip to extent
            # ensure self.masks exists (does not exist in case of inmem_serialization mode because
            # then self.fill_from_disk() is skipped)
            if not hasattr(self, 'masks') or self.masks is None:
                self.build_combined_masks_array()  # creates self.masks and self.masks_meta

            # exclude self.mask_nodata, self.mask_clouds from warping
            del self.mask_nodata, self.mask_clouds

            # only deshift attributes that are actually set on this object
            attributes2deshift = [attrname for attrname in
                                  ['arr', 'masks', 'dem', 'ac_errors', 'mask_clouds_confidence']
                                  if getattr(self, '_%s' % attrname) is not None]
            for attrname in attributes2deshift:
                geoArr = getattr(self, attrname)

                # do some logging
                if self.coreg_needed:
                    if self.coreg_info['success']:
                        self.logger.info("Correcting spatial shifts for attribute '%s'..." % attrname)
                    elif cliptoextent and is_coord_grid_equal(
                         geoArr.gt, CFG.spatial_ref_gridx, CFG.spatial_ref_gridy):
                        self.logger.info("Attribute '%s' has only been clipped to it's extent because no valid "
                                         "shifts have been detected and the pixel grid equals the target grid."
                                         % attrname)
                    elif self.resamp_needed:
                        self.logger.info("Resampling attribute '%s' to target grid..." % attrname)
                elif self.resamp_needed:
                    self.logger.info("Resampling attribute '%s' to target grid..." % attrname)

                # correct shifts
                DS = DESHIFTER(geoArr, self.coreg_info,
                               target_xyGrid=[CFG.spatial_ref_gridx, CFG.spatial_ref_gridy],
                               cliptoextent=cliptoextent,
                               clipextent=mapBounds,
                               align_grids=True,
                               resamp_alg='nearest' if attrname == 'masks' else CFG.spatial_resamp_alg,
                               CPUs=None if CFG.allow_subMultiprocessing else 1,
                               progress=v,
                               q=True,
                               v=v)
                DS.correct_shifts()

                # update coreg_info
                if attrname == 'arr':
                    self.coreg_info['is shifted'] = DS.is_shifted
                    self.coreg_info['is resampled'] = DS.is_resampled

                # update geoinformations and array shape related attributes
                self.logger.info("Updating geoinformations of '%s' attribute..." % attrname)
                if attrname == 'arr':
                    self.meta_odict['map info'] = DS.updated_map_info
                    self.meta_odict['coordinate system string'] = EPSG2WKT(WKT2EPSG(DS.updated_projection))
                    self.shape_fullArr = DS.arr_shifted.shape
                    self.meta_odict['lines'], self.meta_odict['samples'] = DS.arr_shifted.shape[:2]
                else:
                    self.masks_meta['map info'] = DS.updated_map_info
                    self.masks_meta['coordinate system string'] = EPSG2WKT(WKT2EPSG(DS.updated_projection))
                    self.masks_meta['lines'], self.masks_meta['samples'] = DS.arr_shifted.shape[:2]

                    # NOTE: mask_nodata and mask_clouds are updated later by L2A_map mapper function (module pipeline)

                # update the GeoArray instance without losing its inherent metadata (nodata, ...)
                geoArr.arr, geoArr.gt, geoArr.prj = \
                    DS.GeoArray_shifted.arr, DS.GeoArray_shifted.gt, DS.GeoArray_shifted.prj
                # setattr(self,attrname, DS.GeoArray_shifted) # NOTE: don't set array earlier because setter will also
                #                                            # update arr.gt/.prj/.nodata from meta_odict

            self.resamp_needed = False
            self.coreg_needed = False

            # recreate self.masks_nodata and self.mask_clouds from self.masks
            self.mask_nodata = self.mask_nodata
            self.mask_clouds = self.mask_clouds
            # FIXME move functionality of self.masks only to output writer and remove self.masks completely