# -*- coding: utf-8 -*-
"""
Level 1B Processor:

Detection of global/local geometric displacements.
"""

import collections
import json
import os
import socket
import time
import warnings
from datetime import datetime, timedelta

import numpy as np
from geopandas import GeoDataFrame
from shapely.geometry import box

from arosics import COREG, DESHIFTER
from geoarray import GeoArray
from py_tools_ds.geo.coord_grid import is_coord_grid_equal
from py_tools_ds.geo.coord_calc import corner_coord_to_minmax
from py_tools_ds.geo.coord_trafo import reproject_shapelyGeometry, transform_any_prj
from py_tools_ds.geo.projection import prj_equal, EPSG2WKT, WKT2EPSG
from py_tools_ds.geo.vector.topology import get_overlap_polygon

from ..config import GMS_config as CFG
from .L1A_P import L1A_object
from ..misc import database_tools as DB_T
from ..misc import helper_functions as HLP_F
from ..misc import path_generator as PG
from ..misc.spatial_index_mediator import SpatialIndexMediator
from ..misc.definition_dicts import get_GMS_sensorcode, get_outFillZeroSaturated

__author__ = 'Daniel Scheffler'


class Scene_finder(object):
    def __init__(self, src_boundsLonLat, src_AcqDate, src_prj, src_footprint_poly, min_overlap=20,
                 min_cloudcov=0, max_cloudcov=20, plusminus_days=30, plusminus_years=10):
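        """Initialize the scene finder.

        :param src_boundsLonLat:    lon/lat bounds of the target scene
        :param src_AcqDate:         acquisition date of the target scene
        :param src_prj:             projection of the target scene
        :param src_footprint_poly:  footprint polygon of the target scene
        :param min_overlap:         minimum overlap of the reference scene in percent
        :param min_cloudcov:        minimum cloud cover of the reference scene in percent
        :param max_cloudcov:        maximum cloud cover of the reference scene in percent
        :param plusminus_days:      maximum difference of the acquisition date within the year in days
        :param plusminus_years:     maximum difference of the acquisition year in years
        """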
        self.boundsLonLat = src_boundsLonLat
        self.src_AcqDate = src_AcqDate
        self.src_prj = src_prj
        self.src_footprint_poly = src_footprint_poly
        self.min_overlap = min_overlap
        self.min_cloudcov = min_cloudcov
        self.max_cloudcov = max_cloudcov
        self.plusminus_days = plusminus_days
        self.plusminus_years = plusminus_years

        # get temporal constraints
        def add_years(dt, years): return dt.replace(dt.year + years) \
            if not (dt.month == 2 and dt.day == 29) else dt.replace(dt.year + years, 3, 1)
        self.timeStart = add_years(self.src_AcqDate, -plusminus_years)
        timeEnd = add_years(self.src_AcqDate, +plusminus_years)
        self.timeEnd = timeEnd if timeEnd <= datetime.now() else datetime.now()
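        # NOTE: the upper end of the search interval is clipped to the current date
        #       because no scene can have been acquired in the future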

        self.possib_ref_scenes = None  # set by self.spatial_query()
        self.GDF_ref_scenes = GeoDataFrame()  # set by self.spatial_query()
        self.ref_scene = None

    def spatial_query(self, timeout=5):
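        """Query the SpatialIndexMediator for scenes fulfilling the given spatial and temporal constraints.

        :param timeout:  timeout of a single query in seconds (the query is retried up to 10 times on timeout)
        """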
        for i in range(10):
            try:
                SpIM = SpatialIndexMediator(timeout=timeout)
                self.possib_ref_scenes = \
                    SpIM.getFullSceneDataForDataset(self.boundsLonLat, self.timeStart, self.timeEnd, self.min_cloudcov,
                                                    self.max_cloudcov, CFG.usecase.datasetid_spatial_ref,
                                                    refDate=self.src_AcqDate, maxDaysDelta=self.plusminus_days)
                break
            except socket.timeout:
                if i < 9:
                    continue
                else:
                    raise TimeoutError('Spatial query timed out 10 times!')
                    # TODO catch error when index server is not running:
                    # TODO ConnectionRefusedError: [Errno 111] Connection refused

        if self.possib_ref_scenes:
            # fill GeoDataFrame with possible ref scene parameters
            GDF = GeoDataFrame(self.possib_ref_scenes, columns=['object'])
            GDF['sceneid'] = list(GDF['object'].map(lambda scene: scene.sceneid))
            GDF['acquisitiondate'] = list(GDF['object'].map(lambda scene: scene.acquisitiondate))
            GDF['cloudcover'] = list(GDF['object'].map(lambda scene: scene.cloudcover))
            GDF['polyLonLat'] = list(GDF['object'].map(lambda scene: scene.polyLonLat))

            def LonLat2UTM(polyLL): return reproject_shapelyGeometry(polyLL, 4326, self.src_prj)

            GDF['polyUTM'] = list(GDF['polyLonLat'].map(LonLat2UTM))
            self.GDF_ref_scenes = GDF

    def filter_possib_ref_scenes(self):
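        """Apply all filter methods, each one further reducing self.GDF_ref_scenes."""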
        self._filter_by_overlap()
        self._filter_by_proc_status()
        self._filter_by_dataset_existance()
        self._filter_by_entity_ID_availability()
        self._filter_by_projection()

    def choose_ref_scene(self):
        """choose refence scene with minimum cloud cover and maximum overlap"""

        if not self.GDF_ref_scenes.empty:
            GDF = self.GDF_ref_scenes
            GDF = GDF[GDF['cloudcover'] == GDF['cloudcover'].min()]
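            # among the scenes with minimum cloud cover, pick the one with maximum overlap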
            GDF = GDF[GDF['overlap percentage'] == GDF['overlap percentage'].max()]

            if not GDF.empty:
                GDF_res = GDF.iloc[0]
                return ref_Scene(GDF_res)
        else:
            return None

    def _filter_by_overlap(self):
        GDF = self.GDF_ref_scenes
        if not GDF.empty:
            # get overlap parameter
            def get_OL_prms(poly): return get_overlap_polygon(poly, self.src_footprint_poly)

            GDF['overlapParams'] = list(GDF['polyLonLat'].map(get_OL_prms))
            GDF['overlap area'] = list(GDF['overlapParams'].map(lambda OL_prms: OL_prms['overlap area']))
            GDF['overlap percentage'] = list(GDF['overlapParams'].map(lambda OL_prms: OL_prms['overlap percentage']))
            GDF['overlap poly'] = list(GDF['overlapParams'].map(lambda OL_prms: OL_prms['overlap poly']))
            del GDF['overlapParams']

            # filter out scenes that have too little overlap
            self.GDF_ref_scenes = GDF.loc[GDF['overlap percentage'] >= self.min_overlap]

    def _filter_by_proc_status(self):
        GDF = self.GDF_ref_scenes
        if not GDF.empty:
            # get processing level of reference scenes
            def query_procL(sceneID):
                return DB_T.get_info_from_postgreSQLdb(CFG.job.conn_database, 'scenes_proc', ['proc_level'],
                                                       {'sceneid': sceneID})
            GDF['temp_queryRes'] = list(GDF['sceneid'].map(query_procL))
            GDF['proc_level'] = list(GDF['temp_queryRes'].map(lambda queryRes: queryRes[0][0] if queryRes else None))
            GDF.drop('temp_queryRes', axis=1, inplace=True)

            # filter scenes out that have not yet been processed
            self.GDF_ref_scenes = GDF[GDF['proc_level'].notnull()]

    def _filter_by_dataset_existance(self):
        GDF = self.GDF_ref_scenes
        if not GDF.empty:
            # get path of binary file and check if the corresponding dataset exists

            def get_path_binary(GDF_row):
                return PG.path_generator(scene_ID=GDF_row['sceneid'], proc_level=GDF_row['proc_level'])\
                    .get_path_imagedata()

            def check_exists(path): return os.path.exists(path)
            GDF['path_ref'] = GDF.apply(lambda GDF_row: get_path_binary(GDF_row), axis=1)
            GDF['refDs_exists'] = list(GDF['path_ref'].map(check_exists))

            # filter out scenes where the corresponding dataset does not exist on the fileserver
            self.GDF_ref_scenes = GDF[GDF['refDs_exists']]

    def _filter_by_entity_ID_availability(self):
        GDF = self.GDF_ref_scenes
        if not GDF.empty:
            # check if a proper entity ID can be gathered from database
            def query_eID(sceneID):
                return DB_T.get_info_from_postgreSQLdb(CFG.job.conn_database, 'scenes', ['entityid'],
                                                       {'id': sceneID}, records2fetch=1)
            GDF['temp_queryRes'] = list(GDF['sceneid'].map(query_eID))
            GDF['entityid'] = list(GDF['temp_queryRes'].map(lambda queryRes: queryRes[0][0] if queryRes else None))
            GDF.drop('temp_queryRes', axis=1, inplace=True)

            # filter out scenes that have no entity ID (database errors)
            self.GDF_ref_scenes = GDF[GDF['entityid'].notnull()]

    def _filter_by_projection(self):
        GDF = self.GDF_ref_scenes
        if not GDF.empty:
            # compare projections of target and reference image
            from ..io.input_reader import read_ENVIhdr_to_dict

            def get_prj(path_binary):
                return read_ENVIhdr_to_dict(os.path.splitext(path_binary)[0] + '.hdr')['coordinate system string']

            def is_prj_equal(path_binary): return prj_equal(self.src_prj, get_prj(path_binary))
            GDF['prj_equal'] = list(GDF['path_ref'].map(is_prj_equal))

            # filter out scenes that have a different projection
            self.GDF_ref_scenes = GDF[GDF['prj_equal']]


class ref_Scene:
    def __init__(self, GDF_record):
        self.scene_ID = int(GDF_record['sceneid'])
        self.entity_ID = GDF_record['entityid']
        self.AcqDate = GDF_record['acquisitiondate']
        self.cloudcover = GDF_record['cloudcover']
        self.polyLonLat = GDF_record['polyLonLat']
        self.polyUTM = GDF_record['polyUTM']
        self.proc_level = GDF_record['proc_level']
        self.filePath = GDF_record['path_ref']


class L1B_object(L1A_object):
    def __init__(self, L1A_obj=None):

        super(L1B_object, self).__init__()

        # set defaults
        self._spatRef_available = None
        self.spatRef_scene = None  # set by self.get_spatial_reference_scene()
        self.coreg_info = {}
        self.deshift_results = collections.OrderedDict()

        if L1A_obj:
            # populate attributes
            for key, value in L1A_obj.__dict__.items():
                setattr(self, key, value)

        self.proc_level = 'L1B'

    @property
    def spatRef_available(self):
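        """True if a suitable spatial reference scene is available (runs the spatial query on first access)."""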
        if self._spatRef_available is not None:
            return self._spatRef_available
        else:
            self.get_spatial_reference_scene()
            return self._spatRef_available

    @spatRef_available.setter
    def spatRef_available(self, spatRef_available):
        self._spatRef_available = spatRef_available

    def get_spatial_reference_scene(self):
        boundsLonLat = corner_coord_to_minmax(self.trueDataCornerLonLat)
        footprint_poly = HLP_F.CornerLonLat_to_shapelyPoly(self.trueDataCornerLonLat)
        RSF = Scene_finder(boundsLonLat, self.acq_datetime, self.meta_odict['coordinate system string'],
                           footprint_poly, 20, 0, 20, 30, 10)

        # run spatial query
        self.logger.info('Querying database in order to find a suitable reference scene for co-registration.')
        RSF.spatial_query(timeout=5)
        if RSF.possib_ref_scenes:
            self.logger.info('Query result: %s reference scenes with matching metadata.' % len(RSF.possib_ref_scenes))
        else:
            self.logger.warning('Spatial query returned no matches. Coregistration impossible.')
            self.spatRef_available = False
            return None

        # filter results
        RSF.filter_possib_ref_scenes()
        if RSF.GDF_ref_scenes.empty:
            self.logger.warning('No scene fulfills all requirements to serve as spatial reference for scene %s '
                                '(entity ID %s). Coregistration impossible.' % (self.scene_ID, self.entity_ID))
            self.spatRef_available = False
            return None

        # assign spatial reference scene
        self.spatRef_scene = RSF.choose_ref_scene()
        self.spatRef_available = True
        self.logger.info('Found a suitable reference image for coregistration: scene ID %s (entity ID %s).'
                         % (self.spatRef_scene.scene_ID, self.spatRef_scene.entity_ID))

    def _get_reference_image_params_pgSQL(self):
        # TODO implement earlier version of this function as a backup for SpatialIndexMediator
        """postgreSQL query: get IDs of overlapping scenes and select the most suitable scene ID
        (with respect to DEM availability and cloud cover)."""
        warnings.warn('_get_reference_image_params_pgSQL is deprecated and will not work anymore.', DeprecationWarning)

        # pre-check, based on the cloud mask, which region of im2shift is usable for shift correction at all
        # -> pass that region as an argument to the postgreSQL query
        # scene_ID            = 14536400 # LE71510322000093SGS00 im2shift

        # set query conditions
        min_overlap = 20  # %
        max_cloudcov = 20  # %
        plusminus_days = 30
        AcqDate = self.im2shift_objDict['acquisition_date']
        date_minmax = [AcqDate - timedelta(days=plusminus_days), AcqDate + timedelta(days=plusminus_days)]
        dataset_cond = 'datasetid=%s' % CFG.usecase.datasetid_spatial_ref
        cloudcov_cond = 'cloudcover < %s' % max_cloudcov
        # FIXME cloudcover is not yet available for all scenes at proc_level METADATA
        dayrange_cond = "(EXTRACT(MONTH FROM scenes.acquisitiondate), EXTRACT(DAY FROM scenes.acquisitiondate)) " \
                        "BETWEEN (%s, %s) AND (%s, %s)" \
                        % (date_minmax[0].month, date_minmax[0].day, date_minmax[1].month, date_minmax[1].day)
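        # NOTE: dayrange_cond only compares (month, day), i.e., it matches scenes acquired
        #       in the same season of ANY year within the queried time range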
        # TODO add further criteria!

        def query_scenes(condlist):
            return DB_T.get_overlapping_scenes_from_postgreSQLdb(
                CFG.job.conn_database,
                table='scenes',
                tgt_corners_lonlat=self.trueDataCornerLonLat,
                conditions=condlist,
                add_cmds='ORDER BY scenes.cloudcover ASC',
                timeout=30000)
        conds_descImportance = [dataset_cond, cloudcov_cond, dayrange_cond]

        self.logger.info('Querying database in order to find a suitable reference scene for co-registration.')

        count, filt_overlap_scenes = 0, []
        while not filt_overlap_scenes:
            if count == 0:
                # search within already processed scenes
                # this is only the result from scenes_proc
                # -> a reference is only available there if the scene was already in the dataset list
                #    when CFG.job was started
                res = DB_T.get_overlapping_scenes_from_postgreSQLdb(
                    CFG.job.conn_database,
                    table='scenes_proc',
                    tgt_corners_lonlat=self.trueDataCornerLonLat,
                    conditions=['datasetid=%s' % CFG.usecase.datasetid_spatial_ref],
                    add_cmds='ORDER BY scenes.cloudcover ASC',
                    timeout=25000)
                filt_overlap_scenes = self._sceneIDList_to_filt_overlap_scenes([i[0] for i in res[:50]], 20.)

            else:
                # search within the complete scenes table, using fewer filter criteria with each run
                # TODO: integrate Daniel's index; otherwise this may become too slow as the table grows
                res = query_scenes(conds_descImportance)
                filt_overlap_scenes = self._sceneIDList_to_filt_overlap_scenes([i[0] for i in res[:50]], min_overlap)

                if len(conds_descImportance) > 1:  # FIXME different reference sensor?
                    del conds_descImportance[-1]
                else:  # reduce min_overlap to 10 percent if there are overlapping scenes
                    if res:
                        min_overlap = 10
                        filt_overlap_scenes = \
                            self._sceneIDList_to_filt_overlap_scenes([i[0] for i in res[:50]], min_overlap)

                    # raise warnings if no match found
                    if not filt_overlap_scenes:
                        if not res:
                            warnings.warn('No reference scene found for %s (scene ID %s). Coregistration of this scene '
                                          'failed.' % (self.baseN, self.scene_ID))
                        else:
                            warnings.warn('Reference scenes for %s (scene ID %s) have been found but none has more '
                                          'than %s percent overlap. Coregistration of this scene failed.'
                                          % (self.baseN, self.scene_ID, min_overlap))
                        break
            count += 1

        if filt_overlap_scenes:
            ref_available = False
            for count, sc in enumerate(filt_overlap_scenes):
                if count == 2:  # FIXME abort already at the 3rd scene?
                    warnings.warn('No reference scene for %s (scene ID %s) available. '
343
                                  'Coregistration of this scene failed.' % (self.baseN, self.scene_ID))
                    break

                # start download of scene data if not yet available, then start L1A processing
                def dl_cmd(scene_ID): print('%s %s %s' % (
                    CFG.job.java_commands['keyword'].strip(),  # FIXME CFG.job.java_commands is deprecated
                    CFG.job.java_commands["value_download"].strip(), scene_ID))

                path = PG.path_generator(scene_ID=sc['scene_ID']).get_path_imagedata()

                if not os.path.exists(path):
                    # add scene to be downloaded to scenes_jobs.missing_scenes

                    # print JAVA download command
                    t_dl_start = time.time()
                    dl_cmd(sc['scene_ID'])

                    # check if scene is downloading
                    download_start_timeout = 5  # seconds
                    # set timout for external processing
                    # -> DEPRECATED BECAUSE CREATION OF EXTERNAL CFG.job WITHIN CFG.job IS NOT ALLOWED
                    processing_timeout = 5  # seconds # FIXME increase timeout if processing is really started
                    proc_level = None
                    while True:
                        proc_level_chk = DB_T.get_info_from_postgreSQLdb(
                            CFG.job.conn_database, 'scenes', ['proc_level'], {'id': sc['scene_ID']})[0][0]
                        if proc_level != proc_level_chk:
                            print('Reference scene %s, current processing level: %s' % (sc['scene_ID'], proc_level_chk))
                        proc_level = proc_level_chk
                        if proc_level_chk in ['SCHEDULED', 'METADATA'] and \
                           time.time() - t_dl_start >= download_start_timeout:
                            warnings.warn('Download of the reference scene for %s (scene ID %s) timed out. '
                                          'Coregistration of this scene failed.' % (self.baseN, self.scene_ID))
                            break
                        if proc_level_chk == 'L1A':
                            ref_available = True
                            break
                        elif proc_level_chk == 'DOWNLOADED' and time.time() - t_dl_start >= processing_timeout:
                            # proc_level='DOWNLOADED' but scene has not been processed
                            warnings.warn('L1A processing of the reference scene for %s (scene ID %s) timed out. '
                                          'Coregistration of this scene failed.' % (self.baseN, self.scene_ID))
                            break
                            # DB_T.set_info_in_postgreSQLdb(CFG.job.conn_database,'scenes',
                            #                             {'proc_level':'METADATA'},{'id':sc['scene_ID']})

                        time.sleep(5)
                else:
                    ref_available = True

                if not ref_available:
                    continue
                else:
                    self.path_imref = path
                    self.imref_scene_ID = sc['scene_ID']
                    self.imref_footprint_poly = sc['scene poly']
                    self.overlap_poly = sc['overlap poly']
                    self.overlap_percentage = sc['overlap percentage']
                    self.overlap_area = sc['overlap area']

                    query_res = DB_T.get_info_from_postgreSQLdb(CFG.job.conn_database, 'scenes', ['entityid'],
                                                                {'id': self.imref_scene_ID}, records2fetch=1)
                    assert query_res != [], 'No entity-ID found for scene number %s' % self.imref_scene_ID
                    self.imref_entity_ID = query_res[0][0]  # [('LC81510322013152LGN00',)]
                    break
        self.logger.close()

    def _sceneIDList_to_filt_overlap_scenes(self, sceneIDList, min_overlap):
        """find reference scenes that cover at least 20% of the scene with the given ID
        ONLY FIRST 50 scenes are considered"""

        # t0 = time.time()
        dict_sceneID_poly = [{'scene_ID': ID, 'scene poly': HLP_F.scene_ID_to_shapelyPolygon(ID)}
                             for ID in sceneIDList]  # always returns LonLat polygons

        # get overlap polygons and their parameters. result: [{overlap poly, overlap percentage, overlap area}]
        for dic in dict_sceneID_poly:  # input: dicts {scene_ID, ref_poly}
            dict_overlap_poly_params = get_overlap_polygon(dic['scene poly'], self.arr.footprint_poly)
            dic.update(dict_overlap_poly_params)  # adds {overlap poly, overlap percentage, overlap area}
        # print('polygon creation time', time.time()-t0)

        # filter out scene IDs where the overlap percentage is below min_overlap
        if min_overlap:
            filt_overlap_scenes = [scene for scene in dict_sceneID_poly if scene['overlap percentage'] >= min_overlap]
        else:
            filt_overlap_scenes = dict_sceneID_poly

        return filt_overlap_scenes

    def get_opt_bands4matching(self, target_cwlPos_nm=550, v=False):
        """Automatically determines the optimal bands used für fourier shift theorem matching

        :param target_cwlPos_nm:   the desired wavelength used for matching
        :param v:                  verbose mode
        """
        # get spectral characteristics
        shift_cwl = [float(i) for i in self.meta_odict['wavelength']]
        shift_fwhm = [float(i) for i in self.meta_odict['bandwidths']]
        with open(PG.path_generator(scene_ID=self.spatRef_scene.scene_ID).get_path_gmsfile()) as inF:
            ref_gmsDict = json.load(inF)
        ref_cwl = [float(i) for i in ref_gmsDict['meta']['wavelength']]
        ref_fwhm = [float(i) for i in ref_gmsDict['meta']['bandwidths']]

        # exclude cirrus/oxygen band of Landsat-8/Sentinel-2
        shift_bbl, ref_bbl = [False] * len(shift_cwl), [False] * len(ref_cwl)  # bad band lists
        for dic, s_r in zip([self.__dict__, ref_gmsDict], ['shift', 'ref']):
            dic['GMS_identifier']['logger'] = None  # set a dummy value in order to avoid Exception
            sensorcode = get_GMS_sensorcode(dic['GMS_identifier'])
            if sensorcode in ['LDCM', 'S2A', 'S2B'] and '9' in dic['LayerBandsAssignment']:
                locals()['%s_bbl' % s_r][dic['LayerBandsAssignment'].index('9')] = True
            if sensorcode in ['S2A', 'S2B'] and '10' in dic['LayerBandsAssignment']:
                locals()['%s_bbl' % s_r][dic['LayerBandsAssignment'].index('10')] = True
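            # NOTE: the lookup via locals() works here because item assignment mutates
            #       the respective list object (shift_bbl or ref_bbl) in place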

        # cwl_overlap = (max(min(shift_cwl),min(ref_cwl)),  min(max(shift_cwl),max(ref_cwl))) # -> (min wvl, max wvl)
        # find matching band of reference image for each band of image to be shifted
        match_dic = collections.OrderedDict()
        for idx, cwl, fwhm in zip(range(len(shift_cwl)), shift_cwl, shift_fwhm):
            if shift_bbl[idx]:
                continue  # skip cwl if it is declared as bad band above

            def is_inside(r_cwl, s_cwl, s_fwhm): return s_cwl - s_fwhm / 2 < r_cwl < s_cwl + s_fwhm / 2

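            # collect all reference bands whose central wavelength falls into the
            # FWHM interval of the current target band (bad bands excluded)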
            matching_r_cwls = [r_cwl for i, r_cwl in enumerate(ref_cwl) if
                               is_inside(r_cwl, cwl, fwhm) and not ref_bbl[i]]
            if matching_r_cwls:
                match_dic[cwl] = matching_r_cwls[0] if len(matching_r_cwls) == 1 else \
                    matching_r_cwls[np.abs(np.array(matching_r_cwls) - cwl).argmin()]

        # set bands4match based on the above results
        poss_cwls = [cwl for cwl in shift_cwl if cwl in match_dic]
        if poss_cwls:
            if not target_cwlPos_nm:
                shift_band4match = shift_cwl.index(poss_cwls[0]) + 1  # first possible shift band
                ref_band4match = ref_cwl.index(match_dic[poss_cwls[0]]) + 1  # matching reference band
            else:  # target_cwlPos is given
                closestWvl_to_target = poss_cwls[np.abs(np.array(poss_cwls) - target_cwlPos_nm).argmin()]
                shift_band4match = shift_cwl.index(closestWvl_to_target) + 1  # the shift band closest to target
                ref_band4match = ref_cwl.index(match_dic[closestWvl_to_target]) + 1  # matching ref
        else:  # all reference bands are outside of shift-cwl +- fwhm/2
            warnings.warn('Optimal bands for matching could not be automatically determined. Choosing first band of '
                          'each image.')
            shift_band4match = 1
            ref_band4match = 1

        if v:
            print('Shift band for matching:     %s (%snm)' % (shift_band4match, shift_cwl[shift_band4match - 1]))
            print('Reference band for matching: %s (%snm)' % (ref_band4match, ref_cwl[ref_band4match - 1]))

        return ref_band4match, shift_band4match

    def coregister_spatially(self):
        if self.coreg_needed and self.spatRef_available:
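            # compute the global X/Y shift of the target image against the reference
            # image, using the spectrally best matching band pair (see above)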
            geoArr_ref = GeoArray(self.spatRef_scene.filePath)
            geoArr_shift = GeoArray(self.arr)
            r_b4match, s_b4match = self.get_opt_bands4matching(target_cwlPos_nm=550, v=False)
            coreg_kwargs = dict(
                r_b4match=r_b4match,
                s_b4match=s_b4match,
                align_grids=True,  # FIXME not needed here
                match_gsd=True,  # FIXME not needed here
                data_corners_ref=[[x, y] for x, y in self.spatRef_scene.polyUTM.convex_hull.exterior.coords],
                data_corners_tgt=[transform_any_prj(EPSG2WKT(4326), self.meta_odict['coordinate system string'], x, y)
                                  for x, y in HLP_F.reorder_CornerLonLat(self.trueDataCornerLonLat)],
                nodata=(get_outFillZeroSaturated(geoArr_ref.dtype)[0],
                        get_outFillZeroSaturated(geoArr_shift.dtype)[0]),
                ignore_errors=True,
                v=False,
                q=True
            )
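            # NOTE: ignore_errors=True makes COREG collect failures in tracked_errors
            #       instead of raising, so the pipeline can continue with this scene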

            COREG_obj = COREG(geoArr_ref, geoArr_shift, **coreg_kwargs)
            COREG_obj.calculate_spatial_shifts()

            self.coreg_info.update(
                COREG_obj.coreg_info)  # no clipping to trueCornerLonLat until here -> only shift correction
            self.coreg_info.update({'reference scene ID': self.spatRef_scene.scene_ID})
            self.coreg_info.update({'reference entity ID': self.spatRef_scene.entity_ID})

            if COREG_obj.success:
                self.logger.info("Calculated map shifts (X,Y): %s / %s"
                                 % (COREG_obj.x_shift_map,
                                    COREG_obj.y_shift_map))  # FIXME log this directly within calculate_spatial_shifts

            else:
                # TODO add database entry with error hint
                for err in COREG_obj.tracked_errors:
                    self.logger.error('ERROR during coregistration of scene %s (entity ID %s):\n%s'
                                      % (self.scene_ID, self.entity_ID, err))
        else:
            if self.coreg_needed:
                self.logger.warning('Coregistration skipped because no suitable reference scene is available or '
                                    'spatial query failed.')
            else:
                self.logger.info('Coregistration of scene %s (entity ID %s) skipped because target dataset ID equals '
                                 'reference dataset ID.' % (self.scene_ID, self.entity_ID))

            self.coreg_info.update({'corrected_shifts_px': {'x': 0, 'y': 0}})
            self.coreg_info.update({'corrected_shifts_map': {'x': 0, 'y': 0}})
            self.coreg_info.update({'original map info': self.meta_odict['map info']})
            self.coreg_info.update({'updated map info': None})
            self.coreg_info.update({'reference scene ID': None})
            self.coreg_info.update({'reference entity ID': None})
            self.coreg_info.update({'reference geotransform': None})
            # reference projection must be the own projection in order to avoid overwriting with a wrong EPSG
            self.coreg_info.update({'reference projection': self.meta_odict['coordinate system string']})
            self.coreg_info.update({'reference extent': {'rows': None, 'cols': None}})
            self.coreg_info.update({'reference grid': [list(CFG.usecase.spatial_ref_gridx),
                                                       list(CFG.usecase.spatial_ref_gridy)]})
            self.coreg_info.update({'success': not self.coreg_needed})  # False means spatRef is not available

    def correct_spatial_shifts(self, cliptoextent=True, clipextent=None, clipextent_prj=None, v=False):
        # type: (bool, list, any, bool) -> None
        """Corrects the spatial shifts calculated by self.coregister_spatially().

        :param cliptoextent:    whether to clip the output to the given extent
        :param clipextent:      list of XY-coordinate tuples giving the target extent (if not given and cliptoextent is
                                True, the 'trueDataCornerLonLat' attribute of the GMS object is used)
        :param clipextent_prj:  WKT projection string or EPSG code of the projection for the coordinates in clipextent
        :param v:               verbose mode
        :return:
        """

        # cliptoextent is automatically True if an extent is given
        cliptoextent = cliptoextent if not clipextent else True

        if cliptoextent or self.resamp_needed or (self.coreg_needed and self.coreg_info['success']):

            # get target bounds # TODO implement boxObj call instead here
            if not clipextent:
                trueDataCornerUTM = [transform_any_prj(EPSG2WKT(4326), self.MetaObj.projection, x, y)
                                     for x, y in self.trueDataCornerLonLat]
                xmin, xmax, ymin, ymax = corner_coord_to_minmax(trueDataCornerUTM)
                mapBounds = box(xmin, ymin, xmax, ymax).bounds
            else:
                assert clipextent_prj, \
                    "'clipextent_prj' must be given together with 'clipextent'. Received only 'clipextent'."
                clipextent_UTM = clipextent if prj_equal(self.MetaObj.projection, clipextent_prj) else \
                    [transform_any_prj(clipextent_prj, self.MetaObj.projection, x, y) for x, y in clipextent]
                xmin, xmax, ymin, ymax = corner_coord_to_minmax(clipextent_UTM)
                mapBounds = box(xmin, ymin, xmax, ymax).bounds

            # correct shifts and clip to extent
            # ensure self.masks exists (does not exist in Flink mode because
            # in that case self.fill_from_disk() is skipped)
            if not hasattr(self, 'masks') or self.masks is None:
                self.build_combined_masks_array()  # creates self.masks and self.masks_meta

            # exclude self.mask_nodata, self.mask_clouds from warping
            del self.mask_nodata, self.mask_clouds

            attributes2deshift = [attrname for attrname in
                                  ['arr', 'masks', 'dem', 'ac_errors', 'mask_clouds_confidence']
                                  if getattr(self, '_%s' % attrname) is not None]
            for attrname in attributes2deshift:
                geoArr = getattr(self, attrname)

                # do some logging
                if self.coreg_needed:
                    if self.coreg_info['success']:
                        self.logger.info("Correcting spatial shifts for attribute '%s'..." % attrname)
                    elif cliptoextent and is_coord_grid_equal(
                         geoArr.gt, CFG.usecase.spatial_ref_gridx, CFG.usecase.spatial_ref_gridy):
                        self.logger.info("Attribute '%s' has only been clipped to it's extent because no valid "
                                         "shifts have been detected and the pixel grid equals the target grid."
                                         % attrname)
                    elif self.resamp_needed:
                        self.logger.info("Resampling attribute '%s' to target grid..." % attrname)
                elif self.resamp_needed:
                    self.logger.info("Resampling attribute '%s' to target grid..." % attrname)

                # correct shifts
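                # DESHIFTER applies the calculated shifts to the GeoArray, resamples it
                # to the target grid and optionally clips it to the given map bounds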
                DS = DESHIFTER(geoArr, self.coreg_info,
                               target_xyGrid=[CFG.usecase.spatial_ref_gridx, CFG.usecase.spatial_ref_gridy],
                               cliptoextent=cliptoextent,
                               clipextent=mapBounds,
                               align_grids=True,
                               resamp_alg='nearest' if attrname == 'masks' else 'cubic',
                               CPUs=None if CFG.job.allow_subMultiprocessing else 1,
                               progress=v,
                               q=True,
                               v=v)
                DS.correct_shifts()

                # update coreg_info
                if attrname == 'arr':
                    self.coreg_info['is shifted'] = DS.is_shifted
                    self.coreg_info['is resampled'] = DS.is_resampled

                # update geoinformation and array-shape-related attributes
                self.logger.info("Updating geoinformations of '%s' attribute..." % attrname)
                if attrname == 'arr':
                    self.meta_odict['map info'] = DS.updated_map_info
                    self.meta_odict['coordinate system string'] = EPSG2WKT(WKT2EPSG(DS.updated_projection))
                    self.shape_fullArr = DS.arr_shifted.shape
                    self.meta_odict['lines'], self.meta_odict['samples'] = DS.arr_shifted.shape[:2]
                else:
                    self.masks_meta['map info'] = DS.updated_map_info
                    self.masks_meta['coordinate system string'] = EPSG2WKT(WKT2EPSG(DS.updated_projection))
                    self.masks_meta['lines'], self.masks_meta['samples'] = DS.arr_shifted.shape[:2]

                    # NOTE: mask_nodata and mask_clouds are updated later by L2A_map mapper function (module pipeline)

                # update the GeoArray instance without losing its inherent metadata (nodata, ...)
                geoArr.arr, geoArr.gt, geoArr.prj = \
                    DS.GeoArray_shifted.arr, DS.GeoArray_shifted.gt, DS.GeoArray_shifted.prj
                # setattr(self,attrname, DS.GeoArray_shifted) # NOTE: don't set array earlier because setter will also
                #                                            # update arr.gt/.prj/.nodata from meta_odict

            self.resamp_needed = False
            self.coreg_needed = False

            # recreate self.mask_nodata and self.mask_clouds from self.masks
            self.mask_nodata = self.mask_nodata
            self.mask_clouds = self.mask_clouds
            # FIXME move functionality of self.masks only to output writer and remove self.masks completely