gms_object.py 93.5 KB
Newer Older
1
2
3
4
5
# -*- coding: utf-8 -*-

import collections
import copy
import datetime
6
import functools
7
8
9
10
11
12
13
import glob
import json
import os
import re
import shutil
import sys
import warnings
14
import logging
15
from collections import OrderedDict
16
from itertools import chain
17
from typing import Iterable, List, Union, TYPE_CHECKING  # noqa F401  # flake8 issue
18
19
20

import numpy as np
import spectral
21
from spectral.io import envi
22
from numba import jit
23
from pandas import DataFrame, read_csv
24
from nested_dict import nested_dict
25

26
27
28
29
try:
    from osgeo import gdalnumeric
except ImportError:
    import gdalnumeric
30

31
from geoarray import GeoArray
32
from py_tools_ds.geo.coord_grid import is_coord_grid_equal
33
from py_tools_ds.geo.projection import EPSG2WKT
34
35
36
from py_tools_ds.geo.map_info import geotransform2mapinfo, mapinfo2geotransform
from py_tools_ds.geo.coord_calc import calc_FullDataset_corner_positions
from py_tools_ds.geo.coord_trafo import pixelToLatLon, pixelToMapYX
37
from sicor.options import get_options as get_ac_options
38

39
from ..misc.logging import GMS_logger as DatasetLogger
40
from ..model.mgrs_tile import MGRS_tile
41
from ..model.metadata import METADATA, get_dict_LayerOptTherm, metaDict_to_metaODict, layerdependent_metadata
42
43
44
from ..model.dataset import Dataset
from ..misc import path_generator as PG
from ..misc import database_tools as DB_T
45
from ..options.config import GMS_config as CFG
46
47
48
from ..algorithms import geoprocessing as GEOP
from ..io import input_reader as INP_R
from ..io import output_writer as OUT_W
49
50
from ..misc import helper_functions as HLP_F
from ..misc import definition_dicts as DEF_D
51
from ..misc.locks import IOLock
52

53
54
55
if TYPE_CHECKING:
    from ..algorithms.L1C_P import L1C_object  # noqa F401  # flake8 issue

56
__author__ = 'Daniel Scheffler'
57
58


59
class GMS_object(Dataset):
60
61
62
63
    # class attributes
    # NOTE: these attributes can be modified and seen by ALL GMS_object instances
    proc_status_all_GMSobjs = nested_dict()

64
65
66
67
    def __init__(self, pathImage=''):
        # type: (str) -> None
        """Initialize a GMS_object instance.

        :param pathImage:  optional path to an image file; if given, it is passed to the
                           'arr' setter of the base class 'Dataset' (creates a GeoArray)
        """
        # get all attributes of base class "Dataset"
        super(GMS_object, self).__init__()

        # add private attributes (backing fields for the lazily computed properties below)
        self._dict_LayerOptTherm = None
        self._cloud_masking_algorithm = None
        self._meta_odict = None
        self._coreg_info = None

        self.job_ID = CFG.ID

        # -9999 acts as the 'unset' sentinel for scene_ID; only query the database for real scene IDs
        self.dataset_ID = -9999 if self.scene_ID == -9999 else \
            int(DB_T.get_info_from_postgreSQLdb(CFG.conn_database, 'scenes', ['datasetid'],
                                                {'id': self.scene_ID})[0][0])
        self.scenes_proc_ID = None  # set by Output writer after creation/update of db record in table scenes_proc
        self.mgrs_tiles_proc_ID = None  # set by Output writer after creation/update of db rec in table mgrs_tiles_proc
        self.MGRS_info = None
        self.lonlat_arr = None  # set by self.write_tiles_to_ENVIfile
        self.trueDataCornerUTM = None  # set by self.from_tiles

        # set pathes
        self.path_cloud_class_obj = ''

        # handle initialization arguments
        if pathImage:
            # run the setter for 'arr' of the base class 'Dataset' which creates an Instance of GeoArray
            self.arr = pathImage
91
92
93
94

    def __getstate__(self):
        """Defines how the attributes of GMS object are pickled.

        Closes loggers and drops the path generator (only valid for the current processing
        level) before handing __dict__ to pickle.
        """
        self.close_loggers()
        del self.pathGen  # path generator can only be used for the current processing level

        # delete arrays if their in-mem size is to big to be pickled
        # => (avoids MaybeEncodingError: Error sending result: '[<gms_preprocessing.algorithms.L2C_P.L2C_object
        #    object at 0x7fc44f6399e8>]'. Reason: 'error("'i' format requires -2147483648 <= number <= 2147483647",)')
        if self.proc_level == 'L2C' and CFG.inmem_serialization:
            self.flush_array_data()

        return self.__dict__

106
    def set_pathes(self):
        """Set the file/directory path attributes of this object from the path generator.

        NOTE: statement order matters here — self.pathGen is re-instantiated in the middle
        of the sequence so that the new generator sees the attributes set just before
        (baseN, path_procdata, ExtractedFolder, path_logfile).
        """
        self.baseN = self.pathGen.get_baseN()
        self.path_procdata = self.pathGen.get_path_procdata()
        self.ExtractedFolder = self.pathGen.get_path_tempdir()
        self.path_logfile = self.pathGen.get_path_logfile()
        self.pathGen = PG.path_generator(self.__dict__)  # passes a logger in addition to previous attributes
        self.path_archive = self.pathGen.get_local_archive_path_baseN()

        if not CFG.inmem_serialization:
            # on-disk intermediate file for the preprocessor (subsystem suffix only if present)
            self.path_InFilePreprocessor = os.path.join(self.ExtractedFolder, '%s%s_DN.bsq'
                                                        % (self.entity_ID,
                                                           ('_%s' % self.subsystem if self.subsystem else '')))
        else:  # keep data in memory
            self.path_InFilePreprocessor = None  # None: keeps all produced data in memory (numpy array attributes)

        self.path_MetaPreprocessor = self.path_archive

    def validate_pathes(self):
        """Validate the raw data archive path and the temporary extraction folder.

        Tries to download the dataset if no archive is found, creates the extraction
        folder if missing and asserts that both locations exist afterwards.

        :raises RuntimeError:    if the archive download fails (via self._data_downloader)
        :raises AssertionError:  if the archive path or the temporary directory is invalid
        """
        if not os.path.isfile(self.path_archive) and not os.path.isdir(self.path_archive):
            # BUGFIX: added missing space between 'archive' and 'has been' in the log message
            self.logger.info("The %s dataset '%s' has not been processed earlier and no corresponding raw data archive "
                             "has been found at %s." % (self.sensor, self.entity_ID, self.path_archive))
            self.logger.info('Trying to download the dataset...')
            self.path_archive_valid = self._data_downloader(self.sensor, self.entity_ID)
        else:
            self.path_archive_valid = True

        if not CFG.inmem_serialization and self.ExtractedFolder and not os.path.isdir(self.ExtractedFolder):
            os.makedirs(self.ExtractedFolder)

        assert os.path.exists(self.path_archive), 'Invalid path to RAW data. File %s does not exist at %s.' \
                                                  % (os.path.basename(self.path_archive),
                                                     os.path.dirname(self.path_archive))
        assert isinstance(self.path_archive, str), 'Invalid path to RAW data. Got %s instead of string or unicode.' \
                                                   % type(self.path_archive)
        if not CFG.inmem_serialization and self.ExtractedFolder:
            # BUGFIX: previously asserted os.path.exists(self.path_archive) here (already checked above),
            # although the error message refers to the temporary directory
            assert os.path.exists(self.ExtractedFolder), \
                'Invalid path for temporary files. Directory %s does not exist.' % self.ExtractedFolder
143
144
145
146
147
148
149
150

    @property
    def logger(self):
        """Return the dataset logger, lazily (re)creating it if needed.

        Returns None while loggers are explicitly disabled (e.g. during pickling/copying).
        """
        if self._loggers_disabled:
            return None
        if self._logger and self._logger.handlers[:]:
            # an alive logger with attached handlers exists -> reuse it
            return self._logger
        else:
            self._logger = DatasetLogger('log__' + self.baseN, fmt_suffix=self.scene_ID, path_logfile=self.path_logfile,
                                         log_level=CFG.log_level, append=True)
            return self._logger

    @logger.setter
    def logger(self, logger):
        # only a real Logger instance or the sentinel values 'not set'/None are accepted
        assert isinstance(logger, logging.Logger) or logger in ['not set', None], \
            "GMS_obj.logger can not be set to %s." % logger

        # save prior logs
        # if logger is None and self._logger is not None:
        #    self.log += self.logger.captured_stream
        self._logger = logger

165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
    @property
    def proc_status(self):
        # type: () -> str
        """Get the processing status of the current GMS_object (subclass) instance for the current processing level.

        Possible values: 'initialized', 'running', 'finished', 'failed'
        """
        # proc_status_all_GMSobjs is a class attribute, i.e. shared and visible across all subsystems
        registry = self.proc_status_all_GMSobjs
        return registry[self.scene_ID][self.subsystem][self.proc_level]

    @proc_status.setter
    def proc_status(self, new_status):
        # type: (str) -> None
        registry = self.proc_status_all_GMSobjs
        registry[self.scene_ID][self.subsystem][self.proc_level] = new_status

181
182
    @property
    def GMS_identifier(self):
        """Return an OrderedDict identifying the scene (image type, platform, sensor, subsystem,
        processing level, dataset ID) together with the current logger instance."""
        return collections.OrderedDict([
            ('image_type', self.image_type),
            ('Satellite', self.satellite),
            ('Sensor', self.sensor),
            ('Subsystem', self.subsystem),
            ('proc_level', self.proc_level),
            ('dataset_ID', self.dataset_ID),
            ('logger', self.logger),
        ])
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202

    @property
    def MetaObj(self):
        """Return the METADATA instance, kept in sync with meta_odict.

        Only one of (_MetaObj, _meta_odict) is held at a time; the other is derived on access.
        """
        if self._meta_odict:
            # if there is already a meta_odict -> create a new MetaObj from it (ensures synchronization!)
            self._MetaObj = METADATA(self.GMS_identifier).from_odict(self._meta_odict)
            del self.meta_odict
        elif not self._MetaObj:
            # if there is no meta_odict and no MetaObj -> create MetaObj by reading metadata from disk
            pass  # reading from disk should use L1A_P.L1A_object.import_metadata -> so just return None

        return self._MetaObj

    @MetaObj.setter
    def MetaObj(self, MetaObj):
        assert isinstance(MetaObj, METADATA), "'MetaObj' can only be set to an instance of METADATA class. " \
                                              "Got %s." % type(MetaObj)
        self._MetaObj = MetaObj

        # update meta_odict
        del self.meta_odict  # it is recreated if getter is used the next time

    @MetaObj.deleter
    def MetaObj(self):
        # close an attached logger first, so no handler leaks when the object is dropped
        if hasattr(self, '_MetaObj') and self._MetaObj and hasattr(self._MetaObj, 'logger') and \
                self._MetaObj.logger not in [None, 'not set']:
            self._MetaObj.logger.close()
            self._MetaObj.logger = None

        self._MetaObj = None

    @property
    def meta_odict(self):
        """Return the metadata ordered dictionary, kept in sync with MetaObj.

        Only one of (_MetaObj, _meta_odict) is held at a time; the other is derived on access.
        """
        if self._MetaObj:
            # if there is already a MetaObj -> create new meta_odict from it (ensures synchronization!)
            self._meta_odict = self._MetaObj.to_odict()
            del self.MetaObj
        elif not self._meta_odict:
            # if there is no MetaObj and no meta_odict -> use MetaObj getter to read metadata from disk
            pass  # reading from disk should use L1A_P.L1A_object.import_metadata -> so just return None
            # normalize any falsy value (e.g. empty dict) to None
            self._meta_odict = None

        return self._meta_odict

    @meta_odict.setter
    def meta_odict(self, odict):
        assert isinstance(odict, (collections.OrderedDict, dict)), "'meta_odict' can only be set to an instance of " \
                                                                   "collections.OrderedDict. Got %s." % type(odict)
        self._meta_odict = odict

        # update MetaObj
        del self.MetaObj  # it is recreated if getter is used the next time

    @meta_odict.deleter
    def meta_odict(self):
        self._meta_odict = None

244
245
246
247
248
    @property
    def dict_LayerOptTherm(self):
        """Return the mapping of layers to 'optical'/'thermal', computed lazily from the
        layer bands assignment; None if no assignment is available yet."""
        if self._dict_LayerOptTherm:
            return self._dict_LayerOptTherm
        if not self.LayerBandsAssignment:
            return None
        self._dict_LayerOptTherm = get_dict_LayerOptTherm(self.identifier, self.LayerBandsAssignment)
        return self._dict_LayerOptTherm

    @property
    def georef(self):
        """Returns True if the current dataset can serve as spatial reference."""
        is_remote_sensing_data = self.image_type == 'RSD'
        is_oli_sensor = re.search('OLI', self.sensor, re.I)
        return bool(is_remote_sensing_data and is_oli_sensor)

    @property
    def coreg_needed(self):
262
        if self._coreg_needed is None:
263
            self._coreg_needed = not (self.dataset_ID == CFG.datasetid_spatial_ref)
264
        return self._coreg_needed
265
266
267
268
269

    @coreg_needed.setter
    def coreg_needed(self, value):
        self._coreg_needed = value

270
271
272
273
274
275
276
277
    @property
    def coreg_info(self):
        """Return the co-registration result dictionary, initialized with neutral defaults
        (zero shifts, success=False) on first access."""
        if not self._coreg_info:
            self._coreg_info = {
                'corrected_shifts_px': {'x': 0, 'y': 0},
                'corrected_shifts_map': {'x': 0, 'y': 0},
                'original map info': self.meta_odict['map info'],
                'updated map info': None,
                'shift_reliability': None,
                'reference scene ID': None,
                'reference entity ID': None,
                'reference geotransform': None,
                # reference projection must be the own projection in order to avoid overwriting with a wrong EPSG
                'reference projection': self.meta_odict['coordinate system string'],
                'reference extent': {'rows': None, 'cols': None},
                'reference grid': [list(CFG.spatial_ref_gridx),
                                   list(CFG.spatial_ref_gridy)],
                'success': False
            }

        return self._coreg_info

    @coreg_info.setter
    def coreg_info(self, val):
        self._coreg_info = val

296
297
298
299
    @property
    def resamp_needed(self):
        """Return whether resampling to the spatial reference grid is required
        (lazily derived from the current geotransform; cached)."""
        if self._resamp_needed is None:
            gt = mapinfo2geotransform(self.meta_odict['map info'])
            on_ref_grid = is_coord_grid_equal(gt, CFG.spatial_ref_gridx,
                                              CFG.spatial_ref_gridy)
            self._resamp_needed = not on_ref_grid
        return self._resamp_needed

    @resamp_needed.setter
    def resamp_needed(self, value):
        self._resamp_needed = value

    @property
    def masks(self):
        """Return the combined masks GeoArray (nodata + clouds)."""
        # if self.mask_nodata is not None and self.mask_clouds is not None and \
        #     self._masks is not None and self._masks.bands==1:
        #     self.build_combined_masks_array()
        return self._masks

    @masks.setter
    def masks(self, *geoArr_initArgs):
        """
        NOTE: This does not automatically update mask_nodata and mask_clouds BUT if mask_nodata and mask_clouds are
        None their getters will automatically synchronize!
        """
        if geoArr_initArgs[0] is not None:
            self._masks = GeoArray(*geoArr_initArgs)
            self._masks.nodata = 0
            # inherit geotransform and projection from the main image array
            self._masks.gt = self.arr.gt
            self._masks.prj = self.arr.prj
        else:
            del self.masks

    @masks.deleter
    def masks(self):
        self._masks = None

336
337
338
339
340
341
342
343
344
345
346
347
348
    @property
    def mask_clouds_confidence(self):
        """Return the cloud mask confidence GeoArray (one 'confidence' band)."""
        return self._mask_clouds_confidence

    @mask_clouds_confidence.setter
    def mask_clouds_confidence(self, *geoArr_initArgs):
        if geoArr_initArgs[0] is not None:
            cnfArr = GeoArray(*geoArr_initArgs)

            # confidence layer must match the X/Y dimensions of the image array
            assert cnfArr.shape == self.arr.shape[:2], \
                "The 'mask_clouds_confidence' GeoArray can only be instanced with an array of the same dimensions " \
                "like GMS_obj.arr. Got %s." % str(cnfArr.shape)

            # noinspection PyProtectedMember
            if cnfArr._nodata is None:
                # derive a suitable fill value from the array dtype
                cnfArr.nodata = DEF_D.get_outFillZeroSaturated(cnfArr.dtype)[0]
            cnfArr.gt = self.arr.gt
            cnfArr.prj = self.arr.prj
            cnfArr.bandnames = ['confidence']

            self._mask_clouds_confidence = cnfArr
        else:
            del self.mask_clouds_confidence

    @mask_clouds_confidence.deleter
    def mask_clouds_confidence(self):
        self._mask_clouds_confidence = None

    @property
    def ac_errors(self):
        """Returns an instance of GeoArray containing error information calculated by the atmospheric correction.

        :return:
        """
        return self._ac_errors

    @ac_errors.setter
    def ac_errors(self, *geoArr_initArgs):
        if geoArr_initArgs[0] is not None:
            errArr = GeoArray(*geoArr_initArgs)

            if CFG.ac_bandwise_accuracy:
                # band-wise accuracy -> full 3D shape must match the image cube
                assert errArr.shape == self.arr.shape, \
                    "The 'ac_errors' GeoArray can only be instanced with an array of the same dimensions like " \
                    "GMS_obj.arr. Got %s." % str(errArr.shape)
            else:
                # single accuracy layer -> only X/Y must match
                assert errArr.shape[:2] == self.arr.shape[:2], \
                    "The 'ac_errors' GeoArray can only be instanced with an array of the same X/Y dimensions like " \
                    "GMS_obj.arr. Got %s." % str(errArr.shape)

            # noinspection PyProtectedMember
            if errArr._nodata is None:
                errArr.nodata = DEF_D.get_outFillZeroSaturated(errArr.dtype)[0]
            errArr.gt = self.arr.gt
            errArr.prj = self.arr.prj
            # 3D array gets per-band names; a 2D array is treated as the band-median error
            errArr.bandnames = self.LBA2bandnames(self.LayerBandsAssignment) if errArr.ndim == 3 else ['median']

            self._ac_errors = errArr
        else:
            del self.ac_errors

    @ac_errors.deleter
    def ac_errors(self):
        self._ac_errors = None

402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
    @property
    def spat_homo_errors(self):
        """Return a GeoArray holding the constant shift reliability of the spatial
        homogenization, masked by the image nodata mask; None if no reliability value exists."""
        if not self._spat_homo_errors and self.coreg_info['shift_reliability'] is not None:
            # broadcast the scalar reliability over the full image extent
            errArr = GeoArray(np.full(self.arr.shape[:2], self.coreg_info['shift_reliability'], dtype=np.uint8),
                              geotransform=self.arr.geotransform,
                              projection=self.arr.projection,
                              bandnames=['shift_reliability'],
                              nodata=DEF_D.get_outFillZeroSaturated(np.uint8)[0])
            # blank out nodata pixels of the image array
            errArr[self.arr.mask_nodata.astype(np.int8) == 0] = errArr.nodata

            self._spat_homo_errors = errArr

        return self._spat_homo_errors

    @spat_homo_errors.deleter
    def spat_homo_errors(self):
        self._spat_homo_errors = None

420
421
422
423
424
425
426
    @property
    def spec_homo_errors(self):
        """Returns an instance of GeoArray containing error information calculated during spectral homogenization.

        :return:
        """
        return self._spec_homo_errors

    @spec_homo_errors.setter
    def spec_homo_errors(self, *geoArr_initArgs):
        if geoArr_initArgs[0] is not None:
            errArr = GeoArray(*geoArr_initArgs)

            if CFG.spechomo_bandwise_accuracy:
                # band-wise accuracy -> full 3D shape must match the image cube
                assert errArr.shape == self.arr.shape, \
                    "The 'spec_homo_errors' GeoArray can only be instanced with an array of the same dimensions like " \
                    "GMS_obj.arr. Got %s." % str(errArr.shape)
            else:
                # single accuracy layer -> only X/Y must match
                assert errArr.shape[:2] == self.arr.shape[:2], \
                    "The 'spec_homo_errors' GeoArray can only be instanced with an array of the same X/Y dimensions " \
                    "like GMS_obj.arr. Got %s." % str(errArr.shape)

            # noinspection PyProtectedMember
            if errArr._nodata is None:
                errArr.nodata = DEF_D.get_outFillZeroSaturated(errArr.dtype)[0]
            errArr.gt = self.arr.gt
            errArr.prj = self.arr.prj
            # 3D array gets per-band names; a 2D array is treated as the band-median error
            errArr.bandnames = self.LBA2bandnames(self.LayerBandsAssignment) if errArr.ndim == 3 else ['median']

            self._spec_homo_errors = errArr
        else:
            del self.spec_homo_errors

    @spec_homo_errors.deleter
    def spec_homo_errors(self):
        self._spec_homo_errors = None

    @property
    def accuracy_layers(self):
        """Return the combined accuracy layers array (AccuracyCube), generated lazily.

        NOTE(review): for non-L2 processing levels only a warning is logged and generation is
        still attempted — confirm whether an early return is intended here.
        """
        if not self._accuracy_layers:
            if not self.proc_level.startswith('L2'):
                self.logger.warning('Attempt to get %s accuracy layers failed - they are a Level 2 feature only.'
                                    % self.proc_level)

            self.logger.info('Generating combined accuracy layers array..')
            try:
                from ..algorithms.L2C_P import AccuracyCube
                self._accuracy_layers = AccuracyCube(self)

            except ValueError as e:
                # AccuracyCube signals 'nothing to combine' via this exact message
                if str(e) == 'The given GMS_object contains no accuracy layers for combination.':
                    if CFG.ac_estimate_accuracy or CFG.spechomo_estimate_accuracy:
                        self.logger.warning('The given GMS_object contains no accuracy layers although computation '
                                            'of accurracy layers was enabled in job configuration.')
                    else:
                        pass  # self._accuracy_layers keeps None
                else:
                    # any other ValueError is unexpected -> propagate
                    raise

            except Exception as e:
                raise RuntimeError('Failed to generate AccuracyCube!', e)

        return self._accuracy_layers
484
485
486
487
488
489
490
491
492

    @accuracy_layers.setter
    def accuracy_layers(self, geoArr_initArgs):
        """Set the combined accuracy layers.

        :param geoArr_initArgs:  tuple of GeoArray init arguments; (None,) resets the attribute
        :raises ValueError:      if the given GeoArray has no band names
        """
        if geoArr_initArgs[0] is not None:
            acc_lay = GeoArray(geoArr_initArgs)
            assert acc_lay.shape[:2] == self.arr.shape[:2],\
                "The 'accuracy_layers' GeoArray can only be instanced with an array of the same dimensions like " \
                "GMS_obj.arr. Got %s." % str(acc_lay.shape)

            # noinspection PyProtectedMember
            if acc_lay._nodata is None:
                acc_lay.nodata = DEF_D.get_outFillZeroSaturated(acc_lay.dtype)[0]
            acc_lay.gt = self.arr.gt
            acc_lay.prj = self.arr.prj

            if not acc_lay.bandnames:
                # BUGFIX: former bare 'raise ValueError' carried no message
                raise ValueError("The 'accuracy_layers' GeoArray must have band names.")

            self._accuracy_layers = acc_lay
        else:
            # BUGFIX: use the property deleter (resets to None) like the sibling setters do;
            # 'del self._accuracy_layers' removed the attribute and broke subsequent getter access
            del self.accuracy_layers

    @accuracy_layers.deleter
    def accuracy_layers(self):
        self._accuracy_layers = None

510
511
512
513
514
515
516
517
518
519
520
    @property
    def accuracy_layers_meta(self):
        """Return an ENVI-style metadata dict describing the accuracy layers array,
        or None if no accuracy layers exist."""
        acc = self._accuracy_layers
        if acc is None:
            return None
        return {'map info': geotransform2mapinfo(acc.gt, acc.projection),
                'coordinate system string': acc.projection,
                'bands': acc.bands,
                'band names': list(acc.bandnames),
                'data ignore value': acc.nodata}

521
522
523
    @property
    def cloud_masking_algorithm(self):
        """Return the cloud masking algorithm configured for this satellite (cached)."""
        algorithm = self._cloud_masking_algorithm
        if not algorithm:
            algorithm = CFG.cloud_masking_algorithm[self.satellite]
            self._cloud_masking_algorithm = algorithm
        return algorithm

527
528
    @property
    def ac_options(self):
        # type: () -> dict
        """
        Returns the options dictionary needed as input for atmospheric correction. If an empty dictionary is returned,
        atmospheric correction is not yet available for the current sensor and will later be skipped.
        """
        if not self._ac_options:
            # a custom options file from the job config takes precedence over the sensor default
            path_ac_options = CFG.path_custom_sicor_options or PG.get_path_ac_options(self.GMS_identifier)

            if path_ac_options and os.path.exists(path_ac_options):
                # don't validate because options contain pathes that do not exist on another server:
                opt_dict = get_ac_options(path_ac_options, validation=False)

                # update some file paths depending on the current environment
                opt_dict['DEM']['fn'] = CFG.path_dem_proc_srtm_90m
                opt_dict['ECMWF']['path_db'] = CFG.path_ECMWF_db
                opt_dict['S2Image'][
                    'S2_MSI_granule_path'] = None  # only a placeholder -> will always be None for GMS usage
                opt_dict['output'] = []  # outputs are not needed for GMS -> so
                opt_dict['report']['report_path'] = os.path.join(self.pathGen.get_path_procdata(), '[TYPE]')
                if 'uncertainties' in opt_dict:
                    if CFG.ac_estimate_accuracy:
                        opt_dict['uncertainties']['snr_model'] = PG.get_path_snr_model(self.GMS_identifier)
                    else:
                        del opt_dict['uncertainties']  # SICOR will not compute uncertainties if that key is missing

                # apply custom configuration
                opt_dict["logger"]['level'] = CFG.log_level
                opt_dict["ram"]['upper_limit'] = CFG.ac_max_ram_gb
                opt_dict["ram"]['unit'] = 'GB'
                opt_dict["AC"]['fill_nonclear_areas'] = CFG.ac_fillnonclear_areas
                opt_dict["AC"]['clear_area_labels'] = CFG.ac_clear_area_labels
                # opt_dict['AC']['n_cores'] = CFG.CPUs if CFG.allow_subMultiprocessing else 1

                self._ac_options = opt_dict
            else:
                self.logger.warning('There is no options file available for atmospheric correction. '
                                    'Atmospheric correction must be skipped.')

        return self._ac_options

570
    def get_copied_dict_and_props(self, remove_privates=False):
        # type: (bool) -> dict
        """Returns a copy of the current object dictionary including the current values of all object properties.

        :param remove_privates:  whether to drop keys starting with an underscore
        """
        # loggers must be closed
        self.close_GMS_loggers()
        # this disables automatic recreation of loggers (otherwise loggers are created by using getattr()):
        self._loggers_disabled = True

        out_dict = self.__dict__.copy()

        # add properties
        property_names = [p for p in dir(self.__class__) if isinstance(getattr(self.__class__, p), property)]
        # IDIOM FIX: a plain loop instead of a side-effect list comprehension that built a throwaway list of Nones
        for propK in property_names:
            out_dict[propK] = copy.copy(getattr(self, propK))

        # remove private attributes
        if remove_privates:
            out_dict = {k: v for k, v in out_dict.items() if not k.startswith('_')}

        self._loggers_disabled = False  # enables automatic recreation of loggers

        return out_dict

593
594
    def attributes2dict(self, remove_privates=False):
        # type: (bool) -> dict
        """Returns a copy of the current object dictionary including the current values of all object properties."""
        # loggers must be closed before copying; disabling them prevents getattr() from recreating handlers
        self.close_GMS_loggers()
        self._loggers_disabled = True

        attr_dict = dict(self.__dict__)

        # enrich the plain attributes with a fixed selection of property values
        selected_props = ('GMS_identifier', 'LayerBandsAssignment', 'coreg_needed', 'coreg_info', 'resamp_needed',
                          'dict_LayerOptTherm', 'georef', 'meta_odict')
        for prop_name in selected_props:
            attr_dict[prop_name] = getattr(self, prop_name)

        # optionally drop private attributes
        if remove_privates:
            attr_dict = {key: val for key, val in attr_dict.items() if not key.startswith('_')}

        self._loggers_disabled = False  # re-enable automatic logger recreation
        return attr_dict

616
    def _data_downloader(self, sensor, entity_ID):
        """Try to download the raw data archive for the given dataset.

        :param sensor:     sensor name (used in the failure message)
        :param entity_ID:  entity ID of the dataset to download
        :raises RuntimeError:  if the download fails (currently always, as downloading
                               is not implemented here)
        """
        self.logger.info('Data downloader started.')
        success = False
        " > download source code for Landsat here < "
        if success:
            return success

        self.logger.critical(
            "Download for %s dataset '%s' failed. No further processing possible." % (sensor, entity_ID))
        raise RuntimeError('Archive download failed.')

626
627
    @classmethod
    def from_disk(cls, tuple_GMS_subset):
        """Fills an already instanced GMS object with data from disk. Excludes array attributes in Python mode.

        :param tuple_GMS_subset:    <tuple> e.g. ('/path/gms_file.gms', ['cube', None])
        :return:                    the newly created GMS_object (subclass) instance
        """
        GMS_obj = cls()

        path_GMS_file = tuple_GMS_subset[0]
        GMSfileDict = INP_R.GMSfile2dict(path_GMS_file)

        # copy all attributes from GMS file (private attributes are not touched since they are not included in GMS file)
        GMS_obj.meta_odict = GMSfileDict['meta_odict']  # set that first in order to make some getters and setters work
        for key, value in GMSfileDict.items():
            if key in ['GMS_identifier', 'georef', 'dict_LayerOptTherm']:
                continue  # properties that should better be created on the fly
            elif key == 'acq_datetime':
                # acquisition time is serialized as a timezone-aware string -> parse it back
                GMS_obj.acq_datetime = datetime.datetime.strptime(value, '%Y-%m-%d %H:%M:%S.%f%z')
            else:
                try:
                    setattr(GMS_obj, key, value)
                except Exception:
                    raise AttributeError("Can't set attribute %s." % key)

        GMS_obj.arr_shape, GMS_obj.arr_pos = tuple_GMS_subset[1]

        GMS_obj.arr = GMS_obj.pathGen.get_path_imagedata()
        # self.mask_nodata and self.mask_clouds are auto-synchronized via self.masks (see their getters):
        GMS_obj.masks = GMS_obj.pathGen.get_path_maskdata()

        return GMS_obj
657

658
659
    @classmethod
    def from_sensor_subsystems(cls, list_GMS_objs):
        # type: (List[GMS_object]) -> GMS_object
        """Merge separate GMS objects belonging to the same scene-ID into ONE GMS object.

        All subsystem objects must share the same pixel grid, the same processing level and
        must not contain duplicated subsystems. Logfiles, metadata and array data of the
        input objects are merged; bands are reordered according to the full
        LayerBandsAssignment of the merged object.

        :param list_GMS_objs:   <list> of GMS objects covering the same geographic area but representing different
                                sensor subsystems (e.g. 3 GMS_objects for Sentinel-2 10m/20m/60m bands)
        :return:                a single merged GMS object
        :raises AssertionError: if grids, processing levels or subsystems of the inputs are inconsistent
        :raises RuntimeError:   if clipping the inputs to a common extent yields unequal geometries
        :raises ValueError:     if a required band is missing or a mask exists in more than one subsystem
        """
        GMS_obj_merged = cls()

        # assertions
        assert len(list_GMS_objs) > 1, "'GMS_object.from_sensor_subsystems()' expects multiple input GMS objects. " \
                                       "Got %d." % len(list_GMS_objs)
        assert all([is_coord_grid_equal(list_GMS_objs[0].arr.gt, *obj.arr.xygrid_specs) for obj in list_GMS_objs[1:]]),\
            "The input GMS objects must have the same pixel grid. Received: %s" \
            % np.array([obj.arr.xygrid_specs for obj in list_GMS_objs])
        assert len(list(set([GMS_obj.proc_level for GMS_obj in list_GMS_objs]))) == 1, \
            "The input GMS objects for GMS_object.from_sensor_subsystems() must have the same processing level."
        subsystems = [GMS_obj.subsystem for GMS_obj in list_GMS_objs]
        assert len(subsystems) == len(list(set(subsystems))), \
            "The input 'list_GMS_objs' contains duplicates: %s" % subsystems

        ##################
        # merge logfiles #
        ##################

        # read all logs into DataFrame, sort it by the first column
        [GMS_obj.close_GMS_loggers() for GMS_obj in list_GMS_objs]  # close the loggers of the input objects
        paths_inLogs = [GMS_obj.pathGen.get_path_logfile() for GMS_obj in list_GMS_objs]
        allLogs_df = DataFrame()
        for log in paths_inLogs:
            # NOTE(review): passing both 'sep' and 'delimiter' is rejected by newer pandas versions
            # (they are aliases) - verify against the pinned pandas version
            df = read_csv(log, sep='\n', delimiter=':   ', header=None,
                          engine='python')  # engine suppresses a pandas warning
            # FIXME this will log e.g. atm. corr 3 times for S2A -> use captured streams instead?
            # NOTE(review): DataFrame.append was removed in pandas 2.0 -> migrate to pandas.concat
            allLogs_df = allLogs_df.append(df)

        allLogs_df = allLogs_df.sort_values(0)  # sort chronologically (column 0 holds the timestamp prefix)

        # set common metadata, needed for logfile
        GMS_obj_merged.baseN = list_GMS_objs[0].pathGen.get_baseN(merged_subsystems=True)
        GMS_obj_merged.path_logfile = list_GMS_objs[0].pathGen.get_path_logfile(merged_subsystems=True)
        GMS_obj_merged.scene_ID = list_GMS_objs[0].scene_ID

        # write the merged logfile and flush previous logger
        np.savetxt(GMS_obj_merged.path_logfile, np.array(allLogs_df), delimiter=':   ', fmt="%s")
        GMS_obj_merged.close_GMS_loggers()

        # log
        GMS_obj_merged.logger.info('Merging the subsystems %s to a single GMS object...'
                                   % ', '.join([GMS_obj.subsystem for GMS_obj in list_GMS_objs]))

        ##################
        # MERGE METADATA #
        ##################

        # copy all attributes from the first input GMS file (private attributes are not touched)
        for key, value in list_GMS_objs[0].__dict__.copy().items():
            if key in ['GMS_identifier', 'georef', 'dict_LayerOptTherm']:
                continue  # properties that should better be created on the fly
            elif key in ['baseN', 'path_logfile', 'scene_ID', 'subsystem']:
                continue  # either previously set with common values or not needed for merged GMS_object
            try:
                setattr(GMS_obj_merged, key, value)
            except Exception:
                raise AttributeError("Can't set attribute %s." % key)

        # update LayerBandsAssignment and get full list of output bandnames
        from .metadata import get_LayerBandsAssignment
        # use identifier of first input GMS object for getting LBA (respects current proc_level):
        gms_idf = list_GMS_objs[0].GMS_identifier
        GMS_obj_merged.LayerBandsAssignment = get_LayerBandsAssignment(gms_idf, return_fullLBA=True)
        # band names are zero-padded to two digits, e.g. '1' -> 'B01', '8A' -> 'B8A'
        bandnames = ['B%s' % i if len(i) == 2 else 'B0%s' % i for i in GMS_obj_merged.LayerBandsAssignment]

        # update layer-dependent metadata with respect to remaining input GMS objects
        GMS_obj_merged.meta_odict.update({
            'band names': [('Band %s' % i) for i in GMS_obj_merged.LayerBandsAssignment],
            'LayerBandsAssignment': GMS_obj_merged.LayerBandsAssignment,
            'Subsystem': '',
            'PhysUnit': GMS_obj_merged.meta_odict['PhysUnit'],  # TODO can contain val_optical / val_thermal
        })
        GMS_obj_merged.subsystem = ''
        del GMS_obj_merged.pathGen  # must be refreshed because subsystem is now ''
        GMS_obj_merged.close_GMS_loggers()  # must also be refreshed because it depends on pathGen

        for attrN in layerdependent_metadata:
            # combine values from separate subsystems to a single value
            attrDic_fullLBA = {}
            for GMS_obj in list_GMS_objs:
                attr_val = getattr(GMS_obj.MetaObj, attrN)
                if isinstance(attr_val, list):
                    attrDic_fullLBA.update(dict(zip(GMS_obj.LayerBandsAssignment, attr_val)))
                elif isinstance(attr_val, (dict, collections.OrderedDict)):
                    attrDic_fullLBA.update(attr_val)
                else:
                    raise ValueError(attrN)

            # update the attribute in self.MetaObj
            if attrDic_fullLBA:
                val2set = [attrDic_fullLBA[bN] for bN in GMS_obj_merged.LayerBandsAssignment] \
                    if isinstance(getattr(list_GMS_objs[0].MetaObj, attrN), list) else attrDic_fullLBA
                setattr(GMS_obj_merged.MetaObj, attrN, val2set)

        ####################
        # MERGE ARRAY DATA #
        ####################

        # find the common extent. NOTE: boundsMap is expected in the order [xmin,xmax,ymin,ymax]
        geoExtents = np.array([GMS_obj.arr.box.boundsMap for GMS_obj in list_GMS_objs])
        common_extent = (min(geoExtents[:, 0]), max(geoExtents[:, 1]), min(geoExtents[:, 2]), max(geoExtents[:, 3]))

        # overwrite array data with merged arrays, clipped to common_extent and reordered according to FullLayerBandsAss
        for attrname in ['arr', 'ac_errors', 'dem', 'mask_nodata', 'mask_clouds', 'mask_clouds_confidence', 'masks']:

            # get current attribute of each subsystem without running property getters
            all_arrays = [getattr(GMS_obj, '_%s' % attrname) for GMS_obj in list_GMS_objs]

            # get the same geographical extent for each input GMS object
            if len(set(tuple(ext) for ext in geoExtents.tolist())) > 1:
                # in case of different extents
                geoArrs_same_extent = []

                for geoArr in all_arrays:

                    if geoArr is not None:
                        # FIXME mask_clouds_confidence is no GeoArray until here
                        # FIXME -> has no nodata value -> calculation throughs warning
                        geoArr_same_extent = \
                            GeoArray(*geoArr.get_mapPos(
                                mapBounds=np.array(common_extent)[[0, 2, 1, 3]],  # pass (xmin, ymin, xmax, ymax)
                                mapBounds_prj=geoArr.prj),
                                     bandnames=list(geoArr.bandnames.keys()),
                                     nodata=geoArr.nodata)
                        geoArrs_same_extent.append(geoArr_same_extent)
                    else:
                        # e.g. in case of cloud mask that is only appended to the GMS object with the same
                        # spatial resolution)
                        geoArrs_same_extent.append(None)

            else:
                # skip get_mapPos() if all input GMS objects have the same extent
                geoArrs_same_extent = all_arrays

            # validate output GeoArrays #
            #############################

            if len([gA for gA in geoArrs_same_extent if gA is not None]) > 1:
                equal_bounds = all([geoArrs_same_extent[0].box.boundsMap == gA.box.boundsMap
                                    for gA in geoArrs_same_extent[1:]])
                equal_epsg = all([geoArrs_same_extent[0].epsg == gA.epsg for gA in geoArrs_same_extent[1:]])
                equal_xydims = all([geoArrs_same_extent[0].shape[:2] == gA.shape[:2] for gA in geoArrs_same_extent[1:]])
                if not all([equal_bounds, equal_epsg, equal_xydims]):
                    raise RuntimeError('Something went wrong during getting the same geographical extent for all the '
                                       'input GMS objects. The extents, projections or pixel dimensions of the '
                                       'calculated input GMS objects are not equal.')

            # set output arrays #
            #####################

            # handle those arrays where bands have to be reordered according to FullLayerBandsAssignment
            if attrname in ['arr', 'ac_errors'] and list(set(geoArrs_same_extent)) != [None]:
                # check that each desired band name for the current attribute is provided by one of the input
                # GMS objects
                available_bandNs = list(chain.from_iterable([list(gA.bandnames) for gA in geoArrs_same_extent]))
                for bN in bandnames:
                    if bN not in available_bandNs:
                        raise ValueError("The given input GMS objects (subsystems) do not provide a bandname '%s' for "
                                         "the attribute '%s'. Available band names amongst all input GMS objects are: "
                                         "%s" % (bN, attrname, str(available_bandNs)))

                # merge arrays
                def get_band(bandN):
                    # return the band from the first subsystem that provides it
                    return [gA[bandN] for gA in geoArrs_same_extent if gA and bandN in gA.bandnames][0]

                # BUGFIX: np.dstack requires a sequence of arrays; passing a generator expression is
                # deprecated since NumPy 1.16 and raises an error on current NumPy versions
                full_geoArr = GeoArray(np.dstack([get_band(bandN) for bandN in bandnames]),
                                       geoArrs_same_extent[0].gt, geoArrs_same_extent[0].prj,
                                       bandnames=bandnames,
                                       nodata=geoArrs_same_extent[0].nodata)
                setattr(GMS_obj_merged, attrname, full_geoArr)

            # handle the remaining arrays
            else:
                # masks, dem, mask_nodata, mask_clouds, mask_clouds_confidence
                if attrname == 'dem':
                    # use the DEM of the first input object
                    # (if the grid is the same, the DEMs should be the same anyway)
                    GMS_obj_merged.dem = geoArrs_same_extent[0]

                elif attrname == 'mask_nodata':
                    # must not be merged -> self.arr is already merged, so just recalculate it (np.all)
                    GMS_obj_merged.mask_nodata = GMS_obj_merged.calc_mask_nodata(overwrite=True)

                elif attrname == 'mask_clouds':
                    # possibly only present in ONE subsystem (set by atm. Corr.)
                    mask_clouds = [msk for msk in geoArrs_same_extent if msk is not None]
                    if len(mask_clouds) > 1:
                        raise ValueError('Expected mask clouds in only one subsystem. Got %s.' % len(mask_clouds))
                    GMS_obj_merged.mask_clouds = mask_clouds[0] if mask_clouds else None

                elif attrname == 'mask_clouds_confidence':
                    # possibly only present in ONE subsystem (set by atm. Corr.)
                    mask_clouds_conf = [msk for msk in geoArrs_same_extent if msk is not None]
                    if len(mask_clouds_conf) > 1:
                        raise ValueError(
                            'Expected mask_clouds_conf in only one subsystem. Got %s.' % len(mask_clouds_conf))
                    GMS_obj_merged.mask_clouds_confidence = mask_clouds_conf[0] if mask_clouds_conf else None

                elif attrname == 'masks':
                    # self.mask_nodata and self.mask_clouds will already be set here -> so just recreate it from there
                    GMS_obj_merged.masks = None

        # avoid unequal nodata edges between individual layers (resampling artifacts etc.) #
        ####################################################################################

        # get pixel values of areas that have not been atmospherically corrected (non-clear)
        nonclear_labels = [lbl for lbl in ["Clear", "Snow", "Water", "Shadow", "Cirrus", "Cloud"]
                           if lbl not in CFG.ac_clear_area_labels]
        cloud_mask_legend = DEF_D.get_mask_classdefinition('mask_clouds', GMS_obj_merged.satellite)
        nonclear_pixVals = [cloud_mask_legend[lbl] for lbl in nonclear_labels]

        # apply cloud mask to image data and all products derived from image data
        # (only if image data represents BOA-Ref and cloud areas are not to be filled with TOA-Ref)
        if re.search('BOA_Reflectance', GMS_obj_merged.MetaObj.PhysUnit, re.I) and not CFG.ac_fillnonclear_areas:
            # fill non-clear areas with no data values (for all bands)
            for pixVal in nonclear_pixVals:
                mask_nonclear = GMS_obj_merged.mask_clouds[:] == pixVal
                GMS_obj_merged.arr[mask_nonclear] = DEF_D.get_outFillZeroSaturated(GMS_obj_merged.arr.dtype)[0]

                if GMS_obj_merged.ac_errors:
                    GMS_obj_merged.ac_errors[mask_nonclear] = \
                        DEF_D.get_outFillZeroSaturated(GMS_obj_merged.ac_errors.dtype)[0]

        # update no data mask (adds all pixels to no data where at least one band has no data)
        GMS_obj_merged.calc_mask_nodata(overwrite=True)

        # apply updated nodata mask to array data (only to no data area, not to non-clear area)
        # => this sets all pixels to no data where at least one band has no data
        #    NOTE: This may cause the cloud mask not exactly match the holes in self.arr so that there may be small
        #          gaps between the cloud edge and where the image data begins. This is due to resampling, e.g. from
        #          Sentinel-2 60m grid to Landsat 30 m grid. Nodata mask marks those areas as no data although there is
        #          no cloud at exactly that pixel. This is ok!
        area2replace = np.all([GMS_obj_merged.mask_nodata[:] == 0] +
                              [GMS_obj_merged.mask_clouds[:] != pixVal for pixVal in nonclear_pixVals], axis=0) \
            if GMS_obj_merged.mask_clouds is not None else GMS_obj_merged.mask_nodata[:] == 0

        for attrname in ['arr', 'ac_errors']:
            attr_val = getattr(GMS_obj_merged, attrname)

            if attr_val is not None:
                attr_val[area2replace] = DEF_D.get_outFillZeroSaturated(attr_val.dtype)[0]
                setattr(GMS_obj_merged, attrname, attr_val)

        # recreate self.masks
        GMS_obj_merged.build_combined_masks_array()

        # update array-dependent metadata
        GMS_obj_merged.meta_odict.update({
            'samples': GMS_obj_merged.arr.cols, 'lines': GMS_obj_merged.arr.rows,
            'bands': GMS_obj_merged.arr.bands,
            'map info': geotransform2mapinfo(GMS_obj_merged.arr.gt, GMS_obj_merged.arr.prj),
            'coordinate system string': GMS_obj_merged.arr.prj, })

        # update corner coordinates  # (calc_corner_positions is a method of L1A_object)
        GMS_obj_merged.calc_corner_positions()

        # set shape of full array
        GMS_obj_merged.shape_fullArr = GMS_obj_merged.arr.shape

        return GMS_obj_merged
926

927
928
929
    @classmethod
    def from_tiles(cls, list_GMS_tiles):
        # type: (list) -> cls
        """Merge separate GMS objects with different spatial coverage but belonging to the same scene-ID to ONE GMS object.

        :param list_GMS_tiles: <list> of GMS objects that have been created by cut_GMS_obj_into_blocks()
        """
        GMS_obj = cls()

        # materialize a multiprocessing result iterator before indexing into it
        if 'IMapUnorderedIterator' in str(type(list_GMS_tiles)):
            list_GMS_tiles = list(list_GMS_tiles)

        # copy all attributes except of array attributes
        # (non-array attributes are assumed to be identical for all tiles of the same scene -> take them from tile 1)
        tile1 = list_GMS_tiles[0]
        [setattr(GMS_obj, i, getattr(tile1, i)) for i in tile1.__dict__
         if not callable(getattr(tile1, i)) and not isinstance(getattr(tile1, i), (np.ndarray, GeoArray))]

        # MERGE ARRAY-ATTRIBUTES
        list_arraynames = [i for i in tile1.__dict__ if not callable(getattr(tile1, i)) and
                           isinstance(getattr(tile1, i), (np.ndarray, GeoArray))]
        list_arraynames = ['_arr'] + [i for i in list_arraynames if
                                      i != '_arr']  # list must start with _arr, otherwise setters will not work

        for arrname in list_arraynames:
            # use the first tile as a template for shape (bands) and dtype of the merged array
            samplearray = getattr(tile1, arrname)
            assert isinstance(samplearray, (np.ndarray, GeoArray)), \
                'Received a %s object for attribute %s. Expected a numpy array or an instance of GeoArray.' \
                % (type(samplearray), arrname)
            is_3d = samplearray.ndim == 3
            bands = (samplearray.shape[2],) if is_3d else ()  # dynamic -> works for arr, cld_arr,...
            target_shape = tuple(GMS_obj.shape_fullArr[:2]) + bands
            target_dtype = samplearray.dtype
            # paste all tiles into a single full-scene array (helper defined elsewhere in this class)
            merged_array = GMS_obj._numba_array_merger(list_GMS_tiles, arrname, target_shape, target_dtype)

            # strip the leading underscore so that the corresponding property setter runs (keeps masks etc. in sync)
            setattr(GMS_obj, arrname if not arrname.startswith('_') else arrname[1:],
                    merged_array)  # use setters if possible
            # NOTE: this asserts that each attribute starting with '_' has also a property with a setter!

        # UPDATE ARRAY-DEPENDENT ATTRIBUTES
        GMS_obj.arr_shape = 'cube'  # the merged object represents the full scene, not a tile anymore
        GMS_obj.arr_pos = None

        # update MetaObj attributes
        GMS_obj.meta_odict.update({
            'samples': GMS_obj.arr.cols,
            'lines': GMS_obj.arr.rows,
            'bands': GMS_obj.arr.bands,
            'map info': geotransform2mapinfo(GMS_obj.arr.gt, GMS_obj.arr.prj),
            'coordinate system string': GMS_obj.arr.prj
        })

        # calculate data_corners_imXY (mask_nodata is always an array here because get_mapPos always returns an array)
        corners_imYX = calc_FullDataset_corner_positions(
            GMS_obj.mask_nodata, assert_four_corners=False, algorithm='shapely')
        GMS_obj.trueDataCornerPos = [(YX[1], YX[0]) for YX in corners_imYX]  # [UL, UR, LL, LR]

        # calculate trueDataCornerLonLat
        data_corners_LatLon = pixelToLatLon(GMS_obj.trueDataCornerPos,
                                            geotransform=GMS_obj.arr.gt, projection=GMS_obj.arr.prj)
        GMS_obj.trueDataCornerLonLat = [(YX[1], YX[0]) for YX in data_corners_LatLon]  # swap (lat, lon) -> (lon, lat)

        # calculate trueDataCornerUTM
        data_corners_utmYX = pixelToMapYX(GMS_obj.trueDataCornerPos, geotransform=GMS_obj.arr.gt,
                                          projection=GMS_obj.arr.prj)  # FIXME asserts gt in UTM coordinates
        GMS_obj.trueDataCornerUTM = [(YX[1], YX[0]) for YX in data_corners_utmYX]  # swap (Y, X) -> (X, Y)

        return GMS_obj
Daniel Scheffler's avatar