run_gms.py 14.1 KB
Newer Older
1
# -*- coding: utf-8 -*-
2
3
4

# gms_preprocessing, spatial and spectral homogenization of satellite remote sensing data
#
5
# Copyright (C) 2020  Daniel Scheffler (GFZ Potsdam, daniel.scheffler@gfz-potsdam.de)
6
7
8
9
10
11
#
# This software was developed within the context of the GeoMultiSens project funded
# by the German Federal Ministry of Education and Research
# (project grant code: 01 IS 14 010 A-C).
#
# This program is free software: you can redistribute it and/or modify it under
12
13
14
15
16
17
# the terms of the GNU General Public License as published by the Free Software
# Foundation, either version 3 of the License, or (at your option) any later
# version. Please note the following exception: `spechomo` depends on tqdm, which
# is distributed under the Mozilla Public Licence (MPL) v2.0 except for the files
# "tqdm/_tqdm.py", "setup.py", "README.rst", "MANIFEST.in" and ".gitignore".
# Details can be found here: https://github.com/tqdm/tqdm/blob/master/LICENCE.
18
19
20
21
22
23
24
25
26
#
# This program is distributed in the hope that it will be useful, but WITHOUT
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
# FOR A PARTICULAR PURPOSE. See the GNU Lesser General Public License for more
# details.
#
# You should have received a copy of the GNU Lesser General Public License along
# with this program.  If not, see <http://www.gnu.org/licenses/>.

27
__author__ = 'Daniel Scheffler'
28

29
30
import argparse
import warnings
import os

import matplotlib

# Switch the matplotlib backend to the non-interactive 'Agg' BEFORE anything imports pyplot.
# NOTE: the former call matplotlib.use('Agg', warn=False) breaks with matplotlib>=3.3 where
#       the deprecated 'warn' keyword was removed (TypeError). Current matplotlib no longer
#       warns when the backend is already 'Agg', so the keyword is obsolete anyway.
matplotlib.use('Agg')

from gms_preprocessing import ProcessController, __version__  # noqa: E402
from gms_preprocessing.misc.database_tools import GMS_JOB  # noqa: E402
from gms_preprocessing.options.config import get_conn_database  # noqa: E402
from gms_preprocessing.options.config import path_options_default  # noqa: E402
from gms_preprocessing.options.config import get_options  # noqa: E402
from gms_preprocessing.options.options_schema import get_param_from_json_config  # noqa: E402

# default job options, read once at import time from the packaged JSON file
options_default = get_options(path_options_default, validation=True)  # type: dict


47
def parsedArgs_to_user_opts(cli_args):
    # type: (argparse.Namespace) -> dict
    """Convert an argparse Namespace into a dictionary of explicitly given parameters.

    NOTE:   All options that have not been given explicitly (None values) are removed. Reason: all options to
            passed set_config WILL OVERRIDE job settings read from the GMS database (e.g., specified by the WebUI)
            => only override job configuration defined by WebUI if CLI options are explicitly given
            => if json_opts are given: configuration defined by WebUI will be overridden by this json config in any case

    :param cli_args:    options as parsed by the argparse.ArgumentParser
    """
    # keys that are consumed by the run functions themselves and must not reach set_config
    ignore_keys = ('jobid', 'sceneids', 'entityids', 'filenames', 'comment')

    return {param: value
            for param, value in vars(cli_args).items()
            if not param.startswith('_')   # skip private namespace attributes
            and param != 'func'            # 'func' only serves sub-parser dispatch
            and value is not None          # None means: not explicitly given by the user
            and param not in ignore_keys}
77
78


79
def run_from_jobid(args):
    """Run a GeoMultiSens preprocessing job using an already existing database job ID.

    :param args:    parsed command line arguments (must provide the 'jobid' attribute)
    """
    # TODO distinguish between ID of a master, processing or download job
    # TODO master: find corresponding sub-jobs and run them
    # TODO processing: check for not downloaded scenes and run processing after download
    # TODO download: run only the downloader

    # set up process controller instance
    kwargs = parsedArgs_to_user_opts(args)

    # single dict lookup instead of 'in' check + indexing
    if os.environ.get('GMS_IS_TEST') == 'True':
        kwargs['is_test'] = True

    PC = ProcessController(args.jobid, **kwargs)

    # run the job
    if os.environ.get('GMS_IS_TEST_CONFIG') == 'True':
        # in case of software test, it is enough to get an instance of process controller because all inputs are
        # validated within options.config.Job_Config (indirectly called by ProcessController.__init__() )
        pass
    else:
        PC.run_all_processors()
100

101
102
103

def run_from_sceneids(args):
    """Create a new GMS processing job from a list of scene IDs and run it."""
    # TODO actually create and run a download job first
    warnings.warn('Currently the console argument parser expects the given scenes as already downloaded.')

    # register a new processing job in the database, built from the given scene IDs
    job = GMS_JOB(get_conn_database(args.db_host))
    job.from_sceneIDlist(
        list_sceneIDs=args.sceneids,
        virtual_sensor_id=get_user_input_or_default('virtual_sensor_id', args),
        datasetid_spatial_ref=get_user_input_or_default('datasetid_spatial_ref', args),
        comment=args.comment)

    _run_job(job, **parsedArgs_to_user_opts(args))
113
114
115


def run_from_entityids(args):
    """Create a new GMS processing job from a list of entity IDs and run it.

    :param args:    parsed command line arguments (must provide the 'entityids' attribute)
    """
    # register a new processing job in the database, built from the given entity IDs
    job = GMS_JOB(get_conn_database(args.db_host))
    job.from_entityIDlist(
        list_entityids=args.entityids,
        virtual_sensor_id=get_user_input_or_default('virtual_sensor_id', args),
        datasetid_spatial_ref=get_user_input_or_default('datasetid_spatial_ref', args),
        comment=args.comment)

    _run_job(job, **parsedArgs_to_user_opts(args))
127
128
129


def run_from_filenames(args):
    """Create a new GMS job from filenames of already downloaded archives and run it.

    :param args:    parsed command line arguments (must provide the 'filenames' attribute)
    """
    # register a new processing job in the database, built from the given archive filenames
    job = GMS_JOB(get_conn_database(args.db_host))
    job.from_filenames(
        list_filenames=args.filenames,
        virtual_sensor_id=get_user_input_or_default('virtual_sensor_id', args),
        datasetid_spatial_ref=get_user_input_or_default('datasetid_spatial_ref', args),
        comment=args.comment)

    _run_job(job, **parsedArgs_to_user_opts(args))
141
142
143
144
145


def run_from_constraints(args):
    """Create and run a new job matching the given constraints (not implemented yet)."""
    # TODO implement constraint-based job creation
    raise NotImplementedError


149
def _run_job(dbJob, **config_kwargs):
    # type: (GMS_JOB, dict) -> None
    """Write the given job to the database and execute it.

    :param dbJob:           a GMS_JOB instance (not yet written to the jobs table)
    :param config_kwargs:   keyword arguments to be passed to ProcessController
    """
    # create a database record for the given job
    dbJob.create()

    # set up process controller instance
    # (single dict lookup instead of 'in' check + indexing, consistent with run_from_jobid)
    if os.environ.get('GMS_IS_TEST') == 'True':
        config_kwargs['is_test'] = True

    PC = ProcessController(dbJob.id, **config_kwargs)

    # run the job
    if os.environ.get('GMS_IS_TEST_CONFIG') == 'True':
        # in case of software test, it is enough to get an instance of process controller because all inputs are
        # validated within options.config.Job_Config (indirectly called by ProcessController.__init__() )
        pass
    else:
        PC.run_all_processors()
172
173


174
175
176
177
178
179
180
def get_user_input_or_default(paramname, argsparse_ns):
    """Return the explicitly given CLI value of *paramname*, falling back to the default options.

    If the user did not give the parameter on the command line (value is None), the default
    is looked up in the default options JSON configuration.

    :param paramname:       name of the parameter to look up
    :param argsparse_ns:    argparse.Namespace as returned by the argument parser
    """
    cli_value = getattr(argsparse_ns, paramname)

    if cli_value is not None:
        return cli_value

    return get_param_from_json_config(paramname, options_default)


181
182
def get_gms_argparser():
    """Return the argument parser for the run_gms.py program.

    The returned parser provides one sub-parser per job creation mode (jobid, sceneids,
    entityids, filenames, constraints); each sub-parser sets 'func' to the matching
    run_from_* function.
    """

    ##################################################################
    # CONFIGURE MAIN PARSER FOR THE GEOMULTISENS PREPROCESSING CHAIN #
    ##################################################################

    parser = argparse.ArgumentParser(
        prog='run_gms.py',
        description='=' * 70 + '\n' + 'GeoMultiSens preprocessing console argument parser. '
                                      'Python implementation by Daniel Scheffler (daniel.scheffler@gfz-potsdam.de)',
        epilog="The argument parser offers multiple sub-argument parsers (jobid, sceneids, ...) for starting GMS jobs. "
               "use '>>> python /path/to/gms_preprocessing/run_gms.py <sub-parser> -h' for detailed documentation and "
               "usage hints.")

    parser.add_argument('--version', action='version', version=__version__)

    #################################################################
    # CONFIGURE SUBPARSERS FOR THE GEOMULTISENS PREPROCESSING CHAIN #
    #################################################################

    ##############################################
    # define parsers containing common arguments #
    ##############################################

    # parent parser holding options shared by all sub-parsers (via parents=[...])
    general_opts_parser = argparse.ArgumentParser(add_help=False)
    gop_p = general_opts_parser.add_argument

    gop_p('-jc', '--json_config', nargs='?', type=str,
          help='file path of a JSON file containing options. See here for an example: '
               'https://gitext.gfz-potsdam.de/geomultisens/gms_preprocessing/'
               'blob/master/gms_preprocessing/options/options_default.json')

    # '-exec_L1AP': dict(nargs=3, type=bool, help="L1A Processor configuration",
    #                   metavar=tuple("[run processor, write output, delete output]".split(' ')), default=[1, 1, 1]),

    gop_p('-DH', '--db_host', nargs='?', type=str,
          default='localhost',  # hardcoded here because default json is read from database and host must be available
          help='host name of the server that runs the postgreSQL database')

    # NOTE: don't define any defaults here for parameters that are passed to set_config!
    #       -> otherwise, we cannot distinguish between explicitly given parameters and default values
    #       => see docs in parsedArgs_to_user_opts() for explanation
    gop_p('-DOO', '--delete_old_output', nargs='?', type=bool, default=None,
          help='delete previously created output of the given job ID before running the job')

    gop_p('-vid', '--virtual_sensor_id', type=int, default=None,
          help='ID of the target (virtual) sensor')

    gop_p('-dsid_spat', '--datasetid_spatial_ref', type=int, default=None,
          help='dataset ID of the spatial reference')

    gop_p('--CPUs', type=int, default=None,
          help='number of CPU cores to be used for processing (default: "None" -> use all available')

    gop_p('-c', '--comment', nargs='?', type=str,
          default='',
          help='comment concerning the job')

    ##################
    # add subparsers #
    ##################

    subparsers = parser.add_subparsers()

    parser_jobid = subparsers.add_parser(
        'jobid', parents=[general_opts_parser],
        description='Run a GeoMultiSens preprocessing job using an already existing job ID.',
        help="Run a GeoMultiSens preprocessing job using an already existing job ID (Sub-Parser).",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser_sceneids = subparsers.add_parser(
        'sceneids', parents=[general_opts_parser],
        description='Run a GeoMultiSens preprocessing job for a given list of scene IDs.',
        help="Run a GeoMultiSens preprocessing job for a given list of scene IDs (Sub-Parser).",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser_entityids = subparsers.add_parser(
        'entityids', parents=[general_opts_parser],
        description='Run a GeoMultiSens preprocessing job for a given list of entity IDs.',
        help="Run a GeoMultiSens preprocessing job for a given list of entity IDs (Sub-Parser).",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser_filenames = subparsers.add_parser(
        'filenames', parents=[general_opts_parser],
        description='Run a GeoMultiSens preprocessing job for a given list of filenames of '
                    'downloaded satellite image archives!',
        help="Run a GeoMultiSens preprocessing job for a given list of filenames of downloaded satellite "
             "image archives! (Sub-Parser).",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    parser_constraints = subparsers.add_parser(
        'constraints', parents=[general_opts_parser],
        description='Run a GeoMultiSens preprocessing job matching the given constraints.',
        help="Run a GeoMultiSens preprocessing job matching the given constraints (Sub-Parser).",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter)

    #################
    # ADD ARGUMENTS #
    #################

    ############################
    # add individual arguments #
    ############################

    # add arguments to parser_jobid
    jid_p = parser_jobid.add_argument
    jid_p('jobid', type=int, help='job ID of an already created GeoMultiSens preprocessing job '
                                  '(must be present in the jobs table of the database)')

    # add arguments to parser_sceneids
    sid_p = parser_sceneids.add_argument
    sid_p('sceneids', nargs='+', type=int,
          help="list of scene IDs corresponding to valid records within the 'scenes' table of the database")

    # add arguments to parser_entityids
    eid_p = parser_entityids.add_argument
    eid_p('entityids', nargs='+', type=str,
          help="list of entity IDs corresponding to valid records within the 'scenes' table of the database")
    # FIXME satellite and sensor are required

    # add arguments to parse_constraints
    # NOTE: con_p is currently unused; it is kept as the hook for the TODO below
    con_p = parser_constraints.add_argument
    # TODO
    # con_p('constraints', nargs='+', type=str, help="list of entity IDs corresponding to valid records within the "
    #                                            "'scenes' table of the database")

    #################################
    # LINK PARSERS TO RUN FUNCTIONS #
    #################################

    parser_jobid.set_defaults(func=run_from_jobid)
    parser_sceneids.set_defaults(func=run_from_sceneids)
    parser_entityids.set_defaults(func=run_from_entityids)
    parser_filenames.set_defaults(func=run_from_filenames)
    parser_constraints.set_defaults(func=run_from_constraints)

    return parser


327
if __name__ == '__main__':
    parsed_args = get_gms_argparser().parse_args()

    # Since Python 3, sub-parsers created via add_subparsers() are optional by default, so
    # 'func' is missing from the namespace if run_gms.py is called without a sub-command.
    # Print the usage help in that case instead of crashing with an AttributeError.
    if hasattr(parsed_args, 'func'):
        parsed_args.func(parsed_args)
        print('\nready.')
    else:
        get_gms_argparser().print_help()