Commit ff03664a authored by Daniel Scheffler

Deleted deprecated cloud masking algorithms based on py_tools_ah/classical_bayesian because they don't work anymore and cloud masking is now implemented in SICOR.
Former-commit-id: 7f87d31d
Former-commit-id: 267dbb66
parent f2b13878
@@ -2,13 +2,10 @@
import datetime
import os
import re
import sys
import time
import warnings
import gdal
import gdalnumeric
import matplotlib.pyplot as plt
import numpy as np
try:
@@ -24,11 +21,9 @@ from py_tools_ds.geo.projection import EPSG2WKT
from ..config import GMS_config as CFG
from . import geoprocessing as GEOP
from . import gms_cloud_classifier as CLD_P # Cloud Processor
from ..io import output_writer as OUT_W
from ..misc import helper_functions as HLP_F
from ..misc import path_generator as PG
from ..misc.definition_dicts import get_outFillZeroSaturated, is_dataset_provided_as_fullScene, get_mask_classdefinition
from ..misc.definition_dicts import get_outFillZeroSaturated, is_dataset_provided_as_fullScene
from ..model.gms_object import GMS_object
from ..model import metadata as META
@@ -481,156 +476,6 @@ class L1A_object(GMS_object):
self.delete_tempFiles() # these files are needed later in Python execution mode
self.MetaObj.Dataname = previous_dataname # /vsi.. pointing directly to raw data archive (which exists)
def calc_cloud_mask(self, subset=None):
algorithm = CFG.job.cloud_masking_algorithm[self.satellite]
(rS, rE), (cS, cE) = self.arr_pos if self.arr_pos else ((0, self.shape_fullArr[0]), (0, self.shape_fullArr[1]))
self.logger.info("Calculating cloud mask based on '%s' algorithm..." % algorithm)
if algorithm == 'FMASK':
mask_clouds = None # FIXME
else:
# FIXME Landsat cloud mask pixel values are currently not compatible to
# FIXME definition_dicts.get_mask_classdefinition
# append /<GeoMultiSensRepo>/algorithms to PATH in order to properly import py_tools_ah when unpickling
# cloud classifiers
sys.path.append(os.path.dirname(__file__))  # FIXME handle py_tools_ah as normal external dependency
# in_mem = hasattr(self, 'arr') and isinstance(self.arr, np.ndarray)
# if in_mem:
# (rS, rE), (cS, cE) = \
# self.arr_pos if self.arr_pos else ((0,self.shape_fullArr[0]),(0,self.shape_fullArr[1]))
# bands = self.arr.shape[2] if len(self.arr.shape) == 3 else 1
# else:
# subset = subset if subset else ['block', self.arr_pos] if self.arr_pos else ['cube', None]
# bands, rS, rE, cS, cE = \
# list(GEOP.get_subsetProps_from_subsetArg(self.shape_fullArr, subset).values())[2:7]
# arr_isPath = isinstance(self.arr, str) and os.path.isfile(self.arr) # FIXME
# # FIXME ersetzen durch path generator?:
# inPath = self.arr if arr_isPath else self.MetaObj.Dataname if \
# (hasattr(self,'MetaObj') and self.MetaObj) else self.meta_odict['Dataname']
if not self.path_cloud_class_obj or self.satellite == 'Sentinel-2A': # FIXME dont exclude S2 here
self.log_for_fullArr_or_firstTile('Cloud masking is not yet implemented for %s %s...'
% (self.satellite, self.sensor), subset)
mask_clouds = None
else:
self.log_for_fullArr_or_firstTile('Calculating cloud mask...', subset)
# if CFG.usecase.skip_thermal: # FIXME
# if not in_mem:
# subset = subset if subset else ['cube',None]
# rasObj = GEOP.GEOPROCESSING(inPath, self.logger, subset=subset)
# self.arr = rasObj.tondarray(direction=3)
# TODO Sentinel-2 mask:
# if not hasattr(s2img, "mask_clouds"):
# logger.info("Cloud mask missing -> derive own cloud mask.")
# CldMsk = CloudMask(logger=logger, persistence_file=options["cld_mask"]["persistence_file"],
# processing_tiles=options["cld_mask"]["processing_tiles"])
# s2img.mask_clouds = \
# CldMsk(S2_img=s2img, target_resolution=options["cld_mask"]["target_resolution"],
# majority_filter_options=options["cld_mask"]["majority_mask_filter"],
# nodata_value=options["cld_mask"]['nodata_value_mask'])
# del CldMsk
self.GMS_identifier['logger'] = self.logger
if not CFG.job.bench_cloudMask:
self.path_cloud_class_obj = PG.get_path_cloud_class_obj(self.GMS_identifier)
CLD_obj = CLD_P.GmsCloudClassifier(classifier=self.path_cloud_class_obj)
assert CLD_obj, 'Error loading cloud classifier.'
if self.arr.bands == CLD_obj.classifier.n_channels: # FIXME
try:
mask_clouds = CLD_obj(self) # CLD_obj uses self.arr for cloud mask generation
except Exception:
mask_clouds = None
self.logger.error('Error during calculation of cloud mask:\n', exc_info=True)
else:
sensorcode = ('%s %s %s' % (self.satellite, self.sensor, self.subsystem)) \
if self.subsystem else '%s %s' % (self.satellite, self.sensor)
nbands = self.shape_fullArr[2] if len(self.shape_fullArr) == 3 else 1
self.log_for_fullArr_or_firstTile("'The chosen cloud classifier for %s is not suitable for the "
"given %s bands. %s bands expected. Cloud masking failed."
% (sensorcode, nbands, CLD_obj.classifier.n_channels), subset)
mask_clouds = None
else:
pathlist_cloud_class_obj = PG.get_path_cloud_class_obj(self.GMS_identifier, get_all=True)
# load_time,proc_time,classifier_name = [],[],[]
categories_timinggroup_timing = np.empty((2 * len(pathlist_cloud_class_obj), 3), dtype='<U32')
for i, class_path in zip(range(0, 2 * len(pathlist_cloud_class_obj), 2), pathlist_cloud_class_obj):
categories_timinggroup_timing[i:i + 2, 0] = os.path.splitext(os.path.basename(class_path))[0]
t1 = time.time()
# CLD_obj = CLD_P.GmsCloudClassifier(classifier=class_path)
categories_timinggroup_timing[i, 1] = "import time"
categories_timinggroup_timing[i, 2] = time.time() - t1
t2 = time.time()
# mask_clouds = CLD_obj(self)
categories_timinggroup_timing[i + 1, 1] = "processing time"
categories_timinggroup_timing[i + 1, 2] = time.time() - t2
# classifiers = np.unique(categories_timinggroup_timing[:, 0])
# categories = np.unique(categories_timinggroup_timing[:, 1])
plt.ioff()
fig = plt.figure()
# ax = fig.add_subplot(111)
# space = 0.3
# n = len(classifiers)
# width = (1 - space) / (len(classifiers))
# for i,classif in enumerate(classifiers): # FIXME
# vals = dpoints[dpoints[:,0] == cond][:,2].astype(np.float)
# pos = [j - (1 - space) / 2. + i * width for j in range(1,len(categories)+1)]
# ax.bar(pos, vals, width=width)
print(os.path.join(CFG.job.path_testing, 'out/%s_benchmark_cloud_classifiers.png' % self.baseN))
fig.savefig(os.path.abspath('./testing/out/%s_benchmark_cloud_classifiers.png' % self.baseN),
format='png', dpi=300, bbox_inches='tight')
mask_clouds = None
# import time
# t1 = time.time()
# import builtins
# data = builtins.CLD_obj(self) # CLD_obj is part of builtins
# print( time.time()-t1, os.path.basename(i), 'cld calc')
# del self.GMS_identifier['logger']
# # print(subset)
# # for i in pathlist_cloud_class_obj:
# # # print(os.path.basename(i))
# # t1 = time.time()
# # # self.path_cloud_class_obj = INP_R.get_path_cloud_class_obj(self.GMS_identifier)
# # # CLD_obj = CLD_P.GmsCloudClassifier(classifier=self.path_cloud_class_obj)
# # # CLD_obj = CLD_P.GmsCloudClassifier(classifier=i)
# # print( time.time()-t1, os.path.basename(i), 'cld class load')
# # t1 = time.time()
# # data = CLD_obj(self)
# # # data = self.CLD_obj(self)
# # print( time.time()-t1, os.path.basename(i), 'cld calc')
if mask_clouds is not None:
if not np.all(np.equal(mask_clouds, mask_clouds.astype(np.uint8))):
warnings.warn('Some pixel values of cloud mask changed during data type '
'conversion within calc_cloud_mask().')
self.mask_clouds = mask_clouds.astype(np.uint8)
# update fill values (fill values written by CLD_obj(self) are not equal to GMS fill values)
outFill = get_outFillZeroSaturated(self.mask_clouds.dtype)[0]
if outFill in mask_clouds and get_mask_classdefinition('mask_clouds', self.satellite)['No Data'] != 0:
warnings.warn('Value %s is assigned to mask_clouds although it already contains this value. '
'Please make sure that the produced cloud mask does not use potential fill values. '
'Otherwise cloud mask results can be affected.' % outFill)
self.apply_nodata_mask_to_ObjAttr('mask_clouds', out_nodata_val=outFill)
if hasattr(self, 'masks') and self.masks is not None:
self.build_combined_masks_array() # update masks
return {'desc': 'mask_clouds', 'row_start': rS, 'row_end': rE, 'col_start': cS, 'col_end': cE,
'data': mask_clouds}
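# A hedged sketch of the tile dict returned above (values illustrative): 'data'
# holds the mask tile and the row/col bounds locate it within the full scene:
#
#     {'desc': 'mask_clouds', 'row_start': 0, 'row_end': 7000,
#      'col_start': 0, 'col_end': 8000, 'data': <uint8 ndarray or None>}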
def calc_corner_positions(self):
"""Get true corner positions in the form
[UL, UR, LL, LR] as [(ULrow,ULcol),(URrow,URcol),...],[(ULLon,ULLat),(URLon,URLat),..]"""
@@ -8,7 +8,6 @@
###############################################################################
from . import geoprocessing
from . import gms_cloud_classifier
from . import L1A_P
from . import L1B_P
from . import L1C_P
@@ -17,7 +16,6 @@ from . import L2B_P
from . import L2C_P
__all__ = ['geoprocessing',
'gms_cloud_classifier',
'L1A_P',
'L1B_P',
'L1C_P',
# coding: utf-8
from copy import copy
import dill
__author__ = 'Andre Hollstein'
class GmsCloudClassifier(object):
def __init__(self, classifier):
"""
Classifier wrapper for gms_preprocessing.
:param classifier: either the file name of a dilled 'general_classifier' object or an instance of such an object;
the object only needs the .n_channels/.n_channels_data and .predict attributes
:return: instance
"""
if isinstance(classifier, str):
with open(classifier, "rb") as F:
self.classifier = dill.load(F)
else:
self.classifier = copy(classifier)
def __call__(self, gms_l1a):
"""
Predict classification results
:param gms_l1a: gms level1a object
:return: numpy array with classification results corresponding to l1a input
"""
return self.classifier.predict(
gms_l1a.arr.reshape((-1, self.classifier.n_channels_data))).reshape(gms_l1a.arr.shape[:2])
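# A minimal sketch of the duck-typed protocol expected here: any object that
# exposes .n_channels, .n_channels_data and .predict can be wrapped. The
# _ConstantClassifier below is a hypothetical stand-in, not a real GMS
# classifier:
#
#     import numpy as np
#
#     class _ConstantClassifier(object):
#         n_channels = 6
#         n_channels_data = 6
#
#         @staticmethod
#         def predict(spectra):
#             # label every input spectrum as class 0
#             return np.zeros(spectra.shape[0], dtype=np.uint8)
#
#     gms_clf = GmsCloudClassifier(classifier=_ConstantClassifier())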
if __name__ == "__main__":
print("Start Test")
from glob import glob
from matplotlib.pyplot import imshow, colorbar, savefig, close, figure
import sys
from datetime import datetime
sys.path.append("/home/danscheff/gms_preprocessing/") # FIXME
fn_l1a = glob("./clfs/ETM+*.pkl")[0] # gms l1a object
with open(fn_l1a, "rb") as fl:
l1a = dill.load(fl)
fns_clf = glob("./clfs/*.dill") # classifier object filenames
now_str = datetime.now().strftime("%Y_%m_%d_%H_%M_%S")
print("suffix:", now_str)
for fn_clf in fns_clf:
print(fn_clf)
gms_clf = GmsCloudClassifier(classifier=fn_clf)
res = gms_clf(l1a)
fig = figure()
imshow(res)
colorbar()
savefig(fn_clf.replace(".dill", now_str + "_a_.jpg"))
fig.clear()
close()
with open(fn_clf, "rb") as fl:
inf = dill.load(fl)
clf = dill.load(fl)
for key, value in inf.items():
print(key, "->", value)
gms_clf = GmsCloudClassifier(classifier=clf)
res = gms_clf(l1a)
fig = figure()
imshow(res)
colorbar()
savefig(fn_clf.replace(".dill", now_str + "_b_.jpg"))
fig.clear()
close()
print("EEooFF")
# -*- coding: utf-8 -*-
from copy import copy
from random import sample
from operator import itemgetter
import random
from inspect import getargspec # FIXME
import numpy as np
from scipy.ndimage.filters import gaussian_filter
from sklearn.ensemble import AdaBoostClassifier
from sklearn.feature_selection import chi2
from sklearn.cross_validation import train_test_split # FIXME
__author__ = "Andre Hollstein"
def to_clf(inp):
""" helper function which sets the type of features and assures numeric values
:param inp:
:return:
"""
return np.nan_to_num(np.array(inp, dtype=np.float))
def save_divide(d1, d2):
""" save division without introducing NaN's
:param d1:
:param d2:
:return: d1/d2
"""
dd1 = to_clf(d1)
dd2 = to_clf(d2)
dd2[dd2 == 0.0] = 1e-6
dd1 /= dd2
return np.nan_to_num(dd1)
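# A quick illustration of save_divide's zero handling (inputs assumed):
# denominator zeros are replaced by 1e-6 instead of producing inf/NaN:
#
#     save_divide([1.0, 2.0], [0.0, 4.0])  # -> array([1e+06, 0.5])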
clf_functions = {
"ratio": lambda d1, d2: save_divide(d1, d2),
"index": lambda d1, d2: save_divide(d1 - d2, d1 + d2),
"difference": lambda d1, d2: to_clf(d1 - d2),
"channel": lambda d: to_clf(d),
"depth": lambda d1, d2, d3: save_divide((d1 + d2), d3),
}
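# For example, the "index" entry yields a normalized difference (an NDVI-like
# feature when fed NIR and red channels; the values below are illustrative only):
#
#     nir = np.array([0.5, 0.4])
#     red = np.array([0.1, 0.2])
#     clf_functions["index"](nir, red)  # (nir - red) / (nir + red) -> [~0.67, ~0.33]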
class _ToClassifierBase(object):
"""
internal base class for generated classifiers; exists only to share the common __call__
"""
def __init__(self):
""" dummy __init__ which sets all basic needed attributes to none,
need derived classes to implement proper __init__
:return:
"""
self.n_classifiers = None
self.classifiers_fk = None
self.classifiers_id = None
self.clf_functions = None
@staticmethod
def list_np(arr):
try:
return list(arr)
except TypeError:
return list(arr.reshape(1))
def __call__(self, data):
ret = np.zeros((data.shape[0], self.n_classifiers))
for ii, (fn, idx_clf) in enumerate(zip(self.classifiers_fk, self.classifiers_id)):
ret[:, ii] = self.clf_functions[fn](*(data[:, jj] for jj in self.list_np(idx_clf)))
return ret
class ToClassifierDef(_ToClassifierBase):
def __init__(self, classifiers_id, classifiers_fk, clf_functions):
self.n_classifiers = len(classifiers_fk)
self.clf_functions = clf_functions
self.classifiers_id = classifiers_id
self.classifiers_fk = classifiers_fk
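# A minimal sketch (hypothetical channel indices) of a fixed feature set:
# each entry of classifiers_fk names a function in clf_functions, and the
# matching entry of classifiers_id lists the channel indices handed to it by
# _ToClassifierBase.__call__ above:
#
#     to_clf_def = ToClassifierDef(classifiers_id=[np.array([3, 2]), np.array([0])],
#                                  classifiers_fk=["index", "channel"],
#                                  clf_functions=clf_functions)
#     features = to_clf_def(data)  # data: (n_samples, n_channels) float array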
class ToClassifierRnd(_ToClassifierBase):
def __init__(self, clf_functions, xx=None, yy=None, n_classifiers=None, n_channels=None, n_channels_data=None,
channel_selection="equal", scores=None):
self.n_classifiers = n_classifiers
if n_channels_data is None:
self.n_channels_data = xx.shape[1]
else:
self.n_channels_data = n_channels_data
self.clf_functions = clf_functions
if n_channels is None:
self.n_channels = self.n_channels_data
self.chls = np.arange(self.n_channels)
else:
if scores is None:
xxx = copy(xx)
xxx[xxx < 0.0] = 0.0
_scores, _ = chi2(xxx, yy)
_scores -= np.min(_scores)
_scores /= np.max(_scores)
self.scores = _scores
else:
self.scores = scores
self.n_channels = n_channels
self.chls = np.zeros(self.n_channels, dtype=np.int)
if channel_selection == "equal":
for ii, (i1, i2) in \
enumerate(zip(*(lambda x: (x[:-1], x[1:]))(np.linspace(0, self.n_channels_data,
self.n_channels + 1, dtype=np.int)))):
self.chls[ii] = i1 + np.argmax(self.scores[i1:i2])
else:
raise ValueError("wrong value for channel_selection:%s" % channel_selection)
self.__adjust__()
def __adjust__(self):
self.classifiers_fk = [list(self.clf_functions.keys())[sel] for sel in np.random.randint(
low=0, high=(len(self.clf_functions.keys())), size=self.n_classifiers)]
self.classifiers_id = [np.array(sample(
list(self.chls), self.number_of_arguments(self.clf_functions[func_key])), dtype=np.int)
for func_key in self.classifiers_fk]
@staticmethod
def number_of_arguments(func):
return len(getargspec(func).args)
def __test__(clf, xx, yy, sample_weight=None):
if sample_weight is None:
return np.sum(clf.predict(xx) == yy) / np.float(len(yy)) # ratio of correct
else:
# map correct predictions to +1 and wrong predictions to -1, then sum the weights
yy = np.array(clf.predict(xx) == yy, dtype=np.int)
yy[yy == 0] = -1
return np.sum(yy * sample_weight)
def print_corr(clf, xx, yy):
pr_test = clf.predict(xx)
nrm = [len(yy[yy == cl]) for cl in clf.classes_]
bins = [clf.classes[0] - 1] + list(0.5 * (clf.classes[1:] + clf.classes[:-1])) + [clf.classes[-1] + 1]
hh, b1, b2 = np.histogram2d(pr_test, yy, bins=bins)
hh /= nrm
print((" " + len(clf.classes_) * "%5.1f") % tuple(clf.classes_))
for cc, hhh in zip(clf.classes_, hh):
print(("%6.1f:" % cc) + len(clf.classes_) * "%5.2f" % tuple(hhh))
return hh, b1
class ClassicalBayesian(object):
def __init__(self, mk_clf, smoth_min=0.0, smoth_max=2.0, n_bins_min=5, n_bins_max=50, n_runs=150, smoth_dd=10,
max_mb=100.0):
assert hasattr(mk_clf, "__call__")
self.mk_clf = mk_clf
self.params = {"mk_clf": self.mk_clf}
self.sample_weight = None
self.n_bins = None
self.classes = None
self.n_classes = None
self.classes_ = None
self.n_classes_ = None
if hasattr(mk_clf, "__adjust__") is True:
self.mk_clf = mk_clf
self.smoth_min = smoth_min
self.smoth_max = smoth_max
self.n_bins_min = n_bins_min
self.n_bins_max = np.min([n_bins_max,
np.int(np.floor((max_mb / (np.zeros(1, dtype=np.float).nbytes / 1024. ** 2)) ** (
1.0 / self.mk_clf.n_classifiers)))])
assert self.n_bins_max >= self.n_bins_min
self.n_runs = n_runs
self.smoth_dd = smoth_dd
self.smoth = None
self.n_bins = None
self.smoth_values = list(np.around(
np.linspace(smoth_min, smoth_max, np.int(smoth_dd)),
decimals=np.int(np.ceil(-1 * np.log10((smoth_max - smoth_min) / smoth_dd)))))
self.params.update({"smoth_min": self.smoth_min,
"smoth_max": self.smoth_max,
"smoth_dd": self.smoth_dd,
"n_bins_min": self.n_bins_min,
"n_bins_max": self.n_bins_max,
"n_runs": self.n_runs
})
def get_params(self, deep=True):
return self.params
def set_params(self, **params):
if "mk_clf" in params:
ss = copy(self)
ss.__init__(**params)
return ss
else:
for name, value in params.items():
setattr(self, name, value)
return self
def mk_hist(self, xx):
hh, bb = np.histogramdd(xx, self.bns, normed=True)
if self.smoth is not None:
hh = gaussian_filter(hh, self.smoth)
return hh, bb
@staticmethod
def bins4histeq(inn, nbins_ou=10, nbins_in=1000):
"""
returns the bin edges (not counts) for a histogram-equalized binning
assumes numpy arrays
"""
hist, bins = np.histogram(inn.flatten(), nbins_in, normed=True)
cdf = hist.cumsum()
cdf = cdf / cdf[-1]
xxx = np.linspace(0., 1., nbins_ou + 1)
return np.interp(xxx, np.hstack((np.array([0]), cdf)), bins)
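# Usage sketch (random input assumed): the returned edges split the sample
# into bins of roughly equal population rather than equal width:
#
#     edges = ClassicalBayesian.bins4histeq(np.random.rand(10000), nbins_ou=10)
#     assert len(edges) == 11  # nbins_ou + 1 edges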
def set(self, xx, yy, smoth=None, n_bins=5, sample_weight=None):
self.sample_weight = sample_weight
self.n_bins = n_bins
self.classes = np.unique(yy)
self.n_classes = len(self.classes)
self.classes_ = self.classes
self.n_classes_ = self.n_classes
if type(smoth) is dict:
xx_train, xx_test, yy_train, yy_test = train_test_split(xx, yy, test_size=smoth["test_size"],
random_state=42)
left = smoth["min"]
right = smoth["max"]
_ = self.__set__(xx_train, yy_train, smoth=left, sample_weight=sample_weight) # noqa F841 unused
tst_left = self.__test__(xx_test, yy_test, sample_weight=sample_weight)
_ = self.__set__(xx_train, yy_train, smoth=right, sample_weight=sample_weight) # noqa F841 unused
tst_right = self.__test__(xx_test, yy_test, sample_weight=sample_weight)
for i_steps in range(smoth["steps"]):
middle = 0.5 * (left + right)
_ = self.__set__(xx_train, yy_train, smoth=middle, sample_weight=sample_weight) # noqa F841 unused
tst_middle = self.__test__(xx_test, yy_test, sample_weight=sample_weight)
if smoth["debug"] is True:
print(left, middle, right, tst_middle)
print(tst_left, tst_middle, tst_right)
if (tst_middle + tst_left) > (tst_middle + tst_right):
right = copy(middle)
tst_right = copy(tst_middle)
else:
left = copy(middle)
tst_left = copy(tst_middle)
if tst_left > tst_middle:
_ = self.__set__(xx_train, yy_train, smoth=left, sample_weight=sample_weight) # noqa F841 unused
if tst_right > tst_middle:
_ = self.__set__(xx_train, yy_train, smoth=right, sample_weight=sample_weight) # noqa F841 unused
else: # scalar or None
tst = self.__set__(xx, yy, smoth=smoth, sample_weight=sample_weight)
return tst
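# A hedged sketch of the dict form of `smoth` driving the bisection search
# above; the keys are inferred from their use in set(), values illustrative:
#
#     clf.set(xx, yy, n_bins=5,
#             smoth={"test_size": 0.25, "min": 0.0, "max": 2.0,
#                    "steps": 10, "debug": False})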
def __set__(self, xx, yy, smoth=None, sample_weight=None):
self.smoth = smoth
xx_clf = self.mk_clf(xx)
self.bns = [self.bins4histeq(xx, self.n_bins, nbins_in=1000) for xx in xx_clf.transpose()]
self.hh_full, self.bb_full = self.mk_hist(xx_clf)
self.hh = {}
self.hh_n = {}
for cl in self.classes:
self.hh[cl], _ = self.mk_hist(xx_clf[yy == cl, :])
self.hh_n[cl] = np.sum(yy == cl)
tst = self.__test__(xx, yy, sample_weight=sample_weight)
return tst