Commit a2e8b975 authored by Daniel Scheffler
Browse files

Restricted tree depth of Random Forest Regressors to 10 to avoid overfitting...

Restricted tree depth of Random Forest Regressors to 10 to avoid overfitting and to drastically reduce file size of classifiers.
parent dba87f61
Pipeline #3188 passed with stage
in 18 minutes and 9 seconds
......@@ -1360,6 +1360,8 @@ class ClusterClassifier_Generator(object):
kwargs.update(dict(n_jobs=CPUs))
if 'n_estimators' not in kwargs:
kwargs.update(dict(n_estimators=CFG.spechomo_rfr_n_trees)) # we statically use 50 trees for RFR
if 'max_depth' not in kwargs:
kwargs.update(dict(max_depth=CFG.spechomo_rfr_tree_depth)) # we statically use a maximum depth of 10
# build the classifier collections with separate classifiers for each cluster
for src_cube in self.refcubes: # type: RefCube
......
......@@ -331,6 +331,7 @@ class JobConfig(object):
self.spechomo_method = gp('spechomo_method')
self.spechomo_n_clusters = gp('spechomo_n_clusters')
self.spechomo_rfr_n_trees = 50 # this is a static config value, not a user option
self.spechomo_rfr_tree_depth = 10 # this is a static config value, not a user option
self.spechomo_classif_alg = gp('spechomo_classif_alg')
self.spechomo_kNN_n_neighbors = gp('spechomo_kNN_n_neighbors')
self.spechomo_estimate_accuracy = gp('spechomo_estimate_accuracy')
......
......@@ -171,7 +171,7 @@ class Test_ClusterClassifier_Generator(unittest.TestCase):
"""Test creation of random forest regression classifiers."""
CCG = ClusterClassifier_Generator([refcube_l8, refcube_l5])
CCG.create_classifiers(outDir=self.tmpOutdir.name, method='RFR', n_clusters=1,
**dict(n_jobs=-1, n_estimators=20))
**dict(n_jobs=-1, n_estimators=20, max_depth=10))
outpath_cls = os.path.join(self.tmpOutdir.name,
'RFR_trees%d_clust1__Landsat-8__OLI_TIRS.dill' % self.config.spechomo_rfr_n_trees)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment