Commit 7645024b authored by John Doe

All necessary adjustments for the Boston example - including a saved model, 448 model combinations and their performance measurements
parent ef40b592
# Surrogate experiments controller
# Author: Janis Jatnieks, janis.jatnieks@gfz-potsdam.de, +49(0)157 3245 1188
# This calls all the other dependencies
source("Surrogate_playground.R")
# get the extra MASS datasets, required for Boston
require(data.table)
require(MASS)
# use the standard Boston dataset for a regression demo with 8 preprocessors
# and 60 methods for fitting
IN = subset(Boston,select=c("crim","zn","indus","chas","nox","rm","age","dis","rad","tax","ptratio", "black","lstat") )
# set the output columns you wish to predict here
# if you use just one column, make sure it is kept in a data.frame-like type such as data.table, for example:
OUT = as.data.table(subset(Boston,select=c("medv") ))
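# a purely illustrative sketch (not part of the original run): the same
# pattern extends to multiple targets, e.g. predicting both medv and nox:
# OUT = as.data.table(subset(Boston,select=c("medv","nox") ))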
# run the model fitting, validation and selection work-flow with given methods
# if no models are specified as the first argument, all will be tried - this could take a while!
Main(c()
,preprocessing_ind = c(17,18,19,20,21,22,23,24) # 8 most useful caret preprocessing transforms
,input_data = IN # you can alternatively supply filenames here if you have clearly split input
,output_data = OUT # and output data in files with corresponding row indexes
,seed = 105
# use of parallelization for training, validation and preprocessing steps
,preproc_para = F, train_para = F, run_para = F
# MASE is a nice error measure - not subject to div/0 and comparable across different columns
,selection_criteria = "MASE"
# run caret tuning routines, if applicable
,tuner = F
# use this fraction of the data for training
,training_samples = 0.7
)
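# a minimal smoke-test sketch, assuming Main() accepts caret method names in
# its first argument (as the comment above suggests): restrict the run to a
# couple of fast linear models instead of trying all of them
# Main(c("rlm","glm")
#     ,preprocessing_ind = c(17,18)
#     ,input_data = IN, output_data = OUT
#     ,seed = 105
#     ,preproc_para = F, train_para = F, run_para = F
#     ,selection_criteria = "MASE"
#     ,tuner = F
#     ,training_samples = 0.7
#     )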
@@ -27,7 +27,7 @@ createTuneGrids <- function() {
list(
`bayesglm` = NULL
,`rlm` = NULL # tends to perform well and fast sometimes
-#,`gaussprLinear` = NULL # consumes too much time / resources / dies
+,`gaussprLinear` = NULL # consumes too much time / resources / dies
,`glm` = NULL
#,`nnls` = NULL # the predict method is broken for this one, try after update to caret
,`lmStepAIC` = NULL # decent for simpler cases, fast, but fails often too
@@ -43,15 +43,15 @@ createTuneGrids <- function() {
# very slow in prediction, bad results
#,`mlp` = list( data.frame ( size=4 ) ) # very slow and bad predictions
# these rbf guys are slow and give catastrophically horrifying results, maybe their parameters are completely off...
-#,`rbf` = list( data.frame ( size=5 ) ) # very slow and bad predictions
+,`rbf` = list( data.frame ( size=5 ) ) # very slow and bad predictions
#,`rbfDDA` = list( data.frame ( negativeThreshold=0.1 ) ) # really slow, mostly fails
-#,`Boruta` = list( data.frame ( mtry=ncol(Fin) ) ) # slow, bad predictions
+,`Boruta` = list( data.frame ( mtry=ncol(Fin) ) ) # slow, bad predictions
,`qrf` = list( data.frame ( mtry=7 ) ) # kind of slow in prediction, sometimes very good results
,`parRF` = list( data.frame ( mtry=7 ) ) # kind of slow in prediction, sometimes very good results
,`Rborist` = list( data.frame ( predFixed=ncol(Fin) ) ) # kind of slow in prediction, sometimes very good results
-#,`cforest` = list( data.frame ( mtry=ncol(Fin) ) ) # resource hog, too slow in prediction, tolerable results, but not very good
+,`cforest` = list( data.frame ( mtry=ncol(Fin) ) ) # resource hog, too slow in prediction, tolerable results, but not very good
# blackboost - not bad, but not very good, slowish also
-#,`blackboost` = list( data.frame ( mstop=50, maxdepth=ncol(Fin) ) ) # I also killed this one
+,`blackboost` = list( data.frame ( mstop=50, maxdepth=ncol(Fin) ) ) # I also killed this one
,`kernelpls` = list( data.frame ( ncomp = seq(1,ncol(Fin) ) ) )
#,`gaussprRadial` = list( data.frame ( sigma=c(0.5,1) )) # too slow
,`pcr` = list( data.frame ( ncomp = seq( 1, ncol(Fin)-1 ) ) ) # perform well/decent
@@ -74,8 +74,8 @@ createTuneGrids <- function() {
#,`nodeHarvest` = list( expand.grid( maxinter=rev(seq(1,ncol(Fin),1)),mode="mean") ) # resource hog, slow with weak results
# this one needs a better understanding of its tuning; maybe useful, but slow and a lot of work
,`deepboost` = list( data.frame ( num_iter= 100, tree_depth=ncol(Fin), beta=0, lambda=0.00125, loss_type="e" ) )
-,`logreg` = list( data.frame ( treesize =100,ntrees=100 ) ) # something about predicting binary stuff
-,`ANFIS` = list( data.frame ( num.labels=5,max.iter=10 ) ) # way too slow...probably
+#,`logreg` = list( data.frame ( treesize =100,ntrees=100 ) ) # not very suitable for regression
+#,`ANFIS` = list( data.frame ( num.labels=5,max.iter=10 ) ) # way too slow...
,`gbm` = list( expand.grid( n.trees = 1000,
#n.cores = 8,
interaction.depth = seq(2,5,1),
@@ -98,7 +98,7 @@ createTuneGrids <- function() {
,`ctree` = list( data.frame( mincriterion=0.99 )) # does not perform well and is a resource hog
,`ctree2` = list( data.frame( maxdepth=seq(2,ncol(Fin)*3,1)) ) # does not perform well
,`widekernelpls` = list( data.frame( ncomp=seq(2,ncol(Fin) ) ) ) # our problem is unsuitable for this method
-#,`rvmLinear` = NULL # way way too slow
+,`rvmLinear` = NULL # way way too slow
# cubist is really slow - roughly 100x slower than the others with this tune grid
,`cubist` = list( data.frame( neighbors=seq(3, 9, 1),committees=seq(3,9,1) ) )
,`svmRadialCost` = list( data.frame( C=c( 0.1, 0.001, 0.0001 ) ) ) # this model seems to perform badly
@@ -108,9 +108,9 @@ createTuneGrids <- function() {
,`rpart1SE` = NULL
,`rpart2` = list( expand.grid( maxdepth = seq(1, 30, 1) ),
list( cp=0.00001, minbucket=2,minsplit=2) )
-#,`rpart` = list( data.frame( cp=c(0.0001,0.00001), # tune grid with officially tunable parameters
-# list( minsplit=2, minbucket=2) )) # other parameters that are given to train through passthrough
-# minsplit=2, minbucket=2 )) # other parameters that are given to train through passthrough
+,`rpart` = list( data.frame( cp=c(0.0001,0.00001), # tune grid with officially tunable parameters
+list( minsplit=2, minbucket=2) )) # other parameters that are given to train through passthrough
+#minsplit=2, minbucket=2 )) # other parameters that are given to train through passthrough
)
# for this we take the most extreme parameter combinations from the multi tune grid
singularTuneGrids <<- lapply( seq(1, length(multiTuneGrids), 1), FUN = function(i) {
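# a minimal sketch of the idea above, assuming "most extreme" means keeping
# only the first and last row of each method's tune grid:
# extreme_rows <- function(g) g[c(1, nrow(g)), , drop = FALSE]
# extreme_rows( expand.grid( n.trees=1000, interaction.depth=seq(2,5,1) ) )
# keeps only the depth-2 and depth-5 combinations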
@@ -643,8 +643,8 @@ Main <- function(surrogate_types = c(),
}
#originalIOranges <<- GetRanges( Fin )
cat("Percentages of unique values in columns of input and output data:\n")
print( uvt(Fin,Fout) )
#cat("Percentages of unique values in columns of input and output data:\n")
#print( uvt(Fin,Fout) )
cat("\nNormalization ranges:\n")
print( originalIOranges )
output,method,preprocessing,speed,method_realname,RMSE,RSS,SAD,MAD,MASE,AME,RSSQ,model_id,MASE_speed_score,min_MASE
medv,brnn,range,0.00200000000040745,Bayesian Regularized Neural Networks,4.31012040679091,2823.72496399739,388.342279993718,2.55488342101131,0.403242956571774,23.2704632450485,53.1387331802085,356,0.0365327946913391,0.403242956571774
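The table above is the saved measurements file: one row per output column, method and preprocessing combination. A minimal sketch of how it could be inspected with data.table (the filename is an assumption - substitute whatever path the run wrote):

require(data.table)
# load the per-model performance table (columns as in the header above)
res <- fread("boston_model_performance.csv") # hypothetical filename
# MASE is the selection criterion used in the controller script; lower is
# better (the model's MAE scaled by the MAE of a naive reference predictor)
setorder(res, MASE)
# best method/preprocessing combination for each output column
print( res[, .SD[1], by=output][, .(output, method, preprocessing, MASE, RMSE)] )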