# (Scrape artifacts from a diff viewer, preserved as comments so the file parses)
# ...
# Commits (2)
# This diff is collapsed.
#!/usr/bin/env python3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
from datetime import datetime, timedelta
from keras.models import Sequential
from keras.layers import Dense,Dropout,BatchNormalization, Conv2D
from keras import optimizers
from keras import backend as K
from sklearn.model_selection import train_test_split
import sklearn
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import os
import sys
# Load the LUCAS topsoil dataset; the first CSV row is the header.
df = pd.read_csv("../../data/LUCAS_full/LUCAS.csv",header=0)
# Regression targets: soil organic carbon, clay content, carbonates.
columns = ["SOC","clay","CaCO3"]
out_data = df[columns].values
# Columns from index 4 onward hold the spectral values.
# NOTE(review): input_data is recomputed (and overwritten) further down.
input_data = df[df.columns[4:]].values
def is_window(column):
    """Return True if a spectral column lies outside the water absorption bands.

    Parameters
    ----------
    column : str
        Column name of the form "<prefix><wavelength>" (e.g. "X1400");
        the first character is stripped before parsing the wavelength.

    Returns
    -------
    bool
        False for wavelengths strictly inside 1350-1450 or 1850-2050
        (band edges themselves are kept), True otherwise.
    """
    wavelength = int(column[1:])
    # Exclude the two water absorption bands. Bounds are exclusive, matching
    # the original logic. (Debug print() calls removed.)
    if 1350 < wavelength < 1450:
        return False
    if 1850 < wavelength < 2050:
        return False
    return True
# Spectral input matrix. Uncomment the filter line to drop the water
# absorption bands via is_window().
input_data = df[df.columns[4:]]
# input_data = input_data[[c for c in input_data.columns if is_window(c)]]
input_data = input_data.values
# (Removed: unused per-row max normalisation scaffolding and the dead
# manual 70/30 split — train_test_split below supersedes both.)

# Random 70/30 split. NOTE(review): no random_state is given, so the split
# is different on every run; pass random_state=... for reproducibility.
input_train, input_test, output_train, output_test = train_test_split(
    input_data, out_data, test_size=0.3, shuffle=True)
K.clear_session()

# Small fully connected regressor: input batch-normalisation, two hidden
# SELU layers, and a 3-unit output — one unit per target variable.
# NOTE(review): SELU on the output layer bounds predictions below ~-1.76;
# a linear activation is conventional for regression — confirm intent.
model = Sequential([
    BatchNormalization(input_shape=(input_train.shape[1],)),
    Dense(64, activation="selu"),
    Dense(128, activation="selu"),
    Dense(3, activation="selu"),
])
model.compile(optimizer="adam", loss="mean_squared_error")
model.fit(input_train, output_train, epochs=200, batch_size=32)
# Evaluate on both the calibration (train) and validation (test) sets.
calculated_cal = model.predict(input_train)
calculated_val = model.predict(input_test)
for i in range(len(columns)):
    rmse_train = np.sqrt(mean_squared_error(output_train[:, i], calculated_cal[:, i]))
    rmse_val = np.sqrt(mean_squared_error(output_test[:, i], calculated_val[:, i]))
    R2_train = sklearn.metrics.r2_score(output_train[:, i], calculated_cal[:, i])
    R2_val = sklearn.metrics.r2_score(output_test[:, i], calculated_val[:, i])
    print("Overall:")
    print(columns[i])
    print(R2_train)
    print(R2_val)
    # Bug fix: the RMSE values were computed but never reported.
    print(rmse_train)
    print(rmse_val)
# (Removed: leftover pdb.set_trace() debugging hook.)
#!/usr/bin/env python3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
from datetime import datetime, timedelta
from keras.models import Sequential
from keras.layers import Dense,Dropout,BatchNormalization, Conv2D
from keras import optimizers
from keras import backend as K
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import sklearn
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import os
import sys
# Load the LUCAS topsoil dataset; the first CSV row is the header.
df = pd.read_csv("../../data/LUCAS_full/LUCAS.csv",header=0)
# Regression targets: soil organic carbon, clay content, carbonates.
columns = ["SOC","clay","CaCO3"]
out_data = df[columns].values
# Columns from index 4 onward hold the spectral values.
# NOTE(review): input_data is recomputed (and overwritten) further down.
input_data = df[df.columns[4:]].values
def is_window(column):
    """Return True if a spectral column lies outside the water absorption bands.

    Parameters
    ----------
    column : str
        Column name of the form "<prefix><wavelength>" (e.g. "X1400");
        the first character is stripped before parsing the wavelength.

    Returns
    -------
    bool
        False for wavelengths strictly inside 1350-1450 or 1850-2050
        (band edges themselves are kept), True otherwise.
    """
    wavelength = int(column[1:])
    # Exclude the two water absorption bands. Bounds are exclusive, matching
    # the original logic. (Debug print() calls removed.)
    if 1350 < wavelength < 1450:
        return False
    if 1850 < wavelength < 2050:
        return False
    return True
# Spectral input matrix. Uncomment the filter line to drop the water
# absorption bands via is_window().
input_data = df[df.columns[4:]]
# input_data = input_data[[c for c in input_data.columns if is_window(c)]]
input_data = input_data.values
# (Removed: dead commented-out normalisation and manual-split scaffolding.)

# Random 70/30 split. NOTE(review): no random_state, so the split differs
# on every run; pass random_state=... for reproducibility.
input_train, input_test, output_train, output_test = train_test_split(
    input_data, out_data, test_size=0.3, shuffle=True)

# First-stage model: ordinary least squares on the raw spectra.
reg = LinearRegression().fit(input_train, output_train)
reg_output = reg.predict(input_train)
# Residuals for the second-stage network to learn.
# BUG FIX: residuals must be (truth - prediction). The original computed
# (prediction - truth); since the combined estimate downstream is
# model.predict(...) + reg.predict(...), the old sign made the network
# push predictions AWAY from the target (2*pred - truth) instead of
# correcting them. (Name kept as "ressiduals" [sic] — later code uses it.)
ressiduals = output_train - reg_output
K.clear_session()

# Second-stage network: learns the linear model's residuals so that
# reg.predict(x) + model.predict(x) forms the combined estimate.
# NOTE(review): SELU on the output layer bounds predictions below ~-1.76;
# a linear activation is conventional for regression — confirm intent.
model = Sequential([
    BatchNormalization(input_shape=(input_train.shape[1],)),
    Dense(64, activation="selu"),
    Dense(128, activation="selu"),
    Dense(3, activation="selu"),
])
model.compile(optimizer="adam", loss="mean_squared_error")
model.fit(input_train, ressiduals, epochs=200, batch_size=32)
# Combined estimate: linear baseline plus the network's residual correction.
calculated_cal = model.predict(input_train) + reg.predict(input_train)
calculated_val = model.predict(input_test) + reg.predict(input_test)
for i in range(len(columns)):
    rmse_train = np.sqrt(mean_squared_error(output_train[:, i], calculated_cal[:, i]))
    rmse_val = np.sqrt(mean_squared_error(output_test[:, i], calculated_val[:, i]))
    R2_train = sklearn.metrics.r2_score(output_train[:, i], calculated_cal[:, i])
    R2_val = sklearn.metrics.r2_score(output_test[:, i], calculated_val[:, i])
    print("Overall:")
    print(columns[i])
    print(R2_train)
    print(R2_val)
    # Bug fix: the RMSE values were computed but never reported.
    print(rmse_train)
    print(rmse_val)
# (Removed: leftover pdb.set_trace() debugging hook.)