Commit 911379a0 authored by Daniel Berger

Python linear model and neural network

parent 133d5cff
This diff is collapsed.
#!/usr/bin/env python3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
from datetime import datetime, timedelta
from keras.models import Sequential
from keras.layers import Dense,Dropout,BatchNormalization, Conv2D
from keras import optimizers
from keras import backend as K
from sklearn.model_selection import train_test_split
import sklearn
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import os
import sys
# Names of the target columns the model is trained to predict.
columns = ["SOC", "clay", "CaCO3"]

# Load the full LUCAS table; row 0 of the CSV supplies the column names.
df = pd.read_csv("../../data/LUCAS_full/LUCAS.csv", header=0)

# Targets as a 2-D array with one column per entry of `columns`.
out_data = df[columns].values

# Every column from position 4 onward is treated as a model input.
feature_columns = df.columns[4:]
input_data = df[feature_columns].values
def is_window(column):
    """Report whether a column label lies outside the two excluded ranges.

    The first character of the label is dropped and the remainder is parsed
    as an integer. Labels whose number falls strictly between 1350 and 1450,
    or strictly between 1850 and 2050, are echoed to stdout and rejected.

    Args:
        column: Label string made of one prefix character followed by an
            integer literal (e.g. ``"x1400"``).

    Returns:
        ``False`` when the number is inside one of the excluded ranges,
        ``True`` otherwise.

    Raises:
        ValueError: If the text after the first character is not an integer.
    """
    band = int(column[1:])
    # Both bounds are exclusive: 1350, 1450, 1850 and 2050 themselves pass.
    for lower, upper in ((1350, 1450), (1850, 2050)):
        if lower < band < upper:
            print(band)
            return False
    return True
# Rebuild the input matrix from every column at position 4 and beyond.
# The is_window() column filter is currently switched off:
#   input_data = input_data[[c for c in input_data.columns if is_window(c)]]
input_data = df[df.columns[4:]].values

# Per-row maximum; only the disabled per-row normalisation referenced it.
max_values = input_data.max(axis=1)

# Random 70/30 split. No seed is given, so the partition differs per run.
input_train, input_test, output_train, output_test = train_test_split(
    input_data,
    out_data,
    test_size=0.3,
    shuffle=True,
)

# Reset the Keras backend session before building the network.
K.clear_session()

# Batch-normalised inputs -> 64 -> 128 -> 3 outputs, SELU throughout.
n_features = input_train.shape[1]
model = Sequential()
for layer in (
    BatchNormalization(input_shape=(n_features,)),
    Dense(64, activation="selu"),
    Dense(128, activation="selu"),
    Dense(3, activation="selu"),
):
    model.add(layer)

# Disabled alternative: optimizer="RMSprop".
model.compile(optimizer="adam", loss="mean_squared_error")
model.fit(input_train, output_train, epochs=200, batch_size=32)

# Predictions on the calibration (training) and validation (held-out) rows.
calculated_cal = model.predict(input_train)
calculated_val = model.predict(input_test)

# Per-target metrics. RMSE is computed but only R2 is printed.
for i, target_name in enumerate(columns):
    train_true, train_pred = output_train[:, i], calculated_cal[:, i]
    test_true, test_pred = output_test[:, i], calculated_val[:, i]

    rmse_train = np.sqrt(mean_squared_error(train_true, train_pred))
    rmse_val = np.sqrt(mean_squared_error(test_true, test_pred))
    R2_train = sklearn.metrics.r2_score(train_true, train_pred)
    R2_val = sklearn.metrics.r2_score(test_true, test_pred)

    print("Overall:")
    print(target_name)
    print(R2_train)
    print(R2_val)

# Drop into the debugger so the results can be inspected interactively.
import pdb
pdb.set_trace()
#!/usr/bin/env python3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
from datetime import datetime, timedelta
from keras.models import Sequential
from keras.layers import Dense,Dropout,BatchNormalization, Conv2D
from keras import optimizers
from keras import backend as K
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import sklearn
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import os
import sys
# Names of the target columns the models are trained to predict.
columns = ["SOC", "clay", "CaCO3"]

# Load the full LUCAS table; row 0 of the CSV supplies the column names.
df = pd.read_csv("../../data/LUCAS_full/LUCAS.csv", header=0)

# Targets as a 2-D array with one column per entry of `columns`.
out_data = df[columns].values

# Every column from position 4 onward is treated as a model input.
feature_columns = df.columns[4:]
input_data = df[feature_columns].values
def is_window(column):
    """Report whether a column label lies outside the two excluded ranges.

    The first character of the label is dropped and the remainder is parsed
    as an integer. Labels whose number falls strictly between 1350 and 1450,
    or strictly between 1850 and 2050, are echoed to stdout and rejected.

    Args:
        column: Label string made of one prefix character followed by an
            integer literal (e.g. ``"x1400"``).

    Returns:
        ``False`` when the number is inside one of the excluded ranges,
        ``True`` otherwise.

    Raises:
        ValueError: If the text after the first character is not an integer.
    """
    band = int(column[1:])
    # Both bounds are exclusive: 1350, 1450, 1850 and 2050 themselves pass.
    for lower, upper in ((1350, 1450), (1850, 2050)):
        if lower < band < upper:
            print(band)
            return False
    return True
# Two-stage model: a linear regression baseline plus a neural network that
# learns the baseline's residuals. The final prediction is the sum of both.

# Rebuild the input matrix from every column at position 4 and beyond.
# The is_window() column filter is currently switched off:
#   input_data = input_data[[c for c in input_data.columns if is_window(c)]]
input_data = df[df.columns[4:]].values

# Random 70/30 split. No seed is given, so the partition differs per run.
input_train, input_test, output_train, output_test = train_test_split(
    input_data, out_data, test_size=0.3, shuffle=True
)

# Stage 1: linear baseline fitted on the training rows.
reg = LinearRegression().fit(input_train, output_train)
reg_output = reg.predict(input_train)

# Stage 2 target: what the baseline still misses, i.e. target - prediction.
# The sign matters because the network output is ADDED to the baseline
# below: baseline + (target - baseline) reconstructs the target. The earlier
# definition (prediction - target) pushed the sum towards
# 2 * baseline - target, away from the true value.
residuals = output_train - reg_output

# Reset the Keras backend session before building the network.
K.clear_session()

# Batch-normalised inputs -> 64 -> 128 -> 3 outputs, SELU throughout.
# NOTE(review): SELU is bounded below (about -1.76), so a SELU output layer
# cannot emit strongly negative residuals. A linear output activation may be
# more appropriate here - confirm before changing the architecture.
model = Sequential()
model.add(BatchNormalization(input_shape=(input_train.shape[1],)))
model.add(Dense(64, activation="selu"))
model.add(Dense(128, activation="selu"))
model.add(Dense(3, activation="selu"))

# Disabled alternative: optimizer="RMSprop".
model.compile(optimizer="adam", loss="mean_squared_error")
model.fit(input_train, residuals, epochs=200, batch_size=32)

# Final prediction = linear baseline + learned residual correction.
calculated_cal = reg.predict(input_train) + model.predict(input_train)
calculated_val = reg.predict(input_test) + model.predict(input_test)

# Per-target metrics. RMSE is computed but only R2 is printed.
for i, target_name in enumerate(columns):
    rmse_train = np.sqrt(mean_squared_error(output_train[:, i], calculated_cal[:, i]))
    rmse_val = np.sqrt(mean_squared_error(output_test[:, i], calculated_val[:, i]))
    R2_train = sklearn.metrics.r2_score(output_train[:, i], calculated_cal[:, i])
    R2_val = sklearn.metrics.r2_score(output_test[:, i], calculated_val[:, i])
    print("Overall:")
    print(target_name)
    print(R2_train)
    print(R2_val)

# Drop into the debugger so the results can be inspected interactively.
import pdb
pdb.set_trace()
Markdown is supported
0%
or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment