# Neural_Network_with_linear_model.py
#!/usr/bin/env python3
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import csv
from datetime import datetime, timedelta

from keras.models import Sequential
from keras.layers import Dense,Dropout,BatchNormalization, Conv2D
from keras import optimizers
from keras import backend as K

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
import sklearn
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import MinMaxScaler
import os 
import sys

# Load the LUCAS topsoil dataset: leading columns hold soil properties,
# spectral reflectance bands start at column index 4.
df = pd.read_csv("../../data/LUCAS_full/LUCAS.csv", header=0)

# Target soil properties to predict.
columns = ["SOC", "clay", "CaCO3"]
out_data = df[columns].values
# NOTE(review): the original also assigned input_data here, but that value
# was recomputed (below, after is_window) before ever being read, so the
# dead assignment was removed.

def is_window(column, exclude=((1350, 1450), (1850, 2050))):
    """Return True when a spectral column lies outside the excluded windows.

    Parameters
    ----------
    column : str
        Column name of the form "<prefix><wavelength>" (e.g. "X1400");
        the first character is stripped and the rest parsed as an int.
    exclude : iterable of (low, high) tuples, optional
        Open intervals of wavelengths to reject.  Defaults to the two
        windows the original code hard-coded (1350-1450 and 1850-2050,
        presumably water-absorption bands — TODO confirm).

    Returns
    -------
    bool
        False when the wavelength falls strictly inside any excluded
        window, True otherwise.  Boundary wavelengths are kept (strict
        inequalities, matching the original `>`/`<` checks).
    """
    # NOTE(review): the original printed every rejected wavelength; that
    # debug noise was removed from this predicate.
    wavelength = int(column[1:])
    return not any(low < wavelength < high for low, high in exclude)

# Spectral predictors: every column from index 4 onward, kept as a
# DataFrame first so the (disabled) column filter below could apply.
input_data = df[df.columns[4:]]
# Optional band filtering (disabled): would drop the excluded windows.
#input_data = input_data[[c for c in input_data.columns if is_window(c)]]

input_data = input_data.values

# Optional per-band max normalisation (disabled; `max_values` is not
# defined anywhere in this file).
#for idx, max_value in enumerate(max_values):
#    input_data[idx] = input_data[idx] / max_value

# Earlier sequential 70/30 split, superseded by the shuffled split below.
# input_train = input_data[:int(0.7*len(input_data))]
# input_test = input_data[int(0.7*len(input_data)):]

# output_train = out_data[:int(0.7*len(out_data))]
# output_test = out_data[int(0.7*len(out_data)):]


# Random 70/30 train/test split.  NOTE(review): no random_state is set, so
# the split — and every downstream metric — differs between runs; consider
# pinning a seed for reproducibility.
input_train, input_test, output_train, output_test = train_test_split(input_data, out_data, test_size=0.3, shuffle= True)

# Stage 1: linear baseline mapping spectra to the three soil properties.
reg = LinearRegression().fit(input_train, output_train)
reg_output = reg.predict(input_train)
# Stage 2 target: what the linear model FAILED to capture.
# BUGFIX(review): the original used `reg_output - output_train` and then
# ADDED the network's residual estimate to the baseline below — a perfectly
# trained net would then yield 2*reg(x) - y instead of y.  The residual must
# be (truth - prediction) for `baseline + residual` to reconstruct truth.
residuals = output_train - reg_output

# Drop any state left over from a previous Keras session.
K.clear_session()


# Small MLP that learns the non-linear residual on top of the linear fit.
model = Sequential()
model.add(BatchNormalization(input_shape=(input_train.shape[1],)))
model.add(Dense(64, activation="selu"))
model.add(Dense(128, activation="selu"))
# BUGFIX(review): the output layer used activation="selu", which bounds the
# predicted residuals below (~ -1.76); a regression output must be linear.
model.add(Dense(3))


#model.compile(optimizer="RMSprop", loss="mean_squared_error")
model.compile(optimizer="adam", loss="mean_squared_error")

model.fit(input_train, residuals, epochs=200, batch_size=32)

# Final prediction = linear baseline + learned residual correction.
calculated_cal = model.predict(input_train) + reg.predict(input_train)
calculated_val = model.predict(input_test) + reg.predict(input_test)

# Per-target evaluation of the combined linear + NN model on the training
# (calibration) and held-out (validation) splits.
for i, name in enumerate(columns):
    rmse_train = np.sqrt(mean_squared_error(output_train[:, i], calculated_cal[:, i]))
    rmse_val = np.sqrt(mean_squared_error(output_test[:, i], calculated_val[:, i]))
    R2_train = sklearn.metrics.r2_score(output_train[:, i], calculated_cal[:, i])
    R2_val = sklearn.metrics.r2_score(output_test[:, i], calculated_val[:, i])

    print("Overall:")
    print(name)
    print(R2_train)
    print(R2_val)
    # BUGFIX(review): the RMSE values were computed but never reported.
    print(rmse_train)
    print(rmse_val)

# NOTE(review): removed the leftover `import pdb; pdb.set_trace()` debugger
# breakpoint that halted the script after the final report.