Using machine learning to predict Rhine water levels | by Olivier Lejeune | Sep, 2020

A barge laden with coal navigating the Rhine (source: https://commons.wikimedia.org/wiki/File:Coal_barge_Chilandia_on_Rhine_-_looking_south.jpg)
import datetime
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import joblib
# first, we import the data from Excel using the read_excel function
df = pd.read_excel('RhineLSTM.xlsx')
# then, we set the date of the observation as the index
df.set_index('date', inplace=True)
df.head()
# specify columns to plot
columns = [0, 1, 2, 3, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14]
i = 1
values = df.values
# define figure object and size
plt.figure(figsize=(9, 40))
# plot each column with a for loop
for variable in columns:
    plt.subplot(len(columns), 1, i)
    plt.plot(values[:, variable])
    plt.title(df.columns[variable], y=0.5, loc='right')
    i += 1
plt.show()
# histograms of the variables
df.hist(figsize=(9,18))
plt.show()
# calculate dataset mean and standard deviation
mean = df.mean()
std = df.std()
# normalise dataset with the previously calculated values
df_std = (df - mean) / std
# create violin plot
df_std = df_std.melt(var_name='Column', value_name='Normalised')
plt.figure(figsize=(12, 6))
ax = sns.violinplot(x='Column', y='Normalised', data=df_std)
_ = ax.set_xticklabels(df.keys(), rotation=90)
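As an aside (my own check, not in the original post): the manual (df - mean) / std standardisation above is the same transform the modelling script below delegates to Scikit-Learn's StandardScaler, up to the degrees-of-freedom convention; pandas' std() defaults to ddof=1 while StandardScaler uses the population standard deviation (ddof=0).
# illustrative sanity check: manual z-scoring with ddof=0 matches StandardScaler
# (assumes no constant columns, which would give a zero standard deviation)
manual = ((df - df.mean()) / df.std(ddof=0)).values
sk = StandardScaler().fit_transform(df.values)
print(np.allclose(manual, sk))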
import datetime
import matplotlib as mpl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import mean_squared_error
import joblib
# first, we import the data from Excel using the read_excel function
df = pd.read_excel('RhineLSTM.xlsx', sheet_name='Detailed4_MAIN')
# then, we set the date of the observation as the index
df.set_index('date', inplace=True)
# load dataset
values = df.values
# ensure all data is float
values = values.astype('float32')
# normalise each feature variable using Scikit-Learn
scaler = StandardScaler()
scaled = scaler.fit_transform(values)
# save scaler for later use
joblib.dump(scaler, 'scaler.gz')
# specify the number of lagged time steps and features
backward_steps = 7
n_features = df.shape[1]
# convert series to supervised learning
def series_to_supervised(data, n_in=1, n_out=1, dropnan=True):
    n_vars = 1 if type(data) is list else data.shape[1]
    df = pd.DataFrame(data)
    cols, names = list(), list()
    # input sequence (t-n, ... t-1)
    for i in range(n_in, 0, -1):
        cols.append(df.shift(i))
        names += [('var%d(t-%d)' % (j+1, i)) for j in range(n_vars)]
    # forecast sequence (t, t+1, ... t+n)
    for i in range(0, n_out):
        cols.append(df.shift(-i))
        if i == 0:
            names += [('var%d(t)' % (j+1)) for j in range(n_vars)]
        else:
            names += [('var%d(t+%d)' % (j+1, i)) for j in range(n_vars)]
    # put it all together
    agg = pd.concat(cols, axis=1)
    agg.columns = names
    # drop rows with NaN values
    if dropnan:
        agg.dropna(inplace=True)
    return agg
# frame as supervised learning
reframed = series_to_supervised(scaled, backward_steps, 1)
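To see what series_to_supervised produces, here is a small illustrative check of my own (not part of the original pipeline) on a toy two-feature array; with n_in=2 the columns run from var1(t-2) to var2(t), and the first two rows are dropped because the shifts introduce NaNs.
# toy example (illustrative only): two features, four time steps
toy = np.array([[1, 10], [2, 20], [3, 30], [4, 40]], dtype='float32')
demo = series_to_supervised(toy, n_in=2, n_out=1)
print(demo.columns.tolist())
# ['var1(t-2)', 'var2(t-2)', 'var1(t-1)', 'var2(t-1)', 'var1(t)', 'var2(t)']
print(demo.values)  # only the two complete rows survive dropna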
# split into train and test sets
values = reframed.values
threshold = int(0.8 * len(reframed))
train = values[:threshold, :]
test = values[threshold:, :]
# split into inputs and outputs
n_obs = backward_steps * n_features
train_X, train_y = train[:, :n_obs], train[:, -n_features]
test_X, test_y = test[:, :n_obs], test[:, -n_features]
print(train_X.shape, len(train_X), train_y.shape)
# reshape input to be 3-D [samples, timesteps, features]
train_X = train_X.reshape((train_X.shape[0], backward_steps, n_features))
test_X = test_X.reshape((test_X.shape[0], backward_steps, n_features))
print(train_X.shape, train_y.shape, test_X.shape, test_y.shape)
# design network
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.LSTM(64, input_shape=(train_X.shape[1], train_X.shape[2])))
model.add(tf.keras.layers.Dense(1))
model.compile(loss='mae', optimizer='adam')
# define early stopping callback
callback = tf.keras.callbacks.EarlyStopping(monitor='loss', patience=3)
# fit network
history = model.fit(train_X, train_y, epochs=25, callbacks=[callback], batch_size=72, validation_data=(test_X, test_y), verbose=2, shuffle=False)
# plot training history
plt.figure(figsize=(12, 6))
plt.plot(history.history['loss'], label='train')
plt.plot(history.history['val_loss'], label='test')
plt.ylabel('mean absolute error [Kaub, normalised]')
plt.legend()
plt.show()
# make a prediction
yhat = model.predict(test_X)
test_X = test_X.reshape((test_X.shape[0], backward_steps * n_features))
# invert scaling for forecast
inv_yhat = np.concatenate((yhat, test_X[:, -(n_features - 1):]), axis=1)
inv_yhat = scaler.inverse_transform(inv_yhat)
inv_yhat = inv_yhat[:,0]
# invert scaling for actual values
test_y = test_y.reshape((len(test_y), 1))
inv_y = np.concatenate((test_y, test_X[:, -(n_features - 1):]), axis=1)
inv_y = scaler.inverse_transform(inv_y)
inv_y = inv_y[:,0]
# calculate RMSE
rmse = np.sqrt(mean_squared_error(inv_y, inv_yhat))
print('Test RMSE: %.3f' % rmse)
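One last hedged sketch of my own (the file name rhine_lstm.h5 and the reload flow are assumptions, not from the original post): since the scaler was already dumped with joblib, the fitted network can be saved too, and both reloaded later to forecast from the most recent window of scaled observations.
# illustrative only: persist the trained network next to the saved scaler
model.save('rhine_lstm.h5')  # hypothetical file name
# in a later session, reload both artefacts
scaler = joblib.load('scaler.gz')
model = tf.keras.models.load_model('rhine_lstm.h5')
# build one input window from the last backward_steps rows of the scaled data
latest = scaled[-backward_steps:].reshape((1, backward_steps, n_features))
yhat = model.predict(latest)
# invert the scaling, as above, to express the forecast in real units
inv = np.concatenate((yhat, scaled[-1:, 1:]), axis=1)
print(scaler.inverse_transform(inv)[:, 0])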
