You are on page 1 of 1

In [61]: import numpy as np # linear algebra

import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)


from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
from keras.models import model_from_json
from sklearn.cross_validation import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt
import math

# Seed NumPy's global random generator so NumPy-driven randomness repeats between runs.
# NOTE(review): the Keras backend may draw from its own generator, which this call does not seed — confirm.
In [62]: np.random.seed(7)

# Read the price CSV (path is relative to the notebook's working directory) into a DataFrame.
In [63]: msft_dataset = pd.read_csv('040618.csv')


# Preview the first rows to check the columns that were parsed.
msft_dataset.head()

Out[63]:
Local time Open High Low Close Volume

0 01.04.2018 00:00:00.000 1.232 1.232 1.232 1.232 0.0

1 01.04.2018 00:01:00.000 1.232 1.232 1.232 1.232 0.0

2 01.04.2018 00:02:00.000 1.232 1.232 1.232 1.232 0.0

3 01.04.2018 00:03:00.000 1.232 1.232 1.232 1.232 0.0

4 01.04.2018 00:04:00.000 1.232 1.232 1.232 1.232 0.0

# Display the dtype pandas inferred for every column.
In [64]: msft_dataset.dtypes

Out[64]: Local time object


Open float64
High float64
Low float64
Close float64
Volume float64
dtype: object

# Downcast 'Close' to the smallest float dtype that can hold its values (the column is overwritten).
In [65]: msft_dataset['Close'] = pd.to_numeric(msft_dataset['Close'], downcast='float')

# Make 'Local time' the row index, mutating the frame in place; the labels stay as strings.
# NOTE(review): after this call 'Local time' is no longer a column, so re-running the cell
# on the same kernel presumably raises KeyError — confirm.
In [66]: msft_dataset.set_index('Local time',inplace=True)


# Summarise the index range, non-null counts and dtypes.
msft_dataset.info()

<class 'pandas.core.frame.DataFrame'>
Index: 110880 entries, 01.04.2018 00:00:00.000 to 16.06.2018 23:59:00.000
Data columns (total 5 columns):
Open 110880 non-null float64
High 110880 non-null float64
Low 110880 non-null float64
Close 110880 non-null float32
Volume 110880 non-null float64
dtypes: float32(1), float64(4)
memory usage: 4.7+ MB

# In [67]:
# Large canvas so the individual bars of the long series stay distinguishable.
plt.rcParams["figure.figsize"] = [80,32]

# Closing prices as an (n_rows, 1) column array, the 2-D layout the scaler and
# the windowing helper work with further down.
msft_close = msft_dataset['Close'].values.reshape(-1, 1)
plt.plot(msft_close)
plt.show()

# Scaler that maps the data linearly onto the range [0, 1].
In [68]: scaler = MinMaxScaler(feature_range=(0,1))

# Fit the scaler on the whole closing-price column and overwrite msft_close with the scaled values.
# NOTE(review): the fit runs before the train/test split in the following cell, so the min/max
# used for scaling are also taken from rows that end up in the test set — confirm this is acceptable.
In [69]: msft_close = scaler.fit_transform(msft_close)


msft_close

Out[69]: array([[0.8963785 ],
[0.8963785 ],
[0.8963785 ],
...,
[0.10328674],
[0.10328674],
[0.10328674]], dtype=float32)

# In [70]:
# Chronological 70/30 split: the earliest rows are used for training and the
# remaining rows are held out for testing (no shuffling).
train_size = int(0.7 * len(msft_close))
test_size = len(msft_close) - train_size

msft_train = msft_close[:train_size, :]
msft_test = msft_close[train_size:, :]

# In [71]:
def create_ts(ds, series):
    """Turn a 2-D series into supervised (window, next value) pairs.

    Parameters
    ----------
    ds : array-like, shape (n_rows, n_columns)
        Source data; only column 0 is read.
    series : int
        Window length, i.e. how many consecutive rows make up one input sample.

    Returns
    -------
    X : numpy.ndarray, shape (n_samples, series)
        X[i] holds ds[i:i + series, 0].
    Y : numpy.ndarray, shape (n_samples,)
        Y[i] holds ds[i + series, 0], the value immediately after window i.

    Notes
    -----
    n_samples is max(len(ds) - series - 1, 0). The final possible window is
    left out on purpose: the plotting cell later in the notebook computes its
    offsets from exactly this count. When ds is too short for a single window
    the result is empty but still has the (0, series) / (0,) shapes, so a
    following reshape to (samples, steps, features) keeps working.

    Raises
    ------
    ValueError
        If series is smaller than 1.
    """
    if series < 1:
        raise ValueError("series must be a positive window length, got %r" % (series,))

    ds = np.asarray(ds)
    n_samples = max(len(ds) - series - 1, 0)

    # Row i of the index grid is [i, i + 1, ..., i + series - 1]; indexing with
    # the grid gathers every window in one step instead of a Python loop.
    window_rows = np.arange(n_samples)[:, np.newaxis] + np.arange(series)[np.newaxis, :]
    X = ds[window_rows, 0]

    # Copy so the targets do not alias the caller's array.
    Y = ds[series:series + n_samples, 0].copy()
    return X, Y

# Window length: every sample consists of 60 consecutive rows, and the target is the row that follows.
series = 60

# Windows are built separately for each split, so no window mixes training and test rows.
trainX, trainY = create_ts(msft_train, series)


testX, testY = create_ts(msft_test, series)

# Preview the training windows: one row per sample, one column per step of the window.
In [72]: trainX

Out[72]: array([[0.8963785 , 0.8963785 , 0.8963785 , ..., 0.8963785 , 0.8963785 ,


0.8963785 ],
[0.8963785 , 0.8963785 , 0.8963785 , ..., 0.8963785 , 0.8963785 ,
0.8963785 ],
[0.8963785 , 0.8963785 , 0.8963785 , ..., 0.8963785 , 0.8963785 ,
0.8963785 ],
...,
[0.23800373, 0.23733807, 0.2378931 , ..., 0.23600483, 0.23733807,
0.23722649],
[0.23733807, 0.2378931 , 0.2378931 , ..., 0.23733807, 0.23722649,
0.2366724 ],
[0.2378931 , 0.2378931 , 0.2386713 , ..., 0.23722649, 0.2366724 ,
0.2366724 ]], dtype=float32)

# First training window.
In [73]: trainX[0]

Out[73]: array([0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785,


0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785,
0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785,
0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785,
0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785,
0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785,
0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785,
0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785,
0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785,
0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785, 0.8963785],
dtype=float32)

# Spot-check a window further into the series.
# NOTE(review): 7517 looks like an arbitrarily chosen row — confirm.
In [74]: trainX[7517]

Out[74]: array([0.819746 , 0.81919 , 0.8169699 , 0.8175249 , 0.8169699 ,


0.8169699 , 0.81652546, 0.8157482 , 0.81585884, 0.8125267 ,
0.81152725, 0.81075 , 0.8114157 , 0.8108616 , 0.8114157 ,
0.80997276, 0.8105278 , 0.81152725, 0.8103056 , 0.81163883,
0.8121929 , 0.8126383 , 0.8123045 , 0.8126383 , 0.8126383 ,
0.8127489 , 0.81186104, 0.81119347, 0.81152725, 0.81163883,
0.81152725, 0.81208324, 0.8140812 , 0.81452656, 0.81530285,
0.81474876, 0.81474876, 0.814415 , 0.8137493 , 0.8126383 ,
0.8103056 , 0.80986214, 0.80975056, 0.80652905, 0.8085289 ,
0.8084173 , 0.80708504, 0.8075285 , 0.80886173, 0.80919456,
0.80930614, 0.80808544, 0.80830574, 0.80797386, 0.80819607,
0.8075285 , 0.80775166, 0.80797386, 0.80808544, 0.80808544],
dtype=float32)

# In [75]:
# The LSTM layer takes 3-D input laid out as (samples, time steps, features).
# Every window carries one feature, so a trailing axis of length 1 is added.
trainX = trainX.reshape(trainX.shape[0], trainX.shape[1], 1)
testX = testX.reshape(testX.shape[0], testX.shape[1], 1)

# In [77]:
# Network: one 4-unit LSTM reading the whole window, followed by a single
# dense output unit that emits the predicted (scaled) next value.
model = Sequential([
    LSTM(4, input_shape=(series, 1)),
    Dense(1),
])
model.compile(optimizer='adam', loss='mse')

# One pass over the training windows in mini-batches of 60 samples.
model.fit(trainX, trainY, epochs=1, batch_size=60)

Epoch 1/1
77555/77555 [==============================] - 43s - loss: 0.0072

Out[77]: <keras.callbacks.History at 0x200a9359cf8>

In [78]: #test this model out


# Predictions for both splits; at this point they are still in the scaler's output range.
trainPredictions = model.predict(trainX)
testPredictions = model.predict(testX)
#unscale predictions
trainPredictions = scaler.inverse_transform(trainPredictions)
testPredictions = scaler.inverse_transform(testPredictions)
# Wrapping the 1-D targets in a list hands the scaler a single-row 2-D input, so the
# unscaled targets come back with shape (1, n_samples) and are read as trainY[0] / testY[0]
# in the scoring cell.
# NOTE(review): all four names are overwritten with their unscaled versions, so running this
# cell a second time would inverse-transform values that are already unscaled.
trainY = scaler.inverse_transform([trainY])
testY = scaler.inverse_transform([testY])

# In [79]:
# Root-mean-squared error between the unscaled targets and the unscaled
# predictions. The targets are stored as a single row, hence the [0]; the
# predictions are a single column, hence the [:, 0].
trainScore = math.sqrt(mean_squared_error(trainY[0], trainPredictions[:, 0]))
testScore = math.sqrt(mean_squared_error(testY[0], testPredictions[:, 0]))

# The value has to be interpolated with %; passing it as a second print()
# argument printed the raw "%.2f" placeholder followed by the number.
# Five decimals are used because the errors are of the order of 1e-3 and
# would all round to 0.00 with two.
print('Train score: %.5f rmse' % trainScore)
print('Test score: %.5f rmse' % testScore)

Train score: %.2f rmse 0.00042021747565900846


Test score: %.2f rmse 0.0015036085035382669

# In [80]:
# Overlay the predictions on the actual closes. Each overlay is a NaN-filled
# array shaped like the full series, so only the predicted stretch is drawn.

# The first training prediction belongs to the row right after the first
# window, i.e. row index `series`.
train_plot = np.full_like(msft_close, np.nan)
train_plot[series:series + len(trainPredictions), :] = trainPredictions

# create_ts yields len(split) - series - 1 samples, so the first test
# prediction sits at len(trainPredictions) + 2 * series + 1 and the last one
# at the second-to-last row of the series.
test_plot = np.full_like(msft_close, np.nan)
test_plot[len(trainPredictions) + (series * 2) + 1:len(msft_close) - 1, :] = testPredictions

# Actual prices first (converted back from the scaled range), then both overlays.
plt.rcParams["figure.figsize"] = [80,32]
plt.plot(scaler.inverse_transform(msft_close))
plt.plot(train_plot)
plt.plot(test_plot)

Out[80]: [<matplotlib.lines.Line2D at 0x200aa9d20f0>]

# In [56]:
# Serialise the network definition to JSON and write it next to the notebook.
# NOTE(review): to_json() describes the architecture only; the trained weights
# are not part of this file and would have to be stored separately (e.g. with
# model.save_weights) for the model to be restored with what it learned.
model_json = model.to_json()

# The write must be inside the `with` body so the file handle is still open
# when it runs and is closed automatically afterwards.
with open("model.json", "w") as json_file:
    json_file.write(model_json)

In [57]:

You might also like