Saturday, April 14, 2018

Prediction of stockprice (training a model)

This is just a practice project for learning deep learning. The model predicts the next closing price based only on the previous 10 closing prices. It is quite likely that this information (the columns of the dataset) is not enough to predict with, so the predictions may not be accurate.

1. Prediction of stockprice (Training a model)
2. Prediction of stockprice (Prediction by CSV)
3. Prediction of stockprice with trained weight with Google API



Save the code shown below as stockprice.py and use it this way:
$ python3.5 stockprice.py location/of/your_csv.csv
You can specify either a file or a folder path. If it is a file, the model is trained on that single CSV file. If it is a folder, all the CSV files in the folder are combined and the model is trained on the combined data.
You can download stock price CSV files from Yahoo Finance.

The code:
#-*- coding: utf-8 -*-
import numpy
import pandas
import matplotlib.pyplot as plt
import sys
from pathlib import Path
from keras.models import load_model
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.models import model_from_json
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.noise import AlphaDropout
from keras.layers.recurrent import LSTM
import keras.backend.tensorflow_backend as KTF
import os.path

class Prediction :
  """Build and train an LSTM that predicts the next row of a series.

  The hyper-parameters set in ``__init__`` (window length, features per
  time step, LSTM size) are read by ``create_model`` when a new network
  has to be built.
  """

  def __init__(self):
    # Number of consecutive time steps in one input window.
    self.length_of_sequences = 10
    # Features per time step; also the width of the output layer.
    self.in_out_neurons = 1
    # Number of units in the LSTM layer.
    self.hidden_neurons = 600


  def load_data(self, data, n_prev=10):
    """Cut ``data`` into sliding windows and the row following each window.

    Args:
      data: pandas DataFrame with one row per time step.
      n_prev: number of consecutive rows in each input window.

    Returns:
      A tuple ``(X, Y)`` of numpy arrays. For a frame with ``n`` rows and
      ``c`` columns, ``X`` has shape ``(n - n_prev, n_prev, c)`` and ``Y``
      has shape ``(n - n_prev, c)``; ``Y[i]`` is the row right after
      window ``X[i]``. When ``data`` has ``n_prev`` rows or fewer, both
      arrays are empty.
    """
    # DataFrame.as_matrix() was removed in pandas 1.0; .values is the
    # equivalent that exists in old and new releases. Converting once also
    # avoids re-materialising an array for every window.
    values = data.values
    X, Y = [], []
    for i in range(len(values) - n_prev):
      X.append(values[i:(i+n_prev)])
      Y.append(values[i+n_prev])
    retX = numpy.array(X)
    retY = numpy.array(Y)
    return retX, retY


  def create_model(self, f_model, model_filename) :
    """Load a previously saved model or build a fresh, compiled one.

    Args:
      f_model: directory in which the model file is looked up.
      model_filename: file name of the saved model inside ``f_model``.

    Returns:
      The model loaded from ``f_model/model_filename`` when that file
      exists; otherwise a new ``Sequential`` model made of one LSTM layer,
      a ``Dense`` layer and a linear activation, compiled with the
      ``"mape"`` loss and the ``"adam"`` optimizer.
    """
    model_path = os.path.join(f_model,model_filename)
    print(model_path)
    if os.path.isfile(model_path):
      print('Saved parameters found. I will use this file...')
      model = load_model(model_path)
    else:
      print('Saved parameters Not found. Creating new one...')
      model = Sequential()
      # Input is (batch, length_of_sequences, in_out_neurons); only the
      # last LSTM output is passed on (return_sequences=False).
      model.add(LSTM(self.hidden_neurons, \
              batch_input_shape=(None, self.length_of_sequences, self.in_out_neurons), \
              return_sequences=False))
      model.add(Dense(self.in_out_neurons))
      model.add(Activation("linear"))
      model.compile(loss="mape", optimizer="adam")
    return model

  def train(self, f_model, model_filename, X_train, y_train) :
    """Obtain a model via ``create_model`` and fit it on the training data.

    Args:
      f_model: directory passed through to ``create_model``.
      model_filename: model file name passed through to ``create_model``.
      X_train: input windows, as produced by ``load_data``.
      y_train: target rows, as produced by ``load_data``.

    Returns:
      The fitted model. This method does not write the model to disk.
    """
    model = self.create_model(f_model, model_filename)
    # Learn
    model.fit(X_train, y_train, batch_size=10, epochs=20)
    return model


if __name__ == "__main__":
  # Script entry point: read one CSV file (or every CSV under a folder),
  # train the model on the first 90% of the closing prices, save the model,
  # and plot predicted vs. actual prices for the remaining 10%.

  f_log = './'
  f_model = './'
  model_filename = 'stockprice_model.hdf5'

  prediction = Prediction()

  # Data
  # A missing argument makes sys.argv[1] raise IndexError (not NameError).
  try:
    csv_loc = str(sys.argv[1])
  except IndexError:
    csv_loc = ""
  if(csv_loc == ""):
    print("Please give a location of the csv file.")
    sys.exit(1)
  print(csv_loc)
  if(os.path.isfile(csv_loc)):
    frames = [pandas.read_csv(csv_loc)]
  elif(os.path.isdir(csv_loc)):
    # glob yields Path objects; convert to str before handing to pandas.
    frames = [pandas.read_csv(str(path)) for path in Path(csv_loc).glob('**/*.csv')]
  else:
    print("This is not a file nor a folder.")
    sys.exit(1)
  if not frames:
    print("No csv file was found in the folder.")
    sys.exit(1)
  # Concatenate once instead of growing the frame file by file. The data is
  # NOT re-read from csv_loc afterwards: doing so would discard the combined
  # frame and fail when csv_loc is a folder.
  data = pandas.concat(frames)

  data = data.drop(['Volume', 'Adj Close'], axis=1)

  data.columns = ['Date', 'Open', 'High', 'Low', 'Close']
  data['Date'] = pandas.to_datetime(data['Date'], format='%Y-%m-%d')
  # Data of closing price
  # NOTE(review): the scaler is fitted on every row, including the rows
  # that are later held out as test data.
  scaler = StandardScaler()
  scaler.fit(data[['Close']])
  data['Close'] = scaler.transform(data[['Close']])
  data = data.sort_values(by='Date')
  data = data.reset_index(drop=True)
  data = data.loc[:, ['Date', 'Close']]

  # 10% of the data is used as test data.
  split_pos = int(len(data) * 0.9)
  x_train, y_train = prediction.load_data(data[['Close']].iloc[0:split_pos], prediction.length_of_sequences)
  x_test,  y_test  = prediction.load_data(data[['Close']].iloc[split_pos:], prediction.length_of_sequences)

  model = prediction.train(f_model, model_filename, x_train, y_train)

  predicted = model.predict(x_test)
  print('save weights')
  model.save(os.path.join(f_model,model_filename))
  # Undo the standardisation so both curves are plotted in price units.
  result = pandas.DataFrame(scaler.inverse_transform(predicted))
  result.columns = ['predict']
  # inverse_transform returns an (n, 1) array; flatten it for the column.
  result['actual'] = scaler.inverse_transform(y_test).ravel()
  result.plot()
  plt.show()