Sunday, April 8, 2018

Prediction of stockprice using trained weight

This is just a practice project. The prediction is based only on the previous 10 closing prices, so it is quite likely that this information (the columns of the dataset) is not enough to predict the price and that the prediction is not accurate.

1. Prediction of stockprice (Training a model)
2. Prediction of stockprice (Prediction by CSV)
3. Prediction of stockprice with trained weight with Google API

Save the code below as "predict.py".
Then use this code to predict the stock price as shown below (you need 10 rows of data in the CSV; the script predicts the 11th data point):
$ python3.5 predict.py location/of/your_csv.csv

The code:
#-*- coding: utf-8 -*-
import numpy
import pandas
import matplotlib.pyplot as plt
from decimal import *
import sys
from keras.models import load_model
from sklearn import preprocessing
from sklearn.preprocessing import StandardScaler
from keras.models import Sequential
from keras.models import model_from_json
from keras.layers.core import Dense, Activation
from keras.layers.recurrent import LSTM
import keras.backend.tensorflow_backend as KTF
import os.path
from datetime import datetime, timedelta

class Prediction :
  """Helpers to turn a price CSV into model inputs and to load a saved model."""

  def load_data(self, data, scaler, window=10):
    """Build sliding windows of scaled closing prices for prediction.

    Args:
      data: DataFrame with at least a 'Date' column (datetime-like, so that a
        timedelta can be added to it) and a 'Close' column. It is not
        modified; the method works on a date-sorted copy.
      scaler: object exposing fit(frame) and transform(frame), e.g. a
        scikit-learn StandardScaler. It is fitted on the 'Close' column, so
        the caller can reuse it afterwards to invert the scaling.
      window: number of consecutive closing prices per input sample.

    Returns:
      A tuple (retX, retY, Dates, Prices):
        retX:   array of shape (n, window, 1) with the scaled input windows.
        retY:   array of shape (n, 1) with the scaled closing price that
                follows each window; NaN for the last window, whose target
                lies beyond the data.
        Dates:  list with the date each window predicts. For the last window
                this is the last known date plus one calendar day (weekends
                and holidays are not skipped).
        Prices: list with the unscaled closing price on that date, or the
                string 'Not applicable.' for the last window.

    Raises:
      ValueError: if window is smaller than 1.
    """
    if window < 1:
      raise ValueError('window must be at least 1')

    # Sort first and take dates, raw prices and scaled prices from the same
    # frame, so that all of them stay aligned row by row even when the CSV is
    # not in ascending date order.
    frame = data.sort_values(by='Date').reset_index(drop=True)
    dates = frame['Date']
    raw_prices = frame['Close'].values

    scaler.fit(frame[['Close']])
    scaled = numpy.asarray(scaler.transform(frame[['Close']]), dtype=float)
    scaled = scaled.reshape(-1, 1)

    X, Y = [], []
    Dates = []
    Prices = []
    row_count = len(frame)
    # One window per possible start row, including the final window that ends
    # on the newest row and therefore predicts the first unknown day.
    for start in range(row_count - window + 1):
      target = start + window  # row index the window is a prediction for
      X.append(scaled[start:target])
      if target < row_count:
        Y.append(scaled[target])
        Dates.append(dates.iloc[target])
        Prices.append(float(raw_prices[target]))
      else:
        # The target is in the future: there is no actual price yet.
        Y.append(numpy.array([numpy.nan]))
        Dates.append(dates.iloc[-1] + timedelta(days=1))
        Prices.append('Not applicable.')

    # Explicit reshapes keep the rank stable even when no window fits.
    retX = numpy.array(X, dtype=float).reshape(-1, window, 1)
    retY = numpy.array(Y, dtype=float).reshape(-1, 1)
    return retX, retY, Dates, Prices


  def create_model(self, f_model, model_filename):
    """Load a previously saved Keras model from disk.

    Args:
      f_model: directory that contains the saved model.
      model_filename: file name of the saved model inside f_model.

    Returns:
      The loaded model, or None when the file does not exist (a message is
      printed in that case, so callers must check for None).
    """
    model_path = os.path.join(f_model, model_filename)
    print(model_path)
    if not os.path.isfile(model_path):
      print('Saved parameters weren\'t found')
      return None
    print('Saved parameters found. I will use this file...')
    return load_model(model_path)

if __name__ == "__main__":
  # Script entry point: read a price CSV given on the command line, run the
  # saved model on every window of closing prices, print predicted vs. actual
  # prices plus a rise/fall hit rate, and plot both series.

  f_model = './'
  model_filename = 'stockprice_model.hdf5'
  not_applicable = 'Not applicable.'

  # A missing argument shows up as a too-short sys.argv (IndexError on
  # lookup), so check the length and stop instead of continuing without a path.
  if len(sys.argv) < 2 or sys.argv[1] == "":
    print("Please give a location of the csv file.")
    sys.exit(1)
  csv_loc = str(sys.argv[1])
  print(csv_loc)

  prediction = Prediction()

  # Data
  data = pandas.read_csv(csv_loc)
  data = data.drop('Volume',axis=1)
  data = data.drop('Adj Close',axis=1)

  # NOTE(review): the rename relies on the remaining columns being in this
  # order - confirm against the CSV source.
  data.columns = ['Date', 'Open', 'High', 'Low', 'Close']
  data['Date'] = pandas.to_datetime(data['Date'])
  print(data)
  scaler = StandardScaler()
  x_test, y_test,  Dates, Prices = prediction.load_data(data, scaler)

  model = prediction.create_model(f_model, model_filename)
  if model is None:
    # create_model has already reported the missing file.
    sys.exit(1)
  if len(x_test) == 0:
    print('Not enough rows in the csv file to build a prediction window.')
    sys.exit(1)

  predicted = model.predict(x_test, verbose=1)
  # Undo the scaling once for the whole 2-D prediction array; per-row calls
  # would pass 1-D input, which current scikit-learn rejects.
  # Assumes the model emits one value per window (the plot below names a
  # single 'predict' column).
  predicted_prices = scaler.inverse_transform(predicted)
  flat_predictions = numpy.asarray(predicted_prices).ravel()

  FalseResult = 0
  TrueResult = 0
  for idx, predicted_value in enumerate(flat_predictions):
    predicted_price = float(predicted_value)
    date_label = 'Date:' + str(Dates[idx].year) + '/' + str(Dates[idx].month) + '/' + str(Dates[idx].day)
    print(date_label + ', Closing price (Predicted): '+ str(predicted_price))
    print(date_label + ', Closing price (Actual): '+ str(Prices[idx]))

    # The direction can only be judged when both this row and the previous
    # one carry a real price.
    comparable = (idx > 0
                  and not isinstance(Prices[idx], str)
                  and not isinstance(Prices[idx-1], str))
    if comparable:
      actual_price = float(Prices[idx])
      previous_price = float(Prices[idx-1])
      dif1 = predicted_price - actual_price
      dif2 = previous_price - actual_price
      dif3 = previous_price - predicted_price
      was_high_low_correct = ((dif2 < 0 and dif3 < 0)
                              or (dif2 > 0 and dif3 > 0)
                              or (dif2 == 0 and dif3 == 0))
    else:
      dif1 = not_applicable
      dif2 = not_applicable
      dif3 = not_applicable
      was_high_low_correct = not_applicable
    print('Difference between actual and previous price    :' + str(dif2))
    print('Difference between predicted and previous price :' + str(dif3))
    print('Prediction of high and low was correct?         : ' + str(was_high_low_correct))
    print('Difference between predicted and actual price   : ' + str(dif1))
    print('')
    # Rows that could not be evaluated count neither as hit nor as miss; the
    # placeholder string is truthy and must not be tallied as a hit.
    if comparable:
      if was_high_low_correct :
        TrueResult = TrueResult + 1
      else:
        FalseResult = FalseResult + 1
  print('Num of True: ' + str(TrueResult))
  print('Num of False: ' + str(FalseResult))
  evaluated = FalseResult + TrueResult
  if evaluated > 0:
    print('Rate of true: ' + str((TrueResult/evaluated)*100) + '%')
  else:
    # Avoid dividing by zero when no row could be evaluated.
    print('Rate of true: ' + not_applicable)
  result = pandas.DataFrame(predicted_prices)
  result.columns = ['predict']
  result['actual'] = numpy.asarray(scaler.inverse_transform(y_test)).ravel()
  result.plot()
  plt.show()

You can download the CSV from Yahoo Finance. For the S&P 500, for example, you can download it from here:
https://finance.yahoo.com/quote/%5EGSPC/history?period1=1533394800&period2=1534604400&interval=1d&filter=history&frequency=1d