티스토리 뷰

LSTM(Long Short-Term Memory)을 사용하여 구글 주가를 예측하는 파이썬 코드를 소개합니다. 코드는 다음과 같은 라이브러리를 사용합니다: numpy, pandas, scikit-learn, keras, matplotlib, pandas_datareader, yfinance.

import math
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
import matplotlib.pyplot as plt
import pandas_datareader as web
import yfinance as yf

먼저, yfinance 라이브러리를 이용하여 구글 주가 데이터를 다운로드합니다.

# Fetch GOOG price history from Yahoo Finance for the fixed study window.
df = yf.download(
    'GOOG',
    start='2015-01-03',
    end='2022-04-10',
)

종가 데이터만을 사용하여 데이터셋을 생성합니다.

# Number of consecutive past rows fed to the model for each prediction.
LOOKBACK = 60

# Keep only the 'Close' column, both as a DataFrame and as its raw array.
data = df.filter(items=['Close'])
dataset = data.values

# The first 80% of the rows (rounded up) form the training split.
training_data_len = math.ceil(0.8 * len(dataset))

주가 데이터를 시각화하여 전반적인 패턴을 확인할 수 있습니다.

# Plot the full closing-price history to eyeball the overall trend.
# Draw through the Axes object so `ax` keeps referring to the Axes:
# plt.plot() returns a list of Line2D objects, which must not overwrite it.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['Close'])
ax.tick_params(axis='x', labelrotation=45)  # tilt date labels for legibility
ax.set_xlabel('date')
ax.set_ylabel('close price')

MinMaxScaler를 사용하여 데이터를 정규화합니다. 이는 모델이 더 빠르게 수렴할 수 있도록 돕습니다.

# Learn the min/max from the training rows only, so statistics of the
# held-out rows cannot leak into training. The whole series is then
# transformed with that scaling, which means held-out values may fall
# outside the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset[:training_data_len])
scaled_data = scaler.transform(dataset)

다음으로, 훈련 데이터셋을 생성합니다.

# Training slice of the scaled series.
train_data = scaled_data[:training_data_len, :]

# Each sample is the LOOKBACK values preceding position `end`;
# its target is the value at `end` itself.
x_train = [
    train_data[end - LOOKBACK:end, 0]
    for end in range(LOOKBACK, training_data_len)
]
y_train = [
    train_data[end, 0]
    for end in range(LOOKBACK, training_data_len)
]

x_train = np.array(x_train)
y_train = np.array(y_train)

# Add a trailing feature axis: (samples, timesteps) -> (samples, timesteps, 1).
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)

이제, Keras를 사용하여 LSTM 모델을 구축하고 훈련시킵니다.

# Two stacked 50-unit LSTM layers, then a 25-unit dense layer and a
# single-value output layer.
model = Sequential([
    LSTM(50, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    LSTM(50, return_sequences=False),
    Dense(25),
    Dense(1),
])

# Train with Adam on mean squared error: 3 epochs, one sample per batch.
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, y_train, epochs=3, batch_size=1)

테스트 데이터를 준비하고 모델을 사용하여 예측합니다.

# Held-out targets, taken from the unscaled array.
y_test = dataset[training_data_len:, :]

# Start LOOKBACK rows before the split so the first held-out row
# already has a full input window.
test_data = scaled_data[training_data_len - LOOKBACK:, :]
test_data_len = len(test_data)

x_test = np.array([
    test_data[end - LOOKBACK:end, 0]
    for end in range(LOOKBACK, test_data_len)
])
# Add a trailing feature axis: (samples, timesteps) -> (samples, timesteps, 1).
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

# Predict in scaled space, then map the outputs back through the scaler.
prediction = scaler.inverse_transform(model.predict(x_test))

예측 성능을 평가하기 위해 RMSE(Root Mean Squared Error)를 계산합니다.

# Root mean squared error: square each residual, average, then take the root.
# Squaring must happen before averaging; squaring the mean residual instead
# lets positive and negative errors cancel and yields |mean error|, not RMSE.
rmse = np.sqrt(np.mean((prediction - y_test) ** 2))
print(f"RMSE {rmse}")

RMSE 2.288983996448621은 예측 주가와 실제 주가가 평균적으로 약 2.29만큼 차이가 난다는 뜻입니다.

예측 성능이 매우 안 좋습니다.

하이퍼파라미터(hyperparameter) 조정, 입력 데이터 조정 등을 통해서 예측력을 올릴 수 있습니다.

마지막으로, 실제 주가 데이터와 예측 결과를 시각화하여 비교합니다.

# Split the close-price frame at the train/test boundary and attach the
# model output to the held-out part.
train = data.iloc[:training_data_len].copy()
valid = data.iloc[training_data_len:].copy()
valid['prediction'] = prediction

plt.figure(figsize=(15, 6))
plt.title('Google prediction')
plt.xlabel('date')
plt.ylabel('close price')

# Draw order must match the legend labels below.
curves = (
    (train['Close'], {'color': 'black'}),
    (valid['Close'], {'color': 'black', 'linestyle': 'dashed'}),
    (valid['prediction'], {'color': 'r'}),
)
for series, line_style in curves:
    plt.plot(series, **line_style)

plt.legend(['train', 'validation', 'prediction'], loc='lower right')
plt.show()

 

모든 코드를 첨부합니다. 

import math
import numpy as np
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from keras.models import Sequential
from keras.layers import Dense, LSTM
import matplotlib.pyplot as plt

import pandas_datareader as web
import yfinance as yf

# Number of consecutive past rows fed to the model for each prediction.
LOOKBACK = 60

# Fetch GOOG price history from Yahoo Finance for the fixed study window.
df = yf.download(
    'GOOG',
    start='2015-01-03',
    end='2022-04-10',
)
# Disabled alternative: load the data from a local CSV instead.
# df_indexed = pd.read_csv('dataset/GOOG-year.csv', index_col=0, parse_dates=True)
# df3 = pd.read_csv('dataset/GOOG-year.csv')

# Keep only the 'Close' column, both as a DataFrame and as its raw array.
data = df.filter(items=['Close'])
dataset = data.values

# The first 80% of the rows (rounded up) form the training split.
training_data_len = math.ceil(0.8 * len(dataset))

# Plot the full closing-price history to eyeball the overall trend.
# Draw through the Axes object so `ax` keeps referring to the Axes:
# plt.plot() returns a list of Line2D objects, which must not overwrite it.
fig, ax = plt.subplots(figsize=(10, 5))
ax.plot(df['Close'])

ax.tick_params(axis='x', labelrotation=45)  # tilt date labels for legibility
ax.set_xlabel('date')
ax.set_ylabel('close price')

# plt.show()  # left disabled; the script calls plt.show() once at the end

# Learn the min/max from the training rows only, so statistics of the
# held-out rows cannot leak into training. The whole series is then
# transformed with that scaling, which means held-out values may fall
# outside the [0, 1] range.
scaler = MinMaxScaler(feature_range=(0, 1))
scaler.fit(dataset[:training_data_len])
scaled_data = scaler.transform(dataset)

# Training slice of the scaled series.
train_data = scaled_data[:training_data_len, :]

# Each sample is the LOOKBACK values preceding position `end`;
# its target is the value at `end` itself.
x_train = [
    train_data[end - LOOKBACK:end, 0]
    for end in range(LOOKBACK, training_data_len)
]
y_train = [
    train_data[end, 0]
    for end in range(LOOKBACK, training_data_len)
]

# Debug output: show the first window and its target, if there is one.
if x_train:
    print(x_train[:1])
    print(y_train[:1])

# Convert the Python lists to NumPy arrays.
x_train = np.array(x_train)
y_train = np.array(y_train)
print(x_train.shape)

# Keras LSTM input is 3-D: (samples, timesteps, features).
x_train = x_train.reshape(x_train.shape[0], x_train.shape[1], 1)
print(x_train.shape)

# Model definition and training.
model = Sequential([
    # First recurrent layer returns the full sequence for the next LSTM.
    LSTM(50, return_sequences=True, input_shape=(x_train.shape[1], 1)),
    # Second recurrent layer returns only its final output.
    LSTM(50, return_sequences=False),
    # Dense layer with 25 units.
    Dense(25),
    # Output layer with a single unit.
    Dense(1),
])

# Compile with Adam and mean squared error, then train for 3 epochs
# with one sample per batch.
model.compile(loss='mean_squared_error', optimizer='adam')
model.fit(x_train, y_train, epochs=3, batch_size=1)

# Evaluation on the held-out rows.

# Held-out targets, taken from the unscaled array.
y_test = dataset[training_data_len:, :]

# Start LOOKBACK rows before the split so the first held-out row
# already has a full input window.
test_data = scaled_data[training_data_len - LOOKBACK:, :]
test_data_len = len(test_data)

x_test = np.array([
    test_data[end - LOOKBACK:end, 0]
    for end in range(LOOKBACK, test_data_len)
])

# Reshape to the 3-D layout used for the LSTM input.
x_test = x_test.reshape(x_test.shape[0], x_test.shape[1], 1)

# Predict in scaled space, then map the outputs back through the scaler.
prediction = scaler.inverse_transform(model.predict(x_test))

# Root mean squared error: square each residual, average, then take the root.
# Squaring must happen before averaging; squaring the mean residual instead
# lets positive and negative errors cancel and yields |mean error|, not RMSE.
rmse = np.sqrt(np.mean((prediction - y_test) ** 2))
print(f"RMSE {rmse}")

# Plot the predictions against the real prices.

# Split the close-price frame at the train/test boundary and attach the
# model output to the held-out part.
train = data.iloc[:training_data_len].copy()
valid = data.iloc[training_data_len:].copy()
valid['prediction'] = prediction

plt.figure(figsize=(15, 6))
plt.title('Google prediction')
plt.xlabel('date')
plt.ylabel('close price')

# Draw order must match the legend labels below.
curves = (
    (train['Close'], {'color': 'black'}),
    (valid['Close'], {'color': 'black', 'linestyle': 'dashed'}),
    (valid['prediction'], {'color': 'r'}),
)
for series, line_style in curves:
    plt.plot(series, **line_style)

plt.legend(['train', 'validation', 'prediction'], loc='lower right')
plt.show()
728x90
반응형
댓글
250x250
최근에 올라온 글
최근에 달린 댓글
Total
Today
Yesterday
링크
«   2025/07   »
1 2 3 4 5
6 7 8 9 10 11 12
13 14 15 16 17 18 19
20 21 22 23 24 25 26
27 28 29 30 31
글 보관함
공지사항