From c1bfb6d6f843b965d3d9bdc88a425cdea5a8197c Mon Sep 17 00:00:00 2001 From: quantylab Date: Wed, 1 Apr 2020 12:17:12 +0900 Subject: [PATCH] =?UTF-8?q?v1.rich=20=EB=8D=B0=EC=9D=B4=ED=84=B0=20?= =?UTF-8?q?=EC=B6=94=EA=B0=80=20main=20=EB=B2=84=EA=B7=B8=20=ED=94=BD?= =?UTF-8?q?=EC=8A=A4?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- agent.py | 5 +++-- data_manager.py | 59 +++++++++++++++++++++++++++++++++++++++++-------- main.py | 23 +++++++++---------- run_plaidml.cmd | 23 ++++++++++++------- run_tf_gpu.cmd | 8 +++---- settings.py | 4 +--- utils.py | 5 +++-- visualizer.py | 2 +- 8 files changed, 87 insertions(+), 42 deletions(-) diff --git a/agent.py b/agent.py index db1d4cb..6f0ed15 100644 --- a/agent.py +++ b/agent.py @@ -7,9 +7,10 @@ class Agent: STATE_DIM = 2 # 주식 보유 비율, 포트폴리오 가치 비율 # 매매 수수료 및 세금 - TRADING_CHARGE = 0.00015 # 거래 수수료 (일반적으로 0.015%) - TRADING_TAX = 0.0025 # 거래세 (실제 0.25%) + TRADING_CHARGE = 0.00015 # 거래 수수료 0.015% + # TRADING_CHARGE = 0.00011 # 거래 수수료 0.011% # TRADING_CHARGE = 0 # 거래 수수료 미적용 + TRADING_TAX = 0.0025 # 거래세 0.25% # TRADING_TAX = 0 # 거래세 미적용 # 행동 diff --git a/data_manager.py b/data_manager.py index a32f94b..ebae605 100644 --- a/data_manager.py +++ b/data_manager.py @@ -14,6 +14,22 @@ 'close_ma120_ratio', 'volume_ma120_ratio', ] +COLUMNS_TRAINING_DATA_V1_RICH = [ + 'open_lastclose_ratio', 'high_close_ratio', 'low_close_ratio', + 'close_lastclose_ratio', 'volume_lastvolume_ratio', + 'close_ma5_ratio', 'volume_ma5_ratio', + 'close_ma10_ratio', 'volume_ma10_ratio', + 'close_ma20_ratio', 'volume_ma20_ratio', + 'close_ma60_ratio', 'volume_ma60_ratio', + 'close_ma120_ratio', 'volume_ma120_ratio', + 'inst_lastinst_ratio', 'frgn_lastfrgn_ratio', + 'inst_ma5_ratio', 'frgn_ma5_ratio', + 'inst_ma10_ratio', 'frgn_ma10_ratio', + 'inst_ma20_ratio', 'frgn_ma20_ratio', + 'inst_ma60_ratio', 'frgn_ma60_ratio', + 'inst_ma120_ratio', 'frgn_ma120_ratio', +] + COLUMNS_TRAINING_DATA_V2 = [ 'per', 'pbr', 'roe', 
'open_lastclose_ratio', 'high_close_ratio', 'low_close_ratio', @@ -30,13 +46,31 @@ ] -def preprocess(data): +def preprocess(data, ver='v1'): windows = [5, 10, 20, 60, 120] for window in windows: data['close_ma{}'.format(window)] = \ data['close'].rolling(window).mean() data['volume_ma{}'.format(window)] = \ data['volume'].rolling(window).mean() + data['close_ma%d_ratio' % window] = \ + (data['close'] - data['close_ma%d' % window]) \ + / data['close_ma%d' % window] + data['volume_ma%d_ratio' % window] = \ + (data['volume'] - data['volume_ma%d' % window]) \ + / data['volume_ma%d' % window] + + if ver == 'v1.rich': + data['inst_ma{}'.format(window)] = \ + data['inst'].rolling(window).mean() + data['frgn_ma{}'.format(window)] = \ + data['frgn'].rolling(window).mean() + data['inst_ma%d_ratio' % window] = \ + (data['inst'] - data['inst_ma%d' % window]) \ + / data['inst_ma%d' % window] + data['frgn_ma%d_ratio' % window] = \ + (data['frgn'] - data['frgn_ma%d' % window]) \ + / data['frgn_ma%d' % window] data['open_lastclose_ratio'] = np.zeros(len(data)) data.loc[1:, 'open_lastclose_ratio'] = \ @@ -59,14 +93,19 @@ def preprocess(data): .replace(to_replace=0, method='ffill') \ .replace(to_replace=0, method='bfill').values - windows = [5, 10, 20, 60, 120] - for window in windows: - data['close_ma%d_ratio' % window] = \ - (data['close'] - data['close_ma%d' % window]) \ - / data['close_ma%d' % window] - data['volume_ma%d_ratio' % window] = \ - (data['volume'] - data['volume_ma%d' % window]) \ - / data['volume_ma%d' % window] + if ver == 'v1.rich': + data['inst_lastinst_ratio'] = np.zeros(len(data)) + data.loc[1:, 'inst_lastinst_ratio'] = \ + (data['inst'][1:].values - data['inst'][:-1].values) \ + / data['inst'][:-1] \ + .replace(to_replace=0, method='ffill') \ + .replace(to_replace=0, method='bfill').values + data['frgn_lastfrgn_ratio'] = np.zeros(len(data)) + data.loc[1:, 'frgn_lastfrgn_ratio'] = \ + (data['frgn'][1:].values - data['frgn'][:-1].values) \ + / 
data['frgn'][:-1] \ + .replace(to_replace=0, method='ffill') \ + .replace(to_replace=0, method='bfill').values return data @@ -91,6 +130,8 @@ def load_data(fpath, date_from, date_to, ver='v2'): training_data = None if ver == 'v1': training_data = data[COLUMNS_TRAINING_DATA_V1] + elif ver == 'v1.rich': + training_data = data[COLUMNS_TRAINING_DATA_V1_RICH] elif ver == 'v2': data.loc[:, ['per', 'pbr', 'roe']] = \ data[['per', 'pbr', 'roe']].apply(lambda x: x / 100) diff --git a/main.py b/main.py index 50f3fc4..c38dc04 100644 --- a/main.py +++ b/main.py @@ -45,7 +45,7 @@ # 출력 경로 설정 output_path = os.path.join(settings.BASE_DIR, 'output/{}_{}_{}'.format(args.output_name, args.rl_method, args.net)) - if not os.path.exists(output_path): + if not os.path.isdir(output_path): os.makedirs(output_path) # 파라미터 기록 @@ -59,11 +59,12 @@ file_handler.setLevel(logging.DEBUG) stream_handler.setLevel(logging.INFO) logging.basicConfig(format="%(message)s", - handlers=[file_handler, stream_handler], level=logging.DEBUG) + handlers=[file_handler, stream_handler], level=logging.DEBUG) # 로그, Keras Backend 설정을 먼저하고 RLTrader 모듈들을 이후에 임포트해야 함 from agent import Agent - from learners import DQNLearner, PolicyGradientLearner, ActorCriticLearner, A2CLearner, A3CLearner + from learners import DQNLearner, PolicyGradientLearner, \ + ActorCriticLearner, A2CLearner, A3CLearner # 모델 경로 준비 value_network_path = '' @@ -83,6 +84,7 @@ output_path, '{}_{}_policy_{}.h5'.format( args.rl_method, args.net, args.output_name)) + common_params = {} list_stock_code = [] list_chart_data = [] list_training_data = [] @@ -97,17 +99,14 @@ args.start_date, args.end_date, ver=args.ver) # 최소/최대 투자 단위 설정 - min_trading_unit = max( - int(100000 / chart_data.iloc[-1]['close']), 1) - max_trading_unit = max( - int(1000000 / chart_data.iloc[-1]['close']), 1) + min_trading_unit = max(int(100000 / chart_data.iloc[-1]['close']), 1) + max_trading_unit = max(int(1000000 / chart_data.iloc[-1]['close']), 1) # 공통 파라미터 설정 common_params = 
{'rl_method': args.rl_method, 'delayed_reward_threshold': args.delayed_reward_threshold, 'net': args.net, 'num_steps': args.num_steps, 'lr': args.lr, - 'output_path': output_path, - 'reuse_models': args.reuse_models} + 'output_path': output_path, 'reuse_models': args.reuse_models} # 강화학습 시작 learner = None @@ -124,13 +123,11 @@ learner = PolicyGradientLearner(**{**common_params, 'policy_network_path': policy_network_path}) elif args.rl_method == 'ac': - learner = ActorCriticLearner(**{ - **common_params, + learner = ActorCriticLearner(**{**common_params, 'value_network_path': value_network_path, 'policy_network_path': policy_network_path}) elif args.rl_method == 'a2c': - learner = A2CLearner(**{ - **common_params, + learner = A2CLearner(**{**common_params, 'value_network_path': value_network_path, 'policy_network_path': policy_network_path}) if learner is not None: diff --git a/run_plaidml.cmd b/run_plaidml.cmd index e5e715d..40015d2 100644 --- a/run_plaidml.cmd +++ b/run_plaidml.cmd @@ -1,8 +1,15 @@ -REM 현대차:005380 삼성전자:005930 NAVER:035420 한국전력:015760 LG화학:051910 셀트리온:068270 - -python main.py --stock_code 005380 --rl_method a2c --net lstm --num_steps 5 --output_name 005380 --learning --num_epoches 100 --lr 0.001 --start_epsilon 0.9 --backend plaidml -python main.py --stock_code 005930 --rl_method a2c --net lstm --num_steps 5 --output_name 005930 --learning --num_epoches 100 --lr 0.001 --start_epsilon 0.9 --backend plaidml -python main.py --stock_code 035420 --rl_method a2c --net lstm --num_steps 5 --output_name 035420 --learning --num_epoches 100 --lr 0.001 --start_epsilon 0.9 --backend plaidml -python main.py --stock_code 015760 --rl_method a2c --net lstm --num_steps 5 --output_name 015760 --learning --num_epoches 100 --lr 0.001 --start_epsilon 0.9 --backend plaidml -python main.py --stock_code 051910 --rl_method a2c --net lstm --num_steps 5 --output_name 051910 --learning --num_epoches 100 --lr 0.001 --start_epsilon 0.9 --backend plaidml -python main.py 
--stock_code 068270 --rl_method a2c --net lstm --num_steps 5 --output_name 068270 --learning --num_epoches 100 --lr 0.001 --start_epsilon 0.9 --backend plaidml +REM 현대차:005380 삼성전자:005930 한국전력:015760 +REM NAVER:035420 LG화학:051910 셀트리온:068270 + +REM A2C +python main.py --stock_code 005380 --rl_method a2c --net lstm --num_steps 5 --output_name c_005380 --learning --num_epoches 1000 --lr 0.001 --start_epsilon 1 --discount_factor 0.9 --backend plaidml +python main.py --stock_code 005930 --rl_method a2c --net lstm --num_steps 5 --output_name c_005930 --learning --num_epoches 1000 --lr 0.001 --start_epsilon 1 --discount_factor 0.9 --backend plaidml +python main.py --stock_code 015760 --rl_method a2c --net lstm --num_steps 5 --output_name c_015760 --learning --num_epoches 1000 --lr 0.001 --start_epsilon 1 --discount_factor 0.9 --backend plaidml + +REM A3C +python main.py --stock_code 005380 005930 015760 --rl_method a3c --net lstm --num_steps 5 --output_name 005380_005930_015760 --learning --num_epoches 1000 --lr 0.001 --start_epsilon 1 --discount_factor 0.9 --backend plaidml + +REM Testing +python main.py --stock_code 005380 --rl_method a2c --net lstm --num_steps 5 --output_name test_005380 --num_epoches 1 --start_epsilon 0 --start_date 20180101 --end_date 20181231 --reuse_models --value_network_name a2c_lstm_value_b_005380 --policy_network_name a2c_lstm_policy_b_005380 --backend plaidml + + diff --git a/run_tf_gpu.cmd b/run_tf_gpu.cmd index c12c18d..ede8038 100644 --- a/run_tf_gpu.cmd +++ b/run_tf_gpu.cmd @@ -1,13 +1,13 @@ -REM 현대차:005380 삼성전자:005930 한국전력:015760 -REM NAVER:035420 LG화학:051910 셀트리온:068270 +REM 삼성전자:005930 NAVER:035420 LG화학:051910 현대차:005380 셀트리온:068270 한국전력:015760 +REM REM A2C -python main.py --stock_code 005380 --rl_method a2c --net lstm --num_steps 5 --output_name c_005380 --learning --num_epoches 1000 --lr 0.001 --start_epsilon 1 --discount_factor 0.9 python main.py --stock_code 005930 --rl_method a2c --net lstm --num_steps 5 --output_name c_005930 
--learning --num_epoches 1000 --lr 0.001 --start_epsilon 1 --discount_factor 0.9 +python main.py --stock_code 005380 --rl_method a2c --net lstm --num_steps 5 --output_name c_005380 --learning --num_epoches 1000 --lr 0.001 --start_epsilon 1 --discount_factor 0.9 python main.py --stock_code 015760 --rl_method a2c --net lstm --num_steps 5 --output_name c_015760 --learning --num_epoches 1000 --lr 0.001 --start_epsilon 1 --discount_factor 0.9 REM A3C -python main.py --stock_code 005380 005930 015760 --rl_method a3c --net lstm --num_steps 5 --output_name 005380_005930_015760 --learning --num_epoches 1000 --lr 0.001 --start_epsilon 1 --discount_factor 0.9 +python main.py --stock_code 005930 005380 015760 --rl_method a3c --net lstm --num_steps 5 --learning --num_epoches 1000 --lr 0.001 --start_epsilon 1 --discount_factor 0.9 --output_name train --start_date 20170101 --end_date 20181231 REM Testing python main.py --stock_code 005380 --rl_method a2c --net lstm --num_steps 5 --output_name test_005380 --num_epoches 1 --start_epsilon 0 --start_date 20180101 --end_date 20181231 --reuse_models --value_network_name a2c_lstm_policy_b_005380 --policy_network_name a2c_lstm_value_b_005380 \ No newline at end of file diff --git a/settings.py b/settings.py index a92295d..5634873 100644 --- a/settings.py +++ b/settings.py @@ -1,5 +1,3 @@ -import time -import datetime import locale import os import platform @@ -10,7 +8,7 @@ # 로케일 설정 -if 'Darwin' in platform.system(): +if 'Linux' in platform.system() or 'Darwin' in platform.system(): locale.setlocale(locale.LC_ALL, 'ko_KR.UTF-8') elif 'Windows' in platform.system(): locale.setlocale(locale.LC_ALL, '') diff --git a/utils.py b/utils.py index 04c3a9c..6246688 100644 --- a/utils.py +++ b/utils.py @@ -3,13 +3,14 @@ import numpy as np -# Date Time Format +# 날짜, 시간 관련 문자열 형식 FORMAT_DATE = "%Y%m%d" FORMAT_DATETIME = "%Y%m%d%H%M%S" def get_today_str(): - today = datetime.datetime.combine(datetime.date.today(), datetime.datetime.min.time()) + today 
= datetime.datetime.combine( + datetime.date.today(), datetime.datetime.min.time()) today_str = today.strftime('%Y%m%d') return today_str diff --git a/visualizer.py b/visualizer.py index 621480e..dc17e5c 100644 --- a/visualizer.py +++ b/visualizer.py @@ -1,7 +1,7 @@ import threading import numpy as np import matplotlib.pyplot as plt -# plt.switch_backend('agg') +plt.switch_backend('agg') from mplfinance.original_flavor import candlestick_ohlc from agent import Agent