diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..5c98b42 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,2 @@ +# Default ignored files +/workspace.xml \ No newline at end of file diff --git a/.idea/StockAnalysisInPython.iml b/.idea/StockAnalysisInPython.iml new file mode 100644 index 0000000..78f8be5 --- /dev/null +++ b/.idea/StockAnalysisInPython.iml @@ -0,0 +1,11 @@ + + + + + + + + + + \ No newline at end of file diff --git a/.idea/inspectionProfiles/profiles_settings.xml b/.idea/inspectionProfiles/profiles_settings.xml new file mode 100644 index 0000000..105ce2d --- /dev/null +++ b/.idea/inspectionProfiles/profiles_settings.xml @@ -0,0 +1,6 @@ + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..ae7c9ee --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,7 @@ + + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..3f2e96a --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/other.xml b/.idea/other.xml new file mode 100644 index 0000000..640fd80 --- /dev/null +++ b/.idea/other.xml @@ -0,0 +1,7 @@ + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/01_Stock_Investment/Investar/Analyzer.py b/01_Stock_Investment/Investar/Analyzer.py new file mode 100644 index 0000000..1aa7ef1 --- /dev/null +++ b/01_Stock_Investment/Investar/Analyzer.py @@ -0,0 +1,93 @@ +import pandas as pd +import pymysql +from datetime import datetime +from datetime import timedelta +import re + +class MarketDB: + def __init__(self): + """생성자: MariaDB 연결 및 종목코드 딕셔너리 생성""" + self.conn = pymysql.connect(host='localhost', user='root', + password='snake.land.', db='INVESTAR', charset='utf8') + self.codes = {} + self.get_comp_info() + + def __del__(self): + """소멸자: MariaDB 연결 해제""" + self.conn.close() + + def get_comp_info(self): + """company_info 테이블에서 읽어와서 codes에 저장""" + sql = "SELECT * FROM company_info" + krx = pd.read_sql(sql, self.conn) + for idx in range(len(krx)): + self.codes[krx['code'].values[idx]] = krx['company'].values[idx] + + def get_daily_price(self, code, start_date=None, end_date=None): + """KRX 종목의 일별 시세를 데이터프레임 형태로 반환 + - code : KRX 종목코드('005930') 또는 상장기업명('삼성전자') + - start_date : 조회 시작일('2020-01-01'), 미입력 시 1년 전 오늘 + - end_date : 조회 종료일('2020-12-31'), 미입력 시 오늘 날짜 + """ + if start_date is None: + one_year_ago = datetime.today() - timedelta(days=365) + start_date = one_year_ago.strftime('%Y-%m-%d') + print("start_date is initialized to '{}'".format(start_date)) + else: + start_lst = re.split('\D+', start_date) + if start_lst[0] == '': + start_lst = start_lst[1:] + start_year = int(start_lst[0]) + start_month = int(start_lst[1]) + start_day = int(start_lst[2]) + if start_year < 1900 or start_year > 2200: + print(f"ValueError: start_year({start_year:d}) is wrong.") + return + if start_month < 1 or start_month > 12: + print(f"ValueError: start_month({start_month:d}) is wrong.") + return + if start_day < 1 or start_day > 31: + print(f"ValueError: start_day({start_day:d}) is wrong.") + return + start_date=f"{start_year:04d}-{start_month:02d}-{start_day:02d}" + + if end_date is None: + end_date = datetime.today().strftime('%Y-%m-%d') + print("end_date is initialized to '{}'".format(end_date)) + else: + end_lst = re.split('\D+', end_date) + if end_lst[0] == '': + end_lst = end_lst[1:] + end_year = int(end_lst[0]) + end_month = int(end_lst[1]) + end_day = int(end_lst[2]) + if end_year < 1800 or end_year > 2200: + print(f"ValueError: end_year({end_year:d}) is wrong.") + return + if end_month < 1 or end_month > 12: + print(f"ValueError: end_month({end_month:d}) is wrong.") + return + if end_day < 1 or end_day > 31: + print(f"ValueError: end_day({end_day:d}) is wrong.") + return + end_date = f"{end_year:04d}-{end_month:02d}-{end_day:02d}" + + codes_keys = list(self.codes.keys()) + codes_values = list(self.codes.values()) + + if code in codes_keys: + pass + elif code in codes_values: + idx = codes_values.index(code) + code = codes_keys[idx] + else: + print(f"ValueError: Code({code}) doesn't exist.") + sql = f"SELECT * FROM daily_price WHERE code = '{code}'"\ + f" and date >= '{start_date}' and date <= '{end_date}'" + df = pd.read_sql(sql, self.conn) + df.index = df['date'] + return df + + + + diff --git a/01_Stock_Investment/Investar/DBUpdater.py b/01_Stock_Investment/Investar/DBUpdater.py new file mode 100644 index 0000000..3e1e204 --- /dev/null +++ b/01_Stock_Investment/Investar/DBUpdater.py @@ -0,0 +1,170 @@ + +import pandas as pd +from bs4 import BeautifulSoup +import urllib, pymysql, calendar, time, json +from urllib.request import urlopen +from datetime import datetime +from threading import Timer + +class DBUpdater: + def __init__(self): + """생성자: MariaDB 연결 및 종목코드 딕셔너리 생성""" + self.conn = pymysql.connect(host='localhost', user='root', + password='snake.land.', db='INVESTAR', charset='utf8') + + with self.conn.cursor() as curs: + sql = """ + CREATE TABLE IF NOT EXISTS company_info ( + code VARCHAR(20), + company VARCHAR(40), + last_update DATE, + PRIMARY KEY (code)) + """ + curs.execute(sql) + sql = """ + CREATE TABLE IF NOT EXISTS daily_price ( + code VARCHAR(20), + date DATE, + open BIGINT(20), + high BIGINT(20), + low BIGINT(20), + close BIGINT(20), + diff BIGINT(20), + volume BIGINT(20), + PRIMARY KEY (code, date)) + """ + curs.execute(sql) + self.conn.commit() + self.codes = dict() + + def __del__(self): + """소멸자: MariaDB 연결 해제""" + self.conn.close() + + def read_krx_code(self): + """KRX로부터 상장기업 목록 파일을 읽어와서 데이터프레임으로 반환""" + url = 'http://kind.krx.co.kr/corpgeneral/corpList.do?method='\ + 'download&searchType=13' + krx = pd.read_html(url, header=0)[0] + krx = krx[['종목코드', '회사명']] + krx = krx.rename(columns={'종목코드': 'code', '회사명': 'company'}) + krx.code = krx.code.map('{:06d}'.format) + return krx + + def update_comp_info(self): + """종목코드를 company_info 테이블에 업데이트 한 후 딕셔너리에 저장""" + sql = "SELECT * FROM company_info" + df = pd.read_sql(sql, self.conn) + for idx in range(len(df)): + self.codes[df['code'].values[idx]] = df['company'].values[idx] + + with self.conn.cursor() as curs: + sql = "SELECT max(last_update) FROM company_info" + curs.execute(sql) + rs = curs.fetchone() + today = datetime.today().strftime('%Y-%m-%d') + if rs[0] == None or rs[0].strftime('%Y-%m-%d') < today: + krx = self.read_krx_code() + for idx in range(len(krx)): + code = krx.code.values[idx] + company = krx.company.values[idx] + sql = f"REPLACE INTO company_info (code, company, last"\ + f"_update) VALUES ('{code}', '{company}', '{today}')" + curs.execute(sql) + self.codes[code] = company + tmnow = datetime.now().strftime('%Y-%m-%d %H:%M') + print(f"[{tmnow}] #{idx+1:04d} REPLACE INTO company_info "\ + f"VALUES ({code}, {company}, {today})") + self.conn.commit() + print('') + + def read_naver(self, code, company, pages_to_fetch): + """네이버에서 주식 시세를 읽어서 데이터프레임으로 반환""" + try: + url = f"http://finance.naver.com/item/sise_day.nhn?code={code}" + with urlopen(url) as doc: + if doc is None: + return None + html = BeautifulSoup(doc, "lxml") + pgrr = html.find("td", class_="pgRR") + if pgrr is None: + return None + s = str(pgrr.a["href"]).split('=') + lastpage = s[-1] + df = pd.DataFrame() + pages = min(int(lastpage), pages_to_fetch) + for page in range(1, pages + 1): + pg_url = '{}&page={}'.format(url, page) + df = df.append(pd.read_html(pg_url, header=0)[0]) + tmnow = datetime.now().strftime('%Y-%m-%d %H:%M') + print('[{}] {} ({}) : {:04d}/{:04d} pages are downloading...'. + format(tmnow, company, code, page, pages), end="\r") + df = df.rename(columns={'날짜':'date','종가':'close','전일비':'diff' + ,'시가':'open','고가':'high','저가':'low','거래량':'volume'}) + df['date'] = df['date'].replace('.', '-') + df = df.dropna() + df[['close', 'diff', 'open', 'high', 'low', 'volume']] = df[['close', + 'diff', 'open', 'high', 'low', 'volume']].astype(int) + df = df[['date', 'open', 'high', 'low', 'close', 'diff', 'volume']] + except Exception as e: + print('Exception occured :', str(e)) + return None + return df + + def replace_into_db(self, df, num, code, company): + """네이버에서 읽어온 주식 시세를 DB에 REPLACE""" + with self.conn.cursor() as curs: + for r in df.itertuples(): + sql = f"REPLACE INTO daily_price VALUES ('{code}', "\ + f"'{r.date}', {r.open}, {r.high}, {r.low}, {r.close}, "\ + f"{r.diff}, {r.volume})" + curs.execute(sql) + self.conn.commit() + print('[{}] #{:04d} {} ({}) : {} rows > REPLACE INTO daily_'\ + 'price [OK]'.format(datetime.now().strftime('%Y-%m-%d'\ + ' %H:%M'), num+1, company, code, len(df))) + + def update_daily_price(self, pages_to_fetch): + """KRX 상장법인의 주식 시세를 네이버로부터 읽어서 DB에 업데이트""" + for idx, code in enumerate(self.codes): + df = self.read_naver(code, self.codes[code], pages_to_fetch) + if df is None: + continue + self.replace_into_db(df, idx, code, self.codes[code]) + + def execute_daily(self): + """실행 즉시 및 매일 오후 다섯시에 daily_price 테이블 업데이트""" + self.update_comp_info() + + try: + with open('config.json', 'r') as in_file: + config = json.load(in_file) + pages_to_fetch = config['pages_to_fetch'] + except FileNotFoundError: + with open('config.json', 'w') as out_file: + pages_to_fetch = 100 + config = {'pages_to_fetch': 1} + json.dump(config, out_file) + self.update_daily_price(pages_to_fetch) + + tmnow = datetime.now() + lastday = calendar.monthrange(tmnow.year, tmnow.month)[1] + if tmnow.month == 12 and tmnow.day == lastday: + tmnext = tmnow.replace(year=tmnow.year+1, month=1, day=1, + hour=17, minute=0, second=0) + elif tmnow.day == lastday: + tmnext = tmnow.replace(month=tmnow.month+1, day=1, hour=17, + minute=0, second=0) + else: + tmnext = tmnow.replace(day=tmnow.day+1, hour=17, minute=0, + second=0) + tmdiff = tmnext - tmnow + secs = tmdiff.seconds + t = Timer(secs, self.execute_daily) + print("Waiting for next update ({}) ... ".format(tmnext.strftime + ('%Y-%m-%d %H:%M'))) + t.start() + +if __name__ == '__main__': + dbu = DBUpdater() + dbu.execute_daily() diff --git a/01_Stock_Investment/Investar/MarketDB.py b/01_Stock_Investment/Investar/MarketDB.py new file mode 100644 index 0000000..51966df --- /dev/null +++ b/01_Stock_Investment/Investar/MarketDB.py @@ -0,0 +1,38 @@ +import pandas as pd +#from bs4 import BeautifulSoup +#import urllib +#from urllib.request import urlopen +import pymysql +#import time +#import pandas.io.sql as sql +from datetime import datetime +#from threading import Timer +#import matplotlib.pyplot as plt + +class MarketDB: + def __init__(self): + """생성자: MariaDB 연결 및 종목코드 딕셔너리 생성""" + self.conn = pymysql.connect(host='localhost', user='root', password='snake.land.', db='INVESTAR', charset='utf8') + self.codes = dict() + self.getCompanyInfo() + + def __del__(self): + """소멸자: MariaDB 연결 해제""" + self.conn.close() + + def getCompanyInfo(self): + """company_info 테이블에서 읽어와서 companyData와 codes에 저장""" + sql = "SELECT * FROM company_info" + companyInfo = pd.read_sql(sql, self.conn) + for idx in range(len(companyInfo)): + self.codes[companyInfo['code'].values[idx]] = companyInfo['company'].values[idx] + + def getDailyPrice(self, code, startDate, endDate): + """daily_price 테이블에서 읽어와서 데이터프레임으로 반환""" + sql = "SELECT * FROM daily_price WHERE code = '{}' and date >= '{}' and date <= '{}'".format(code, startDate, endDate) + df = pd.read_sql(sql, self.conn) + df.index = df['date'] + return df + + + diff --git a/03_NumPy_and_Pandas/ch03_02_DowKospi_Scatter.py b/03_NumPy_and_Pandas/ch03_02_DowKospi_Scatter.py index 4a733bd..5511f63 100644 --- a/03_NumPy_and_Pandas/ch03_02_DowKospi_Scatter.py +++ b/03_NumPy_and_Pandas/ch03_02_DowKospi_Scatter.py @@ -6,6 +6,8 @@ dow = pdr.get_data_yahoo('^DJI', '2000-01-04') kospi = pdr.get_data_yahoo('^KS11', '2000-01-04') +dow + df = pd.DataFrame({'DOW' dow['Close'], 'KOSPI' kospi['Close']}) df = df.fillna(method='bfill') df = df.fillna(method='ffill') diff --git a/README.md b/README.md index 3cdcd0d..4660ec9 100644 --- a/README.md +++ b/README.md @@ -8,4 +8,5 @@ - 서적에 삽입된 그림의 PPT 원본은 PowerPoint_Materials.pptx 파일에 있습니다. -![Portpolio_optimization](./06_Trading_Strategy/imgs/Portpolio_optimization.jpg) +![Portpolio_optimization](06_Trading_Strategy/imgs/Portpolio_optimization.jpg) + diff --git a/stockTest/getData.py b/stockTest/getData.py new file mode 100644 index 0000000..5f27803 --- /dev/null +++ b/stockTest/getData.py @@ -0,0 +1,3 @@ +import pandas as pd +import numpy as np +