Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
39 changes: 36 additions & 3 deletions data_collector/data_collector.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,13 @@
from operator import itemgetter
from tkinter import E
import FinanceDataReader as fdr
import pandas_datareader as pdr
from marcap import marcap_data
from query_manager import QueryManager
import pandas as pd
from datetime import datetime
import numpy as np
import time

class DataCollector(QueryManager):

Expand Down Expand Up @@ -45,6 +48,7 @@ def get_price_table(self):
except Exception as e:
print(f'{e} : {code}')


def get_finance_table(self, path1):
self.create_finance_table()
total = len(self.codes)
Expand All @@ -54,6 +58,7 @@ def get_finance_table(self, path1):
cnt = self.replace_finance_table(code, at, total, cnt, path1)
at+=1


def get_raw_price_info(self):
self.create_raw_price_info_table()
raw_price_info_df = marcap_data('1995-05-02', '2021-12-31')
Expand All @@ -63,6 +68,7 @@ def get_raw_price_info(self):
self.replace_raw_price_info_table(r, at, total)
at+=1


def add_main_sector_tocompany(self, path):
total = len(self.codes)
at = 0
Expand All @@ -73,6 +79,7 @@ def add_main_sector_tocompany(self, path):
except Exception as e:
print(e, code)


def get_price_average_info(self):
self.create_price_average_table()
total = len(self.codes)
Expand All @@ -86,12 +93,13 @@ def get_price_average_info(self):
at_code+=1
at+=1


def get_price_monthly_info(self):
self.create_price_monthly_info_table()
data = self.bring_additional_data()
total = len(self.codes)
error_list = []
at = 0;
at = 0
for code in self.codes.itertuples():
try:
np_adjclose = self.get_adjclose(code.Symbol)
Expand All @@ -104,10 +112,12 @@ def get_price_monthly_info(self):
self.replace_price_monthly_table(code, x, y, at, total, at_code, total_code)
at_code+=1
at+=1


except Exception as e:
error_list.append([e, code])
print(f'{e} : {code}')
pass

print(error_list)

Expand All @@ -118,7 +128,30 @@ def get_market_open_info(self):
at = 0
for r in df_open.itertuples():
self.replace_market_open_info_table(r, at, total)
at+=1
at += 1


def update_new_price_info(
    self,
    start='2022-01-03',
    end='2022-01-03',
    csv_path='/Users/choewonjun/Documents/coding/finance_data/quant/data/price_monthly.csv',
):
    """Collect new price rows for every code and append them to a CSV file.

    For each row of ``self.codes`` this calls
    ``self.get_new_price_info(code, start, end)`` and skips codes for which
    it returns ``None``. The remaining arrays are stacked row-wise, cast
    column by column to the dtypes listed in ``types`` and appended to
    ``csv_path`` without header or index.

    Args:
        start: First date of the range; passed through unchanged to
            ``get_new_price_info`` (the default is a 'YYYY-MM-DD' string).
        end: Last date of the range; passed through unchanged as well.
        csv_path: File the rows are appended to.

    Returns:
        None. Nothing is written when no code produced any rows.
    """
    # NOTE: the original author's timing comment recorded about 706 seconds
    # for a full run over all codes.
    # Gather the per-code arrays and concatenate once: growing an array with
    # np.append inside the loop re-copies every earlier row on each pass.
    chunks = []
    for code in self.codes.itertuples():
        new = self.get_new_price_info(code, start, end)
        if new is not None:
            chunks.append(new)

    if not chunks:
        # Nothing was collected, so leave the CSV untouched.
        return

    data = np.concatenate(chunks, axis=0)

    # One dtype per output column, in column order.
    types = [
        'int64', 'object', 'int64', 'int64', 'int64', 'int64',
        'int64', 'int64', 'float64', 'int64', 'int64', 'int64',
    ]
    df = pd.DataFrame(data)
    df = df.astype(dict(zip(df.columns, types)))
    df.to_csv(csv_path, index=False, header=False, mode='a')


if __name__ == "__main__":
Expand All @@ -130,5 +163,5 @@ def get_market_open_info(self):
# dc.get_finance_table(path)

# dc.add_main_sector_tocompany(path)
dc.get_price_monthly_info()
dc.update_new_price_info()
# dc.get_price_average_info()
42 changes: 41 additions & 1 deletion data_collector/data_preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import math
from operator import itemgetter
from marcap import marcap_data
from pykrx import stock


class DataPreprocessor:
Expand Down Expand Up @@ -211,4 +212,43 @@ def check_dec(self, ymd_parm, at):
if self.ymd == 12:
return 1
else:
return 0
return 0

def get_new_price_only_info(self, code, start, end):
    """
    Return the daily price rows of ``code`` dated from ``start`` to ``end``.

    ``start`` and ``end`` are 'YYYY-MM-DD' strings (for example
    '2021-01-31'); both bounds are inclusive. Data is requested from
    ``fdr.DataReader`` beginning with the year of ``start`` and then
    narrowed to the requested dates.

    Per the original author's note the price columns are
    Open, High, Low, Close, Volume, Change; the ``Date`` index is moved
    into the first column before the frame is converted to a NumPy array.
    """
    def as_datetime(ymd):
        # Slice the fixed-width 'YYYY-MM-DD' text into year / month / day.
        return datetime(int(ymd[:4]), int(ymd[5:7]), int(ymd[8:10]))

    frame = fdr.DataReader(f'{code}', int(start[:4])).reset_index()
    lower = as_datetime(start)
    upper = as_datetime(end)
    in_range = (frame["Date"] >= lower) & (frame["Date"] <= upper)
    return frame[in_range].to_numpy()

def get_new_price_info(self, code, start, end):
    """
    Build the daily price rows for one code between ``start`` and ``end``.

    Each returned row has 12 columns, in this order (names taken from the
    original author's notes): ID, Date, Open, High, Low, Close, Volume,
    PVolume, Change, market cap, listed shares, rank.
    PVolume and rank are placeholders that are always 0.

    NOTE(review): the existing price CSV is reported to be monthly while
    this returns daily rows; how to reconcile the two is still undecided.

    Args:
        code: Record exposing ``ID`` and ``Symbol`` attributes.
        start: First date, formatted 'YYYY-MM-DD'.
        end: Last date, formatted 'YYYY-MM-DD'.

    Returns:
        A 2-D NumPy array with one row per day, or ``None`` when no rows
        were found or any step failed (a message naming the symbol is
        printed in that case).
    """
    try:
        price_info = self.get_new_price_only_info(code.Symbol, start, end)
        row_count = len(price_info)
        if row_count == 0:
            # No price rows in the range: report it the same way as any
            # other failure instead of relying on an IndexError below.
            print(f'Exception : {code.Symbol}')
            return None

        ids = [[code.ID]] * row_count  # ID
        dates_to_volume = price_info[:, :6]  # Date, Open, High, Low, Close, Volume
        # Convert the Date of every row (not just the first one) so that
        # multi-day ranges do not end up with mixed date types.
        for row in dates_to_volume:
            row[0] = row[0].date()
        pvolume = [[0]] * row_count  # PVolume: meaning unknown, filled with 0
        changes = price_info[:, 6:]  # Change

        # Columns returned: market cap, volume, trading value, listed shares.
        marcap = stock.get_market_cap(start.replace('-', ''), end.replace('-', ''), code.Symbol)
        marcap = marcap.to_numpy()
        marcap = marcap[:, [0, 3]]  # market cap, listed shares
        ranks = [[0]] * row_count  # rank: not computed yet, filled with 0

        # Join all column groups side by side in a single pass.
        return np.concatenate(
            [ids, dates_to_volume, pvolume, changes, marcap, ranks],
            axis=1,
        )
    except Exception:
        # Best-effort per code: report the symbol and let the caller skip it.
        # Unlike a bare except, KeyboardInterrupt/SystemExit still propagate.
        print(f'Exception : {code.Symbol}')
        return None