Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

修改: createtables_mysql.sql #6

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified common/__pycache__/GetMysqlConn.cpython-35.pyc
Binary file not shown.
5 changes: 3 additions & 2 deletions lkad/GetAllInc_mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@ def getTable():
maxDateList = []
for i in tableNameList:
sqlStr = "SELECT DATE_FORMAT(max(DATE),'%Y%m%d') as maxdate FROM "+ i
print(sqlStr)
cursor.execute(sqlStr)

maxDate = cursor.fetchone()["maxdate"]
Expand Down Expand Up @@ -79,7 +80,7 @@ def getCSV(code, url):
fordername = 'AllStockDataInc'
filename = str(code) + '.CSV'
if not os.path.isdir(fordername):
print("mkdir")
#print("mkdir")
os.mkdir(fordername)

with request.urlopen(url) as web:
Expand Down Expand Up @@ -199,7 +200,7 @@ def main():
# print(e
#获取下载链接和股票代码
valid_code_url=return_valid_code_url()
pool=Pool(processes=24)
pool=Pool(processes=3)
for c,u in valid_code_url:
pool.apply_async(getCSV,(c,u,))

Expand Down
33 changes: 18 additions & 15 deletions lkad/GetAll_mysql.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,15 +52,18 @@ def listStock(cursor):
def getCSV(code, url):
fordername = 'AllStockData'
filename = str(code) + '.CSV'
fullfilename = fordername+os.path.sep+filename
if not os.path.isdir(fordername):
print("mkdir")
#print("mkdir")
os.mkdir(fordername)
with request.urlopen(url) as web:
# 为防止编码错误,使用二进制写文件模式
print(web)
with open(fordername+os.path.sep+filename, 'wb') as outfile:
outfile.write(web.read())
print("write OK "+str(code))
if not os.path.exists(fullfilename):
with request.urlopen(url) as web:
# 为防止编码错误,使用二进制写文件模式
# print(web)
with open(fullfilename, 'wb') as outfile:
outfile.write(web.read())
print("csvfile download OK "+str(code))

#print("id1")

#print(id(cursor))
Expand All @@ -69,19 +72,19 @@ def getCSV(code, url):
#print("Id2")
# print(id(cursor))
saveInDB(code,cursor)

print("一只股票入库完毕")
# 删除CSV文件
os.remove(fordername+filename)
cursor.close()
print("一只股票入库完毕",code)
# 删除CSV文件,如果需要保留,直接注释即可,适合网络状态不太好的时候。
os.remove(fullfilename)

# 将获取的数据入库
def saveInDB(code,cursor):
# 建表
if not is_table_exist(code,cursor): # 如果表不存在,先创建表
#print("not_exist")
#print(cursor,"not_exist")
create_table(code,cursor) # 如果表不存在,先建表
else: # 存在则截断
#print("exist")
#print(cursor,"exist")
cursor.execute("truncate table stock_" + code)
cursor.execute("commit")

Expand Down Expand Up @@ -214,7 +217,7 @@ def get_type(code):
return switcher.get(code_pre, '未知')

# 建表
def create_table(code):
def create_table(code,cursor):
sql = "CREATE TABLE stock_" + code + """
(
UUID VARCHAR(80) PRIMARY KEY,
Expand Down Expand Up @@ -289,7 +292,7 @@ def main(key, url):
#=======================================================
time1 = time.time()
dict = listStock(cursor)
pool = Pool(processes = 24) # 设定并发进程的数量
pool = Pool(processes = 10) # 设定并发进程的数量
for key in dict:
pool.apply_async(main, (key, dict[key], ))

Expand Down
2 changes: 1 addition & 1 deletion lkad/createtables_mysql.sql
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ profits_yoy decimal(20, 4) comment '净利润同比(%)' ,
distrib varchar(50) comment '分配方案' ,
report_date date comment '发布日期' ,
year varchar(5) comment '年份' ,
quarter varchar(2) '季度'
quarter varchar(2) comment '季度'
) comment '业绩报告表';


Expand Down
8 changes: 8 additions & 0 deletions lkad/reade.text
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
使用说明请看这里:
lkad下面是mysql数据库的代码,使用方法如下:
安装依赖 pip3 install -r requirement.txt
然后运行建表语句:mysql -uroot -p < createtables_mysql.sql
注意:如果数据库和运行环境不在同一台主机上,请自行修改 ../config/config.conf 里面的配置。
然后获取基本信息表的数据: python3 GetStockBasics_mysql.py
完成后获取全量信息:python3 GetAll_mysql.py。获取全量信息时可以自己修改并发进程数(Pool 的 processes 参数),默认为10个。全量信息比较大,需要2-3个小时才能跑完,占用的数据库空间也比较大,大概有15G左右。
每日增量信息获取 : python3 GetAllInc_mysql.py
15 changes: 15 additions & 0 deletions lkad/requirement.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
beautifulsoup4==4.6.0
bs4==0.0.1
chardet==2.3.0
cx-Oracle==6.1
lxml==4.1.1
PyMySQL==0.8.0
pytz==2017.3
requests==2.9.1
six==1.10.0
SQLAlchemy==1.2.2
ssh-import-id==5.5
tushare==1.1.3
urllib3==1.13.1
numpy==1.14.0
pandas==0.22.0