Skip to content

Commit

Permalink
获取东方财富资金流向保存到 mongodb中
Browse files Browse the repository at this point in the history
  • Loading branch information
tauruswang committed Jun 20, 2018
1 parent 5979c24 commit d60fc8a
Show file tree
Hide file tree
Showing 8 changed files with 617 additions and 170 deletions.
271 changes: 264 additions & 7 deletions QUANTAXIS/QASU/crawl_eastmoney.py
Original file line number Diff line number Diff line change
@@ -1,21 +1,275 @@

import os
from QUANTAXIS.QAUtil import QALocalize
from QUANTAXIS_CRAWLY.run_selenium_alone import read_east_money_page_zjlx_to_sqllite
from QUANTAXIS_CRAWLY.run_selenium_alone import (read_east_money_page_zjlx_to_sqllite, open_chrome_driver, close_chrome_dirver)
import urllib
import pandas as pd
import time

from QUANTAXIS.QAUtil import (DATABASE)



# Keys for one 13-value daily record, in the order the values appear in the
# upstream payload (date, main-force / extra-large / large / medium / small
# order net inflow as amount + ratio pairs, close price, price change).
_ZJLX_ROW_KEYS = (
    'date',
    'zljll_je_wy',
    'zljll_jzb_bfb',
    'cddjll_je_wy',
    'cddjll_je_jzb',
    'ddjll_je_wy',
    'ddjll_je_jzb',
    'zdjll_je_wy',
    'zdjll_je_jzb',
    'xdjll_je_wy',
    'xdjll_je_jzb',
    'close_price',
    'change_price',
)


def _zjlx_fetch_text(url):
    """Download *url* and return its body decoded as UTF-8, dropping bad bytes."""
    # Imported locally: a bare ``import urllib`` does not guarantee that the
    # ``urllib.request`` submodule has been loaded.
    import urllib.request

    # The context manager closes the HTTP response even if read() fails.
    with urllib.request.urlopen(url) as response:
        return response.read().decode("utf-8", "ignore")


def _zjlx_extract_url_template(page_text):
    """Return the '+'-separated pieces of the page's ``var strUrl = ...`` line.

    Only a line that both mentions ``EM_CapitalFlowInterface`` and starts with
    ``var strUrl = `` qualifies; an empty list is returned when none is found.
    """
    marker = 'var strUrl = '
    for raw_line in page_text.split("\r\n"):
        line = raw_line.strip()
        if 'EM_CapitalFlowInterface' in line and line.startswith(marker):
            return line[len(marker):].split('+')
    return []


def _zjlx_build_request_url(template_parts, stock_code, market_flag):
    """Assemble the data URL by substituting the stock code and market flag."""
    request_url = ""
    for part in template_parts:
        if '_stockCode' in part:
            request_url += stock_code
        elif '_stockMarke' in part:
            request_url += market_flag
        elif 'http://ff.eastmoney.com/' in part:
            # The host piece restarts the URL from the bare host.
            request_url = 'http://ff.eastmoney.com/'
        else:
            # Literal piece: drop the surrounding spaces and double quotes.
            request_url += part.strip(' "')
    return request_url


def _zjlx_parse_rows(payload, stock_code):
    """Turn the ``var aff_data=...`` payload into a list of record dicts.

    Values are kept as strings. Returns an empty list when the expected prefix
    is missing; a trailing group with fewer than 13 values is ignored instead
    of raising IndexError.
    """
    _, found, remainder = payload.partition('var aff_data=({data:[["')
    if not found:
        return []

    fields = remainder.split(',')
    width = len(_ZJLX_ROW_KEYS)
    rows = []
    for start in range(0, len(fields) - width + 1, width):
        chunk = [value.strip('"') for value in fields[start:start + width]]
        # The last value of a group may carry the payload's closing brackets.
        chunk[-1] = chunk[-1].strip('"]]})')
        row = {'stock_code': stock_code}
        row.update(zip(_ZJLX_ROW_KEYS, chunk))
        rows.append(row)
    return rows


def QA_request_eastmoney_zjlx(param_stock_code_list):
    """Fetch eastmoney capital-flow (zjlx) records and store new ones in MongoDB.

    The HTML page of the first stock code is downloaded once to discover the
    data-URL template. For every stock code the template is filled in, the
    payload is downloaded and parsed, and each record that is not already in
    ``DATABASE.eastmoney_stock_zjlx`` is inserted.

    :param param_stock_code_list: list of stock-code strings; only codes
        starting with ``60``, ``00`` or ``30`` are handled.
    :return: always ``None``. Returns immediately for an empty list, and
        stops at the first unsupported stock code.
    """
    if not param_stock_code_list:
        # Nothing to fetch; avoids an IndexError on the first-code lookup.
        return

    page_url = "http://data.eastmoney.com/zjlx/{}.html".format(
        param_stock_code_list[0])

    # Pause before each request.
    time.sleep(1.223)
    page_text = _zjlx_fetch_text(page_url)

    # TODO: switch to regular expressions for the matching.
    values = _zjlx_extract_url_template(page_text)
    print(values)

    for stock_code in param_stock_code_list:
        code_prefix = stock_code[0:2]
        if code_prefix == '60':
            market_flag = '1'
        elif code_prefix in ('00', '30'):
            market_flag = '2'
        else:
            print(stock_code + " 暂不支持, 60, 00, 30 开头的股票代码")
            return

        request_url = _zjlx_build_request_url(values, stock_code, market_flag)

        # Pause before each request.
        time.sleep(1.456)
        payload = _zjlx_fetch_text(request_url)

        records = _zjlx_parse_rows(payload, stock_code)

        collection = DATABASE.eastmoney_stock_zjlx
        for record in records:
            # TODO: after the trading day ends, re-fetch that day's flow; the
            # intraday value is only a snapshot of one moment.
            existing = collection.find_one(record)
            if existing is None:
                collection.insert_one(record)
                print("🤑 插入新的记录 ", record)
            else:
                print("😵 记录已经存在 ", existing)


def QA_read_eastmoney_zjlx_web_page_to_sqllite(stockCode = None):
'''
作为测试用例来获取, 对比 request 方式的获取数据是否一致
'''
def QA_read_eastmoney_zjlx_web_page_to_sqllite(stockCodeList = None):

# todo 🛠 check stockCode 是否存在有效合法
# todo 🛠 QALocalize 从QALocalize 目录中读取 固定位置存放驱动文件

print("📨当前工作路径文件位置 : ",os.getcwd())
path_check = os.getcwd()+"/selenium_driver"
path_check = os.getcwd()+"/QUANTAXIS_WEBDRIVER"
if os.path.exists(path_check) == False:
print("😵 确认当前路径是否包含selenium_driver目录 😰 ")
return
else:
print(os.getcwd()+"/selenium_drive"," 目录存在 😁")
print(os.getcwd()+"/QUANTAXIS_WEBDRIVER"," 目录存在 😁")
print("")

path_for_save_data = QALocalize.download_path + "/eastmoney_stock_zjlx"
Expand All @@ -31,10 +285,13 @@ def QA_read_eastmoney_zjlx_web_page_to_sqllite(stockCode = None):
else:
print(path_for_save_data,"目录存在!准备读取数据 😋")

full_path_name = path_for_save_data + "/" + stockCode + "_zjlx.sqlite.db"
browser = open_chrome_driver()

read_east_money_page_zjlx_to_sqllite(stockCode,full_path_name)
for indexCode in range(len(stockCodeList)):
full_path_name = path_for_save_data + "/" + stockCodeList[indexCode] + "_zjlx.sqlite.db"
read_east_money_page_zjlx_to_sqllite(stockCodeList[indexCode],full_path_name, browser)

close_chrome_dirver(browser)
#创建目录
#启动线程读取网页,写入数据库
#等待完成
13 changes: 10 additions & 3 deletions QUANTAXIS/QASU/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -226,13 +226,20 @@ def QA_SU_crawl_eastmoney(action="zjlx",stockCode=None):

if stockCode=="all":
#读取tushare股票列表代码
print(" 一共需要获取 %d 个股票的 资金流向 , 需要大概 %d 小时" % (len(stockItems), (len(stockItems)*30)/60/60 ))
print("💪 一共需要获取 %d 个股票的 资金流向 , 需要大概 %d 小时" % (len(stockItems), (len(stockItems)*5)/60/60 ))

code_list = []
for stock in stockItems:
code_list.append(stock['code'])
#print(stock['code'])
crawl_eastmoney_file.QA_read_eastmoney_zjlx_web_page_to_sqllite(stockCode=stock['code'])
crawl_eastmoney_file.QA_read_eastmoney_zjlx_web_page_to_sqllite(code_list)
#print(stock)

return
else:
#todo 检查股票代码是否合法
return crawl_eastmoney_file.QA_read_eastmoney_zjlx_web_page_to_sqllite(stockCode=stockCode)
#return crawl_eastmoney_file.QA_read_eastmoney_zjlx_web_page_to_sqllite(stockCode=stockCode)
code_list = []
code_list.append(stockCode)
return crawl_eastmoney_file.QA_request_eastmoney_zjlx(param_stock_code_list=code_list)

19 changes: 19 additions & 0 deletions QUANTAXIS/QASU/save_to_db_fields_description.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,25 @@
{"meigujingzichan": "每股净资产"},
{"baoliu2": "保留"}
]
},
{
'eastmoney_stock_zjlx':
[
{"stock_code" : "股票代码"},
{"date" : "日期"},
{"zljll_je_wy" : "主力资金流入(金额)单位万元"},
{"zljll_jzb_bfb" : "主力资金流入(净占比)%"},
{"cddjll_je_wy" : "超级大单流入(金额)单位万元"},
{"cddjll_je_jzb" : "超级大单流入(净占比)"},
{"ddjll_je_wy" : "大单资金流入(金额)单位万元"},
{"ddjll_je_jzb" : "大单资金流入(净占比)%"},
{"zdjll_je_wy" : "中单资金流入(金额)单位万元"},
{"zdjll_je_jzb" : "中单资金流入(净占比)%"},
{"xdjll_je_wy" : "小单资金流入(金额)单位万元"},
{"xdjll_je_jzb" : "小单资金流入(净占比)"},
{"close_price" : "收盘价"},
{"change_price" : "涨跌幅"}
]
}


Expand Down
Loading

0 comments on commit d60fc8a

Please sign in to comment.