From d54a8f00d9d5d663c4d6c5572e15f4553508ea23 Mon Sep 17 00:00:00 2001 From: foolcage <5533061@qq.com> Date: Sat, 27 Apr 2024 23:17:39 +0800 Subject: [PATCH] Use concept main_tag mapping --- examples/data_runner/actor_runner.py | 14 +- examples/data_runner/finance_runner.py | 2 +- examples/data_runner/index_runner.py | 2 +- examples/data_runner/joinquant_fund_runner.py | 2 +- .../data_runner/joinquant_kdata_runner.py | 2 +- examples/data_runner/kdata_runner.py | 2 +- examples/data_runner/sina_data_runner.py | 6 +- examples/data_runner/trading_runner.py | 2 +- examples/requirements.txt | 2 +- requirements.txt | 3 +- scripts/prepare_recent_data.py | 5 +- scripts/qmt_runner.py | 2 +- scripts/report_stock_by_tag.py | 4 +- src/zvt/__init__.py | 8 +- src/zvt/sched/sched.py | 5 +- src/zvt/server.py | 2 + src/zvt/tag/concept_main_tag_mapping.json | 398 ++++++++++++++++ src/zvt/tag/concept_mapping.json | 442 ------------------ src/zvt/tag/industry_main_tag_mapping.json | 88 ++++ src/zvt/tag/industry_mapping.json | 133 ------ src/zvt/tag/main_tags_from_concept.json | 3 - src/zvt/tag/tag_service.py | 268 ++++++----- src/zvt/tag/tag_stats.py | 20 +- src/zvt/tag/tag_utils.py | 62 ++- src/zvt/trading/common.py | 1 - src/zvt/trading/trading_service.py | 39 +- src/zvt/utils/inform_utils.py | 16 + {examples => src/zvt/utils}/recorder_utils.py | 18 +- 28 files changed, 731 insertions(+), 820 deletions(-) create mode 100644 src/zvt/tag/concept_main_tag_mapping.json delete mode 100644 src/zvt/tag/concept_mapping.json create mode 100644 src/zvt/tag/industry_main_tag_mapping.json delete mode 100644 src/zvt/tag/industry_mapping.json delete mode 100644 src/zvt/tag/main_tags_from_concept.json create mode 100644 src/zvt/utils/inform_utils.py rename {examples => src/zvt/utils}/recorder_utils.py (75%) diff --git a/examples/data_runner/actor_runner.py b/examples/data_runner/actor_runner.py index e60bfe8b..ba360ead 100644 --- a/examples/data_runner/actor_runner.py +++ b/examples/data_runner/actor_runner.py @@ -3,23 +3,13 @@ from apscheduler.schedulers.background import BackgroundScheduler -from examples.recorder_utils import run_data_recorder -from examples.utils import add_to_eastmoney -from zvt import init_log, zvt_config +from zvt.utils.recorder_utils import run_data_recorder +from zvt import init_log from zvt.domain import ( - Stock, - Stock1dHfqKdata, - Stockhk, - Stockhk1dHfqKdata, - Block, - Block1dKdata, - BlockCategory, StockInstitutionalInvestorHolder, StockTopTenFreeHolder, StockActorSummary, ) -from zvt.informer import EmailInformer -from zvt.utils import date_time_by_interval, current_date logger = logging.getLogger(__name__) diff --git a/examples/data_runner/finance_runner.py b/examples/data_runner/finance_runner.py index f38dac45..73490ff0 100644 --- a/examples/data_runner/finance_runner.py +++ b/examples/data_runner/finance_runner.py @@ -3,7 +3,7 @@ from apscheduler.schedulers.background import BackgroundScheduler -from examples.recorder_utils import run_data_recorder +from zvt.utils.recorder_utils import run_data_recorder from zvt import init_log from zvt.domain import ( Stock, diff --git a/examples/data_runner/index_runner.py b/examples/data_runner/index_runner.py index ca3736c0..9eaf1a63 100644 --- a/examples/data_runner/index_runner.py +++ b/examples/data_runner/index_runner.py @@ -3,7 +3,7 @@ from apscheduler.schedulers.background import BackgroundScheduler -from examples.recorder_utils import run_data_recorder +from zvt.utils.recorder_utils import run_data_recorder from zvt import init_log from zvt.consts import IMPORTANT_INDEX from zvt.domain import Index, Index1dKdata, IndexStock diff --git a/examples/data_runner/joinquant_fund_runner.py b/examples/data_runner/joinquant_fund_runner.py index c4b41bcc..92d026ae 100644 --- a/examples/data_runner/joinquant_fund_runner.py +++ b/examples/data_runner/joinquant_fund_runner.py @@ -3,7 +3,7 @@ from apscheduler.schedulers.background import BackgroundScheduler -from examples.recorder_utils import run_data_recorder +from zvt.utils.recorder_utils import run_data_recorder from zvt import init_log from zvt.domain import Fund, FundStock, StockValuation diff --git a/examples/data_runner/joinquant_kdata_runner.py b/examples/data_runner/joinquant_kdata_runner.py index ac832c55..902262e2 100644 --- a/examples/data_runner/joinquant_kdata_runner.py +++ b/examples/data_runner/joinquant_kdata_runner.py @@ -14,7 +14,7 @@ from apscheduler.schedulers.background import BackgroundScheduler -from examples.recorder_utils import run_data_recorder +from zvt.utils.recorder_utils import run_data_recorder from zvt import init_log from zvt.domain import Stock, Stock1dHfqKdata diff --git a/examples/data_runner/kdata_runner.py b/examples/data_runner/kdata_runner.py index 3b62e947..4358844c 100644 --- a/examples/data_runner/kdata_runner.py +++ b/examples/data_runner/kdata_runner.py @@ -3,7 +3,7 @@ from apscheduler.schedulers.background import BackgroundScheduler -from examples.recorder_utils import run_data_recorder +from zvt.utils.recorder_utils import run_data_recorder from examples.report_utils import inform from examples.utils import get_hot_topics from zvt import init_log, zvt_config diff --git a/examples/data_runner/sina_data_runner.py b/examples/data_runner/sina_data_runner.py index 0456d07b..732ba614 100644 --- a/examples/data_runner/sina_data_runner.py +++ b/examples/data_runner/sina_data_runner.py @@ -1,13 +1,11 @@ # -*- coding: utf-8 -*- import logging -import time from apscheduler.schedulers.background import BackgroundScheduler -from examples.recorder_utils import run_data_recorder -from zvt import init_log, zvt_config +from zvt.utils.recorder_utils import run_data_recorder +from zvt import init_log from zvt.domain import * -from zvt.informer.informer import EmailInformer logger = logging.getLogger(__name__) diff --git a/examples/data_runner/trading_runner.py b/examples/data_runner/trading_runner.py index 36982358..29de58e3 100644 --- a/examples/data_runner/trading_runner.py +++ b/examples/data_runner/trading_runner.py @@ -4,7 +4,7 @@ from apscheduler.schedulers.background import BackgroundScheduler from sqlalchemy import or_, and_ -from examples.recorder_utils import run_data_recorder +from zvt.utils.recorder_utils import run_data_recorder from examples.report_utils import inform from zvt import init_log from zvt.api import get_big_players, get_latest_kdata_date diff --git a/examples/requirements.txt b/examples/requirements.txt index c40fcf05..b33a6107 100644 --- a/examples/requirements.txt +++ b/examples/requirements.txt @@ -1,5 +1,5 @@ zvt >= 0.10.1 apscheduler >= 3.4.0 -eastmoneypy == 0.1.4 +eastmoneypy == 0.1.5 tabulate>=0.8.8 ta \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index f5267fad..11f8bea5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,4 +14,5 @@ dash-bootstrap-components==1.3.1 dash_daq==0.5.0 scikit-learn==1.2.1 fastapi==0.110.0 -fastapi-pagination==0.12.23 \ No newline at end of file +fastapi-pagination==0.12.23 +apscheduler==3.10.4 \ No newline at end of file diff --git a/scripts/prepare_recent_data.py b/scripts/prepare_recent_data.py index efee5360..612cff99 100644 --- a/scripts/prepare_recent_data.py +++ b/scripts/prepare_recent_data.py @@ -1,5 +1,7 @@ # -*- coding: utf-8 -*- -from examples.recorder_utils import run_data_recorder +from zvt.tag.tag_models import ActivateSubTagsModel +from zvt.tag.tag_service import activate_sub_tags +from zvt.utils.recorder_utils import run_data_recorder from zvt.api.selector import get_entity_ids_by_filter from zvt.domain import StockEvents, BlockStock from zvt.tag import StockAutoTagger @@ -32,3 +34,4 @@ ) StockAutoTagger().tag() + # activate_sub_tags(ActivateSubTagsModel(sub_tags=["低空经济", "跨境支付"])) diff --git a/scripts/qmt_runner.py b/scripts/qmt_runner.py index eab2b224..3605d246 100644 --- a/scripts/qmt_runner.py +++ b/scripts/qmt_runner.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -from examples.recorder_utils import run_data_recorder +from zvt.utils.recorder_utils import run_data_recorder from zvt.api.selector import get_entity_ids_by_filter from zvt.domain import Stock1dHfqKdata diff --git a/scripts/report_stock_by_tag.py b/scripts/report_stock_by_tag.py index c25249a1..a77f31f8 100644 --- a/scripts/report_stock_by_tag.py +++ b/scripts/report_stock_by_tag.py @@ -143,8 +143,8 @@ def report_vol_up_stocks(main_line_tags): # 放入股票池 build_system_stock_pools() - build_stock_pool_tag_stats(stock_pool_name="main_line", force_rebuild_latest=True) - build_stock_pool_tag_stats(stock_pool_name="vol_up", force_rebuild_latest=True) + build_stock_pool_tag_stats(stock_pool_name="main_line") + build_stock_pool_tag_stats(stock_pool_name="vol_up") main_line_tags = report_main_line_stocks() logger.info(f"mainline is :{main_line_tags}") diff --git a/src/zvt/__init__.py b/src/zvt/__init__.py index 5d280546..e96ffe5f 100644 --- a/src/zvt/__init__.py +++ b/src/zvt/__init__.py @@ -217,10 +217,10 @@ def old_db_to_provider_dir(data_path): import zvt.recorders as zvt_recorders import zvt.factors as zvt_factors -try: - import zvt.recorders.qmt.quotes.qmt_kdata_recorder as qmt_kdata_recorde -except Exception as e: - logger.warning("QMT need run in Windows!", e) +# try: +# import zvt.recorders.qmt.quotes.qmt_kdata_recorder as qmt_kdata_recorde +# except Exception as e: +# logger.warning("QMT need run in Windows!", e) __all__ = ["zvt_env", "zvt_config", "init_log", "init_env", "init_config", "__version__"] diff --git a/src/zvt/sched/sched.py b/src/zvt/sched/sched.py index c5c640d0..264e1c1a 100644 --- a/src/zvt/sched/sched.py +++ b/src/zvt/sched/sched.py @@ -5,9 +5,10 @@ from apscheduler.jobstores.sqlalchemy import SQLAlchemyJobStore from apscheduler.schedulers.background import BackgroundScheduler -from zvt import zvt_env +from zvt import ZVT_HOME + +jobs_db_path = os.path.join(ZVT_HOME, "jobs.db") -jobs_db_path = os.path.join(zvt_env["data_path"], "jobs.db") jobstores = {"default": SQLAlchemyJobStore(url=f"sqlite:///{jobs_db_path}")} diff --git a/src/zvt/server.py b/src/zvt/server.py index c93f095e..287c9d8e 100644 --- a/src/zvt/server.py +++ b/src/zvt/server.py @@ -9,6 +9,8 @@ from zvt.rest.work import work_router from fastapi.middleware.cors import CORSMiddleware +from zvt.sched.sched import zvt_scheduler + app = FastAPI() origins = ["*"] diff --git a/src/zvt/tag/concept_main_tag_mapping.json b/src/zvt/tag/concept_main_tag_mapping.json new file mode 100644 index 00000000..3bd21800 --- /dev/null +++ b/src/zvt/tag/concept_main_tag_mapping.json @@ -0,0 +1,398 @@ +{ + "广电": "文化传媒", + "数字阅读": "文化传媒", + "网络游戏": "文化传媒", + "手游概念": "文化传媒", + "影视概念": "文化传媒", + "可燃冰": "资源", + "油价相关": "资源", + "低碳冶金": "资源", + "基本金属": "资源", + "黄金概念": "资源", + "天然气": "资源", + "稀缺资源": "资源", + "页岩气": "资源", + "煤化工": "资源", + "油气设服": "资源", + "小金属概念": "资源", + "新能源": "新能源", + "BC电池": "新能源", + "TOPCon电池": "新能源", + "钒电池": "新能源", + "钙钛矿电池": "新能源", + "麒麟电池": "新能源", + "动力电池回收": "新能源", + "钠离子电池": "新能源", + "固态电池": "新能源", + "刀片电池": "新能源", + "HIT电池": "新能源", + "燃料电池": "新能源", + "锂电池": "新能源", + "储能": "新能源", + "充电桩": "新能源", + "熔盐储能": "新能源", + "换电概念": "新能源", + "盐湖提锂": "新能源", + "风能": "新能源", + "太阳能": "新能源", + "氢能源": "新能源", + "抽水蓄能": "新能源", + "光伏建筑一体化": "新能源", + "人造太阳": "新能源", + "可控核聚变": "新能源", + "核能核电": "新能源", + "绿色电力": "电力", + "特高压": "电力", + "虚拟电厂": "电力", + "智能电网": "电力", + "超超临界发电": "电力", + "半导体概念": "半导体", + "中芯概念": "半导体", + "存储芯片": "半导体", + "AI芯片": "半导体", + "汽车芯片": "半导体", + "国产芯片": "半导体", + "第三代半导体": "半导体", + "第四代半导体": "半导体", + "英伟达概念": "半导体", + "铜缆高速连接": "半导体", + "高带宽内存": "半导体", + "Chiplet概念": "半导体", + "光刻胶": "半导体", + "氮化镓": "半导体", + "EDA概念": "半导体", + "IGBT概念": "半导体", + "PCB": "半导体", + "VPN": "通信", + "IPv6": "通信", + "WiFi": "通信", + "毫米波概念": "通信", + "5G概念": "通信", + "6G概念": "通信", + "F5G概念": "通信", + "量子通信": "通信", + "天基互联": "通信", + "新能源车": "汽车", + "华为汽车": "汽车", + "汽车拆解": "汽车", + "汽车一体化压铸": "汽车", + "小米汽车": "汽车", + "汽车热管理": "汽车", + "电子后视镜": "汽车", + "高压快充": "汽车", + "车联网": "汽车", + "激光雷达": "汽车", + "特斯拉": "汽车", + "EDR概念": "汽车", + "无人驾驶": "汽车", + "ETC": "汽车", + "物联网": "智能机器", + "2025规划": "智能机器", + "智能机器": "智能机器", + "工业互联": "智能机器", + "轮毂电机": "智能机器", + "发电机概念": "智能机器", + "同步磁阻电机": "智能机器", + "机器人执行器": "智能机器", + "新型工业化": "智能机器", + "工业母机": "智能机器", + "工业4.0": "智能机器", + "减速器": "智能机器", + "机器人概念": "智能机器", + "PLC概念": "智能机器", + "机器视觉": "智能机器", + "生物医药": "医药", + "痘病毒防治": "医药", + "地塞米松": "医药", + "消毒剂": "医药", + "口罩": "医药", + "肝素概念": "医药", + "健康中国": "医药", + "幽门螺杆菌概念": "医药", + "代糖概念": "医药", + "医疗器械概念": "医药", + "生物疫苗": "医药", + "维生素": "医药", + "注射器概念": "医药", + "流感": "医药", + "AI制药": "医药", + "中药概念": "医药", + "减肥药": "医药", + "创新药": "医药", + "新冠药物": "医药", + "长寿药": "医药", + "独家药品": "医药", + "病毒防治": "医药", + "SPD概念": "医药", + "辅助生殖": "医药", + "肝炎概念": "医药", + "蒙脱石散": "医药", + "血氧仪": "医药", + "熊去氧胆酸": "医药", + "抗原检测": "医药", + "抗菌面料": "医药", + "千金藤素": "医药", + "DRG/DIP": "医药", + "CRO": "医药", + "阿兹海默": "医药", + "CAR-T细胞疗法": "医药", + "新冠检测": "医药", + "青蒿素": "医药", + "超级真菌": "医药", + "气溶胶检测": "医药", + "重组蛋白": "医药", + "疫苗冷链": "医药", + "精准医疗": "医药", + "单抗概念": "医药", + "免疫治疗": "医药", + "基因测序": "医药", + "体外诊断": "医药", + "互联医疗": "医药", + "人脑工程": "医药", + "啤酒概念": "大消费", + "进口博览": "大消费", + "退税商店": "大消费", + "拼多多概念": "大消费", + "抖音小店": "大消费", + "乳业": "大消费", + "C2M概念": "大消费", + "调味品概念": "大消费", + "毛发医疗": "大消费", + "化妆品概念": "大消费", + "白酒": "大消费", + "医疗美容": "大消费", + "户外露营": "大消费", + "在线旅游": "大消费", + "跨境电商": "大消费", + "电商概念": "大消费", + "新零售": "大消费", + "智能家居": "大消费", + "网红直播": "大消费", + "免税概念": "大消费", + "预制菜概念": "大消费", + "培育钻石": "大消费", + "婴童概念": "大消费", + "托育服务": "大消费", + "智慧灯杆": "消费电子", + "UWB概念": "消费电子", + "电子纸概念": "消费电子", + "胎压监测": "消费电子", + "3D玻璃": "消费电子", + "屏下摄像": "消费电子", + "超清视频": "消费电子", + "植物照明": "消费电子", + "LED": "消费电子", + "3D摄像头": "消费电子", + "eSIM": "消费电子", + "蓝宝石": "消费电子", + "无线耳机": "消费电子", + "智能穿戴": "消费电子", + "AI手机": "消费电子", + "AIPC": "消费电子", + "柔性屏(折叠屏)": "消费电子", + "星闪概念": "消费电子", + "传感器": "消费电子", + "被动元件": "消费电子", + "小米概念": "消费电子", + "无线充电": "消费电子", + "智能电视": "消费电子", + "空间计算": "消费电子", + "裸眼3D": "消费电子", + "混合现实": "消费电子", + "增强现实": "消费电子", + "虚拟现实": "消费电子", + "MicroLED": "消费电子", + "MiniLED": "消费电子", + "OLED": "消费电子", + "化工原料": "材料", + "环氧丙烷": "材料", + "PVDF概念": "材料", + "新材料": "材料", + "MLCC": "材料", + "碳纤维": "材料", + "PEEK材料概念": "材料", + "磷化工": "材料", + "碳基材料": "材料", + "纳米银": "材料", + "碳化硅": "材料", + "复合集流体": "材料", + "有机硅": "材料", + "稀土永磁": "材料", + "石墨烯": "材料", + "氟化工": "材料", + "草甘膦": "材料", + "钛白粉": "材料", + "降解塑料": "材料", + "工业气体": "材料", + "氦气概念": "材料", + "超级电容": "材料", + "军民融合": "军工", + "海工装备": "军工", + "军工": "军工", + "航母概念": "军工", + "航天概念": "军工", + "国家安防": "军工", + "空间站概念": "军工", + "大飞机": "军工", + "ERP概念": "AI", + "数字哨兵": "AI", + "电子身份证": "AI", + "电子车牌": "AI", + "大数据": "AI", + "智慧城市": "AI", + "云计算": "AI", + "国产软件": "AI", + "生物识别": "AI", + "RCS概念": "AI", + "远程办公": "AI", + "在线教育": "AI", + "百度概念": "AI", + "人工智能": "AI", + "液冷概念": "AI", + "光通信模块": "AI", + "CPO概念": "AI", + "AI语料": "AI", + "Kimi概念": "AI", + "Sora概念": "AI", + "短剧互动游戏": "AI", + "多模态AI": "AI", + "数据要素": "AI", + "算力概念": "AI", + "MLOps概念": "AI", + "ChatGPT概念": "AI", + "AIGC概念": "AI", + "数据确权": "AI", + "Web3.0": "AI", + "虚拟数字人": "AI", + "数字水印": "AI", + "数据安全": "AI", + "云游戏": "AI", + "数字孪生": "AI", + "边缘计算": "AI", + "数据中心": "AI", + "华为概念": "AI", + "鸿蒙概念": "AI", + "华为欧拉": "AI", + "华为昇腾": "AI", + "国资云概念": "AI", + "东数西算": "AI", + "网络安全": "AI", + "元宇宙概念": "AI", + "NFT概念": "AI", + "信创": "AI", + "数字经济": "AI", + "区块链": "AI", + "智慧政务": "AI", + "数字货币": "AI", + "电子竞技": "AI", + "知识产权": "AI", + "时空大数据": "AI", + "低空经济": "低空经济", + "飞行汽车(eVTOL)": "低空经济", + "无人机": "低空经济", + "北斗导航": "低空经济", + "通用航空": "低空经济", + "建筑节能": "房地产", + "REITs概念": "房地产", + "租售同权": "房地产", + "铁路基建": "房地产", + "PPP模式": "房地产", + "工程机械概念": "房地产", + "新型城镇化": "房地产", + "装配建筑": "房地产", + "地下管网": "房地产", + "民爆概念": "房地产", + "参股期货": "金融", + "参股券商": "金融", + "参股保险": "金融", + "跨境支付": "金融", + "互联金融": "金融", + "券商概念": "金融", + "移动支付": "金融", + "参股银行": "金融", + "粮食概念": "农业", + "水产养殖": "农业", + "生态农业": "农业", + "蝗虫防治": "农业", + "农业种植": "农业", + "鸡肉概念": "农业", + "转基因": "农业", + "人造肉": "农业", + "食品安全": "农业", + "生物质能发电": "公用", + "噪声防治": "公用", + "土壤修复": "公用", + "地热能": "公用", + "海绵城市": "公用", + "节能环保": "公用", + "尾气治理": "公用", + "职业教育": "公用", + "医废处理": "公用", + "快递概念": "物流", + "RCEP概念": "物流", + "央企改革": "国企", + "中特估": "国企", + "中字头": "国企", + "沪企改革": "国企", + "国企改革": "国企", + "世界杯": "其他", + "东盟自贸区概念": "其他", + "娃哈哈概念": "其他", + "空气能热泵": "其他", + "核酸采样亭": "其他", + "中俄贸易概念": "其他", + "净水概念": "其他", + "京津冀": "其他", + "低价股": "其他", + "商汤概念": "其他", + "粤港自贸": "其他", + "土地流转": "其他", + "壳资源": "其他", + "盲盒经济": "其他", + "内贸流通": "其他", + "京东金融": "其他", + "乡村振兴": "其他", + "东北振兴": "其他", + "社区团购": "其他", + "地摊经济": "其他", + "快手概念": "其他", + "蚂蚁概念": "其他", + "证金持股": "其他", + "养老概念": "其他", + "冷链物流": "其他", + "贬值受益": "其他", + "纾困概念": "其他", + "阿里概念": "其他", + "深圳特区": "其他", + "超级品牌": "其他", + "中超概念": "其他", + "养老金": "其他", + "专精特新": "其他", + "统一大市场": "其他", + "光伏高速公路": "其他", + "核污染防治": "其他", + "磁悬浮概念": "其他", + "垃圾分类": "其他", + "电子烟": "其他", + "工业大麻": "其他", + "全息技术": "其他", + "超导概念": "其他", + "北交所概念": "其他", + "赛马概念": "其他", + "体育产业": "其他", + "雄安新区": "其他", + "共享经济": "其他", + "彩票概念": "其他", + "苹果概念": "其他", + "供销社概念": "其他", + "水利建设": "其他", + "3D打印": "其他", + "创投": "其他", + "猪肉概念": "其他", + "字节概念": "其他", + "海洋经济": "其他", + "上海自贸": "其他", + "一带一路": "其他", + "碳交易": "其他", + "宠物经济": "其他" +} \ No newline at end of file diff --git a/src/zvt/tag/concept_mapping.json b/src/zvt/tag/concept_mapping.json deleted file mode 100644 index 2b6b7082..00000000 --- a/src/zvt/tag/concept_mapping.json +++ /dev/null @@ -1,442 +0,0 @@ -{ - "文化传媒": [ - "广电", - "数字阅读", - "网络游戏", - "手游概念", - "影视概念" - ], - "资源": [ - "可燃冰", - "油价相关", - "低碳冶金", - "基本金属", - "黄金概念", - "天然气", - "稀缺资源", - "页岩气", - "煤化工", - "油气设服" - ], - "新能源": [ - "新能源", - "BC电池", - "TOPCon电池", - "钒电池", - "钙钛矿电池", - "麒麟电池", - "动力电池回收", - "钠离子电池", - "固态电池", - "刀片电池", - "HIT电池", - "燃料电池", - "锂电池", - "储能", - "充电桩", - "熔盐储能", - "换电概念", - "盐湖提锂", - "风能", - "太阳能", - "氢能源", - "抽水蓄能", - "光伏建筑一体化", - "人造太阳", - "可控核聚变", - "核能核电" - ], - "电力": [ - "绿色电力", - "特高压", - "虚拟电厂", - "智能电网", - "超超临界发电" - ], - "半导体": [ - "半导体概念", - "中芯概念", - "存储芯片", - "AI芯片", - "汽车芯片", - "国产芯片", - "第三代半导体", - "第四代半导体", - "英伟达概念", - "铜缆高速连接", - "高带宽内存", - "Chiplet概念", - "光刻胶", - "氮化镓", - "EDA概念", - "IGBT概念", - "PCB" - ], - "通信": [ - "VPN", - "IPv6", - "WiFi", - "毫米波概念", - "5G概念", - "6G概念", - "F5G概念", - "量子通信", - "天基互联" - ], - "汽车": [ - "新能源车", - "华为汽车", - "汽车拆解", - "汽车一体化压铸", - "小米汽车", - "汽车热管理", - "电子后视镜", - "高压快充", - "车联网", - "激光雷达", - "特斯拉", - "EDR概念", - "无人驾驶", - "ETC" - ], - "智能机器": [ - "物联网", - "2025规划", - "智能机器", - "工业互联", - "轮毂电机", - "发电机概念", - "同步磁阻电机", - "机器人执行器", - "新型工业化", - "工业母机", - "工业4.0", - "减速器", - "机器人概念", - "PLC概念", - "机器视觉" - ], - "医药": [ - "生物医药", - "痘病毒防治", - "地塞米松", - "消毒剂", - "口罩", - "肝素概念", - "健康中国", - "幽门螺杆菌概念", - "代糖概念", - "医疗器械概念", - "生物疫苗", - "维生素", - "注射器概念", - "流感", - "AI制药", - "中药概念", - "减肥药", - "创新药", - "新冠药物", - "长寿药", - "独家药品", - "病毒防治", - "SPD概念", - "辅助生殖", - "肝炎概念", - "蒙脱石散", - "血氧仪", - "熊去氧胆酸", - "抗原检测", - "抗菌面料", - "千金藤素", - "DRG/DIP", - "CRO", - "阿兹海默", - "CAR-T细胞疗法", - "新冠检测", - "青蒿素", - "超级真菌", - "气溶胶检测", - "重组蛋白", - "疫苗冷链", - "精准医疗", - "单抗概念", - "免疫治疗", - "基因测序", - "体外诊断", - "互联医疗", - "人脑工程" - ], - "大消费": [ - "啤酒概念", - "进口博览", - "退税商店", - "拼多多概念", - "抖音小店", - "乳业", - "C2M概念", - "调味品概念", - "毛发医疗", - "化妆品概念", - "白酒", - "医疗美容", - "户外露营", - "在线旅游", - "跨境电商", - "电商概念", - "新零售", - "智能家居", - "网红直播", - "免税概念", - "预制菜概念", - "培育钻石", - "婴童概念", - "托育服务" - ], - "消费电子": [ - "智慧灯杆", - "UWB概念", - "电子纸概念", - "胎压监测", - "3D玻璃", - "屏下摄像", - "超清视频", - "植物照明", - "LED", - "3D摄像头", - "eSIM", - "蓝宝石", - "无线耳机", - "智能穿戴", - "AI手机", - "AIPC", - "柔性屏(折叠屏)", - "星闪概念", - "传感器", - "被动元件", - "小米概念", - "无线充电", - "智能电视", - "空间计算", - "裸眼3D", - "混合现实", - "增强现实", - "虚拟现实", - "MicroLED", - "MiniLED", - "OLED" - ], - "材料": [ - "化工原料", - "环氧丙烷", - "PVDF概念", - "新材料", - "MLCC", - "碳纤维", - "PEEK材料概念", - "磷化工", - "碳基材料", - "纳米银", - "碳化硅", - "复合集流体", - "有机硅", - "小金属概念", - "稀土永磁", - "石墨烯", - "氟化工", - "草甘膦", - "钛白粉", - "降解塑料", - "工业气体", - "氦气概念", - "超级电容" - ], - "军工": [ - "军民融合", - "海工装备", - "军工", - "航母概念", - "航天概念", - "国家安防", - "空间站概念", - "大飞机" - ], - "AI": [ - "ERP概念", - "数字哨兵", - "电子身份证", - "电子车牌", - "大数据", - "智慧城市", - "云计算", - "国产软件", - "生物识别", - "RCS概念", - "远程办公", - "在线教育", - "百度概念", - "人工智能", - "液冷概念", - "光通信模块", - "CPO概念", - "AI语料", - "Kimi概念", - "Sora概念", - "短剧互动游戏", - "多模态AI", - "数据要素", - "算力概念", - "MLOps概念", - "ChatGPT概念", - "AIGC概念", - "数据确权", - "Web3.0", - "虚拟数字人", - "数字水印", - "数据安全", - "云游戏", - "数字孪生", - "边缘计算", - "数据中心", - "华为概念", - "鸿蒙概念", - "华为欧拉", - "华为昇腾", - "国资云概念", - "东数西算", - "网络安全", - "元宇宙概念", - "NFT概念", - "信创", - "数字经济", - "区块链", - "智慧政务", - "数字货币", - "电子竞技", - "知识产权", - "时空大数据" - ], - "低空经济": [ - "低空经济", - "飞行汽车(eVTOL)", - "无人机", - "北斗导航", - "通用航空" - ], - "房地产": [ - "建筑节能", - "REITs概念", - "租售同权", - "铁路基建", - "PPP模式", - "工程机械概念", - "新型城镇化", - "装配建筑", - "地下管网", - "民爆概念" - ], - "金融": [ - "参股期货", - "参股券商", - "参股保险", - "跨境支付", - "互联金融", - "券商概念", - "移动支付", - "参股银行" - ], - "农业": [ - "粮食概念", - "水产养殖", - "生态农业", - "蝗虫防治", - "农业种植", - "鸡肉概念", - "转基因", - "人造肉", - "食品安全" - ], - "公用": [ - "生物质能发电", - "噪声防治", - "土壤修复", - "地热能", - "海绵城市", - "节能环保", - "尾气治理", - "职业教育", - "医废处理" - ], - "物流": [ - "快递概念", - "RCEP概念" - ], - "国企": [ - "央企改革", - "中特估", - "中字头", - "沪企改革", - "国企改革" - ], - "其他": [ - "世界杯", - "东盟自贸区概念", - "娃哈哈概念", - "空气能热泵", - "核酸采样亭", - "中俄贸易概念", - "净水概念", - "京津冀", - "低价股", - "商汤概念", - "粤港自贸", - "土地流转", - "壳资源", - "盲盒经济", - "内贸流通", - "京东金融", - "乡村振兴", - "东北振兴", - "社区团购", - "地摊经济", - "快手概念", - "蚂蚁概念", - "证金持股", - "养老概念", - "冷链物流", - "贬值受益", - "纾困概念", - "阿里概念", - "深圳特区", - "超级品牌", - "中超概念", - "养老金", - "专精特新", - "统一大市场", - "光伏高速公路", - "核污染防治", - "磁悬浮概念", - "垃圾分类", - "电子烟", - "工业大麻", - "全息技术", - "超导概念", - "北交所概念", - "赛马概念", - "体育产业", - "雄安新区", - "共享经济", - "彩票概念", - "苹果概念", - "供销社概念", - "水利建设", - "3D打印", - "创投", - "猪肉概念", - "字节概念", - "海洋经济", - "上海自贸", - "一带一路", - "碳交易", - "宠物经济" - ] -} \ No newline at end of file diff --git a/src/zvt/tag/industry_main_tag_mapping.json b/src/zvt/tag/industry_main_tag_mapping.json new file mode 100644 index 00000000..7465ddd5 --- /dev/null +++ b/src/zvt/tag/industry_main_tag_mapping.json @@ -0,0 +1,88 @@ +{ + "风电设备": "新能源", + "电池": "新能源", + "光伏设备": "新能源", + "能源金属": "新能源", + "电源设备": "新能源", + "半导体": "半导体", + "电子化学品": "半导体", + "医疗服务": "医药", + "中药": "医药", + "化学制药": "医药", + "生物制品": "医药", + "医药商业": "医药", + "医疗器械": "医疗器械", + "贸易行业": "大消费", + "家用轻工": "大消费", + "造纸印刷": "大消费", + "酿酒行业": "大消费", + "珠宝首饰": "大消费", + "美容护理": "大消费", + "食品饮料": "大消费", + "旅游酒店": "大消费", + "商业百货": "大消费", + "纺织服装": "大消费", + "家电行业": "大消费", + "小金属": "资源", + "贵金属": "资源", + "有色金属": "资源", + "煤炭行业": "资源", + "石油行业": "资源", + "燃气": "资源", + "采掘行业": "资源", + "消费电子": "消费电子", + "电子元件": "消费电子", + "光学光电子": "消费电子", + "汽车零部件": "汽车", + "汽车服务": "汽车", + "汽车整车": "汽车", + "电机": "智能机器", + "通用设备": "智能机器", + "专用设备": "智能机器", + "仪器仪表": "智能机器", + "电网设备": "电力", + "电力行业": "电力", + "房地产开发": "房地产", + "房地产服务": "房地产", + "工程建设": "房地产", + "水泥建材": "房地产", + "装修装饰": "房地产", + "装修建材": "房地产", + "工程咨询服务": "房地产", + "钢铁行业": "房地产", + "工程机械": "房地产", + "非金属材料": "材料", + "包装材料": "材料", + "化学制品": "材料", + "化肥行业": "材料", + "化学原料": "材料", + "化纤行业": "材料", + "塑料制品": "材料", + "玻璃玻纤": "材料", + "橡胶制品": "材料", + "交运设备": "公用", + "航运港口": "公用", + "公用事业": "公用", + "航空机场": "公用", + "环保行业": "公用", + "铁路公路": "公用", + "证券": "金融", + "保险": "金融", + "银行": "金融", + "多元金融": "金融", + "通信服务": "通信", + "通信设备": "通信", + "互联网服务": "AI", + "软件开发": "AI", + "计算机设备": "AI", + "文化传媒": "文化传媒", + "教育": "文化传媒", + "游戏": "文化传媒", + "农牧饲渔": "农业", + "农药兽药": "农业", + "物流行业": "物流", + "航天航空": "军工", + "船舶制造": "军工", + "专业服务": "专业服务", + "综合行业": "综合行业" +} \ No newline at end of file diff --git a/src/zvt/tag/industry_mapping.json b/src/zvt/tag/industry_mapping.json deleted file mode 100644 index 4ae817e2..00000000 --- a/src/zvt/tag/industry_mapping.json +++ /dev/null @@ -1,133 +0,0 @@ -{ - "新能源": [ - "风电设备", - "电池", - "光伏设备", - "能源金属", - "电源设备" - ], - "半导体": [ - "半导体", - "电子化学品" - ], - "医药": [ - "医疗服务", - "中药", - "化学制药", - "生物制品", - "医药商业" - ], - "医疗器械": [ - "医疗器械" - ], - "大消费": [ - "贸易行业", - "家用轻工", - "造纸印刷", - "酿酒行业", - "珠宝首饰", - "美容护理", - "食品饮料", - "旅游酒店", - "商业百货", - "纺织服装", - "家电行业" - ], - "资源": [ - "小金属", - "贵金属", - "有色金属", - "煤炭行业", - "石油行业", - "燃气", - "采掘行业" - ], - "消费电子": [ - "消费电子", - "电子元件", - "光学光电子" - ], - "汽车": [ - "汽车零部件", - "汽车服务", - "汽车整车" - ], - "智能机器": [ - "电机", - "通用设备", - "专用设备", - "仪器仪表" - ], - "电力": [ - "电网设备", - "电力行业" - ], - "房地产": [ - "房地产开发", - "房地产服务", - "工程建设", - "水泥建材", - "装修装饰", - "装修建材", - "工程咨询服务", - "钢铁行业", - "工程机械" - ], - "材料": [ - "非金属材料", - "包装材料", - "化学制品", - "化肥行业", - "化学原料", - "化纤行业", - "塑料制品", - "玻璃玻纤", - "橡胶制品" - ], - "公用": [ - "交运设备", - "航运港口", - "公用事业", - "航空机场", - "环保行业", - "铁路公路" - ], - "金融": [ - "证券", - "保险", - "银行", - "多元金融" - ], - "通信": [ - "通信服务", - "通信设备" - ], - "AI": [ - "互联网服务", - "软件开发", - "计算机设备" - ], - "文化传媒": [ - "文化传媒", - "教育", - "游戏" - ], - "农业": [ - "农牧饲渔", - "农药兽药" - ], - "物流": [ - "物流行业" - ], - "军工": [ - "航天航空", - "船舶制造" - ], - "专业服务": [ - "专业服务" - ], - "综合行业": [ - "综合行业" - ], - "未知": [] -} \ No newline at end of file diff --git a/src/zvt/tag/main_tags_from_concept.json b/src/zvt/tag/main_tags_from_concept.json deleted file mode 100644 index 9240971d..00000000 --- a/src/zvt/tag/main_tags_from_concept.json +++ /dev/null @@ -1,3 +0,0 @@ -[ - "军工" -] diff --git a/src/zvt/tag/tag_service.py b/src/zvt/tag/tag_service.py index 833fc6e7..1edc250c 100644 --- a/src/zvt/tag/tag_service.py +++ b/src/zvt/tag/tag_service.py @@ -4,7 +4,7 @@ import pandas as pd import sqlalchemy -from sqlalchemy import func, text +from sqlalchemy import func import zvt.contract.api as contract_api from zvt.api import get_trade_dates @@ -22,8 +22,7 @@ industry_to_main_tag, get_sub_tags, build_initial_stock_pool_info, - get_main_tags_from_sub_tag, - get_main_tag_concept_mapping, + get_concept_main_tag_mapping, ) from zvt.tag.tagger import StockTagger from zvt.utils import ( @@ -34,7 +33,6 @@ date_time_by_interval, TIME_FORMAT_DAY, current_date, - pd_is_not_null, ) from zvt.utils.utils import fill_dict @@ -231,10 +229,16 @@ def build_sub_tags(self): else: sub_tag_reason = f"来自概念:{sub_tag}" + main_tag = get_concept_main_tag_mapping().get(sub_tag) + main_tag_reason = sub_tag_reason + if main_tag == "其他": + main_tag = current_stock_tags.main_tag + main_tag_reason = current_stock_tags.main_tag_reason + set_stock_tags_model = SetStockTagsModel( entity_id=entity_id, - main_tag=current_stock_tags.main_tag, - main_tag_reason=current_stock_tags.main_tag_reason, + main_tag=main_tag, + main_tag_reason=main_tag_reason, sub_tag=sub_tag, sub_tag_reason=sub_tag_reason, active_hidden_tags=None, @@ -248,89 +252,9 @@ def build_sub_tags(self): else: logger.info(f"ignore {sub_tag} not in sub_tag_info yet") - def build_system_tags(self): - latest = StockSystemTags.query_data(limit=1, order=StockSystemTags.timestamp.desc(), return_type="domain") - if latest: - start = date_time_by_interval(to_time_str(latest[0].timestamp, fmt=TIME_FORMAT_DAY)) - else: - start = "2018-01-01" - trade_days = get_trade_dates(start=start, end=current_date()) - - entity_df = Stock.query_data( - provider="em", entity_ids=self.entity_ids, columns=["entity_id", "code", "name"], index="entity_id" - ) - entity_df = entity_df[["entity_id", "code", "name"]] - - for target_date in trade_days: - logger.info(f"build_system_tags to: {target_date}") - events_list = ["增减持计划", "增发", "配股", "业绩预告", "股票回购", "股东增减持", "诉讼仲裁", "违规处罚", "高管及关联方增减持", "龙虎榜"] - recent_30_days = date_time_by_interval(target_date, -30) - future_30_days = date_time_by_interval(target_date, 30) - df1 = StockEvents.query_data( - entity_ids=self.entity_ids, - start_timestamp=recent_30_days, - end_timestamp=target_date, - filters=[StockEvents.event_type.in_(events_list)], - ) - - forecast_events_list = [ - "限售解禁", - ] - df2 = StockEvents.query_data( - entity_ids=self.entity_ids, - start_timestamp=recent_30_days, - end_timestamp=future_30_days, - filters=[StockEvents.event_type.in_(forecast_events_list)], - ) - df = pd.concat([df1, df2]) - df["recent_reduction"] = ((df["event_type"] == "增减持计划") & df["level1_content"].str.contains("减持")) | ( - df["specific_event_type"] == "股东减持" - ) - df["recent_acquisition"] = ( - ((df["event_type"] == "增减持计划") & df["level1_content"].str.contains("增持")) - | (df["specific_event_type"] == "股东增持") - | (df["event_type"] == "股票回购") - ) - - # 解禁 - df["recent_unlock"] = df["event_type"] == "限售解禁" - # 增发配股 - df["recent_additional_or_rights_issue"] = (df["event_type"] == "增发") | (df["event_type"] == "配股") - - # 违规 - df["recent_violation_alert"] = df["event_type"] == "违规处罚" - - # 业绩向好 - df["recent_positive_earnings_news"] = (df["event_type"] == "业绩预告") & ( - df["level1_content"].str.contains("预增|略增|扭亏", regex=True) - ) - # 业绩不行 - df["recent_negative_earnings_news"] = (df["event_type"] == "业绩预告") & ( - df["level1_content"].str.contains("预减|略减|续亏|首亏", regex=True) - ) - - grouped_df = df.groupby("entity_id").agg( - recent_reduction=("recent_reduction", "any"), - recent_acquisition=("recent_acquisition", "any"), - recent_unlock=("recent_unlock", "any"), - recent_additional_or_rights_issue=("recent_additional_or_rights_issue", "any"), - recent_violation_alert=("recent_violation_alert", "any"), - recent_positive_earnings_news=("recent_positive_earnings_news", "any"), - recent_negative_earnings_news=("recent_negative_earnings_news", "any"), - recent_dragon_and_tiger_count=( - "event_type", - lambda x: (x == "龙虎榜").astype(int).sum(), - ), - ) - grouped_df["timestamp"] = to_pd_timestamp(target_date) - result_df = pd.concat([grouped_df, entity_df[entity_df.index.isin(grouped_df.index)]], axis=1) - result_df["id"] = result_df["entity_id"] + "_" + to_time_str(target_date, fmt=TIME_FORMAT_DAY) - contract_api.df_to_db(result_df, data_schema=StockSystemTags, force_update=False, provider="zvt") - def tag(self): self.build_stock_main_tag() self.build_sub_tags() - self.build_system_tags() def build_stock_pool_info(create_stock_pool_info_model: CreateStockPoolInfoModel, timestamp): @@ -447,33 +371,46 @@ def query_stock_tag_stats(query_stock_tag_stats_model: QueryStockTagStatsModel): return tag_stats_list -def refresh_main_tag_by_sub_tag(main_tags_need_refresh=None): - if not main_tags_need_refresh: - main_tags_need_refresh = get_main_tags_from_sub_tag() - for main_tag in main_tags_need_refresh: - logger.info(f"refresh stock_tags for main_tag:{main_tag}") - for tag, sub_tags in get_main_tag_concept_mapping().items(): - if main_tag == tag: - stock_tags: List[StockTags] = StockTags.query_data( - filters=[StockTags.sub_tag.in_(sub_tags), StockTags.main_tag != main_tag], return_type="domain" - ) - if stock_tags: - for stock_tag in stock_tags: - set_stock_tags_model = SetStockTagsModel( - entity_id=stock_tag.entity_id, - main_tag=main_tag, - main_tag_reason=stock_tag.sub_tag_reason, - sub_tag=stock_tag.sub_tag, - sub_tag_reason=stock_tag.sub_tag_reason, - active_hidden_tags=stock_tag.active_hidden_tags, - ) - logger.info(f"set_stock_tags_model:{set_stock_tags_model}") - build_stock_tags( - set_stock_tags_model=set_stock_tags_model, - timestamp=stock_tag.timestamp, - set_by_user=False, - keep_current=False, - ) +def refresh_main_tag_by_sub_tag(stock_tag: StockTags) -> StockTags: + if not stock_tag.sub_tags: + logger.warning(f"{stock_tag.entity_id} has no sub_tags yet") + return stock_tag + + sub_tag = stock_tag.sub_tag + sub_tag_reason = stock_tag.sub_tags[sub_tag] + + main_tag = get_concept_main_tag_mapping().get(sub_tag) + main_tag_reason = sub_tag_reason + if main_tag == "其他": + main_tag = stock_tag.main_tag + main_tag_reason = stock_tag.main_tag_reason + + set_stock_tags_model = SetStockTagsModel( + entity_id=stock_tag.entity_id, + main_tag=main_tag, + main_tag_reason=main_tag_reason, + sub_tag=sub_tag, + sub_tag_reason=sub_tag_reason, + active_hidden_tags=stock_tag.active_hidden_tags, + ) + logger.info(f"set_stock_tags_model:{set_stock_tags_model}") + + return build_stock_tags( + set_stock_tags_model=set_stock_tags_model, + timestamp=stock_tag.timestamp, + set_by_user=False, + keep_current=False, + ) + + +def refresh_all_main_tag_by_sub_tag(): + with contract_api.DBSession(provider="zvt", data_schema=StockTags)() as session: + stock_tags = StockTags.query_data( + session=session, + return_type="domain", + ) + for stock_tag in stock_tags: + refresh_main_tag_by_sub_tag(stock_tag) def activate_sub_tags(activate_sub_tags_model: ActivateSubTagsModel): @@ -489,28 +426,111 @@ def activate_sub_tags(activate_sub_tags_model: ActivateSubTagsModel): ) entity_ids = df["entity_id"].tolist() - df = StockTags.query_data( + stock_tags = StockTags.query_data( entity_ids=entity_ids, filters=[func.json_extract(StockTags.sub_tags, f'$."{sub_tag}"') != None], - return_type="df", + return_type="domain", ) - if pd_is_not_null(df): - entity_ids_to_set = df["entity_id"].tolist() - logger.info(f"set {entity_ids_to_set} sub_tag to {sub_tag}") - sql = text(f"update stock_tags set sub_tag = '{sub_tag}' where entity_id in {tuple(entity_ids_to_set)}") - session.execute(sql) - session.commit() - result[sub_tag] = entity_ids_to_set + if not stock_tags: + logger.info(f"all stocks with sub_tag: {sub_tag} has been activated") + continue + for stock_tag in stock_tags: + stock_tag.sub_tag = sub_tag + result[stock_tag.entity_id] = refresh_main_tag_by_sub_tag(stock_tag) + return result + + +def build_system_tags(): + entity_ids = get_entity_ids_by_filter(provider="em", ignore_delist=True, ignore_st=False, ignore_new_stock=False) + + latest = StockSystemTags.query_data(limit=1, order=StockSystemTags.timestamp.desc(), return_type="domain") + if latest: + start = date_time_by_interval(to_time_str(latest[0].timestamp, fmt=TIME_FORMAT_DAY)) + else: + start = "2018-01-01" + trade_days = get_trade_dates(start=start, end=current_date()) + + entity_df = Stock.query_data( + provider="em", entity_ids=entity_ids, columns=["entity_id", "code", "name"], index="entity_id" + ) + entity_df = entity_df[["entity_id", "code", "name"]] + + for target_date in trade_days: + logger.info(f"build_system_tags to: {target_date}") + events_list = ["增减持计划", "增发", "配股", "业绩预告", "股票回购", "股东增减持", "诉讼仲裁", "违规处罚", "高管及关联方增减持", "龙虎榜"] + recent_30_days = date_time_by_interval(target_date, -30) + future_30_days = date_time_by_interval(target_date, 30) + df1 = StockEvents.query_data( + entity_ids=entity_ids, + start_timestamp=recent_30_days, + end_timestamp=target_date, + filters=[StockEvents.event_type.in_(events_list)], + ) + + forecast_events_list = [ + "限售解禁", + ] + df2 = StockEvents.query_data( + entity_ids=entity_ids, + start_timestamp=recent_30_days, + end_timestamp=future_30_days, + filters=[StockEvents.event_type.in_(forecast_events_list)], + ) + df = pd.concat([df1, df2]) + df["recent_reduction"] = ((df["event_type"] == "增减持计划") & df["level1_content"].str.contains("减持")) | ( + df["specific_event_type"] == "股东减持" + ) + df["recent_acquisition"] = ( + ((df["event_type"] == "增减持计划") & df["level1_content"].str.contains("增持")) + | (df["specific_event_type"] == "股东增持") + | (df["event_type"] == "股票回购") + ) + + # 解禁 + df["recent_unlock"] = df["event_type"] == "限售解禁" + # 增发配股 + df["recent_additional_or_rights_issue"] = (df["event_type"] == "增发") | (df["event_type"] == "配股") + + # 违规 + df["recent_violation_alert"] = df["event_type"] == "违规处罚" + + # 业绩向好 + df["recent_positive_earnings_news"] = (df["event_type"] == "业绩预告") & ( + df["level1_content"].str.contains("预增|略增|扭亏", regex=True) + ) + # 业绩不行 + df["recent_negative_earnings_news"] = (df["event_type"] == "业绩预告") & ( + df["level1_content"].str.contains("预减|略减|续亏|首亏", regex=True) + ) + + grouped_df = df.groupby("entity_id").agg( + recent_reduction=("recent_reduction", "any"), + recent_acquisition=("recent_acquisition", "any"), + recent_unlock=("recent_unlock", "any"), + recent_additional_or_rights_issue=("recent_additional_or_rights_issue", "any"), + recent_violation_alert=("recent_violation_alert", "any"), + recent_positive_earnings_news=("recent_positive_earnings_news", "any"), + recent_negative_earnings_news=("recent_negative_earnings_news", "any"), + recent_dragon_and_tiger_count=( + "event_type", + lambda x: (x == "龙虎榜").astype(int).sum(), + ), + ) + grouped_df["timestamp"] = to_pd_timestamp(target_date) + result_df = pd.concat([grouped_df, entity_df[entity_df.index.isin(grouped_df.index)]], axis=1) + result_df["id"] = result_df["entity_id"] + "_" + to_time_str(target_date, fmt=TIME_FORMAT_DAY) + contract_api.df_to_db(result_df, data_schema=StockSystemTags, force_update=False, provider="zvt") if __name__ == "__main__": - # refresh_main_tag_by_sub_tag() - activate_sub_tags(ActivateSubTagsModel(sub_tags=["军民融合"])) + # refresh_all_main_tag_by_sub_tag() + activate_sub_tags(ActivateSubTagsModel(sub_tags=["小金属概念"])) # build_initial_main_tag_info() # build_initial_sub_tag_info() # build_initial_hidden_tag_info() # build_initial_stock_pool_info() # StockAutoTagger().tag() + # the __all__ is generated __all__ = [ "stock_tags_need_update", diff --git a/src/zvt/tag/tag_stats.py b/src/zvt/tag/tag_stats.py index 82f4325a..97b5b848 100644 --- a/src/zvt/tag/tag_stats.py +++ b/src/zvt/tag/tag_stats.py @@ -10,23 +10,11 @@ from zvt.factors.top_stocks import TopStocks, get_top_stocks from zvt.tag import CreateStockPoolsModel, build_stock_pool from zvt.tag.tag_schemas import TagStats, StockTags, StockPools -from zvt.tag.tag_utils import get_main_tag_concept_mapping -from zvt.utils import to_pd_timestamp, pd_is_not_null, date_time_by_interval, to_time_str, TIME_FORMAT_DAY - +from zvt.utils import to_pd_timestamp, pd_is_not_null, date_time_by_interval logger = logging.getLogger(__name__) -def sub_tag_to_main_tag_group(name, sub_tag, main_tag): - for main_tag_group, sub_tags in get_main_tag_concept_mapping().items(): - if sub_tag in sub_tags: - if main_tag_group == "其他": - return main_tag - if main_tag_group != main_tag: - logger.info(f"{name} use {main_tag_group} instead of [{main_tag}]") - return main_tag_group - - def build_system_stock_pools(): for stock_pool_name in ["main_line", "vol_up"]: datas = StockPools.query_data( @@ -123,10 +111,6 @@ def build_stock_pool_tag_stats(stock_pool_name, force_rebuild_latest=False): df = pd.concat([tags_df, kdata_df[["turnover", "name"]]], axis=1) - df["main_tag"] = df.apply( - lambda row: sub_tag_to_main_tag_group(row["name"], row["sub_tag"], row["main_tag"]), axis=1 - ) - grouped_df = ( df.groupby("main_tag") .agg( @@ -175,4 +159,4 @@ def build_stock_pool_tag_stats(stock_pool_name, force_rebuild_latest=False): build_stock_pool_tag_stats(stock_pool_name="main_line", force_rebuild_latest=True) # build_stock_pool_tag_stats(stock_pool_name="vol_up") # the __all__ is generated -__all__ = ["sub_tag_to_main_tag_group", "build_system_stock_pools", "build_stock_pool_tag_stats"] +__all__ = ["build_system_stock_pools", "build_stock_pool_tag_stats"] diff --git a/src/zvt/tag/tag_utils.py b/src/zvt/tag/tag_utils.py index fdef52eb..171144c3 100644 --- a/src/zvt/tag/tag_utils.py +++ b/src/zvt/tag/tag_utils.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- import json import os -from typing import List +from typing import List, Dict import pandas as pd @@ -15,25 +15,36 @@ ) -def get_main_tag_industry_mapping() -> dict: - with open(os.path.join(os.path.dirname(__file__), "industry_mapping.json"), encoding="utf-8") as f: +def get_industry_main_tag_mapping() -> Dict[str, str]: + with open(os.path.join(os.path.dirname(__file__), "industry_main_tag_mapping.json"), encoding="utf-8") as f: return json.load(f) -def get_main_tag_concept_mapping() -> dict: - with open(os.path.join(os.path.dirname(__file__), "concept_mapping.json"), encoding="utf-8") as f: - return json.load(f) +def get_main_tag_industry_mapping() -> Dict[str, List[str]]: + mapping = get_industry_main_tag_mapping() + result = {} + for industry, main_tag in mapping.items(): + result.setdefault(main_tag, []) + result.get(main_tag).append(industry) + return result -def get_main_tags_from_sub_tag() -> List[str]: - with open(os.path.join(os.path.dirname(__file__), "main_tags_from_concept.json"), encoding="utf-8") as f: +def get_concept_main_tag_mapping() -> Dict[str, str]: + with open(os.path.join(os.path.dirname(__file__), "concept_main_tag_mapping.json"), encoding="utf-8") as f: return json.load(f) +def get_main_tag_concept_mapping() -> Dict[str, List[str]]: + mapping = get_concept_main_tag_mapping() + result = {} + for concept, main_tag in mapping.items(): + result.setdefault(main_tag, []) + result.get(main_tag).append(concept) + return result + + def get_initial_sub_tags() -> List[str]: - mapping = get_main_tag_concept_mapping() - current_concept_list = flatten_list([mapping.get(tag) for tag in mapping]) - return current_concept_list + return list(get_concept_main_tag_mapping().keys()) def get_industry_list(): @@ -52,14 +63,12 @@ def get_concept_list(): def check_missed_industry(): - mapping = get_main_tag_industry_mapping() - current_industry_list = flatten_list([mapping.get(tag) for tag in mapping]) + current_industry_list = get_industry_main_tag_mapping().keys() return list(set(get_industry_list()) - set(current_industry_list)) def check_missed_concept(): - mapping = get_main_tag_concept_mapping() - current_concept_list = flatten_list([mapping.get(tag) for tag in mapping]) + current_concept_list = get_concept_main_tag_mapping().keys() return list(set(get_concept_list()) - set(current_concept_list)) @@ -69,13 +78,13 @@ def get_initial_main_tag_info(): from_industry = [ { - "id": f"{entity_id}_{tag}", + "id": f"{entity_id}_{main_tag}", "entity_id": entity_id, "timestamp": timestamp, - "tag": tag, - "tag_reason": f"来自这些行业:{','.join(industries)}", + "tag": main_tag, + "tag_reason": f"来自这些行业:{industry}", } - for tag, industries in get_main_tag_industry_mapping().items() + for industry, main_tag in get_industry_main_tag_mapping().items() ] from_concept = [] @@ -150,10 +159,7 @@ def get_initial_hidden_tag_info(): def industry_to_main_tag(industry): - for main_tag, industries in get_main_tag_industry_mapping().items(): - if industry in industries: - return main_tag - return industry + return get_industry_main_tag_mapping().get(industry, default=industry) def build_initial_main_tag_info(): @@ -205,7 +211,15 @@ def get_stock_pool_names(): # json.dump(check_missed_concept(), json_file, indent=2, ensure_ascii=False) # with open("missed_industry.json", "w") as json_file: # json.dump(check_missed_industry(), json_file, indent=2, ensure_ascii=False) - print(industry_to_main_tag("光伏设备")) + # print(industry_to_main_tag("光伏设备")) + # result = {} + # for main_tag, concepts in get_main_tag_industry_mapping().items(): + # for tag in concepts: + # result[tag] = main_tag + # with open("industry_main_tag_mapping.json", "w") as json_file: + # json.dump(result, json_file, indent=2, ensure_ascii=False) + print(list(get_concept_main_tag_mapping().keys())) + # the __all__ is generated __all__ = [ diff --git a/src/zvt/trading/common.py b/src/zvt/trading/common.py index f8e3a73b..f3d7c73e 100644 --- a/src/zvt/trading/common.py +++ b/src/zvt/trading/common.py @@ -4,6 +4,5 @@ class ExecutionStatus(Enum): init = "init" - pending = "pending" success = "success" failed = "failed" diff --git a/src/zvt/trading/trading_service.py b/src/zvt/trading/trading_service.py index cd5a4e2c..346439ee 100644 --- a/src/zvt/trading/trading_service.py +++ b/src/zvt/trading/trading_service.py @@ -1,4 +1,5 @@ # -*- coding: utf-8 -*- +import logging from typing import List, Union import pandas as pd @@ -13,6 +14,8 @@ from zvt.trading.trading_schemas import TradingPlan from zvt.utils import to_time_str, to_pd_timestamp, now_pd_timestamp, date_time_by_interval, current_date +logger = logging.getLogger(__name__) + def build_trading_plan(build_trading_plan_model: BuildTradingPlanModel): with contract_api.DBSession(provider="zvt", data_schema=TradingPlan)() as session: @@ -89,31 +92,13 @@ def get_future_trading_plan(): ) -class DayTrader(StockTrader): - def __init__( - self, - entity_ids: List[str] = None, - start_timestamp: Union[str, pd.Timestamp] = None, - end_timestamp: Union[str, pd.Timestamp] = None, - provider: str = "qmt", - level: Union[str, IntervalLevel] = IntervalLevel.LEVEL_1MIN, - profit_threshold=(3, -0.3), - keep_history=True, - ) -> None: - super().__init__( - entity_ids=entity_ids, - exchanges=None, - codes=None, - start_timestamp=start_timestamp, - end_timestamp=end_timestamp, - provider=provider, - level=level, - trader_name="day_trader", - real_time=True, - kdata_use_begin_time=False, - draw_result=False, - rich_mode=False, - adjust_type=AdjustType.qfq, - profit_threshold=profit_threshold, - keep_history=keep_history, +def check_trading_plan(): + with contract_api.DBSession(provider="zvt", data_schema=TradingPlan)() as session: + plans = TradingPlan.query_data( + session=session, + filters=[TradingPlan.status == ExecutionStatus.init.value, TradingPlan.trading_date == current_date()], + order=TradingPlan.trading_date.asc(), + return_type="domain", ) + + logger.info(f"current plans:{plans}") diff --git a/src/zvt/utils/inform_utils.py b/src/zvt/utils/inform_utils.py new file mode 100644 index 00000000..0f21824e --- /dev/null +++ b/src/zvt/utils/inform_utils.py @@ -0,0 +1,16 @@ +# -*- coding: utf-8 -*- +from zvt import zvt_config +from zvt.contract.api import get_entities +from zvt.informer import EmailInformer + + +def inform_email(entity_ids, entity_type, target_date, title, provider): + msg = "no targets" + if entity_ids: + entities = get_entities(provider=provider, entity_type=entity_type, entity_ids=entity_ids, return_type="domain") + assert len(entities) == len(entity_ids) + + infos = [f"{entity.name}({entity.code})" for entity in entities] + msg = "\n".join(infos) + "\n" + + EmailInformer().send_message(zvt_config["email_username"], f"{target_date} {title}", msg) diff --git a/examples/recorder_utils.py b/src/zvt/utils/recorder_utils.py similarity index 75% rename from examples/recorder_utils.py rename to src/zvt/utils/recorder_utils.py index e86d216e..a7c70577 100644 --- a/examples/recorder_utils.py +++ b/src/zvt/utils/recorder_utils.py @@ -23,9 +23,9 @@ def run_data_recorder( logger.info(f" record data: {domain.__name__}, entity_provider: {entity_provider}, data_provider: {data_provider}") unfinished_entity_ids = entity_ids - while retry_times > 0: - email_action = EmailInformer() + email_action = EmailInformer() + while retry_times > 0: try: if return_unfinished: unfinished_entity_ids = domain.record_data( @@ -35,19 +35,9 @@ def run_data_recorder( return_unfinished=return_unfinished, **recorder_kv, ) - if not unfinished_entity_ids: - unfinished_entity_ids = [] - logger.info(f"unfinished_entity_ids({len(unfinished_entity_ids)}): {unfinished_entity_ids}") if unfinished_entity_ids: - time.sleep(60 * 2) - retry_times = retry_times - 1 - if retry_times == 0: - email_action.send_message( - zvt_config["email_username"], - f"record {domain.__name__} error", - f"record {domain.__name__} error: {e}", - ) - continue + logger.info(f"unfinished_entity_ids({len(unfinished_entity_ids)}): {unfinished_entity_ids}") + raise Exception("Would retry with unfinished latter!") else: domain.record_data( entity_ids=entity_ids,