-
-
Notifications
You must be signed in to change notification settings - Fork 117
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* test data load process and fix bug * fix bug when evaluating * refine json content --------- Co-authored-by: USTCKevinF <[email protected]> Co-authored-by: xuyang1 <[email protected]>
- Loading branch information
1 parent
8f95638
commit 479d441
Showing
8 changed files
with
87 additions
and
49 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
{ | ||
"Turnover_Rate_Factor": { | ||
"description": "A traditional factor based on 20-day average turnover rate, adjusted for market capitalization, which is further improved by applying the information distribution theory.", | ||
"formulation": "\\text{Adjusted Turnover Rate} = \\frac{\\text{mean}(20\\text{-day turnover rate})}{\\text{Market Capitalization}}", | ||
"variables": { | ||
"20-day turnover rate": "Average turnover rate over the past 20 days.", | ||
"Market Capitalization": "Total market value of a company's outstanding shares." | ||
}, | ||
"gt_code": "import pandas as pd\n\ndata_f = pd.read_hdf('daily_f.h5')\n\ndata = data_f.reset_index()\nwindow_size = 20\n\nnominator=data.groupby('instrument')[['30\u65e5\u6362\u624b\u7387']].rolling(window=window_size).mean().reset_index(0, drop=True)\n# transfer to series\nnew=nominator['30\u65e5\u6362\u624b\u7387']\ndata['Turnover_Rate_Factor']=new/data['\u6d41\u901aA\u80a1']\n\n# # set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(data['Turnover_Rate_Factor']).set_index(data_f.index)\n\n# transfer the result to series\nresult=result['Turnover_Rate_Factor']\nresult.to_hdf(\"result.h5\", key=\"data\")\n" | ||
}, | ||
"PctTurn20": { | ||
"description": "A factor representing the percentage change in turnover rate over the past 20 trading days, market-value neutralized.", | ||
"formulation": "\\text{PctTurn20} = \\frac{1}{N} \\sum_{i=1}^{N} \\left( \\frac{\\text{Turnover}_{i, t} - \\text{Turnover}_{i, t-20}}{\\text{Turnover}_{i, t-20}} \\right)", | ||
"variables": { | ||
"N": "Number of stocks in the market.", | ||
"Turnover_{i, t}": "Turnover of stock i at day t.", | ||
"Turnover_{i, t-20}": "Turnover of stock i at day t-20." | ||
}, | ||
"gt_code": "import pandas as pd\nfrom statsmodels import api as sm\n\n\ndef fill_mean(s: pd.Series) -> pd.Series:\n return s.fillna(s.mean()).fillna(0.0)\n\n\ndef market_value_neutralize(s: pd.Series, mv: pd.Series) -> pd.Series:\n s = s.groupby(\"datetime\", group_keys=False).apply(fill_mean)\n mv = mv.groupby(\"datetime\", group_keys=False).apply(fill_mean)\n\n df_f = mv.to_frame(\"\u5e02\u503c\")\n df_f[\"const\"] = 1\n X = df_f[[\"\u5e02\u503c\", \"const\"]]\n\n # Perform the Ordinary Least Squares (OLS) regression\n model = sm.OLS(s, X)\n results = model.fit()\n\n # Calculate the residuals\n df_f[\"residual\"] = results.resid\n df_f[\"norm_resi\"] = df_f.groupby(level=\"datetime\", group_keys=False)[\"residual\"].apply(\n lambda x: (x - x.mean()) / x.std(),\n )\n return df_f[\"norm_resi\"]\n\n\n# get_turnover\ndf_pv = pd.read_hdf(\"daily_pv.h5\", key=\"data\")\ndf_f = pd.read_hdf(\"daily_f.h5\", key=\"data\")\nturnover = df_pv[\"$money\"] / df_f[\"\u6d41\u901a\u5e02\u503c\"]\n\nf = turnover.groupby(\"instrument\").pct_change(periods=20)\n\nf_neutralized = market_value_neutralize(f, df_f[\"\u6d41\u901a\u5e02\u503c\"])\n\nf_neutralized.to_hdf(\"result.h5\", key=\"data\")\n" | ||
}, | ||
"PB_ROE": { | ||
"description": "Constructed using the ranking difference between PB and ROE, with PB and ROE replacing original PB and ROE to obtain reconstructed factor values.", | ||
"formulation": "\\text{rank}(PB\\_t) - rank(ROE_t)", | ||
"variables": { | ||
"\\text{rank}(PB_t)": "Ranking PB on cross-section at time t.", | ||
"\\text{rank}(ROE_t)": "Ranking single-quarter ROE on cross-section at time t." | ||
}, | ||
"gt_code": "#!/usr/bin/env python\n\nimport pandas as pd\n\ndata_f = pd.read_hdf('daily_f.h5')\n\ndata = data_f.reset_index()\n\n# Calculate the rank of PB and ROE\ndata['PB_rank'] = data.groupby('datetime')['B/P'].rank()\ndata['ROE_rank'] = data.groupby('datetime')['ROE'].rank()\n\n# Calculate the difference between the ranks\ndata['PB_ROE'] = data['PB_rank'] - data['ROE_rank']\n\n# set the datetime and instrument as index and drop the original index\nresult=pd.DataFrame(data['PB_ROE']).set_index(data_f.index)\n\n# transfer the result to series\nresult=result['PB_ROE']\nresult.to_hdf(\"result.h5\", key=\"data\")\n" | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters