diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..3fcd1ed --- /dev/null +++ b/.gitignore @@ -0,0 +1,3 @@ +*.egg-info +__pycache__/ +*.pyc \ No newline at end of file diff --git a/cot_reports/cot_reports.py b/cot_reports/cot_reports.py index 11a83ec..dd6e0ac 100644 --- a/cot_reports/cot_reports.py +++ b/cot_reports/cot_reports.py @@ -1,3 +1,4 @@ +import os import pandas as pd import requests, zipfile, io from datetime import date @@ -5,7 +6,7 @@ # cot_hist - downloads compressed bulk files -def cot_hist(cot_report_type = "legacy_fut"): +def cot_hist(cot_report_type = "legacy_fut", store_txt=True, verbose=True): '''Downloads the compressed COT report historical data of the selected report type starting from, depending on the selected report type, 1986, 1995 or 2006 until 2016 from the cftc.gov webpage as zip file, unzips the downloaded folder and returns @@ -32,39 +33,39 @@ def cot_hist(cot_report_type = "legacy_fut"): if cot_report_type== "legacy_fut": url_end = "deacot1986_2016" txt = "FUT86_16.txt" - print("Selected: COT Legacy report. Futures only.") + if verbose: print("Selected: COT Legacy report. Futures only.") elif cot_report_type == "legacy_futopt": url_end = "deahistfo_1995_2016" txt = "Com95_16.txt" - print("Selected: COT Legacy report. Futures and Options.") + if verbose: print("Selected: COT Legacy report. Futures and Options.") elif cot_report_type == "supplemental_futopt": url_end = "dea_cit_txt_2006_2016" txt = "CIT06_16.txt" - print("Selected: COT Sumpplemental report. Futures and Options.") + if verbose: print("Selected: COT Sumpplemental report. Futures and Options.") elif cot_report_type == "disaggregated_fut": url_end = "fut_disagg_txt_hist_2006_2016" txt = "F_Disagg06_16.txt" - print("Selected: COT Disaggregated report. Futures only.") + if verbose: print("Selected: COT Disaggregated report. Futures only.") elif cot_report_type == "disaggregated_futopt": url_end = "com_disagg_txt_hist_2006_2016" txt = "C_Disagg06_16.txt" - print("Selected: COT Disaggregated report. Futures and Options.") + if verbose: print("Selected: COT Disaggregated report. Futures and Options.") elif cot_report_type == "traders_in_financial_futures_fut": url_end = "fin_fut_txt_2006_2016" txt = "F_TFF_2006_2016.txt" - print("Selected: COT Traders in Financial Futures report. Futures only.") + if verbose: print("Selected: COT Traders in Financial Futures report. Futures only.") elif cot_report_type == "traders_in_financial_futures_futopt": url_end = "fin_com_txt_2006_2016" txt = "C_TFF_2006_2016.txt" - print("Selected: COT Traders in Financial Futures report. Futures and Options.") + if verbose: print("Selected: COT Traders in Financial Futures report. Futures and Options.") except ValueError: - print("""Input needs to be either: + if verbose: print("""Input needs to be either: "legacy_fut", "legacy_futopt", supplemental_futopt", "disaggregated_fut", "disaggregated_futopt", "traders_in_financial_futures_fut" or @@ -74,8 +75,11 @@ def cot_hist(cot_report_type = "legacy_fut"): req = requests.get(cot_url) z = zipfile.ZipFile(io.BytesIO(req.content)) z.extractall() - print("Stored the extracted file", txt, "in the working directory.") df = pd.read_csv(txt, low_memory=False) + if store_txt: + if verbose: print("Stored the extracted file", txt, "in the working directory.") + else: + os.remove(txt) return df ## Example: @@ -83,7 +87,7 @@ def cot_hist(cot_report_type = "legacy_fut"): # cot_year - downloads single years -def cot_year(year = 2020, cot_report_type = "legacy_fut"): +def cot_year(year = 2020, cot_report_type = "legacy_fut", store_txt=True, verbose=True): '''Downloads the selected COT report historical data for a single year from the cftc.gov webpage as zip file, unzips the downloaded folder and returns the cot data as DataFrame. @@ -109,7 +113,7 @@ def cot_year(year = 2020, cot_report_type = "legacy_fut"): Raises: ValueError: Raises an exception and returns the argument options.''' - print("Selected:", cot_report_type) + if verbose: print("Selected:", cot_report_type) try: if cot_report_type== "legacy_fut": rep = "deacot" @@ -151,7 +155,11 @@ def cot_year(year = 2020, cot_report_type = "legacy_fut"): z = zipfile.ZipFile(io.BytesIO(r.content)) z.extractall() df = pd.read_csv(txt, low_memory=False) - print("Downloaded single year data from:", year) + if verbose: print("Downloaded single year data from:", year) + if store_txt: + if verbose: print("Stored the file", txt, "in the working directory.") + else: + os.remove(txt) return df ## Example: @@ -160,7 +168,7 @@ def cot_year(year = 2020, cot_report_type = "legacy_fut"): # cot_all - downloads complete available data of a chosen COT report type -def cot_all(cot_report_type="legacy_fut"): +def cot_all(cot_report_type="legacy_fut", store_txt=True, verbose=True): '''Downloads all historical data for the chosen COT report type (compressed historical bulk file and all remaining single year files) from the cftc.gov webpage as zip files, unzips the downloaded folders and returns the cot data files merged as a DataFrame. @@ -184,9 +192,9 @@ def cot_all(cot_report_type="legacy_fut"): Raises: ValueError: Raises an exception and returns the argument options.''' - df = cot_hist(cot_report_type) + df = cot_hist(cot_report_type, store_txt=store_txt, verbose=verbose) for i in range(2017, date.today().year+1): - years = pd.DataFrame(cot_year(i, cot_report_type)) + years = pd.DataFrame(cot_year(i, cot_report_type, store_txt=store_txt, verbose=verbose)) df = df.append(years, ignore_index=True) return df @@ -195,7 +203,7 @@ def cot_all(cot_report_type="legacy_fut"): # cot_all_reports - downloads complete available data for all COT report types -def cot_all_reports(): +def cot_all_reports(store_txt=True, verbose=True): '''Downloads all available historical information of all COT reports and returns a dataframe for each of the report types - seven in total. The function iterates through cot_reports.cot_all() for each cot report type. @@ -217,9 +225,9 @@ def cot_all_reports(): l = ["legacy_fut", "legacy_futopt", "supplemental_futopt", "disaggregated_fut", "disaggregated_futopt", "traders_in_financial_futures_fut", "traders_in_financial_futures_futopt"] for report in l: - print(report) + if verbose: print(report) temp = '{}'.format(report) - vars()[temp] = cot_all(cot_report_type=report) + vars()[temp] = cot_all(cot_report_type=report, store_txt=store_txt, verbose=verbose) return vars()['{}'.format("legacy_fut")],vars()['{}'.format("legacy_futopt")],vars()['{}'.format("supplemental_futopt")],\ vars()['{}'.format("disaggregated_fut")],vars()['{}'.format("disaggregated_futopt")],vars()['{}'.format("traders_in_financial_futures_fut")],\