From d9386bcf7136b21970de92ff6e5839997353dcc8 Mon Sep 17 00:00:00 2001
From: Anthony Hevia <anthonyhevia@microsoft.com>
Date: Sun, 17 Oct 2021 13:49:07 -0700
Subject: [PATCH 1/2] a cleaner package refactor

---
 cm_nlp/__init__.py                            |   1 +
 {src => cm_nlp}/tools/README.md               |   0
 .../tools/ontology_graph/__init__.py          |   0
 .../climate_mind_OWL_to_networkx.py           |   0
 .../tools/ontology_graph/install_graphviz.py  |   0
 .../Climate_Mind_DiGraph.gpickle              | Bin
 .../Climate_Mind_Digraph_Test_Ont.json        |   0
 .../tests_output_folder/output.csv            |   0
 .../preprocess_data/coreNLP/coreNLP_python.py |   0
 .../tools/preprocess_data/coref_resolution.py |   0
 cm_nlp/tools/preprocess_data/data_pipeline.py |   0
 .../tools/preprocess_data/example_data.jsonl  |   0
 .../tools/preprocess_data/helper_functions.py |   0
 .../entity_checkin_one/Dockerfile             |   0
 .../entity_checkin_one/checkin_one.jsonl      |   0
 .../entity_checkin_one/help.txt               |   0
 ...ause_effect_prediction_pos_sentences.jsonl |   0
 .../prodigy_deployments/prodigy/Dockerfile    |   0
 .../prodigy_deployments/prodigy/data.jsonl    |   0
 .../data_split_on_sentences_latest.jsonl      |   0
 .../prodigy/prodigy_empty.json                |   0
 .../prodigy_app2/Dockerfile                   |   0
 ...sification_remaining_md_1150_dataset.jsonl |   0
 ...00_sentences_cause_effect_rel_150421.jsonl |   0
 .../prodigy_temp_deployment/Dockerfile        |   0
 .../workshop_examples/Dockerfile              |   0
 .../workshop_examples/help.txt                |   0
 ...kshop_examples_simple_one_from_final.jsonl |   0
 .../workshop_examples/workshop.jsonl          |   0
 {src => cm_nlp}/utils/README.md               |   0
 cm_nlp/utils/__init__.py                      |   1 +
 {src => cm_nlp}/utils/analyze_jsonl.py        |   0
 .../utils/annotated_csv_to_jsonl.py           |   0
 cm_nlp/utils/database_download/__init__.py    |   1 +
 ...a80a0f12-c8ad-4c58-a458-27170af442da.jsonl |   0
 ...c1e8d0c0-95d1-4ca1-9774-f712740939a3.jsonl |   0
 ...04bf02d9-d2cb-4c24-9775-f0bd06e0ba7f.jsonl |   0
 .../database_download/dataset_scraper.py      |   0
 .../database_download/prodigy_empty.json      |   0
 .../utils/diffbot_scraping_articles.py        |   0
 {src => cm_nlp}/utils/diversity_analysis.py   |   0
 .../utils}/pocket_extract.py                  |   0
 .../utils/process_extracted_text.py           |   0
 {src => cm_nlp}/utils/split_sentences.py      |   0
 .../BERT_Climate_Classifier.ipynb             |   0
 .../BERT_Pretraining.ipynb                    |   0
 .../BERT_Summarizer.ipynb                     |   0
 .../Climate_Casual_Sentence_Classifier.ipynb  |   0
 .../TF_to_Torch_conversion.ipynb              |   0
 src/utils/CSVUtils.ipynb                      |  39 ---
 src/utils/DiffbotUtils.ipynb                  |  16 -
 src/utils/JSONLUtils.ipynb                    |  16 -
 src/utils/PocketUtils.ipynb                   |  50 ----
 src/utils/SentenceUtils.ipynb                 |  16 -
 src/utils/diffbot_scraping_articles.ipynb     | 279 ------------------
 55 files changed, 3 insertions(+), 416 deletions(-)
 create mode 100644 cm_nlp/__init__.py
 rename {src => cm_nlp}/tools/README.md (100%)
 rename src/tools/preprocess_data/data_pipeline.py => cm_nlp/tools/ontology_graph/__init__.py (100%)
 rename {src => cm_nlp}/tools/ontology_graph/climate_mind_OWL_to_networkx.py (100%)
 rename {src => cm_nlp}/tools/ontology_graph/install_graphviz.py (100%)
 rename {src => cm_nlp}/tools/ontology_graph/tests_output_folder/Climate_Mind_DiGraph.gpickle (100%)
 rename {src => cm_nlp}/tools/ontology_graph/tests_output_folder/Climate_Mind_Digraph_Test_Ont.json (100%)
 rename {src => cm_nlp}/tools/ontology_graph/tests_output_folder/output.csv (100%)
 rename {src => cm_nlp}/tools/preprocess_data/coreNLP/coreNLP_python.py (100%)
 rename {src => cm_nlp}/tools/preprocess_data/coref_resolution.py (100%)
 create mode 100644 cm_nlp/tools/preprocess_data/data_pipeline.py
 rename {src => cm_nlp}/tools/preprocess_data/example_data.jsonl (100%)
 rename {src => cm_nlp}/tools/preprocess_data/helper_functions.py (100%)
 rename {src => cm_nlp}/tools/prodigy_deployments/entity_checkin_one/Dockerfile (100%)
 rename {src => cm_nlp}/tools/prodigy_deployments/entity_checkin_one/checkin_one.jsonl (100%)
 rename {src => cm_nlp}/tools/prodigy_deployments/entity_checkin_one/help.txt (100%)
 rename {src => cm_nlp}/tools/prodigy_deployments/prodigy/ClimateBERT_cause_effect_prediction_pos_sentences.jsonl (100%)
 rename {src => cm_nlp}/tools/prodigy_deployments/prodigy/Dockerfile (100%)
 rename {src => cm_nlp}/tools/prodigy_deployments/prodigy/data.jsonl (100%)
 rename {src => cm_nlp}/tools/prodigy_deployments/prodigy/data_split_on_sentences_latest.jsonl (100%)
 rename {src => cm_nlp}/tools/prodigy_deployments/prodigy/prodigy_empty.json (100%)
 rename {src => cm_nlp}/tools/prodigy_deployments/prodigy_app2/Dockerfile (100%)
 rename {src => cm_nlp}/tools/prodigy_deployments/prodigy_app2/causal_classification_remaining_md_1150_dataset.jsonl (100%)
 rename {src => cm_nlp}/tools/prodigy_deployments/prodigy_temp_deployment/200_sentences_cause_effect_rel_150421.jsonl (100%)
 rename {src => cm_nlp}/tools/prodigy_deployments/prodigy_temp_deployment/Dockerfile (100%)
 rename {src => cm_nlp}/tools/prodigy_deployments/workshop_examples/Dockerfile (100%)
 rename {src => cm_nlp}/tools/prodigy_deployments/workshop_examples/help.txt (100%)
 rename {src => cm_nlp}/tools/prodigy_deployments/workshop_examples/ordered_workshop_examples_simple_one_from_final.jsonl (100%)
 rename {src => cm_nlp}/tools/prodigy_deployments/workshop_examples/workshop.jsonl (100%)
 rename {src => cm_nlp}/utils/README.md (100%)
 create mode 100644 cm_nlp/utils/__init__.py
 rename {src => cm_nlp}/utils/analyze_jsonl.py (100%)
 rename {src => cm_nlp}/utils/annotated_csv_to_jsonl.py (100%)
 create mode 100644 cm_nlp/utils/database_download/__init__.py
 rename {src => cm_nlp}/utils/database_download/cm-label-eval_download.a80a0f12-c8ad-4c58-a458-27170af442da.jsonl (100%)
 rename {src => cm_nlp}/utils/database_download/cm_cause_effect_rel_download.c1e8d0c0-95d1-4ca1-9774-f712740939a3.jsonl (100%)
 rename {src => cm_nlp}/utils/database_download/dataset_downloads/cm_cause_effect_rel_download.04bf02d9-d2cb-4c24-9775-f0bd06e0ba7f.jsonl (100%)
 rename {src => cm_nlp}/utils/database_download/dataset_scraper.py (100%)
 rename {src => cm_nlp}/utils/database_download/prodigy_empty.json (100%)
 rename {src => cm_nlp}/utils/diffbot_scraping_articles.py (100%)
 rename {src => cm_nlp}/utils/diversity_analysis.py (100%)
 rename {src/utils/to_delete => cm_nlp/utils}/pocket_extract.py (100%)
 rename {src => cm_nlp}/utils/process_extracted_text.py (100%)
 rename {src => cm_nlp}/utils/split_sentences.py (100%)
 rename {src/model_dev => model_dev}/BERT_Climate_Classifier.ipynb (100%)
 rename {src/model_dev => model_dev}/BERT_Pretraining.ipynb (100%)
 rename {src/model_dev => model_dev}/BERT_Summarizer.ipynb (100%)
 rename {src/model_dev => model_dev}/Climate_Casual_Sentence_Classifier.ipynb (100%)
 rename {src/model_dev => model_dev}/TF_to_Torch_conversion.ipynb (100%)
 delete mode 100644 src/utils/CSVUtils.ipynb
 delete mode 100644 src/utils/DiffbotUtils.ipynb
 delete mode 100644 src/utils/JSONLUtils.ipynb
 delete mode 100644 src/utils/PocketUtils.ipynb
 delete mode 100644 src/utils/SentenceUtils.ipynb
 delete mode 100644 src/utils/diffbot_scraping_articles.ipynb

diff --git a/cm_nlp/__init__.py b/cm_nlp/__init__.py
new file mode 100644
index 0000000..d310fdd
--- /dev/null
+++ b/cm_nlp/__init__.py
@@ -0,0 +1 @@
+# package
\ No newline at end of file
diff --git a/src/tools/README.md b/cm_nlp/tools/README.md
similarity index 100%
rename from src/tools/README.md
rename to cm_nlp/tools/README.md
diff --git a/src/tools/preprocess_data/data_pipeline.py b/cm_nlp/tools/ontology_graph/__init__.py
similarity index 100%
rename from src/tools/preprocess_data/data_pipeline.py
rename to cm_nlp/tools/ontology_graph/__init__.py
diff --git a/src/tools/ontology_graph/climate_mind_OWL_to_networkx.py b/cm_nlp/tools/ontology_graph/climate_mind_OWL_to_networkx.py
similarity index 100%
rename from src/tools/ontology_graph/climate_mind_OWL_to_networkx.py
rename to cm_nlp/tools/ontology_graph/climate_mind_OWL_to_networkx.py
diff --git a/src/tools/ontology_graph/install_graphviz.py b/cm_nlp/tools/ontology_graph/install_graphviz.py
similarity index 100%
rename from src/tools/ontology_graph/install_graphviz.py
rename to cm_nlp/tools/ontology_graph/install_graphviz.py
diff --git a/src/tools/ontology_graph/tests_output_folder/Climate_Mind_DiGraph.gpickle b/cm_nlp/tools/ontology_graph/tests_output_folder/Climate_Mind_DiGraph.gpickle
similarity index 100%
rename from src/tools/ontology_graph/tests_output_folder/Climate_Mind_DiGraph.gpickle
rename to cm_nlp/tools/ontology_graph/tests_output_folder/Climate_Mind_DiGraph.gpickle
diff --git a/src/tools/ontology_graph/tests_output_folder/Climate_Mind_Digraph_Test_Ont.json b/cm_nlp/tools/ontology_graph/tests_output_folder/Climate_Mind_Digraph_Test_Ont.json
similarity index 100%
rename from src/tools/ontology_graph/tests_output_folder/Climate_Mind_Digraph_Test_Ont.json
rename to cm_nlp/tools/ontology_graph/tests_output_folder/Climate_Mind_Digraph_Test_Ont.json
diff --git a/src/tools/ontology_graph/tests_output_folder/output.csv b/cm_nlp/tools/ontology_graph/tests_output_folder/output.csv
similarity index 100%
rename from src/tools/ontology_graph/tests_output_folder/output.csv
rename to cm_nlp/tools/ontology_graph/tests_output_folder/output.csv
diff --git a/src/tools/preprocess_data/coreNLP/coreNLP_python.py b/cm_nlp/tools/preprocess_data/coreNLP/coreNLP_python.py
similarity index 100%
rename from src/tools/preprocess_data/coreNLP/coreNLP_python.py
rename to cm_nlp/tools/preprocess_data/coreNLP/coreNLP_python.py
diff --git a/src/tools/preprocess_data/coref_resolution.py b/cm_nlp/tools/preprocess_data/coref_resolution.py
similarity index 100%
rename from src/tools/preprocess_data/coref_resolution.py
rename to cm_nlp/tools/preprocess_data/coref_resolution.py
diff --git a/cm_nlp/tools/preprocess_data/data_pipeline.py b/cm_nlp/tools/preprocess_data/data_pipeline.py
new file mode 100644
index 0000000..e69de29
diff --git a/src/tools/preprocess_data/example_data.jsonl b/cm_nlp/tools/preprocess_data/example_data.jsonl
similarity index 100%
rename from src/tools/preprocess_data/example_data.jsonl
rename to cm_nlp/tools/preprocess_data/example_data.jsonl
diff --git a/src/tools/preprocess_data/helper_functions.py b/cm_nlp/tools/preprocess_data/helper_functions.py
similarity index 100%
rename from src/tools/preprocess_data/helper_functions.py
rename to cm_nlp/tools/preprocess_data/helper_functions.py
diff --git a/src/tools/prodigy_deployments/entity_checkin_one/Dockerfile b/cm_nlp/tools/prodigy_deployments/entity_checkin_one/Dockerfile
similarity index 100%
rename from src/tools/prodigy_deployments/entity_checkin_one/Dockerfile
rename to cm_nlp/tools/prodigy_deployments/entity_checkin_one/Dockerfile
diff --git a/src/tools/prodigy_deployments/entity_checkin_one/checkin_one.jsonl b/cm_nlp/tools/prodigy_deployments/entity_checkin_one/checkin_one.jsonl
similarity index 100%
rename from src/tools/prodigy_deployments/entity_checkin_one/checkin_one.jsonl
rename to cm_nlp/tools/prodigy_deployments/entity_checkin_one/checkin_one.jsonl
diff --git a/src/tools/prodigy_deployments/entity_checkin_one/help.txt b/cm_nlp/tools/prodigy_deployments/entity_checkin_one/help.txt
similarity index 100%
rename from src/tools/prodigy_deployments/entity_checkin_one/help.txt
rename to cm_nlp/tools/prodigy_deployments/entity_checkin_one/help.txt
diff --git a/src/tools/prodigy_deployments/prodigy/ClimateBERT_cause_effect_prediction_pos_sentences.jsonl b/cm_nlp/tools/prodigy_deployments/prodigy/ClimateBERT_cause_effect_prediction_pos_sentences.jsonl
similarity index 100%
rename from src/tools/prodigy_deployments/prodigy/ClimateBERT_cause_effect_prediction_pos_sentences.jsonl
rename to cm_nlp/tools/prodigy_deployments/prodigy/ClimateBERT_cause_effect_prediction_pos_sentences.jsonl
diff --git a/src/tools/prodigy_deployments/prodigy/Dockerfile b/cm_nlp/tools/prodigy_deployments/prodigy/Dockerfile
similarity index 100%
rename from src/tools/prodigy_deployments/prodigy/Dockerfile
rename to cm_nlp/tools/prodigy_deployments/prodigy/Dockerfile
diff --git a/src/tools/prodigy_deployments/prodigy/data.jsonl b/cm_nlp/tools/prodigy_deployments/prodigy/data.jsonl
similarity index 100%
rename from src/tools/prodigy_deployments/prodigy/data.jsonl
rename to cm_nlp/tools/prodigy_deployments/prodigy/data.jsonl
diff --git a/src/tools/prodigy_deployments/prodigy/data_split_on_sentences_latest.jsonl b/cm_nlp/tools/prodigy_deployments/prodigy/data_split_on_sentences_latest.jsonl
similarity index 100%
rename from src/tools/prodigy_deployments/prodigy/data_split_on_sentences_latest.jsonl
rename to cm_nlp/tools/prodigy_deployments/prodigy/data_split_on_sentences_latest.jsonl
diff --git a/src/tools/prodigy_deployments/prodigy/prodigy_empty.json b/cm_nlp/tools/prodigy_deployments/prodigy/prodigy_empty.json
similarity index 100%
rename from src/tools/prodigy_deployments/prodigy/prodigy_empty.json
rename to cm_nlp/tools/prodigy_deployments/prodigy/prodigy_empty.json
diff --git a/src/tools/prodigy_deployments/prodigy_app2/Dockerfile b/cm_nlp/tools/prodigy_deployments/prodigy_app2/Dockerfile
similarity index 100%
rename from src/tools/prodigy_deployments/prodigy_app2/Dockerfile
rename to cm_nlp/tools/prodigy_deployments/prodigy_app2/Dockerfile
diff --git a/src/tools/prodigy_deployments/prodigy_app2/causal_classification_remaining_md_1150_dataset.jsonl b/cm_nlp/tools/prodigy_deployments/prodigy_app2/causal_classification_remaining_md_1150_dataset.jsonl
similarity index 100%
rename from src/tools/prodigy_deployments/prodigy_app2/causal_classification_remaining_md_1150_dataset.jsonl
rename to cm_nlp/tools/prodigy_deployments/prodigy_app2/causal_classification_remaining_md_1150_dataset.jsonl
diff --git a/src/tools/prodigy_deployments/prodigy_temp_deployment/200_sentences_cause_effect_rel_150421.jsonl b/cm_nlp/tools/prodigy_deployments/prodigy_temp_deployment/200_sentences_cause_effect_rel_150421.jsonl
similarity index 100%
rename from src/tools/prodigy_deployments/prodigy_temp_deployment/200_sentences_cause_effect_rel_150421.jsonl
rename to cm_nlp/tools/prodigy_deployments/prodigy_temp_deployment/200_sentences_cause_effect_rel_150421.jsonl
diff --git a/src/tools/prodigy_deployments/prodigy_temp_deployment/Dockerfile b/cm_nlp/tools/prodigy_deployments/prodigy_temp_deployment/Dockerfile
similarity index 100%
rename from src/tools/prodigy_deployments/prodigy_temp_deployment/Dockerfile
rename to cm_nlp/tools/prodigy_deployments/prodigy_temp_deployment/Dockerfile
diff --git a/src/tools/prodigy_deployments/workshop_examples/Dockerfile b/cm_nlp/tools/prodigy_deployments/workshop_examples/Dockerfile
similarity index 100%
rename from src/tools/prodigy_deployments/workshop_examples/Dockerfile
rename to cm_nlp/tools/prodigy_deployments/workshop_examples/Dockerfile
diff --git a/src/tools/prodigy_deployments/workshop_examples/help.txt b/cm_nlp/tools/prodigy_deployments/workshop_examples/help.txt
similarity index 100%
rename from src/tools/prodigy_deployments/workshop_examples/help.txt
rename to cm_nlp/tools/prodigy_deployments/workshop_examples/help.txt
diff --git a/src/tools/prodigy_deployments/workshop_examples/ordered_workshop_examples_simple_one_from_final.jsonl b/cm_nlp/tools/prodigy_deployments/workshop_examples/ordered_workshop_examples_simple_one_from_final.jsonl
similarity index 100%
rename from src/tools/prodigy_deployments/workshop_examples/ordered_workshop_examples_simple_one_from_final.jsonl
rename to cm_nlp/tools/prodigy_deployments/workshop_examples/ordered_workshop_examples_simple_one_from_final.jsonl
diff --git a/src/tools/prodigy_deployments/workshop_examples/workshop.jsonl b/cm_nlp/tools/prodigy_deployments/workshop_examples/workshop.jsonl
similarity index 100%
rename from src/tools/prodigy_deployments/workshop_examples/workshop.jsonl
rename to cm_nlp/tools/prodigy_deployments/workshop_examples/workshop.jsonl
diff --git a/src/utils/README.md b/cm_nlp/utils/README.md
similarity index 100%
rename from src/utils/README.md
rename to cm_nlp/utils/README.md
diff --git a/cm_nlp/utils/__init__.py b/cm_nlp/utils/__init__.py
new file mode 100644
index 0000000..d310fdd
--- /dev/null
+++ b/cm_nlp/utils/__init__.py
@@ -0,0 +1 @@
+# package
\ No newline at end of file
diff --git a/src/utils/analyze_jsonl.py b/cm_nlp/utils/analyze_jsonl.py
similarity index 100%
rename from src/utils/analyze_jsonl.py
rename to cm_nlp/utils/analyze_jsonl.py
diff --git a/src/utils/annotated_csv_to_jsonl.py b/cm_nlp/utils/annotated_csv_to_jsonl.py
similarity index 100%
rename from src/utils/annotated_csv_to_jsonl.py
rename to cm_nlp/utils/annotated_csv_to_jsonl.py
diff --git a/cm_nlp/utils/database_download/__init__.py b/cm_nlp/utils/database_download/__init__.py
new file mode 100644
index 0000000..d310fdd
--- /dev/null
+++ b/cm_nlp/utils/database_download/__init__.py
@@ -0,0 +1 @@
+# package
\ No newline at end of file
diff --git a/src/utils/database_download/cm-label-eval_download.a80a0f12-c8ad-4c58-a458-27170af442da.jsonl b/cm_nlp/utils/database_download/cm-label-eval_download.a80a0f12-c8ad-4c58-a458-27170af442da.jsonl
similarity index 100%
rename from src/utils/database_download/cm-label-eval_download.a80a0f12-c8ad-4c58-a458-27170af442da.jsonl
rename to cm_nlp/utils/database_download/cm-label-eval_download.a80a0f12-c8ad-4c58-a458-27170af442da.jsonl
diff --git a/src/utils/database_download/cm_cause_effect_rel_download.c1e8d0c0-95d1-4ca1-9774-f712740939a3.jsonl b/cm_nlp/utils/database_download/cm_cause_effect_rel_download.c1e8d0c0-95d1-4ca1-9774-f712740939a3.jsonl
similarity index 100%
rename from src/utils/database_download/cm_cause_effect_rel_download.c1e8d0c0-95d1-4ca1-9774-f712740939a3.jsonl
rename to cm_nlp/utils/database_download/cm_cause_effect_rel_download.c1e8d0c0-95d1-4ca1-9774-f712740939a3.jsonl
diff --git a/src/utils/database_download/dataset_downloads/cm_cause_effect_rel_download.04bf02d9-d2cb-4c24-9775-f0bd06e0ba7f.jsonl b/cm_nlp/utils/database_download/dataset_downloads/cm_cause_effect_rel_download.04bf02d9-d2cb-4c24-9775-f0bd06e0ba7f.jsonl
similarity index 100%
rename from src/utils/database_download/dataset_downloads/cm_cause_effect_rel_download.04bf02d9-d2cb-4c24-9775-f0bd06e0ba7f.jsonl
rename to cm_nlp/utils/database_download/dataset_downloads/cm_cause_effect_rel_download.04bf02d9-d2cb-4c24-9775-f0bd06e0ba7f.jsonl
diff --git a/src/utils/database_download/dataset_scraper.py b/cm_nlp/utils/database_download/dataset_scraper.py
similarity index 100%
rename from src/utils/database_download/dataset_scraper.py
rename to cm_nlp/utils/database_download/dataset_scraper.py
diff --git a/src/utils/database_download/prodigy_empty.json b/cm_nlp/utils/database_download/prodigy_empty.json
similarity index 100%
rename from src/utils/database_download/prodigy_empty.json
rename to cm_nlp/utils/database_download/prodigy_empty.json
diff --git a/src/utils/diffbot_scraping_articles.py b/cm_nlp/utils/diffbot_scraping_articles.py
similarity index 100%
rename from src/utils/diffbot_scraping_articles.py
rename to cm_nlp/utils/diffbot_scraping_articles.py
diff --git a/src/utils/diversity_analysis.py b/cm_nlp/utils/diversity_analysis.py
similarity index 100%
rename from src/utils/diversity_analysis.py
rename to cm_nlp/utils/diversity_analysis.py
diff --git a/src/utils/to_delete/pocket_extract.py b/cm_nlp/utils/pocket_extract.py
similarity index 100%
rename from src/utils/to_delete/pocket_extract.py
rename to cm_nlp/utils/pocket_extract.py
diff --git a/src/utils/process_extracted_text.py b/cm_nlp/utils/process_extracted_text.py
similarity index 100%
rename from src/utils/process_extracted_text.py
rename to cm_nlp/utils/process_extracted_text.py
diff --git a/src/utils/split_sentences.py b/cm_nlp/utils/split_sentences.py
similarity index 100%
rename from src/utils/split_sentences.py
rename to cm_nlp/utils/split_sentences.py
diff --git a/src/model_dev/BERT_Climate_Classifier.ipynb b/model_dev/BERT_Climate_Classifier.ipynb
similarity index 100%
rename from src/model_dev/BERT_Climate_Classifier.ipynb
rename to model_dev/BERT_Climate_Classifier.ipynb
diff --git a/src/model_dev/BERT_Pretraining.ipynb b/model_dev/BERT_Pretraining.ipynb
similarity index 100%
rename from src/model_dev/BERT_Pretraining.ipynb
rename to model_dev/BERT_Pretraining.ipynb
diff --git a/src/model_dev/BERT_Summarizer.ipynb b/model_dev/BERT_Summarizer.ipynb
similarity index 100%
rename from src/model_dev/BERT_Summarizer.ipynb
rename to model_dev/BERT_Summarizer.ipynb
diff --git a/src/model_dev/Climate_Casual_Sentence_Classifier.ipynb b/model_dev/Climate_Casual_Sentence_Classifier.ipynb
similarity index 100%
rename from src/model_dev/Climate_Casual_Sentence_Classifier.ipynb
rename to model_dev/Climate_Casual_Sentence_Classifier.ipynb
diff --git a/src/model_dev/TF_to_Torch_conversion.ipynb b/model_dev/TF_to_Torch_conversion.ipynb
similarity index 100%
rename from src/model_dev/TF_to_Torch_conversion.ipynb
rename to model_dev/TF_to_Torch_conversion.ipynb
diff --git a/src/utils/CSVUtils.ipynb b/src/utils/CSVUtils.ipynb
deleted file mode 100644
index a6c5fbe..0000000
--- a/src/utils/CSVUtils.ipynb
+++ /dev/null
@@ -1,39 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import json\n",
-    "import pandas as pd"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#export\n",
-    "def csv_to_jsonl(input_file, output_file):\n",
-    "    \"\"\" Turns a CSV into a JSONL file\"\"\"\n",
-    "    sentences = pd.read_csv(input_file, sep = \",\")\n",
-    "    sentence_list = sentences[sentences.Pred == 1].text.tolist()\n",
-    "\n",
-    "    text_list = []\n",
-    "    for sentence in sentence_list:\n",
-    "        text_list.append({\"text\": sentence})\n",
-    "\n",
-    "\n",
-    "    with open(output_file, 'w') as f:\n",
-    "        for item in text_list:\n",
-    "            f.write(json.dumps(item) + \"\\n\")\n"
-   ]
-  }
- ],
- "metadata": {},
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/src/utils/DiffbotUtils.ipynb b/src/utils/DiffbotUtils.ipynb
deleted file mode 100644
index e7e0067..0000000
--- a/src/utils/DiffbotUtils.ipynb
+++ /dev/null
@@ -1,16 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# hide"
-   ]
-  }
- ],
- "metadata": {},
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/src/utils/JSONLUtils.ipynb b/src/utils/JSONLUtils.ipynb
deleted file mode 100644
index a97307c..0000000
--- a/src/utils/JSONLUtils.ipynb
+++ /dev/null
@@ -1,16 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#hide"
-   ]
-  }
- ],
- "metadata": {},
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/src/utils/PocketUtils.ipynb b/src/utils/PocketUtils.ipynb
deleted file mode 100644
index 9273d83..0000000
--- a/src/utils/PocketUtils.ipynb
+++ /dev/null
@@ -1,50 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#export\n",
-    "def url(endpoint):\n",
-    "    \"\"\"Returns the Pocket URL endpoint\"\"\"\n",
-    "    return \"https://getpocket.com/v3/\" + endpoint"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#export \n",
-    "def authorize(consumer_key, headers = {\"X-Accept\": \"application/json\"}):\n",
-    "    \"\"\" Creates an authorization token to access our Pocket article database.\"\"\"\n",
-    "    redirect_uri = \"https://google.com\"\n",
-    "    req = requests.post(url(\"oauth/request\"), data={\n",
-    "        \"consumer_key\": consumer_key,\n",
-    "        \"redirect_uri\": redirect_uri\n",
-    "    }, headers=headers)\n",
-    "    request_token = req.json()[\"code\"]\n",
-    "    print(\"<<< After signing in at link below press ENTER >>>\")\n",
-    "    print(f\"https://getpocket.com/auth/authorize?request_token={request_token}&redirect_uri={redirect_uri}\")\n",
-    "    input() # super hacky way of making it wait\n",
-    "    # After authenticating the request_token ask for an access token\n",
-    "    res = requests.post(url(\"oauth/authorize\"), data={\n",
-    "        \"consumer_key\": consumer_key,\n",
-    "        \"code\": request_token,\n",
-    "    }, headers=headers)\n",
-    "\n",
-    "    if not res.ok:\n",
-    "        print(\"ERROR: Authorization failed:\", req.text)\n",
-    "        sys.exit(1)\n",
-    "\n",
-    "    return res.json()[\"access_token\"]"
-   ]
-  }
- ],
- "metadata": {},
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/src/utils/SentenceUtils.ipynb b/src/utils/SentenceUtils.ipynb
deleted file mode 100644
index a97307c..0000000
--- a/src/utils/SentenceUtils.ipynb
+++ /dev/null
@@ -1,16 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#hide"
-   ]
-  }
- ],
- "metadata": {},
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/src/utils/diffbot_scraping_articles.ipynb b/src/utils/diffbot_scraping_articles.ipynb
deleted file mode 100644
index d804a53..0000000
--- a/src/utils/diffbot_scraping_articles.ipynb
+++ /dev/null
@@ -1,279 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "# Original Author: Mukut Mukherjee"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pandas as pd"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data=pd.read_csv('cm_pocket_export_23-11-2020.csv',index_col=0)\n",
-    "data.info()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "solution_index=[]\n",
-    "for index,row in data.iterrows():\n",
-    "    tags=row['tags']\n",
-    "    if 'cause'in tags:\n",
-    "        solution_index.append(index)\n",
-    "    elif 'causes' in tags:\n",
-    "        solution_index.append(index)\n",
-    "    elif 'Cause' in tags:\n",
-    "        solution_index.append(index)\n",
-    "    elif 'Causes' in tags:\n",
-    "        solution_index.append(index)\n",
-    "    elif 'caused' in tags:\n",
-    "        solution_index.append(index)\n",
-    "    elif 'causing' in tags:\n",
-    "        solution_index.append(index)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "solution=data.loc[solution_index]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(solution)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#when scraping all articles\n",
-    "solution = data"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "fb_string='https://www.facebook.com/'\n",
-    "fb_solution_index=[]\n",
-    "for index,row in solution.iterrows():\n",
-    "    if fb_string in row['given_url']:\n",
-    "        fb_solution_index.append(index)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "solution_wo_fb=solution.drop(index=fb_solution_index)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(solution_wo_fb)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "solution_wo_fb.to_csv('pocket_11_july_causes.csv')"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "￼\n"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import requests "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "len(articles)"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "token='8e24add53f9f80fed9598eab3e1356f6'\n",
-    "URL='https://api.diffbot.com/v3/article'"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import time\n",
-    "from tqdm import tqdm"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "#data_to_store=[]\n",
-    "#articles=solution_wo_fb['resolved_url'].values.tolist()\n",
-    "#arricles_processed=[]\n",
-    "\n",
-    "for x in tqdm(range(2206, 3084)):\n",
-    "    PARAMS = {'token':token,'url':articles[x]} \n",
-    "    r = requests.get(url = URL, params = PARAMS) \n",
-    "    data = r.json()\n",
-    "    data_to_store.append(data)\n",
-    "    arricles_processed.append(articles[x])\n",
-    "  "
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data_to_store[2204]['objects'][0]['text']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "arricles_processed"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data.keys()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data_to_store[4]['objects'][0]['images'][0]['url']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data_to_store[4]['request']['pageUrl']"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "data_to_store[4]"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "import pickle"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "pickle.dump( data_to_store, open( \"all_pocket.p\", \"wb\" ) )"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "index error: 2205"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Python 3",
-   "language": "python",
-   "name": "python3"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 4
-}

From a8b6ad7eba6f847844a4bf26db9a09cb35b63990 Mon Sep 17 00:00:00 2001
From: Anthony Hevia <anthonyhevia@microsoft.com>
Date: Sun, 17 Oct 2021 13:49:22 -0700
Subject: [PATCH 2/2] forgot a file

---
 cm_nlp/tools/ontology_graph/__init__.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/cm_nlp/tools/ontology_graph/__init__.py b/cm_nlp/tools/ontology_graph/__init__.py
index e69de29..d310fdd 100644
--- a/cm_nlp/tools/ontology_graph/__init__.py
+++ b/cm_nlp/tools/ontology_graph/__init__.py
@@ -0,0 +1 @@
+# package
\ No newline at end of file