From d9386bcf7136b21970de92ff6e5839997353dcc8 Mon Sep 17 00:00:00 2001 From: Anthony Hevia Date: Sun, 17 Oct 2021 13:49:07 -0700 Subject: [PATCH 1/2] a cleaner package refactor --- cm_nlp/__init__.py | 1 + {src => cm_nlp}/tools/README.md | 0 .../tools/ontology_graph/__init__.py | 0 .../climate_mind_OWL_to_networkx.py | 0 .../tools/ontology_graph/install_graphviz.py | 0 .../Climate_Mind_DiGraph.gpickle | Bin .../Climate_Mind_Digraph_Test_Ont.json | 0 .../tests_output_folder/output.csv | 0 .../preprocess_data/coreNLP/coreNLP_python.py | 0 .../tools/preprocess_data/coref_resolution.py | 0 cm_nlp/tools/preprocess_data/data_pipeline.py | 0 .../tools/preprocess_data/example_data.jsonl | 0 .../tools/preprocess_data/helper_functions.py | 0 .../entity_checkin_one/Dockerfile | 0 .../entity_checkin_one/checkin_one.jsonl | 0 .../entity_checkin_one/help.txt | 0 ...ause_effect_prediction_pos_sentences.jsonl | 0 .../prodigy_deployments/prodigy/Dockerfile | 0 .../prodigy_deployments/prodigy/data.jsonl | 0 .../data_split_on_sentences_latest.jsonl | 0 .../prodigy/prodigy_empty.json | 0 .../prodigy_app2/Dockerfile | 0 ...sification_remaining_md_1150_dataset.jsonl | 0 ...00_sentences_cause_effect_rel_150421.jsonl | 0 .../prodigy_temp_deployment/Dockerfile | 0 .../workshop_examples/Dockerfile | 0 .../workshop_examples/help.txt | 0 ...kshop_examples_simple_one_from_final.jsonl | 0 .../workshop_examples/workshop.jsonl | 0 {src => cm_nlp}/utils/README.md | 0 cm_nlp/utils/__init__.py | 1 + {src => cm_nlp}/utils/analyze_jsonl.py | 0 .../utils/annotated_csv_to_jsonl.py | 0 cm_nlp/utils/database_download/__init__.py | 1 + ...a80a0f12-c8ad-4c58-a458-27170af442da.jsonl | 0 ...c1e8d0c0-95d1-4ca1-9774-f712740939a3.jsonl | 0 ...04bf02d9-d2cb-4c24-9775-f0bd06e0ba7f.jsonl | 0 .../database_download/dataset_scraper.py | 0 .../database_download/prodigy_empty.json | 0 .../utils/diffbot_scraping_articles.py | 0 {src => cm_nlp}/utils/diversity_analysis.py | 0 .../utils}/pocket_extract.py | 0 .../utils/process_extracted_text.py | 0 {src => cm_nlp}/utils/split_sentences.py | 0 .../BERT_Climate_Classifier.ipynb | 0 .../BERT_Pretraining.ipynb | 0 .../BERT_Summarizer.ipynb | 0 .../Climate_Casual_Sentence_Classifier.ipynb | 0 .../TF_to_Torch_conversion.ipynb | 0 src/utils/CSVUtils.ipynb | 39 --- src/utils/DiffbotUtils.ipynb | 16 - src/utils/JSONLUtils.ipynb | 16 - src/utils/PocketUtils.ipynb | 50 ---- src/utils/SentenceUtils.ipynb | 16 - src/utils/diffbot_scraping_articles.ipynb | 279 ------------------ 55 files changed, 3 insertions(+), 416 deletions(-) create mode 100644 cm_nlp/__init__.py rename {src => cm_nlp}/tools/README.md (100%) rename src/tools/preprocess_data/data_pipeline.py => cm_nlp/tools/ontology_graph/__init__.py (100%) rename {src => cm_nlp}/tools/ontology_graph/climate_mind_OWL_to_networkx.py (100%) rename {src => cm_nlp}/tools/ontology_graph/install_graphviz.py (100%) rename {src => cm_nlp}/tools/ontology_graph/tests_output_folder/Climate_Mind_DiGraph.gpickle (100%) rename {src => cm_nlp}/tools/ontology_graph/tests_output_folder/Climate_Mind_Digraph_Test_Ont.json (100%) rename {src => cm_nlp}/tools/ontology_graph/tests_output_folder/output.csv (100%) rename {src => cm_nlp}/tools/preprocess_data/coreNLP/coreNLP_python.py (100%) rename {src => cm_nlp}/tools/preprocess_data/coref_resolution.py (100%) create mode 100644 cm_nlp/tools/preprocess_data/data_pipeline.py rename {src => cm_nlp}/tools/preprocess_data/example_data.jsonl (100%) rename {src => cm_nlp}/tools/preprocess_data/helper_functions.py (100%) rename {src => cm_nlp}/tools/prodigy_deployments/entity_checkin_one/Dockerfile (100%) rename {src => cm_nlp}/tools/prodigy_deployments/entity_checkin_one/checkin_one.jsonl (100%) rename {src => cm_nlp}/tools/prodigy_deployments/entity_checkin_one/help.txt (100%) rename {src => cm_nlp}/tools/prodigy_deployments/prodigy/ClimateBERT_cause_effect_prediction_pos_sentences.jsonl (100%) rename {src => cm_nlp}/tools/prodigy_deployments/prodigy/Dockerfile (100%) rename {src => cm_nlp}/tools/prodigy_deployments/prodigy/data.jsonl (100%) rename {src => cm_nlp}/tools/prodigy_deployments/prodigy/data_split_on_sentences_latest.jsonl (100%) rename {src => cm_nlp}/tools/prodigy_deployments/prodigy/prodigy_empty.json (100%) rename {src => cm_nlp}/tools/prodigy_deployments/prodigy_app2/Dockerfile (100%) rename {src => cm_nlp}/tools/prodigy_deployments/prodigy_app2/causal_classification_remaining_md_1150_dataset.jsonl (100%) rename {src => cm_nlp}/tools/prodigy_deployments/prodigy_temp_deployment/200_sentences_cause_effect_rel_150421.jsonl (100%) rename {src => cm_nlp}/tools/prodigy_deployments/prodigy_temp_deployment/Dockerfile (100%) rename {src => cm_nlp}/tools/prodigy_deployments/workshop_examples/Dockerfile (100%) rename {src => cm_nlp}/tools/prodigy_deployments/workshop_examples/help.txt (100%) rename {src => cm_nlp}/tools/prodigy_deployments/workshop_examples/ordered_workshop_examples_simple_one_from_final.jsonl (100%) rename {src => cm_nlp}/tools/prodigy_deployments/workshop_examples/workshop.jsonl (100%) rename {src => cm_nlp}/utils/README.md (100%) create mode 100644 cm_nlp/utils/__init__.py rename {src => cm_nlp}/utils/analyze_jsonl.py (100%) rename {src => cm_nlp}/utils/annotated_csv_to_jsonl.py (100%) create mode 100644 cm_nlp/utils/database_download/__init__.py rename {src => cm_nlp}/utils/database_download/cm-label-eval_download.a80a0f12-c8ad-4c58-a458-27170af442da.jsonl (100%) rename {src => cm_nlp}/utils/database_download/cm_cause_effect_rel_download.c1e8d0c0-95d1-4ca1-9774-f712740939a3.jsonl (100%) rename {src => cm_nlp}/utils/database_download/dataset_downloads/cm_cause_effect_rel_download.04bf02d9-d2cb-4c24-9775-f0bd06e0ba7f.jsonl (100%) rename {src => cm_nlp}/utils/database_download/dataset_scraper.py (100%) rename {src => cm_nlp}/utils/database_download/prodigy_empty.json (100%) rename {src => cm_nlp}/utils/diffbot_scraping_articles.py (100%) rename {src => cm_nlp}/utils/diversity_analysis.py (100%) rename {src/utils/to_delete => cm_nlp/utils}/pocket_extract.py (100%) rename {src => cm_nlp}/utils/process_extracted_text.py (100%) rename {src => cm_nlp}/utils/split_sentences.py (100%) rename {src/model_dev => model_dev}/BERT_Climate_Classifier.ipynb (100%) rename {src/model_dev => model_dev}/BERT_Pretraining.ipynb (100%) rename {src/model_dev => model_dev}/BERT_Summarizer.ipynb (100%) rename {src/model_dev => model_dev}/Climate_Casual_Sentence_Classifier.ipynb (100%) rename {src/model_dev => model_dev}/TF_to_Torch_conversion.ipynb (100%) delete mode 100644 src/utils/CSVUtils.ipynb delete mode 100644 src/utils/DiffbotUtils.ipynb delete mode 100644 src/utils/JSONLUtils.ipynb delete mode 100644 src/utils/PocketUtils.ipynb delete mode 100644 src/utils/SentenceUtils.ipynb delete mode 100644 src/utils/diffbot_scraping_articles.ipynb diff --git a/cm_nlp/__init__.py b/cm_nlp/__init__.py new file mode 100644 index 0000000..d310fdd --- /dev/null +++ b/cm_nlp/__init__.py @@ -0,0 +1 @@ +# package \ No newline at end of file diff --git a/src/tools/README.md b/cm_nlp/tools/README.md similarity index 100% rename from src/tools/README.md rename to cm_nlp/tools/README.md diff --git a/src/tools/preprocess_data/data_pipeline.py b/cm_nlp/tools/ontology_graph/__init__.py similarity index 100% rename from src/tools/preprocess_data/data_pipeline.py rename to cm_nlp/tools/ontology_graph/__init__.py diff --git a/src/tools/ontology_graph/climate_mind_OWL_to_networkx.py b/cm_nlp/tools/ontology_graph/climate_mind_OWL_to_networkx.py similarity index 100% rename from src/tools/ontology_graph/climate_mind_OWL_to_networkx.py rename to cm_nlp/tools/ontology_graph/climate_mind_OWL_to_networkx.py diff --git a/src/tools/ontology_graph/install_graphviz.py b/cm_nlp/tools/ontology_graph/install_graphviz.py similarity index 100% rename from src/tools/ontology_graph/install_graphviz.py rename to cm_nlp/tools/ontology_graph/install_graphviz.py diff --git a/src/tools/ontology_graph/tests_output_folder/Climate_Mind_DiGraph.gpickle b/cm_nlp/tools/ontology_graph/tests_output_folder/Climate_Mind_DiGraph.gpickle similarity index 100% rename from src/tools/ontology_graph/tests_output_folder/Climate_Mind_DiGraph.gpickle rename to cm_nlp/tools/ontology_graph/tests_output_folder/Climate_Mind_DiGraph.gpickle diff --git a/src/tools/ontology_graph/tests_output_folder/Climate_Mind_Digraph_Test_Ont.json b/cm_nlp/tools/ontology_graph/tests_output_folder/Climate_Mind_Digraph_Test_Ont.json similarity index 100% rename from src/tools/ontology_graph/tests_output_folder/Climate_Mind_Digraph_Test_Ont.json rename to cm_nlp/tools/ontology_graph/tests_output_folder/Climate_Mind_Digraph_Test_Ont.json diff --git a/src/tools/ontology_graph/tests_output_folder/output.csv b/cm_nlp/tools/ontology_graph/tests_output_folder/output.csv similarity index 100% rename from src/tools/ontology_graph/tests_output_folder/output.csv rename to cm_nlp/tools/ontology_graph/tests_output_folder/output.csv diff --git a/src/tools/preprocess_data/coreNLP/coreNLP_python.py b/cm_nlp/tools/preprocess_data/coreNLP/coreNLP_python.py similarity index 100% rename from src/tools/preprocess_data/coreNLP/coreNLP_python.py rename to cm_nlp/tools/preprocess_data/coreNLP/coreNLP_python.py diff --git a/src/tools/preprocess_data/coref_resolution.py b/cm_nlp/tools/preprocess_data/coref_resolution.py similarity index 100% rename from src/tools/preprocess_data/coref_resolution.py rename to cm_nlp/tools/preprocess_data/coref_resolution.py diff --git a/cm_nlp/tools/preprocess_data/data_pipeline.py b/cm_nlp/tools/preprocess_data/data_pipeline.py new file mode 100644 index 0000000..e69de29 diff --git a/src/tools/preprocess_data/example_data.jsonl b/cm_nlp/tools/preprocess_data/example_data.jsonl similarity index 100% rename from src/tools/preprocess_data/example_data.jsonl rename to cm_nlp/tools/preprocess_data/example_data.jsonl diff --git a/src/tools/preprocess_data/helper_functions.py b/cm_nlp/tools/preprocess_data/helper_functions.py similarity index 100% rename from src/tools/preprocess_data/helper_functions.py rename to cm_nlp/tools/preprocess_data/helper_functions.py diff --git a/src/tools/prodigy_deployments/entity_checkin_one/Dockerfile b/cm_nlp/tools/prodigy_deployments/entity_checkin_one/Dockerfile similarity index 100% rename from src/tools/prodigy_deployments/entity_checkin_one/Dockerfile rename to cm_nlp/tools/prodigy_deployments/entity_checkin_one/Dockerfile diff --git a/src/tools/prodigy_deployments/entity_checkin_one/checkin_one.jsonl b/cm_nlp/tools/prodigy_deployments/entity_checkin_one/checkin_one.jsonl similarity index 100% rename from src/tools/prodigy_deployments/entity_checkin_one/checkin_one.jsonl rename to cm_nlp/tools/prodigy_deployments/entity_checkin_one/checkin_one.jsonl diff --git a/src/tools/prodigy_deployments/entity_checkin_one/help.txt b/cm_nlp/tools/prodigy_deployments/entity_checkin_one/help.txt similarity index 100% rename from src/tools/prodigy_deployments/entity_checkin_one/help.txt rename to cm_nlp/tools/prodigy_deployments/entity_checkin_one/help.txt diff --git a/src/tools/prodigy_deployments/prodigy/ClimateBERT_cause_effect_prediction_pos_sentences.jsonl b/cm_nlp/tools/prodigy_deployments/prodigy/ClimateBERT_cause_effect_prediction_pos_sentences.jsonl similarity index 100% rename from src/tools/prodigy_deployments/prodigy/ClimateBERT_cause_effect_prediction_pos_sentences.jsonl rename to cm_nlp/tools/prodigy_deployments/prodigy/ClimateBERT_cause_effect_prediction_pos_sentences.jsonl diff --git a/src/tools/prodigy_deployments/prodigy/Dockerfile b/cm_nlp/tools/prodigy_deployments/prodigy/Dockerfile similarity index 100% rename from src/tools/prodigy_deployments/prodigy/Dockerfile rename to cm_nlp/tools/prodigy_deployments/prodigy/Dockerfile diff --git a/src/tools/prodigy_deployments/prodigy/data.jsonl b/cm_nlp/tools/prodigy_deployments/prodigy/data.jsonl similarity index 100% rename from src/tools/prodigy_deployments/prodigy/data.jsonl rename to cm_nlp/tools/prodigy_deployments/prodigy/data.jsonl diff --git a/src/tools/prodigy_deployments/prodigy/data_split_on_sentences_latest.jsonl b/cm_nlp/tools/prodigy_deployments/prodigy/data_split_on_sentences_latest.jsonl similarity index 100% rename from src/tools/prodigy_deployments/prodigy/data_split_on_sentences_latest.jsonl rename to cm_nlp/tools/prodigy_deployments/prodigy/data_split_on_sentences_latest.jsonl diff --git a/src/tools/prodigy_deployments/prodigy/prodigy_empty.json b/cm_nlp/tools/prodigy_deployments/prodigy/prodigy_empty.json similarity index 100% rename from src/tools/prodigy_deployments/prodigy/prodigy_empty.json rename to cm_nlp/tools/prodigy_deployments/prodigy/prodigy_empty.json diff --git a/src/tools/prodigy_deployments/prodigy_app2/Dockerfile b/cm_nlp/tools/prodigy_deployments/prodigy_app2/Dockerfile similarity index 100% rename from src/tools/prodigy_deployments/prodigy_app2/Dockerfile rename to cm_nlp/tools/prodigy_deployments/prodigy_app2/Dockerfile diff --git a/src/tools/prodigy_deployments/prodigy_app2/causal_classification_remaining_md_1150_dataset.jsonl b/cm_nlp/tools/prodigy_deployments/prodigy_app2/causal_classification_remaining_md_1150_dataset.jsonl similarity index 100% rename from src/tools/prodigy_deployments/prodigy_app2/causal_classification_remaining_md_1150_dataset.jsonl rename to cm_nlp/tools/prodigy_deployments/prodigy_app2/causal_classification_remaining_md_1150_dataset.jsonl diff --git a/src/tools/prodigy_deployments/prodigy_temp_deployment/200_sentences_cause_effect_rel_150421.jsonl b/cm_nlp/tools/prodigy_deployments/prodigy_temp_deployment/200_sentences_cause_effect_rel_150421.jsonl similarity index 100% rename from src/tools/prodigy_deployments/prodigy_temp_deployment/200_sentences_cause_effect_rel_150421.jsonl rename to cm_nlp/tools/prodigy_deployments/prodigy_temp_deployment/200_sentences_cause_effect_rel_150421.jsonl diff --git a/src/tools/prodigy_deployments/prodigy_temp_deployment/Dockerfile b/cm_nlp/tools/prodigy_deployments/prodigy_temp_deployment/Dockerfile similarity index 100% rename from src/tools/prodigy_deployments/prodigy_temp_deployment/Dockerfile rename to cm_nlp/tools/prodigy_deployments/prodigy_temp_deployment/Dockerfile diff --git a/src/tools/prodigy_deployments/workshop_examples/Dockerfile b/cm_nlp/tools/prodigy_deployments/workshop_examples/Dockerfile similarity index 100% rename from src/tools/prodigy_deployments/workshop_examples/Dockerfile rename to cm_nlp/tools/prodigy_deployments/workshop_examples/Dockerfile diff --git a/src/tools/prodigy_deployments/workshop_examples/help.txt b/cm_nlp/tools/prodigy_deployments/workshop_examples/help.txt similarity index 100% rename from src/tools/prodigy_deployments/workshop_examples/help.txt rename to cm_nlp/tools/prodigy_deployments/workshop_examples/help.txt diff --git a/src/tools/prodigy_deployments/workshop_examples/ordered_workshop_examples_simple_one_from_final.jsonl b/cm_nlp/tools/prodigy_deployments/workshop_examples/ordered_workshop_examples_simple_one_from_final.jsonl similarity index 100% rename from src/tools/prodigy_deployments/workshop_examples/ordered_workshop_examples_simple_one_from_final.jsonl rename to cm_nlp/tools/prodigy_deployments/workshop_examples/ordered_workshop_examples_simple_one_from_final.jsonl diff --git a/src/tools/prodigy_deployments/workshop_examples/workshop.jsonl b/cm_nlp/tools/prodigy_deployments/workshop_examples/workshop.jsonl similarity index 100% rename from src/tools/prodigy_deployments/workshop_examples/workshop.jsonl rename to cm_nlp/tools/prodigy_deployments/workshop_examples/workshop.jsonl diff --git a/src/utils/README.md b/cm_nlp/utils/README.md similarity index 100% rename from src/utils/README.md rename to cm_nlp/utils/README.md diff --git a/cm_nlp/utils/__init__.py b/cm_nlp/utils/__init__.py new file mode 100644 index 0000000..d310fdd --- /dev/null +++ b/cm_nlp/utils/__init__.py @@ -0,0 +1 @@ +# package \ No newline at end of file diff --git a/src/utils/analyze_jsonl.py b/cm_nlp/utils/analyze_jsonl.py similarity index 100% rename from src/utils/analyze_jsonl.py rename to cm_nlp/utils/analyze_jsonl.py diff --git a/src/utils/annotated_csv_to_jsonl.py b/cm_nlp/utils/annotated_csv_to_jsonl.py similarity index 100% rename from src/utils/annotated_csv_to_jsonl.py rename to cm_nlp/utils/annotated_csv_to_jsonl.py diff --git a/cm_nlp/utils/database_download/__init__.py b/cm_nlp/utils/database_download/__init__.py new file mode 100644 index 0000000..d310fdd --- /dev/null +++ b/cm_nlp/utils/database_download/__init__.py @@ -0,0 +1 @@ +# package \ No newline at end of file diff --git a/src/utils/database_download/cm-label-eval_download.a80a0f12-c8ad-4c58-a458-27170af442da.jsonl b/cm_nlp/utils/database_download/cm-label-eval_download.a80a0f12-c8ad-4c58-a458-27170af442da.jsonl similarity index 100% rename from src/utils/database_download/cm-label-eval_download.a80a0f12-c8ad-4c58-a458-27170af442da.jsonl rename to cm_nlp/utils/database_download/cm-label-eval_download.a80a0f12-c8ad-4c58-a458-27170af442da.jsonl diff --git a/src/utils/database_download/cm_cause_effect_rel_download.c1e8d0c0-95d1-4ca1-9774-f712740939a3.jsonl b/cm_nlp/utils/database_download/cm_cause_effect_rel_download.c1e8d0c0-95d1-4ca1-9774-f712740939a3.jsonl similarity index 100% rename from src/utils/database_download/cm_cause_effect_rel_download.c1e8d0c0-95d1-4ca1-9774-f712740939a3.jsonl rename to cm_nlp/utils/database_download/cm_cause_effect_rel_download.c1e8d0c0-95d1-4ca1-9774-f712740939a3.jsonl diff --git a/src/utils/database_download/dataset_downloads/cm_cause_effect_rel_download.04bf02d9-d2cb-4c24-9775-f0bd06e0ba7f.jsonl b/cm_nlp/utils/database_download/dataset_downloads/cm_cause_effect_rel_download.04bf02d9-d2cb-4c24-9775-f0bd06e0ba7f.jsonl similarity index 100% rename from src/utils/database_download/dataset_downloads/cm_cause_effect_rel_download.04bf02d9-d2cb-4c24-9775-f0bd06e0ba7f.jsonl rename to cm_nlp/utils/database_download/dataset_downloads/cm_cause_effect_rel_download.04bf02d9-d2cb-4c24-9775-f0bd06e0ba7f.jsonl diff --git a/src/utils/database_download/dataset_scraper.py b/cm_nlp/utils/database_download/dataset_scraper.py similarity index 100% rename from src/utils/database_download/dataset_scraper.py rename to cm_nlp/utils/database_download/dataset_scraper.py diff --git a/src/utils/database_download/prodigy_empty.json b/cm_nlp/utils/database_download/prodigy_empty.json similarity index 100% rename from src/utils/database_download/prodigy_empty.json rename to cm_nlp/utils/database_download/prodigy_empty.json diff --git a/src/utils/diffbot_scraping_articles.py b/cm_nlp/utils/diffbot_scraping_articles.py similarity index 100% rename from src/utils/diffbot_scraping_articles.py rename to cm_nlp/utils/diffbot_scraping_articles.py diff --git a/src/utils/diversity_analysis.py b/cm_nlp/utils/diversity_analysis.py similarity index 100% rename from src/utils/diversity_analysis.py rename to cm_nlp/utils/diversity_analysis.py diff --git a/src/utils/to_delete/pocket_extract.py b/cm_nlp/utils/pocket_extract.py similarity index 100% rename from src/utils/to_delete/pocket_extract.py rename to cm_nlp/utils/pocket_extract.py diff --git a/src/utils/process_extracted_text.py b/cm_nlp/utils/process_extracted_text.py similarity index 100% rename from src/utils/process_extracted_text.py rename to cm_nlp/utils/process_extracted_text.py diff --git a/src/utils/split_sentences.py b/cm_nlp/utils/split_sentences.py similarity index 100% rename from src/utils/split_sentences.py rename to cm_nlp/utils/split_sentences.py diff --git a/src/model_dev/BERT_Climate_Classifier.ipynb b/model_dev/BERT_Climate_Classifier.ipynb similarity index 100% rename from src/model_dev/BERT_Climate_Classifier.ipynb rename to model_dev/BERT_Climate_Classifier.ipynb diff --git a/src/model_dev/BERT_Pretraining.ipynb b/model_dev/BERT_Pretraining.ipynb similarity index 100% rename from src/model_dev/BERT_Pretraining.ipynb rename to model_dev/BERT_Pretraining.ipynb diff --git a/src/model_dev/BERT_Summarizer.ipynb b/model_dev/BERT_Summarizer.ipynb similarity index 100% rename from src/model_dev/BERT_Summarizer.ipynb rename to model_dev/BERT_Summarizer.ipynb diff --git a/src/model_dev/Climate_Casual_Sentence_Classifier.ipynb b/model_dev/Climate_Casual_Sentence_Classifier.ipynb similarity index 100% rename from src/model_dev/Climate_Casual_Sentence_Classifier.ipynb rename to model_dev/Climate_Casual_Sentence_Classifier.ipynb diff --git a/src/model_dev/TF_to_Torch_conversion.ipynb b/model_dev/TF_to_Torch_conversion.ipynb similarity index 100% rename from src/model_dev/TF_to_Torch_conversion.ipynb rename to model_dev/TF_to_Torch_conversion.ipynb diff --git a/src/utils/CSVUtils.ipynb b/src/utils/CSVUtils.ipynb deleted file mode 100644 index a6c5fbe..0000000 --- a/src/utils/CSVUtils.ipynb +++ /dev/null @@ -1,39 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import json\n", - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#export\n", - "def csv_to_jsonl(input_file, output_file):\n", - " \"\"\" Turns a CSV into a JSONL file\"\"\"\n", - " sentences = pd.read_csv(input_file, sep = \",\")\n", - " sentence_list = sentences[sentences.Pred == 1].text.tolist()\n", - "\n", - " text_list = []\n", - " for sentence in sentence_list:\n", - " text_list.append({\"text\": sentence})\n", - "\n", - "\n", - " with open(output_file, 'w') as f:\n", - " for item in text_list:\n", - " f.write(json.dumps(item) + \"\\n\")\n" - ] - } - ], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/src/utils/DiffbotUtils.ipynb b/src/utils/DiffbotUtils.ipynb deleted file mode 100644 index e7e0067..0000000 --- a/src/utils/DiffbotUtils.ipynb +++ /dev/null @@ -1,16 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# hide" - ] - } - ], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/src/utils/JSONLUtils.ipynb b/src/utils/JSONLUtils.ipynb deleted file mode 100644 index a97307c..0000000 --- a/src/utils/JSONLUtils.ipynb +++ /dev/null @@ -1,16 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#hide" - ] - } - ], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/src/utils/PocketUtils.ipynb b/src/utils/PocketUtils.ipynb deleted file mode 100644 index 9273d83..0000000 --- a/src/utils/PocketUtils.ipynb +++ /dev/null @@ -1,50 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#export\n", - "def url(endpoint):\n", - " \"\"\"Returns the Pocket URL endpoint\"\"\"\n", - " return \"https://getpocket.com/v3/\" + endpoint" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#export \n", - "def authorize(consumer_key, headers = {\"X-Accept\": \"application/json\"}):\n", - " \"\"\" Creates an authorization token to access our Pocket article database.\"\"\"\n", - " redirect_uri = \"https://google.com\"\n", - " req = requests.post(url(\"oauth/request\"), data={\n", - " \"consumer_key\": consumer_key,\n", - " \"redirect_uri\": redirect_uri\n", - " }, headers=headers)\n", - " request_token = req.json()[\"code\"]\n", - " print(\"<<< After signing in at link below press ENTER >>>\")\n", - " print(f\"https://getpocket.com/auth/authorize?request_token={request_token}&redirect_uri={redirect_uri}\")\n", - " input() # super hacky way of making it wait\n", - " # After authenticating the request_token ask for an access token\n", - " res = requests.post(url(\"oauth/authorize\"), data={\n", - " \"consumer_key\": consumer_key,\n", - " \"code\": request_token,\n", - " }, headers=headers)\n", - "\n", - " if not res.ok:\n", - " print(\"ERROR: Authorization failed:\", req.text)\n", - " sys.exit(1)\n", - "\n", - " return res.json()[\"access_token\"]" - ] - } - ], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/src/utils/SentenceUtils.ipynb b/src/utils/SentenceUtils.ipynb deleted file mode 100644 index a97307c..0000000 --- a/src/utils/SentenceUtils.ipynb +++ /dev/null @@ -1,16 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#hide" - ] - } - ], - "metadata": {}, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/src/utils/diffbot_scraping_articles.ipynb b/src/utils/diffbot_scraping_articles.ipynb deleted file mode 100644 index d804a53..0000000 --- a/src/utils/diffbot_scraping_articles.ipynb +++ /dev/null @@ -1,279 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Original Author: Mukut Mukherjee" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pandas as pd" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data=pd.read_csv('cm_pocket_export_23-11-2020.csv',index_col=0)\n", - "data.info()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "solution_index=[]\n", - "for index,row in data.iterrows():\n", - " tags=row['tags']\n", - " if 'cause'in tags:\n", - " solution_index.append(index)\n", - " elif 'causes' in tags:\n", - " solution_index.append(index)\n", - " elif 'Cause' in tags:\n", - " solution_index.append(index)\n", - " elif 'Causes' in tags:\n", - " solution_index.append(index)\n", - " elif 'caused' in tags:\n", - " solution_index.append(index)\n", - " elif 'causing' in tags:\n", - " solution_index.append(index)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "solution=data.loc[solution_index]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "len(solution)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#when scraping all articles\n", - "solution = data" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fb_string='https://www.facebook.com/'\n", - "fb_solution_index=[]\n", - "for index,row in solution.iterrows():\n", - " if fb_string in row['given_url']:\n", - " fb_solution_index.append(index)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "solution_wo_fb=solution.drop(index=fb_solution_index)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "len(solution_wo_fb)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "solution_wo_fb.to_csv('pocket_11_july_causes.csv')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import requests " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "len(articles)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "token='8e24add53f9f80fed9598eab3e1356f6'\n", - "URL='https://api.diffbot.com/v3/article'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time\n", - "from tqdm import tqdm" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#data_to_store=[]\n", - "#articles=solution_wo_fb['resolved_url'].values.tolist()\n", - "#arricles_processed=[]\n", - "\n", - "for x in tqdm(range(2206, 3084)):\n", - " PARAMS = {'token':token,'url':articles[x]} \n", - " r = requests.get(url = URL, params = PARAMS) \n", - " data = r.json()\n", - " data_to_store.append(data)\n", - " arricles_processed.append(articles[x])\n", - " " - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data_to_store[2204]['objects'][0]['text']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "arricles_processed" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data.keys()" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data_to_store[4]['objects'][0]['images'][0]['url']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data_to_store[4]['request']['pageUrl']" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "data_to_store[4]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pickle" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "pickle.dump( data_to_store, open( \"all_pocket.p\", \"wb\" ) )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "index error: 2205" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -} From a8b6ad7eba6f847844a4bf26db9a09cb35b63990 Mon Sep 17 00:00:00 2001 From: Anthony Hevia Date: Sun, 17 Oct 2021 13:49:22 -0700 Subject: [PATCH 2/2] forgot a file --- cm_nlp/tools/ontology_graph/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/cm_nlp/tools/ontology_graph/__init__.py b/cm_nlp/tools/ontology_graph/__init__.py index e69de29..d310fdd 100644 --- a/cm_nlp/tools/ontology_graph/__init__.py +++ b/cm_nlp/tools/ontology_graph/__init__.py @@ -0,0 +1 @@ +# package \ No newline at end of file