From df80836adaa126094f329f040f3049122efd1d1c Mon Sep 17 00:00:00 2001 From: hudssntao Date: Wed, 1 Nov 2023 01:35:24 -0400 Subject: [PATCH 1/5] Added pre-commit files with black, tested run commands which reformatted several files --- .github/workflows/pre-commit.yml | 17 +++++++++++++++++ .pre-commit-config.yaml | 5 +++++ 2 files changed, 22 insertions(+) create mode 100644 .github/workflows/pre-commit.yml create mode 100644 .pre-commit-config.yaml diff --git a/.github/workflows/pre-commit.yml b/.github/workflows/pre-commit.yml new file mode 100644 index 0000000..6e852cb --- /dev/null +++ b/.github/workflows/pre-commit.yml @@ -0,0 +1,17 @@ +name: pre-commit +on: + pull_request: + push: + branches: main +permissions: + contents: read +jobs: + pre-commit: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: "3.9" + - run: pip install pre-commit -r requirements.txt + - run: pre-commit run --all-files \ No newline at end of file diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..7ac9b1e --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,5 @@ +repos: +- repo: https://github.com/psf/black + rev: 23.10.1 + hooks: + - id: black \ No newline at end of file From c9f54ed260c72f01f6891c31f280f31eda623d9e Mon Sep 17 00:00:00 2001 From: Hudson Tao Date: Wed, 1 Nov 2023 12:22:35 -0400 Subject: [PATCH 2/5] Formatted files with black. --- setup.py | 2 +- .../PKG-INFO | 23 +++++++++++++++++++ .../SOURCES.txt | 22 ++++++++++++++++++ .../dependency_links.txt | 1 + src/prompt_systematic_review/keywords.py | 20 ++++++++-------- src/prompt_systematic_review/utils.py | 8 +++---- tests/test_utils.py | 8 ++++--- 7 files changed, 66 insertions(+), 18 deletions(-) create mode 100644 src/prompt_systematic_review.egg-info/PKG-INFO create mode 100644 src/prompt_systematic_review.egg-info/SOURCES.txt create mode 100644 src/prompt_systematic_review.egg-info/dependency_links.txt diff --git a/setup.py b/setup.py index fc1f76c..6068493 100644 --- a/setup.py +++ b/setup.py @@ -1,3 +1,3 @@ from setuptools import setup -setup() \ No newline at end of file +setup() diff --git a/src/prompt_systematic_review.egg-info/PKG-INFO b/src/prompt_systematic_review.egg-info/PKG-INFO new file mode 100644 index 0000000..2673bfd --- /dev/null +++ b/src/prompt_systematic_review.egg-info/PKG-INFO @@ -0,0 +1,23 @@ +Metadata-Version: 2.1 +Name: prompt-systematic-review +Version: 0.1.0 +Summary: a systematic review of prompting +Home-page: https://github.com/trigaten/Prompt_Systematic_Review/ +Author: trigaten +Author-email: sanderschulhoff@gmail.com +License: MIT +Classifier: Development Status :: 1 - Planning +Classifier: Environment :: Console +Classifier: Intended Audience :: Science/Research +Classifier: Natural Language :: English +Classifier: Programming Language :: Python :: 3 +Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence +Classifier: Topic :: Scientific/Engineering :: Information Analysis +Description-Content-Type: text/markdown +License-File: LICENSE.txt + +# Prompt Engineering Survey + +## blacklist.csv + +Papers we should not include due to being poorly written or AI generated diff --git a/src/prompt_systematic_review.egg-info/SOURCES.txt b/src/prompt_systematic_review.egg-info/SOURCES.txt new file mode 100644 index 0000000..1c3493f --- /dev/null +++ b/src/prompt_systematic_review.egg-info/SOURCES.txt @@ -0,0 +1,22 @@ +LICENSE.txt +MANIFEST.in +README.md +setup.cfg +setup.py +examples/search_pe_papers.ipynb +src/prompt_systematic_review/__init__.py +src/prompt_systematic_review/keywords.py +src/prompt_systematic_review/utils.py +src/prompt_systematic_review.egg-info/PKG-INFO +src/prompt_systematic_review.egg-info/SOURCES.txt +src/prompt_systematic_review.egg-info/dependency_links.txt +src/prompt_systematic_review.egg-info/requires.txt +src/prompt_systematic_review.egg-info/top_level.txt +src/prompt_systematic_review/__pycache__/__init__.cpython-311.pyc +src/prompt_systematic_review/__pycache__/pipeline.cpython-311.pyc +src/prompt_systematic_review/__pycache__/utils.cpython-311.pyc +tests/__init__.py +tests/test_utils.py +tests/__pycache__/__init__.cpython-311.pyc +tests/__pycache__/test_pipeline.cpython-311-pytest-7.4.0.pyc +tests/__pycache__/test_utils.cpython-311-pytest-7.4.0.pyc \ No newline at end of file diff --git a/src/prompt_systematic_review.egg-info/dependency_links.txt b/src/prompt_systematic_review.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/prompt_systematic_review.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/src/prompt_systematic_review/keywords.py b/src/prompt_systematic_review/keywords.py index b5d62aa..54b336e 100644 --- a/src/prompt_systematic_review/keywords.py +++ b/src/prompt_systematic_review/keywords.py @@ -1,11 +1,11 @@ keywords_list = [ - ["injection"], - ["gpt-3", "gpt 3"], - ["gpt-4", "gpt 4"], - ["prompt engineering"], - ["few shot", "few-shot"], - ["prompting"], - ["davinci", "text-davinci"], - ["chatgpt", "chat gpt", "gpt"], - ["generative ai", "generative artificial intelligence"] - ] \ No newline at end of file + ["injection"], + ["gpt-3", "gpt 3"], + ["gpt-4", "gpt 4"], + ["prompt engineering"], + ["few shot", "few-shot"], + ["prompting"], + ["davinci", "text-davinci"], + ["chatgpt", "chat gpt", "gpt"], + ["generative ai", "generative artificial intelligence"], +] diff --git a/src/prompt_systematic_review/utils.py b/src/prompt_systematic_review/utils.py index 3057002..5b82a11 100644 --- a/src/prompt_systematic_review/utils.py +++ b/src/prompt_systematic_review/utils.py @@ -1,14 +1,14 @@ import requests from xml.etree import ElementTree as ET -def search_arxiv(keyword,max_results=10000): + +def search_arxiv(keyword, max_results=10000): url = f"http://export.arxiv.org/api/query?search_query=all:{keyword}&start=0&max_results={max_results}" data = requests.get(url).content return data + def count_articles(data): root = ET.fromstring(data) - entries = root.findall('{http://www.w3.org/2005/Atom}entry') + entries = root.findall("{http://www.w3.org/2005/Atom}entry") return len(entries) - - diff --git a/tests/test_utils.py b/tests/test_utils.py index 5195fd3..1d93dc2 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,10 +2,12 @@ import pytest from prompt_systematic_review.utils import search_arxiv, count_articles + def test_search_arxiv(): - data = search_arxiv("covid",max_results=10) + data = search_arxiv("covid", max_results=10) assert len(data) > 0 + def test_count_articles(): - data = search_arxiv("covid",max_results=10) - assert count_articles(data) == 10 \ No newline at end of file + data = search_arxiv("covid", max_results=10) + assert count_articles(data) == 10 From 645b9ef332a9896174585bc7f0e598c0677592ff Mon Sep 17 00:00:00 2001 From: hudssntao <145732608+hudssntao@users.noreply.github.com> Date: Wed, 1 Nov 2023 13:56:08 -0400 Subject: [PATCH 3/5] Delete src/prompt_systematic_review.egg-info directory --- .../PKG-INFO | 23 ------------------- .../SOURCES.txt | 22 ------------------ .../dependency_links.txt | 1 - 3 files changed, 46 deletions(-) delete mode 100644 src/prompt_systematic_review.egg-info/PKG-INFO delete mode 100644 src/prompt_systematic_review.egg-info/SOURCES.txt delete mode 100644 src/prompt_systematic_review.egg-info/dependency_links.txt diff --git a/src/prompt_systematic_review.egg-info/PKG-INFO b/src/prompt_systematic_review.egg-info/PKG-INFO deleted file mode 100644 index 2673bfd..0000000 --- a/src/prompt_systematic_review.egg-info/PKG-INFO +++ /dev/null @@ -1,23 +0,0 @@ -Metadata-Version: 2.1 -Name: prompt-systematic-review -Version: 0.1.0 -Summary: a systematic review of prompting -Home-page: https://github.com/trigaten/Prompt_Systematic_Review/ -Author: trigaten -Author-email: sanderschulhoff@gmail.com -License: MIT -Classifier: Development Status :: 1 - Planning -Classifier: Environment :: Console -Classifier: Intended Audience :: Science/Research -Classifier: Natural Language :: English -Classifier: Programming Language :: Python :: 3 -Classifier: Topic :: Scientific/Engineering :: Artificial Intelligence -Classifier: Topic :: Scientific/Engineering :: Information Analysis -Description-Content-Type: text/markdown -License-File: LICENSE.txt - -# Prompt Engineering Survey - -## blacklist.csv - -Papers we should not include due to being poorly written or AI generated diff --git a/src/prompt_systematic_review.egg-info/SOURCES.txt b/src/prompt_systematic_review.egg-info/SOURCES.txt deleted file mode 100644 index 1c3493f..0000000 --- a/src/prompt_systematic_review.egg-info/SOURCES.txt +++ /dev/null @@ -1,22 +0,0 @@ -LICENSE.txt -MANIFEST.in -README.md -setup.cfg -setup.py -examples/search_pe_papers.ipynb -src/prompt_systematic_review/__init__.py -src/prompt_systematic_review/keywords.py -src/prompt_systematic_review/utils.py -src/prompt_systematic_review.egg-info/PKG-INFO -src/prompt_systematic_review.egg-info/SOURCES.txt -src/prompt_systematic_review.egg-info/dependency_links.txt -src/prompt_systematic_review.egg-info/requires.txt -src/prompt_systematic_review.egg-info/top_level.txt -src/prompt_systematic_review/__pycache__/__init__.cpython-311.pyc -src/prompt_systematic_review/__pycache__/pipeline.cpython-311.pyc -src/prompt_systematic_review/__pycache__/utils.cpython-311.pyc -tests/__init__.py -tests/test_utils.py -tests/__pycache__/__init__.cpython-311.pyc -tests/__pycache__/test_pipeline.cpython-311-pytest-7.4.0.pyc -tests/__pycache__/test_utils.cpython-311-pytest-7.4.0.pyc \ No newline at end of file diff --git a/src/prompt_systematic_review.egg-info/dependency_links.txt b/src/prompt_systematic_review.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/src/prompt_systematic_review.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - From f589722e0acd512d80bac04ee5dc39fdfc7c6f5c Mon Sep 17 00:00:00 2001 From: Hudson Tao Date: Wed, 1 Nov 2023 14:30:19 -0400 Subject: [PATCH 4/5] formatted paperSource.py and test_paper.py --- src/prompt_systematic_review/paperSource.py | 52 +++++++++++++++++++++ tests/test_paper.py | 30 ++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 src/prompt_systematic_review/paperSource.py create mode 100644 tests/test_paper.py diff --git a/src/prompt_systematic_review/paperSource.py b/src/prompt_systematic_review/paperSource.py new file mode 100644 index 0000000..ab7100b --- /dev/null +++ b/src/prompt_systematic_review/paperSource.py @@ -0,0 +1,52 @@ +from abc import ABC, abstractmethod +from typing import List, Tuple, Dict, Any +from datetime import date +from prompt_systematic_review import keywords +import jellyfish as j + + +class Paper: + def __init__( + self, + title: str, + firstAuthor: str, + url: str, + dateSubmitted: date, + keyWords: List[str], + ): + self.title = title + self.firstAuthor = firstAuthor + self.url = url + self.dateSubmitted = dateSubmitted + self.keywords = keyWords + try: + assert set(keyWords) == set([k.lower() for k in keyWords]) + except: + raise ValueError("Keywords must be lowercase") + + def __str__(self): + return f"{self.title}, by {self.firstAuthor}".strip() + + def __eq__(self, other): + # this is to handle papers from different sources being the same + return ( + j.jaro_winkler_similarity(self.__str__().lower(), other.__str__().lower()) + > 0.75 + ) + + def matchingKeyWords(self): + return [ + keyword for keyword in keywords.keywords_list if keyword in self.keywords + ] + + +class PaperSource(ABC): + baseURL: str + + @abstractmethod + def getPapers(self, count: int, keyWords: List[str]) -> List[Paper]: + pass + + @abstractmethod + def getPaperSrc(self, paper: Paper) -> str: + pass diff --git a/tests/test_paper.py b/tests/test_paper.py new file mode 100644 index 0000000..4764961 --- /dev/null +++ b/tests/test_paper.py @@ -0,0 +1,30 @@ +import pytest +from prompt_systematic_review.paperSource import Paper +from datetime import date + + +def test_paper(): + paper1 = Paper( + "How to write a paper", + "Harry Parnasus", + "example.com", + date(2000, 2, 2), + ["keyword1", "keyword2"], + ) + paper2 = Paper( + "How to NOT write a paper", + "John Dickenson", + "example.com", + date(2002, 3, 3), + ["keyword1", "keyword2"], + ) + alsoPaper1 = Paper( + "How to write a paper", + "Dr. Harry Parnasus", + "https://example2.com", + date(2000, 2, 5), + ["keyword1", "keyword2"], + ) + + assert paper1 == alsoPaper1 + assert paper1 != paper2 and paper2 != alsoPaper1 From a139841271cd70083c5d6592250eb2bd27568d75 Mon Sep 17 00:00:00 2001 From: hudssntao <145732608+hudssntao@users.noreply.github.com> Date: Wed, 1 Nov 2023 14:41:16 -0400 Subject: [PATCH 5/5] Update test_paper.py --- tests/test_paper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_paper.py b/tests/test_paper.py index 448572a..4764961 100644 --- a/tests/test_paper.py +++ b/tests/test_paper.py @@ -27,4 +27,4 @@ def test_paper(): ) assert paper1 == alsoPaper1 - assert paper1 != paper2 and paper2 != alsoPaper1 \ No newline at end of file + assert paper1 != paper2 and paper2 != alsoPaper1