Bumping up to version 0.2
martiansideofthemoon committed Oct 14, 2023
2 parents 57e2880 + 78f2fb4 commit 03ff672
Showing 5 changed files with 85 additions and 15 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -66,3 +66,4 @@ fs-venv

poetry.lock

acl-publication-info.74k.parquet
11 changes: 5 additions & 6 deletions README.md
@@ -5,15 +5,15 @@
[![PyPI version factscore](https://badge.fury.io/py/factscore.svg)](https://pypi.python.org/pypi/factscore/)
[![Downloads](https://pepy.tech/badge/factscore)](https://pepy.tech/project/factscore)

This is the official release accompanying our preprint, [FActScore: Fine-grained Atomic Evaluation of Factual Precision in Long Form Text Generation](https://arxiv.org/abs/2305.14251). FActScore is available as a PIP package as well.
This is the official release accompanying our EMNLP 2023 paper, [FActScore: Fine-grained Atomic Evaluation of Factual Precision in Long Form Text Generation](https://arxiv.org/abs/2305.14251). FActScore is available as a PIP package as well.

If you find FActScore useful, please cite:
```
@article{ factscore,
@inproceedings{ factscore,
title={ {FActScore}: Fine-grained Atomic Evaluation of Factual Precision in Long Form Text Generation },
author={ Min, Sewon and Krishna, Kalpesh and Lyu, Xinxi and Lewis, Mike and Yih, Wen-tau and Koh, Pang Wei and Iyyer, Mohit and Zettlemoyer, Luke and Hajishirzi, Hannaneh },
year={ 2023 },
journal={ arXiv preprint arXiv:2305.14251 },
booktitle = { EMNLP },
url={ https://arxiv.org/abs/2305.14251 }
}
```
@@ -74,8 +74,7 @@ python -m factscore.factscorer --input_path {input_path} --model_name {estimator
- `--print_rate_limit_error`: If specified, it prints out rate limit errors from the OpenAI API.
- `--cost_estimate`: This flag controls whether the OpenAI API cost estimate shown before any calls are made accounts for the local cache. It can be `"consider_cache"` (default) or `"ignore_cache"`.
- `--abstain_detection`: This flag optionally enables automatic detection of abstained responses. By default this is disabled, but it is recommended to add your own function tailored to your model. The currently supported detectors are `"generic"` and `"perplexity_ai"`, and their implementations can be found in [`factscore/abstain_detection.py`](factscore/abstain_detection.py). There are two methods to add your own abstain function: a) clone our GitHub repository to install `factscore` locally (`pip install --editable .`), and then add your function to [`factscore/abstain_detection.py`](factscore/abstain_detection.py) directly; b) process your abstain detection outside our package, and use empty strings in the `output` key for the JSONL file used in `--input_path` (a minimal sketch of this appears after this list).

This command uses the English Wikipedia from 2023/04/01 as a knowledge source. See [this section](#To-use-a-custom-knowledge-source) to use your own database as a knowledge source!
- `--knowledge_source`: To use a knowledge source other than the default (Wikipedia - 2023/04/01), preprocess it using the [instructions below](#To-use-a-custom-knowledge-source), and then pass its name with this flag.
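
For option (b) above, a minimal sketch of preparing the input file is shown below. The file name, topic, and the `detect_abstention` helper are placeholders; the only requirement stated above is that abstained responses carry an empty string in the `output` key.

```python
import json

# Placeholder abstain detector; replace with logic tailored to your model.
def detect_abstention(text):
    return text.strip().lower().startswith("i'm sorry")

rows = [
    {"topic": "Dense Passage Retrieval for Open-Domain Question Answering",
     "output": "I'm sorry, I cannot summarize this paper."},
]

# Write the JSONL file passed to --input_path; abstentions get an empty `output`.
with open("my_generations.jsonl", "w") as f:
    for row in rows:
        text = "" if detect_abstention(row["output"]) else row["output"]
        f.write(json.dumps({"topic": row["topic"], "output": text}) + "\n")
```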

## To evaluate your own LM

@@ -143,4 +142,4 @@ print (out["respond_ratio"]) # % of responding (not abstaining from answering)
print (out["num_facts_per_response"]) # average number of atomic facts per response
```


To see an example of constructing the ACL anthology knowledge source, see [`preprocessing/preprocess_acl.py`](preprocessing/preprocess_acl.py).
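
A compact sketch of the same flow through the Python API is below; the source name, file paths, topic, and generation are placeholders, and the default `FactScorer` configuration is assumed to have OpenAI access set up as described above.

```python
from factscore.factscorer import FactScorer

fs = FactScorer()

# Register a custom knowledge source built from a JSONL file with "title" and "text" fields.
# Once the DB file is created, it can be reused by pointing `db_path` at it.
fs.register_knowledge_source("my_corpus",
                             data_path="my_corpus.jsonl",
                             db_path=None)

# Score generations against the newly registered source.
out = fs.get_score(topics=["Dense Passage Retrieval for Open-Domain Question Answering"],
                   generations=["The paper introduces a dense retriever for open-domain QA."],
                   knowledge_source="my_corpus")
print(out["score"])
```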
20 changes: 12 additions & 8 deletions factscore/factscorer.py
@@ -108,15 +108,12 @@ def get_score(self,
                  atomic_facts=None,
                  knowledge_source=None,
                  verbose=False):

        if knowledge_source is None:
            # use the default one (enwiki-20230401)
            # use the default knowledge source
            knowledge_source = "enwiki-20230401"
            if knowledge_source not in self.retrieval:
                self.register_knowledge_source(knowledge_source)
        else:
            assert knowledge_source in self.retrieval, \
                f"{knowledge_source} is not registered yet. Please use `register_knowledge_source()` function to register it with a database"

        if knowledge_source not in self.retrieval:
            self.register_knowledge_source(knowledge_source)

        if type(topics)==type(generations)==str:
            topics = [topics]
@@ -294,6 +291,10 @@ def _get_score(self, topic, generation, atomic_facts, knowledge_source, cost_est
    parser.add_argument('--cache_dir',
                        type=str,
                        default=".cache/factscore/")
    parser.add_argument('--knowledge_source',
                        type=str,
                        default=None)

    parser.add_argument('--cost_estimate',
                        type=str,
@@ -351,12 +352,15 @@ def _get_score(self, topic, generation, atomic_facts, knowledge_source, cost_est
                       generations=generations,
                       gamma=args.gamma,
                       atomic_facts=atomic_facts if args.use_atomic_facts else None,
                       knowledge_source=args.knowledge_source,
                       verbose=args.verbose)
    logging.critical("FActScore = %.1f%%" % (100*out["score"]))
    if "init_score" in out:
        logging.critical("FActScore w/o length penalty = %.1f%%" % (100*out["init_score"]))
    logging.critical("Respond ratio = %.1f%%" % (100*out["respond_ratio"]))
    logging.critical("# Atomic facts per valid response = %.1f" % (out["num_facts_per_response"]))

    # Save out as a json file
    with open(args.input_path.replace(".jsonl", f"_factscore_output.json"), 'w') as f:
        f.write(json.dumps(out) + "\n")

66 changes: 66 additions & 0 deletions preprocessing/preprocess_acl.py
@@ -0,0 +1,66 @@
import pandas as pd
import tqdm
import json
import openai
from factscore.openai_lm import call_ChatGPT
from factscore.factscorer import FactScorer

# File downloaded from https://github.com/shauryr/ACL-anthology-corpus
# https://drive.google.com/file/d/1CFCzNGlTls0H-Zcaem4Hg_ETj4ebhcDO/view?usp=sharing
df = pd.read_parquet('acl-publication-info.74k.parquet')
titles = df['title'].tolist()
full_text = df['full_text'].tolist()

acl_corpus = []
for x, y in zip(titles, full_text):
    if x.strip() == "" or y.strip() == "":
        continue
    acl_corpus.append({"title": x, "text": y})

with open("acl_corpus.jsonl", 'w') as f:
for line in acl_corpus:
f.write(json.dumps(line) + "\n")

fs = FactScorer()
# this will create a database using your file
# once DB file is created, you can reuse it by only specifying `db_path`
fs.register_knowledge_source("acl_corpus",
                             data_path="acl_corpus.jsonl",
                             db_path=None)


prompt_titles = [
    "Dense Passage Retrieval for Open-Domain Question Answering",
    "AmbigQA: Answering Ambiguous Open-domain Questions",
    "MetaICL: Learning to Learn In Context",
    "Noisy Channel Language Model Prompting for Few-Shot Text Classification",
    "Joint Passage Ranking for Diverse Multi-Answer Retrieval",
    "Reformulating Unsupervised Style Transfer as Paraphrase Generation",
    "Syntactically Supervised Transformers for Faster Neural Machine Translation",
    "Hurdles to Progress in Long-form Question Answering",
    "Generating Question-Answer Hierarchies",
    "Do Long-Range Language Models Actually Use Long-Range Context?"
]

prompts_list = []

for title in prompt_titles:
    prompts_list.append(f"Give me a summary of the research paper titled \"{title}\".")

with open("api.key", 'r') as f:
api_key = f.readline()
openai.api_key = api_key.strip()

responses = []
for ptitle, prompt in tqdm.tqdm(zip(prompt_titles, prompts_list)):
    message = [{"role": "user", "content": prompt}]
    response = call_ChatGPT(message, model_name="gpt-3.5-turbo-0301")
    responses.append({
        "topic": ptitle,
        "output": response["choices"][0]["message"]["content"]
    })

# write the ChatGPT outputs to a jsonl file
with open("acl_chatgpt_outputs.jsonl", 'w') as f:
    for line in responses:
        f.write(json.dumps(line) + "\n")
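
A possible follow-up, not part of the committed script: score the collected ChatGPT outputs against the `acl_corpus` source registered above. This sketch assumes the `fs` instance has working OpenAI access.

```python
out = fs.get_score(topics=[r["topic"] for r in responses],
                   generations=[r["output"] for r in responses],
                   knowledge_source="acl_corpus")
print(out["score"])                   # FActScore
print(out["respond_ratio"])           # % of responses that did not abstain
print(out["num_facts_per_response"])  # average number of atomic facts per response
```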
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -1,6 +1,6 @@
[tool.poetry]
name = "factscore"
version = "0.1.9"
version = "0.2.0"
description = "FactScore is an automatic evaluation metric for factual precision in long-form text generation. It uses large language models and retrieval to break down generations into atomic facts and then measure the correctness with respect to a knowledge source (like Wikipedia)."
authors = ["Sewon Min <[email protected]>", "Kalpesh Krishna <[email protected]>", "Xinxi Lyu <[email protected]>"]
license = "MIT"
