Commit 90786c3

Merge pull request #12 from shmsw25/api-cost-estimates

Adding cost estimates for OpenAI API usage

shmsw25 authored Jun 6, 2023
2 parents 91637f8 + 026faff commit 90786c3

Showing 2 changed files with 95 additions and 27 deletions.
46 changes: 30 additions & 16 deletions factscore/atomic_facts.py
@@ -41,17 +41,16 @@ def __init__(self, key_path, demon_dir, model_name=None, gpt3_cache_file=None):
     def save_cache(self):
         self.openai_lm.save_cache()
 
-    def run(self, generation):
-        """Convert the generation into a set of atomic facts."""
+    def run(self, generation, cost_estimate=None):
+        """Convert the generation into a set of atomic facts. Return a total words cost if cost_estimate != None."""
         if self.preprocess_fn:
             paragraphs = self.preprocess(generation)
         else:
             paragraphs = [para.strip() for para in generation.split("\n") if len(para.strip()) > 0]
 
-        atomic_facts, para_breaks = self.get_atomic_facts_from_paragraph(paragraphs)
-        return atomic_facts, para_breaks
+        return self.get_atomic_facts_from_paragraph(paragraphs, cost_estimate=cost_estimate)
 
-    def get_atomic_facts_from_paragraph(self, paragraphs):
+    def get_atomic_facts_from_paragraph(self, paragraphs, cost_estimate=None):
         sentences = []
         para_breaks = []
         for para_idx, paragraph in enumerate(paragraphs):
@@ -71,9 +70,14 @@ def get_atomic_facts_from_paragraph(self, paragraphs):

         sentences += curr_sentences
 
-        atoms = self.get_init_atomic_facts_from_sentence([sent for i, sent in enumerate(sentences) if not (not self.is_bio and ( \
-            (i==0 and (sent.startswith("Sure") or sent.startswith("Here are"))) or \
-            (i==len(sentences)-1 and (sent.startswith("Please") or sent.startswith("I hope") or sent.startswith("Here are")))))])
+        atoms_or_estimate = self.get_init_atomic_facts_from_sentence([sent for i, sent in enumerate(sentences) if not (not self.is_bio and ( \
+            (i==0 and (sent.startswith("Sure") or sent.startswith("Here are"))) or \
+            (i==len(sentences)-1 and (sent.startswith("Please") or sent.startswith("I hope") or sent.startswith("Here are")))))], cost_estimate=cost_estimate)
+
+        if cost_estimate:
+            return atoms_or_estimate
+        else:
+            atoms = atoms_or_estimate
 
         atomic_facts_pairs = []
         for i, sent in enumerate(sentences):
@@ -98,7 +102,9 @@ def get_atomic_facts_from_paragraph(self, paragraphs):
         return atomic_facts_pairs, para_breaks
 
 
-    def get_init_atomic_facts_from_sentence(self, sentences):
+    def get_init_atomic_facts_from_sentence(self, sentences, cost_estimate=None):
+        """Get the initial atomic facts from the sentences. Return a total words cost if cost_estimate != None."""
+
         is_bio = self.is_bio
         demons = self.demons
 
@@ -129,15 +135,23 @@ def get_init_atomic_facts_from_sentence(self, sentences):
             prompts.append(prompt)
             prompt_to_sent[prompt] = sentence
 
-        for prompt in prompts:
-            output, _ = self.openai_lm.generate(prompt)
-            atoms[prompt_to_sent[prompt]] = text_to_sentences(output)
+        if cost_estimate:
+            total_words_estimate = 0
+            for prompt in prompts:
+                if cost_estimate == "consider_cache" and (prompt.strip() + "_0") in self.openai_lm.cache_dict:
+                    continue
+                total_words_estimate += len(prompt.split())
+            return total_words_estimate
+        else:
+            for prompt in prompts:
+                output, _ = self.openai_lm.generate(prompt)
+                atoms[prompt_to_sent[prompt]] = text_to_sentences(output)
 
-        for key, value in demons.items():
-            if key not in atoms:
-                atoms[key] = value
+            for key, value in demons.items():
+                if key not in atoms:
+                    atoms[key] = value
 
-        return atoms
+            return atoms
 
 
 def preprocess_fn(generation, model):
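Note: with this change, AtomicFactGenerator.run becomes dual-purpose. Called normally it returns (atomic_facts, para_breaks); called with cost_estimate set, it returns a single integer (the total number of prompt words that would be sent) without issuing any API calls. A minimal usage sketch, assuming the constructor signature shown in the hunk header above; the key and cache paths here are placeholders:

    from factscore.atomic_facts import AtomicFactGenerator

    # placeholder paths; point these at your own key file and caches
    generator = AtomicFactGenerator(key_path="api.key",
                                    demon_dir=".cache/factscore/demos",
                                    gpt3_cache_file=".cache/factscore/InstructGPT.pkl")

    generation = "Albert Einstein was a physicist. He developed the theory of relativity."

    # estimate mode: returns an int and makes no API calls
    total_words = generator.run(generation, cost_estimate="consider_cache")

    # normal mode: calls the OpenAI API and returns (atomic_facts, para_breaks)
    atomic_facts, para_breaks = generator.run(generation)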
76 changes: 65 additions & 11 deletions factscore/factscorer.py
@@ -20,6 +20,7 @@ def __init__(self,
                  model_dir=".cache/factscore",
                  cache_dir=".cache/factscore",
                  openai_key="api.key",
+                 cost_estimate="consider_cache",
                  batch_size=256):
         assert model_name in ["retrieval+llama", "retrieval+llama+npm", "retrieval+ChatGPT", "npm", "retrieval+ChatGPT+npm"]
         self.model_name = model_name
@@ -36,6 +37,7 @@ def __init__(self,
             os.makedirs(cache_dir)
 
         self.af_generator = None
+        self.cost_estimate = cost_estimate
 
         if "llama" in model_name:
             self.lm = CLM("inst-llama-7B",
@@ -77,6 +79,25 @@ def register_knowledge_source(self, name="enwiki-20230401", db_path=None, data_p
"npm-single",
cache_file=os.path.join(self.cache_dir, f"npm-{name}.pkl"))


def print_cost_estimates(self, total_words, task, model):
# https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them
# Number of tokens are roughly 4/3 of the number of words
total_tokens = total_words * 4.0 / 3

# https://openai.com/pricing
# if we use davinci-003, the cost is $0.02 per 1000 tokens
# if we use gpt-3.5-turbo, the cost is $0.002 per 1000 tokens
if model == "davinci-003":
rate = 0.02
elif model == "gpt-3.5-turbo":
rate = 0.002

total_cost = total_tokens * rate / 1000

# print the total words, tokens, and cost along with rate
logging.critical("Estimated OpenAI API cost for %s ($%.3f per 1000 tokens): $%.2f for %d words and %d tokens" % (task, rate, total_cost, total_words, total_tokens))

def get_score(self,
topics,
generations,
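To make the arithmetic in print_cost_estimates concrete: a hypothetical batch of 30,000 prompt words is estimated at 30,000 × 4/3 = 40,000 tokens, which at the June 2023 rates hard-coded above comes to 40,000 × $0.02 / 1,000 = $0.80 on davinci-003 and 40,000 × $0.002 / 1,000 = $0.08 on gpt-3.5-turbo. The same calculation as a standalone snippet (the rates may have changed since this commit):

    total_words = 30000
    total_tokens = total_words * 4.0 / 3   # ~40,000 tokens

    print(total_tokens * 0.02 / 1000)      # davinci-003: $0.80
    print(total_tokens * 0.002 / 1000)     # gpt-3.5-turbo: $0.08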
@@ -108,6 +129,13 @@ def get_score(self,
                                                         demon_dir=os.path.join(self.data_dir, "demos"),
                                                         gpt3_cache_file=os.path.join(self.cache_dir, "InstructGPT.pkl"))
 
+            # estimate the total cost of atomic fact generation
+            total_words = 0
+            for gen in generations:
+                total_words += self.af_generator.run(gen, cost_estimate=self.cost_estimate)
+
+            self.print_cost_estimates(total_words, task="atomic fact generation", model="davinci-003")
+
             if verbose:
                 topics = tqdm(topics)

@@ -121,12 +149,21 @@ def get_score(self,
                 atomic_facts.append(curr_afs)
                 if len(atomic_facts) % 10 == 0:
                     self.af_generator.save_cache()
+
         assert len(atomic_facts)==len(topics)
         self.af_generator.save_cache()
 
         respond_ratio = np.mean([facts is not None for facts in atomic_facts])
 
         if "ChatGPT" in self.model_name:
+            # estimate the total cost of response generation
+            total_words = 0
+            for topic, generation, facts in zip(topics, generations, atomic_facts):
+                if facts is not None:
+                    total_words += self._get_score(topic, generation, facts, knowledge_source, cost_estimate=self.cost_estimate)
+
+            self.print_cost_estimates(total_words, task="factscore evaluation", model="gpt-3.5-turbo")
+
             if verbose:
                 topics = tqdm(topics)

@@ -142,16 +179,17 @@ def _get_score(self, topic, generation, atomic_facts, knowledge_source):
                 scores.append(score)
                 if len(scores) % 10 == 0:
                     self.save_cache()
 
         self.save_cache()
 
         return {"score": np.mean(scores),
                 "respond_ratio": respond_ratio,
                 "decisions": decisions,
-                "num_facts_per_response": np.mean([len(d) for d in decisions])}
+                "num_facts_per_response": np.mean([len(d) for d in decisions if d is not None])}
 
-    def _get_score(self, topic, generation, atomic_facts, knowledge_source):
+    def _get_score(self, topic, generation, atomic_facts, knowledge_source, cost_estimate=None):
         decisions = []
+        total_words = 0
         for atom in atomic_facts:
             atom = atom.strip()
             if self.lm:
@@ -164,6 +202,14 @@ def _get_score(self, topic, generation, atomic_facts, knowledge_source):
                 if not definition[-1] in string.punctuation:
                     definition += "."
                 prompt = "{}\n\nInput: {} True or False?\nOutput:".format(definition.strip(), atom.strip())
+
+                if cost_estimate:
+                    if cost_estimate == "consider_cache" and (prompt.strip() + "_0") not in self.lm.cache_dict:
+                        total_words += len(prompt.split())
+                    elif cost_estimate == "ignore_cache":
+                        total_words += len(prompt.split())
+                    continue
+
                 output = self.lm.generate(prompt)
 
                 if type(output[1])==np.ndarray:
@@ -195,7 +241,10 @@ def _get_score(self, topic, generation, atomic_facts, knowledge_source):

             decisions.append({"atom": atom, "is_supported": is_supported})
 
-        return decisions
+        if cost_estimate:
+            return total_words
+        else:
+            return decisions
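Both estimate paths probe the LM cache with the key prompt.strip() + "_0", which mirrors the cache-key convention the lm/openai_lm wrappers appear to use (the "_0" suffix presumably indexes the first sample for a prompt). Under "consider_cache", a cached prompt adds nothing to the estimate because it will not trigger a paid call; under "ignore_cache", every prompt is counted. A condensed sketch of that decision, with the helper name invented for illustration:

    def words_to_charge(prompt, cache_dict, cost_estimate):
        # hypothetical helper mirroring the logic above; "_0" follows
        # the cache-key convention seen in this diff
        if cost_estimate == "consider_cache" and (prompt.strip() + "_0") in cache_dict:
            return 0  # cache hit: no paid API call, so no cost
        return len(prompt.split())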

if __name__ == '__main__':

@@ -218,6 +267,10 @@ def _get_score(self, topic, generation, atomic_facts, knowledge_source):
     parser.add_argument('--cache_dir',
                         type=str,
                         default=".cache/factscore/")
+    parser.add_argument('--cost_estimate',
+                        type=str,
+                        default="consider_cache",
+                        choices=["consider_cache", "ignore_cache"])
     parser.add_argument('--use_atomic_facts',
                         action="store_true")
     parser.add_argument('--verbose',
@@ -235,12 +288,13 @@ def _get_score(self, topic, generation, atomic_facts, knowledge_source):
     logging.basicConfig(format='%(asctime)s - %(name)s - %(message)s',
                         datefmt='%m/%d/%Y %H:%M:%S',
                         level=logging.ERROR if args.print_rate_limit_error else logging.CRITICAL)
 
     fs = FactScorer(model_name=args.model_name,
                     data_dir=args.data_dir,
                     model_dir=args.model_dir,
                     cache_dir=args.cache_dir,
-                    openai_key=args.openai_key)
+                    openai_key=args.openai_key,
+                    cost_estimate=args.cost_estimate)
 
     tot = 0
     topics, generations, atomic_facts = [], [], []
@@ -264,9 +318,9 @@ def _get_score(self, topic, generation, atomic_facts, knowledge_source):
                        generations=generations,
                        atomic_facts=atomic_facts if args.use_atomic_facts else None,
                        verbose=args.verbose)
-    logging.critical("FActScore=%.1f%%" % (100*out["score"]))
-    logging.critical("Respond ratio=%.1f%%" % (100*out["respond_ratio"]))
-    logging.critical("# Atomic facts per response=%.1f" % (out["num_facts_per_response"]))
+    logging.critical("FActScore = %.1f%%" % (100*out["score"]))
+    logging.critical("Respond ratio = %.1f%%" % (100*out["respond_ratio"]))
+    logging.critical("# Atomic facts per valid response = %.1f" % (out["num_facts_per_response"]))


