def preprocess_ctx(self, ctx, max_length):
    """Trim a fewshot prompt until it fits within ``max_length`` tokens.

    Reconstructed from the flattened PATCH 1/2 hunk for
    ``JAQKETV2WithLlama2`` (the source was mangled onto single lines and
    the error-message f-string was split by a stray control character).

    Drops the oldest fewshot example — the chunk immediately after
    ``self.DESCRIPTION`` — and recurses until the tokenized prompt fits.

    :param ctx: full prompt (description + fewshot examples + QA prompt)
    :param max_length: maximum allowed token count for ``ctx``
    :returns: a prompt string whose token count is <= ``max_length``
    :raises ValueError: when the description + QA prompt alone (0-shot)
        already exceed ``max_length``
    """
    # if ctx fits in max length, return it unchanged
    if len(self.tokenizer.encode(ctx)) <= max_length:
        return ctx

    # ctx is too long: strip the description, then split the remainder on
    # the tag that separates each fewshot example
    _, remainder = ctx.split(self.DESCRIPTION, 1)
    ctxs = remainder.split(self.FEWSHOT_SEP)

    # if there is no example and still the description + QA prompt is too long, fail
    if len(ctxs) < 2:
        raise ValueError(
            f"description + QA prompt with no example (0-shot) doesn't fit in max_length. ctx: {ctx}"
        )

    # delete the first (oldest) example; the last element is the QA prompt
    # to be answered by the lm, so it must be kept
    del ctxs[0]

    # recur with one fewer example
    return self.preprocess_ctx(
        self.DESCRIPTION + self.FEWSHOT_SEP.join(ctxs), max_length
    )
- """ - assert ( - rnd is not None - ), "A `random.Random` generator argument must be provided to `rnd`" - assert not provide_description, ( - "The `provide_description` arg will be removed in future versions. To prepend " - "a custom description to the context, supply the corresponding string via the " - "`description` arg." + def fewshot_context(self, doc, num_fewshot, **kwargs): + max_num_tokens = max( + [len(self._tokenize(answer)) for answer in doc["answers"]["text"]] + ) + max_length = self.max_length - max_num_tokens + + # If the prompt is too long with fewshot examples, reduce the number of + # examples until it fits. + while num_fewshot >= 0: + ctx = super().fewshot_context(doc, num_fewshot, **kwargs) + if len(self._tokenize(ctx)) <= max_length: + doc["context"] = ctx + return ctx + num_fewshot -= 1 + + # if we got here then even 0 fewshot is too long + return ValueError( + f"0-shot prompt is too long for max length {max_length}:\n{ctx}" ) - if provide_description is not None: - # nudge people to not specify it at all - print( - "WARNING: provide_description is deprecated and will be removed in a future version in favor of description_dict" - ) - - if hasattr(self, "FEWSHOT_SEP"): - FEWSHOT_SEP = self.FEWSHOT_SEP - elif hasattr(self, "SEP"): - FEWSHOT_SEP = f"{self.SEP}{self.SEP}" - else: - FEWSHOT_SEP = "\n\n" - - if description: - description += FEWSHOT_SEP - elif hasattr(self, "DESCRIPTION"): - description = self.DESCRIPTION - else: - description = "" - if num_fewshot == 0: - labeled_examples = "" + def _tokenize(self, text, **kwargs): + encode_fn = self.tokenizer.encode + if "add_special_tokens" in inspect.getfullargspec(encode_fn).args: + encode_params = dict(add_special_tokens=False) else: - # for sets with no training docs, draw from other set *but ensure no overlap with current doc* - if self.has_training_docs(): - fewshotex = self.fewshot_examples(k=num_fewshot, rnd=rnd) - else: - if self._fewshot_docs is None: - self._fewshot_docs = list( - 
self.validation_docs() - if self.has_validation_docs() - else self.test_docs() - ) - - fewshotex = rnd.sample(self._fewshot_docs, num_fewshot + 1) - - # get rid of the doc that's the one we're evaluating, if it's in the fewshot - fewshotex = [x for x in fewshotex if x != doc][:num_fewshot] - - labeled_examples = ( - FEWSHOT_SEP.join( - [ - self.doc_to_answering_text(doc) + self.doc_to_target(doc) - for doc in fewshotex - ] - ) - + FEWSHOT_SEP - ) - - example = self.doc_to_text(doc) - return description + labeled_examples + example - - def preprocess_ctx(self, ctx, max_length): - # if ctx fits in max length, return - if len(self.tokenizer.encode(ctx)) <= max_length: - return ctx - - # if ctx is too long, split on a tag that separates each example - description, remainder = ctx.split(self.FEWSHOT_SEP, 1) - ctxs = remainder.split(self.FEWSHOT_SEP) - - # if there is no example and still the description + QA prompt is too long, fail - if len(ctxs) < 2: - raise ValueError( - f"description + QA prompt with no example (0-shot) doesn't fit in max_length. 
ctx: {ctx}" - ) - - # delete the first example, the last includes QA prompt to be answered by lm - del ctxs[0] - - # recur - return self.preprocess_ctx( - self.FEWSHOT_SEP.join([description, *ctxs]), max_length - ) + encode_params = {} + return encode_fn(text, **encode_params, **kwargs) def construct_requests(self, doc, ctx): if DYNAMIC_MAX_LENGTH == "false" or not hasattr(self.tokenizer, "encode"): continuation = rf.greedy_until(ctx, [self.SEP]) else: - encode_fn = self.tokenizer.encode - if "add_special_tokens" in inspect.getfullargspec(encode_fn).args: - encode_params = dict(add_special_tokens=False) - else: - encode_params = {} max_num_tokens = max( - [ - len(encode_fn(answer, **encode_params)) - for answer in doc["answers"]["text"] - ] + [len(self._tokenize(answer)) for answer in doc["answers"]["text"]] ) - ctx = self.preprocess_ctx(ctx, max_length=self.max_length - max_num_tokens) continuation = rf.greedy_until(ctx, [self.SEP], max_num_tokens) return continuation @@ -433,30 +349,6 @@ def doc_to_answering_text(self, doc): qa_prompt = self.doc_to_qa_prompt(doc) return f"ユーザー: {answer_candidate}{self.SEP}{qa_prompt}{self.SEP}システム: " - def preprocess_ctx(self, ctx, max_length): - # if ctx fits in max length, return - if len(self.tokenizer.encode(ctx)) <= max_length: - return ctx - - # if ctx is too long, split on a tag that separates each example - description, remainder = ctx.split(self.END_OF_DESCRIPTION, 1) - ctxs = remainder.split(self.START_OF_FEWSHOT) - - # if there is no example and still the description + QA prompt is too long, fail - if len(ctxs) < 2: - raise ValueError( - f"description + QA prompt with no example (0-shot) doesn't fit in max_length. 
ctx: {ctx}" - ) - - # delete the first example, the last includes QA prompt to be answered by lm - del ctxs[1] - - new_ctx = self.END_OF_DESCRIPTION.join( - [description, self.START_OF_FEWSHOT.join(ctxs)] - ) - # recur - return self.preprocess_ctx(new_ctx, max_length) - class JAQKETV2WithRinnaBilingualInstructionSFT(JAQKETV2WithRinnaInstructionSFT): """ @@ -489,28 +381,6 @@ class JAQKETV2WithLlama2(JAQKETV2WithJAAlpacaPrompt): DESCRIPTION = f"[INST] <>\n{SYSTEM_PROMPT}\n<>\n\n" FEWSHOT_SEP = " [INST] " - def preprocess_ctx(self, ctx, max_length): - # if ctx fits in max length, return - if len(self.tokenizer.encode(ctx)) <= max_length: - return ctx - - # if ctx is too long, split on a tag that separates each example - _, remainder = ctx.split(self.DESCRIPTION, 1) - ctxs = remainder.split(self.FEWSHOT_SEP) - # if there is no example and still the description + QA prompt is too long, fail - if len(ctxs) < 2: - raise ValueError( - f"description + QA prompt with no example (0-shot) doesn't fit in max_length. ctx: {ctx}" - ) - - # delete the first example, the last includes QA prompt to be answered by lm - del ctxs[0] - - # recur - return self.preprocess_ctx( - self.DESCRIPTION + self.FEWSHOT_SEP.join(ctxs), max_length - ) - def doc_to_text(self, doc): """ Insert the following prompt into `{{ user_msg }}`, which is based on prompt version 0.3