Measure runtime and total time
lapp0 authored and rlouf committed Oct 21, 2024
1 parent 558f984 commit 0e02ffb
Showing 12 changed files with 281 additions and 186 deletions.
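The benchmark classes in the diffs below appear to follow airspeed velocity (asv) conventions: asv builds one benchmark per combination of `params`, runs `setup` outside the measurement, and times every method whose name starts with `time_`. A minimal sketch of that shape — the class name and parameter values here are hypothetical, not taken from this repository:

class ExampleBenchmark:
    # asv runs every `time_*` method once per combination of `params`.
    params = [["model-a", "model-b"], ["case-1"]]
    param_names = ["model", "case"]
    timeout = 600  # seconds before asv aborts a single measurement

    def setup(self, model, case):
        """Runs before each timed call; excluded from the measurement."""

    def time_example(self, model, case):
        """asv reports the wall-clock duration of this call."""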
4 changes: 2 additions & 2 deletions pyproject.toml
@@ -5,9 +5,9 @@ authors = [{"name" = "The Outlines developers"}]
 description = "A benchmarking suite for structured generation libraries."
 requires-python = ">=3.10"
 dependencies = [
-    "lm-format-enforcer==0.10.6",
+    "lm-format-enforcer==0.10.7",
     "outlines==0.0.46",
-    "outlines-core==0.1.0",
+    "outlines-core",
     "torch==2.4.0",
     "transformers==4.44.0",
 ]
131 changes: 92 additions & 39 deletions src/benchmark_lfe.py
@@ -8,57 +8,110 @@
 from .data import json_cases, models, regex_cases
 
 
-class LMFormatEnforcerRegex:
-    params = [models, regex_cases]
-    param_names = ["model", "regex"]
-    timeout = 600
-
-    def setup(self, model, _):
-        """Set up the benchmark.
-
-        We convert the tokenizer during set up as this only
-        needs to be done once for a given model.
-
-        """
+class LMFormatEnforcerBenchmark:
+    def do_setup(self, model, samples):
         self.tokenizer = AutoTokenizer.from_pretrained(
             model, clean_up_tokenization_spaces=True
         )
-        self.tokenizer_data = build_token_enforcer_tokenizer_data(self.tokenizer)
+        self.all_tokenized_samples = [
+            self.tokenizer.encode(sample) for sample in samples
+        ]
+
+    def _exhaust_samples(self, token_enforcer):
+        for sample_tokens in self.all_tokenized_samples:
+            for i in range(len(sample_tokens)):
+                _ = token_enforcer.get_allowed_tokens(sample_tokens[: i + 1])
+
+    def _get_first_token(self, token_enforcer):
+        """Get the first token to verify the lazy index is fully warmed up."""
+        _ = token_enforcer.get_allowed_tokens(self.all_tokenized_samples[0][:1])
+
+
+class LMFormatEnforcerRegex(LMFormatEnforcerBenchmark):
+    params = [models, regex_cases.keys()]
+    param_names = ["model", "regex_name"]
+    timeout = 1200
+
+    def setup(self, model, regex_name):
+        samples = regex_cases[regex_name]["samples"]
+        self.do_setup(model, samples)
+
+    def _get_enforcer(self, regex_name):
+        pattern = regex_cases[regex_name]["regex"]
+        parser = RegexParser(pattern)
+        tokenizer_data = build_token_enforcer_tokenizer_data(self.tokenizer)
+        return TokenEnforcer(tokenizer_data, parser)
+
+    def time_lfe_total(self, _, regex_name):
+        enforcer = self._get_enforcer(regex_name)
+        self._exhaust_samples(enforcer)
+
+    def time_lfe_first_token(self, _, regex_name):
+        enforcer = self._get_enforcer(regex_name)
+        self._get_first_token(enforcer)
+
+
+class LMFormatEnforcerRegexRunTime(LMFormatEnforcerBenchmark):
+    """Class which warms up the enforcer in its setup step."""
+
+    _get_enforcer = LMFormatEnforcerRegex._get_enforcer
+
+    params = [models, regex_cases.keys()]
+    param_names = ["model", "regex_name"]
+    timeout = 1200
+
+    def setup(self, model, regex_name):
+        samples = regex_cases[regex_name]["samples"]
+        self.do_setup(model, samples)
+
+        # ensure warmed up so we're only measuring runtime
+        self.enforcer = self._get_enforcer(regex_name)
+        self._get_first_token(self.enforcer)
+
+    def time_lfe_runtime(self, *args):
+        self._exhaust_samples(self.enforcer)
 
-    def time_lfe(self, _, regex):
-        regex_string, regex_example = regex["regex"], regex["example"]
-        regex_example_tokens = self.tokenizer.encode(regex_example)
 
-        parser = RegexParser(regex_string)
-        token_enforcer = TokenEnforcer(self.tokenizer_data, parser)
-
-        for i in range(len(regex_example_tokens)):
-            _ = token_enforcer.get_allowed_tokens(regex_example_tokens[: i + 1])
-
-
-class LMFormatEnforcerJsonSchema:
-    params = [models, json_cases]
-    param_names = ["model", "json"]
+class LMFormatEnforcerJsonSchema(LMFormatEnforcerBenchmark):
+    params = [models, json_cases.keys()]
+    param_names = ["model", "json_schema_name"]
     timeout = 600
 
-    def setup(self, model, _):
-        """Set up the benchmark.
-
-        We convert the tokenizer during set up as this only
-        needs to be done once for a given model.
-
-        """
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            model, clean_up_tokenization_spaces=True
-        )
-        self.tokenizer_data = build_token_enforcer_tokenizer_data(self.tokenizer)
+    def setup(self, model, json_schema_name):
+        samples = json_cases[json_schema_name]["samples"]
+        self.do_setup(model, samples)
 
-    def time_lfe(self, _, json):
-        json_string, json_example = json["schema"], json["example"]
-        json_example_tokens = self.tokenizer.encode(json_example)
+    def _get_enforcer(self, json_schema_name):
+        schema = json_cases[json_schema_name]["schema"]
+        parser = JsonSchemaParser(schema)
+        tokenizer_data = build_token_enforcer_tokenizer_data(self.tokenizer)
+        return TokenEnforcer(tokenizer_data, parser)
 
-        parser = JsonSchemaParser(json_string)
-        token_enforcer = TokenEnforcer(self.tokenizer_data, parser)
+    def time_lfe_total(self, _, json_schema_name):
+        enforcer = self._get_enforcer(json_schema_name)
+        self._exhaust_samples(enforcer)
 
-        for i in range(len(json_example_tokens)):
-            _ = token_enforcer.get_allowed_tokens(json_example_tokens[: i + 1])
+    def time_lfe_first_token(self, _, json_schema_name):
+        enforcer = self._get_enforcer(json_schema_name)
+        self._get_first_token(enforcer)
+
+
+class LMFormatEnforcerJsonSchemaRunTime(LMFormatEnforcerBenchmark):
+    """Class which warms up the enforcer in its setup step."""
+
+    _get_enforcer = LMFormatEnforcerJsonSchema._get_enforcer
+
+    params = [models, json_cases.keys()]
+    param_names = ["model", "json_schema_name"]
+    timeout = 600
+
+    def setup(self, model, json_schema_name):
+        samples = json_cases[json_schema_name]["samples"]
+        self.do_setup(model, samples)
+
+        # ensure warmed up so we're only measuring runtime
+        self.enforcer = self._get_enforcer(json_schema_name)
+        self._get_first_token(self.enforcer)
+
+    def time_lfe_runtime(self, *args):
+        self._exhaust_samples(self.enforcer)
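Taken together, the new LM Format Enforcer classes split the old single measurement into three: time_lfe_total (build the enforcer, then replay every sample), time_lfe_first_token (build plus one get_allowed_tokens call, i.e. time to first token), and time_lfe_runtime (replay only, against an enforcer pre-built and warmed in setup). A standalone sketch of that split, using only calls that appear in the diff — the model name, pattern, and sample string are placeholder assumptions, not values from this repository's data module:

import time

from lmformatenforcer import RegexParser, TokenEnforcer
from lmformatenforcer.integrations.transformers import (
    build_token_enforcer_tokenizer_data,
)
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2", clean_up_tokenization_spaces=True)
tokenizer_data = build_token_enforcer_tokenizer_data(tokenizer)
sample_tokens = tokenizer.encode("555-123-4567")  # assumed sample completion

start = time.perf_counter()
enforcer = TokenEnforcer(tokenizer_data, RegexParser(r"\d{3}-\d{3}-\d{4}"))
_ = enforcer.get_allowed_tokens(sample_tokens[:1])  # warms the lazy index
first_token = time.perf_counter() - start  # roughly what time_lfe_first_token captures

start = time.perf_counter()
for i in range(len(sample_tokens)):  # replay the sample token by token
    _ = enforcer.get_allowed_tokens(sample_tokens[: i + 1])
runtime = time.perf_counter() - start  # roughly what time_lfe_runtime isolates

print(f"first token: {first_token:.4f}s, warm runtime: {runtime:.4f}s")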
144 changes: 99 additions & 45 deletions src/benchmark_outlines.py
@@ -2,6 +2,7 @@
 import json
 
 import outlines.caching as caching
+import torch
 from outlines.fsm.guide import RegexGuide
 from outlines.fsm.json_schema import build_regex_from_schema
 from outlines.models.transformers import TransformerTokenizer
@@ -10,12 +11,10 @@
 from .data import json_cases, models, regex_cases
 
 
-class OutlinesRegex:
-    params = [models, regex_cases]
-    param_names = ["model", "regex"]
-    timeout = 1200
+class OutlinesBenchmark:
+    guide_class = RegexGuide
 
-    def setup(self, model, _):
+    def do_setup(self, model, samples):
         """Set up the benchmark.
 
         We JIT-compile Numba functions and convert the vocabulary
@@ -26,59 +25,114 @@ def setup(self, model, _):
             model, clean_up_tokenization_spaces=True
         )
         self.tokenizer = TransformerTokenizer(self.tokenizer)
-        RegexGuide("a", self.tokenizer)  # JIT-compile and convert the vocabulary
+        self.guide_class("a", self.tokenizer)  # JIT-compile and convert the vocabulary
+
+        self.all_tokenized_samples = [
+            self.tokenizer.encode(sample)[0][0] for sample in samples
+        ]
+
+    def _exhaust_samples(self, guide):
+        state = guide.initial_state
+        for sample_tokens in self.all_tokenized_samples:
+            for token in sample_tokens:
+                if isinstance(token, torch.Tensor):
+                    token = token.item()
+                state = guide.get_next_state(state, token)
+                _ = guide.get_next_instruction(state)
+
+    def _get_first_token(self, guide):
+        """Get the first token to verify the lazy index is fully warmed up."""
+        state = guide.get_next_state(
+            guide.initial_state, self.all_tokenized_samples[0][0]
+        )
+        _ = guide.get_next_instruction(state)
 
-    def time_outlines(self, _, regex):
-        """Measure generation time with Outlines.
-
-        Outlines' generation time is split between compiling an index for each
-        regular expression, and walking this index while generating tokens.
-
-        """
-        caching.clear_cache()
-
-        regex_string, regex_example = regex["regex"], regex["example"]
-        regex_example_tokens = self.tokenizer.encode(regex_example)[0][0]
-        guide = RegexGuide(regex_string, self.tokenizer)
-
-        state = 0
-        for token in regex_example_tokens:
-            _ = guide.get_next_instruction(state)
-            state = guide.get_next_state(state, token)
+    def teardown(self, *args):
+        caching.clear_cache()
 
 
-class OutlinesJsonSchema:
-    params = [models, json_cases]
-    param_names = ["model", "json"]
+class OutlinesRegex(OutlinesBenchmark):
+    params = [models, regex_cases.keys()]
+    param_names = ["model", "regex_name"]
     timeout = 1200
 
-    def setup(self, model, _):
-        """Set up the benchmark.
-
-        We JIT-compile Numba functions and convert the vocabulary
-        during set up as this only need to be ever done once.
-
-        """
-        self.tokenizer = AutoTokenizer.from_pretrained(
-            model, clean_up_tokenization_spaces=True
-        )
-        self.tokenizer = TransformerTokenizer(self.tokenizer)
-        RegexGuide("a", self.tokenizer)  # JIT-compile and convert the vocabulary
+    def setup(self, model, regex_name):
+        samples = regex_cases[regex_name]["samples"]
+        self.do_setup(model, samples)
 
-    def time_outlines(self, _, json_case):
-        """Measure generation time with Outlines.
-
-        Outlines' generation time is split between compiling an index for each
-        regular expression, and walking this index while generating tokens.
-
-        """
-        json_string, json_example = json_case["schema"], json_case["example"]
-        json_example_tokens = self.tokenizer.encode(json_example)[0][0]
+    def time_outlines_total(self, _, regex_name):
+        regex_string = regex_cases[regex_name]["regex"]
+        guide = self.guide_class(regex_string, self.tokenizer)
+        self._exhaust_samples(guide)
 
-        regex_string = build_regex_from_schema(json.dumps(json_string))
-        guide = RegexGuide(regex_string, self.tokenizer)
+    def time_outlines_first_token(self, _, regex_name):
+        regex_string = regex_cases[regex_name]["regex"]
+        guide = self.guide_class(regex_string, self.tokenizer)
+        self._get_first_token(guide)
 
-        state = 0
-        for token in json_example_tokens:
-            _ = guide.get_next_instruction(state)
-            state = guide.get_next_state(state, token)
+
+class OutlinesRegexRunTime(OutlinesBenchmark):
+    """Class which warms up the Guide in its setup step."""
+
+    params = [models, regex_cases.keys()]
+    param_names = ["model", "regex_name"]
+    timeout = 1200
+
+    def setup(self, model, regex_name):
+        samples = regex_cases[regex_name]["samples"]
+        self.do_setup(model, samples)
+
+        # ensure warmed up so we're only measuring runtime
+        regex_string = regex_cases[regex_name]["regex"]
+        self.guide = self.guide_class(regex_string, self.tokenizer)
+        self._get_first_token(self.guide)
+
+    def time_outlines_runtime(self, *args):
+        self._exhaust_samples(self.guide)
+
+
+class OutlinesJsonSchema(OutlinesBenchmark):
+    json_from_regex_fn = lambda self, schema: build_regex_from_schema(schema)
+
+    params = [models, json_cases.keys()]
+    param_names = ["model", "json_schema_name"]
+    timeout = 1200
+
+    def setup(self, model, json_schema_name):
+        samples = json_cases[json_schema_name]["samples"]
+        self.do_setup(model, samples)
+
+    def time_outlines_total(self, _, json_schema_name):
+        json_string = json_cases[json_schema_name]["schema"]
+        regex_string = self.json_from_regex_fn(json.dumps(json_string))
+        guide = self.guide_class(regex_string, self.tokenizer)
+        self._exhaust_samples(guide)
+
+    def time_outlines_first_token(self, _, json_schema_name):
+        json_string = json_cases[json_schema_name]["schema"]
+        regex_string = self.json_from_regex_fn(json.dumps(json_string))
+        guide = self.guide_class(regex_string, self.tokenizer)
+        self._get_first_token(guide)
+
+
+class OutlinesJsonSchemaRunTime(OutlinesBenchmark):
+    """Class which warms up the Guide in its setup step."""
+
+    json_from_regex_fn = lambda self, schema: build_regex_from_schema(schema)
+
+    params = [models, json_cases.keys()]
+    param_names = ["model", "json_schema_name"]
+    timeout = 1200
+
+    def setup(self, model, json_schema_name):
+        samples = json_cases[json_schema_name]["samples"]
+        self.do_setup(model, samples)
+
+        # ensure warmed up so we're only measuring runtime
+        json_string = json_cases[json_schema_name]["schema"]
+        regex_string = self.json_from_regex_fn(json.dumps(json_string))
+        self.guide = self.guide_class(regex_string, self.tokenizer)
+        self._get_first_token(self.guide)
+
+    def time_outlines_runtime(self, *args):
+        self._exhaust_samples(self.guide)
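The Outlines classes get the same three-way split: index compilation dominates time_outlines_total and time_outlines_first_token, while time_outlines_runtime walks an index that was compiled and warmed in setup. A standalone sketch using only the calls that appear in the diff — the model name, pattern, and sample string are placeholder assumptions, not values from this repository's data module:

import time

from outlines.fsm.guide import RegexGuide
from outlines.models.transformers import TransformerTokenizer
from transformers import AutoTokenizer

hf_tokenizer = AutoTokenizer.from_pretrained("gpt2", clean_up_tokenization_spaces=True)
tokenizer = TransformerTokenizer(hf_tokenizer)
tokens = tokenizer.encode("555-123-4567")[0][0]  # assumed sample completion

start = time.perf_counter()
guide = RegexGuide(r"\d{3}-\d{3}-\d{4}", tokenizer)  # compiles the index
state = guide.get_next_state(guide.initial_state, tokens[0].item())
_ = guide.get_next_instruction(state)
first_token = time.perf_counter() - start  # roughly what time_outlines_first_token captures

start = time.perf_counter()
state = guide.initial_state
for token in tokens:  # walk the warm index token by token
    state = guide.get_next_state(state, token.item())
    _ = guide.get_next_instruction(state)
runtime = time.perf_counter() - start  # roughly what time_outlines_runtime isolates

print(f"first token: {first_token:.4f}s, warm runtime: {runtime:.4f}s")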