From 0e02ffb90bc174d87268f18eaff4b1eda4c0e00d Mon Sep 17 00:00:00 2001 From: Andrew Lapp Date: Wed, 16 Oct 2024 03:59:41 -0400 Subject: [PATCH] Measure runtime and total time --- pyproject.toml | 4 +- src/benchmark_lfe.py | 131 +++++++++++++++++++-------- src/benchmark_outlines.py | 144 ++++++++++++++++++++---------- src/benchmark_outlines_core.py | 110 +++++++++-------------- src/data.py | 71 ++++++++------- src/samples/complex_str.json | 1 + src/samples/gsm8k.json | 1 + src/samples/long_integer.json | 1 + src/samples/phone_number.json | 1 + src/samples/recording_schema.json | 1 + src/samples/rpg_characters.json | 1 + src/samples/url.json | 1 + 12 files changed, 281 insertions(+), 186 deletions(-) create mode 100644 src/samples/complex_str.json create mode 100644 src/samples/gsm8k.json create mode 100644 src/samples/long_integer.json create mode 100644 src/samples/phone_number.json create mode 100644 src/samples/recording_schema.json create mode 100644 src/samples/rpg_characters.json create mode 100644 src/samples/url.json diff --git a/pyproject.toml b/pyproject.toml index 0157246..ebce4df 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,9 +5,9 @@ authors = [{"name" = "The Outlines developers"}] description = "A benchmarking suite for structured generation libraries." requires-python = ">=3.10" dependencies = [ - "lm-format-enforcer==0.10.6", + "lm-format-enforcer==0.10.7", "outlines==0.0.46", - "outlines-core==0.1.0", + "outlines-core", "torch==2.4.0", "transformers==4.44.0", ] diff --git a/src/benchmark_lfe.py b/src/benchmark_lfe.py index e9b18af..680f285 100644 --- a/src/benchmark_lfe.py +++ b/src/benchmark_lfe.py @@ -8,57 +8,110 @@ from .data import json_cases, models, regex_cases -class LMFormatEnforcerRegex: - params = [models, regex_cases] - param_names = ["model", "regex"] - timeout = 600 - - def setup(self, model, _): - """Set up the benchmark. - - We convert the tokenizer during set up as this only - needs to be done once for a given model. - - """ +class LMFormatEnforcerBenchmark: + def do_setup(self, model, samples): self.tokenizer = AutoTokenizer.from_pretrained( model, clean_up_tokenization_spaces=True ) - self.tokenizer_data = build_token_enforcer_tokenizer_data(self.tokenizer) + self.all_tokenized_samples = [ + self.tokenizer.encode(sample) for sample in samples + ] + + def _exhaust_samples(self, token_enforcer): + for sample_tokens in self.all_tokenized_samples: + for i in range(len(sample_tokens)): + _ = token_enforcer.get_allowed_tokens(sample_tokens[: i + 1]) + + def _get_first_token(self, token_enforcer): + """Get first token to verify lazy index is fully warmed up""" + _ = token_enforcer.get_allowed_tokens(self.all_tokenized_samples[0][:1]) + + +class LMFormatEnforcerRegex(LMFormatEnforcerBenchmark): + params = [models, regex_cases.keys()] + param_names = ["model", "regex_name"] + timeout = 1200 + + def setup(self, model, regex_name): + samples = regex_cases[regex_name]["samples"] + self.do_setup(model, samples) + + def _get_enforcer(self, regex_name): + pattern = regex_cases[regex_name]["regex"] + parser = RegexParser(pattern) + tokenizer_data = build_token_enforcer_tokenizer_data(self.tokenizer) + return TokenEnforcer(tokenizer_data, parser) + + def time_lfe_total(self, _, regex_name): + enforcer = self._get_enforcer(regex_name) + self._exhaust_samples(enforcer) + + def time_lfe_first_token(self, _, regex_name): + enforcer = self._get_enforcer(regex_name) + self._get_first_token(enforcer) + - def time_lfe(self, _, regex): - regex_string, regex_example = regex["regex"], regex["example"] - regex_example_tokens = self.tokenizer.encode(regex_example) +class LMFormatEnforcerRegexRunTime(LMFormatEnforcerBenchmark): + """Class which warms-up enforcer in setup steps""" - parser = RegexParser(regex_string) - token_enforcer = TokenEnforcer(self.tokenizer_data, parser) + _get_enforcer = LMFormatEnforcerRegex._get_enforcer - for i in range(len(regex_example_tokens)): - _ = token_enforcer.get_allowed_tokens(regex_example_tokens[: i + 1]) + params = [models, regex_cases.keys()] + param_names = ["model", "regex_name"] + timeout = 1200 + def setup(self, model, regex_name): + samples = regex_cases[regex_name]["samples"] + self.do_setup(model, samples) -class LMFormatEnforcerJsonSchema: - params = [models, json_cases] - param_names = ["model", "json"] + # ensure warmed up so we're only measuring runtime + self.enforcer = self._get_enforcer(regex_name) + self._get_first_token(self.enforcer) + + def time_lfe_runtime(self, *args): + self._exhaust_samples(self.enforcer) + + +class LMFormatEnforcerJsonSchema(LMFormatEnforcerBenchmark): + params = [models, json_cases.keys()] + param_names = ["model", "json_schema_name"] timeout = 600 - def setup(self, model, _): - """Set up the benchmark. + def setup(self, model, json_schema_name): + samples = json_cases[json_schema_name]["samples"] + self.do_setup(model, samples) - We convert the tokenizer during set up as this only - needs to be done once for a given model. + def _get_enforcer(self, json_schema_name): + schema = json_cases[json_schema_name]["schema"] + parser = JsonSchemaParser(schema) + tokenizer_data = build_token_enforcer_tokenizer_data(self.tokenizer) + return TokenEnforcer(tokenizer_data, parser) - """ - self.tokenizer = AutoTokenizer.from_pretrained( - model, clean_up_tokenization_spaces=True - ) - self.tokenizer_data = build_token_enforcer_tokenizer_data(self.tokenizer) + def time_lfe_total(self, _, json_schema_name): + enforcer = self._get_enforcer(json_schema_name) + self._exhaust_samples(enforcer) + + def time_lfe_first_token(self, _, json_schema_name): + enforcer = self._get_enforcer(json_schema_name) + self._get_first_token(enforcer) + + +class LMFormatEnforcerJsonSchemaRunTime(LMFormatEnforcerBenchmark): + """Class which warms-up enforcer in setup steps""" + + _get_enforcer = LMFormatEnforcerJsonSchema._get_enforcer + + params = [models, json_cases.keys()] + param_names = ["model", "json_schema_name"] + timeout = 600 - def time_lfe(self, _, json): - json_string, json_example = json["schema"], json["example"] - json_example_tokens = self.tokenizer.encode(json_example) + def setup(self, model, json_schema_name): + samples = json_cases[json_schema_name]["samples"] + self.do_setup(model, samples) - parser = JsonSchemaParser(json_string) - token_enforcer = TokenEnforcer(self.tokenizer_data, parser) + # ensure warmed up so we're only measuring runtime + self.enforcer = self._get_enforcer(json_schema_name) + self._get_first_token(self.enforcer) - for i in range(len(json_example_tokens)): - _ = token_enforcer.get_allowed_tokens(json_example_tokens[: i + 1]) + def time_lfe_runtime(self, *args): + self._exhaust_samples(self.enforcer) diff --git a/src/benchmark_outlines.py b/src/benchmark_outlines.py index 67de236..5c3b183 100644 --- a/src/benchmark_outlines.py +++ b/src/benchmark_outlines.py @@ -2,6 +2,7 @@ import json import outlines.caching as caching +import torch from outlines.fsm.guide import RegexGuide from outlines.fsm.json_schema import build_regex_from_schema from outlines.models.transformers import TransformerTokenizer @@ -10,12 +11,10 @@ from .data import json_cases, models, regex_cases -class OutlinesRegex: - params = [models, regex_cases] - param_names = ["model", "regex"] - timeout = 1200 +class OutlinesBenchmark: + guide_class = RegexGuide - def setup(self, model, _): + def do_setup(self, model, samples): """Set up the benchmark. We JIT-compile Numba functions and convert the vocabulary @@ -26,59 +25,114 @@ def setup(self, model, _): model, clean_up_tokenization_spaces=True ) self.tokenizer = TransformerTokenizer(self.tokenizer) - RegexGuide("a", self.tokenizer) # JIT-compile and convert the vocabulary + self.guide_class("a", self.tokenizer) # JIT-compile and convert the vocabulary + + self.all_tokenized_samples = [ + self.tokenizer.encode(sample)[0][0] for sample in samples + ] + + def _exhaust_samples(self, guide): + state = guide.initial_state + for sample_tokens in self.all_tokenized_samples: + for token in sample_tokens: + if isinstance(token, torch.Tensor): + token = token.item() + state = guide.get_next_state(state, token) + _ = guide.get_next_instruction(state) + + def _get_first_token(self, guide): + """Get first token to verify lazy index is fully warmed up""" + state = guide.get_next_state( + guide.initial_state, self.all_tokenized_samples[0][0] + ) + _ = guide.get_next_instruction(state) - def time_outlines(self, _, regex): - """Measure generation time with Outlines. + def teardown(self, *args): + caching.clear_cache() - Outlines' generation time is split between compiling an index for each - regular expression, and walking this index while generating tokens. - """ - caching.clear_cache() +class OutlinesRegex(OutlinesBenchmark): + params = [models, regex_cases.keys()] + param_names = ["model", "regex_name"] + timeout = 1200 + + def setup(self, model, regex_name): + samples = regex_cases[regex_name]["samples"] + self.do_setup(model, samples) - regex_string, regex_example = regex["regex"], regex["example"] - regex_example_tokens = self.tokenizer.encode(regex_example)[0][0] - guide = RegexGuide(regex_string, self.tokenizer) + def time_outlines_total(self, _, regex_name): + regex_string = regex_cases[regex_name]["regex"] + guide = self.guide_class(regex_string, self.tokenizer) + self._exhaust_samples(guide) - state = 0 - for token in regex_example_tokens: - _ = guide.get_next_instruction(state) - state = guide.get_next_state(state, token) + def time_outlines_first_token(self, _, regex_name): + regex_string = regex_cases[regex_name]["regex"] + guide = self.guide_class(regex_string, self.tokenizer) + self._get_first_token(guide) -class OutlinesJsonSchema: - params = [models, json_cases] - param_names = ["model", "json"] +class OutlinesRegexRunTime(OutlinesBenchmark): + """Class which warms-up Guide in setup steps""" + + params = [models, regex_cases.keys()] + param_names = ["model", "regex_name"] timeout = 1200 - def setup(self, model, _): - """Set up the benchmark. + def setup(self, model, regex_name): + samples = regex_cases[regex_name]["samples"] + self.do_setup(model, samples) - We JIT-compile Numba functions and convert the vocabulary - during set up as this only need to be ever done once. + # ensure warmed up so we're only measuring runtime + regex_string = regex_cases[regex_name]["regex"] + self.guide = self.guide_class(regex_string, self.tokenizer) + self._get_first_token(self.guide) - """ - self.tokenizer = AutoTokenizer.from_pretrained( - model, clean_up_tokenization_spaces=True - ) - self.tokenizer = TransformerTokenizer(self.tokenizer) - RegexGuide("a", self.tokenizer) # JIT-compile and convert the vocabulary + def time_outlines_runtime(self, *args): + self._exhaust_samples(self.guide) - def time_outlines(self, _, json_case): - """Measure generation time with Outlines. - Outlines' generation time is split between compiling an index for each - regular expression, and walking this index while generating tokens. +class OutlinesJsonSchema(OutlinesBenchmark): + json_from_regex_fn = lambda self, schema: build_regex_from_schema(schema) - """ - json_string, json_example = json_case["schema"], json_case["example"] - json_example_tokens = self.tokenizer.encode(json_example)[0][0] + params = [models, json_cases.keys()] + param_names = ["model", "json_schema_name"] + timeout = 1200 + + def setup(self, model, json_schema_name): + samples = json_cases[json_schema_name]["samples"] + self.do_setup(model, samples) + + def time_outlines_total(self, _, json_schema_name): + json_string = json_cases[json_schema_name]["schema"] + regex_string = self.json_from_regex_fn(json.dumps(json_string)) + guide = self.guide_class(regex_string, self.tokenizer) + self._exhaust_samples(guide) + + def time_outlines_first_token(self, _, json_schema_name): + json_string = json_cases[json_schema_name]["schema"] + regex_string = self.json_from_regex_fn(json.dumps(json_string)) + guide = self.guide_class(regex_string, self.tokenizer) + self._get_first_token(guide) + + +class OutlinesJsonSchemaRunTime(OutlinesBenchmark): + """Class which warms-up Guide in setup steps""" + + json_from_regex_fn = lambda self, schema: build_regex_from_schema(schema) + + params = [models, json_cases.keys()] + param_names = ["model", "json_schema_name"] + timeout = 1200 + + def setup(self, model, json_schema_name): + samples = json_cases[json_schema_name]["samples"] + self.do_setup(model, samples) - regex_string = build_regex_from_schema(json.dumps(json_string)) - guide = RegexGuide(regex_string, self.tokenizer) + # ensure warmed up so we're only measuring runtime + json_string = json_cases[json_schema_name]["schema"] + regex_string = self.json_from_regex_fn(json.dumps(json_string)) + self.guide = self.guide_class(regex_string, self.tokenizer) + self._get_first_token(self.guide) - state = 0 - for token in json_example_tokens: - _ = guide.get_next_instruction(state) - state = guide.get_next_state(state, token) + def time_outlines_runtime(self, *args): + self._exhaust_samples(self.guide) diff --git a/src/benchmark_outlines_core.py b/src/benchmark_outlines_core.py index 02db4d0..3ed280e 100644 --- a/src/benchmark_outlines_core.py +++ b/src/benchmark_outlines_core.py @@ -1,78 +1,54 @@ -import json - -from outlines_core.fsm.guide import RegexGuide +from outlines.caching import cache +from outlines_core.fsm.guide import RegexGuide, create_states_mapping from outlines_core.fsm.json_schema import build_regex_from_schema -from outlines_core.models.transformers import TransformerTokenizer -from transformers import AutoTokenizer - -from .data import json_cases, models, regex_cases - - -class OutlinesCoreRegex: - params = [models, regex_cases] - param_names = ["model", "regex"] - timeout = 600 - def setup(self, model, _): - """Set up the benchmark. - - We JIT-compile Numba functions and convert the vocabulary - during set up as this only need to be ever done once. - - """ - self.tokenizer = AutoTokenizer.from_pretrained( - model, clean_up_tokenization_spaces=True +from .benchmark_outlines import ( + OutlinesJsonSchema, + OutlinesJsonSchemaRunTime, + OutlinesRegex, + OutlinesRegexRunTime, +) + + +@cache() +def cached_create_states_mapping(regex_string, tokenizer, *args, **kwargs): + return create_states_mapping(regex_string, tokenizer, *args, **kwargs) + + +class CachedOutlinesCoreRegexGuide(RegexGuide): + """ + Guide to generate text in the language of a regular expression. + CoreRegexGuide with outlines cache + """ + + @classmethod + def from_regex( + cls, + regex_string: str, + tokenizer, + **kwargs, + ): + return RegexGuide.from_regex( + regex_string, + tokenizer, + _create_states_mapping=cached_create_states_mapping, + **kwargs, ) - self.tokenizer = TransformerTokenizer(self.tokenizer) - - def time_outlines_core(self, _, regex): - """Measure generation time with Outlines. - Outlines' generation time is split between compiling an index for each - regular expression, and walking this index while generating tokens. - """ - regex_string, regex_example = regex["regex"], regex["example"] - regex_example_tokens = self.tokenizer.encode(regex_example)[0][0] - guide = RegexGuide(regex_string, self.tokenizer) +class OutlinesCoreRegex(OutlinesRegex): + guide_class = CachedOutlinesCoreRegexGuide.from_regex - state = 0 - for token in regex_example_tokens: - _ = guide.get_next_instruction(state) - state = guide.get_next_state(state, token) - - -class OutlinesCoreJsonSchema: - params = [models, json_cases] - param_names = ["model", "json"] - timeout = 600 - - def setup(self, model, _): - """Set up the benchmark. - - We JIT-compile Numba functions and convert the vocabulary - during set up as this only need to be ever done once. - - """ - self.tokenizer = AutoTokenizer.from_pretrained( - model, clean_up_tokenization_spaces=True - ) - self.tokenizer = TransformerTokenizer(self.tokenizer) - def time_outlines_core(self, _, json_case): - """Measure generation time with Outlines. +class OutlinesCoreRegexRunTime(OutlinesRegexRunTime): + guide_class = CachedOutlinesCoreRegexGuide.from_regex - Outlines' generation time is split between compiling an index for each - regular expression, and walking this index while generating tokens. - """ - json_string, json_example = json_case["schema"], json_case["example"] - json_example_tokens = self.tokenizer.encode(json_example)[0][0] +class OutlinesCoreJsonSchema(OutlinesJsonSchema): + guide_class = CachedOutlinesCoreRegexGuide.from_regex + json_from_regex_fn = lambda self, schema: build_regex_from_schema(schema) - regex_string = build_regex_from_schema(json.dumps(json_string)) - guide = RegexGuide(regex_string, self.tokenizer) - state = 0 - for token in json_example_tokens: - _ = guide.get_next_instruction(state) - state = guide.get_next_state(state, token) +class OutlinesCoreJsonSchemaRunTime(OutlinesJsonSchemaRunTime): + guide_class = CachedOutlinesCoreRegexGuide.from_regex + json_from_regex_fn = lambda self, schema: build_regex_from_schema(schema) diff --git a/src/data.py b/src/data.py index e4a76be..2159be1 100644 --- a/src/data.py +++ b/src/data.py @@ -1,3 +1,9 @@ +import json +from pathlib import Path + +SAMPLES_PATH = Path(__file__).parent / "samples" + + models = [ "NousResearch/Nous-Hermes-llama-2-7b", # 32,000 tokens vocabulary "gpt2", # 50,257 tokens vocabulary @@ -5,39 +11,35 @@ "unsloth/gemma-2-2b-it-bnb-4bit", # 256,128 tokens vocabulary ] -regex_cases = [ - { - "name": "Phone Number", - "regex": r'\d{3}-\d{2}-\d{4}', - "example": '203-22-1234' + +regex_cases = { + "Phone Number": { + "regex": r"\d{3}-\d{3}-\d{4}", + "samples": json.load(open(SAMPLES_PATH / "phone_number.json")), }, - { - "name": "URL", - "regex": r'(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?', - "example": 'https://github.com/outlines-dev/outlines' + "URL": { + "regex": r"(https?:\/\/)?([\da-z\.-]+)\.([a-z\.]{2,6})([\/\w \.-]*)*\/?", + "samples": json.load(open(SAMPLES_PATH / "url.json")), }, - { - "name": "GSM8K", - "regex": r'A: [\w \.\*\-=\+,\?/]{10,50}\. The answer is [1-9][0-9]{0,9}\.', - "example": 'A: Some thoughts before answering. The answer is 42.' + "GSM8K": { + "regex": r"A: [\w \.\*\-=\+,\?/]{10,50}\. The answer is [1-9][0-9]{0,9}\.", + "samples": json.load(open(SAMPLES_PATH / "gsm8k.json")), + # gsm8k.json attribution: https://huggingface.co/datasets/thesven/gsm8k-reasoning }, - { - "name": "Complex string", - "regex": r'(0|[1-9][0-9]*)|true|false|([a-zA-Z_][a-zA-Z_0-9]*)', - "example": 'AVeryLongStringtoTest1234' + "Complex string": { + "regex": r"(0|[1-9][0-9]*)|true|false|([a-zA-Z_][a-zA-Z_0-9]*)", + "samples": json.load(open(SAMPLES_PATH / "complex_str.json")), }, - { - "name": "Long integer", - "regex": r'\+[1-9]\d{1,14}', - "example": '1234567891234' - } -] + "Long integer": { + "regex": r"\+[1-9]\d{1,14}", + "samples": json.load(open(SAMPLES_PATH / "long_integer.json")), + }, +} + -json_cases = [ - { - "name": "RPG character", - "schema": - { +json_cases = { + "RPG character": { + "schema": { "$defs": { "Armor": { "enum": ["leather", "chainmail", "plate"], @@ -55,10 +57,11 @@ "title": "Character", "type": "object", }, - "example": """{'name': 'Super Warrior', 'age': 26, 'armor': 'leather', 'armor': 10}""", + "samples": list( + map(json.dumps, json.load(open(SAMPLES_PATH / "rpg_characters.json"))) + ), }, - { - "name": "Simple nested schema", + "Simple nested schema": { "schema": { "$schema": "http://json-schema.org/draft-04/schema#", "title": "Schema for a recording", @@ -91,6 +94,8 @@ }, "required": ["id", "work", "recording_artists"], }, - "example": """{'id': 999, 'work': {'id': 1, 'name': 'Strasbourg Saint-Denis', 'composer': 'Roy Hargrove'}, 'recording_artists': [{'id': 2, 'name': 'Roy Hargrove', 'functions': ['Trumpet', 'Singing']}]}""", + "samples": list( + map(json.dumps, json.load(open(SAMPLES_PATH / "recording_schema.json"))) + ), }, -] +} diff --git a/src/samples/complex_str.json b/src/samples/complex_str.json new file mode 100644 index 0000000..8670c50 --- /dev/null +++ b/src/samples/complex_str.json @@ -0,0 +1 @@ +["falseJb", "false0IUnuntrueb_LgozC2VciR4TSU", "truefalsetruetrue", "60falsetrue0truefalsefalseucfalse", "GG6LOxmjtrue0", "ybZXifalsetrueIi3Ftrue", "falsetrueDlled4UiW0trueZJsNUjKfs", "4", "falseWkZpEfalse19falsefalsefalsetrue0", "true41falsectrue0falsetrue", "false11trues82", "true39falsetrue73true0falsetrue", "NDd9falsedjy3fGB", "true", "true58true0truefalse", "falset77_ZzU0sFE42etrue0D9", "52sYo7oF8YDPtrueu5x7eBuByBqZJNb00", "false22false8false", "falsefalsewkXfalseNl00true3", "FQJDLnTDVstku3J0X8d5RaerNJfaO96", "truefalsefalse0", "truehOfalse", "truefalseetCOz", "truefalsefalseOtruetrue0LHT0false", "0falsefalseWW3FVDzctruetruetrue", "false", "truetruefalsetruepZWNyfi12bkU", "truefalseigp1ub3", "truetrue64falsefalse", "SWAOdHtBfalse37UHNc1hlfAX_hEfalse7", "truetruefalseUMaFReibQfalse76_X8MWwTkRZfalse", "098truefalsetrue", "VNB0040rRrSOG048false", "xZRfalsetruetrue", "42", "falsetruetrueR", "Afalsetrue", "false0MUqmMTNtrueQLfwxtruetruetruefalse", "truearAcBkNR426yPWtruefalse37YgMuFwC2nfalse0", "uDvnVfalse2falsefalse24", "1iEwsRFaXzPj", "falsetruefalse40truefalsefalsetrueCVTyhXpeufalse", "iVdgt2_24trueO57", "un7HivkLu360falseXI9dfg0BEU53izLz11falsefalse", "Wfalse050false0", "grRmM2N7R0iQ0falsefalse0amOajE", "falsefalsefalsed70", "Fodkhdk1rXfalseV8fBRtrue034", "truek9gRy7Ll0qPx4gXTY_W", "truedI5xBI3cTi6", "falsetrueK541tVn1kofalsetrue", "QlWNmtruefalsevHmJX8i00falseap", "QuKVqKdmCfalsemx5RWQa", "R92truefalsetrue", "pRQW6krgFtrue0kqT96b2truelUarMp30v_68w66", "Nnfalse0", "false", "kVnGMeGfalse", "false", "533true096zyxAZXkVsV", "ZHGMEshg67", "27truefalsetrue", "0truetruetrueE1xfalse67", "false67DOVb2Ohcfalse", "false", "83truefalsefalsefalsefalse1843HAUZ", "falsetrueSgcSyFrMLtrue", "57truezn23BcwaTfalse", "BQsQY2W18false14", "false148trueIBjNKK7mWY", "036", "falsecZYAdPOjGkofalse2falsemN3ktruefalsetrue", "87", "true0falsefalse", "wtruetruetruetrue0true0", "truetruefalsetruefalsefalsetUxqN3BsGJ0", "trueryEUpMh0_UQjnA5AOhP6519", "falsefalsetrue0truetruefalsehLE2itrue57", "truefalsefalsefalse350", "zOyG0truefalsefalse", "falseYlQfg", "21Og", "0false00falsexA2mX7true", "96truetrue", "SFtg_HU_5GvFAkP0Pxw8K5ftruefalse", "ZtruGMWBuItruectW510falsetrue", "trueBvPmtrue00", "_fDdoTYwtrue", "295true97", "true", "false", "false0H0true000", "93ClaPkD41h7false76falsemtruePrRNiFsAOcY1YxC", "falsefalse170truerfmQImvL0", "false1", "dpvYu", "false0", "0true", "trueDpQs13MtrueEl9619true", "true46"] diff --git a/src/samples/gsm8k.json b/src/samples/gsm8k.json new file mode 100644 index 0000000..be9d3d7 --- /dev/null +++ b/src/samples/gsm8k.json @@ -0,0 +1 @@ +["Thus, there were 36 - 12 - 9. The answer is 42.", "So she must be applying makeup to 12 tubes of lip gloss * 3 people per tube of lip gloss. The answer is 42.", "To fix the rest of the bike as well, she needs 20 + 5. The answer is 42.", "Thus, she had X. The answer is 42.", "During two days the boat can transport a total of 48 * 2. The answer is 42.", "In total, they have 140+180. The answer is 42.", "So he got 10*5. The answer is 42.", "So from Monday through Sunday, there are 20 + 25 + 22 + 20 + 10. The answer is 42.", "Henry spent a total of 35+6. The answer is 42.", "Then we will divide each side by 5 to figure out how many nuggets Alyssa ate, 100/5. The answer is 42.", "If Tina sold $210 worth of candy and Marvin sold $70 in candy then 210 - 70. The answer is 42.", "That means he had 15 - 9. The answer is 42.", "After adding the photos they took on the second day to the galley, the number of photos will be 600+320. The answer is 42.", "Adding those marbles to the 20 she had before, she ends up with 40 marbles + 20 marbles. The answer is 42.", "Therefore, in total Ed has 5+10. The answer is 42.", "There are 3000/3*4. The answer is 42.", "The electric bill needs an extra stamp, so she needs 20 + 1. The answer is 42.", "This leaves 24 clownfish \u2013 8 clownfish. The answer is 42.", "Then add the initial number of objects she could juggle to find the total: 10 objects + 3 objects. The answer is 42.", "Adding the cupcakes for the P.E. class 50, she needs to make a total of 90 cupcakes + 50 cupcakes. The answer is 42.", "The bike\u2019s cost must be subtracted from her earnings made in a month to get how much money is left over after purchasing the bike: $1120 - $400. The answer is 42.", "The total time it took to fill up the tank is 4+2. The answer is 42.", "The number of board members who attended the meeting was 80-32. The answer is 42.", "The grocery store offer is $0.50 a can and the warehouse is $0.25 a can so the grocery store is .50-.25. The answer is 42.", "The second ball is twice the size of the first ball, so she used 9 * 2. The answer is 42.", "If she is currently 20 inches tall, her height after 10 years will be 20+30. The answer is 42.", "John has 5 more than 20 roommates which is 20+5. The answer is 42.", "In five minutes, the three cats meow 5*11. The answer is 42.", "Thus, the principal needs to book 30/5. The answer is 42.", "He gave away 10 cones because 50 / 5. The answer is 42.", "He was left with 110-55. The answer is 42.", "In total, Mitchell read 420+80. The answer is 42.", "So he paid 20*5.5. The answer is 42.", "This means, 36 - 27. The answer is 42.", "Therefore, their change is 500 - 426. The answer is 42.", "Dividing both sides by 4 we get x. The answer is 42.", "Therefore, Loraine used 12 + 8. The answer is 42.", "The washing machine can only hold 14 towels and they have 84 towels to wash which means there are 84/14. The answer is 42.", "So he worked for 5*8. The answer is 42.", "The library needs 20/4. The answer is 42.", "Since there is a total of 100 chocolate bars and four types of chocolate, we divide 100/4. The answer is 42.", "60 minutes are in 1 hour and it takes her 120 minutes so that\u2019s 120/60. The answer is 42.", "The total liters of fuel are 150+100. The answer is 42.", "If the size of the drift on the second day, after half of the snow had melted, was 10 inches, then the depth of the original drift created on day 1 was 2*10. The answer is 42.", "In total Ed and Jacob have 2 shells + 13 shells + 15 shells. The answer is 42.", "Donna has 200-160. The answer is 42.", "Bianca, Celeste and McClain worked a total of 3240 minutes last weekend.. The answer is 42.", "After throwing some in the trash, he now holds a total of 234 cards - 34 trashed cards. The answer is 42.", "If she needs to eat 6 more cups in the next 2 days then she will have to eat 6 cups / 2 days. The answer is 42.", "Since he got 4 cents as change it cost 135-4. The answer is 42.", "So, he will be able to finish typing the document in 810 words / 45 words/minute. The answer is 42.", "To close on a distance of 200 feet, it would take the cheetah 210/30. The answer is 42.", "So each writer from 4th to 18th will earn $330 / 15. The answer is 42.", "So, they still need to collect 100 - 73. The answer is 42.", "Therefore, she has to buy 11 - 3. The answer is 42.", "Therefore, they have 20 + 40. The answer is 42.", "That leaves 180 - 20 - 60 - 12. The answer is 42.", "This means that Sandy will have 500000 - 125000. The answer is 42.", "Jung is 2 years + 24 years. The answer is 42.", "Angie had 18 left after paying taxes, so her share of the taxes was 38- 18. The answer is 42.", "The number of pies that are not eaten with pie is 2000-1360. The answer is 42.", "Since 1 slice of pepperoni fell off Jelly's slice, this means there are 10-1. The answer is 42.", "The percentage that are green is 60 because .6 x 100%. The answer is 42.", "That means it cost him $5000 - $4000. The answer is 42.", "Donna dropped off five more than that, so she dropped off 66 + 5. The answer is 42.", "Then add 8 years to the older sister's birth year to find the grandmother's birth year: 1936 + 8. The answer is 42.", "Kris and her brother therefore blew up a total of 60 balloons from Kris + 180 balloons from Kris\u2019 brother. The answer is 42.", "Now add the court costs, lawyer fees and fine: $240 + $300 + $280. The answer is 42.", "Then find the chances of getting ill after receiving the vaccine: 30% * 50%. The answer is 42.", "At a rate of $5 per dog, Harry ears $5 * 42. The answer is 42.", "Triple the number of rabbits in the cage is 3*20. The answer is 42.", "Then subtract that number from the 27 seagulls to find the final number of seagulls: 27 seagulls - 9 seagulls. The answer is 42.", "Thus, Anne is carrying a total of 2 + 4. The answer is 42.", "There are 15 cups of lemonade in the pitcher because 12 x 1 and 1/4. The answer is 42.", "Thus, Georgia has 75 - 25. The answer is 42.", "The town has therefore added 14 trash cans on the streets + 28 trash cans at the back of stores. The answer is 42.", "She has 0 minutes left when she is done because 60 - 60. The answer is 42.", "So a table and 4 chairs cost $140 + $80. The answer is 42.", "She would need to baby-sit for $75/$15. The answer is 42.", "The total amount for 6 months is $600 * 6. The answer is 42.", "If the total number of candles was 72, then the number of blue candles is 72 candles - 45 candles. The answer is 42.", "Therefore, one deck of basketball cards costs $50/2. The answer is 42.", "Together, the three friends have 1020+640. The answer is 42.", "The total shoe size is 14 + 7. The answer is 42.", "The combined total number of stripes Vaishali has on all of her hats is 12+12+0+10. The answer is 42.", "Therefore, there are 25 - 7. The answer is 42.", "Her assignment will take 10 minutes to complete, so she needs 42 minutes + 10 minutes. The answer is 42.", "In 6 years he will be 12 + 6. The answer is 42.", "So he won 5000+70+160-400. The answer is 42.", "There are 30+20+30. The answer is 42.", "Michelangelo will have 16-3. The answer is 42.", "He can therefore make 500 meters of silk / 5 meters of silk per dress. The answer is 42.", "To determine the halfway point, we simply divide the total number of miles by 2, performing 30/2. The answer is 42.", "Compared to Veronica's flashlight, Velma's flashlight could be seen for a distance of 13,000-1,000. The answer is 42.", "For 91 days the bus system carries 1,200,000 * 91. The answer is 42.", "All the apartments can therefore accommodate 66 apartments * 4 residents/apartment. The answer is 42.", "In total, the bus carried 160+150. The answer is 42.", "Quinton then gave away 6 - 1 -1 -1 -1. The answer is 42.", "They will play 16 / 2. The answer is 42.", "100 cents make a dollar so 200 cents make 200/100. The answer is 42."] diff --git a/src/samples/long_integer.json b/src/samples/long_integer.json new file mode 100644 index 0000000..368f16d --- /dev/null +++ b/src/samples/long_integer.json @@ -0,0 +1 @@ +["+83094", "+1795091367092", "+42", "+929089011211089", "+26130955215791", "+7767", "+6759912978443", "+583634786789972", "+579578372355278", "+44823776970", "+6611156", "+279649300470", "+612623", "+94937014091346", "+33734690589606", "+5395287444852", "+157076793951", "+9563230", "+58630060740", "+4656577", "+134", "+37020491453584", "+170341651", "+67974", "+96524", "+4286169647603", "+391294983", "+856606", "+470498555", "+96362157", "+7250135946", "+117928236897", "+7959850718640", "+34227", "+619177965217", "+871596132581349", "+4636718456036", "+98798", "+6064130279", "+32438", "+53", "+5298655", "+213581994", "+71497", "+403383173625", "+87948252", "+82513793282189", "+6020207", "+347070153", "+56010933555", "+39", "+2740435060956", "+8283", "+41", "+8117493", "+565", "+17984995066427", "+538718279249", "+3668696927193", "+659772042831", "+6260995736", "+56963135", "+58801238412", "+26", "+93904", "+753963", "+807627773250", "+9803802", "+608439537", "+55850", "+539", "+12999556691", "+250951919292", "+6821851297836", "+3998189", "+8235", "+65369436", "+821565324", "+2273941809014", "+75", "+572269", "+2109495641802", "+78", "+99", "+894", "+255608621326220", "+653254404008", "+63292", "+9465729862115", "+85919674", "+745597", "+687839", "+77267545969564", "+6063933181660", "+99", "+41489130", "+850203071070", "+384911418317915", "+6591", "+78208"] diff --git a/src/samples/phone_number.json b/src/samples/phone_number.json new file mode 100644 index 0000000..5b6e7e4 --- /dev/null +++ b/src/samples/phone_number.json @@ -0,0 +1 @@ +["434-343-9148", "128-494-9907", "657-365-8439", "432-793-4855", "111-565-4271", "244-348-7992", "303-544-9416", "912-199-7614", "954-129-8168", "776-948-3054", "276-462-8686", "937-966-7114", "643-783-4739", "660-546-7625", "384-236-4915", "126-829-8164", "435-751-9927", "470-828-6067", "274-345-5918", "592-702-1968", "292-384-2045", "330-340-5998", "298-790-8428", "917-477-7580", "744-907-2393", "180-894-6340", "275-841-1372", "119-619-9023", "419-509-5784", "392-112-5642", "752-724-2694", "629-103-8243", "293-789-6042", "308-962-8007", "873-547-2093", "870-881-3681", "979-199-7098", "934-551-1425", "896-933-8900", "195-336-6465", "103-227-2058", "529-479-3535", "519-503-5547", "559-661-1628", "666-154-1681", "813-994-9711", "901-118-8425", "223-206-9602", "790-843-5305", "254-418-5789", "987-370-1324", "899-967-8604", "584-300-8426", "872-701-6873", "112-362-6758", "588-317-6624", "236-814-6863", "398-470-5695", "365-421-1324", "855-482-3102", "468-454-3626", "167-776-7704", "311-184-7212", "940-336-4415", "291-477-1089", "710-115-4845", "730-444-8704", "897-260-9342", "173-571-8984", "355-458-5562", "585-651-5358", "382-666-9903", "234-536-4577", "223-909-4090", "172-828-9865", "838-114-6644", "184-944-1873", "592-100-8935", "155-575-5627", "994-286-4773", "932-774-8834", "273-616-8842", "228-988-8592", "103-668-2053", "346-844-7934", "397-890-1324", "420-583-2647", "827-952-2518", "852-388-9116", "471-352-4364", "487-114-7852", "839-589-3983", "675-829-6655", "411-420-8391", "455-397-7021", "687-248-6139", "395-953-9849", "503-225-8605", "897-566-5596", "157-842-1230"] diff --git a/src/samples/recording_schema.json b/src/samples/recording_schema.json new file mode 100644 index 0000000..a37c812 --- /dev/null +++ b/src/samples/recording_schema.json @@ -0,0 +1 @@ +[{"id": 1, "work": {"id": 2, "name": "Piano Sonata No. 14", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose"]}}, "recording_artists": [{"id": 4, "name": "Alice Johnson", "functions": ["perform", "record"]}]}, {"id": 1, "work": {"id": 101, "name": "Moonlight Sonata", "composer": {"id": 10, "name": "Ludwig van Beethoven", "functions": ["compose", "piano"]}}, "recording_artists": [{"id": 11, "name": "Alice Johnson", "functions": ["piano", "compose"]}, {"id": 12, "name": "Bob Lee", "functions": ["violin"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "conductor"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["record"]}, {"id": 5, "name": "Jane Smith", "functions": ["sing"]}]}, {"id": 1, "work": {"id": 101, "name": "Moonlight Sonata", "composer": {"id": 42, "name": "Ludwig van Beethoven", "functions": ["compose", "perform"]}}, "recording_artists": [{"id": 59, "name": "Alice Wong", "functions": ["arrange", "perform"]}, {"id": 76, "name": "Bob Lee", "functions": ["orchestrate", "conduct"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 5", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["composing", "conceiving"]}}, "recording_artists": [{"id": 4, "name": "Conductor1", "functions": ["conducting"]}, {"id": 5, "name": "Violinist1", "functions": ["playing violin"]}]}, {"id": 1, "work": {"id": 2, "name": "Moonlight Sonata", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "play"]}}, "recording_artists": [{"id": 4, "name": "Peter Schumann", "functions": ["perform"]}, {"id": 5, "name": "Vivian Schmitt", "functions": ["record"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["perform"]}, {"id": 5, "name": "Jane Smith", "functions": ["conduct"]}]}, {"id": 1, "work": {"id": 2, "name": "Opus 1", "composer": {"id": 3, "name": "John Doe", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "Jane Smith", "functions": ["sing"]}, {"id": 5, "name": "Sam Johnson", "functions": ["play piano"]}]}, {"id": 1, "work": {"id": 2, "name": "Moonlight Sonata", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "perform"]}}, "recording_artists": [{"id": 4, "name": "John Smith", "functions": ["perform"]}, {"id": 5, "name": "Jane Doe", "functions": ["produce"]}]}, {"id": 1, "work": {"id": 2, "name": "Etude Opus 10 No. 3", "composer": {"id": 3, "name": "Fr\u00e9d\u00e9ric Chopin", "functions": ["composer", "performer"]}}, "recording_artists": [{"id": 4, "name": "Joannina Bartosz", "functions": ["soloist", "recording artist"]}]}, {"id": 123, "work": {"id": 456, "name": "Example Work", "composer": {"id": 789, "name": "Example Composer", "functions": ["composing"]}}, "recording_artists": [{"id": 101, "name": "Example Artist 1", "functions": ["vocal"]}, {"id": 102, "name": "Example Artist 2", "functions": ["piano"]}]}, {"id": 1, "work": {"id": 101, "name": "Symphony No. 5", "composer": {"id": 10, "name": "Ludwig van Beethoven", "functions": ["composing", "conducting"]}}, "recording_artists": [{"id": 20, "name": "Conducted by Herbert von Karajan", "functions": ["conducting", "conceiving"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["sing", "compose"]}, {"id": 5, "name": "Jane Smith", "functions": ["play", "conduct"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["sing", "conduct"]}, {"id": 5, "name": "Jane Smith", "functions": ["play violin"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "conductor"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["vocalist"]}, {"id": 5, "name": "Jane Smith", "functions": ["violinist"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "piano"]}}, "recording_artists": [{"id": 4, "name": "John Doe Conductor", "functions": ["conduct"]}, {"id": 5, "name": "Jane Smith Violinist", "functions": ["play violin"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "conductor"]}}, "recording_artists": [{"id": 4, "name": "Carlos Kleiber", "functions": ["conduct"]}, {"id": 5, "name": "Vienna Philharmonic Orchestra", "functions": ["perform"]}]}, {"id": 1, "work": {"id": 101, "name": "Symphony No. 9", "composer": {"id": 42, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 1, "name": "John Doe", "functions": ["solo", "ensemble"]}, {"id": 2, "name": "Jane Smith", "functions": ["background", "lead"]}]}, {"id": 1, "work": {"id": 2, "name": "Moonlight Sonata", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "piano"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["piano"]}, {"id": 5, "name": "Jane Doe", "functions": ["violin"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["perform", "record"]}, {"id": 5, "name": "Jane Smith", "functions": ["produce", "mix"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["composer", "pianist"]}}, "recording_artists": [{"id": 4, "name": "Marin Alsop", "functions": ["conductor"]}, {"id": 5, "name": "Baltimore Symphony Orchestra", "functions": ["orchestra"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "piano", "conduct"]}}, "recording_artists": [{"id": 4, "name": "Daniel Barenboim", "functions": ["conduct", "piano"]}, {"id": 5, "name": "Chicago Symphony Orchestra", "functions": ["ensemble"]}]}, {"id": 1, "work": {"id": 2, "name": "Work A", "composer": {"id": 3, "name": "Composer B", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "Artist C", "functions": ["solo", "ensemble"]}, {"id": 5, "name": "Artist D", "functions": ["director", "producer"]}]}, {"id": 12345, "work": {"id": 67890, "name": "Symphony No. 9", "composer": {"id": 1, "name": "Ludwig van Beethoven", "functions": ["compose", "piano"]}}, "recording_artists": [{"id": 2, "name": "John Smith", "functions": ["violin"]}, {"id": 3, "name": "Jane Doe", "functions": ["cello"]}]}, {"id": 1, "work": {"id": 2, "name": "Nocturne in E-flat major, Op. 9, No. 2", "composer": {"id": 3, "name": "Fr\u00e9d\u00e9ric Chopin", "functions": ["compose", "perform"]}}, "recording_artists": [{"id": 4, "name": "Joanna Korzuch", "functions": ["sing", "compose"]}, {"id": 5, "name": "Bogdan Kos", "functions": ["play"]}]}, {"id": 1, "work": {"id": 101, "name": "Moonlight Sonata", "composer": {"id": 5, "name": "Ludwig van Beethoven", "functions": ["compose", "perform"]}}, "recording_artists": [{"id": 6, "name": "Alice", "functions": ["play", "compose"]}, {"id": 7, "name": "Bob", "functions": ["conduct"]}]}, {"id": 12345, "work": {"id": 67890, "name": "Symphony No. 5", "composer": {"id": 1, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 2, "name": "Simon Rattle", "functions": ["conduct"]}, {"id": 3, "name": "London Symphony Orchestra", "functions": ["play"]}]}, {"id": 1, "work": {"id": 2, "name": "Sonata No. 1", "composer": {"id": 3, "name": "John Doe", "functions": ["compose", "conductor"]}}, "recording_artists": [{"id": 4, "name": "Jane Smith", "functions": ["soloist", "violin"]}, {"id": 5, "name": "Tom Johnson", "functions": ["cello"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "Conductor A", "functions": ["conduct"]}, {"id": 5, "name": "Instrumentalist B", "functions": ["play"]}]}, {"id": 1, "work": {"id": 101, "name": "Symphony No. 9", "composer": {"id": 10, "name": "Ludwig van Beethoven", "functions": ["Compose", "Conduct"]}}, "recording_artists": [{"id": 11, "name": "John Doe", "functions": ["Conduct"]}, {"id": 12, "name": "Jane Smith", "functions": ["Play Violin"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "arrange", "conductor"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["soloist", "arrange"]}, {"id": 5, "name": "Jane Smith", "functions": ["soloist", "conductor"]}]}, {"id": 456, "work": {"id": 345, "name": "Moonlight Sonata", "composer": {"id": 22, "name": "Ludwig van Beethoven", "functions": ["composing", "playing"]}}, "recording_artists": [{"id": 12, "name": "John Doe", "functions": ["playing", "conducting"]}, {"id": 15, "name": "Jane Smith", "functions": ["playing", "composing"]}]}, {"id": 1, "work": {"id": 2, "name": "Moonlight Sonata", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "pianist"]}}, "recording_artists": [{"id": 4, "name": "Maria Joao Pires", "functions": ["pianist"]}, {"id": 5, "name": "Yo-Yo Ma", "functions": ["cello"]}]}, {"id": 1, "work": {"id": 2, "name": "Example Work", "composer": {"id": 3, "name": "Example Composer", "functions": ["composing"]}}, "recording_artists": [{"id": 4, "name": "Example Artist 1", "functions": ["singing"]}, {"id": 5, "name": "Example Artist 2", "functions": ["playing guitar"]}]}, {"id": 1, "work": {"id": 2, "name": "Nocturne in C# minor", "composer": {"id": 3, "name": "Fr\u00e9d\u00e9ric Chopin", "functions": ["compose", "perform", "teach"]}}, "recording_artists": [{"id": 3, "name": "Fr\u00e9d\u00e9ric Chopin", "functions": ["perform"]}, {"id": 4, "name": "Arthur Rubinstein", "functions": ["perform"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 5", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "Conductor A", "functions": ["conduct"]}, {"id": 5, "name": "Violinist B", "functions": ["play violin"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 5", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose"]}}, "recording_artists": [{"id": 4, "name": "Orchestra ABC", "functions": ["perform"]}, {"id": 5, "name": "Conductor XYZ", "functions": ["conduct"]}]}, {"id": 1, "work": {"id": 101, "name": "Moonlight Sonata", "composer": {"id": 5, "name": "Ludwig van Beethoven", "functions": ["composer", "pianist"]}}, "recording_artists": [{"id": 3, "name": "Daniel Barenboim", "functions": ["pianist"]}]}, {"id": 1, "work": {"id": 101, "name": "Moonlight Sonata", "composer": {"id": 10, "name": "Ludwig van Beethoven", "functions": ["compose", "perform"]}}, "recording_artists": [{"id": 1, "name": "Alice", "functions": ["sing", "compose"]}, {"id": 2, "name": "Bob", "functions": ["play", "compose"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["composer", "conductor"]}}, "recording_artists": [{"id": 4, "name": "Maria Callas", "functions": ["soloist"]}, {"id": 5, "name": "Chicago Symphony Orchestra", "functions": ["ensemble"]}]}, {"id": 1, "work": {"id": 2, "name": "Moonlight Sonata", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "Pavel Gililov", "functions": ["piano", "teacher"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["perform"]}, {"id": 5, "name": "Jane Smith", "functions": ["conductor"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 5", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose"]}}, "recording_artists": [{"id": 4, "name": "Orpheus Orchestra", "functions": ["perform"]}, {"id": 5, "name": "James Gaffigan", "functions": ["conduct"]}]}, {"id": 1, "work": {"id": 101, "name": "Moonlight Sonata", "composer": {"id": 201, "name": "Ludwig van Beethoven", "functions": ["compose", "piano"]}}, "recording_artists": [{"id": 202, "name": "Eugene Istomin", "functions": ["piano"]}, {"id": 203, "name": "Ronald Leonard", "functions": ["cello"]}, {"id": 204, "name": "Isaac Stern", "functions": ["violin"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 5", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["soloist", "conductor"]}, {"id": 5, "name": "Jane Smith", "functions": ["violinist"]}]}, {"id": 1, "work": {"id": 2, "name": "Nocturne", "composer": {"id": 3, "name": "Chopin", "functions": ["compose", "perform"]}}, "recording_artists": [{"id": 4, "name": "Alice", "functions": ["record", "mix"]}, {"id": 5, "name": "Bob", "functions": ["direct"]}]}, {"id": 1, "work": {"id": 2, "name": "Nocturne in C#", "composer": {"id": 3, "name": "Chopin", "functions": ["compose", "perform"]}}, "recording_artists": [{"id": 4, "name": "Lang Lang", "functions": ["perform"]}, {"id": 5, "name": "Evgeny Kissin", "functions": ["perform"]}]}, {"id": 1, "work": {"id": 2, "name": "Moonlight Sonata", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "perform"]}}, "recording_artists": [{"id": 4, "name": "Peter Schafrik", "functions": ["record", "mix"]}, {"id": 5, "name": "Nicole Paul", "functions": ["engineer"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 5", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose"]}}, "recording_artists": [{"id": 4, "name": "Maestro Jones", "functions": ["conduct"]}, {"id": 5, "name": "Paula Smith", "functions": ["soloist"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 5", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "George Solti", "functions": ["conduct"]}, {"id": 5, "name": "London Symphony Orchestra", "functions": ["perform"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["play violin"]}, {"id": 5, "name": "Jane Smith", "functions": ["play piano"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 5", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "Herbert von Karajan", "functions": ["conduct"]}, {"id": 5, "name": "Berlin Philharmonic Orchestra", "functions": ["perform"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 5", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["perform", "conduct"]}, {"id": 5, "name": "Jane Smith", "functions": ["play violin"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 5", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["perform"]}, {"id": 5, "name": "Jane Smith", "functions": ["conductor"]}]}, {"id": 1, "work": {"id": 2, "name": "Moonlight Sonata", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "piano"]}}, "recording_artists": [{"id": 4, "name": "Alice", "functions": ["piano"]}, {"id": 5, "name": "Bob", "functions": ["violin"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["sing", "play violin"]}, {"id": 5, "name": "Jane Smith", "functions": ["conduct", "compose"]}]}, {"id": 1, "work": {"id": 2, "name": "Opus 1", "composer": {"id": 3, "name": "John Doe", "functions": ["composition", "conducting"]}}, "recording_artists": [{"id": 3, "name": "John Doe", "functions": ["composition", "conducting"]}, {"id": 4, "name": "Jane Smith", "functions": ["violin"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 1", "composer": {"id": 3, "name": "Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "Conductor A", "functions": ["conduct"]}, {"id": 5, "name": "Conductor B", "functions": ["conduct", "compose"]}]}, {"id": 1, "work": {"id": 2, "name": "Moonlight Sonata", "composer": {"id": 3, "name": "Beethoven", "functions": ["compose", "inspire"]}}, "recording_artists": [{"id": 4, "name": "Pianist John", "functions": ["perform", "teach"]}]}, {"id": 1, "work": {"id": 101, "name": "Moonlight Sonata", "composer": {"id": 11, "name": "Ludwig van Beethoven", "functions": ["compose", "arrange"]}}, "recording_artists": [{"id": 12, "name": "Peter Schrafft", "functions": ["play", "conduct"]}, {"id": 13, "name": "John Smith", "functions": ["record", "edit"]}]}, {"id": 1, "work": {"id": 2, "name": "Spring", "composer": {"id": 3, "name": "Vivaldi", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["sing", "arrange"]}, {"id": 5, "name": "Jane Smith", "functions": ["play violin", "compose"]}]}, {"id": 1, "work": {"id": 2, "name": "Moonlight Sonata", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "play"]}}, "recording_artists": [{"id": 4, "name": "Pianist 1", "functions": ["play"]}, {"id": 5, "name": "Pianist 2", "functions": ["play"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 1", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["composer", "pianist"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["violinist"]}, {"id": 5, "name": "Jane Smith", "functions": ["cellist"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 5", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose"]}}, "recording_artists": [{"id": 4, "name": "John Smith", "functions": ["play violin"]}, {"id": 5, "name": "Jane Doe", "functions": ["play piano"]}]}, {"id": 123, "work": {"id": 456, "name": "Symphony No. 5", "composer": {"id": 789, "name": "Ludwig van Beethoven", "functions": ["compose", "perform"]}}, "recording_artists": [{"id": 101, "name": "Sarah Johnson", "functions": ["sing", "compose"]}, {"id": 102, "name": "John Smith", "functions": ["play violin"]}]}, {"id": 1, "work": {"id": 101, "name": "Symphony No. 9", "composer": {"id": 202, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 303, "name": "John Doe", "functions": ["sing", "play guitar"]}, {"id": 404, "name": "Jane Smith", "functions": ["play piano"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 3, "name": "Ludwig van Beethoven", "functions": ["conduct"]}, {"id": 4, "name": "Berlin Philharmonic Orchestra", "functions": ["play"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "Conductor X", "functions": ["conduct"]}, {"id": 5, "name": "Violinist Y", "functions": ["play"]}]}, {"id": 1, "work": {"id": 2, "name": "Moonlight Sonata", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["perform"]}, {"id": 5, "name": "Jane Smith", "functions": ["record"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Beethoven", "functions": ["compose", "orchestrate"]}}, "recording_artists": [{"id": 4, "name": "Daniel Barenboim", "functions": ["conduct"]}, {"id": 5, "name": "Berlin Philharmonic", "functions": ["perform"]}]}, {"id": 1, "work": {"id": 101, "name": "Symphony No. 9", "composer": {"id": 201, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 301, "name": "Conductor A", "functions": ["conduct"]}, {"id": 302, "name": "Violinist B", "functions": ["play"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 5", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "John Smith", "functions": ["play violin", "conduct"]}, {"id": 5, "name": "Jane Doe", "functions": ["sing", "compose"]}]}, {"id": 1, "work": {"id": 101, "name": "Symphony No. 9", "composer": {"id": 10, "name": "Ludwig van Beethoven", "functions": ["compose", "conductor"]}}, "recording_artists": [{"id": 1, "name": "John Doe", "functions": ["plays", "sings"]}, {"id": 2, "name": "Jane Smith", "functions": ["conducts", "arranges"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 5", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "Michael Gielen", "functions": ["conduct"]}, {"id": 5, "name": "Berlin Philharmonic", "functions": ["perform"]}]}, {"id": 1, "work": {"id": 2, "name": "Moonlight Sonata", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "piano"]}}, "recording_artists": [{"id": 4, "name": "Peter Schaber", "functions": ["piano"]}, {"id": 5, "name": "John Smith", "functions": ["producer"]}]}, {"id": 1, "work": {"id": 101, "name": "Symphony No. 9", "composer": {"id": 201, "name": "Ludwig van Beethoven", "functions": ["compose", "conductor"]}}, "recording_artists": [{"id": 301, "name": "John Doe", "functions": ["soloist"]}, {"id": 302, "name": "Jane Smith", "functions": ["violinist"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 5", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose"]}}, "recording_artists": [{"id": 4, "name": "Conductor A", "functions": ["conduct"]}, {"id": 5, "name": "Violinist B", "functions": ["play violin"]}]}, {"id": 1, "work": {"id": 2, "name": "Romeo and Juliet", "composer": {"id": 3, "name": "Various arrangements", "functions": ["arranger", "orchestrator"]}}, "recording_artists": [{"id": 4, "name": "Sarah", "functions": ["singer", "whistler"]}, {"id": 5, "name": "John", "functions": ["guitarist", "composer"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose"]}}, "recording_artists": [{"id": 4, "name": "Heifetz Orchestra", "functions": ["perform"]}]}, {"id": 12345, "work": {"id": 67890, "name": "Symphony No. 5", "composer": {"id": 1, "name": "Ludwig van Beethoven", "functions": ["compose", "piano"]}}, "recording_artists": [{"id": 2, "name": "John Doe", "functions": ["piano", "compose"]}, {"id": 3, "name": "Jane Smith", "functions": ["violin"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "Concert Orchestra", "functions": ["perform"]}, {"id": 5, "name": "Maestro Ivanov", "functions": ["conduct"]}]}, {"id": 1, "work": {"id": 2, "name": "Example Work", "composer": {"id": 3, "name": "Example Composer", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "Example Artist 1", "functions": ["solo", "ensemble"]}, {"id": 5, "name": "Example Artist 2", "functions": ["orchestra", "chorus"]}]}, {"id": 1, "work": {"id": 2, "name": "Moonlight Sonata", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "piano"]}}, "recording_artists": [{"id": 4, "name": "Mitsuko Uchida", "functions": ["piano", "teaching"]}, {"id": 5, "name": "Yo-Yo Ma", "functions": ["cello", "conduction"]}]}, {"id": 123, "work": {"id": 456, "name": "Nocturne in E-flat major", "composer": {"id": 789, "name": "Fr\u00e9d\u00e9ric Chopin", "functions": ["composing"]}}, "recording_artists": [{"id": 1234, "name": "John Smith", "functions": ["singing", "guitar"]}, {"id": 2345, "name": "Jane Doe", "functions": ["piano"]}]}, {"id": 1, "work": {"id": 101, "name": "Symphony No. 9", "composer": {"id": 10, "name": "Ludwig van Beethoven", "functions": ["compose", "arrange"]}}, "recording_artists": [{"id": 11, "name": "Concerto Philharmonic", "functions": ["perform"]}, {"id": 12, "name": "Aleksandra", "functions": ["soloist"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 5", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "Conductor A", "functions": ["conduct"]}, {"id": 5, "name": "Violinist B", "functions": ["play violin"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "conductor"]}}, "recording_artists": [{"id": 4, "name": "Daniel Barenboim", "functions": ["conductor"]}, {"id": 5, "name": "Chicago Symphony Orchestra", "functions": ["perform"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose"]}}, "recording_artists": [{"id": 4, "name": "Orchestra London", "functions": ["play"]}, {"id": 5, "name": "Conductor Smith", "functions": ["conduct"]}]}, {"id": 123, "work": {"id": 456, "name": "Symphony No. 9", "composer": {"id": 789, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 101, "name": "John Doe", "functions": ["sing"]}, {"id": 102, "name": "Jane Smith", "functions": ["play violin"]}]}, {"id": 1, "work": {"id": 2, "name": "Moonlight Sonata", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "piano"]}}, "recording_artists": [{"id": 4, "name": "Ivan Ourinsky", "functions": ["piano"]}, {"id": 5, "name": "Victoria Bugden", "functions": ["violin"]}]}, {"id": 1, "work": {"id": 2, "name": "Serenade", "composer": {"id": 3, "name": "John Doe", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "Jane Smith", "functions": ["play violin"]}, {"id": 5, "name": "Mark Johnson", "functions": ["play piano"]}]}, {"id": 1, "work": {"id": 2, "name": "Fourth Symphony", "composer": {"id": 3, "name": "Pyotr Ilyich Tchaikovsky", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "Harry Potter", "functions": ["vocals"]}, {"id": 5, "name": "Ron Weasley", "functions": ["percussion"]}]}, {"id": 1, "work": {"id": 2, "name": "Example Work", "composer": {"id": 3, "name": "Example Composer", "functions": ["compose", "arrange"]}}, "recording_artists": [{"id": 4, "name": "Example Artist 1", "functions": ["solo", "ensemble"]}, {"id": 5, "name": "Example Artist 2", "functions": ["orchestra", "ensemble"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "conduct"]}}, "recording_artists": [{"id": 4, "name": "Jane Smith", "functions": ["sing", "compose"]}, {"id": 5, "name": "Sam Johnson", "functions": ["play", "conduct"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 9", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["play violin"]}, {"id": 5, "name": "Jane Smith", "functions": ["conduct"]}]}, {"id": 42, "work": {"id": 101, "name": "Symphony No. 5", "composer": {"id": 7, "name": "Ludwig van Beethoven", "functions": ["compose"]}}, "recording_artists": [{"id": 25, "name": "Concerto Philharmonic", "functions": ["perform"]}, {"id": 26, "name": "Conductor Maestro", "functions": ["conduct"]}]}, {"id": 1, "work": {"id": 101, "name": "Symphony No. 9", "composer": {"id": 345, "name": "Ludwig van Beethoven", "functions": ["compose"]}}, "recording_artists": [{"id": 456, "name": "John Doe", "functions": ["soloist"]}]}, {"id": 1, "work": {"id": 2, "name": "Symphony No. 5", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose"]}}, "recording_artists": [{"id": 4, "name": "John Doe", "functions": ["play"]}, {"id": 5, "name": "Jane Smith", "functions": ["conduct"]}]}, {"id": 1, "work": {"id": 2, "name": "Moonlight Sonata", "composer": {"id": 3, "name": "Ludwig van Beethoven", "functions": ["compose", "piano"]}}, "recording_artists": [{"id": 4, "name": "Daniel Barenboim", "functions": ["conduct", "piano"]}, {"id": 5, "name": "Martha Argerich", "functions": ["piano"]}]}, {"id": 1, "work": {"id": 2, "name": "Song of the Wind", "composer": {"id": 3, "name": "Ludovico Einaudi", "functions": ["compose", "perform"]}}, "recording_artists": [{"id": 4, "name": "John Smith", "functions": ["vocal"]}, {"id": 5, "name": "Alice Johnson", "functions": ["guitar"]}]}] diff --git a/src/samples/rpg_characters.json b/src/samples/rpg_characters.json new file mode 100644 index 0000000..a0dc6af --- /dev/null +++ b/src/samples/rpg_characters.json @@ -0,0 +1 @@ +[{"name": "Gandalf", "age": 200, "armor": "chainmail", "strength": 150}, {"name": "Aldorin", "age": 26, "armor": "chainmail", "strength": 15}, {"name": "Gandalf", "age": 300, "armor": "chainmail", "strength": 18}, {"name": "Arthur", "age": 25, "armor": "chainmail", "strength": 12}, {"name": "Aldor", "age": 27, "armor": "chainmail", "strength": 14}, {"name": "Gandalf", "age": 300, "armor": "chainmail", "strength": 15}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 10000}, {"name": "Arnor", "age": 27, "armor": "chainmail", "strength": 16}, {"name": "Thorn", "age": 25, "armor": "chainmail", "strength": 14}, {"name": "Gandalf", "age": 200, "armor": "plate", "strength": 15}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 10000}, {"name": "Aldorin", "age": 27, "armor": "chainmail", "strength": 15}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 14}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 15000}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 10000}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 16}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 15}, {"name": "Aldorin", "age": 27, "armor": "chainmail", "strength": 16}, {"name": "Arther", "age": 25, "armor": "chainmail", "strength": 15}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 16}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 10000}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 14}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 10000}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 15}, {"name": "Gandalf", "age": 200, "armor": "chainmail", "strength": 1000}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 16}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 15000}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 14}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 10000}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 15}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 14}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 16}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 16}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 15}, {"name": "Gandalf", "age": 200, "armor": "chainmail", "strength": 15}, {"name": "Aldorin", "age": 27, "armor": "chainmail", "strength": 14}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 16}, {"name": "Aldorin", "age": 27, "armor": "chainmail", "strength": 15}, {"name": "Aldorin", "age": 27, "armor": "chainmail", "strength": 14}, {"name": "Aldor", "age": 25, "armor": "chainmail", "strength": 12}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 15000}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 15}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 15}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 10000}, {"name": "Aldorin", "age": 27, "armor": "chainmail", "strength": 16}, {"name": "Aldor", "age": 25, "armor": "chainmail", "strength": 12}, {"name": "Gandalf", "age": 1000, "armor": "chainmail", "strength": 85}, {"name": "Gandalf", "age": 250, "armor": "chainmail", "strength": 50}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 15000}, {"name": "Aldorin", "age": 27, "armor": "chainmail", "strength": 15}, {"name": "Thor", "age": 25, "armor": "plate", "strength": 80}, {"name": "Aldorin", "age": 27, "armor": "chainmail", "strength": 15}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 15}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 16}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 14}, {"name": "Aldor", "age": 27, "armor": "chainmail", "strength": 15}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 10000}, {"name": "Aldor", "age": 25, "armor": "chainmail", "strength": 18}, {"name": "Aldorin", "age": 27, "armor": "chainmail", "strength": 16}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 15}, {"name": "Aldorin", "age": 27, "armor": "chainmail", "strength": 15}, {"name": "Aldorin", "age": 28, "armor": "chainmail", "strength": 15}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 10000}, {"name": "Gandalf", "age": 200, "armor": "chainmail", "strength": 10}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 15}, {"name": "Gandalf", "age": 250, "armor": "chainmail", "strength": 150}, {"name": "Aldorin", "age": 27, "armor": "chainmail", "strength": 15}, {"name": "Aldorin", "age": 27, "armor": "chainmail", "strength": 14}, {"name": "Aldorin", "age": 28, "armor": "chainmail", "strength": 15}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 10000}, {"name": "Gandalf", "age": 1000, "armor": "chainmail", "strength": 75}, {"name": "Aragorn", "age": 30, "armor": "chainmail", "strength": 18}, {"name": "Alex", "age": 25, "armor": "chainmail", "strength": 12}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 10}, {"name": "Aldorin", "age": 27, "armor": "chainmail", "strength": 15}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 15000}, {"name": "Aldorin", "age": 27, "armor": "chainmail", "strength": 15}, {"name": "Gandalf", "age": 250, "armor": "chainmail", "strength": 15000}, {"name": "Gandalf", "age": 1000, "armor": "chainmail", "strength": 80}, {"name": "Aldorin", "age": 27, "armor": "chainmail", "strength": 15}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 85}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 14}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 10000}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 1000}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 15}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 75}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 10000}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 10000}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 15}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 15}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 10000}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 15}, {"name": "Gandalf", "age": 250, "armor": "chainmail", "strength": 15}, {"name": "Gandalf", "age": 2000, "armor": "chainmail", "strength": 10000}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 15}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 15}, {"name": "Aldor", "age": 25, "armor": "chainmail", "strength": 12}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 12}, {"name": "Aldorin", "age": 25, "armor": "chainmail", "strength": 15}, {"name": "Aldon", "age": 25, "armor": "chainmail", "strength": 15}] diff --git a/src/samples/url.json b/src/samples/url.json new file mode 100644 index 0000000..e378faf --- /dev/null +++ b/src/samples/url.json @@ -0,0 +1 @@ +["https://www.vibilagare.se/nyheter/physical-buttons-outperform-touchscreens-new-cars-test-finds", "https://www.coindesk.com/business/2022/04/13/jack-dorseys-first-tweet-nft-went-on-sale-for-48m-it-ended-with-a-top-bid-of-just-280/", "https://gizmodo.com/tiktok-china-byte-dance-ban-viral-videos-privacy-1850034366", "https://arstechnica.com/science/2022/08/us-government-to-make-all-research-it-funds-open-access-on-publication/", "https://www.theverge.com/2021/2/1/22261178/robinhood-google-play-store-app-rating-one-star-reviews-stocks-gamestop-reddit", "https://www.theverge.com/2022/3/1/22957000/apple-russia-ukraine-invasion-halt-product-sales-app-store", "https://bc.ctvnews.ca/single-beaver-caused-mass-internet-cell-service-outages-in-northern-b-c-1.5944697", "https://beebom.com/youtube-co-founder-thinks-removing-dislike-count-wrong-decision/", "https://fortune.com/2022/03/24/anonymous-says-hacked-russia-central-bank-ukraine-cyber-war/", "https://www.bloomberg.com/news/articles/2022-09-28/nft-volumes-tumble-97-from-2022-highs-as-frenzy-fades-chart", "https://www.theverge.com/2022/3/4/22961286/bbc-news-blocked-in-russia-ukraine-invasion-shortwave-radio", "https://electrek.co/2022/07/07/wind-and-solar-produce-more-electricity-than-nuclear-for-the-first-time-in-the-us/", "https://www.zdnet.com/article/ukraine-asks-for-russia-to-be-kicked-off-the-internet/", "https://www.rte.ie/news/business/2022/0305/1284551-ukraine-reaction/", "https://electrek.co/2022/08/26/in-a-us-first-california-will-pilot-solar-panel-canopies-over-canals/", "https://www.independent.co.uk/tech/solar-panel-world-record-window-b2211057.html", "https://www.theverge.com/2021/1/10/22223956/parler-ceo-john-matze-lawyers-vendors-abandoning", "https://www.bbc.co.uk/news/technology-55569604", "https://www.theverge.com/2021/1/13/22230001/snapchat-trump-ban-permanently-account-january-20th-innaguration-snap", "https://www.reuters.com/business/verizon-raises-minimum-wage-20-an-hour-us-employees-2022-04-18/", "https://indianarrative.com/latest-news/youtube-deletes-new-trump-video-suspends-fresh-uploads-61281.html", "https://www.bbc.co.uk/news/technology-55598887", "https://screenshot-media.com/technology/social-media/facebook-banning-anti-vaxxers/", "https://www.engadget.com/facebook-bans-stop-the-steal-ahead-of-inaguration-214239970.html", "https://www.afr.com/policy/foreign-affairs/tiktok-s-alarming-excessive-data-collection-revealed-20220714-p5b1mz", "https://nltimes.nl/2022/10/09/dutch-employee-fired-us-firm-shutting-webcam-awarded-eu75000-court", "https://scitechdaily.com/mit-posts-free-plans-online-for-an-emergency-ventilator-that-can-be-built-for-100/", "https://scitechdaily.com/mit-scientists-develop-new-regenerative-drug-that-reverses-hearing-loss/", "https://www.cnbc.com/2022/03/23/why-apple-co-founder-steve-wozniak-deactivated-his-facebook-account.html", "https://gizmodo.com/full-human-genome-finally-mapped-1848732687", "https://arstechnica.com/science/2021/02/texas-power-grid-crumples-under-the-cold/", "https://www.cnbc.com/2022/02/18/fed-approves-rules-banning-its-officials-from-trading-stocks-bonds-and-also-cryptocurrencies.html?", "https://www.bbc.co.uk/news/technology-59638565", "https://interestingengineering.com/elon-musk-delivers-1000-ventilators-to-california-hospitals-to-treat-covid-19-patients?_source=newsletter&_campaign=JeMmXErgJ3gnE&_uid=ELe31LvRb6&_h=8dbd61de296e2550ced97fc2451e844130ee7e83&utm_source=newsletter&utm_medium=mailing&utm_campaign=Newsletter-24-03-2020", "https://www.engadget.com/apple-return-to-office-petition-093339546.html", "https://www.pcmag.com/news/pornhub-purges-10-million-videos-after-losing-credit-card-support", "https://www.theregister.com/2021/01/26/apple_italy_battery/", "https://www.thenationalnews.com/business/technology/2023/01/04/twitter-said-to-have-suffered-data-breach-as-hackers-expose-235-million-users-information/", "https://www.theverge.com/2022/3/21/22988901/fortnite-ukraine-relief", "https://screenshot-media.com/politics/human-rights/mackenzie-scott-donations/", "https://www.theverge.com/2022/3/5/22962822/internet-backbone-provider-cogent-shuts-off-service-russia", "https://arstechnica.com/tech-policy/2022/09/ntsb-wants-alcohol-detection-systems-installed-in-all-new-cars-in-us/", "https://www.pcmag.com/news/robinhood-now-faces-over-30-class-action-lawsuits-for-blocking-stock-buys", "https://arstechnica.com/tech-policy/2022/07/two-senators-propose-ban-on-data-caps-blasting-isps-for-predatory-limits/", "https://www.bbc.co.uk/news/technology-54838977", "https://www.reuters.com/article/us-amazon-com-parler/parler-loses-bid-to-have-service-restored-by-amazon-idUSKBN29Q2T3", "https://boingboing.net/2021/02/10/cops-playing-copyrighted-music-to-stop-video-of-them-being-posted-online.html", "https://gizmodo.com/get-ready-for-in-car-ads-1846888390", "https://metro.co.uk/2022/06/27/netflix-is-definietly-going-to-start-showing-adverts-exec-confirms-16896753/", "https://hothardware.com/news/crypto-crashed-hard-no-longer-profitable-mine-ethereum", "https://www.bbc.com/news/business-61976350", "https://fortune.com/2022/08/31/crypto-com-accidentally-transferred-10-million-to-thevamanogari-manivel/", "https://www.jpost.com/breaking-news/article-700940", "https://www.theregister.com/2022/10/18/firefox_106_arrives/?td=rt-3a", "https://www.theverge.com/2022/12/20/23518945/delta-free-in-flight-wi-fi-rewards-2023", "https://www.wittyscoop.com/facebook-lands-in-500-billion-trouble-for-collecting-user-data-through-instagram/", "https://www.zdnet.com/article/fcc-aggressively-moves-to-block-spam-calls/", "https://gizmodo.com/doctors-were-able-to-study-a-mans-brain-activity-as-he-1848590805", "https://www.usatoday.com/story/news/politics/2020/07/06/supreme-court-wont-allow-political-robocalls-cellphones/3209646001/", "https://www.reuters.com/technology/zuckerberg-loses-29-billion-day-meta-shares-crash-2022-02-03/", "https://www.pcgamer.com/its-official-nvidia-is-now-worth-more-than-meta/", "https://www.cbc.ca/news/canada/british-columbia/beaver-internet-down-tumbler-ridge-1.6001594", "https://arstechnica.com/tech-policy/2021/02/texas-couples-9546-power-bill-spurs-class-action-lawsuit-against-griddy/", "https://www.npr.org/2022/08/22/1118734792/michigan-man-isp-fiber-internet", "https://arstechnica.com/tech-policy/2021/01/airbnb-cancels-all-dc-area-reservations-ahead-of-biden-inauguration/", "https://www.wired.com/story/audit-slams-facebook-home-misinformation-hate/#intcid=recommendations_wired-homepage-right-rail_5a574487-25be-4a47-a69e-b67fa0f5429c_popular4-1", "https://wccftech.com/apple-google-microsoft-team-up-to-stop-right-to-repair-law", "https://nextshark.com/youtuber-exposes-scam-ring-by-hacking/", "https://www.cnn.com/2020/10/12/tech/microsoft-election-ransomware/index.html", "https://www.indiatoday.in/technology/news/story/facebook-bans-over-300-accounts-that-claimed-covid-19-vaccines-would-turn-humans-into-chimpanzees-1840478-2021-08-13", "https://www.independent.co.uk/life-style/gadgets-and-tech/facebook-down-instagram-whatsapp-today-b1932059.html", "https://www.indiatoday.in/technology/news/story/apple-becomes-first-tech-giant-to-explicitly-ban-caste-discrimination-trains-managers-on-indian-caste-system-1988183-2022-08-15", "https://www.techspot.com/news/97772-salesforce-which-laid-off-8000-workers-pays-creative.html", "https://mashable.com/article/amazon-jobs-marijuana-workers/?europe=true", "https://nextshark.com/scientists-develop-hepa-filters/", "https://www.datacenterdynamics.com/en/news/cisco-to-quit-russia-and-belarus-due-to-ukraine-war/", "https://www.wired.com/story/facial-recognition-identify-russian-soldiers/#intcid=_wired-verso-hp-trending_62557b0e-dc13-4e31-b93e-224971ac6f17_popular4-1", "https://www.buzzfeednews.com/article/emilybakerwhite/tiktok-tapes-us-user-data-china-bytedance-access", "https://english.kyodonews.net/news/2022/06/9a7dbced6c3a-amino-acids-found-in-asteroid-samples-collected-by-hayabusa2-probe.html", "https://www.reuters.com/technology/twitch-bans-russian-state-media-move-against-harmful-misinformation-spreaders-2022-03-03/", "https://www.newsweek.com/anonymous-leaks-personal-data-120k-russian-soldiers-fighting-ukraine-1694555", "https://www.sciencealert.com/japan-s-dropping-a-kaiju-sized-turbine-into-the-ocean-to-fish-for-limitless-energy", "https://www.tomshardware.com/news/supermassive-black-hole-consumed-100-million-cpu-hours", "https://techcrunch.com/2020/06/05/lidar-helps-uncover-an-ancient-kilometer-long-mayan-structure/", "https://www.cnbc.com/2023/02/10/us-sanctions-six-chinese-tech-companies-for-supporting-spy-balloon-programs.html", "https://www.theverge.com/2021/2/7/22271797/reddit-super-bowl-ad-five-seconds-gamestop", "https://www.theregister.com/2020/10/14/comcast_internet_interview_fail/", "https://arstechnica.com/tech-policy/2021/01/neo-nazi-robocaller-fined-10m-after-calls-targeting-black-politicians/", "https://arstechnica.com/gadgets/2022/05/remote-work-conflict-continues-at-apple-with-at-least-one-prominent-staff-departure/", "https://www.bloomberg.com/news/articles/2022-03-09/russians-are-finding-ways-around-putin-s-internet-blockade?srnd=technology-vp", "https://www.theverge.com/2022/10/15/23405389/att-illinois-23-million-investigation-bribe-corruption", "https://www.engadget.com/facebook-advertiser-boycott-honda-hershey-unilever-coca-cola-005146173.html", "https://www.reuters.com/article/us-usa-biden-cyber/after-big-hack-of-u-s-government-biden-enlists-world-class-cybersecurity-team-idUSKBN29R18I", "https://www.newsweek.com/nuclear-fusion-energy-milestone-ignition-confirmed-california-1733238", "https://arstechnica.com/cars/2022/06/ford-wants-to-sell-evs-online-only-with-no-dealer-markups-says-ceo-farley/", "https://www.independent.co.uk/life-style/gadgets-and-tech/trump-twitter-account-suspended-election-rules-b1620154.html", "https://www.insider.com/disaster-girl-made-half-million-selling-her-meme-as-nft-2021-4", "https://www.bostonglobe.com/2021/07/19/business/maine-move-make-big-companies-pay-all-their-packaging/", "https://www.newsweek.com/creators-russia-can-no-longer-cash-posting-material-youtube-1686600", "https://www.bbc.co.uk/news/technology-54509970"]