From 57786a76b21f0a603f8cacd24e5626273236593a Mon Sep 17 00:00:00 2001 From: "github-actions[bot]" <41898282+github-actions[bot]@users.noreply.github.com> Date: Wed, 27 Nov 2024 02:42:04 +0000 Subject: [PATCH] Deployed bbd54bc with MkDocs version: 1.6.1 --- .nojekyll | 0 404.html | 721 + advanced/custom-operators/index.html | 755 + advanced/extending-agents/index.html | 755 + advanced/performance-tuning/index.html | 755 + api-reference/cli/index.html | 1354 ++ api-reference/docetl/index.html | 8049 ++++++++ api-reference/operations/index.html | 16517 ++++++++++++++++ api-reference/optimizers/index.html | 11123 +++++++++++ api-reference/python/index.html | 3260 +++ assets/_mkdocstrings.css | 143 + assets/docetl-favicon-color.png | Bin 0 -> 1188 bytes assets/headerdiagram.png | Bin 0 -> 573787 bytes assets/images/favicon.png | Bin 0 -> 1870 bytes assets/javascripts/bundle.83f73b43.min.js | 16 + assets/javascripts/bundle.83f73b43.min.js.map | 7 + assets/javascripts/glightbox.min.js | 1 + assets/javascripts/lunr/min/lunr.ar.min.js | 1 + assets/javascripts/lunr/min/lunr.da.min.js | 18 + assets/javascripts/lunr/min/lunr.de.min.js | 18 + assets/javascripts/lunr/min/lunr.du.min.js | 18 + assets/javascripts/lunr/min/lunr.el.min.js | 1 + assets/javascripts/lunr/min/lunr.es.min.js | 18 + assets/javascripts/lunr/min/lunr.fi.min.js | 18 + assets/javascripts/lunr/min/lunr.fr.min.js | 18 + assets/javascripts/lunr/min/lunr.he.min.js | 1 + assets/javascripts/lunr/min/lunr.hi.min.js | 1 + assets/javascripts/lunr/min/lunr.hu.min.js | 18 + assets/javascripts/lunr/min/lunr.hy.min.js | 1 + assets/javascripts/lunr/min/lunr.it.min.js | 18 + assets/javascripts/lunr/min/lunr.ja.min.js | 1 + assets/javascripts/lunr/min/lunr.jp.min.js | 1 + assets/javascripts/lunr/min/lunr.kn.min.js | 1 + assets/javascripts/lunr/min/lunr.ko.min.js | 1 + assets/javascripts/lunr/min/lunr.multi.min.js | 1 + assets/javascripts/lunr/min/lunr.nl.min.js | 18 + assets/javascripts/lunr/min/lunr.no.min.js | 18 + 
assets/javascripts/lunr/min/lunr.pt.min.js | 18 + assets/javascripts/lunr/min/lunr.ro.min.js | 18 + assets/javascripts/lunr/min/lunr.ru.min.js | 18 + assets/javascripts/lunr/min/lunr.sa.min.js | 1 + .../lunr/min/lunr.stemmer.support.min.js | 1 + assets/javascripts/lunr/min/lunr.sv.min.js | 18 + assets/javascripts/lunr/min/lunr.ta.min.js | 1 + assets/javascripts/lunr/min/lunr.te.min.js | 1 + assets/javascripts/lunr/min/lunr.th.min.js | 1 + assets/javascripts/lunr/min/lunr.tr.min.js | 18 + assets/javascripts/lunr/min/lunr.vi.min.js | 1 + assets/javascripts/lunr/min/lunr.zh.min.js | 1 + assets/javascripts/lunr/tinyseg.js | 206 + assets/javascripts/lunr/wordcut.js | 6708 +++++++ .../workers/search.6ce7567c.min.js | 42 + .../workers/search.6ce7567c.min.js.map | 7 + assets/medical_transcripts.json | 437 + assets/readmefig.png | Bin 0 -> 377510 bytes assets/stylesheets/glightbox.min.css | 1 + assets/stylesheets/main.0253249f.min.css | 1 + assets/stylesheets/main.0253249f.min.css.map | 1 + assets/stylesheets/palette.06af60db.min.css | 1 + .../stylesheets/palette.06af60db.min.css.map | 1 + assets/tutorial/all-outputs.png | Bin 0 -> 602654 bytes assets/tutorial/console.png | Bin 0 -> 514925 bytes assets/tutorial/dataset-view.png | Bin 0 -> 671721 bytes assets/tutorial/initial-quotes.png | Bin 0 -> 584147 bytes assets/tutorial/operation-details.png | Bin 0 -> 335569 bytes assets/tutorial/playground-screenshot.png | Bin 0 -> 202071 bytes assets/tutorial/v2-quotes.png | Bin 0 -> 581948 bytes best-practices/index.html | 1990 ++ community/index.html | 941 + community/roadmap/index.html | 983 + concepts/operators/index.html | 2035 ++ concepts/optimization/index.html | 1897 ++ concepts/pipelines/index.html | 1902 ++ concepts/schemas/index.html | 1981 ++ .../annotating-legal-documents/index.html | 752 + .../characterizing-troll-behavior/index.html | 752 + examples/custom-parsing/index.html | 3096 +++ examples/mining-product-reviews/index.html | 1642 ++ examples/ollama/index.html | 
1205 ++ .../presidential-debate-themes/index.html | 1618 ++ examples/rate-limiting/index.html | 1021 + examples/split-gather/index.html | 1550 ++ execution/running-pipelines/index.html | 923 + index.html | 1707 ++ installation/index.html | 1833 ++ objects.inv | Bin 0 -> 1317 bytes operators/cluster/index.html | 1970 ++ operators/code/index.html | 1965 ++ operators/equijoin/index.html | 1924 ++ operators/filter/index.html | 1903 ++ operators/gather/index.html | 2477 +++ operators/link-resolve/index.html | 900 + operators/map/index.html | 2225 +++ operators/parallel-map/index.html | 1961 ++ operators/reduce/index.html | 2261 +++ operators/resolve/index.html | 2023 ++ operators/sample/index.html | 1985 ++ operators/split/index.html | 2218 +++ operators/unnest/index.html | 2105 ++ optimization/configuration/index.html | 1879 ++ optimization/example/index.html | 2190 ++ optimization/overview/index.html | 1896 ++ optimization/python-api/index.html | 1743 ++ playground/features/index.html | 905 + playground/index.html | 1004 + playground/tutorial/index.html | 1053 + python-api/index.html | 1835 ++ search/search_index.json | 1 + sitemap.xml | 3 + sitemap.xml.gz | Bin 0 -> 127 bytes stylesheets/extra.css | 96 + tutorial/index.html | 1982 ++ 112 files changed, 119480 insertions(+) create mode 100644 .nojekyll create mode 100644 404.html create mode 100644 advanced/custom-operators/index.html create mode 100644 advanced/extending-agents/index.html create mode 100644 advanced/performance-tuning/index.html create mode 100644 api-reference/cli/index.html create mode 100644 api-reference/docetl/index.html create mode 100644 api-reference/operations/index.html create mode 100644 api-reference/optimizers/index.html create mode 100644 api-reference/python/index.html create mode 100644 assets/_mkdocstrings.css create mode 100644 assets/docetl-favicon-color.png create mode 100644 assets/headerdiagram.png create mode 100644 assets/images/favicon.png create mode 100644 
assets/javascripts/bundle.83f73b43.min.js create mode 100644 assets/javascripts/bundle.83f73b43.min.js.map create mode 100644 assets/javascripts/glightbox.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ar.min.js create mode 100644 assets/javascripts/lunr/min/lunr.da.min.js create mode 100644 assets/javascripts/lunr/min/lunr.de.min.js create mode 100644 assets/javascripts/lunr/min/lunr.du.min.js create mode 100644 assets/javascripts/lunr/min/lunr.el.min.js create mode 100644 assets/javascripts/lunr/min/lunr.es.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.fr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.he.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hu.min.js create mode 100644 assets/javascripts/lunr/min/lunr.hy.min.js create mode 100644 assets/javascripts/lunr/min/lunr.it.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ja.min.js create mode 100644 assets/javascripts/lunr/min/lunr.jp.min.js create mode 100644 assets/javascripts/lunr/min/lunr.kn.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ko.min.js create mode 100644 assets/javascripts/lunr/min/lunr.multi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.nl.min.js create mode 100644 assets/javascripts/lunr/min/lunr.no.min.js create mode 100644 assets/javascripts/lunr/min/lunr.pt.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ro.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ru.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sa.min.js create mode 100644 assets/javascripts/lunr/min/lunr.stemmer.support.min.js create mode 100644 assets/javascripts/lunr/min/lunr.sv.min.js create mode 100644 assets/javascripts/lunr/min/lunr.ta.min.js create mode 100644 assets/javascripts/lunr/min/lunr.te.min.js create mode 100644 assets/javascripts/lunr/min/lunr.th.min.js create mode 100644 
assets/javascripts/lunr/min/lunr.tr.min.js create mode 100644 assets/javascripts/lunr/min/lunr.vi.min.js create mode 100644 assets/javascripts/lunr/min/lunr.zh.min.js create mode 100644 assets/javascripts/lunr/tinyseg.js create mode 100644 assets/javascripts/lunr/wordcut.js create mode 100644 assets/javascripts/workers/search.6ce7567c.min.js create mode 100644 assets/javascripts/workers/search.6ce7567c.min.js.map create mode 100644 assets/medical_transcripts.json create mode 100644 assets/readmefig.png create mode 100644 assets/stylesheets/glightbox.min.css create mode 100644 assets/stylesheets/main.0253249f.min.css create mode 100644 assets/stylesheets/main.0253249f.min.css.map create mode 100644 assets/stylesheets/palette.06af60db.min.css create mode 100644 assets/stylesheets/palette.06af60db.min.css.map create mode 100644 assets/tutorial/all-outputs.png create mode 100644 assets/tutorial/console.png create mode 100644 assets/tutorial/dataset-view.png create mode 100644 assets/tutorial/initial-quotes.png create mode 100644 assets/tutorial/operation-details.png create mode 100644 assets/tutorial/playground-screenshot.png create mode 100644 assets/tutorial/v2-quotes.png create mode 100644 best-practices/index.html create mode 100644 community/index.html create mode 100644 community/roadmap/index.html create mode 100644 concepts/operators/index.html create mode 100644 concepts/optimization/index.html create mode 100644 concepts/pipelines/index.html create mode 100644 concepts/schemas/index.html create mode 100644 examples/annotating-legal-documents/index.html create mode 100644 examples/characterizing-troll-behavior/index.html create mode 100644 examples/custom-parsing/index.html create mode 100644 examples/mining-product-reviews/index.html create mode 100644 examples/ollama/index.html create mode 100644 examples/presidential-debate-themes/index.html create mode 100644 examples/rate-limiting/index.html create mode 100644 examples/split-gather/index.html create mode 
100644 execution/running-pipelines/index.html create mode 100644 index.html create mode 100644 installation/index.html create mode 100644 objects.inv create mode 100644 operators/cluster/index.html create mode 100644 operators/code/index.html create mode 100644 operators/equijoin/index.html create mode 100644 operators/filter/index.html create mode 100644 operators/gather/index.html create mode 100644 operators/link-resolve/index.html create mode 100644 operators/map/index.html create mode 100644 operators/parallel-map/index.html create mode 100644 operators/reduce/index.html create mode 100644 operators/resolve/index.html create mode 100644 operators/sample/index.html create mode 100644 operators/split/index.html create mode 100644 operators/unnest/index.html create mode 100644 optimization/configuration/index.html create mode 100644 optimization/example/index.html create mode 100644 optimization/overview/index.html create mode 100644 optimization/python-api/index.html create mode 100644 playground/features/index.html create mode 100644 playground/index.html create mode 100644 playground/tutorial/index.html create mode 100644 python-api/index.html create mode 100644 search/search_index.json create mode 100644 sitemap.xml create mode 100644 sitemap.xml.gz create mode 100644 stylesheets/extra.css create mode 100644 tutorial/index.html diff --git a/.nojekyll b/.nojekyll new file mode 100644 index 00000000..e69de29b diff --git a/404.html b/404.html new file mode 100644 index 00000000..1d6b31c7 --- /dev/null +++ b/404.html @@ -0,0 +1,721 @@ + + + +
+ + + + + + + + + + + + + + +docetl.cli.run(yaml_file=typer.Argument(..., help='Path to the YAML file containing the pipeline configuration'), max_threads=typer.Option(None, help='Maximum number of threads to use for running operations'))
+
+Run the configuration specified in the YAML file.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ yaml_file
+ |
+
+ Path
+ |
+
+
+
+ Path to the YAML file containing the pipeline configuration. + |
+
+ Argument(..., help='Path to the YAML file containing the pipeline configuration')
+ |
+
+ max_threads
+ |
+
+ Optional[int]
+ |
+
+
+
+ Maximum number of threads to use for running operations. + |
+
+ Option(None, help='Maximum number of threads to use for running operations')
+ |
+
docetl/cli.py
55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 |
|
docetl.cli.build(yaml_file=typer.Argument(..., help='Path to the YAML file containing the pipeline configuration'), max_threads=typer.Option(None, help='Maximum number of threads to use for running operations'), model=typer.Option('gpt-4o', help='Model to use for optimization'), resume=typer.Option(False, help='Resume optimization from a previous build that may have failed'), timeout=typer.Option(60, help='Timeout for optimization operations in seconds'))
+
+Build and optimize the configuration specified in the YAML file.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ yaml_file
+ |
+
+ Path
+ |
+
+
+
+ Path to the YAML file containing the pipeline configuration. + |
+
+ Argument(..., help='Path to the YAML file containing the pipeline configuration')
+ |
+
+ max_threads
+ |
+
+ Optional[int]
+ |
+
+
+
+ Maximum number of threads to use for running operations. + |
+
+ Option(None, help='Maximum number of threads to use for running operations')
+ |
+
+ model
+ |
+
+ str
+ |
+
+
+
+ Model to use for optimization. Defaults to "gpt-4o". + |
+
+ Option('gpt-4o', help='Model to use for optimization')
+ |
+
+ resume
+ |
+
+ bool
+ |
+
+
+
+ Whether to resume optimization from a previous run. Defaults to False. + |
+
+ Option(False, help='Resume optimization from a previous build that may have failed')
+ |
+
+ timeout
+ |
+
+ int
+ |
+
+
+
+ Timeout for optimization operations in seconds. Defaults to 60. + |
+
+ Option(60, help='Timeout for optimization operations in seconds')
+ |
+
docetl/cli.py
15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 |
|
docetl.cli.clear_cache()
+
+Clear the LLM cache stored on disk.
+ +docetl/cli.py
83 +84 +85 +86 +87 +88 |
|
docetl.DSLRunner
+
+
+
+ Bases: ConfigWrapper
This class is responsible for running DocETL pipelines. It manages datasets, executes pipeline steps, and tracks +the cost of operations.
+ + +Attributes:
+Name | +Type | +Description | +
---|---|---|
config |
+
+ Dict
+ |
+
+
+
+ The loaded configuration from the YAML file. + |
+
default_model |
+
+ str
+ |
+
+
+
+ The default language model to use for operations. + |
+
max_threads |
+
+ int
+ |
+
+
+
+ Maximum number of threads for parallel processing. + |
+
console |
+
+ Console
+ |
+
+
+
+ Rich console for output formatting. + |
+
datasets |
+
+ Dict
+ |
+
+
+
+ Storage for loaded datasets. + |
+
docetl/runner.py
27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 +422 +423 +424 +425 +426 
+427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 |
|
__init__(config, max_threads=None, **kwargs)
+
+Initialize the DSLRunner with a YAML configuration file.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ max_threads
+ |
+
+ int
+ |
+
+
+
+ Maximum number of threads to use. Defaults to None. + |
+
+ None
+ |
+
docetl/runner.py
67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 |
|
clear_intermediate()
+
+Clear the intermediate directory.
+ +docetl/runner.py
444 +445 +446 +447 +448 +449 +450 +451 +452 +453 |
|
execute_step(step, input_data)
+
+Execute a single step in the pipeline.
+This method runs all operations defined for a step, updating the progress +and calculating the cost.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ step
+ |
+
+ Dict
+ |
+
+
+
+ The step configuration. + |
+ + required + | +
+ input_data
+ |
+
+ Optional[List[Dict]]
+ |
+
+
+
+ Input data for the step. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Tuple[List[Dict], float]
+ |
+
+
+
+ Tuple[List[Dict], float]: A tuple containing the output data and the total cost of the step. + |
+
docetl/runner.py
310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 |
|
load()
+
+Load all datasets defined in the configuration.
+This method creates Dataset objects for each dataset in the configuration.
+ + +Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If an unsupported dataset type is encountered. + |
+
docetl/runner.py
250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 |
|
load_run_save()
+
+Execute the entire pipeline defined in the configuration.
+This method loads datasets, executes each step in the pipeline, saves the output, +and returns the total cost of execution.
+ + +Returns:
+Name | Type | +Description | +
---|---|---|
float |
+ float
+ |
+
+
+
+ The total cost of executing the pipeline. + |
+
docetl/runner.py
177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 |
|
run(datasets)
+
+Execute the entire pipeline defined in the configuration on some data.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ datasets
+ |
+
+ dict[str, Dataset | List[Dict]]
+ |
+
+
+
+ input datasets to transform + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ (List[Dict], float)
+ |
+
+
+
+ The transformed data and the total cost of execution. + |
+
docetl/runner.py
205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 |
|
save(data)
+
+Save the final output of the pipeline.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ data
+ |
+
+ List[Dict]
+ |
+
+
+
+ The data to be saved. + |
+ + required + | +
Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If an unsupported output type is specified in the configuration. + |
+
docetl/runner.py
276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 |
|
syntax_check()
+
+Perform a syntax check on all operations defined in the configuration.
+This method validates each operation by attempting to instantiate it. +If any operation fails to instantiate, a ValueError is raised.
+ + +Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If any operation fails the syntax check. + |
+
docetl/runner.py
133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 |
|
docetl.Optimizer
+
+
+docetl/builder.py
81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 + 100 + 101 + 102 + 103 + 104 + 105 + 106 + 107 + 108 + 109 + 110 + 111 + 112 + 113 + 114 + 115 + 116 + 117 + 118 + 119 + 120 + 121 + 122 + 123 + 124 + 125 + 126 + 127 + 128 + 129 + 130 + 131 + 132 + 133 + 134 + 135 + 136 + 137 + 138 + 139 + 140 + 141 + 142 + 143 + 144 + 145 + 146 + 147 + 148 + 149 + 150 + 151 + 152 + 153 + 154 + 155 + 156 + 157 + 158 + 159 + 160 + 161 + 162 + 163 + 164 + 165 + 166 + 167 + 168 + 169 + 170 + 171 + 172 + 173 + 174 + 175 + 176 + 177 + 178 + 179 + 180 + 181 + 182 + 183 + 184 + 185 + 186 + 187 + 188 + 189 + 190 + 191 + 192 + 193 + 194 + 195 + 196 + 197 + 198 + 199 + 200 + 201 + 202 + 203 + 204 + 205 + 206 + 207 + 208 + 209 + 210 + 211 + 212 + 213 + 214 + 215 + 216 + 217 + 218 + 219 + 220 + 221 + 222 + 223 + 224 + 225 + 226 + 227 + 228 + 229 + 230 + 231 + 232 + 233 + 234 + 235 + 236 + 237 + 238 + 239 + 240 + 241 + 242 + 243 + 244 + 245 + 246 + 247 + 248 + 249 + 250 + 251 + 252 + 253 + 254 + 255 + 256 + 257 + 258 + 259 + 260 + 261 + 262 + 263 + 264 + 265 + 266 + 267 + 268 + 269 + 270 + 271 + 272 + 273 + 274 + 275 + 276 + 277 + 278 + 279 + 280 + 281 + 282 + 283 + 284 + 285 + 286 + 287 + 288 + 289 + 290 + 291 + 292 + 293 + 294 + 295 + 296 + 297 + 298 + 299 + 300 + 301 + 302 + 303 + 304 + 305 + 306 + 307 + 308 + 309 + 310 + 311 + 312 + 313 + 314 + 315 + 316 + 317 + 318 + 319 + 320 + 321 + 322 + 323 + 324 + 325 + 326 + 327 + 328 + 329 + 330 + 331 + 332 + 333 + 334 + 335 + 336 + 337 + 338 + 339 + 340 + 341 + 342 + 343 + 344 + 345 + 346 + 347 + 348 + 349 + 350 + 351 + 352 + 353 + 354 + 355 + 356 + 357 + 358 + 359 + 360 + 361 + 362 + 363 + 364 + 365 + 366 + 367 + 368 + 369 + 370 + 371 + 372 + 373 + 374 + 375 + 376 + 377 + 378 + 379 + 380 + 381 + 382 + 383 + 384 + 385 + 386 + 387 + 388 + 389 + 390 + 391 + 392 + 393 + 394 + 395 + 396 + 397 + 398 + 399 + 400 + 401 + 402 + 403 + 404 + 405 + 406 + 407 + 408 + 409 + 410 + 411 + 412 + 413 + 414 + 415 + 416 + 
417 + 418 + 419 + 420 + 421 + 422 + 423 + 424 + 425 + 426 + 427 + 428 + 429 + 430 + 431 + 432 + 433 + 434 + 435 + 436 + 437 + 438 + 439 + 440 + 441 + 442 + 443 + 444 + 445 + 446 + 447 + 448 + 449 + 450 + 451 + 452 + 453 + 454 + 455 + 456 + 457 + 458 + 459 + 460 + 461 + 462 + 463 + 464 + 465 + 466 + 467 + 468 + 469 + 470 + 471 + 472 + 473 + 474 + 475 + 476 + 477 + 478 + 479 + 480 + 481 + 482 + 483 + 484 + 485 + 486 + 487 + 488 + 489 + 490 + 491 + 492 + 493 + 494 + 495 + 496 + 497 + 498 + 499 + 500 + 501 + 502 + 503 + 504 + 505 + 506 + 507 + 508 + 509 + 510 + 511 + 512 + 513 + 514 + 515 + 516 + 517 + 518 + 519 + 520 + 521 + 522 + 523 + 524 + 525 + 526 + 527 + 528 + 529 + 530 + 531 + 532 + 533 + 534 + 535 + 536 + 537 + 538 + 539 + 540 + 541 + 542 + 543 + 544 + 545 + 546 + 547 + 548 + 549 + 550 + 551 + 552 + 553 + 554 + 555 + 556 + 557 + 558 + 559 + 560 + 561 + 562 + 563 + 564 + 565 + 566 + 567 + 568 + 569 + 570 + 571 + 572 + 573 + 574 + 575 + 576 + 577 + 578 + 579 + 580 + 581 + 582 + 583 + 584 + 585 + 586 + 587 + 588 + 589 + 590 + 591 + 592 + 593 + 594 + 595 + 596 + 597 + 598 + 599 + 600 + 601 + 602 + 603 + 604 + 605 + 606 + 607 + 608 + 609 + 610 + 611 + 612 + 613 + 614 + 615 + 616 + 617 + 618 + 619 + 620 + 621 + 622 + 623 + 624 + 625 + 626 + 627 + 628 + 629 + 630 + 631 + 632 + 633 + 634 + 635 + 636 + 637 + 638 + 639 + 640 + 641 + 642 + 643 + 644 + 645 + 646 + 647 + 648 + 649 + 650 + 651 + 652 + 653 + 654 + 655 + 656 + 657 + 658 + 659 + 660 + 661 + 662 + 663 + 664 + 665 + 666 + 667 + 668 + 669 + 670 + 671 + 672 + 673 + 674 + 675 + 676 + 677 + 678 + 679 + 680 + 681 + 682 + 683 + 684 + 685 + 686 + 687 + 688 + 689 + 690 + 691 + 692 + 693 + 694 + 695 + 696 + 697 + 698 + 699 + 700 + 701 + 702 + 703 + 704 + 705 + 706 + 707 + 708 + 709 + 710 + 711 + 712 + 713 + 714 + 715 + 716 + 717 + 718 + 719 + 720 + 721 + 722 + 723 + 724 + 725 + 726 + 727 + 728 + 729 + 730 + 731 + 732 + 733 + 734 + 735 + 736 + 737 + 738 + 739 + 740 + 741 + 742 + 743 + 744 + 745 + 746 + 747 + 748 + 749 + 
750 + 751 + 752 + 753 + 754 + 755 + 756 + 757 + 758 + 759 + 760 + 761 + 762 + 763 + 764 + 765 + 766 + 767 + 768 + 769 + 770 + 771 + 772 + 773 + 774 + 775 + 776 + 777 + 778 + 779 + 780 + 781 + 782 + 783 + 784 + 785 + 786 + 787 + 788 + 789 + 790 + 791 + 792 + 793 + 794 + 795 + 796 + 797 + 798 + 799 + 800 + 801 + 802 + 803 + 804 + 805 + 806 + 807 + 808 + 809 + 810 + 811 + 812 + 813 + 814 + 815 + 816 + 817 + 818 + 819 + 820 + 821 + 822 + 823 + 824 + 825 + 826 + 827 + 828 + 829 + 830 + 831 + 832 + 833 + 834 + 835 + 836 + 837 + 838 + 839 + 840 + 841 + 842 + 843 + 844 + 845 + 846 + 847 + 848 + 849 + 850 + 851 + 852 + 853 + 854 + 855 + 856 + 857 + 858 + 859 + 860 + 861 + 862 + 863 + 864 + 865 + 866 + 867 + 868 + 869 + 870 + 871 + 872 + 873 + 874 + 875 + 876 + 877 + 878 + 879 + 880 + 881 + 882 + 883 + 884 + 885 + 886 + 887 + 888 + 889 + 890 + 891 + 892 + 893 + 894 + 895 + 896 + 897 + 898 + 899 + 900 + 901 + 902 + 903 + 904 + 905 + 906 + 907 + 908 + 909 + 910 + 911 + 912 + 913 + 914 + 915 + 916 + 917 + 918 + 919 + 920 + 921 + 922 + 923 + 924 + 925 + 926 + 927 + 928 + 929 + 930 + 931 + 932 + 933 + 934 + 935 + 936 + 937 + 938 + 939 + 940 + 941 + 942 + 943 + 944 + 945 + 946 + 947 + 948 + 949 + 950 + 951 + 952 + 953 + 954 + 955 + 956 + 957 + 958 + 959 + 960 + 961 + 962 + 963 + 964 + 965 + 966 + 967 + 968 + 969 + 970 + 971 + 972 + 973 + 974 + 975 + 976 + 977 + 978 + 979 + 980 + 981 + 982 + 983 + 984 + 985 + 986 + 987 + 988 + 989 + 990 + 991 + 992 + 993 + 994 + 995 + 996 + 997 + 998 + 999 +1000 +1001 +1002 +1003 +1004 +1005 +1006 +1007 +1008 +1009 +1010 +1011 +1012 +1013 +1014 +1015 +1016 +1017 +1018 +1019 +1020 +1021 +1022 +1023 +1024 +1025 +1026 +1027 +1028 +1029 +1030 +1031 +1032 +1033 +1034 +1035 +1036 +1037 +1038 +1039 +1040 +1041 +1042 +1043 +1044 +1045 +1046 +1047 +1048 +1049 +1050 +1051 +1052 +1053 +1054 +1055 +1056 +1057 +1058 +1059 +1060 +1061 +1062 +1063 +1064 +1065 +1066 +1067 +1068 +1069 +1070 +1071 +1072 +1073 +1074 +1075 +1076 +1077 +1078 +1079 +1080 +1081 +1082 
+1083 +1084 +1085 +1086 +1087 +1088 +1089 +1090 +1091 +1092 +1093 +1094 +1095 +1096 +1097 +1098 +1099 +1100 +1101 +1102 +1103 +1104 +1105 +1106 +1107 +1108 +1109 +1110 +1111 +1112 +1113 +1114 +1115 +1116 +1117 +1118 +1119 +1120 +1121 +1122 +1123 +1124 +1125 +1126 +1127 +1128 +1129 +1130 +1131 +1132 +1133 +1134 +1135 +1136 +1137 +1138 +1139 +1140 +1141 +1142 +1143 +1144 +1145 +1146 +1147 +1148 +1149 +1150 +1151 +1152 +1153 +1154 +1155 +1156 +1157 +1158 +1159 +1160 +1161 +1162 +1163 +1164 +1165 +1166 +1167 +1168 +1169 +1170 +1171 +1172 +1173 +1174 +1175 +1176 +1177 +1178 +1179 +1180 +1181 +1182 +1183 +1184 +1185 +1186 +1187 +1188 +1189 +1190 +1191 +1192 +1193 +1194 +1195 +1196 +1197 +1198 +1199 +1200 +1201 +1202 +1203 +1204 +1205 +1206 +1207 +1208 +1209 +1210 +1211 +1212 +1213 +1214 +1215 +1216 +1217 +1218 +1219 +1220 +1221 +1222 +1223 +1224 +1225 +1226 +1227 +1228 +1229 +1230 +1231 +1232 +1233 +1234 +1235 +1236 +1237 +1238 +1239 +1240 +1241 +1242 +1243 +1244 +1245 +1246 +1247 +1248 +1249 +1250 +1251 +1252 +1253 +1254 +1255 +1256 +1257 +1258 +1259 +1260 +1261 +1262 +1263 +1264 +1265 +1266 +1267 +1268 +1269 +1270 +1271 +1272 +1273 +1274 +1275 +1276 +1277 +1278 +1279 +1280 +1281 +1282 +1283 +1284 +1285 +1286 +1287 +1288 +1289 +1290 +1291 +1292 +1293 +1294 +1295 +1296 +1297 +1298 +1299 +1300 +1301 +1302 +1303 +1304 +1305 +1306 +1307 +1308 +1309 +1310 +1311 +1312 +1313 +1314 +1315 +1316 +1317 +1318 +1319 +1320 +1321 +1322 +1323 +1324 +1325 +1326 +1327 +1328 +1329 +1330 +1331 +1332 +1333 +1334 +1335 +1336 +1337 +1338 +1339 +1340 +1341 +1342 +1343 +1344 +1345 +1346 +1347 +1348 +1349 +1350 +1351 +1352 +1353 +1354 +1355 +1356 +1357 +1358 +1359 +1360 +1361 +1362 +1363 +1364 +1365 +1366 +1367 +1368 +1369 +1370 +1371 +1372 +1373 +1374 +1375 +1376 +1377 +1378 +1379 +1380 +1381 +1382 +1383 +1384 +1385 +1386 +1387 +1388 +1389 +1390 +1391 +1392 +1393 +1394 +1395 +1396 +1397 +1398 +1399 +1400 +1401 +1402 +1403 +1404 +1405 +1406 +1407 +1408 +1409 +1410 +1411 +1412 +1413 +1414 +1415 
+1416 +1417 +1418 +1419 +1420 +1421 +1422 +1423 +1424 +1425 +1426 +1427 +1428 +1429 +1430 +1431 +1432 +1433 +1434 +1435 +1436 +1437 +1438 +1439 +1440 +1441 +1442 +1443 +1444 +1445 +1446 +1447 +1448 +1449 +1450 +1451 +1452 +1453 +1454 +1455 +1456 +1457 +1458 +1459 +1460 +1461 +1462 +1463 +1464 +1465 +1466 +1467 +1468 +1469 +1470 +1471 +1472 +1473 +1474 +1475 +1476 +1477 +1478 +1479 +1480 +1481 +1482 +1483 +1484 +1485 +1486 +1487 +1488 +1489 +1490 +1491 +1492 +1493 +1494 +1495 +1496 +1497 +1498 +1499 +1500 +1501 +1502 +1503 +1504 +1505 +1506 +1507 +1508 +1509 +1510 +1511 +1512 +1513 +1514 +1515 +1516 +1517 +1518 +1519 |
|
__init__(runner, model='gpt-4o', resume=False, timeout=60)
+
+Initialize the Optimizer class.
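+This method sets up the optimizer with the given configuration file and parameters. +It loads the configuration, initializes the console for output, sets up the LLM client, +and prepares various attributes for optimization. +Note: the signature shown above takes a runner argument; the yaml_file and max_threads parameters listed below do not appear in that signature and may be stale.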
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ yaml_file
+ |
+
+ str
+ |
+
+
+
+ Path to the YAML configuration file. + |
+ + required + | +
+ max_threads
+ |
+
+ Optional[int]
+ |
+
+
+
+ Maximum number of threads to use for parallel processing. +If None, it will be set to (number of CPUs * 4). + |
+ + required + | +
+ model
+ |
+
+ str
+ |
+
+
+
+ The name of the language model to use. Defaults to "gpt-4o". + |
+
+ 'gpt-4o'
+ |
+
+ resume
+ |
+
+ bool
+ |
+
+
+
+ Whether to resume optimization from a previous run. Defaults to False. + |
+
+ False
+ |
+
+ timeout
+ |
+
+ int
+ |
+
+
+
+ Timeout in seconds for operations. Defaults to 60. + |
+
+ 60
+ |
+
Attributes:
+Name | +Type | +Description | +
---|---|---|
yaml_file_path |
+
+ str
+ |
+
+
+
+ Stores the path to the YAML file. + |
+
config |
+
+ Dict
+ |
+
+
+
+ Stores the loaded configuration from the YAML file. + |
+
console |
+
+ Console
+ |
+
+
+
+ Rich console for formatted output. + |
+
optimized_config |
+
+ Dict
+ |
+
+
+
+ A copy of the original config to be optimized. + |
+
llm_client |
+
+ LLMClient
+ |
+
+
+
+ Client for interacting with the language model. + |
+
max_threads |
+
+ int
+ |
+
+
+
+ Maximum number of threads for parallel processing. + |
+
operations_cost |
+
+ float
+ |
+
+
+
+ Tracks the total cost of operations. + |
+
timeout |
+
+ int
+ |
+
+
+
+ Timeout for operations in seconds. + |
+
selectivities |
+
+ defaultdict
+ |
+
+
+
+ Stores selectivity information for operations. +Selectivity is the ratio of output size to input size for an operation. +It's used to estimate how much data will flow through the pipeline after +each operation, which helps in optimizing subsequent operations and +determining appropriate sample sizes. For example, a selectivity of 0.5 +means an operation halves the size of its input data. + |
+
datasets |
+
+ Dict
+ |
+
+
+
+ Stores loaded datasets. + |
+
The method also calls print_optimizer_config() to display the initial configuration.
+ +docetl/builder.py
83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 |
|
clean_optimized_config()
+
+Remove _intermediates from each operation in the optimized config.
+ +docetl/builder.py
1489 +1490 +1491 +1492 +1493 +1494 +1495 +1496 +1497 +1498 +1499 +1500 +1501 +1502 +1503 +1504 |
|
compute_sample_size(step_name, step_ops, op_config)
+
+Compute the sample size necessary for optimizing a given operation based on upstream operations.
+This method calculates an appropriate sample size for an operation, taking into +account the selectivities of upstream operations in the same step. It uses a +predefined sample size map (SAMPLE_SIZE_MAP) as a starting point.
+For example, if we have a 'map' operation with a default sample size of 10, +and one upstream operation with a selectivity of 0.5, the computed sample size for the upstream operation would be: +10 / 0.5 = 20
+This ensures that after applying the selectivity of the upstream operation, +we still have a representative sample size for the current operation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ step_name
+ |
+
+ str
+ |
+
+
+
+ The name of the current step in the pipeline. + |
+ + required + | +
+ step_ops
+ |
+
+ List[str]
+ |
+
+
+
+ A list of all operations in the current step. + |
+ + required + | +
+ op_config
+ |
+
+ Dict[str, Any]
+ |
+
+
+
+ The configuration dictionary for the current operation. + |
+ + required + | +
Returns:
+Name | Type | +Description | +
---|---|---|
int |
+ int
+ |
+
+
+
+ The computed sample size for the operation. + |
+
The method works as follows: +1. If there are no upstream operations, it returns the default sample size for the operation type. +2. Otherwise, it starts with the default sample size and adjusts it based on the selectivities + of upstream operations. +3. It iterates through upstream operations in reverse order, dividing the sample size by + each operation's selectivity. +4. The final result is rounded to the nearest integer.
+ + +Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If the selectivity for any upstream operation is not found. + |
+
docetl/builder.py
214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 |
|
optimize()
+
+Optimize the entire pipeline defined in the configuration.
+This method is the main entry point for the optimization process. It iterates through +each step in the pipeline, optimizing from upstream to downstream, and constructs an +optimized version of the configuration.
+The optimization process includes: +1. Iterating through each step in the pipeline, from upstream to downstream. +2. Optimizing each step using the _optimize_step method. +3. Updating the optimized configuration with the new operations and steps. +4. Saving the optimized configuration to a file. +5. Logging the total costs (agent cost, operations cost, and total cost).
+ + +Returns:
+Type | +Description | +
---|---|
+ float
+ |
+
+
+
+ None + |
+
Side effects: +- Modifies self.optimized_config with the optimized pipeline and operations. +- Updates self.datasets with the results of each step. +- Calls _save_optimized_config to save the optimized configuration to a file. +- Logs cost information to the console.
+ + +Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If a step in the pipeline does not have a name. + |
+
Note: +- This method assumes that all necessary data and configurations are already + loaded and initialized in the Optimizer instance. +- The optimization process is performed step by step, from upstream to downstream, + with each step potentially depending on the results of previous steps.
+ +docetl/builder.py
573 +574 +575 +576 +577 +578 +579 +580 +581 +582 +583 +584 +585 +586 +587 +588 +589 +590 +591 +592 +593 +594 +595 +596 +597 +598 +599 +600 +601 +602 +603 +604 +605 +606 +607 +608 +609 +610 +611 +612 +613 +614 +615 +616 +617 +618 +619 +620 +621 +622 +623 +624 +625 +626 +627 +628 +629 +630 +631 +632 +633 +634 +635 +636 +637 +638 +639 +640 +641 +642 +643 +644 +645 +646 +647 +648 +649 +650 +651 +652 +653 +654 +655 +656 +657 +658 +659 +660 +661 +662 +663 +664 +665 +666 +667 +668 +669 +670 +671 +672 +673 +674 +675 +676 +677 +678 +679 +680 +681 +682 +683 +684 +685 +686 +687 +688 +689 +690 +691 +692 +693 +694 +695 +696 +697 +698 +699 +700 +701 |
|
print_optimizer_config()
+
+Print the current configuration of the optimizer.
+This method uses the Rich console to display a formatted output of the optimizer's +configuration. It includes details such as the YAML file path, sample sizes for +different operation types, maximum number of threads, the language model being used, +and the timeout setting.
+The output is color-coded and formatted for easy readability, with a header and +separator lines to clearly delineate the configuration information.
+ +docetl/builder.py
196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 |
|
resolve_anchors(data)
+
+
+ staticmethod
+
+
+Recursively resolve all anchors and aliases in a nested data structure.
+This static method traverses through dictionaries and lists, resolving any YAML anchors and aliases.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ data
+ |
+ + | +
+
+
+ The data structure to resolve. Can be a dictionary, list, or any other type. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ | +
+
+
+ The resolved data structure with all anchors and aliases replaced by their actual values. + |
+
docetl/builder.py
1469 +1470 +1471 +1472 +1473 +1474 +1475 +1476 +1477 +1478 +1479 +1480 +1481 +1482 +1483 +1484 +1485 +1486 +1487 |
|
save_optimized_config(optimized_config_path)
+
+Save the optimized configuration to a YAML file.
+This method creates a copy of the optimized configuration, resolves all anchors and aliases, +and saves it to a new YAML file. The new file name is based on the original file name with '_opt' appended.
+ +docetl/builder.py
1506 +1507 +1508 +1509 +1510 +1511 +1512 +1513 +1514 +1515 +1516 +1517 +1518 +1519 |
|
should_optimize(step_name, op_name)
+
+Determine if an operation should be optimized. +We do this by running the operations on a sample of the input data and checking if the output is correct.
+ +docetl/builder.py
482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 +534 +535 +536 +537 +538 +539 +540 +541 +542 +543 +544 +545 +546 +547 +548 +549 +550 +551 +552 +553 +554 +555 +556 +557 +558 +559 +560 +561 +562 +563 +564 +565 +566 +567 +568 +569 +570 +571 |
|
syntax_check()
+
+Perform a syntax check on all operations defined in the configuration.
+This method validates each operation by attempting to instantiate it. +If any operation fails to instantiate, a ValueError is raised.
+ + +Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If any operation fails the syntax check. + |
+
docetl/builder.py
166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 |
|
docetl.operations.map.MapOperation
+
+
+
+ Bases: BaseOperation
docetl/operations/map.py
32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 |
|
execute(input_data)
+
+Executes the map operation on the provided input data.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ input_data
+ |
+
+ List[Dict]
+ |
+
+
+
+ The input data to process. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Tuple[List[Dict], float]
+ |
+
+
+
+ Tuple[List[Dict], float]: A tuple containing the processed results and the total cost of the operation. + |
+
This method performs the following steps: +1. If a prompt is specified, it processes each input item using the specified prompt and LLM model +2. Applies gleaning if configured +3. Validates the output +4. If drop_keys is specified, it drops the specified keys from each document +5. Aggregates results and calculates total cost
+The method uses parallel processing to improve performance.
+ +docetl/operations/map.py
144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 |
|
syntax_check()
+
+Checks the configuration of the MapOperation for required keys and valid structure.
+
+
+
+Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If required keys are missing or invalid in the configuration. + |
+
+ TypeError
+ |
+
+
+
+ If configuration values have incorrect types. + |
+
docetl/operations/map.py
71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 |
|
docetl.operations.resolve.ResolveOperation
+
+
+
+ Bases: BaseOperation
docetl/operations/resolve.py
31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 +422 +423 +424 +425 +426 +427 +428 +429 +430 
+431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 +534 +535 +536 +537 +538 +539 +540 +541 +542 +543 +544 +545 +546 +547 +548 +549 +550 +551 +552 +553 +554 +555 +556 +557 +558 +559 +560 +561 +562 +563 +564 +565 +566 +567 +568 +569 +570 +571 +572 +573 +574 +575 +576 +577 +578 +579 +580 +581 +582 +583 +584 +585 +586 +587 +588 +589 +590 +591 +592 +593 +594 +595 +596 +597 +598 +599 +600 +601 +602 +603 +604 +605 +606 +607 +608 +609 +610 +611 +612 +613 +614 +615 +616 +617 +618 +619 +620 +621 +622 +623 +624 +625 +626 +627 +628 +629 +630 +631 +632 +633 +634 +635 +636 +637 +638 +639 +640 +641 +642 +643 +644 +645 +646 +647 +648 +649 +650 +651 +652 +653 +654 +655 +656 +657 +658 +659 +660 +661 +662 +663 +664 +665 +666 +667 +668 +669 +670 +671 +672 +673 +674 +675 +676 +677 +678 +679 +680 +681 +682 +683 +684 +685 +686 +687 +688 +689 +690 +691 +692 +693 +694 +695 +696 +697 |
|
compare_pair(comparison_prompt, model, item1, item2, blocking_keys=[], timeout_seconds=120, max_retries_per_timeout=2)
+
+Compares two items using an LLM model to determine if they match.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ comparison_prompt
+ |
+
+ str
+ |
+
+
+
+ The prompt template for comparison. + |
+ + required + | +
+ model
+ |
+
+ str
+ |
+
+
+
+ The LLM model to use for comparison. + |
+ + required + | +
+ item1
+ |
+
+ Dict
+ |
+
+
+
+ The first item to compare. + |
+ + required + | +
+ item2
+ |
+
+ Dict
+ |
+
+
+
+ The second item to compare. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Tuple[bool, float, str]
+ |
+
+
+
+ Tuple[bool, float, str]: A tuple containing a boolean indicating whether the items match, the cost of the comparison, and the prompt. + |
+
docetl/operations/resolve.py
52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 |
|
execute(input_data)
+
+Executes the resolve operation on the provided dataset.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ input_data
+ |
+
+ List[Dict]
+ |
+
+
+
+ The dataset to resolve. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Tuple[List[Dict], float]
+ |
+
+
+
+ Tuple[List[Dict], float]: A tuple containing the resolved results and the total cost of the operation. + |
+
This method performs the following steps: +1. Initial blocking based on specified conditions and/or embedding similarity +2. Pairwise comparison of potentially matching entries using LLM +3. Clustering of matched entries +4. Resolution of each cluster into a single entry (if applicable) +5. Result aggregation and validation
+The method also calculates and logs statistics such as comparisons saved by blocking and self-join selectivity.
+ +docetl/operations/resolve.py
217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 +422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 +534 +535 +536 +537 +538 +539 +540 +541 +542 +543 +544 +545 +546 +547 +548 +549 +550 +551 +552 +553 +554 +555 +556 +557 +558 +559 +560 +561 +562 +563 +564 +565 +566 +567 +568 +569 +570 +571 +572 +573 +574 +575 +576 +577 +578 +579 +580 +581 +582 +583 +584 +585 +586 +587 +588 +589 +590 +591 +592 +593 +594 +595 +596 +597 +598 +599 +600 +601 +602 +603 +604 +605 +606 +607 +608 +609 +610 +611 +612 +613 +614 +615 +616 
+617 +618 +619 +620 +621 +622 +623 +624 +625 +626 +627 +628 +629 +630 +631 +632 +633 +634 +635 +636 +637 +638 +639 +640 +641 +642 +643 +644 +645 +646 +647 +648 +649 +650 +651 +652 +653 +654 +655 +656 +657 +658 +659 +660 +661 +662 +663 +664 +665 +666 +667 +668 +669 +670 +671 +672 +673 +674 +675 +676 +677 +678 +679 +680 +681 +682 +683 +684 +685 +686 +687 +688 +689 +690 +691 +692 +693 +694 +695 +696 +697 |
|
syntax_check()
+
+Checks the configuration of the ResolveOperation for required keys and valid structure.
+This method performs the following checks: +1. Verifies the presence of required keys: 'comparison_prompt' and 'output'. +2. Ensures 'output' contains a 'schema' key. +3. Validates that 'schema' in 'output' is a non-empty dictionary. +4. Checks if 'comparison_prompt' is a valid Jinja2 template with 'input1' and 'input2' variables. +5. If 'resolution_prompt' is present, verifies it as a valid Jinja2 template with 'inputs' variable. +6. Optionally checks if 'model' is a string (if present). +7. Optionally checks 'blocking_keys' (if present, further checks are performed).
+ + +Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If required keys are missing, if templates are invalid or missing required variables, + or if any other configuration aspect is incorrect or inconsistent. + |
+
+ TypeError
+ |
+
+
+
+ If the types of configuration values are incorrect, such as 'schema' not being a dict + or 'model' not being a string. + |
+
docetl/operations/resolve.py
102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 |
|
docetl.operations.reduce.ReduceOperation
+
+
+
+ Bases: BaseOperation
A class that implements a reduce operation on input data using language models.
+This class extends BaseOperation to provide functionality for reducing grouped data +using various strategies including batch reduce, incremental reduce, and parallel fold and merge.
+ + + + + + +docetl/operations/reduce.py
31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 + 100 + 101 + 102 + 103 + 104 + 105 + 106 + 107 + 108 + 109 + 110 + 111 + 112 + 113 + 114 + 115 + 116 + 117 + 118 + 119 + 120 + 121 + 122 + 123 + 124 + 125 + 126 + 127 + 128 + 129 + 130 + 131 + 132 + 133 + 134 + 135 + 136 + 137 + 138 + 139 + 140 + 141 + 142 + 143 + 144 + 145 + 146 + 147 + 148 + 149 + 150 + 151 + 152 + 153 + 154 + 155 + 156 + 157 + 158 + 159 + 160 + 161 + 162 + 163 + 164 + 165 + 166 + 167 + 168 + 169 + 170 + 171 + 172 + 173 + 174 + 175 + 176 + 177 + 178 + 179 + 180 + 181 + 182 + 183 + 184 + 185 + 186 + 187 + 188 + 189 + 190 + 191 + 192 + 193 + 194 + 195 + 196 + 197 + 198 + 199 + 200 + 201 + 202 + 203 + 204 + 205 + 206 + 207 + 208 + 209 + 210 + 211 + 212 + 213 + 214 + 215 + 216 + 217 + 218 + 219 + 220 + 221 + 222 + 223 + 224 + 225 + 226 + 227 + 228 + 229 + 230 + 231 + 232 + 233 + 234 + 235 + 236 + 237 + 238 + 239 + 240 + 241 + 242 + 243 + 244 + 245 + 246 + 247 + 248 + 249 + 250 + 251 + 252 + 253 + 254 + 255 + 256 + 257 + 258 + 259 + 260 + 261 + 262 + 263 + 264 + 265 + 266 + 267 + 268 + 269 + 270 + 271 + 272 + 273 + 274 + 275 + 276 + 277 + 278 + 279 + 280 + 281 + 282 + 283 + 284 + 285 + 286 + 287 + 288 + 289 + 290 + 291 + 292 + 293 + 294 + 295 + 296 + 297 + 298 + 299 + 300 + 301 + 302 + 303 + 304 + 305 + 306 + 307 + 308 + 309 + 310 + 311 + 312 + 313 + 314 + 315 + 316 + 317 + 318 + 319 + 320 + 321 + 322 + 323 + 324 + 325 + 326 + 327 + 328 + 329 + 330 + 331 + 332 + 333 + 334 + 335 + 336 + 337 + 338 + 339 + 340 + 341 + 342 + 343 + 344 + 345 + 346 + 347 + 348 + 349 + 350 + 351 + 352 + 353 + 354 + 355 + 356 + 357 + 358 + 359 + 360 + 361 + 362 + 363 + 364 + 365 + 366 + 367 + 368 + 369 + 370 + 371 + 372 + 373 + 374 + 375 
+ 376 + 377 + 378 + 379 + 380 + 381 + 382 + 383 + 384 + 385 + 386 + 387 + 388 + 389 + 390 + 391 + 392 + 393 + 394 + 395 + 396 + 397 + 398 + 399 + 400 + 401 + 402 + 403 + 404 + 405 + 406 + 407 + 408 + 409 + 410 + 411 + 412 + 413 + 414 + 415 + 416 + 417 + 418 + 419 + 420 + 421 + 422 + 423 + 424 + 425 + 426 + 427 + 428 + 429 + 430 + 431 + 432 + 433 + 434 + 435 + 436 + 437 + 438 + 439 + 440 + 441 + 442 + 443 + 444 + 445 + 446 + 447 + 448 + 449 + 450 + 451 + 452 + 453 + 454 + 455 + 456 + 457 + 458 + 459 + 460 + 461 + 462 + 463 + 464 + 465 + 466 + 467 + 468 + 469 + 470 + 471 + 472 + 473 + 474 + 475 + 476 + 477 + 478 + 479 + 480 + 481 + 482 + 483 + 484 + 485 + 486 + 487 + 488 + 489 + 490 + 491 + 492 + 493 + 494 + 495 + 496 + 497 + 498 + 499 + 500 + 501 + 502 + 503 + 504 + 505 + 506 + 507 + 508 + 509 + 510 + 511 + 512 + 513 + 514 + 515 + 516 + 517 + 518 + 519 + 520 + 521 + 522 + 523 + 524 + 525 + 526 + 527 + 528 + 529 + 530 + 531 + 532 + 533 + 534 + 535 + 536 + 537 + 538 + 539 + 540 + 541 + 542 + 543 + 544 + 545 + 546 + 547 + 548 + 549 + 550 + 551 + 552 + 553 + 554 + 555 + 556 + 557 + 558 + 559 + 560 + 561 + 562 + 563 + 564 + 565 + 566 + 567 + 568 + 569 + 570 + 571 + 572 + 573 + 574 + 575 + 576 + 577 + 578 + 579 + 580 + 581 + 582 + 583 + 584 + 585 + 586 + 587 + 588 + 589 + 590 + 591 + 592 + 593 + 594 + 595 + 596 + 597 + 598 + 599 + 600 + 601 + 602 + 603 + 604 + 605 + 606 + 607 + 608 + 609 + 610 + 611 + 612 + 613 + 614 + 615 + 616 + 617 + 618 + 619 + 620 + 621 + 622 + 623 + 624 + 625 + 626 + 627 + 628 + 629 + 630 + 631 + 632 + 633 + 634 + 635 + 636 + 637 + 638 + 639 + 640 + 641 + 642 + 643 + 644 + 645 + 646 + 647 + 648 + 649 + 650 + 651 + 652 + 653 + 654 + 655 + 656 + 657 + 658 + 659 + 660 + 661 + 662 + 663 + 664 + 665 + 666 + 667 + 668 + 669 + 670 + 671 + 672 + 673 + 674 + 675 + 676 + 677 + 678 + 679 + 680 + 681 + 682 + 683 + 684 + 685 + 686 + 687 + 688 + 689 + 690 + 691 + 692 + 693 + 694 + 695 + 696 + 697 + 698 + 699 + 700 + 701 + 702 + 703 + 704 + 705 + 706 + 707 + 708 + 
709 + 710 + 711 + 712 + 713 + 714 + 715 + 716 + 717 + 718 + 719 + 720 + 721 + 722 + 723 + 724 + 725 + 726 + 727 + 728 + 729 + 730 + 731 + 732 + 733 + 734 + 735 + 736 + 737 + 738 + 739 + 740 + 741 + 742 + 743 + 744 + 745 + 746 + 747 + 748 + 749 + 750 + 751 + 752 + 753 + 754 + 755 + 756 + 757 + 758 + 759 + 760 + 761 + 762 + 763 + 764 + 765 + 766 + 767 + 768 + 769 + 770 + 771 + 772 + 773 + 774 + 775 + 776 + 777 + 778 + 779 + 780 + 781 + 782 + 783 + 784 + 785 + 786 + 787 + 788 + 789 + 790 + 791 + 792 + 793 + 794 + 795 + 796 + 797 + 798 + 799 + 800 + 801 + 802 + 803 + 804 + 805 + 806 + 807 + 808 + 809 + 810 + 811 + 812 + 813 + 814 + 815 + 816 + 817 + 818 + 819 + 820 + 821 + 822 + 823 + 824 + 825 + 826 + 827 + 828 + 829 + 830 + 831 + 832 + 833 + 834 + 835 + 836 + 837 + 838 + 839 + 840 + 841 + 842 + 843 + 844 + 845 + 846 + 847 + 848 + 849 + 850 + 851 + 852 + 853 + 854 + 855 + 856 + 857 + 858 + 859 + 860 + 861 + 862 + 863 + 864 + 865 + 866 + 867 + 868 + 869 + 870 + 871 + 872 + 873 + 874 + 875 + 876 + 877 + 878 + 879 + 880 + 881 + 882 + 883 + 884 + 885 + 886 + 887 + 888 + 889 + 890 + 891 + 892 + 893 + 894 + 895 + 896 + 897 + 898 + 899 + 900 + 901 + 902 + 903 + 904 + 905 + 906 + 907 + 908 + 909 + 910 + 911 + 912 + 913 + 914 + 915 + 916 + 917 + 918 + 919 + 920 + 921 + 922 + 923 + 924 + 925 + 926 + 927 + 928 + 929 + 930 + 931 + 932 + 933 + 934 + 935 + 936 + 937 + 938 + 939 + 940 + 941 + 942 + 943 + 944 + 945 + 946 + 947 + 948 + 949 + 950 + 951 + 952 + 953 + 954 + 955 + 956 + 957 + 958 + 959 + 960 + 961 + 962 + 963 + 964 + 965 + 966 + 967 + 968 + 969 + 970 + 971 + 972 + 973 + 974 + 975 + 976 + 977 + 978 + 979 + 980 + 981 + 982 + 983 + 984 + 985 + 986 + 987 + 988 + 989 + 990 + 991 + 992 + 993 + 994 + 995 + 996 + 997 + 998 + 999 +1000 +1001 +1002 +1003 +1004 +1005 +1006 |
|
__init__(*args, **kwargs)
+
+Initialize the ReduceOperation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *args
+ |
+ + | +
+
+
+ Variable length argument list. + |
+
+ ()
+ |
+
+ **kwargs
+ |
+ + | +
+
+
+ Arbitrary keyword arguments. + |
+
+ {}
+ |
+
docetl/operations/reduce.py
58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 |
|
execute(input_data)
+
+Execute the reduce operation on the provided input data.
+This method sorts and groups the input data by the reduce key(s), then processes each group +using either parallel fold and merge, incremental reduce, or batch reduce strategies.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ input_data
+ |
+
+ List[Dict]
+ |
+
+
+
+ The input data to process. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Tuple[List[Dict], float]
+ |
+
+
+
+ Tuple[List[Dict], float]: A tuple containing the processed results and the total cost of the operation. + |
+
docetl/operations/reduce.py
297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 +422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 |
|
get_fold_time()
+
+Get the average fold time or a default value.
+ + +Returns:
+Type | +Description | +
---|---|
+ float
+ |
+
+
+
+ The average fold time, or a default value. + |
+
+ bool
+ |
+
+
+
+ A boolean indicating whether the default value was used. + |
+
docetl/operations/reduce.py
900 +901 +902 +903 +904 +905 +906 +907 +908 +909 +910 +911 +912 +913 |
|
get_merge_time()
+
+Get the average merge time or a default value.
+ + +Returns:
+Type | +Description | +
---|---|
+ float
+ |
+
+
+
+ The average merge time, or a default value. + |
+
+ bool
+ |
+
+
+
+ A boolean indicating whether the default value was used. + |
+
docetl/operations/reduce.py
915 +916 +917 +918 +919 +920 +921 +922 +923 +924 +925 +926 +927 +928 |
|
syntax_check()
+
+Perform comprehensive syntax checks on the configuration of the ReduceOperation.
+This method validates the presence and correctness of all required configuration keys, Jinja2 templates, and ensures the correct +structure and types of the entire configuration.
+The method performs the following checks: +1. Verifies the presence of all required keys in the configuration. +2. Validates the structure and content of the 'output' configuration, including its 'schema'. +3. Checks if the main 'prompt' is a valid Jinja2 template and contains the required 'inputs' variable. +4. If 'merge_prompt' is specified, ensures that 'fold_prompt' is also present. +5. If 'fold_prompt' is present, verifies the existence of 'fold_batch_size'. +6. Validates the 'fold_prompt' as a Jinja2 template with required variables 'inputs' and 'output'. +7. If present, checks 'merge_prompt' as a valid Jinja2 template with required 'outputs' variable. +8. Verifies types of various configuration inputs (e.g., 'fold_batch_size' as int). +9. Checks for the presence and validity of optional configurations like 'model'.
+ + +Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If any required configuration is missing, if templates are invalid or missing required + variables, or if any other configuration aspect is incorrect or inconsistent. + |
+
+ TypeError
+ |
+
+
+
+ If any configuration value has an incorrect type, such as 'schema' not being a dict + or 'fold_batch_size' not being an integer. + |
+
docetl/operations/reduce.py
80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 |
|
docetl.operations.map.ParallelMapOperation
+
+
+
+ Bases: BaseOperation
docetl/operations/map.py
319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 +422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 |
|
execute(input_data)
+
+Executes the parallel map operation on the provided input data.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ input_data
+ |
+
+ List[Dict]
+ |
+
+
+
+ The input data to process. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Tuple[List[Dict], float]
+ |
+
+
+
+ Tuple[List[Dict], float]: A tuple containing the processed results and the total cost of the operation. + |
+
This method performs the following steps: +1. If prompts are specified, it processes each input item using multiple prompts in parallel +2. Aggregates results from different prompts for each input item +3. Validates the combined output for each item +4. If drop_keys is specified, it drops the specified keys from each document +5. Calculates total cost of the operation
+ +docetl/operations/map.py
416 +417 +418 +419 +420 +421 +422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 +474 +475 +476 +477 +478 +479 +480 +481 +482 +483 +484 +485 +486 +487 +488 +489 +490 +491 +492 +493 +494 +495 +496 +497 +498 +499 +500 +501 +502 +503 +504 +505 +506 +507 +508 +509 +510 +511 +512 +513 +514 +515 +516 +517 +518 +519 +520 +521 +522 +523 +524 +525 +526 +527 +528 +529 +530 +531 +532 +533 |
|
syntax_check()
+
+Checks the configuration of the ParallelMapOperation for required keys and valid structure.
+ + +Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If required keys are missing or if the configuration structure is invalid. + |
+
+ TypeError
+ |
+
+
+
+ If the configuration values have incorrect types. + |
+
docetl/operations/map.py
333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 |
|
docetl.operations.filter.FilterOperation
+
+
+
+ Bases: MapOperation
docetl/operations/filter.py
11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 |
|
execute(input_data, is_build=False)
+
+Executes the filter operation on the input data.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ input_data
+ |
+
+ List[Dict]
+ |
+
+
+
+ A list of dictionaries to process. + |
+ + required + | +
+ is_build
+ |
+
+ bool
+ |
+
+
+
+ Whether the operation is being executed in the build phase. Defaults to False. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ List[Dict]
+ |
+
+
+
+ The filtered list of dictionaries. + |
+
+ float
+ |
+
+
+
+ The total cost of the operation. + |
+
This method performs the following steps: +1. Processes each input item using an LLM model +2. Validates the output +3. Filters the results based on the specified filter key +4. Calculates the total cost of the operation
+The method uses multi-threading to process items in parallel, improving performance +for large datasets.
+Usage: +
from docetl.operations import FilterOperation
+
+config = {
+ "prompt": "Determine if the following item is important: {{input}}",
+ "output": {
+ "schema": {"is_important": "bool"}
+ },
+ "model": "gpt-3.5-turbo"
+}
+filter_op = FilterOperation(config)
+input_data = [
+ {"id": 1, "text": "Critical update"},
+ {"id": 2, "text": "Regular maintenance"}
+]
+results, cost = filter_op.execute(input_data)
+print(f"Filtered results: {results}")
+print(f"Total cost: {cost}")
+
docetl/operations/filter.py
59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 |
|
syntax_check()
+
+Checks the configuration of the FilterOperation for required keys and valid structure.
+ + +Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If required keys are missing or if the output schema structure is invalid. + |
+
+ TypeError
+ |
+
+
+
+ If the schema in the output configuration is not a dictionary or if the schema value is not of type bool. + |
+
This method checks for the following: +- Presence of required keys: 'prompt' and 'output' +- Presence of 'schema' in the 'output' configuration +- The 'schema' is a non-empty dictionary with exactly one key-value pair +- The value in the schema is of type bool
+ +docetl/operations/filter.py
15 +16 +17 +18 +19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 |
|
docetl.operations.equijoin.EquijoinOperation
+
+
+
+ Bases: BaseOperation
docetl/operations/equijoin.py
53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 +422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 
+453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 |
|
compare_pair(comparison_prompt, model, item1, item2, timeout_seconds=120, max_retries_per_timeout=2)
+
+Compares two items using an LLM model to determine if they match.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ comparison_prompt
+ |
+
+ str
+ |
+
+
+
+ The prompt template for comparison. + |
+ + required + | +
+ model
+ |
+
+ str
+ |
+
+
+
+ The LLM model to use for comparison. + |
+ + required + | +
+ item1
+ |
+
+ Dict
+ |
+
+
+
+ The first item to compare. + |
+ + required + | +
+ item2
+ |
+
+ Dict
+ |
+
+
+
+ The second item to compare. + |
+ + required + | +
+ timeout_seconds
+ |
+
+ int
+ |
+
+
+
+ The timeout for the LLM call in seconds. + |
+
+ 120
+ |
+
+ max_retries_per_timeout
+ |
+
+ int
+ |
+
+
+
+ The maximum number of retries per timeout. + |
+
+ 2
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Tuple[bool, float]
+ |
+
+
+
+ Tuple[bool, float]: A tuple containing a boolean indicating whether the items match and the cost of the comparison. + |
+
docetl/operations/equijoin.py
73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 |
|
execute(left_data, right_data)
+
+Executes the equijoin operation on the provided datasets.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ left_data
+ |
+
+ List[Dict]
+ |
+
+
+
+ The left dataset to join. + |
+ + required + | +
+ right_data
+ |
+
+ List[Dict]
+ |
+
+
+
+ The right dataset to join. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Tuple[List[Dict], float]
+ |
+
+
+
+ Tuple[List[Dict], float]: A tuple containing the joined results and the total cost of the operation. + |
+
Usage: +
from docetl.operations import EquijoinOperation
+
+config = {
+ "blocking_keys": {
+ "left": ["id"],
+ "right": ["user_id"]
+ },
+ "limits": {
+ "left": 1,
+ "right": 1
+ },
+ "comparison_prompt": "Compare {{left}} and {{right}} and determine if they match.",
+ "blocking_threshold": 0.8,
+ "blocking_conditions": ["left['id'] == right['user_id']"],
+ "limit_comparisons": 1000
+}
+equijoin_op = EquijoinOperation(config)
+left_data = [{"id": 1, "name": "Alice"}, {"id": 2, "name": "Bob"}]
+right_data = [{"user_id": 1, "age": 30}, {"user_id": 2, "age": 25}]
+results, cost = equijoin_op.execute(left_data, right_data)
+print(f"Joined results: {results}")
+print(f"Total cost: {cost}")
+
This method performs the following steps: +1. Initial blocking based on specified conditions (if any) +2. Embedding-based blocking (if threshold is provided) +3. LLM-based comparison for blocked pairs +4. Result aggregation and validation
+The method also calculates and logs statistics such as comparisons saved by blocking and join selectivity.
+ +docetl/operations/equijoin.py
152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 +422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 +470 +471 +472 +473 |
|
syntax_check()
+
+Checks the configuration of the EquijoinOperation for required keys and valid structure.
+ + +Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If required keys are missing or if the blocking_keys structure is invalid. + |
+
+ Specifically
+ |
+
+
+
+
+ |
+
docetl/operations/equijoin.py
114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 |
|
docetl.operations.cluster.ClusterOperation
+
+
+
+ Bases: BaseOperation
docetl/operations/cluster.py
10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 |
|
execute(input_data, is_build=False)
+
+Executes the cluster operation on the input data. Modifies the +input data in place and returns it.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ input_data
+ |
+
+ List[Dict]
+ |
+
+
+
+ A list of dictionaries to process. + |
+ + required + | +
+ is_build
+ |
+
+ bool
+ |
+
+
+
+ Whether the operation is being executed +in the build phase. Defaults to False. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ Tuple[List[Dict], float]
+ |
+
+
+
+ Tuple[List[Dict], float]: A tuple containing the clustered +list of dictionaries and the total cost of the operation. + |
+
docetl/operations/cluster.py
75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 |
|
syntax_check()
+
+Checks the configuration of the ClusterOperation for required keys and valid structure.
+ + +Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If required keys are missing or invalid in the configuration. + |
+
+ TypeError
+ |
+
+
+
+ If configuration values have incorrect types. + |
+
docetl/operations/cluster.py
21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 |
|
docetl.operations.split.SplitOperation
+
+
+
+ Bases: BaseOperation
A class that implements a split operation on input data, dividing it into manageable chunks.
+This class extends BaseOperation to: +1. Split input data into chunks of specified size based on the 'split_key' and 'token_count' configuration. +2. Assign unique identifiers to each original document and number chunks sequentially. +3. Return results containing: + - {split_key}_chunk: The content of the split chunk. + - {name}_id: A unique identifier for each original document. + - {name}_chunk_num: The sequential number of the chunk within its original document.
+ + + + + + +docetl/operations/split.py
9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 |
|
docetl.operations.gather.GatherOperation
+
+
+
+ Bases: BaseOperation
A class that implements a gather operation on input data, adding contextual information from surrounding chunks.
+This class extends BaseOperation to: +1. Group chunks by their document ID. +2. Order chunks within each group. +3. Add peripheral context to each chunk based on the configuration. +4. Include headers for each chunk and its upward hierarchy. +5. Return results containing the rendered chunks with added context, including information about skipped characters and headers.
+ + + + + + +docetl/operations/gather.py
6 + 7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 |
|
__init__(*args, **kwargs)
+
+Initialize the GatherOperation.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ *args
+ |
+
+ Any
+ |
+
+
+
+ Variable length argument list. + |
+
+ ()
+ |
+
+ **kwargs
+ |
+
+ Any
+ |
+
+
+
+ Arbitrary keyword arguments. + |
+
+ {}
+ |
+
docetl/operations/gather.py
26 +27 +28 +29 +30 +31 +32 +33 +34 |
|
execute(input_data)
+
+Execute the gather operation on the input data.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ input_data
+ |
+
+ List[Dict]
+ |
+
+
+
+ The input data to process. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ Tuple[List[Dict], float]
+ |
+
+
+
+ Tuple[List[Dict], float]: A tuple containing the processed results and the cost of the operation. + |
+
docetl/operations/gather.py
72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 |
|
process_peripheral_chunks(chunks, config, content_key, order_key, reverse=False)
+
+Process peripheral chunks according to the configuration.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ chunks
+ |
+
+ List[Dict]
+ |
+
+
+
+ List of chunks to process. + |
+ + required + | +
+ config
+ |
+
+ Dict
+ |
+
+
+
+ Configuration for processing peripheral chunks. + |
+ + required + | +
+ content_key
+ |
+
+ str
+ |
+
+
+
+ Key for the content in each chunk. + |
+ + required + | +
+ order_key
+ |
+
+ str
+ |
+
+
+
+ Key for the order of each chunk. + |
+ + required + | +
+ reverse
+ |
+
+ bool
+ |
+
+
+
+ Whether to process chunks in reverse order. Defaults to False. + |
+
+ False
+ |
+
Returns:
+Type | +Description | +
---|---|
+ List[str]
+ |
+
+
+
+ List[str]: List of processed chunk strings. + |
+
docetl/operations/gather.py
191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 |
|
render_chunk_with_context(chunks, current_index, peripheral_config, content_key, order_key, main_chunk_start, main_chunk_end, doc_header_key)
+
+Render a chunk with its peripheral context and headers.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ chunks
+ |
+
+ List[Dict]
+ |
+
+
+
+ List of all chunks in the document. + |
+ + required + | +
+ current_index
+ |
+
+ int
+ |
+
+
+
+ Index of the current chunk being processed. + |
+ + required + | +
+ peripheral_config
+ |
+
+ Dict
+ |
+
+
+
+ Configuration for peripheral chunks. + |
+ + required + | +
+ content_key
+ |
+
+ str
+ |
+
+
+
+ Key for the content in each chunk. + |
+ + required + | +
+ order_key
+ |
+
+ str
+ |
+
+
+
+ Key for the order of each chunk. + |
+ + required + | +
+ main_chunk_start
+ |
+
+ str
+ |
+
+
+
+ String to mark the start of the main chunk. + |
+ + required + | +
+ main_chunk_end
+ |
+
+ str
+ |
+
+
+
+ String to mark the end of the main chunk. + |
+ + required + | +
+ doc_header_key
+ |
+
+ str
+ |
+
+
+
+ The key for the headers in the current chunk. + |
+ + required + | +
Returns:
+Name | Type | +Description | +
---|---|---|
str |
+ str
+ |
+
+
+
+ Rendered chunk with context and headers. + |
+
docetl/operations/gather.py
126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 |
|
render_hierarchy_headers(current_chunk, chunks, doc_header_key)
+
+Render headers for the current chunk's hierarchy.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ current_chunk
+ |
+
+ Dict
+ |
+
+
+
+ The current chunk being processed. + |
+ + required + | +
+ chunks
+ |
+
+ List[Dict]
+ |
+
+
+
+ List of chunks up to and including the current chunk. + |
+ + required + | +
+ doc_header_key
+ |
+
+ str
+ |
+
+
+
+ The key for the headers in the current chunk. + |
+ + required + | +
Returns: + str: Rendered headers in the current chunk's hierarchy.
+ +docetl/operations/gather.py
270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 |
|
syntax_check()
+
+Perform a syntax check on the operation configuration.
+ + +Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If required keys are missing or if there are configuration errors. + |
+
+ TypeError
+ |
+
+
+
+ If main_chunk_start or main_chunk_end are not strings. + |
+
docetl/operations/gather.py
36 +37 +38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 |
|
docetl.operations.unnest.UnnestOperation
+
+
+
+ Bases: BaseOperation
A class that represents an operation to unnest a list-like or dictionary value in a dictionary into multiple dictionaries.
+This operation takes a list of dictionaries and a specified key, and creates new dictionaries based on the value type: +- For list-like values: Creates a new dictionary for each element in the list, copying all other key-value pairs. +- For dictionary values: Expands specified fields from the nested dictionary into the parent dictionary.
+ + +BaseOperation
+Usage: +
from docetl.operations import UnnestOperation
+
+# Unnesting a list
+config_list = {"unnest_key": "tags"}
+input_data_list = [
+ {"id": 1, "tags": ["a", "b", "c"]},
+ {"id": 2, "tags": ["d", "e"]}
+]
+
+unnest_op_list = UnnestOperation(config_list)
+result_list, _ = unnest_op_list.execute(input_data_list)
+
+# Result will be:
+# [
+# {"id": 1, "tags": "a"},
+# {"id": 1, "tags": "b"},
+# {"id": 1, "tags": "c"},
+# {"id": 2, "tags": "d"},
+# {"id": 2, "tags": "e"}
+# ]
+
+# Unnesting a dictionary
+config_dict = {"unnest_key": "user", "expand_fields": ["name", "age"]}
+input_data_dict = [
+ {"id": 1, "user": {"name": "Alice", "age": 30, "email": "alice@example.com"}},
+ {"id": 2, "user": {"name": "Bob", "age": 25, "email": "bob@example.com"}}
+]
+
+unnest_op_dict = UnnestOperation(config_dict)
+result_dict, _ = unnest_op_dict.execute(input_data_dict)
+
+# Result will be:
+# [
+# {"id": 1, "name": "Alice", "age": 30, "user": {"name": "Alice", "age": 30, "email": "alice@example.com"}},
+# {"id": 2, "name": "Bob", "age": 25, "user": {"name": "Bob", "age": 25, "email": "bob@example.com"}}
+# ]
+
docetl/operations/unnest.py
7 + 8 + 9 + 10 + 11 + 12 + 13 + 14 + 15 + 16 + 17 + 18 + 19 + 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 |
|
execute(input_data)
+
+Executes the unnest operation on the input data.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ input_data
+ |
+
+ List[Dict]
+ |
+
+
+
+ A list of dictionaries to process. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ List[Dict]
+ |
+
+
+
+ Tuple[List[Dict], float]: A tuple containing the processed list of dictionaries + |
+
+ float
+ |
+
+
+
+ and a float value (always 0 in this implementation). + |
+
Raises:
+Type | +Description | +
---|---|
+ KeyError
+ |
+
+
+
+ If the specified unnest_key is not found in an input dictionary. + |
+
+ TypeError
+ |
+
+
+
+ If the value of the unnest_key is not iterable (list, tuple, set, or dict). + |
+
+ ValueError
+ |
+
+
+
+ If unnesting a dictionary and 'expand_fields' is not provided in the config. + |
+
The operation supports unnesting of both list-like values and dictionary values:
+For list-like values (list, tuple, set): + Each element in the list becomes a separate dictionary in the output.
+For dictionary values: + The operation expands specified fields from the nested dictionary into the parent dictionary. + The 'expand_fields' config parameter must be provided to specify which fields to expand.
+Examples: +
# Unnesting a list
+unnest_op = UnnestOperation({"unnest_key": "colors"})
+input_data = [
+ {"id": 1, "colors": ["red", "blue"]},
+ {"id": 2, "colors": ["green"]}
+]
+result, _ = unnest_op.execute(input_data)
+# Result will be:
+# [
+# {"id": 1, "colors": "red"},
+# {"id": 1, "colors": "blue"},
+# {"id": 2, "colors": "green"}
+# ]
+
+# Unnesting a dictionary
+unnest_op = UnnestOperation({"unnest_key": "details", "expand_fields": ["color", "size"]})
+input_data = [
+ {"id": 1, "details": {"color": "red", "size": "large", "stock": 5}},
+ {"id": 2, "details": {"color": "blue", "size": "medium", "stock": 3}}
+]
+result, _ = unnest_op.execute(input_data)
+# Result will be:
+# [
+# {"id": 1, "details": {"color": "red", "size": "large", "stock": 5}, "color": "red", "size": "large"},
+# {"id": 2, "details": {"color": "blue", "size": "medium", "stock": 3}, "color": "blue", "size": "medium"}
+# ]
+
Note: When unnesting dictionaries, the original nested dictionary is preserved in the output, +and the specified fields are expanded into the parent dictionary.
+ +docetl/operations/unnest.py
82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 |
|
syntax_check()
+
+Checks if the required configuration key is present in the operation's config.
+ + +Raises:
+Type | +Description | +
---|---|
+ ValueError
+ |
+
+
+
+ If the required 'unnest_key' is missing from the configuration. + |
+
docetl/operations/unnest.py
67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 |
|
docetl.optimizers.map_optimizer.optimizer.MapOptimizer
+
+
+A class for optimizing map operations in data processing pipelines.
+This optimizer analyzes the input operation configuration and data, +and generates optimized plans for executing the operation. It can +create plans for chunking, metadata extraction, gleaning, chain +decomposition, and parallel execution.
+ + +Attributes:
+Name | +Type | +Description | +
---|---|---|
config |
+
+ Dict[str, Any]
+ |
+
+
+
+ The configuration dictionary for the optimizer. + |
+
console |
+
+ Console
+ |
+
+
+
+ A Rich console object for pretty printing. + |
+
llm_client |
+
+ LLMClient
+ |
+
+
+
+ A client for interacting with a language model. + |
+
_run_operation |
+
+ Callable
+ |
+
+
+
+ A function to execute operations. + |
+
max_threads |
+
+ int
+ |
+
+
+
+ The maximum number of threads to use for parallel execution. + |
+
timeout |
+
+ int
+ |
+
+
+
+ The timeout in seconds for operation execution. + |
+
docetl/optimizers/map_optimizer/optimizer.py
21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 
+421 +422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 |
|
__init__(runner, config, console, llm_client, max_threads, run_operation, timeout=10, is_filter=False)
+
+Initialize the MapOptimizer.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ config
+ |
+
+ Dict[str, Any]
+ |
+
+
+
+ The configuration dictionary for the optimizer. + |
+ + required + | +
+ console
+ |
+
+ Console
+ |
+
+
+
+ A Rich console object for pretty printing. + |
+ + required + | +
+ llm_client
+ |
+
+ LLMClient
+ |
+
+
+
+ A client for interacting with a language model. + |
+ + required + | +
+ max_threads
+ |
+
+ int
+ |
+
+
+
+ The maximum number of threads to use for parallel execution. + |
+ + required + | +
+ run_operation
+ |
+
+ Callable
+ |
+
+
+
+ A function to execute operations. + |
+ + required + | +
+ timeout
+ |
+
+ int
+ |
+
+
+
+ The timeout in seconds for operation execution. Defaults to 10. + |
+
+ 10
+ |
+
+ is_filter
+ |
+
+ bool
+ |
+
+
+
+ If True, the operation is a filter operation. Defaults to False. + |
+
+ False
+ |
+
docetl/optimizers/map_optimizer/optimizer.py
40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 |
|
optimize(op_config, input_data)
+
+Optimize the given operation configuration for the input data. +This method analyzes the operation and input data, generates various +optimization plans, evaluates them, and returns the best plan along +with its output. A key part of this process is creating a custom +validator prompt for evaluation. The validator prompt is generated +based on the specific task, input data, and output data. It serves +as a critical tool for assessing the quality and correctness of +each optimization plan's output. This custom prompt ensures that +the evaluation is tailored to the unique requirements and nuances +of the given operation. The types of optimization plans include:
+Improved Prompt Plan: Enhances the original prompt based on evaluation, aiming to improve output quality.
+Chunk Size Plan: Splits input data into chunks of different sizes, + processes each chunk separately, and then combines the results. This + can improve performance for large inputs.
+Gleaning Plans: Implements an iterative refinement process where the + output is validated and improved over multiple rounds, enhancing accuracy.
+Chain Decomposition Plan: Breaks down complex operations into a series + of simpler sub-operations, potentially improving overall performance + and interpretability.
+Parallel Map Plan: Decomposes the task into subtasks that can be + executed in parallel, potentially speeding up processing for + independent operations.
+The method generates these plans, evaluates their performance using +a custom validator, and selects the best performing plan based on +output quality and execution time.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ op_config
+ |
+
+ Dict[str, Any]
+ |
+
+
+
+ The configuration of the operation to optimize. + |
+ + required + | +
+ input_data
+ |
+
+ List[Dict[str, Any]]
+ |
+
+
+
+ The input data for the operation. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ List[Dict[str, Any]]
+ |
+
+
+
+ Tuple[List[Dict[str, Any]], List[Dict[str, Any]], float]: A tuple containing + |
+
+ List[Dict[str, Any]]
+ |
+
+
+
+ the best optimization plan and its output. The plan is a list of + |
+
+ float
+ |
+
+
+
+ operation configurations that achieve the best performance. + |
+
+ Tuple[List[Dict[str, Any]], List[Dict[str, Any]], float]
+ |
+
+
+
+ The cost is the cost incurred by the optimizer itself (possibly from synthesizing resolve operations). + |
+
docetl/optimizers/map_optimizer/optimizer.py
208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 +338 +339 +340 +341 +342 +343 +344 +345 +346 +347 +348 +349 +350 +351 +352 +353 +354 +355 +356 +357 +358 +359 +360 +361 +362 +363 +364 +365 +366 +367 +368 +369 +370 +371 +372 +373 +374 +375 +376 +377 +378 +379 +380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 +422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 +438 +439 +440 +441 +442 +443 +444 +445 +446 +447 +448 +449 +450 +451 +452 +453 +454 +455 +456 +457 +458 +459 +460 +461 +462 +463 +464 +465 +466 +467 +468 +469 |
|
should_optimize(op_config, input_data)
+
+Determine if the given operation configuration should be optimized.
+ +docetl/optimizers/map_optimizer/optimizer.py
89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 |
|
docetl.optimizers.reduce_optimizer.ReduceOptimizer
+
+
+A class that optimizes reduce operations in data processing pipelines.
+This optimizer analyzes the input and output of a reduce operation, creates and evaluates +multiple reduce plans, and selects the best plan for optimizing the operation's performance.
+ + +Attributes:
+Name | +Type | +Description | +
---|---|---|
config |
+
+ Dict[str, Any]
+ |
+
+
+
+ Configuration dictionary for the optimizer. + |
+
console |
+
+ Console
+ |
+
+
+
+ Rich console object for pretty printing. + |
+
llm_client |
+
+ LLMClient
+ |
+
+
+
+ Client for interacting with a language model. + |
+
_run_operation |
+
+ Callable
+ |
+
+
+
+ Function to run an operation. + |
+
max_threads |
+
+ int
+ |
+
+
+
+ Maximum number of threads to use for parallel processing. + |
+
num_fold_prompts |
+
+ int
+ |
+
+
+
+ Number of fold prompts to generate. + |
+
num_samples_in_validation |
+
+ int
+ |
+
+
+
+ Number of samples to use in validation. + |
+
docetl/optimizers/reduce_optimizer.py
21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 + 100 + 101 + 102 + 103 + 104 + 105 + 106 + 107 + 108 + 109 + 110 + 111 + 112 + 113 + 114 + 115 + 116 + 117 + 118 + 119 + 120 + 121 + 122 + 123 + 124 + 125 + 126 + 127 + 128 + 129 + 130 + 131 + 132 + 133 + 134 + 135 + 136 + 137 + 138 + 139 + 140 + 141 + 142 + 143 + 144 + 145 + 146 + 147 + 148 + 149 + 150 + 151 + 152 + 153 + 154 + 155 + 156 + 157 + 158 + 159 + 160 + 161 + 162 + 163 + 164 + 165 + 166 + 167 + 168 + 169 + 170 + 171 + 172 + 173 + 174 + 175 + 176 + 177 + 178 + 179 + 180 + 181 + 182 + 183 + 184 + 185 + 186 + 187 + 188 + 189 + 190 + 191 + 192 + 193 + 194 + 195 + 196 + 197 + 198 + 199 + 200 + 201 + 202 + 203 + 204 + 205 + 206 + 207 + 208 + 209 + 210 + 211 + 212 + 213 + 214 + 215 + 216 + 217 + 218 + 219 + 220 + 221 + 222 + 223 + 224 + 225 + 226 + 227 + 228 + 229 + 230 + 231 + 232 + 233 + 234 + 235 + 236 + 237 + 238 + 239 + 240 + 241 + 242 + 243 + 244 + 245 + 246 + 247 + 248 + 249 + 250 + 251 + 252 + 253 + 254 + 255 + 256 + 257 + 258 + 259 + 260 + 261 + 262 + 263 + 264 + 265 + 266 + 267 + 268 + 269 + 270 + 271 + 272 + 273 + 274 + 275 + 276 + 277 + 278 + 279 + 280 + 281 + 282 + 283 + 284 + 285 + 286 + 287 + 288 + 289 + 290 + 291 + 292 + 293 + 294 + 295 + 296 + 297 + 298 + 299 + 300 + 301 + 302 + 303 + 304 + 305 + 306 + 307 + 308 + 309 + 310 + 311 + 312 + 313 + 314 + 315 + 316 + 317 + 318 + 319 + 320 + 321 + 322 + 323 + 324 + 325 + 326 + 327 + 328 + 329 + 330 + 331 + 332 + 333 + 334 + 335 + 336 + 337 + 338 + 339 + 340 + 341 + 342 + 343 + 344 + 345 + 346 + 347 + 348 + 349 + 350 + 351 + 352 + 353 + 354 + 355 + 356 + 357 + 358 + 359 + 360 + 361 + 362 + 363 + 364 + 365 + 366 + 
367 + 368 + 369 + 370 + 371 + 372 + 373 + 374 + 375 + 376 + 377 + 378 + 379 + 380 + 381 + 382 + 383 + 384 + 385 + 386 + 387 + 388 + 389 + 390 + 391 + 392 + 393 + 394 + 395 + 396 + 397 + 398 + 399 + 400 + 401 + 402 + 403 + 404 + 405 + 406 + 407 + 408 + 409 + 410 + 411 + 412 + 413 + 414 + 415 + 416 + 417 + 418 + 419 + 420 + 421 + 422 + 423 + 424 + 425 + 426 + 427 + 428 + 429 + 430 + 431 + 432 + 433 + 434 + 435 + 436 + 437 + 438 + 439 + 440 + 441 + 442 + 443 + 444 + 445 + 446 + 447 + 448 + 449 + 450 + 451 + 452 + 453 + 454 + 455 + 456 + 457 + 458 + 459 + 460 + 461 + 462 + 463 + 464 + 465 + 466 + 467 + 468 + 469 + 470 + 471 + 472 + 473 + 474 + 475 + 476 + 477 + 478 + 479 + 480 + 481 + 482 + 483 + 484 + 485 + 486 + 487 + 488 + 489 + 490 + 491 + 492 + 493 + 494 + 495 + 496 + 497 + 498 + 499 + 500 + 501 + 502 + 503 + 504 + 505 + 506 + 507 + 508 + 509 + 510 + 511 + 512 + 513 + 514 + 515 + 516 + 517 + 518 + 519 + 520 + 521 + 522 + 523 + 524 + 525 + 526 + 527 + 528 + 529 + 530 + 531 + 532 + 533 + 534 + 535 + 536 + 537 + 538 + 539 + 540 + 541 + 542 + 543 + 544 + 545 + 546 + 547 + 548 + 549 + 550 + 551 + 552 + 553 + 554 + 555 + 556 + 557 + 558 + 559 + 560 + 561 + 562 + 563 + 564 + 565 + 566 + 567 + 568 + 569 + 570 + 571 + 572 + 573 + 574 + 575 + 576 + 577 + 578 + 579 + 580 + 581 + 582 + 583 + 584 + 585 + 586 + 587 + 588 + 589 + 590 + 591 + 592 + 593 + 594 + 595 + 596 + 597 + 598 + 599 + 600 + 601 + 602 + 603 + 604 + 605 + 606 + 607 + 608 + 609 + 610 + 611 + 612 + 613 + 614 + 615 + 616 + 617 + 618 + 619 + 620 + 621 + 622 + 623 + 624 + 625 + 626 + 627 + 628 + 629 + 630 + 631 + 632 + 633 + 634 + 635 + 636 + 637 + 638 + 639 + 640 + 641 + 642 + 643 + 644 + 645 + 646 + 647 + 648 + 649 + 650 + 651 + 652 + 653 + 654 + 655 + 656 + 657 + 658 + 659 + 660 + 661 + 662 + 663 + 664 + 665 + 666 + 667 + 668 + 669 + 670 + 671 + 672 + 673 + 674 + 675 + 676 + 677 + 678 + 679 + 680 + 681 + 682 + 683 + 684 + 685 + 686 + 687 + 688 + 689 + 690 + 691 + 692 + 693 + 694 + 695 + 696 + 697 + 698 + 699 + 
700 + 701 + 702 + 703 + 704 + 705 + 706 + 707 + 708 + 709 + 710 + 711 + 712 + 713 + 714 + 715 + 716 + 717 + 718 + 719 + 720 + 721 + 722 + 723 + 724 + 725 + 726 + 727 + 728 + 729 + 730 + 731 + 732 + 733 + 734 + 735 + 736 + 737 + 738 + 739 + 740 + 741 + 742 + 743 + 744 + 745 + 746 + 747 + 748 + 749 + 750 + 751 + 752 + 753 + 754 + 755 + 756 + 757 + 758 + 759 + 760 + 761 + 762 + 763 + 764 + 765 + 766 + 767 + 768 + 769 + 770 + 771 + 772 + 773 + 774 + 775 + 776 + 777 + 778 + 779 + 780 + 781 + 782 + 783 + 784 + 785 + 786 + 787 + 788 + 789 + 790 + 791 + 792 + 793 + 794 + 795 + 796 + 797 + 798 + 799 + 800 + 801 + 802 + 803 + 804 + 805 + 806 + 807 + 808 + 809 + 810 + 811 + 812 + 813 + 814 + 815 + 816 + 817 + 818 + 819 + 820 + 821 + 822 + 823 + 824 + 825 + 826 + 827 + 828 + 829 + 830 + 831 + 832 + 833 + 834 + 835 + 836 + 837 + 838 + 839 + 840 + 841 + 842 + 843 + 844 + 845 + 846 + 847 + 848 + 849 + 850 + 851 + 852 + 853 + 854 + 855 + 856 + 857 + 858 + 859 + 860 + 861 + 862 + 863 + 864 + 865 + 866 + 867 + 868 + 869 + 870 + 871 + 872 + 873 + 874 + 875 + 876 + 877 + 878 + 879 + 880 + 881 + 882 + 883 + 884 + 885 + 886 + 887 + 888 + 889 + 890 + 891 + 892 + 893 + 894 + 895 + 896 + 897 + 898 + 899 + 900 + 901 + 902 + 903 + 904 + 905 + 906 + 907 + 908 + 909 + 910 + 911 + 912 + 913 + 914 + 915 + 916 + 917 + 918 + 919 + 920 + 921 + 922 + 923 + 924 + 925 + 926 + 927 + 928 + 929 + 930 + 931 + 932 + 933 + 934 + 935 + 936 + 937 + 938 + 939 + 940 + 941 + 942 + 943 + 944 + 945 + 946 + 947 + 948 + 949 + 950 + 951 + 952 + 953 + 954 + 955 + 956 + 957 + 958 + 959 + 960 + 961 + 962 + 963 + 964 + 965 + 966 + 967 + 968 + 969 + 970 + 971 + 972 + 973 + 974 + 975 + 976 + 977 + 978 + 979 + 980 + 981 + 982 + 983 + 984 + 985 + 986 + 987 + 988 + 989 + 990 + 991 + 992 + 993 + 994 + 995 + 996 + 997 + 998 + 999 +1000 +1001 +1002 +1003 +1004 +1005 +1006 +1007 +1008 +1009 +1010 +1011 +1012 +1013 +1014 +1015 +1016 +1017 +1018 +1019 +1020 +1021 +1022 +1023 +1024 +1025 +1026 +1027 +1028 +1029 +1030 +1031 +1032 
+1033 +1034 +1035 +1036 +1037 +1038 +1039 +1040 +1041 +1042 +1043 +1044 +1045 +1046 +1047 +1048 +1049 +1050 +1051 +1052 +1053 +1054 +1055 +1056 +1057 +1058 +1059 +1060 +1061 +1062 +1063 +1064 +1065 +1066 +1067 +1068 +1069 +1070 +1071 +1072 +1073 +1074 +1075 +1076 +1077 +1078 +1079 +1080 +1081 +1082 +1083 +1084 +1085 +1086 +1087 +1088 +1089 +1090 +1091 +1092 +1093 +1094 +1095 +1096 +1097 +1098 +1099 +1100 +1101 +1102 +1103 +1104 +1105 +1106 +1107 +1108 +1109 +1110 +1111 +1112 +1113 +1114 +1115 +1116 +1117 +1118 +1119 +1120 +1121 +1122 +1123 +1124 +1125 +1126 +1127 +1128 +1129 +1130 +1131 +1132 +1133 +1134 +1135 +1136 +1137 +1138 +1139 +1140 +1141 +1142 +1143 +1144 +1145 +1146 +1147 +1148 +1149 +1150 +1151 +1152 +1153 +1154 +1155 +1156 +1157 +1158 +1159 +1160 +1161 +1162 +1163 +1164 +1165 +1166 +1167 +1168 +1169 +1170 +1171 +1172 +1173 +1174 +1175 +1176 +1177 +1178 +1179 +1180 +1181 +1182 +1183 +1184 +1185 +1186 +1187 +1188 +1189 +1190 +1191 +1192 +1193 +1194 +1195 +1196 +1197 +1198 +1199 +1200 +1201 +1202 +1203 +1204 +1205 +1206 +1207 +1208 +1209 +1210 +1211 +1212 +1213 +1214 +1215 +1216 +1217 +1218 +1219 +1220 +1221 +1222 +1223 +1224 +1225 +1226 +1227 +1228 +1229 +1230 +1231 +1232 +1233 +1234 +1235 +1236 +1237 +1238 +1239 +1240 +1241 +1242 +1243 +1244 +1245 +1246 +1247 +1248 +1249 +1250 +1251 +1252 +1253 +1254 +1255 +1256 +1257 +1258 +1259 +1260 +1261 +1262 +1263 +1264 +1265 +1266 +1267 +1268 +1269 +1270 +1271 +1272 +1273 +1274 +1275 +1276 +1277 +1278 +1279 +1280 +1281 +1282 +1283 +1284 +1285 +1286 +1287 +1288 +1289 +1290 +1291 +1292 +1293 +1294 +1295 +1296 +1297 +1298 +1299 +1300 +1301 +1302 +1303 +1304 +1305 +1306 +1307 +1308 +1309 +1310 +1311 +1312 +1313 +1314 +1315 +1316 +1317 +1318 +1319 +1320 +1321 +1322 +1323 +1324 +1325 +1326 +1327 +1328 +1329 +1330 +1331 +1332 +1333 +1334 +1335 +1336 +1337 +1338 +1339 +1340 +1341 +1342 +1343 +1344 +1345 +1346 +1347 +1348 +1349 +1350 +1351 +1352 +1353 +1354 +1355 +1356 +1357 +1358 +1359 +1360 +1361 +1362 +1363 +1364 +1365 
+1366 +1367 +1368 +1369 +1370 +1371 +1372 +1373 +1374 +1375 +1376 +1377 +1378 +1379 +1380 +1381 +1382 +1383 +1384 +1385 +1386 +1387 +1388 +1389 +1390 +1391 +1392 +1393 +1394 +1395 +1396 +1397 +1398 +1399 +1400 +1401 +1402 +1403 +1404 +1405 +1406 +1407 +1408 +1409 +1410 +1411 +1412 +1413 +1414 +1415 +1416 +1417 +1418 +1419 +1420 +1421 +1422 +1423 +1424 +1425 +1426 +1427 +1428 +1429 +1430 +1431 +1432 +1433 +1434 +1435 +1436 +1437 +1438 +1439 +1440 +1441 +1442 +1443 +1444 +1445 +1446 +1447 +1448 +1449 +1450 +1451 +1452 +1453 +1454 +1455 +1456 +1457 +1458 +1459 +1460 +1461 +1462 +1463 +1464 +1465 +1466 +1467 +1468 +1469 +1470 +1471 +1472 +1473 +1474 +1475 +1476 +1477 +1478 +1479 +1480 +1481 +1482 +1483 +1484 +1485 +1486 +1487 +1488 +1489 +1490 +1491 +1492 +1493 +1494 +1495 +1496 +1497 +1498 +1499 +1500 +1501 +1502 +1503 +1504 +1505 +1506 +1507 +1508 +1509 +1510 +1511 +1512 +1513 +1514 +1515 +1516 +1517 +1518 +1519 +1520 +1521 +1522 +1523 +1524 +1525 +1526 +1527 +1528 +1529 +1530 +1531 +1532 +1533 +1534 +1535 +1536 +1537 +1538 +1539 +1540 +1541 +1542 +1543 +1544 +1545 +1546 +1547 +1548 +1549 +1550 +1551 +1552 +1553 +1554 +1555 +1556 +1557 +1558 +1559 +1560 +1561 +1562 +1563 +1564 +1565 +1566 +1567 +1568 +1569 +1570 +1571 +1572 +1573 +1574 +1575 +1576 +1577 +1578 +1579 +1580 +1581 +1582 +1583 +1584 +1585 +1586 +1587 +1588 +1589 +1590 +1591 +1592 +1593 +1594 +1595 +1596 +1597 +1598 +1599 +1600 +1601 +1602 +1603 +1604 +1605 +1606 +1607 +1608 +1609 +1610 +1611 +1612 +1613 +1614 +1615 +1616 +1617 +1618 +1619 +1620 +1621 +1622 +1623 +1624 +1625 +1626 +1627 +1628 +1629 +1630 +1631 +1632 +1633 +1634 +1635 +1636 +1637 +1638 +1639 +1640 +1641 +1642 +1643 +1644 +1645 +1646 +1647 +1648 +1649 +1650 +1651 +1652 +1653 +1654 +1655 +1656 +1657 +1658 +1659 +1660 +1661 +1662 +1663 +1664 +1665 +1666 +1667 +1668 +1669 +1670 +1671 +1672 +1673 +1674 +1675 +1676 +1677 +1678 +1679 +1680 +1681 +1682 +1683 +1684 +1685 +1686 +1687 +1688 +1689 +1690 +1691 +1692 +1693 +1694 +1695 +1696 +1697 +1698 
+1699 +1700 +1701 +1702 +1703 +1704 +1705 +1706 +1707 +1708 +1709 +1710 +1711 +1712 +1713 +1714 +1715 +1716 +1717 +1718 +1719 +1720 +1721 +1722 +1723 +1724 +1725 +1726 +1727 +1728 +1729 +1730 +1731 +1732 +1733 +1734 +1735 +1736 +1737 +1738 +1739 +1740 +1741 +1742 +1743 +1744 +1745 +1746 +1747 +1748 +1749 +1750 +1751 +1752 +1753 +1754 +1755 +1756 +1757 +1758 +1759 +1760 +1761 +1762 +1763 +1764 +1765 +1766 +1767 +1768 +1769 +1770 +1771 +1772 +1773 +1774 +1775 +1776 +1777 +1778 +1779 +1780 +1781 +1782 +1783 +1784 +1785 +1786 +1787 +1788 +1789 +1790 +1791 +1792 +1793 +1794 +1795 +1796 +1797 +1798 +1799 +1800 +1801 +1802 +1803 +1804 +1805 +1806 +1807 +1808 +1809 +1810 +1811 +1812 +1813 +1814 +1815 +1816 +1817 +1818 +1819 +1820 +1821 +1822 +1823 +1824 +1825 +1826 +1827 +1828 +1829 +1830 +1831 +1832 +1833 +1834 +1835 +1836 +1837 +1838 +1839 +1840 +1841 +1842 +1843 +1844 +1845 +1846 +1847 +1848 +1849 +1850 +1851 +1852 +1853 +1854 +1855 +1856 +1857 +1858 +1859 +1860 +1861 +1862 +1863 +1864 +1865 +1866 +1867 +1868 +1869 +1870 +1871 +1872 +1873 +1874 +1875 +1876 +1877 +1878 +1879 |
|
__init__(runner, config, console, llm_client, max_threads, run_operation, num_fold_prompts=1, num_samples_in_validation=10, status=None)
+
+Initialize the ReduceOptimizer.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ config
+ |
+
+ Dict[str, Any]
+ |
+
+
+
+ Configuration dictionary for the optimizer. + |
+ + required + | +
+ console
+ |
+
+ Console
+ |
+
+
+
+ Rich console object for pretty printing. + |
+ + required + | +
+ llm_client
+ |
+
+ LLMClient
+ |
+
+
+
+ Client for interacting with a language model. + |
+ + required + | +
+ max_threads
+ |
+
+ int
+ |
+
+
+
+ Maximum number of threads to use for parallel processing. + |
+ + required + | +
+ run_operation
+ |
+
+ Callable
+ |
+
+
+
+ Function to run an operation. + |
+ + required + | +
+ num_fold_prompts
+ |
+
+ int
+ |
+
+
+
+ Number of fold prompts to generate. Defaults to 1. + |
+
+ 1
+ |
+
+ num_samples_in_validation
+ |
+
+ int
+ |
+
+
+
+ Number of samples to use in validation. Defaults to 10. + |
+
+ 10
+ |
+
docetl/optimizers/reduce_optimizer.py
38 +39 +40 +41 +42 +43 +44 +45 +46 +47 +48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 |
|
optimize(op_config, input_data, level=1)
+
+Optimize the reduce operation based on the given configuration and input data.
+This method performs the following steps: +1. Run the original operation +2. Generate a validator prompt +3. Validate the output +4. If improvement is needed: + a. Evaluate if decomposition is beneficial + b. If decomposition is beneficial, recursively optimize each sub-operation + c. If not, proceed with single operation optimization +5. Run the optimized operation(s)
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ op_config
+ |
+
+ Dict[str, Any]
+ |
+
+
+
+ Configuration for the reduce operation. + |
+ + required + | +
+ input_data
+ |
+
+ List[Dict[str, Any]]
+ |
+
+
+
+ Input data for the reduce operation. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ List[Dict[str, Any]]
+ |
+
+
+
+ Tuple[List[Dict[str, Any]], List[Dict[str, Any]], float]: A tuple containing the list of optimized configurations + |
+
+ List[Dict[str, Any]]
+ |
+
+
+
+ and the list of outputs from the optimized operation(s), and the cost of the operation due to synthesizing any resolve operations. + |
+
docetl/optimizers/reduce_optimizer.py
142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 |
|
docetl.optimizers.join_optimizer.JoinOptimizer
+
+
+docetl/optimizers/join_optimizer.py
17 + 18 + 19 + 20 + 21 + 22 + 23 + 24 + 25 + 26 + 27 + 28 + 29 + 30 + 31 + 32 + 33 + 34 + 35 + 36 + 37 + 38 + 39 + 40 + 41 + 42 + 43 + 44 + 45 + 46 + 47 + 48 + 49 + 50 + 51 + 52 + 53 + 54 + 55 + 56 + 57 + 58 + 59 + 60 + 61 + 62 + 63 + 64 + 65 + 66 + 67 + 68 + 69 + 70 + 71 + 72 + 73 + 74 + 75 + 76 + 77 + 78 + 79 + 80 + 81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 + 100 + 101 + 102 + 103 + 104 + 105 + 106 + 107 + 108 + 109 + 110 + 111 + 112 + 113 + 114 + 115 + 116 + 117 + 118 + 119 + 120 + 121 + 122 + 123 + 124 + 125 + 126 + 127 + 128 + 129 + 130 + 131 + 132 + 133 + 134 + 135 + 136 + 137 + 138 + 139 + 140 + 141 + 142 + 143 + 144 + 145 + 146 + 147 + 148 + 149 + 150 + 151 + 152 + 153 + 154 + 155 + 156 + 157 + 158 + 159 + 160 + 161 + 162 + 163 + 164 + 165 + 166 + 167 + 168 + 169 + 170 + 171 + 172 + 173 + 174 + 175 + 176 + 177 + 178 + 179 + 180 + 181 + 182 + 183 + 184 + 185 + 186 + 187 + 188 + 189 + 190 + 191 + 192 + 193 + 194 + 195 + 196 + 197 + 198 + 199 + 200 + 201 + 202 + 203 + 204 + 205 + 206 + 207 + 208 + 209 + 210 + 211 + 212 + 213 + 214 + 215 + 216 + 217 + 218 + 219 + 220 + 221 + 222 + 223 + 224 + 225 + 226 + 227 + 228 + 229 + 230 + 231 + 232 + 233 + 234 + 235 + 236 + 237 + 238 + 239 + 240 + 241 + 242 + 243 + 244 + 245 + 246 + 247 + 248 + 249 + 250 + 251 + 252 + 253 + 254 + 255 + 256 + 257 + 258 + 259 + 260 + 261 + 262 + 263 + 264 + 265 + 266 + 267 + 268 + 269 + 270 + 271 + 272 + 273 + 274 + 275 + 276 + 277 + 278 + 279 + 280 + 281 + 282 + 283 + 284 + 285 + 286 + 287 + 288 + 289 + 290 + 291 + 292 + 293 + 294 + 295 + 296 + 297 + 298 + 299 + 300 + 301 + 302 + 303 + 304 + 305 + 306 + 307 + 308 + 309 + 310 + 311 + 312 + 313 + 314 + 315 + 316 + 317 + 318 + 319 + 320 + 321 + 322 + 323 + 324 + 325 + 326 + 327 + 328 + 329 + 330 + 331 + 332 + 333 + 334 + 335 + 336 + 337 + 338 + 339 + 340 + 341 + 342 + 343 + 344 + 345 + 346 + 347 + 348 + 349 + 350 + 351 + 352 + 353 + 354 + 355 + 356 + 357 + 358 + 359 + 360 + 361 + 362 + 363 + 
364 + 365 + 366 + 367 + 368 + 369 + 370 + 371 + 372 + 373 + 374 + 375 + 376 + 377 + 378 + 379 + 380 + 381 + 382 + 383 + 384 + 385 + 386 + 387 + 388 + 389 + 390 + 391 + 392 + 393 + 394 + 395 + 396 + 397 + 398 + 399 + 400 + 401 + 402 + 403 + 404 + 405 + 406 + 407 + 408 + 409 + 410 + 411 + 412 + 413 + 414 + 415 + 416 + 417 + 418 + 419 + 420 + 421 + 422 + 423 + 424 + 425 + 426 + 427 + 428 + 429 + 430 + 431 + 432 + 433 + 434 + 435 + 436 + 437 + 438 + 439 + 440 + 441 + 442 + 443 + 444 + 445 + 446 + 447 + 448 + 449 + 450 + 451 + 452 + 453 + 454 + 455 + 456 + 457 + 458 + 459 + 460 + 461 + 462 + 463 + 464 + 465 + 466 + 467 + 468 + 469 + 470 + 471 + 472 + 473 + 474 + 475 + 476 + 477 + 478 + 479 + 480 + 481 + 482 + 483 + 484 + 485 + 486 + 487 + 488 + 489 + 490 + 491 + 492 + 493 + 494 + 495 + 496 + 497 + 498 + 499 + 500 + 501 + 502 + 503 + 504 + 505 + 506 + 507 + 508 + 509 + 510 + 511 + 512 + 513 + 514 + 515 + 516 + 517 + 518 + 519 + 520 + 521 + 522 + 523 + 524 + 525 + 526 + 527 + 528 + 529 + 530 + 531 + 532 + 533 + 534 + 535 + 536 + 537 + 538 + 539 + 540 + 541 + 542 + 543 + 544 + 545 + 546 + 547 + 548 + 549 + 550 + 551 + 552 + 553 + 554 + 555 + 556 + 557 + 558 + 559 + 560 + 561 + 562 + 563 + 564 + 565 + 566 + 567 + 568 + 569 + 570 + 571 + 572 + 573 + 574 + 575 + 576 + 577 + 578 + 579 + 580 + 581 + 582 + 583 + 584 + 585 + 586 + 587 + 588 + 589 + 590 + 591 + 592 + 593 + 594 + 595 + 596 + 597 + 598 + 599 + 600 + 601 + 602 + 603 + 604 + 605 + 606 + 607 + 608 + 609 + 610 + 611 + 612 + 613 + 614 + 615 + 616 + 617 + 618 + 619 + 620 + 621 + 622 + 623 + 624 + 625 + 626 + 627 + 628 + 629 + 630 + 631 + 632 + 633 + 634 + 635 + 636 + 637 + 638 + 639 + 640 + 641 + 642 + 643 + 644 + 645 + 646 + 647 + 648 + 649 + 650 + 651 + 652 + 653 + 654 + 655 + 656 + 657 + 658 + 659 + 660 + 661 + 662 + 663 + 664 + 665 + 666 + 667 + 668 + 669 + 670 + 671 + 672 + 673 + 674 + 675 + 676 + 677 + 678 + 679 + 680 + 681 + 682 + 683 + 684 + 685 + 686 + 687 + 688 + 689 + 690 + 691 + 692 + 693 + 694 + 695 + 696 + 
697 + 698 + 699 + 700 + 701 + 702 + 703 + 704 + 705 + 706 + 707 + 708 + 709 + 710 + 711 + 712 + 713 + 714 + 715 + 716 + 717 + 718 + 719 + 720 + 721 + 722 + 723 + 724 + 725 + 726 + 727 + 728 + 729 + 730 + 731 + 732 + 733 + 734 + 735 + 736 + 737 + 738 + 739 + 740 + 741 + 742 + 743 + 744 + 745 + 746 + 747 + 748 + 749 + 750 + 751 + 752 + 753 + 754 + 755 + 756 + 757 + 758 + 759 + 760 + 761 + 762 + 763 + 764 + 765 + 766 + 767 + 768 + 769 + 770 + 771 + 772 + 773 + 774 + 775 + 776 + 777 + 778 + 779 + 780 + 781 + 782 + 783 + 784 + 785 + 786 + 787 + 788 + 789 + 790 + 791 + 792 + 793 + 794 + 795 + 796 + 797 + 798 + 799 + 800 + 801 + 802 + 803 + 804 + 805 + 806 + 807 + 808 + 809 + 810 + 811 + 812 + 813 + 814 + 815 + 816 + 817 + 818 + 819 + 820 + 821 + 822 + 823 + 824 + 825 + 826 + 827 + 828 + 829 + 830 + 831 + 832 + 833 + 834 + 835 + 836 + 837 + 838 + 839 + 840 + 841 + 842 + 843 + 844 + 845 + 846 + 847 + 848 + 849 + 850 + 851 + 852 + 853 + 854 + 855 + 856 + 857 + 858 + 859 + 860 + 861 + 862 + 863 + 864 + 865 + 866 + 867 + 868 + 869 + 870 + 871 + 872 + 873 + 874 + 875 + 876 + 877 + 878 + 879 + 880 + 881 + 882 + 883 + 884 + 885 + 886 + 887 + 888 + 889 + 890 + 891 + 892 + 893 + 894 + 895 + 896 + 897 + 898 + 899 + 900 + 901 + 902 + 903 + 904 + 905 + 906 + 907 + 908 + 909 + 910 + 911 + 912 + 913 + 914 + 915 + 916 + 917 + 918 + 919 + 920 + 921 + 922 + 923 + 924 + 925 + 926 + 927 + 928 + 929 + 930 + 931 + 932 + 933 + 934 + 935 + 936 + 937 + 938 + 939 + 940 + 941 + 942 + 943 + 944 + 945 + 946 + 947 + 948 + 949 + 950 + 951 + 952 + 953 + 954 + 955 + 956 + 957 + 958 + 959 + 960 + 961 + 962 + 963 + 964 + 965 + 966 + 967 + 968 + 969 + 970 + 971 + 972 + 973 + 974 + 975 + 976 + 977 + 978 + 979 + 980 + 981 + 982 + 983 + 984 + 985 + 986 + 987 + 988 + 989 + 990 + 991 + 992 + 993 + 994 + 995 + 996 + 997 + 998 + 999 +1000 +1001 +1002 +1003 +1004 +1005 +1006 +1007 +1008 +1009 +1010 +1011 +1012 +1013 +1014 +1015 +1016 +1017 +1018 +1019 +1020 +1021 +1022 +1023 +1024 +1025 +1026 +1027 +1028 +1029 
+1030 +1031 +1032 +1033 +1034 +1035 +1036 +1037 +1038 +1039 +1040 +1041 +1042 +1043 +1044 +1045 +1046 +1047 +1048 +1049 +1050 +1051 +1052 +1053 +1054 +1055 +1056 +1057 +1058 +1059 +1060 +1061 +1062 +1063 +1064 +1065 +1066 +1067 +1068 +1069 +1070 +1071 +1072 +1073 +1074 +1075 +1076 +1077 +1078 +1079 +1080 +1081 +1082 +1083 +1084 +1085 +1086 +1087 +1088 +1089 +1090 +1091 +1092 +1093 +1094 +1095 +1096 +1097 +1098 +1099 +1100 +1101 +1102 +1103 +1104 +1105 +1106 +1107 +1108 +1109 +1110 +1111 +1112 +1113 +1114 +1115 +1116 +1117 +1118 +1119 +1120 +1121 +1122 +1123 +1124 +1125 +1126 +1127 +1128 +1129 +1130 +1131 +1132 +1133 +1134 +1135 +1136 +1137 +1138 +1139 +1140 +1141 +1142 +1143 +1144 +1145 +1146 +1147 +1148 +1149 +1150 +1151 +1152 +1153 +1154 +1155 +1156 +1157 +1158 +1159 +1160 +1161 +1162 +1163 +1164 +1165 +1166 +1167 +1168 +1169 +1170 +1171 +1172 +1173 +1174 +1175 +1176 +1177 +1178 +1179 +1180 +1181 +1182 +1183 +1184 +1185 +1186 +1187 +1188 +1189 +1190 +1191 +1192 +1193 +1194 +1195 +1196 +1197 +1198 +1199 +1200 +1201 +1202 +1203 +1204 +1205 +1206 +1207 +1208 +1209 +1210 +1211 +1212 +1213 +1214 +1215 +1216 +1217 +1218 +1219 +1220 +1221 +1222 +1223 +1224 +1225 +1226 +1227 +1228 +1229 +1230 +1231 +1232 +1233 +1234 +1235 +1236 +1237 +1238 +1239 +1240 +1241 +1242 +1243 +1244 +1245 +1246 +1247 +1248 +1249 +1250 +1251 +1252 +1253 +1254 +1255 +1256 +1257 +1258 +1259 +1260 +1261 +1262 +1263 +1264 +1265 +1266 +1267 +1268 +1269 +1270 +1271 +1272 +1273 +1274 +1275 +1276 +1277 +1278 +1279 +1280 +1281 +1282 +1283 +1284 +1285 +1286 +1287 +1288 +1289 +1290 +1291 +1292 +1293 +1294 +1295 +1296 +1297 +1298 +1299 +1300 +1301 +1302 +1303 +1304 +1305 +1306 +1307 +1308 +1309 +1310 +1311 +1312 +1313 +1314 +1315 +1316 +1317 +1318 +1319 +1320 +1321 +1322 +1323 +1324 +1325 +1326 +1327 +1328 +1329 +1330 +1331 +1332 +1333 +1334 +1335 +1336 +1337 +1338 +1339 +1340 +1341 +1342 +1343 +1344 +1345 +1346 +1347 +1348 +1349 +1350 +1351 +1352 +1353 +1354 +1355 +1356 +1357 +1358 +1359 +1360 +1361 +1362 
+1363 +1364 +1365 +1366 +1367 +1368 +1369 +1370 +1371 +1372 +1373 +1374 +1375 +1376 +1377 +1378 +1379 +1380 +1381 +1382 +1383 +1384 +1385 +1386 +1387 +1388 +1389 +1390 +1391 +1392 +1393 +1394 +1395 +1396 +1397 +1398 +1399 +1400 +1401 +1402 +1403 +1404 +1405 +1406 +1407 +1408 +1409 +1410 +1411 +1412 +1413 +1414 +1415 +1416 +1417 +1418 +1419 +1420 +1421 +1422 +1423 +1424 +1425 +1426 +1427 +1428 +1429 +1430 +1431 +1432 +1433 +1434 +1435 +1436 +1437 +1438 +1439 +1440 +1441 +1442 +1443 +1444 +1445 +1446 +1447 +1448 +1449 +1450 +1451 +1452 +1453 +1454 +1455 +1456 +1457 +1458 +1459 +1460 +1461 +1462 +1463 +1464 +1465 +1466 +1467 +1468 +1469 +1470 +1471 +1472 +1473 +1474 +1475 +1476 +1477 +1478 +1479 +1480 +1481 +1482 +1483 +1484 +1485 +1486 +1487 +1488 +1489 +1490 +1491 +1492 +1493 +1494 +1495 +1496 +1497 +1498 +1499 +1500 +1501 +1502 +1503 +1504 +1505 +1506 +1507 +1508 +1509 +1510 +1511 +1512 +1513 +1514 +1515 +1516 +1517 +1518 +1519 +1520 +1521 +1522 +1523 +1524 +1525 +1526 +1527 +1528 +1529 +1530 +1531 +1532 +1533 +1534 +1535 +1536 +1537 +1538 +1539 +1540 +1541 +1542 +1543 +1544 +1545 +1546 +1547 +1548 +1549 +1550 +1551 +1552 +1553 +1554 +1555 +1556 +1557 +1558 +1559 +1560 +1561 +1562 +1563 +1564 +1565 +1566 +1567 +1568 +1569 +1570 +1571 +1572 +1573 +1574 +1575 +1576 +1577 +1578 +1579 +1580 +1581 +1582 +1583 +1584 +1585 +1586 +1587 +1588 +1589 +1590 +1591 +1592 +1593 +1594 +1595 +1596 +1597 +1598 +1599 +1600 +1601 +1602 +1603 +1604 +1605 +1606 +1607 +1608 +1609 +1610 +1611 +1612 +1613 +1614 +1615 +1616 +1617 +1618 +1619 +1620 +1621 +1622 +1623 +1624 +1625 +1626 +1627 +1628 +1629 +1630 +1631 +1632 +1633 +1634 +1635 +1636 +1637 +1638 +1639 +1640 +1641 +1642 +1643 +1644 +1645 +1646 +1647 +1648 +1649 +1650 +1651 +1652 +1653 +1654 +1655 +1656 +1657 +1658 +1659 +1660 +1661 +1662 +1663 +1664 +1665 +1666 +1667 +1668 +1669 +1670 +1671 +1672 +1673 +1674 +1675 +1676 +1677 +1678 +1679 +1680 +1681 +1682 +1683 +1684 +1685 +1686 +1687 +1688 +1689 +1690 +1691 +1692 +1693 +1694 +1695 
+1696 +1697 +1698 +1699 +1700 +1701 +1702 +1703 +1704 +1705 +1706 +1707 +1708 +1709 +1710 +1711 +1712 +1713 +1714 +1715 +1716 +1717 +1718 +1719 +1720 +1721 +1722 +1723 +1724 +1725 |
|
should_optimize(input_data)
+
+Determine if the given operation configuration should be optimized.
+ +docetl/optimizers/join_optimizer.py
380 +381 +382 +383 +384 +385 +386 +387 +388 +389 +390 +391 +392 +393 +394 +395 +396 +397 +398 +399 +400 +401 +402 +403 +404 +405 +406 +407 +408 +409 +410 +411 +412 +413 +414 +415 +416 +417 +418 +419 +420 +421 +422 +423 +424 +425 +426 +427 +428 +429 +430 +431 +432 +433 +434 +435 +436 +437 |
|
docetl.schemas.MapOp = map.MapOperation.schema
+
+
+ module-attribute
+
+
+docetl.schemas.ResolveOp = resolve.ResolveOperation.schema
+
+
+ module-attribute
+
+
+docetl.schemas.ReduceOp = reduce.ReduceOperation.schema
+
+
+ module-attribute
+
+
+docetl.schemas.ParallelMapOp = map.ParallelMapOperation.schema
+
+
+ module-attribute
+
+
+docetl.schemas.FilterOp = filter.FilterOperation.schema
+
+
+ module-attribute
+
+
+docetl.schemas.EquijoinOp = equijoin.EquijoinOperation.schema
+
+
+ module-attribute
+
+
+docetl.schemas.SplitOp = split.SplitOperation.schema
+
+
+ module-attribute
+
+
+docetl.schemas.GatherOp = gather.GatherOperation.schema
+
+
+ module-attribute
+
+
+docetl.schemas.UnnestOp = unnest.UnnestOperation.schema
+
+
+ module-attribute
+
+
+docetl.schemas.SampleOp = sample.SampleOperation.schema
+
+
+ module-attribute
+
+
+docetl.schemas.ClusterOp = cluster.ClusterOperation.schema
+
+
+ module-attribute
+
+
+docetl.schemas.Dataset = dataset.Dataset.schema
+
+
+ module-attribute
+
+
+docetl.schemas.ParsingTool
+
+
+
+ Bases: BaseModel
Represents a parsing tool used for custom data parsing in the pipeline.
+ + +Attributes:
+Name | +Type | +Description | +
---|---|---|
name |
+
+ str
+ |
+
+
+
+ The name of the parsing tool. This should be unique within the pipeline configuration. + |
+
function_code |
+
+ str
+ |
+
+
+
+ The Python code defining the parsing function. This code will be executed + to parse the input data according to the specified logic. It should return a list of strings, where each string is its own document. + |
+
parsing_tools:
+ - name: ocr_parser
+ function_code: |
+ import pytesseract
+ from pdf2image import convert_from_path
+ def ocr_parser(filename: str) -> List[str]:
+ images = convert_from_path(filename)
+ text = ""
+ for image in images:
+ text += pytesseract.image_to_string(image)
+ return [text]
+
docetl/base_schemas.py
19 +20 +21 +22 +23 +24 +25 +26 +27 +28 +29 +30 +31 +32 +33 +34 +35 +36 +37 +38 +39 +40 +41 +42 +43 +44 +45 |
|
docetl.schemas.PipelineStep
+
+
+
+ Bases: BaseModel
Represents a step in the pipeline.
+ + +Attributes:
+Name | +Type | +Description | +
---|---|---|
name |
+
+ str
+ |
+
+
+
+ The name of the step. + |
+
operations |
+
+ List[Union[Dict[str, Any], str]]
+ |
+
+
+
+ A list of operations to be applied in this step. +Each operation can be either a string (the name of the operation) or a dictionary +(for more complex configurations). + |
+
input |
+
+ Optional[str]
+ |
+
+
+
+ The input for this step. It can be either the name of a dataset +or the name of a previous step. If not provided, the step will use the output +of the previous step as its input. + |
+
# Simple step with a single operation
+process_step = PipelineStep(
+ name="process_step",
+ input="my_dataset",
+ operations=["process"]
+)
+
+# Step that takes the output of a previous step as its input
+summarize_step = PipelineStep(
+ name="summarize_step",
+ input="process_step",
+ operations=["summarize"]
+)
+
+# Step with a more complex operation configuration
+custom_step = PipelineStep(
+ name="custom_step",
+ input="previous_step",
+ operations=[
+ {
+ "custom_operation": {
+ "model": "gpt-4",
+ "prompt": "Perform a custom analysis on the following text:"
+ }
+ }
+ ]
+)
+
These examples show different ways to configure pipeline steps, from simple +single-operation steps to more complex configurations with custom parameters.
+ + + + + + +docetl/base_schemas.py
48 +49 +50 +51 +52 +53 +54 +55 +56 +57 +58 +59 +60 +61 +62 +63 +64 +65 +66 +67 +68 +69 +70 +71 +72 +73 +74 +75 +76 +77 +78 +79 +80 +81 +82 +83 +84 +85 +86 +87 +88 +89 +90 +91 +92 +93 +94 +95 +96 +97 +98 |
|
docetl.schemas.PipelineOutput
+
+
+
+ Bases: BaseModel
Represents the output configuration for a pipeline.
+ + +Attributes:
+Name | +Type | +Description | +
---|---|---|
type |
+
+ str
+ |
+
+
+
+ The type of output. This could be 'file', 'database', etc. + |
+
path |
+
+ str
+ |
+
+
+
+ The path where the output will be stored. This could be a file path, + database connection string, etc., depending on the type. + |
+
intermediate_dir |
+
+ Optional[str]
+ |
+
+
+
+ The directory to store intermediate results, + if applicable. Defaults to None. + |
+
output = PipelineOutput(
+ type="file",
+ path="/path/to/output.json",
+ intermediate_dir="/path/to/intermediate/results"
+)
+
docetl/base_schemas.py
100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 |
|
docetl.api.Pipeline
+
+
+Represents a complete document processing pipeline.
+ + +Attributes:
+Name | +Type | +Description | +
---|---|---|
name |
+
+ str
+ |
+
+
+
+ The name of the pipeline. + |
+
datasets |
+
+ Dict[str, Dataset]
+ |
+
+
+
+ A dictionary of datasets used in the pipeline, + where keys are dataset names and values are Dataset objects. + |
+
operations |
+
+ List[OpType]
+ |
+
+
+
+ A list of operations to be performed in the pipeline. + |
+
steps |
+
+ List[PipelineStep]
+ |
+
+
+
+ A list of steps that make up the pipeline. + |
+
output |
+
+ PipelineOutput
+ |
+
+
+
+ The output configuration for the pipeline. + |
+
parsing_tools |
+
+ List[ParsingTool]
+ |
+
+
+
+ A list of parsing tools used in the pipeline. + Defaults to an empty list. + |
+
default_model |
+
+ Optional[str]
+ |
+
+
+
+ The default language model to use for operations + that require one. Defaults to None. + |
+
def custom_parser(text: str) -> List[str]:
+ # this will convert the text in the column to uppercase
+ # You should return a list of strings, where each string is a separate document
+ return [text.upper()]
+
+pipeline = Pipeline(
+ name="document_processing_pipeline",
+ datasets={
+ "input_data": Dataset(type="file", path="/path/to/input.json", parsing=[{"name": "custom_parser", "input_key": "content", "output_key": "uppercase_content"}]),
+ },
+ parsing_tools=[custom_parser],
+ operations=[
+ MapOp(
+ name="process",
+ type="map",
+ prompt="Determine what type of document this is: {{ input.uppercase_content }}",
+ output={"schema": {"document_type": "string"}}
+ ),
+ ReduceOp(
+ name="summarize",
+ type="reduce",
+ reduce_key="document_type",
+ prompt="Summarize the processed contents: {% for item in inputs %}{{ item.uppercase_content }} {% endfor %}",
+ output={"schema": {"summary": "string"}}
+ )
+ ],
+ steps=[
+ PipelineStep(name="process_step", input="input_data", operations=["process"]),
+ PipelineStep(name="summarize_step", input="process_step", operations=["summarize"])
+ ],
+ output=PipelineOutput(type="file", path="/path/to/output.json"),
+ default_model="gpt-4o-mini"
+)
+
This example shows a complete pipeline configuration with datasets, operations, +steps, and output settings.
+ + + + + + +docetl/api.py
81 + 82 + 83 + 84 + 85 + 86 + 87 + 88 + 89 + 90 + 91 + 92 + 93 + 94 + 95 + 96 + 97 + 98 + 99 +100 +101 +102 +103 +104 +105 +106 +107 +108 +109 +110 +111 +112 +113 +114 +115 +116 +117 +118 +119 +120 +121 +122 +123 +124 +125 +126 +127 +128 +129 +130 +131 +132 +133 +134 +135 +136 +137 +138 +139 +140 +141 +142 +143 +144 +145 +146 +147 +148 +149 +150 +151 +152 +153 +154 +155 +156 +157 +158 +159 +160 +161 +162 +163 +164 +165 +166 +167 +168 +169 +170 +171 +172 +173 +174 +175 +176 +177 +178 +179 +180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 +219 +220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 +239 +240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 +255 +256 +257 +258 +259 +260 +261 +262 +263 +264 +265 +266 +267 +268 +269 +270 +271 +272 +273 +274 +275 +276 +277 +278 +279 +280 +281 +282 +283 +284 +285 +286 +287 +288 +289 +290 +291 +292 +293 +294 +295 +296 +297 +298 +299 +300 +301 +302 +303 +304 +305 +306 +307 +308 +309 +310 +311 +312 +313 +314 +315 +316 +317 +318 +319 +320 +321 +322 +323 +324 +325 +326 +327 +328 +329 +330 +331 +332 +333 +334 +335 +336 +337 |
|
optimize(max_threads=None, model='gpt-4o', resume=False, timeout=60)
+
+Optimize the pipeline using the Optimizer.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ max_threads
+ |
+
+ Optional[int]
+ |
+
+
+
+ Maximum number of threads to use for optimization. + |
+
+ None
+ |
+
+ model
+ |
+
+ str
+ |
+
+
+
+ The model to use for optimization. Defaults to "gpt-4o". + |
+
+ 'gpt-4o'
+ |
+
+ resume
+ |
+
+ bool
+ |
+
+
+
+ Whether to resume optimization from a previous state. Defaults to False. + |
+
+ False
+ |
+
+ timeout
+ |
+
+ int
+ |
+
+
+
+ Timeout for optimization in seconds. Defaults to 60. + |
+
+ 60
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
Pipeline |
+ Pipeline
+ |
+
+
+
+ An optimized version of the pipeline. + |
+
docetl/api.py
180 +181 +182 +183 +184 +185 +186 +187 +188 +189 +190 +191 +192 +193 +194 +195 +196 +197 +198 +199 +200 +201 +202 +203 +204 +205 +206 +207 +208 +209 +210 +211 +212 +213 +214 +215 +216 +217 +218 |
|
run(max_threads=None)
+
+Run the pipeline using the DSLRunner.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ max_threads
+ |
+
+ Optional[int]
+ |
+
+
+
+ Maximum number of threads to use for execution. + |
+
+ None
+ |
+
Returns:
+Name | Type | +Description | +
---|---|---|
float |
+ float
+ |
+
+
+
+ The total cost of running the pipeline. + |
+
docetl/api.py
220 +221 +222 +223 +224 +225 +226 +227 +228 +229 +230 +231 +232 +233 +234 +235 +236 +237 +238 |
|
to_yaml(path)
+
+Convert the Pipeline object to a YAML string and save it to a file.
+ + +Parameters:
+Name | +Type | +Description | +Default | +
---|---|---|---|
+ path
+ |
+
+ str
+ |
+
+
+
+ Path to save the YAML file. + |
+ + required + | +
Returns:
+Type | +Description | +
---|---|
+ None
+ |
+
+
+
+ None + |
+
docetl/api.py
240 +241 +242 +243 +244 +245 +246 +247 +248 +249 +250 +251 +252 +253 +254 |
|
8i0kok zAtgDiWNiG3yfP9r0R#pR{tk#&zdwfso}JeV!9P?&kdQ;9!4Py60-n$242vtLsWX5$ zkp%z?8^C}7)-Ux~+^}0D^W|N(^RJzvi!qTdLoVe$=ej)tPWdE|b0&Y{Fy>FG&zZo{ zcS?mD+uDaUL`RgKE&=5XR5h>iiHQtnm5Nm~o&3gB__ma>`~M?HP|D2d4*}gW{WBnI zhHPHU@iJVRIJ!Lpze~Nc82l|*3k@uztt=kQ{VL2QeDUsgR5<*InW%3qx9~@0phgj` z)^4r1D3lLwK9%V)AcJ6Az(!&oY&Pxv-tISqo&Cea=!I0}dTo6}rJ^luW!xt4uXP$% zQ_=mgQm9caF_F@IuQ&cUgtb*B0*0J%M4qQxhjiGWrtFdgOY?lpUevp};xJjmzlgVH z%l8Gm9z+4Q*3b%0o4*YYAb6?7d+z=Op&A}s6j)hDHU{T!QLj{%++Kri;vBIW`tp4c zr1|rIKUliIGS}Atqn>K=nBhC|DzH=J59npBYhWk>NafMDkCZ05ydvJZ|3j38j%u(# z_ZN{&_y=6t<({l5l#-g7BPLI2zym;_p$2LkgJo~*3A8kiquF`nscmD9)y1cu5K^E) z-(X?aqs%0;mF4FIj|q9h#;T9bo+(w!= &W@ z?(TE8+uQCRc3R(@R{0QSym}(#R!Ij>(~*k}wHpbMD8y<>hr+pLlZzD+NRU~0?JS(; ze|#$c;0IFQc(?KUT $_0py(V<4dLugIyifU_Zw7fQuDoi} zstj@4b_{uA`i_E-$q(WB{sgw*JQ-w=A#UjtXC?t%51n?%%0qmTcJBXzar|{M@5s4{ zR1dLRebVU#`RG1rIR?AVwultf&WL34{Mz%rFHuA5p^Q>4=GdL%`9)5IUEBW!HCVvA ztVv2Vm1&{^D6)65Yd(0>|B1oULstQ{_ Dz> zw@LW__ebXj`k)wyyQI0F2Owx_|GI=f<6^iSx418qSEtAW0;Gd`#)gLS_* 5!;hJHqoa<68tc>+)KPY@AYi+x9P(Q70iJuqAJjHSRUm zRr2rD-Q8d!^_)ICpr^|2MeX)?Q|R M)>zb8P77`G6Mt90f5!2 zlh9*32?Kiw;>6>yS^lR5cK~z@-g*bSau)Xf1K9<}_XM1#6~mOS*xptEs);xAf52=1 zT;Tqw58-${-(u`n>%R6gA<$pU%w`G&ySc}l`lrJ&j33E#rKF7~%6KS)#;>gZulU4@ zOuS-3eKvF(d_;-dtEsx}?YN9^aO-mn8w<)gfQxDpF9KP(+VEoHpkej3hriMZ=ev)p z2soxa`kZqQ&X@p=8-#)? 82jvj-o6Y(R2_1z@4 LKKqX@|9ck4i@fol)O7~-#PLmmHsc^AXV(j za(8q~q-#q^^T9ojU<+1K`@%}d0UDGf^uh x+Kf7Qf( z{wSZLn6}popSdfGyX4n6?qd5XTbnQ_jf)~Hw4B0wD{i39{#e5zJCulmbRv(8?C`R@ zG5jfU+0b;Z7%?z&fCy#|K>mN30}X&ayP1!E!Z~$RvBPj|jIEtMU-Od>ZD-`Xrck!< z|4Sz@W`USOZ$)Kt7}*itJvBSACtL$T*Xs|(em7`A4plMiTuephmG&$YjIr{6+dbl? 
z4lu>KCYNx$mHRrbCwrIly^rfouKXw}*-3otN5jjmE>q|)BnTH4j2@VhJZP9_=H+)Z zI4(0n?0hd#N|W!#spB~u<)y$h_Is{zcy*%ePyjyw{*eMDhXbbIR3-NKS2%V 2%N?}b507_$R}* Ac<91Q+7cHF#v4k`Sb|66fO6;^w!ZvKxp=-9C-RY~sL&b$&ng@$d1vk#G2 z0k^3ss~Bup3uwi`(EN=iwgYvd^y6Emn981TFhuwZMJGO-y{xfA((?&vurCiV!?1ag z#gn%2l2?nu4$swnwHOx-*@K?>*%3Dn7Kx;u#?^EyVxv-rNl#L~(>Hj^oPZ&(!L1;o z19RbbxwX?HFTDSHJyagyxXCR8BmNHqy#&&7NOgl!irb5ZKG=M!Tyl2dJi=qEi|)HE zYuawd)X%lCZ{sf1isPG&L>i$ R7_Y*yLxU79}TwzK7Ebo90m)vg&W;MxVn=z+;9f;ZzKs!G%s zl!!{WN4v(~p?pzo_h7f?rgLi8+XKun{~mOePWU!4E*NYYbGL0*f?0pvc tws_y`nd}4#P)c0=a o{uOTp_P~N8&83 z?~O0)t*qtcvS_!Wd_3B;NS%W3ita7BIXzYAl2@YOSZ_nSfqLH{EwoA?w?&zBRvczB z?3C$miDX(mOBB}!^@%XhFYaClV%a2cPe$N#%J8G#K;liDc(Gl71hQTc*NI7>Zu5UG zq^$m7VMjaK`f7Ltf%xLtE28BL&bQI<(w)0YtJJSdFB!YN&I;CusNM=OHQ7DnI! j+T2WC6-|Oh5&$ z@__9r@WAcfdj5HBRY2u9>N2&T;&CnnFV=*|V9MQt%Y@T%4CHp3wfv7h-+t~h^~`## z!vbJ)`&}5c3_fgQIOL=n@%?$3S|yOV4-f9#MG09GG#iO$+Xz~9cCO0WbT+q!)}Y<8 z^-LX5vVfW`zk9w@t>@nx&S{Y;CS@kmRu=}%3Idfsan=v3A8$uHX3hJ~4zAUcr*|92 z2EcMHPY?C_p~1jKM-8)!wzvyzsz|>jWSp)}?BD?ft~-*Zf| jaR4q|K6H8B3e~e*1_u)q1i0@DFNu>2_Wp`oXe=1XEXk0D0f#SX0 z7r=Z7=fPJ5Lx!V<(g{G?ImG@Pjt$)1P3;>Wo7<_J^VRa2pw^^o`0lP9OtevJcHnV@ zeF=Ksa(Y4(8H>4>cUFx5F%0SgCV$0j-~BIBG>M#AKvql<-?E%hc~2eR?e6z)^T`>& z8qFV&lxKM=W%bcXI*2D^a3kcAN0s^czkUMJU>-O)O9N5N8e|PJy}5Sf;~U4a0yEX~ z?>ovtVhRQ?JI#Ago6k?5<@9@LDb0u3#t6uaihHZ{PYB{YyyvG(U{hCl1qf0!$6w6) zPV0+i pMRjIz5M&wEl$b#*g*;iei00ffd}ExJYfq+dGg>xjmU-s3%`* z5x6Ujl1KX|6@J+U_SK#HAsm1n>RHlR1g40ovC>k>Kj!^IC1*d@x{agyD@<(Z2iEm_ zb(oWS;2rKW6TBg4GuOL6Mj&5*0R-F*#@!-+&aHpBsLF1xru8pHak|MUyHGZ5gb?bl z0}hDH453xT`Pk4y9=FJWp@B?#-G_zvQ89TnA~5CEuOW~{*lV)(@6)nMB{t`@-Dy}o zh13xT2CBS9H3TwBAz*yNkMA lNeuGscX0(|FwDY>ZyH$BGCCMDlm?e9Bc ziYCgqLj!T2A!nUcLAY42IZt$2j!j@@)Z py45=`3cB z1o|^(GvrUE<` }rCnfY*jM ?thudo%XW^{h $Iss;Sg^IcC}MGmi?T9! 
z80&(Z-?{_;rz4ZYd|dMM_UDtO_KWr|lZ`Y*lJ4*?Ud+TFWQuyL_;NM5Re~MmGk05- zN%X=xWDj`H4S_BmLZ0@ViGjI|rE5$H^zvrQN~;YVxM a2f5iQE zisi`IMIF8Oo{LSEy_ L#6-gkd6>_8xZ+k^kgUw2_^{4x^)-VDGHXwKsz$)+E@^xSg)gGTLfbjXRX z Y*M@NJcx4kiM zJAC^qfiw5?^Syd^T*Hwilt78PHVn0DB%I75)D vV@~CL@lJDq#5(P zQWRlAb_uia0OWXa4UeV0HbEtuBV;?;>zKNtt`y?de8B&4y$w?=0BQVz` 1NhPizr6eYiO z1N9joqEFHXKSP(Ep^yq^zc9_~@};@QU#k0kTM4fIwkTPjv{#dsKDCo?4+mwksDn2; z9mM=4*ID2v&ZvG6rCQ@)-u-9C+X&)0q<}qOqYwolu0Z ExE<44w`RV8 z$;A8 p<7QgCgD)rQ7Oa@y~{G}7x|a4YN(qs zl`R;B2%IR5ACgDUVN4q(-fq}Y)%u@+TtwCOURgh=D6 -8N%)%346YX>Q(Ef^Io&AnDo&`hp1f>Ry95UenSLq z9cDeGaJ~Z)%hW b zCoL>06Rhd@&W>QnE2Ne=r}ALj+hPcMpqgZ}ZWj%|W3*JJLo;$Ln0JBd{$un1vA5%e zWR-r3KRi3*f$H@tjkfsTa=z)BApgM1cQfDA44GN{z_pb;L;ac#6Ufp@ljDfTaXWJ8 z=FdUfa!66=ew0`y8&^hlRfWR5xX=_8=#S~SA76hmqw+})?;z3g%X5@eFI9j}^loJ! zsFUfxHJhOaJsCsAj}844`(u4Jnw4$>KO0A}t>>BKpVXB0WvyhTLm%zI`p00NexSHT zL4VI17QEWs8@20{Bm>6{qIBE?yk$4j)B2^TvcxvC1~Yw|4>?e1aB3sAt$;Hx2#%YW z$q6*famgq!gp1|lA~5!ieT6-~tHVz$xviTa`=JoSg`+i@jQ_N4^1wbdYS2Bv5C;da zNnYVgOQ?61ftIost$1Xy^|G~ZOF|sIB3 Fqz+?@1Ac)rN-Jy!QUrz@25JQ4sr<2>{mOemKj)1`PYY$}L|gZZ zFi{ gV`BR4Z=qJerpDR0SK6G1S!<9#u0AQ~j~iLuK9Rm(3cC9d z
yf7v7n_C6lvAajUJeO#qYDN zB53!HD>#-SnRiD{4D79~Z^l-v1EcUJY>5#=kl$33CN%H;kaSR+gP=7el@)0o-uyTI zLRZ8Sy~+ MZ+r7-~P&jQ7 ! zwK=eTl}jS5vf;)i_!R18@?lyc&zTej(|(FSZ#jD&5D+xo>0$YZXlHvw9DGV-e#^1F zRT555SJ83o?CD0atp j3tzqWhJKo#7}cLCxhKlpQb#wDsPh*av}Yl)PUwGtyQ3 zF2CLRr-4}=qWp4t?^yr!HD#qnjKdd+<|`Y!seD?DeohP-UJm_zyY)!X8@HAnWYP~+ z;}|m5Vb0e&H8k$-k@uno#SFU$iFvcPP`P-6{r7i`8lqVpPWEPnK=v);O#Hn?Sha9N zf?@yOyrdJx{+nrGm{DYDv~pTkfm4sw7*$E14>KcXwVxy@$={&W;+i!3{kD&HCu 27hEf8yKBI1 z=j!L9?7vYMwRoIz_x_(;5_Hx&466HPyn-_PFUx4NzqepKPMmO0c$h2cw$KsXxAz!O zh(MheDE+~7N&(M&rX>@?QN8{8{iR7VI^tD4X9CcRfqa6FHt+-mLi*6KXkrNbNL{0} zy)^VSF_bRu<5ME((b7}i0+uxTz;~6pbC$ShASoOWsd}B}fdpb5Pj^2A)pgjua*9c^ zPXE`@TP#H8Nj=z77olF>bN|nCr{SjZ4$g1 yi6x?WDJbeYo*#pQ1O!77O3IOWHnp zH3Lsx=O&VS+Wd;jNl?WIs3aDCeM&Ur@7NXNhROrs9y^+pvF}IZl*ZV%--oCBT+x+m zY|jvStN*^tkC{qnO){C2BoxAJ@sMMmkeIK6tuF8bE^BU==!m6 GzywpZ9&yXqci}J~7{i?zeIE-AsD_eQIV41~k8Dxno>z z#>W?zmA*7J>cm7r( r-5Jx?$- z%=P0}`{6L AxD+ &Q+qDO>VkdmQbTzP8<%R$^eJ{?3kTMul$og0onRE{7HywG@P z!c&^-z1KKHRB++RydK`grLCR7DqWlOp|4Fs%A*d`X>PZGmwnS Q|j}1SN zc8%?i(k@61z@n?I{T9y-haqC}fx+Lrm!@2h#Nhoz3BOOI;gi|VR?<_aO8YbD-y+0V z->VTN4K(e{B?2Ogccz8<_G721NOZ(?8`fw}9c{{f^(;vrU8@Tc3de+#Qcr}4UDok# zk|z90y5Y4iGJY?SqUh~OO_Bs2&Ga}1zb?bpE>Nh3HF 9ibpi8EvIm1} zv*0xxN++=~f8!kKj;`zlprdF`s9A<0xb5S3Qy~&`-2&-j?La0_!-h3pIW65zZ7iF9 z0=LV fr{9fUk>U%5)9*9f)qgzGJ6P~G#quj zqwL>($DPT-6>X7KEDRrP8r=#ts%xAKdT$tYH>;3)1aVw+kCO)l2y*97 zDU-T+vpSyXfRp)8%*W2&KMpDw0ppId=tmq#WGPFCu?zIj>y9alP1OBR13rG%$0?ad z)mF46UACfldmH)+mP7hBc?q7Y9aS+>j+)5U;4j1%$DSn}$2O{!lq796o>fcEWffX` zZkMjxVMX?v-fsSw)z%5iQu4W5Z*w1ez`+pdSqE9Edmo*uLqnGb%*n@JxGAR?5ED?&Yx*I7)@pH+>L9{5Wi3V z%6&_CjbgLF>EC%?oS)u~ v*8VIjeiPRcmm;F^Bs)KGi z-H8;?y#1o-6=gG{W`Eb C|Bl|Ap)|A~P&HjfrY(FFRy#&8I7SgOI 3)@}Kmb2JHS-3&AP?H6Og4kDKbJ 3{)w^UbONkKU;i3J zN+#YMTjJE}>u7!%Fe1fj(6Q$r3>|2G^Fj@`5Rt&^N6qu504EN9xE7Yc1TH-Idl`~Q zSuW!FH$YF)`9!#J>G$?=J>zD4yGL$JQ|VJ89ld}x@m|$uBxm9P_gNfPEES@A6fj5A zhkQzW)qPGnOY7Cr$~iaN%XjE~$x}I0f^)l~_}gy&+`PL~JZU4Vgx>sb-?S#P1wIk; 
zx<4-{EcK^4{=NA~4cXzatB7ir$_>Ik#)qw2XICpvwcfB;8rDesT&r^4ew0RS%@u%^ z85ztVeN$?C3_aPI{ UGXrF>P6vHVb2#L3qenXH->W(^;>Rj%r9N(dGhvE zghXi97!}8dgZFmtx=ViM5%x>O9j*T?1tkb#`Vp1=^~hlI6&<&MB&WYh@xhYc9xI1O z^_5arP{|9A4nq*5y~!Mrh5Np(EbtY0%T$< z_8?BcX<{XOJ&HymR$$Jl;l^4A2C{jgZ}EwJ5&QCo4cZij;H$p+)_EWHj8ig<(|jmn zF^|g!UxMWC$%qe*)sgmJlI|UUy3fE!c1b*mE39%LH3oxRoVJ|LRl&+I$84a7b)iQ^ z+R=K+SghZ%0fl`co#K4}dV=#!9iHRC`TpL3<^Rl}qbt5r_aF}pOU+~4=VWsKa*)W| zO1gwA)bKO1BjY@BOlSQ04WH_Z7N2F%luCd#;O%cRMl`-LOXF+4+RcN#DAf^0#3uB@ zd9ixZ?a}faS?Xs<+5Wmrrq5{WIOr{dSeDb0QPPZex^4&go@86EdHpaqo9-{LaN~tU z)Q*^v!&0x_?R*xSs ?IET_}9ew$6hl}a?cOQ0L@V@O$I|McX$F3s&~ZH ztNMkce(M@LYx1@3?*Q)BfahVTFGHIz1AN}}h=zhW5qL55Pe!5omC_Bw=U}5_94zXG zv-ynXRnFc!l05h{Vdj7PYE>~CVp7 zhgVG-i{@5&?y97`y2fNUJnoAQ@s-iEtaA#vgB`yT9bLRRVCr{y>Q2#k7)FUFK9?O^ z0nEI;^dS1R*H!Dk$$Oaf9tb7L;3w%cN0`5w=3>FY)c!groXuSYprR+re}dX}_UQ?n z&J+Al@UqHm%0F&2e^;Z443wLqv; P8YN_Zc8H z0$;Jtn|e7D-YW F=Z{&9g3^ct00%6kpq8322*|DTfdHAqfFgtPa& zZ&VZTtQ8XCj*;nrNW8R%w?_o}HVF78Z6sxT9BpHV=6Q%xN1nojvQC)6b44E|l{8l^ zR%$Wu8!%Bp6Zhuii{KDO!saLsY$Ty8I-aJ4weIJ@(yK?S(FMil#=gFWSC83sv)`-F z8jL)-bmNwuP>_$FT<7iUEmeftmZ5kLQ0u{(m)~)w$ztMB76BpVI&3bB1ES0%fGh# zj)rFe*>4YHU;WnTFQN??GGqBXe00<1a$R^E|G=P;dTzP%Pkd!a5N%MwpGDaYh#+qf z+^`E8sZ3H~6e8`Ed%Y9UUwN{u`5R5;D>Hr9o&2?q#c2KFRyE# ^l;ETK=x~ a}AaVERT4ps}R%?yGUyI%9GYMS9Zzp1CXWz zE^Spj6D37;$^T^m^ueAI?_MtgY3Drx_ lkJtp8KTFZVl}R$cq6ymg_?SH; z@y4=@_~~aKB=k4x`Rl` Y5B2bQ7T7h~c{$Iags!0hQq zNn7!2U!ntU+qQIry3J7qs-h`u6QM^7sNx;{RGheZDNw~zaVur|iz*mW=9_ +7YZ=FUQKKj#YZI()@nKpffmyuiw z#z!ApRrUv60=xr8*C8G7PB#@`)t{c5$aDLGxZ+eMjTB1f_2BZ&{c-Bf3)QQ{*BU_1 zh%v3e6@Fi2Q3@WCfq`w*HnCi>AbXUX?Pgtd{BrP$coBND}Rq`W1VW@_g)D#n%xpLXjfy4VV zT2H8*qi?DyN+`#+pCwm9&=_!d_Ip##Ge9WreU9rw=0U?*= zgI?>Pzd5m&4Z?ja{;=E)quUeWnlBr<;rc}Vg^8tHIO+KSH4FV*9!rIXsi?Nxt^IJW zh?n6Cs{4c5jku1t)31NrJrg 9HIZK@qjU+tHsA2bE%3+~@#QUfU&;lPHvztP0U zxhhBNrrHjfr|;LJ0d@s$+=^fKe8Ni~Jt{q@DvsPPS~)z}d|LgyIG&+ 9i>1$6Rk_zY1%JbH>5xs~maQal1K;y26w7m#w86 _aoIa!j7oG>g2x z^yc<#>>_>jAD#d94?ZAek8C*NDKWCa gFGeQ+8kg_c=8)Sb=xF}PbAN})Nprso 
c7XILa03U`^VQk^J&A;c!*l5E*zV0v|FvUBp$*R_{lzLfO$XO!{E z%I>7q{m}7N%@1v_=^|%>?PE{(riJ2|#4 YSO_@pcIie}BoXojZPbtHknm%`18u z8Y#suG-lM!4miod1G)o37F2Gj#fsz&^eMed?K+!CysX2fbedD|a-ID#JLvsj^L{ma zV;|k=$WhniHvg#2G^)oG58J@r^7T0)VjaV?RQ(l7n9Ne=t!rM#6yMgSd~=%`PqLUM z?QD#gll8VoFSJy>I%fSS=PSDJGg7BK_u*(=`GKe5Kq+)GbS2cCCeR*#zT(&fzfLTn zWOJAv{h}n|O0M^Qt!QR3l;qsutj5V}|4#Rhk5q{we(?u}O84Ctu4w4!D8HM%^xX@e z^T6$DrKqDa8NXg-MK%=~_4Uw-r;6^M-n%f)*1L1AIoAH@W|t68Xs&aamW*f#A*j7* zeDh_^iFgA8o1iqaK|Tq3?Q8d{ok1G{nAf{K$2oiHh{!y=zg{%6uQaj~uurqSgRHmq zf3wZ_hfAGsP(5?dF pd3YjO|JLoPRlSOaBgFOKRQ;tC0PQ_IcnD2G*Ilh8zUJ{Gr}&ePL#!udhebH_p?( z{_6ep+Ve(R#^*cUr CA+3k5>ojZ!*1S;LZ{4&euP`uH<@mS z(CHn|=i+lwq$f#Ti-H@I8MttuS)^+oK`+_U;Uf;$5gfjAT$aDzrW7hiYR;Z6JdX$z zSJ<`N!7>SFi=EB`oYNrv?NWIMYg}O_T0e);^FPh=MQu9fr?|JCPuzFtVT~V3gaX81 z{SI^0oLATRYOJCaW@#rS0#6O4^Dh%mFL%dg2 P(+SQyS_bD? z6ch-2vNn|CeztzR{&c@HEXscNWcN+)#zbvX^WmP{Old)kT|7*)Sx*6HWLD=QxC cYc=jms2%FzEcMd&My{Sbr{Lj%c?zy`p8fU^TV^w1pb6$l6|IaAT|e z@L; |E!aJ2Cv^5)^1{sj|K1)b-(L#dlM#03UR=>9`7ZUaq^Ar{}mgs)lSF%0IoU zb9RJNY6U9lZ(Xj7+T4dYV2u1XQCTB%ODlTX+UY*KGaBI9LJi5Gljj2s>}4pd@3$6H z_sE4PvqudBTz6g1Pj-dO3&e+_iB5Pn2@KP8T9usq>tF>sd<;FmW|CD)@z^>{1C0*U z8n%*0RqJv%bPuqU b*&KV`N?VX47)%8$8*OZ}EZS2jJ&%=TaK6 z$!9nAAn4t{1Ny8cA9N<|XSIH-k$*95r3Y>!1)<^rdn(5s#oyZWCf$-b(akGgSg+wi z{}C4M7wJYT9-Z#v99dCF& zRPbY8kH+*!jEKJz*(r%s%x9ebJJ0;j^XmDoT}KFT)x)tEa%NE<+o?a4=CZkf#cXrb z#vxY(UkDNBi0c`YLCoO}?R!pKF)n4LsLx3=2nFWgTZ&Vc@};_=BcFV_wyDj3z9it* zZ6m~5cet#f1RZgp?AN8GLQ>9j6RTj2h2T?przC})h!ljg;*z0pN=TwK(TF*4Ts;3W zz5u=hJ1saE$cEN@ 3NpSjin+{FqU*B^5DbIg0 )o}SDva^N-DU@<@EB|S|h6q@5Kue z6%LC0OL!|;+AE(oWv&k7X^A%PKXDrQS@^HaS`V|@M^2qGUiFgrM9`!zKG#$2*7VIU z=L0NKUsQhC8Wh^93O@UOH620ju$jsSX4|Z5v4 `C-d+I0YH*J;*WxQh6 z{KwmQg45OBtW2t2hEG$@WX`evY!`I2I#J28Xqi A3v$*%Xg4_QPl_n(t@m?QHIVXC<@Q0lBy|0hT)CO5M zhP}RBWmb*5mGM!E{>$sX-1@?mnTgEiWyZsOA$QxnY+U|8GIleB_?|9ttG>ev2Kpd) z?^Yf%2^Zu)PUF|f^g _seGE752+NVXMPz9Oj9Y3@K6ILH*bkjxQAaetNh!z z{ Q>()G!NY>j(dde$+x!)~ z_S>>dhaNO`>@p#_>7s^e-}5~q?FUmScX!e&3mWoeoiiUFjVK#j{pmRTQKU(6cs+Oa zX 
jH-H6AMtc0%Tg5C zK4o$o|J`ft<8dfDqo1TJmNDZx< u+MDz6Y* 8 zDc}4$^EwOJ3>r9I@(J#n1EQ(ezAyDjWu7kB_K2=~w)gu`cRI0k%&+Uf@K+Os>%^7$ zTRlOe8`X?uOT*uxB|D){d~mV$5$gF|?jE<~*6*1N 08-AA=A z=jOkct@(C7e0B#Kf_iqCT9_Pf*q9dGYI1o{93trSp!*qJWVfZk|HIu|ezp0uVY|3n zTfAt2;_eg(-eQH~6nFRH5*&&a*W#1{#arCHKnd>dPJjRbLbCIF_TFo~f5ZE6zGNl$ z$~|+ LGXel^zoE?W`J3~tqitG$3_4$}S1mdvP06T|!9^(H z$bhUmV}7lb7YG|pZ ?TU{>*;*M0V!5x)&R1G~O}|o4dI5*P zhZ&obeXS-v?u>>geA4hUF2?<-jV(s`vY|hVe}%a}f6yC6#uF+yf)*-h*a$WZzCz_D zOjt&DQvG_S&HuJlBz#4G&I&s-YPW9?YTuGX?qKqEeuR8)r>?_?rB;ZI2s_TDule8n z6rF=%-uyNdN8T?PVY@^>)+hh%6gQ)KtE)+;O7?(kG>hKLPjgcPGPN5RA0Rgdptkoy z#z~e<^zXu>D|Tr=6_HpA?=^kIi_H wer=cQXq)_P#mU5 &;WSqsvmb}I8Mfa<>H())udt3(=%1ZSd;4UwIGg|a DbQNgElZt35@m?e^Htyy5wvGpVFFqTKQunx_)3!(TtHR;2jrqd%IX=uZPj R-Ir2P)Kz&u{8qbGGRVhPMV)9fYJLBdteSAE=( z(L{7r(Ig^mXn1$hT|J96IdqY7forWE*Vp*GDu+DZZF)iI`w_$d9pbOZ74@^8MMo9_ zb_fu#KGOV^8jR`UG90)5J(bm$j*VJh83BK8_%|m(X_slA<*)qi8`jX3D;uck!~Mm2 zk)$+(+i!pL>ZJ q;vnq6Xcf~u)4?Mbsm{NsXd&PA_pwqynH@ctG@#1xtB?lbx6 zxAvZ=w)>pFWnYpw3;wlGPJvBk(&n`vEyoiRFgOwY2L4aP?d0S%nUBboe!l(IDOHT= zQ-S{h`|-^Haqn*DH+Zj~&KOT_4~kOgx)xbnRb9_BIbKunHSC7h+&F%}Jj*n7@_UDj z#XI%p;!T#gtifjKadYFNFzatv)b%`uXs}$o#_jhFb+}g4^bZosagm`{Gs@TyG{Z zkS$EFR5wEJuh!jXoSesJx)E^7ZHNQP2GyS+X#HM33z>)3U#@W+>}p8McIo(2#$A>z zucgKT{F=tdC(y}Gkux(IozK{yvi9Ed{YFH%ZvhN83o5Eq ?X3?Ns2xF*}{*x3C0%TW2~o{Q3OrLy?+ z#g+$(UQ3-xi!2%H8S9gLffo#jtOWmt!qNBOPnS{rl^E{d^hMm3%Ak4g+zL24pL%bs z`v-AnXgSwgWWr#3uMpZyrlNiX?oYxyUt!7xl%hV8r287<$p8pylEi2|;{YE4x(ive zQo!xLepJxnA5{urI3caXAg=T>K`!uBXU~h_qDeliZgi`SE!l|j;0|I!j;RS$Y9|Sw z^oOFqNZTkGtQ`&VT|JJB_XpMs>UkAXPNH^DC9K|NmYg!LV1PzFlE1RWdAU__r3LYU zkmc+>5gUSg7ZWB*B38N$K9r4cr>7W2PMUO4F>_1PR=uJw)?^8|CLLd*4rj1#KQPiK zKyAu8aW)v9JE1$OjUnfc=CHM-vr-@Wzpj2n?jdb|^pS7H7ULp$V@=jxgopLsPksS% zU=zw|JZ}%i0hm}I6tRIxs69RViu1NYemKRf*+N=L{_WHeQXE*rC`zp2JBmsz(IQWt z(lTQHF z`q@>N$+fl_EC|Tu$sSTX9Ue^*tw2fD{e1Us>cP@s@eX}!n__er+xjHE^7_QdMTcF3 z7i~;l7wW`Kwo^44E;iQ?eB O*gTj-0iS^o@24@o`lJ$1^yX9ROGcd7Sg 
zACa|UDq)QEbOqiI)blAyRU8cT!VN1xp@kk;)KL4K)~{essLCcUGN6%3JzC&*hC2%G zE9z=NVZ{}Cr_*A*iYia!?}FZ(d e;ComJZ86)FkdtVrf~{^K!aOOtND2}DFt z8`X8(eV)=IqAa>I-&pB4zbD*~eJ*!8tOXOO=}~1;C?$xvd>#hM= Ki|2FCY2% zU7`d=xtuJ%+UVUx8iu-p%KuWH|8>%4pIxKRAZPg}?a*lkR+F~br{{ Iik9<)k|dYI9I3g`=rmrRHsrY&3f>2Nvq^DuWhDCQ866Af3MX(R&=O)V z <#vR`s(R>E5Xkf+bAwRB}9q>_t=Ue>(@LYz!+BhBu@m>c#`%72$ryr+-J z`b0i3B;+~h2`gIM*ZSO#Ch&4B{GS38g(6G%V;2LpsKcB6hTN=T)8}?ugcqA>YauL% z?U=`DgT^QwM$Fkht{fJQxo_68!GR0r!)b17G|^C1O;gkqG7G+)pCMAY7Oc_>lY2*` z(#7qV|5K}6Ko6vsh0Mv}NokK|^2TRe?e8Dtxn&m1$H-QSC|tDO+@ZXN92D(b2L1B; zhel`;fkim4x?||P>$!+nXHlmX4db_9ztc41oNwswq$Cw snp~MyDJZk!gF>B`o5nZc za9_smqxI?gk?sH +j%koV;22ZJy>fH>4&Grfr@qy3m=~EhJx) zR-^gIYtl%dQlI1ad)pD*xIgxxQmcHw;XQqZQ=P*Ld84(N*xkqN5qdJjpZb@%fs0-l zULn8u4md6kLtj%zT~mzmlI?Pi?q`YWCx2@Ld;W|RFTMI2_Owb&C8EIN=) sV{XxEW1F(=r67M|iL*Gnb~8bz?KS%u@cFbbyZYp0e(HJrLUZe&-|?s&JjGV> zJeDHZkv Gi zE)01QP3L#v-YrJdk347*A#EI=UL@0ceYFF2ZhcN#-827O1$(;a*r%D`PfX;gSEGhk zQjjEgf0yt{%{8cW?U^cP^NTG|E-S_30}HvVkpBLHdGK0AcNIrUPbq<>c-LvBtTNz^ zG}>2Ld=z&KZ>JEV#-yw@!?kyaku>=!fZ1f-eU|Y064CM6OGginoQs)EMxUrVj&yW$ z#oO`pBzQ)guwW3dtzV<38#gfJDN2A-zMUaQcd^mcVT{ciibO04{9xpC^ckNn2>eM_ zaXyLs8_86D4fKtG9Y*JtnYd1Jm``_UA-$bZl1hppJ*gJF9o zlmr|h1--}#HcXh R0)krb1@i9ea7^0kcr_qR= @r#0VC(WKVv9BBU^WRi6|W& zyTcrt!-@2DbW-W~dlc8Y9)rlKNw_>P2A!G!dAfpAI`BbQz+G*S%;r{Ibe&oB`TVUR z3h$reS=(NQW+Q502@NR&F$8yQH !xqErf`T*~9 zPkMOXr`-OM2nctBa`gja!~W}w!q4(hgZcWEHW572%sG->xdhtLTG x)V+;A4JU4rw !n7ccsT;ipXVXgmGvi_*nx22PD`c(T=FiO=Nrkaya;EUo_$gE}(Ic+fAUw z^q^t $%7~Qt|@)z4Xvq}u4j@H{sN?uPTMT0;WZ{dk4jRRIrW~sUeg2I zqc~cs+fd>dYKIZH8 v`egXnV#D}qQulGW=}>0VM{fJ8hS?qW>qo38CiD!KHIbtX zeba?`n3)AjBR&LYyWm=9MysKNc7sYNN%Qzu-z0+0&rH*BB*LHSXTHXIVU^KlJyDC? 
zPP64(8aY7xXbEOKN?;ww8NQsXnhn>z=b;~244ayo=CWc`$o`nFj;vyk#{~5nEwN;1 zuF587ZR<;yOx?O@_O!RhcWn8o>m9zu>E*&YhkEL3)D`wAmY!EES^elcm=3WuA;zPq z`Lw+Y(vn|%>6@)Ao(vmpKe^>{8;Mxy-kW|0p5s5s2K23Y(CExu^OZWYff_WgOd72{ zsKh eEwzL~q}@VSpZd!-8(=jNq8b6{b~XU9dP#Yf92?VW$NxXg=Zr z98~O4r6p%WY3o_SL2V&3@)NSJXAFtPC1QJjmmCbl_=tDh#;6158L0B6Mb{hT44T{5 z^IV&8VIpd&jdBUM@7=nyUGu3WYMJe$^wT4n#~q~2c;1WuMJeh(Ks@*&W1YJfh^$h- zc0SzYfrWbg(M1NAA8e$W-w(V!%)1q%aort6Z#f?FoD?Ta8M)K)R^yVVV>a233X9`B zs2r?#Aw*32+(Up+6zSbkdECwXP>GE^v}zq%rSp5o=dQ|J;M9*^#i{r6r=7ifHGgyP z5yO+k@IXy`>pxxT-eP?nSxQ({XrY=}TVM1M^jJ#Pzwi7RuMa9`#ha+Ub`rb0OYSGS z4i&w9SsE^hy@%Fi&?-gMGQ30VGy^y+ _ERoqBYX3CdQl>8WE!c&;dS`iI&`J21dOk KXJpRW5<%!QvM4@l*oO+J`$or+A2Qt2Lpj;-FPois2yh}&) zE`|l61x3qk{hKaTvZ`IceF%^4-yhGks_yl&VBju>47F`ZmeET5bdkm;X1LbX@Y6fn zNIWl);S_o^%Xcm(GKxB;LEX+)N?cNFgKTSpp9j7XpwoBBEolS V(#aPWC7KOx#C+iVxcgmVDe~2E4t2Fwq%Z8KQ(x140+2z z35*e_jzZ{O%|V@xGzp?1QKpd&UP2dZZ7Y-YfZI6+xkpeQZyoiRzYk7gppZ`izWjJ9 zOMJt>ZOOL18C7ecPUR1v6)R(VgTpoxI~^(Xh_Bj)m0qFzDTo`C%@eX#zhYNZhe`NU z@u@K_hWbL-G#Dup!-PpX#;OdG+t2s9*~0ISm)dOJfExZ1n>x7bp$A{Spg(&rGO->O zflUG`G3q|fG+pVV{CyJf*6LSCGj~*8A29o{Rfucb^7Am6f-oz>B!mEE_Lr?ghBn%! zH<39Lv|;w;HXvFOeJ@|<5fMC9*nOppsQS5^_Y$>G`$FBuKpx*5qV#YgDR)FTeva;B z8Q4wztsG#%ABr_Y{JIgC{#__(uFBA8T pE6joFEB)b;4dO(k0io^&e8|~;^BPWUgld7!7*71ZgdOh| z4&db#eZvctBxHK1WR{gQf+>cLR04LNHLf6Bm+}El9ptxk1%5o-0o*6LVf|DH|M|M@ zp%K4OO2}TQ9`f#STI0Aror$CC#`42AOsL?1&8gt3T= ifwFMko=b8!@Q=Nr>;;7%Z}*|GBJ_rqG|l;q5aFU7Mn+A)lMo?(X~Ai7g*AI zt4zjw254D-RmlY=fg+X8-!2>8jlW4_bB)#%fEKd#SQJ%UR3<}g3_W2+|GZCefD zy%at4M#jJA{OP121q~FtJpc@gQ9;#}yE|D{z5jGc81WaCEzHh*9C M3Kvv;IoB8OLjg!s;jeTEnP}{eS9I#;V(}2xrXzk5eXU zHTzJ5Mr-!RF5}NK%Ob%mU)D1@8E_^H&p-c}k0_C9&MZc&!cCCZLDOzfWx~y -Cm>HP-F2o}AnijuB(^|^U?0Rj4ymf Urh2!M zuS_mLTF?2i*T)EXWT)j88g)Z4CUw8T U4)!fU@5*MHB0npR%zr zin^^OH1KHGh4XGHYmx+l5dxNx0puR9D !hPfuJRK%GP?t7i8_oB*W5F@yT|-AU91tGO6T dL9?yA|@0SltZ(NuNDYL!C VR-nY5RLrf_K4;fNM+F#S 4 hZM&4IPGCzW)4CGpW_* zCP%XLdG@N-ke7DeWen2vtL4)qt<-*wGp&_BQtr}v=_OUmS@bq;g6IQKrea0V3sCDi zVQ~Rp+~o{1?c%?Q91a*+oUi
$I@ZE+j^CK;X;7Vud!aJKucn^+>e z?J|eG>QLTaFN>;jNyD+xmO*oz jMeBh`u4qo%K%?`pXyyCNPJraSiWVb z1gbRJdbr1wlstN2Q1 D}m{B5V%+x}Tw^x<}eO#p3u zluiXmf*rUQ78W;rNnsi!yMUr`WB#Q~HtKN99wjK?m@7tfCSg$F_FM6%BJaF~_W9SV z`6#)2f#Fz>(8xH)i65{f{L+8>`X;CvX*-^~iK09C* yo7Hk%;wajR)+^P zRapAUaMQkY+cT^5-&tgE>Sk7ZxMH_}jj&DI&aTH9UNw$q=$lk3iOtNlfoDqY^WG50 zj^YonNxf<#Tyv!qWPfUPd9!-35H3l4bYSEfy}0RhEw%A4eco8CS0?$JHKn?pIj0CU z#aCc=( 0_P@{&*m%$MK4}i%TC-APq<@_BM9c?ql8`J* zO-pOIWg}-eVpf3a!_h?6?vp1wI`Wm^vpn{P?u>{VtldiKtz{eW<)R)_24HrdWai$J z9ZUE9L)XM5+{LIlXNOD1Fnsg1QkgmAXI|$U!|{f*xIMCF?o6u(GvdNw)Mwd|M}nht zrDT-C9-y%~aKa#opm5WATtBy=CX{A@o{w!(PYga0T^D+`B+!pY(|FdX?YI1%40tfO z8oMSU3$p8m51)b`og>dxRypCob7(SV4{!HA9Gw^Xw3VUa<`+^|pa!GIhc|!$wrBQj z_ojE}A#I3_C!4+OPp*$-^v-51r0>+%o>d+J 0`JOOZ$kja-67sUC{ZHJ77n2RZo=d;NsC&)xqCR^{i(@cQqb!SIU3-6}GdZXOxKk z x(j(ZyRk+cI(&(q `{3{TDEn3y;A%)mci`bcOd9`*SEbtG~` zw>hNfWqM3`lQUi?#m{W5OENb#N&JXmMqXU?Y}VTT!#X x`Rf4(M%9gW?5+tTyQjiRWL0P(sMI2KM7 zv`gaKBuWK0*~Sm)K3-vW$zn;VExFaY-=QBG>kmGCjrEd-az>i6()5&)aqQDRm-lOy z AYw`o~P<(;+nd3vURD5g-kA9VNi>-25V0FE@pOB z$KZf-L*NdB%<$z!$+qjCq6DDpZ4nP?`bOgXSGO|_!8_sr=Z%W2qB=SlVV=kbqx^F) zc!0QC6gN>1w4+A!%2;*+!#*1k&R>LL(FiI7x?W~Uw?1rD=k5|ep5=P`8hzSl^@}!D zCZIu}`i<=Gc(pg~ac8m;bc<4o@e1LU9V%V}9+IEEf9Ds#+1|4Q*xvaYh&x2lY5L#D zGz*kPxV;3fXT*%p?hZ`@MuzX{rIaXVKtO`2FJN)kF)vrEL$s0i8VUlpvZHveyNB4{ zc@t7DnS!%yrVc)Ch0$dVVs09uMwD@m`+NnCp?3qt5ghWQA^8CWCpl_VSDY&)ExBXU znoCalR(*aNraJiW%y^f0mD|X%D@W%aSGkPj3HN)7K>Q<02CCmwd3(s$p$9F9T2Lv8 zy4cQgI*{MvQB5~M`>G~wkFhU!EK=D*#lpGGkY8H1)tk1ZvznN=8fiCD4U!9b{JU_{ zV1HN8X^(S=VO7iRd!PIDf3KcCY_R0N;phKz7 D30tlze 3jY2W(_KDUSZsim(?AY=pqXkt74w~;rPv}2UlX-PbLB$ z&n55P1cd-|Jc@*fj|5D#vw2kMxnP{;dvxe^%4u)gN%|!*lx1J#pqf1eV8CXfQ%&!a zQ9>T++Tg9Sp7HB?e65JwD|$ld?XIiG)Am1QVccTxJii$){r;;wfN0pt<9}pI{*IuN z3hblwOeQy9CZd(F OybXVYK9#L+Jm65b>vS_>gArYsk0H>SmMXXl?Umf1LP=P|H1*S}g( zEWV@*(r1zi5)FwkTIF-IA7vZIuwzQlEoTOO;WH2=d}ko}sz1!~b3v!=W#AELCMsbB zG-jOB?Y@oX=l6T0S!>>7cw(QwzHz3vs85zKu-_#j2s-#$K0tmn1zeZ$X@uS6Ph&WL zz0L!5DB+tb0~WS3l^3TdvkfcJSqd zcQ%@zrfH*wqUXx|O|$6mH&Xn 
jqN-eB}PFbVLN^n$!zJ05E z9C|GKgWj90-`Mr_c9m0YV7;&I6z9~^l^rs47mW%T$s8|W=^&kOo!sRwk~dgxvMBhS zue_VXo4_Ag?z|RZMrq`K<(#Dfcu iBoF+6@Nc1udqAfW1{ zrsJ|gu^8?Hq^lI7nX{1R-3>srSuj`r$< ec?5GPM%0Ps^=0F4c2+E2 z*V0m7sY^I>@RSIDbH#U9Wg96ss4eL4jaZN-57LXX;vBC3_kQtX092M@=|}VBsDKsp zM5_N_*>=jl{Yp?OZHwLk=GHm=IiQzh2fIMlPk8csi3KNBfher@L*#uKzRMO%e+`$v zbpGVAM*`vlSmO&E$PG`%hx3Ckhc#ABWfDfj5v=IluLX}chcr0g$^0SMf=tlC#J`6h zOqBDg&ap7F?Zba`s4-(sGrVMuVsFky-@d3--~R%5;i12*Ve7B%n{y#PYR>%gBtyUf zxezSQ{7@1Kp=XbMxA*WD)zjoh%vq6~!8|9~HvP|;MGTK@x{BV93P5SCH;?#Bf8XOf zyw4fi{l!475ME^wS97r9>DyevSklw;1|j<@RKxnY$k$qCnN*clMd5=)LbhfUy)OTL zL>14=mzfWpqQ^Xoy`uI0!hHQ}41~%#*z`>G41%vhKAkO3a;v~}|JlMk0_yqiCjhZ# zRLe6pmh;ljxWmA`2Y@)VhcE_Q^=VZ7N9z35)aKQ|VH>rE)U%LYd_`X<3h(QvZ4