diff --git a/README.md b/README.md
index 77394f2..0733237 100644
--- a/README.md
+++ b/README.md
@@ -44,7 +44,7 @@ Features used: function composition (no agent), pdf loading, split-extract-rewri
 
 Splits a text into chunks and generates embeddings.
 
-Features used: direct function calls (no agent), split text, generate embeddings
+Features used: direct function calls (no agent), split text (gpt3-tokenizer), generate embeddings
 
 ## Features
 
@@ -80,7 +80,8 @@ Features used: direct function calls (no agent), split text, generate embeddings
   - Utility functions to combine and convert prompts
 - Text functions
   - Extract information (extract & rewrite; extract recursively)
-  - Split text into chunks
+  - Splitters: split text into chunks
+    - By character, by token (GPT3-tokenizer)
   - Helpers: load, generate
 - Data sources
   - Webpage as HTML text
@@ -150,8 +151,9 @@ export async function runWikipediaAgent({
     },
     execute: $.tool.executeExtractInformationFromWebpage({
       extract: $.text.extractRecursively.asExtractFunction({
-        split: $.text.splitRecursivelyAtCharacter.asSplitFunction({
-          maxCharactersPerChunk: 2048 * 4, // needs to fit into a gpt-3.5-turbo prompt
+        split: $.text.splitRecursivelyAtToken.asSplitFunction({
+          tokenizer: $.provider.openai.gptTokenizer(),
+          maxChunkSize: 2048, // needs to fit into a gpt-3.5-turbo prompt
         }),
         extract: $.text.generateText.asFunction({
           prompt: $.prompt.extractChatPrompt(),
diff --git a/docs/concepts/index.md b/docs/concepts/index.md
index 8989fb2..137ce21 100644
--- a/docs/concepts/index.md
+++ b/docs/concepts/index.md
@@ -13,9 +13,10 @@ You can use all almost all helper functions in JS Agent directly. This includes
 Here is an example of splitting a text into chunks and using the OpenAI embedding API directly to get the embedding of each chunk ([full example](https://github.com/lgrammel/js-agent/tree/main/examples/split-and-embed-text)):
 
 ```typescript
-const chunks = $.text.splitRecursivelyAtCharacter({
+const chunks = await $.text.splitRecursivelyAtToken({
   text,
-  maxCharactersPerChunk: 1024 * 4,
+  tokenizer: $.provider.openai.gptTokenizer(),
+  maxChunkSize: 128,
 });
 
 const embeddings = [];
@@ -44,7 +45,7 @@ Here is the example that creates a Twitter thread on a topic using the content o
 ```typescript
 const rewriteAsTwitterThread = $.text.splitExtractRewrite.asExtractFunction({
   split: $.text.splitRecursivelyAtCharacter.asSplitFunction({
-    maxCharactersPerChunk: 1024 * 4,
+    maxChunkSize: 1024 * 4,
   }),
   extract: $.text.generateText.asFunction({
     model: gpt4,
diff --git a/docs/docs/tutorial-wikipedia-agent/complete-agent.md b/docs/docs/tutorial-wikipedia-agent/complete-agent.md
index b621d60..a18ebaa 100644
--- a/docs/docs/tutorial-wikipedia-agent/complete-agent.md
+++ b/docs/docs/tutorial-wikipedia-agent/complete-agent.md
@@ -60,8 +60,9 @@ async function runWikipediaAgent({
     },
     execute: $.tool.executeExtractInformationFromWebpage({
       extract: $.text.extractRecursively.asExtractFunction({
-        split: $.text.splitRecursivelyAtCharacter.asSplitFunction({
-          maxCharactersPerChunk: 2048 * 4, // needs to fit into a gpt-3.5-turbo prompt
+        split: $.text.splitRecursivelyAtToken.asSplitFunction({
+          tokenizer: $.provider.openai.gptTokenizer(),
+          maxChunkSize: 2048, // needs to fit into a gpt-3.5-turbo prompt
         }),
         extract: $.text.generateText.asFunction({
           prompt: $.prompt.extractChatPrompt(),
diff --git a/docs/docs/tutorial-wikipedia-agent/create-read-article-tool.md b/docs/docs/tutorial-wikipedia-agent/create-read-article-tool.md
index d79e34b..12c0e0c 100644
--- a/docs/docs/tutorial-wikipedia-agent/create-read-article-tool.md
+++ b/docs/docs/tutorial-wikipedia-agent/create-read-article-tool.md
@@ -18,8 +18,9 @@ const readWikipediaArticleAction = $.tool.extractInformationFromWebpage({
   },
   execute: $.tool.executeExtractInformationFromWebpage({
     extract: $.text.extractRecursively.asExtractFunction({
-      split: $.text.splitRecursivelyAtCharacter.asSplitFunction({
-        maxCharactersPerChunk: 2048 * 4, // needs to fit into a gpt-3.5-turbo prompt
+      split: $.text.splitRecursivelyAtToken.asSplitFunction({
+        tokenizer: $.provider.openai.gptTokenizer(),
+        maxChunkSize: 2048, // needs to fit into a gpt-3.5-turbo prompt
       }),
       extract: $.text.generateText.asFunction({
         prompt: $.prompt.extractChatPrompt(),
diff --git a/examples/pdf-to-twitter-thread/src/createTwitterThreadFromPdf.ts b/examples/pdf-to-twitter-thread/src/createTwitterThreadFromPdf.ts
index d31f502..400f5e7 100644
--- a/examples/pdf-to-twitter-thread/src/createTwitterThreadFromPdf.ts
+++ b/examples/pdf-to-twitter-thread/src/createTwitterThreadFromPdf.ts
@@ -18,9 +18,10 @@ export async function createTwitterThreadFromPdf({
 
   const rewriteAsTwitterThread = $.text.splitExtractRewrite.asExtractFunction({
     split: $.text.splitRecursivelyAtCharacter.asSplitFunction({
-      maxCharactersPerChunk: 1024 * 4,
+      maxChunkSize: 1024 * 4,
     }),
     extract: $.text.generateText.asFunction({
+      id: "extract",
       model: gpt4,
       prompt: $.prompt.extractAndExcludeChatPrompt({
         excludeKeyword: "IRRELEVANT",
@@ -28,6 +29,7 @@ export async function createTwitterThreadFromPdf({
     }),
     include: (text) => text !== "IRRELEVANT",
     rewrite: $.text.generateText.asFunction({
+      id: "rewrite",
       model: gpt4,
       prompt: async ({ text, topic }) => [
         {
diff --git a/examples/pdf-to-twitter-thread/src/main.ts b/examples/pdf-to-twitter-thread/src/main.ts
index 53aca1f..cd455ea 100644
--- a/examples/pdf-to-twitter-thread/src/main.ts
+++ b/examples/pdf-to-twitter-thread/src/main.ts
@@ -26,7 +26,7 @@ createTwitterThreadFromPdf({
   openAiApiKey,
   context: {
     recordCall: (call) => {
-      console.log(`...${call.metadata.id ?? "unknown"}...`);
+      console.log(`${call.metadata.id ?? "unknown"}...`);
     },
   },
 })
diff --git a/examples/split-and-embed-text/src/main.ts b/examples/split-and-embed-text/src/main.ts
index a741657..f9e3aac 100644
--- a/examples/split-and-embed-text/src/main.ts
+++ b/examples/split-and-embed-text/src/main.ts
@@ -1,6 +1,6 @@
 import { Command } from "commander";
 import dotenv from "dotenv";
-import { splitAndEmbed } from "./splitAndEmbed";
+import { splitAndEmbedText } from "./splitAndEmbedText";
 
 dotenv.config();
 
@@ -19,7 +19,7 @@ if (!openAiApiKey) {
   throw new Error("OPENAI_API_KEY is not set");
 }
 
-splitAndEmbed({
+splitAndEmbedText({
   textFilePath: file,
   openAiApiKey,
 })
diff --git a/examples/split-and-embed-text/src/splitAndEmbed.ts b/examples/split-and-embed-text/src/splitAndEmbedText.ts
similarity index 77%
rename from examples/split-and-embed-text/src/splitAndEmbed.ts
rename to examples/split-and-embed-text/src/splitAndEmbedText.ts
index 50c6ef5..c4f63eb 100644
--- a/examples/split-and-embed-text/src/splitAndEmbed.ts
+++ b/examples/split-and-embed-text/src/splitAndEmbedText.ts
@@ -1,7 +1,7 @@
 import * as $ from "js-agent";
 import fs from "node:fs/promises";
 
-export async function splitAndEmbed({
+export async function splitAndEmbedText({
   textFilePath,
   openAiApiKey,
 }: {
@@ -10,9 +10,10 @@ export async function splitAndEmbed({
 }) {
   const text = await fs.readFile(textFilePath, "utf8");
 
-  const chunks = $.text.splitRecursivelyAtCharacter({
+  const chunks = await $.text.splitRecursivelyAtToken({
     text,
-    maxCharactersPerChunk: 1024 * 4,
+    tokenizer: $.provider.openai.gptTokenizer(),
+    maxChunkSize: 128,
   });
 
   const embeddings = [];
diff --git a/examples/wikipedia/src/runWikipediaAgent.ts b/examples/wikipedia/src/runWikipediaAgent.ts
index c8ecd0b..0efe444 100644
--- a/examples/wikipedia/src/runWikipediaAgent.ts
+++ b/examples/wikipedia/src/runWikipediaAgent.ts
@@ -36,8 +36,9 @@ export async function runWikipediaAgent({
     },
     execute: $.tool.executeExtractInformationFromWebpage({
       extract: $.text.extractRecursively.asExtractFunction({
-        split: $.text.splitRecursivelyAtCharacter.asSplitFunction({
-          maxCharactersPerChunk: 2048 * 4, // needs to fit into a gpt-3.5-turbo prompt
+        split: $.text.splitRecursivelyAtToken.asSplitFunction({
+          tokenizer: $.provider.openai.gptTokenizer(),
+          maxChunkSize: 2048, // needs to fit into a gpt-3.5-turbo prompt
         }),
         extract: $.text.generateText.asFunction({
           prompt: $.prompt.extractChatPrompt(),
diff --git a/packages/agent/README.md b/packages/agent/README.md
index b1b49c4..2b58f4a 100644
--- a/packages/agent/README.md
+++ b/packages/agent/README.md
@@ -42,7 +42,7 @@ Features used: function composition (no agent), pdf loading, split-extract-rewri
 
 Splits a text into chunks and generates embeddings.
 
-Features used: direct function calls (no agent), split text, generate embeddings
+Features used: direct function calls (no agent), split text (gpt3-tokenizer), generate embeddings
 
 ## Features
 
@@ -78,7 +78,8 @@ Features used: direct function calls (no agent), split text, generate embeddings
   - Utility functions to combine and convert prompts
 - Text functions
   - Extract information (extract & rewrite; extract recursively)
-  - Split text into chunks
+  - Splitters: split text into chunks
+    - By character, by token (GPT3-tokenizer)
   - Helpers: load, generate
 - Data sources
   - Webpage as HTML text
@@ -148,8 +149,9 @@ export async function runWikipediaAgent({
     },
     execute: $.tool.executeExtractInformationFromWebpage({
       extract: $.text.extractRecursively.asExtractFunction({
-        split: $.text.splitRecursivelyAtCharacter.asSplitFunction({
-          maxCharactersPerChunk: 2048 * 4, // needs to fit into a gpt-3.5-turbo prompt
+        split: $.text.splitRecursivelyAtToken.asSplitFunction({
+          tokenizer: $.provider.openai.gptTokenizer(),
+          maxChunkSize: 2048, // needs to fit into a gpt-3.5-turbo prompt
         }),
         extract: $.text.generateText.asFunction({
           prompt: $.prompt.extractChatPrompt(),
diff --git a/packages/agent/package.json b/packages/agent/package.json
index 204807e..768eeaa 100644
--- a/packages/agent/package.json
+++ b/packages/agent/package.json
@@ -42,6 +42,7 @@
     "fastify": "4.14.1",
     "fastify-type-provider-zod": "1.1.9",
     "html-to-text": "9.0.5",
+    "gpt3-tokenizer": "1.1.5",
     "hyperid": "3.1.1",
     "pdfjs-dist": "3.5.141",
     "pino": "8.11.0",
diff --git a/packages/agent/src/provider/openai/GPTTokenizer.ts b/packages/agent/src/provider/openai/GPTTokenizer.ts
new file mode 100644
index 0000000..df5918d
--- /dev/null
+++ b/packages/agent/src/provider/openai/GPTTokenizer.ts
@@ -0,0 +1,21 @@
+import GPT3Tokenizer from "gpt3-tokenizer";
+import { Tokenizer } from "../../tokenizer/Tokenizer";
+
+export const gptTokenizer = ({
+  type = "gpt3",
+}: {
+  type?: "gpt3" | "codex";
+} = {}): Tokenizer => {
+  const gptTokenizer = new GPT3Tokenizer({ type });
+
+  return Object.freeze({
+    encode: async (text: string) => {
+      const encodeResult = gptTokenizer.encode(text);
+      return {
+        tokens: encodeResult.bpe,
+        texts: encodeResult.text,
+      };
+    },
+    decode: async (tokens: Array<number>) => gptTokenizer.decode(tokens),
+  });
+};
diff --git a/packages/agent/src/provider/openai/index.ts b/packages/agent/src/provider/openai/index.ts
index c6bdfb6..e45a032 100644
--- a/packages/agent/src/provider/openai/index.ts
+++ b/packages/agent/src/provider/openai/index.ts
@@ -1,3 +1,4 @@
+export * from "./GPTTokenizer.js";
 export * from "./OpenAIChatCompletion.js";
 export * from "./OpenAIEmbedding.js";
 export * from "./OpenAITextCompletion.js";
diff --git a/packages/agent/src/text/split/index.ts b/packages/agent/src/text/split/index.ts
index 0d007d4..30f9c16 100644
--- a/packages/agent/src/text/split/index.ts
+++ b/packages/agent/src/text/split/index.ts
@@ -1,2 +1,2 @@
-export * from "./splitRecursivelyAtCharacter";
+export * from "./splitRecursively";
 export * from "./SplitFunction";
diff --git a/packages/agent/src/text/split/splitRecursively.ts b/packages/agent/src/text/split/splitRecursively.ts
new file mode 100644
index 0000000..791d1a2
--- /dev/null
+++ b/packages/agent/src/text/split/splitRecursively.ts
@@ -0,0 +1,75 @@
+import { Tokenizer } from "../../tokenizer/Tokenizer";
+import { SplitFunction } from "./SplitFunction";
+
+function splitRecursivelyImplementation({
+  maxChunkSize,
+  segments,
+}: {
+  maxChunkSize: number;
+  segments: string | Array<string>;
+}): Array<string> {
+  if (segments.length < maxChunkSize) {
+    return Array.isArray(segments) ? [segments.join("")] : [segments];
+  }
+
+  const half = Math.ceil(segments.length / 2);
+  const left = segments.slice(0, half);
+  const right = segments.slice(half);
+
+  return [
+    ...splitRecursivelyImplementation({
+      segments: left,
+      maxChunkSize,
+    }),
+    ...splitRecursivelyImplementation({
+      segments: right,
+      maxChunkSize,
+    }),
+  ];
+}
+
+export const splitRecursivelyAtCharacter = async ({
+  maxChunkSize,
+  text,
+}: {
+  maxChunkSize: number;
+  text: string;
+}) =>
+  splitRecursivelyImplementation({
+    maxChunkSize,
+    segments: text,
+  });
+
+splitRecursivelyAtCharacter.asSplitFunction =
+  ({ maxChunkSize }: { maxChunkSize: number }): SplitFunction =>
+  async ({ text }: { text: string }) =>
+    splitRecursivelyAtCharacter({ maxChunkSize, text });
+
+export const splitRecursivelyAtToken = async ({
+  tokenizer,
+  maxChunkSize,
+  text,
+}: {
+  tokenizer: Tokenizer;
+  maxChunkSize: number;
+  text: string;
+}) =>
+  splitRecursivelyImplementation({
+    maxChunkSize,
+    segments: (await tokenizer.encode(text)).texts,
+  });
+
+splitRecursivelyAtToken.asSplitFunction =
+  ({
+    tokenizer,
+    maxChunkSize,
+  }: {
+    tokenizer: Tokenizer;
+    maxChunkSize: number;
+  }): SplitFunction =>
+  async ({ text }: { text: string }) =>
+    splitRecursivelyAtToken({
+      tokenizer,
+      maxChunkSize,
+      text,
+    });
diff --git a/packages/agent/src/text/split/splitRecursivelyAtCharacter.ts b/packages/agent/src/text/split/splitRecursivelyAtCharacter.ts
deleted file mode 100644
index 89f1f73..0000000
--- a/packages/agent/src/text/split/splitRecursivelyAtCharacter.ts
+++ /dev/null
@@ -1,31 +0,0 @@
-import { SplitFunction } from "./SplitFunction";
-
-export function splitRecursivelyAtCharacter({
-  maxCharactersPerChunk,
-  text,
-}: {
-  maxCharactersPerChunk: number;
-  text: string;
-}): Array<string> {
-  if (text.length < maxCharactersPerChunk) {
-    return [text];
-  }
-
-  const half = Math.ceil(text.length / 2);
-  const left = text.substring(0, half);
-  const right = text.substring(half);
-
-  return [
-    ...splitRecursivelyAtCharacter({ text: left, maxCharactersPerChunk }),
-    ...splitRecursivelyAtCharacter({ text: right, maxCharactersPerChunk }),
-  ];
-}
-
-splitRecursivelyAtCharacter.asSplitFunction =
-  ({
-    maxCharactersPerChunk,
-  }: {
-    maxCharactersPerChunk: number;
-  }): SplitFunction =>
-  async ({ text }: { text: string }) =>
-    splitRecursivelyAtCharacter({ maxCharactersPerChunk, text });
diff --git a/packages/agent/src/tokenizer/Tokenizer.ts b/packages/agent/src/tokenizer/Tokenizer.ts
new file mode 100644
index 0000000..b5b5ffc
--- /dev/null
+++ b/packages/agent/src/tokenizer/Tokenizer.ts
@@ -0,0 +1,7 @@
+export type Tokenizer = {
+  encode: (text: string) => PromiseLike<{
+    tokens: Array<number>;
+    texts: Array<string>;
+  }>;
+  decode: (tokens: Array<number>) => PromiseLike<string>;
+};
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index cee6020..e8ec1ef 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -63,7 +63,7 @@ importers:
         specifier: '*'
         version: link:../../packages/agent
 
-  examples/split-and-embed:
+  examples/split-and-embed-text:
     dependencies:
       commander:
         specifier: 10.0.1
@@ -110,6 +110,9 @@ importers:
       fastify-type-provider-zod:
        specifier: 1.1.9
        version: 1.1.9(fastify@4.14.1)(zod@3.21.4)
+      gpt3-tokenizer:
+        specifier: 1.1.5
+        version: 1.1.5
       html-to-text:
         specifier: 9.0.5
         version: 9.0.5
@@ -1439,6 +1442,10 @@ packages:
     resolution: {integrity: sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==}
     dev: true
 
+  /array-keyed-map@2.1.3:
+    resolution: {integrity: sha512-JIUwuFakO+jHjxyp4YgSiKXSZeC0U+R1jR94bXWBcVlFRBycqXlb+kH9JHxBGcxnVuSqx5bnn0Qz9xtSeKOjiA==}
+    dev: false
+
   /asynckit@0.4.0:
     resolution: {integrity: sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==}
 
@@ -2300,6 +2307,13 @@ packages:
     engines: {node: '>=4'}
     dev: true
 
+  /gpt3-tokenizer@1.1.5:
+    resolution: {integrity: sha512-O9iCL8MqGR0Oe9wTh0YftzIbysypNQmS5a5JG3cB3M4LMYjlAVvNnf8LUzVY9MrI7tj+YLY356uHtO2lLX2HpA==}
+    engines: {node: '>=12'}
+    dependencies:
+      array-keyed-map: 2.1.3
+    dev: false
+
   /graceful-fs@4.2.11:
     resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==}
    dev: true
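
Below the patch, a minimal usage sketch (not part of the diff itself) of the tokenizer and token-based splitter it introduces, following the updated split-and-embed-text example; `splitDemo` and `demoText` are placeholder names.

```typescript
import * as $ from "js-agent";

// Sketch: tokenize a text with the GPT tokenizer added in this patch,
// then split it into chunks of fewer than 128 tokens each.
async function splitDemo(demoText: string) {
  const tokenizer = $.provider.openai.gptTokenizer();

  // encode() resolves to { tokens, texts } as declared in Tokenizer.ts.
  const { tokens } = await tokenizer.encode(demoText);
  console.log(`token count: ${tokens.length}`);
  console.log(`round trip: ${await tokenizer.decode(tokens)}`);

  // splitRecursivelyAtToken halves the token sequence recursively until
  // each piece is under maxChunkSize tokens, then re-joins the token texts.
  const chunks = await $.text.splitRecursivelyAtToken({
    text: demoText,
    tokenizer,
    maxChunkSize: 128,
  });
  console.log(`chunks: ${chunks.length}`);
}
```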
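A second sketch for the split-function form used in the Wikipedia agent hunks: the old `maxCharactersPerChunk: 2048 * 4` sized chunks in characters (presumably assuming roughly four characters per token), whereas `maxChunkSize: 2048` counts tokens directly, so the chunk budget maps one-to-one onto the gpt-3.5-turbo prompt limit mentioned in the comments. The call site shown is hypothetical; `articleText` is a placeholder.

```typescript
import * as $ from "js-agent";

// Sketch: the token-based splitter wrapped as a reusable SplitFunction,
// the same wrapping used inside extractRecursively in the hunks above.
const split = $.text.splitRecursivelyAtToken.asSplitFunction({
  tokenizer: $.provider.openai.gptTokenizer(),
  maxChunkSize: 2048, // needs to fit into a gpt-3.5-turbo prompt
});

// Hypothetical call site: resolves to chunks of fewer than 2048 tokens each.
// const chunks = await split({ text: articleText });
```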