diff --git a/package-lock.json b/package-lock.json index df56d3a..8e6519f 100644 --- a/package-lock.json +++ b/package-lock.json @@ -15,6 +15,7 @@ "@types/jest": "^29.5.3", "jest": "^29.6.1", "openai": "^4.2.0", + "prettier": "^3.0.2", "ts-jest": "^29.1.1", "typescript": "^5.1.6" } @@ -3253,6 +3254,21 @@ "node": ">=8" } }, + "node_modules/prettier": { + "version": "3.0.2", + "resolved": "https://registry.npmjs.org/prettier/-/prettier-3.0.2.tgz", + "integrity": "sha512-o2YR9qtniXvwEZlOKbveKfDQVyqxbEIWn48Z8m3ZJjBjcCmUy3xZGIv+7AkaeuaTr6yPXJjwv07ZWlsWbEy1rQ==", + "dev": true, + "bin": { + "prettier": "bin/prettier.cjs" + }, + "engines": { + "node": ">=14" + }, + "funding": { + "url": "https://github.com/prettier/prettier?sponsor=1" + } + }, "node_modules/pretty-format": { "version": "29.6.1", "resolved": "https://registry.npmjs.org/pretty-format/-/pretty-format-29.6.1.tgz", diff --git a/package.json b/package.json index 3d79bdf..a691555 100644 --- a/package.json +++ b/package.json @@ -10,7 +10,8 @@ "scripts": { "test": "jest", "build": "tsc", - "prepublishOnly": "npm run build" + "prepublishOnly": "npm run build", + "lint": "prettier --check src tests" }, "author": { "email": "harry@hmarr.com", @@ -26,10 +27,14 @@ "@types/jest": "^29.5.3", "jest": "^29.6.1", "openai": "^4.2.0", + "prettier": "^3.0.2", "ts-jest": "^29.1.1", "typescript": "^5.1.6" }, "dependencies": { "js-tiktoken": "^1.0.7" + }, + "prettier": { + "trailingComma": "all" } -} +} \ No newline at end of file diff --git a/src/functions.ts b/src/functions.ts index 94498c0..a5a5eb5 100644 --- a/src/functions.ts +++ b/src/functions.ts @@ -22,24 +22,24 @@ interface ObjectProp { type Prop = { description?: string; } & ( - | ObjectProp - | { + | ObjectProp + | { type: "string"; enum?: string[]; } - | { + | { type: "number" | "integer"; minimum?: number; maximum?: number; enum?: number[]; } - | { type: "boolean" } - | { type: "null" } - | { + | { type: "boolean" } + | { type: "null" } + | { type: "array"; items?: Prop; } - ); +); // When OpenAI use functions in the prompt, they format them as TypeScript definitions rather than OpenAPI JSON schemas. // This function converts the JSON schemas into TypeScript definitions. @@ -75,7 +75,7 @@ function formatObjectProperties(obj: ObjectProp, indent: number): string { lines.push(`${name}?: ${formatType(param, indent)},`); } } - return lines.map(line => ' '.repeat(indent) + line).join("\n"); + return lines.map((line) => " ".repeat(indent) + line).join("\n"); } // Format a single property type @@ -108,4 +108,4 @@ function formatType(param: Prop, indent: number): string { } return "any[]"; } -} \ No newline at end of file +} diff --git a/src/index.ts b/src/index.ts index 50a41ab..58dd2d0 100644 --- a/src/index.ts +++ b/src/index.ts @@ -14,17 +14,25 @@ let encoder: Tiktoken | undefined; * @param {Function[]} prompt.functions OpenAI function definitions * @returns An estimate for the number of tokens the prompt will use */ -export function promptTokensEstimate({ messages, functions }: { messages: Message[], functions?: Function[] }): number { +export function promptTokensEstimate({ + messages, + functions, +}: { + messages: Message[]; + functions?: Function[]; +}): number { // It appears that if functions are present, the first system message is padded with a trailing newline. This // was inferred by trying lots of combinations of messages and functions and seeing what the token counts were. 
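  // For example, when functions are supplied, the first system message "hello" is
  // counted as if its content were "hello\n" (see the padding logic just below).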
let paddedSystem = false; - let tokens = messages.map(m => { - if (m.role === "system" && functions && !paddedSystem) { - m = { ...m, content: m.content + "\n" } - paddedSystem = true; - } - return messageTokensEstimate(m); - }).reduce((a, b) => a + b, 0); + let tokens = messages + .map((m) => { + if (m.role === "system" && functions && !paddedSystem) { + m = { ...m, content: m.content + "\n" }; + paddedSystem = true; + } + return messageTokensEstimate(m); + }) + .reduce((a, b) => a + b, 0); // Each completion (vs message) seems to carry a 3-token overhead tokens += 3; @@ -37,7 +45,7 @@ export function promptTokensEstimate({ messages, functions }: { messages: Messag // If there's a system message _and_ functions are present, subtract four tokens. I assume this is because // functions typically add a system message, but reuse the first one if it's already there. This offsets // the extra 9 tokens added by the function definitions. - if (functions && messages.find(m => m.role === "system")) { + if (functions && messages.find((m) => m.role === "system")) { tokens -= 4; } @@ -68,7 +76,7 @@ export function messageTokensEstimate(message: Message): number { message.content, message.name, message.function_call?.name, - message.function_call?.arguments + message.function_call?.arguments, ].filter((v): v is string => !!v); let tokens = components.map(stringTokens).reduce((a, b) => a + b, 0); tokens += 3; // Add three per message @@ -85,7 +93,7 @@ export function messageTokensEstimate(message: Message): number { } /** - * Estimate the number of tokens a function definition will use. Note that using the function definition within + * Estimate the number of tokens a function definition will use. Note that using the function definition within * a prompt will add extra tokens, so you might want to use `promptTokensEstimate` instead. 
* @param funcs An array of OpenAI function definitions * @returns An estimate for the number of tokens the function definitions will use diff --git a/tests/token-counts.test.ts b/tests/token-counts.test.ts index a152dd0..2d9ec03 100644 --- a/tests/token-counts.test.ts +++ b/tests/token-counts.test.ts @@ -7,7 +7,7 @@ type Example = { messages: Message[]; functions?: Function[]; tokens: number; - validate?: boolean + validate?: boolean; }; const TEST_CASES: Example[] = [ @@ -76,27 +76,19 @@ const TEST_CASES: Example[] = [ }, // these are all random test cases below { - messages: [ - { role: "user", content: "hello" } - ], - tokens: 8 + messages: [{ role: "user", content: "hello" }], + tokens: 8, }, { - messages: [ - { role: "user", content: "hello world" } - ], - tokens: 9 + messages: [{ role: "user", content: "hello world" }], + tokens: 9, }, { - messages: [ - { role: "system", content: "hello" } - ], + messages: [{ role: "system", content: "hello" }], tokens: 8, }, { - messages: [ - { role: "system", content: "hello:" } - ], + messages: [{ role: "system", content: "hello:" }], tokens: 9, }, { @@ -105,37 +97,31 @@ const TEST_CASES: Example[] = [ { role: "user", content: "hello robot" }, { role: "assistant", content: "hello world" }, ], - tokens: 27 + tokens: 27, }, { - messages: [ - { role: "user", content: "hello" } - ], + messages: [{ role: "user", content: "hello" }], functions: [ { name: "foo", - parameters: { type: "object", properties: {} } - } + parameters: { type: "object", properties: {} }, + }, ], - tokens: 31 + tokens: 31, }, { - messages: [ - { role: "user", content: "hello" } - ], + messages: [{ role: "user", content: "hello" }], functions: [ { name: "foo", description: "Do a foo", - parameters: { type: "object", properties: {} } - } + parameters: { type: "object", properties: {} }, + }, ], - tokens: 36 + tokens: 36, }, { - messages: [ - { role: "user", content: "hello" } - ], + messages: [{ role: "user", content: "hello" }], functions: [ { name: "bing_bong", @@ -144,16 +130,14 @@ const TEST_CASES: Example[] = [ type: "object", properties: { foo: { type: "string" }, - } - } - } + }, + }, + }, ], - tokens: 49 + tokens: 49, }, { - messages: [ - { role: "user", content: "hello" } - ], + messages: [{ role: "user", content: "hello" }], functions: [ { name: "bing_bong", @@ -163,16 +147,14 @@ const TEST_CASES: Example[] = [ properties: { foo: { type: "string" }, bar: { type: "number", description: "A number" }, - } - } - } + }, + }, + }, ], tokens: 57, }, { - messages: [ - { role: "user", content: "hello" } - ], + messages: [{ role: "user", content: "hello" }], functions: [ { name: "bing_bong", @@ -184,12 +166,12 @@ const TEST_CASES: Example[] = [ type: "object", properties: { bar: { type: "string", enum: ["a", "b", "c"] }, - baz: { type: "boolean" } - } + baz: { type: "boolean" }, + }, }, - } - } - } + }, + }, + }, ], tokens: 68, }, @@ -203,100 +185,122 @@ const TEST_CASES: Example[] = [ { messages: [ { role: "user", content: "hello world" }, - { role: "function", name: "do_stuff", content: `{"foo": "bar", "baz": 1.5}` }, + { + role: "function", + name: "do_stuff", + content: `{"foo": "bar", "baz": 1.5}`, + }, ], tokens: 28, }, { messages: [ - { role: "function", name: "dance_the_tango", content: `{"a": { "b" : { "c": false}}}` }, + { + role: "function", + name: "dance_the_tango", + content: `{"a": { "b" : { "c": false}}}`, + }, ], tokens: 24, }, { messages: [ - { role: "assistant", content: "", function_call: { name: "do_stuff", arguments: `{"foo": "bar", "baz": 1.5}` } }, + { + 
role: "assistant", + content: "", + function_call: { + name: "do_stuff", + arguments: `{"foo": "bar", "baz": 1.5}`, + }, + }, ], tokens: 26, }, { messages: [ - { role: "assistant", content: "", function_call: { name: "do_stuff", arguments: `{"foo":"bar", "baz":\n\n 1.5}` } }, + { + role: "assistant", + content: "", + function_call: { + name: "do_stuff", + arguments: `{"foo":"bar", "baz":\n\n 1.5}`, + }, + }, ], tokens: 25, }, { messages: [ - { "role": "system", "content": "Hello" }, - { "role": "user", "content": "Hi there" }, + { role: "system", content: "Hello" }, + { role: "user", content: "Hi there" }, ], functions: [ { - "name": "do_stuff", - "parameters": { "type": "object", "properties": {} } - } + name: "do_stuff", + parameters: { type: "object", properties: {} }, + }, ], tokens: 35, }, { messages: [ - { "role": "system", "content": "Hello:" }, - { "role": "user", "content": "Hi there" }, + { role: "system", content: "Hello:" }, + { role: "user", content: "Hi there" }, ], functions: [ - { "name": "do_stuff", "parameters": { "type": "object", "properties": {} } } + { name: "do_stuff", parameters: { type: "object", properties: {} } }, ], tokens: 35, }, { messages: [ - { "role": "system", "content": "Hello:" }, - { "role": "system", "content": "Hello" }, - { "role": "user", "content": "Hi there" }, + { role: "system", content: "Hello:" }, + { role: "system", content: "Hello" }, + { role: "user", content: "Hi there" }, ], functions: [ - { "name": "do_stuff", "parameters": { "type": "object", "properties": {} } } + { name: "do_stuff", parameters: { type: "object", properties: {} } }, ], tokens: 40, }, { - messages: [{ role: 'user', content: 'hello' }], + messages: [{ role: "user", content: "hello" }], functions: [ { - name: 'get_recipe', + name: "get_recipe", parameters: { - type: 'object', - required: ['ingredients', 'instructions', 'time_to_cook'], + type: "object", + required: ["ingredients", "instructions", "time_to_cook"], properties: { ingredients: { - type: 'array', + type: "array", items: { - type: 'object', - required: ['name', 'unit', 'amount'], + type: "object", + required: ["name", "unit", "amount"], properties: { name: { - type: 'string', + type: "string", }, unit: { - enum: ['grams', 'ml', 'cups', 'pieces', 'teaspoons'], - type: 'string', + enum: ["grams", "ml", "cups", "pieces", "teaspoons"], + type: "string", }, amount: { - type: 'number', + type: "number", }, }, }, }, instructions: { - type: 'array', + type: "array", items: { - type: 'string', + type: "string", }, - description: 'Steps to prepare the recipe (no numbering)', + description: "Steps to prepare the recipe (no numbering)", }, time_to_cook: { - type: 'number', - description: 'Total time to prepare the recipe in minutes', + type: "number", + description: "Total time to prepare the recipe in minutes", }, }, }, @@ -305,101 +309,104 @@ const TEST_CASES: Example[] = [ tokens: 106, }, { - messages: [{ role: 'user', content: 'hello' }], + messages: [{ role: "user", content: "hello" }], functions: [ { - name: 'function', - description: 'description', + name: "function", + description: "description", parameters: { - type: 'object', + type: "object", properties: { quality: { - type: 'object', + type: "object", properties: { pros: { - type: 'array', + type: "array", items: { - type: 'string', + type: "string", }, - description: 'Write 3 points why this text is well written', - } + description: "Write 3 points why this text is well written", + }, }, - } + }, }, - } - }], + }, + }, + ], tokens: 46, }, { - messages: [{ 
role: 'user', content: 'hello' }], + messages: [{ role: "user", content: "hello" }], functions: [ { - name: 'function', - description: 'desctiption1', + name: "function", + description: "desctiption1", parameters: { - type: 'object', - description: 'desctiption2', + type: "object", + description: "desctiption2", properties: { mainField: { - type: 'string', - description: 'description3', + type: "string", + description: "description3", }, - 'field number one': { - type: 'object', - description: 'description4', + "field number one": { + type: "object", + description: "description4", properties: { yesNoField: { - type: 'string', - description: 'description5', - enum: [ - 'Yes', - 'No', - ], + type: "string", + description: "description5", + enum: ["Yes", "No"], }, howIsInteresting: { - type: 'string', - description: 'description6', + type: "string", + description: "description6", }, scoreInteresting: { - type: 'number', - description: 'description7', + type: "number", + description: "description7", }, isInteresting: { - type: 'string', - description: 'description8', - enum: [ - 'Yes', - 'No', - ], + type: "string", + description: "description8", + enum: ["Yes", "No"], }, }, }, }, - - } - } + }, + }, ], tokens: 96, - } + }, ]; const validateAll = false; const openAITimeout = 10000; describe.each(TEST_CASES)("token counts (%j)", (example) => { - const validateTest = ((validateAll || example.validate) ? test : test.skip) - validateTest("test data matches openai", async () => { - const openai = new OpenAI(); - const response = await openai.chat.completions.create({ - model: "gpt-3.5-turbo", - messages: example.messages, - functions: example.functions as any, - max_tokens: 10, - }); - expect(response.usage?.prompt_tokens).toBe(example.tokens); - }, openAITimeout); + const validateTest = validateAll || example.validate ? test : test.skip; + validateTest( + "test data matches openai", + async () => { + const openai = new OpenAI(); + const response = await openai.chat.completions.create({ + model: "gpt-3.5-turbo", + messages: example.messages, + functions: example.functions as any, + max_tokens: 10, + }); + expect(response.usage?.prompt_tokens).toBe(example.tokens); + }, + openAITimeout, + ); test("estimate is correct", async () => { - expect(promptTokensEstimate({ messages: example.messages, functions: example.functions })).toBe(example.tokens); + expect( + promptTokensEstimate({ + messages: example.messages, + functions: example.functions, + }), + ).toBe(example.tokens); }); -}) \ No newline at end of file +});
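
For reference, a minimal usage sketch of the reformatted promptTokensEstimate entry point, mirroring one of the pinned test cases above. The import path is an assumption — the package name is not visible in this diff:

import { promptTokensEstimate } from "openai-chat-tokens"; // assumed package name

// Mirrors the test case with a "Hello:" system message, a "Hi there" user
// message, and an empty do_stuff function, which the suite pins at 35 tokens.
const tokens = promptTokensEstimate({
  messages: [
    { role: "system", content: "Hello:" },
    { role: "user", content: "Hi there" },
  ],
  functions: [
    { name: "do_stuff", parameters: { type: "object", properties: {} } },
  ],
});

console.log(tokens); // 35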