Skip to content

Commit d6b99c8

Browse files
RIscRIpt and RobertCraigie
authored and committed
fix(embeddings): correctly decode base64 data (#1448)
* Fix Core.toFloat32Array, Buffer version. According to the Node.js docs, Buffer.buffer is not guaranteed to correspond exactly to the original Buffer. [1] The previous implementation could read garbage bytes from the underlying buffer while converting bytes to floats. [1] https://nodejs.org/api/buffer.html#bufbuffer
* add tests for embeddings data
* fix formatting
---------
Co-authored-by: Robert Craigie <[email protected]>
1 parent 4ba9947 commit d6b99c8

File tree

4 files changed

+55
-9
lines changed

4 files changed

+55
-9
lines changed

src/core.ts

+4-1
Original file line numberDiff line numberDiff line change
@@ -1309,7 +1309,10 @@ export const toBase64 = (str: string | null | undefined): string => {
13091309
export const toFloat32Array = (base64Str: string): Array<number> => {
13101310
if (typeof Buffer !== 'undefined') {
13111311
// for Node.js environment
1312-
return Array.from(new Float32Array(Buffer.from(base64Str, 'base64').buffer));
1312+
const buf = Buffer.from(base64Str, 'base64');
1313+
return Array.from(
1314+
new Float32Array(buf.buffer, buf.byteOffset, buf.length / Float32Array.BYTES_PER_ELEMENT),
1315+
);
13131316
} else {
13141317
// for legacy web platform APIs
13151318
const binaryStr = atob(base64Str);
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"object":"list","data":[{"object":"embedding","index":0,"embedding":"A1fLvaC4Bb0QB7w8yEvrPOm9Xj2r0yA8EW4sPRq75j3Fbiq81/chPumAGb0afqG8R6AFvpzsQT35SPO7Hi39PEMAir1lf0A92McfvRoVlLxQv9o9tHqIvQYlrL0fwlK8sufPPYz2gjzH5Ho93GebvN+eCTxjRjW8PJRKvXMtFD4+n3C9ByMPO39Gkjs1Jm49A1fLPdNXpjv8RLm92McfveKpLz01VNO9SUIevhAHvD0flG09+9srvW5j7Txp8dY8LW4Ju08bJb1GdL29g+aNPWlLBD1p8dY8LkCkvfPLtjxcBj4+1/ehPebv/bz/Ifo8SqkOvREFHzyAr588HbUPPbFS+r00gri825WAPQlcGj1qHZ+8o8EOPo880Tn5dli9zRUSPc2APD0b5RG9mhxEvTyUSj3FQMU95u/9vE20tD3wwBC94NmxvXSUhL3Ofh8904WLPRbeJb2Paja8BClmvhwgOj2e6Ic9em0LPdj1BD3lSau7dJQEPJi107yB6kc97sTKO6lAaD2YDwE9YDuPPSFVC735dtg9SK1IOysJNrwtQkE8BmJxPb2ZXT0hVYs9g+YNvLfuuz2nyhe9z7nHN5UVWDxea5E77F1avTIbyL256oG9ft+hPVWJAbwNoug82TCtvUrm072wgN86JPWGO3TRyTwOY4a8xJwPvkx5DL1f1B68RwkTvja7Q72BrQI9Pfs6PTdfeb3RxG09jJxVvfl22D3eCbQ9FbR6vTPtYrn0mzS+kqGkPDxXhbwyG8i98M9wveayuL1EpL88lNqvve3yL70RQmQ7VcZGPaPBjr1wyEA9fKaWOskMibwNomi8J9Rku9EeGz016Si8O1mivQ38lb0EgxO88P1VvcilmLuNA0a9lj8DvHCceD3lSSs9uFWsve6HBT6XEZ68ShS5PFJSE70dTIK86OvDvSNgsbzS8DU8bPz8PAuVpTxKQIE9/NmOPBhFFj7LsL67PJRKvIxu8LwSqVS8D8yTPSOOlj1g0gG8A+69vYz2AjxPhLK80fLSPbrL/LztWz09LAcZvqfKF73B/JO8lnzIvCk5OLxwMU69dmQCvQtp3bs6hwe9WZKKume4S7x3CLg9zK4hPLsjDT16P6a7MbTXPRp+IT0dtQ89GayGvcngwD2F8bO70R4bu8tFlDxcBr67xAWdvdnWfzzQTIC9zn6fPYSKwz3alx28h8GxPW74wj3eNxk+xUBFvIpjyj0WdRi9AkoIPXhvqLugx+U8F0ezvUlCHjx3NAC9uvlhPEOmXD36oAM9D56uvddgrz2giiC9GhWUvHrWGLv0yRk8fOPbvMc+KLs7//S8v5UjPJUV2D0KLjW6YKa5PDciNDuJznQ9USZLPQ=="}],"model":"text-embedding-3-large","usage":{"prompt_tokens":1,"total_tokens":1}}
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"object":"list","data":[{"object":"embedding","index":0,"embedding":[-0.099287055,-0.032646775,0.022952586,0.028722659,0.05438033,0.009816091,0.042097155,0.112661555,-0.010402386,0.158172,-0.037476454,-0.01971345,-0.13049422,0.04734479,-0.0074244705,0.030905303,-0.06738331,0.046996493,-0.039008945,-0.018076468,0.10681021,-0.06664029,-0.08405499,-0.012863665,0.10151614,0.015986703,0.061253335,-0.018970422,0.008399694,-0.011064145,-0.049457774,0.14470463,-0.058745615,0.0021840946,0.00446397,0.058141906,0.099287055,0.0050763874,-0.09046361,-0.039008945,0.042886622,-0.103187956,-0.15454973,0.091810346,0.058002587,-0.041957837,0.028978076,0.02623816,-0.002097021,-0.040309247,-0.09250693,0.06928732,0.03229848,0.02623816,-0.08020054,0.022314047,0.18557113,0.079086,-0.030998182,0.030533789,-0.034829415,0.009705798,0.019492865,0.035084832,-0.122228034,-0.022523023,0.06278583,0.037685428,-0.019423205,0.13941054,0.00039908706,-0.052847836,0.035665322,0.04602127,-0.035618883,-0.04787884,0.049457774,0.096314944,-0.030998182,0.08823452,-0.03534025,-0.086841345,-0.06473628,0.03893929,0.06812634,-0.040495,-0.011133804,-0.22476584,0.045440778,0.06636165,0.03403995,0.032461017,-0.005227315,0.008092035,-0.025843427,0.048807625,0.0061880266,0.05670229,0.031509012,0.06993747,-0.034016732,0.10569567,0.0030620862,-0.011110584,0.011795563,0.058931373,0.054101694,0.068033464,-0.008660915,0.091763906,-0.0370585,0.000023809172,0.013188739,0.004437848,-0.053312227,-0.09770812,-0.06343598,0.07903956,-0.007906278,0.028397584,-0.084565826,-0.103466585,0.0017051902,0.0041185785,0.024636008,-0.016404655,-0.14024645,-0.034295365,-0.009694188,-0.14359008,-0.04778596,0.031903747,0.045649756,-0.06088182,0.058049027,-0.052151248,0.10569567,0.087909445,-0.061206896,-0.00021641403,-0.17637616,0.020096574,-0.016276948,-0.09770812,-0.058792055,-0.09018497,0.023393758,-0.08586612,-0.04295628,0.0034829418,0.048528988,-0.06970527,0.047066152,0.0011493708,-0.01672973,-0.014198792,-0.0034916492,0.037871186,-0.0
10309507,-0.079271756,-0.073234655,-0.0090034045,-0.052244127,-0.0046584345,-0.04834323,-0.008010766,0.060696065,0.04181852,-0.08414787,0.13040134,-0.019295497,0.022592682,-0.03596718,-0.015905434,-0.0956648,-0.021652287,0.011104779,0.030882083,0.02021267,0.0631109,0.017437927,0.14674795,-0.005819415,-0.012364443,-0.029349588,-0.012979763,0.072166555,0.07351329,-0.007923692,-0.09273913,0.007993352,-0.021791605,0.1030022,-0.030858863,0.046230245,-0.14944142,-0.0370585,-0.018064858,-0.02447347,-0.011244097,-0.050340116,-0.03183409,-0.006756907,-0.033087946,-0.001057218,-0.012434102,0.089859895,0.009868335,0.034457903,-0.005073485,0.10532416,0.0394269,0.035084832,-0.06575794,0.09417874,-0.005491438,-0.002366949,0.018099686,-0.005799098,-0.07667115,0.0156151885,-0.06264651,0.07787858,0.09547904,-0.009618724,0.086794905,0.095200405,0.14962718,-0.012039368,0.09882267,-0.037221037,0.033273704,-0.0051402412,0.02804929,-0.08753794,0.009659358,-0.031300034,0.01379245,0.053869497,0.03213594,-0.08526241,0.085633926,-0.039194703,-0.018076468,-0.0023321197,0.009386528,-0.026841871,-0.0025672184,-0.02990686,0.009984433,0.105509914,-0.00069114624,0.022662342,0.0027486214,0.05976728,0.04959709]}],"model":"text-embedding-3-large","usage":{"prompt_tokens":1,"total_tokens":1}}

tests/api-resources/embeddings.test.ts

+49-8
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
import OpenAI from 'openai';
44
import { Response } from 'node-fetch';
5+
import { mockFetch } from '../utils/mock-fetch';
6+
import fs from 'fs/promises';
7+
import Path from 'path';
58

69
const client = new OpenAI({
710
apiKey: 'My API Key',
@@ -33,34 +36,72 @@ describe('resource embeddings', () => {
3336
});
3437
});
3538

36-
test('create: encoding_format=float should create float32 embeddings', async () => {
39+
test('create: encoding_format=default should create float32 embeddings', async () => {
40+
const client = makeClient();
3741
const response = await client.embeddings.create({
3842
input: 'The quick brown fox jumped over the lazy dog',
3943
model: 'text-embedding-3-small',
4044
});
4145

4246
expect(response.data?.at(0)?.embedding).toBeInstanceOf(Array);
43-
expect(Number.isFinite(response.data?.at(0)?.embedding.at(0))).toBe(true);
47+
expect(response.data?.at(0)?.embedding.at(0)).toBe(-0.09928705543279648);
4448
});
4549

46-
test('create: encoding_format=base64 should create float32 embeddings', async () => {
50+
test('create: encoding_format=float should create float32 embeddings', async () => {
51+
const client = makeClient();
4752
const response = await client.embeddings.create({
4853
input: 'The quick brown fox jumped over the lazy dog',
4954
model: 'text-embedding-3-small',
50-
encoding_format: 'base64',
55+
encoding_format: 'float',
5156
});
5257

5358
expect(response.data?.at(0)?.embedding).toBeInstanceOf(Array);
54-
expect(Number.isFinite(response.data?.at(0)?.embedding.at(0))).toBe(true);
59+
expect(response.data?.at(0)?.embedding.at(0)).toBe(-0.099287055);
5560
});
5661

57-
test('create: encoding_format=default should create float32 embeddings', async () => {
62+
test('create: encoding_format=base64 should return base64 embeddings', async () => {
63+
const client = makeClient();
5864
const response = await client.embeddings.create({
5965
input: 'The quick brown fox jumped over the lazy dog',
6066
model: 'text-embedding-3-small',
67+
encoding_format: 'base64',
6168
});
6269

63-
expect(response.data?.at(0)?.embedding).toBeInstanceOf(Array);
64-
expect(Number.isFinite(response.data?.at(0)?.embedding.at(0))).toBe(true);
70+
expect(typeof response.data?.at(0)?.embedding).toBe('string');
6571
});
6672
});
73+
74+
/**
 * Builds an OpenAI client whose `fetch` is the mock returned by `mockFetch()`,
 * with a request handler that answers from canned JSON fixture files.
 *
 * The handler parses the request body, reads its `encoding_format`, and replies
 * with the contents of `embeddings-base64-response.json` when the format is
 * `'base64'`, or `embeddings-float-response.json` for any other value
 * (including when the field is absent). Both files are resolved relative to
 * `__dirname`. The reply always has status 200 and a JSON content type.
 *
 * NOTE(review): `handleRequest` is defined in `../utils/mock-fetch`; whether the
 * handler serves one request or many is not visible here — confirm before
 * reusing a client across several calls.
 *
 * @returns a client configured with the mock `fetch`, a placeholder API key, and
 *   a base URL taken from `TEST_API_BASE_URL` (falling back to
 *   `http://127.0.0.1:4010`).
 */
function makeClient(): OpenAI {
75+
const { fetch, handleRequest } = mockFetch();
76+
77+
handleRequest(async (_, init) => {
78+
// Only `encoding_format` is read from the request; the other request fields
// (model, input) do not influence which fixture is served.
const format = (JSON.parse(init!.body as string) as OpenAI.EmbeddingCreateParams).encoding_format;
79+
return new Response(
80+
await fs.readFile(
81+
Path.join(
82+
__dirname,
83+
84+
// these responses were taken from the live API with:
85+
//
86+
// model: 'text-embedding-3-large',
87+
// input: 'h',
88+
// dimensions: 256,
89+
90+
format === 'base64' ? 'embeddings-base64-response.json' : 'embeddings-float-response.json',
91+
),
92+
),
93+
{
94+
status: 200,
95+
headers: {
96+
'Content-Type': 'application/json',
97+
},
98+
},
99+
);
100+
});
101+
102+
return new OpenAI({
103+
fetch,
104+
apiKey: 'My API Key',
105+
baseURL: process.env['TEST_API_BASE_URL'] ?? 'http://127.0.0.1:4010',
106+
});
107+
}

0 commit comments

Comments
 (0)