@@ -439,7 +439,7 @@ export type TiktokenModel =
439
439
| "gpt-4o-realtime-preview-2024-10-01"
440
440
441
441
/**
442
- * @param {TiktokenModel} encoding
442
+ * @param {TiktokenModel} model
443
443
* @param {Record<string, number>} [extend_special_tokens]
444
444
* @returns {Tiktoken}
445
445
*/
@@ -452,84 +452,8 @@ pub fn encoding_for_model(
452
452
model : & str ,
453
453
extend_special_tokens : JsValue ,
454
454
) -> Result < Tiktoken , JsError > {
455
- let encoding = match model {
456
- "text-davinci-003" => Ok ( "p50k_base" ) ,
457
- "text-davinci-002" => Ok ( "p50k_base" ) ,
458
- "text-davinci-001" => Ok ( "r50k_base" ) ,
459
- "text-curie-001" => Ok ( "r50k_base" ) ,
460
- "text-babbage-001" => Ok ( "r50k_base" ) ,
461
- "text-ada-001" => Ok ( "r50k_base" ) ,
462
- "davinci" => Ok ( "r50k_base" ) ,
463
- "davinci-002" => Ok ( "cl100k_base" ) ,
464
- "curie" => Ok ( "r50k_base" ) ,
465
- "babbage" => Ok ( "r50k_base" ) ,
466
- "babbage-002" => Ok ( "cl100k_base" ) ,
467
- "ada" => Ok ( "r50k_base" ) ,
468
- "code-davinci-002" => Ok ( "p50k_base" ) ,
469
- "code-davinci-001" => Ok ( "p50k_base" ) ,
470
- "code-cushman-002" => Ok ( "p50k_base" ) ,
471
- "code-cushman-001" => Ok ( "p50k_base" ) ,
472
- "davinci-codex" => Ok ( "p50k_base" ) ,
473
- "cushman-codex" => Ok ( "p50k_base" ) ,
474
- "text-davinci-edit-001" => Ok ( "p50k_edit" ) ,
475
- "code-davinci-edit-001" => Ok ( "p50k_edit" ) ,
476
- "text-embedding-ada-002" => Ok ( "cl100k_base" ) ,
477
- "text-embedding-3-small" => Ok ( "cl100k_base" ) ,
478
- "text-embedding-3-large" => Ok ( "cl100k_base" ) ,
479
- "text-similarity-davinci-001" => Ok ( "r50k_base" ) ,
480
- "text-similarity-curie-001" => Ok ( "r50k_base" ) ,
481
- "text-similarity-babbage-001" => Ok ( "r50k_base" ) ,
482
- "text-similarity-ada-001" => Ok ( "r50k_base" ) ,
483
- "text-search-davinci-doc-001" => Ok ( "r50k_base" ) ,
484
- "text-search-curie-doc-001" => Ok ( "r50k_base" ) ,
485
- "text-search-babbage-doc-001" => Ok ( "r50k_base" ) ,
486
- "text-search-ada-doc-001" => Ok ( "r50k_base" ) ,
487
- "code-search-babbage-code-001" => Ok ( "r50k_base" ) ,
488
- "code-search-ada-code-001" => Ok ( "r50k_base" ) ,
489
- "gpt2" => Ok ( "gpt2" ) ,
490
- "gpt-3.5-turbo" => Ok ( "cl100k_base" ) ,
491
- "gpt-3.5-turbo-0301" => Ok ( "cl100k_base" ) ,
492
- "gpt-3.5-turbo-0613" => Ok ( "cl100k_base" ) ,
493
- "gpt-3.5-turbo-16k" => Ok ( "cl100k_base" ) ,
494
- "gpt-3.5-turbo-16k-0613" => Ok ( "cl100k_base" ) ,
495
- "gpt-3.5-turbo-instruct" => Ok ( "cl100k_base" ) ,
496
- "gpt-3.5-turbo-instruct-0914" => Ok ( "cl100k_base" ) ,
497
- "gpt-4" => Ok ( "cl100k_base" ) ,
498
- "gpt-4-0314" => Ok ( "cl100k_base" ) ,
499
- "gpt-4-0613" => Ok ( "cl100k_base" ) ,
500
- "gpt-4-32k" => Ok ( "cl100k_base" ) ,
501
- "gpt-4-32k-0314" => Ok ( "cl100k_base" ) ,
502
- "gpt-4-32k-0613" => Ok ( "cl100k_base" ) ,
503
- "gpt-3.5-turbo-1106" => Ok ( "cl100k_base" ) ,
504
- "gpt-35-turbo" => Ok ( "cl100k_base" ) ,
505
- "gpt-4-1106-preview" => Ok ( "cl100k_base" ) ,
506
- "gpt-4-vision-preview" => Ok ( "cl100k_base" ) ,
507
- "gpt-3.5-turbo-0125" => Ok ( "cl100k_base" ) ,
508
- "gpt-4-turbo" => Ok ( "cl100k_base" ) ,
509
- "gpt-4-turbo-2024-04-09" => Ok ( "cl100k_base" ) ,
510
- "gpt-4-turbo-preview" => Ok ( "cl100k_base" ) ,
511
- "gpt-4-0125-preview" => Ok ( "cl100k_base" ) ,
512
- "gpt-4o" => Ok ( "o200k_base" ) ,
513
- "gpt-4o-2024-05-13" => Ok ( "o200k_base" ) ,
514
- "gpt-4o-2024-08-06" => Ok ( "o200k_base" ) ,
515
- "gpt-4o-2024-11-20" => Ok ( "o200k_base" ) ,
516
- "gpt-4o-mini-2024-07-18" => Ok ( "o200k_base" ) ,
517
- "gpt-4o-mini" => Ok ( "o200k_base" ) ,
518
- "o1" => Ok ( "o200k_base" ) ,
519
- "o1-2024-12-17" => Ok ( "o200k_base" ) ,
520
- "o1-mini" => Ok ( "o200k_base" ) ,
521
- "o1-preview" => Ok ( "o200k_base" ) ,
522
- "o1-preview-2024-09-12" => Ok ( "o200k_base" ) ,
523
- "o1-mini-2024-09-12" => Ok ( "o200k_base" ) ,
524
- "chatgpt-4o-latest" => Ok ( "o200k_base" ) ,
525
- "gpt-4o-realtime" => Ok ( "o200k_base" ) ,
526
- "gpt-4o-realtime-preview-2024-10-01" => Ok ( "o200k_base" ) ,
527
- "o3-mini" => Ok ( "o200k_base" ) ,
528
- "o3-mini-2025-01-31" => Ok ( "o200k_base" ) ,
529
- model => Err ( JsError :: new (
530
- format ! ( "Invalid model: {}" , model. to_string( ) ) . as_str ( ) ,
531
- ) ) ,
532
- } ?;
455
+ let binding = get_encoding_name_for_model ( model) ?;
456
+ let encoding = binding. as_str ( ) ;
533
457
534
458
Tiktoken :: with_encoding (
535
459
encoding,
@@ -538,3 +462,96 @@ pub fn encoding_for_model(
538
462
. ok ( ) ,
539
463
)
540
464
}
465
+
466
// Hand-written TypeScript declaration for `get_encoding_name_for_model`; the
// Rust function below is marked `skip_typescript`, so this is the signature
// TypeScript consumers see.
#[cfg(feature = "inline")]
#[wasm_bindgen(typescript_custom_section)]
const _: &'static str = r#"
/**
 * @param {TiktokenModel} model
 * @returns {TiktokenEncoding}
 */
export function get_encoding_name_for_model(model: TiktokenModel): TiktokenEncoding;
"#;

/// Maps a model identifier to the name of the encoding it uses.
///
/// Returns the encoding name (`"gpt2"`, `"r50k_base"`, `"p50k_base"`,
/// `"p50k_edit"`, `"cl100k_base"` or `"o200k_base"`) as an owned `String`.
///
/// # Errors
///
/// Returns a `JsError` whose message is `Invalid model: <model>` when `model`
/// is not one of the identifiers listed below. Matching is exact and
/// case-sensitive.
#[cfg(feature = "inline")]
#[wasm_bindgen(skip_typescript)]
pub fn get_encoding_name_for_model(model: &str) -> Result<String, JsError> {
    // Models are grouped by encoding; each identifier appears exactly once.
    let encoding_name: &'static str = match model {
        "gpt2" => "gpt2",

        "text-davinci-001"
        | "text-curie-001"
        | "text-babbage-001"
        | "text-ada-001"
        | "davinci"
        | "curie"
        | "babbage"
        | "ada"
        | "text-similarity-davinci-001"
        | "text-similarity-curie-001"
        | "text-similarity-babbage-001"
        | "text-similarity-ada-001"
        | "text-search-davinci-doc-001"
        | "text-search-curie-doc-001"
        | "text-search-babbage-doc-001"
        | "text-search-ada-doc-001"
        | "code-search-babbage-code-001"
        | "code-search-ada-code-001" => "r50k_base",

        "text-davinci-003"
        | "text-davinci-002"
        | "code-davinci-002"
        | "code-davinci-001"
        | "code-cushman-002"
        | "code-cushman-001"
        | "davinci-codex"
        | "cushman-codex" => "p50k_base",

        "text-davinci-edit-001" | "code-davinci-edit-001" => "p50k_edit",

        "davinci-002"
        | "babbage-002"
        | "text-embedding-ada-002"
        | "text-embedding-3-small"
        | "text-embedding-3-large"
        | "gpt-3.5-turbo"
        | "gpt-3.5-turbo-0301"
        | "gpt-3.5-turbo-0613"
        | "gpt-3.5-turbo-16k"
        | "gpt-3.5-turbo-16k-0613"
        | "gpt-3.5-turbo-instruct"
        | "gpt-3.5-turbo-instruct-0914"
        | "gpt-3.5-turbo-1106"
        | "gpt-3.5-turbo-0125"
        | "gpt-35-turbo"
        | "gpt-4"
        | "gpt-4-0314"
        | "gpt-4-0613"
        | "gpt-4-32k"
        | "gpt-4-32k-0314"
        | "gpt-4-32k-0613"
        | "gpt-4-1106-preview"
        | "gpt-4-vision-preview"
        | "gpt-4-turbo"
        | "gpt-4-turbo-2024-04-09"
        | "gpt-4-turbo-preview"
        | "gpt-4-0125-preview" => "cl100k_base",

        "gpt-4o"
        | "gpt-4o-2024-05-13"
        | "gpt-4o-2024-08-06"
        | "gpt-4o-2024-11-20"
        | "gpt-4o-mini-2024-07-18"
        | "gpt-4o-mini"
        | "o1"
        | "o1-2024-12-17"
        | "o1-mini"
        | "o1-preview"
        | "o1-preview-2024-09-12"
        | "o1-mini-2024-09-12"
        | "chatgpt-4o-latest"
        | "gpt-4o-realtime"
        | "gpt-4o-realtime-preview-2024-10-01"
        | "o3-mini"
        | "o3-mini-2025-01-31" => "o200k_base",

        // Anything else is rejected with the offending identifier in the message.
        unknown => {
            let message = format!("Invalid model: {}", unknown);
            return Err(JsError::new(message.as_str()));
        }
    };

    Ok(encoding_name.to_owned())
}
0 commit comments