Release 7.7.0

cohere-ai · Jan 17, 2024 · 6f1a6af · 6f1a6af
1 parent c2eceb4
commit 6f1a6af
Show file tree

Hide file tree

Showing 150 changed files with 2,598 additions and 440 deletions.
diff --git a/package.json b/package.json
@@ -1,6 +1,6 @@
 {
     "name": "cohere-ai",
-    "version": "7.6.2",
+    "version": "7.7.0",
     "private": false,
     "repository": "https://github.com/cohere-ai/cohere-typescript",
     "main": "./index.js",

diff --git a/src/Client.ts b/src/Client.ts
diff --git a/src/api/client/requests/ChatRequest.ts b/src/api/client/requests/ChatRequest.ts
@@ -13,7 +13,9 @@ export interface ChatRequest {
     message: string;
     /**
      * Defaults to `command`.
-     * The identifier of the model, which can be one of the existing Cohere models or the full ID for a [finetuned custom model](/docs/training-custom-models).
+     *
+     * The identifier of the model, which can be one of the existing Cohere models or the full ID for a [fine-tuned custom model](https://docs.cohere.com/docs/chat-fine-tuning).
+     *
      * Compatible Cohere models are `command` and `command-light` as well as the experimental `command-nightly` and `command-light-nightly` variants. Read more about [Cohere models](https://docs.cohere.com/docs/models).
      *
      */
@@ -30,26 +32,32 @@ export interface ChatRequest {
     chatHistory?: Cohere.ChatMessage[];
     /**
      * An alternative to `chat_history`. Previous conversations can be resumed by providing the conversation's identifier. The contents of `message` and the model's response will be stored as part of this conversation.
+     *
      * If a conversation with this id does not already exist, a new conversation will be created.
      *
      */
     conversationId?: string;
     /**
      * Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.
+     *
      * Dictates how the prompt will be constructed.
+     *
      * With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit.
+     *
      * With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.
      *
      */
     promptTruncation?: Cohere.ChatRequestPromptTruncation;
     /**
      * Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one.
+     *
      * When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG).
      *
      */
     connectors?: Cohere.ChatConnector[];
     /**
      * Defaults to `false`.
+     *
      * When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.
      *
      */
@@ -61,12 +69,14 @@ export interface ChatRequest {
     documents?: Cohere.ChatDocument[];
     /**
      * Defaults to `"accurate"`.
+     *
      * Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results or `"fast"` results.
      *
      */
     citationQuality?: Cohere.ChatRequestCitationQuality;
     /**
-     * Defaults to `0.3`
+     * Defaults to `0.3`.
+     *
      * A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.
      *
      */

diff --git a/src/api/client/requests/ChatStreamRequest.ts b/src/api/client/requests/ChatStreamRequest.ts
@@ -13,7 +13,9 @@ export interface ChatStreamRequest {
     message: string;
     /**
      * Defaults to `command`.
-     * The identifier of the model, which can be one of the existing Cohere models or the full ID for a [finetuned custom model](/docs/training-custom-models).
+     *
+     * The identifier of the model, which can be one of the existing Cohere models or the full ID for a [fine-tuned custom model](https://docs.cohere.com/docs/chat-fine-tuning).
+     *
      * Compatible Cohere models are `command` and `command-light` as well as the experimental `command-nightly` and `command-light-nightly` variants. Read more about [Cohere models](https://docs.cohere.com/docs/models).
      *
      */
@@ -30,26 +32,32 @@ export interface ChatStreamRequest {
     chatHistory?: Cohere.ChatMessage[];
     /**
      * An alternative to `chat_history`. Previous conversations can be resumed by providing the conversation's identifier. The contents of `message` and the model's response will be stored as part of this conversation.
+     *
      * If a conversation with this id does not already exist, a new conversation will be created.
      *
      */
     conversationId?: string;
     /**
      * Defaults to `AUTO` when `connectors` are specified and `OFF` in all other cases.
+     *
      * Dictates how the prompt will be constructed.
+     *
      * With `prompt_truncation` set to "AUTO", some elements from `chat_history` and `documents` will be dropped in an attempt to construct a prompt that fits within the model's context length limit.
+     *
      * With `prompt_truncation` set to "OFF", no elements will be dropped. If the sum of the inputs exceeds the model's context length limit, a `TooManyTokens` error will be returned.
      *
      */
     promptTruncation?: Cohere.ChatStreamRequestPromptTruncation;
     /**
      * Accepts `{"id": "web-search"}`, and/or the `"id"` for a custom [connector](https://docs.cohere.com/docs/connectors), if you've [created](https://docs.cohere.com/docs/creating-and-deploying-a-connector) one.
+     *
      * When specified, the model's reply will be enriched with information found by querying each of the connectors (RAG).
      *
      */
     connectors?: Cohere.ChatConnector[];
     /**
      * Defaults to `false`.
+     *
      * When `true`, the response will only contain a list of generated search queries, but no search will take place, and no reply from the model to the user's `message` will be generated.
      *
      */
@@ -61,12 +69,14 @@ export interface ChatStreamRequest {
     documents?: Cohere.ChatDocument[];
     /**
      * Defaults to `"accurate"`.
+     *
      * Dictates the approach taken to generating citations as part of the RAG flow by allowing the user to specify whether they want `"accurate"` results or `"fast"` results.
      *
      */
     citationQuality?: Cohere.ChatStreamRequestCitationQuality;
     /**
-     * Defaults to `0.3`
+     * Defaults to `0.3`.
+     *
      * A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations, and higher temperatures mean more random generations.
      *
      */

diff --git a/src/api/client/requests/ClassifyRequest.ts b/src/api/client/requests/ClassifyRequest.ts
@@ -5,14 +5,18 @@
 import * as Cohere from "../..";
 
 export interface ClassifyRequest {
-    /** Represents a list of queries to be classified, each entry must not be empty. The maximum is 96 inputs. */
+    /**
+     * A list of up to 96 texts to be classified. Each one must be a non-empty string.
+     * There is, however, no consistent, universal limit to the length a particular input can be. We perform classification on the first `x` tokens of each input, and `x` varies depending on which underlying model is powering classification. The maximum token length for each model is listed in the "max tokens" column [here](https://docs.cohere.com/docs/models).
+     * Note: by default the `truncate` parameter is set to `END`, so tokens exceeding the limit will be automatically dropped. This behavior can be disabled by setting `truncate` to `NONE`, which will result in validation errors for longer texts.
+     */
     inputs: string[];
     /**
      * An array of examples to provide context to the model. Each example is a text string and its associated label/class. Each unique label requires at least 2 examples associated with it; the maximum number of examples is 2500, and each example has a maximum length of 512 tokens. The values should be structured as `{text: "...",label: "..."}`.
-     * Note: [Custom Models](/training-representation-models) trained on classification examples don't require the `examples` parameter to be passed in explicitly.
+     * Note: [Fine-tuned Models](https://docs.cohere.com/docs/classify-fine-tuning) trained on classification examples don't require the `examples` parameter to be passed in explicitly.
      */
     examples: Cohere.ClassifyRequestExamplesItem[];
-    /** The identifier of the model. Currently available models are `embed-multilingual-v2.0`, `embed-english-light-v2.0`, and `embed-english-v2.0` (default). Smaller "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID. */
+    /** The identifier of the model. Currently available models are `embed-multilingual-v2.0`, `embed-english-light-v2.0`, and `embed-english-v2.0` (default). Smaller "light" models are faster, while larger models will perform better. [Fine-tuned models](https://docs.cohere.com/docs/fine-tuning) can also be supplied with their full ID. */
     model?: string;
     /** The ID of a custom playground preset. You can create presets in the [playground](https://dashboard.cohere.ai/playground/classify?model=large). If you use a preset, all other parameters become optional, and any included parameters will override the preset's parameters. */
     preset?: string;

diff --git a/src/api/client/requests/EmbedRequest.ts b/src/api/client/requests/EmbedRequest.ts
@@ -24,15 +24,7 @@ export interface EmbedRequest {
      * * `embed-multilingual-v2.0`  768
      */
     model?: string;
-    /**
-     * Specifies the type of input you're giving to the model. Not required for older versions of the embedding models (i.e. anything lower than v3), but is required for more recent versions (i.e. anything bigger than v2).
-     *
-     * * `"search_document"`: Use this when you encode documents for embeddings that you store in a vector database for search use-cases.
-     * * `"search_query"`: Use this when you query your vector DB to find relevant documents.
-     * * `"classification"`: Use this when you use the embeddings as an input to a text classifier.
-     * * `"clustering"`: Use this when you want to cluster the embeddings.
-     */
-    inputType?: string;
+    inputType?: Cohere.EmbedInputType;
     /**
      * Specifies the types of embeddings you want to get back. Not required and default is None, which returns the Embed Floats response type. Can be one or more of the following types.
      *

diff --git a/src/api/client/requests/GenerateRequest.ts b/src/api/client/requests/GenerateRequest.ts
@@ -21,17 +21,6 @@ export interface GenerateRequest {
      *
      */
     numGenerations?: number;
-    /**
-     * When `true`, the response will be a JSON stream of events. Streaming is beneficial for user interfaces that render the contents of the response piece by piece, as it gets generated.
-     *
-     * The final event will contain the complete response, and will contain an `is_finished` field set to `true`. The event will also contain a `finish_reason`, which can be one of the following:
-     * - `COMPLETE` - the model sent back a finished reply
-     * - `MAX_TOKENS` - the reply was cut off because the model reached the maximum number of tokens for its context length
-     * - `ERROR` - something went wrong when generating the reply
-     * - `ERROR_TOXIC` - the model generated a reply that was deemed toxic
-     *
-     */
-    stream?: boolean;
     /**
      * The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.
      *

diff --git a/src/api/client/requests/GenerateStreamRequest.ts b/src/api/client/requests/GenerateStreamRequest.ts
@@ -0,0 +1,92 @@
+/**
+ * This file was auto-generated by Fern from our API Definition.
+ */
+
+import * as Cohere from "../..";
+
+export interface GenerateStreamRequest {
+    /**
+     * The input text that serves as the starting point for generating the response.
+     * Note: The prompt will be pre-processed and modified before reaching the model.
+     *
+     */
+    prompt: string;
+    /**
+     * The identifier of the model to generate with. Currently available models are `command` (default), `command-nightly` (experimental), `command-light`, and `command-light-nightly` (experimental).
+     * Smaller, "light" models are faster, while larger models will perform better. [Custom models](/docs/training-custom-models) can also be supplied with their full ID.
+     */
+    model?: string;
+    /**
+     * The maximum number of generations that will be returned. Defaults to `1`, min value of `1`, max value of `5`.
+     *
+     */
+    numGenerations?: number;
+    /**
+     * The maximum number of tokens the model will generate as part of the response. Note: Setting a low value may result in incomplete generations.
+     *
+     * This parameter is off by default, and if it's not specified, the model will continue generating until it emits an EOS completion token. See [BPE Tokens](/bpe-tokens-wiki) for more details.
+     *
+     * Can only be set to `0` if `return_likelihoods` is set to `ALL` to get the likelihood of the prompt.
+     *
+     */
+    maxTokens?: number;
+    /**
+     * One of `NONE|START|END` to specify how the API will handle inputs longer than the maximum token length.
+     *
+     * Passing `START` will discard the start of the input. `END` will discard the end of the input. In both cases, input is discarded until the remaining input is exactly the maximum input token length for the model.
+     *
+     * If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
+     */
+    truncate?: Cohere.GenerateStreamRequestTruncate;
+    /**
+     * A non-negative float that tunes the degree of randomness in generation. Lower temperatures mean less random generations. See [Temperature](/temperature-wiki) for more details.
+     * Defaults to `0.75`, min value of `0.0`, max value of `5.0`.
+     *
+     */
+    temperature?: number;
+    /**
+     * Identifier of a custom preset. A preset is a combination of parameters, such as prompt, temperature etc. You can create presets in the [playground](https://dashboard.cohere.ai/playground/generate).
+     * When a preset is specified, the `prompt` parameter becomes optional, and any included parameters will override the preset's parameters.
+     *
+     */
+    preset?: string;
+    /** The generated text will be cut at the beginning of the earliest occurrence of an end sequence. The sequence will be excluded from the text. */
+    endSequences?: string[];
+    /** The generated text will be cut at the end of the earliest occurrence of a stop sequence. The sequence will be included in the text. */
+    stopSequences?: string[];
+    /**
+     * Ensures only the top `k` most likely tokens are considered for generation at each step.
+     * Defaults to `0`, min value of `0`, max value of `500`.
+     *
+     */
+    k?: number;
+    /**
+     * Ensures that only the most likely tokens, with total probability mass of `p`, are considered for generation at each step. If both `k` and `p` are enabled, `p` acts after `k`.
+     * Defaults to `0.75`, min value of `0.01`, max value of `0.99`.
+     *
+     */
+    p?: number;
+    /**
+     * Used to reduce repetitiveness of generated tokens. The higher the value, the stronger a penalty is applied to previously present tokens, proportional to how many times they have already appeared in the prompt or prior generation.
+     *
+     */
+    frequencyPenalty?: number;
+    /** Defaults to `0.0`, min value of `0.0`, max value of `1.0`. Can be used to reduce repetitiveness of generated tokens. Similar to `frequency_penalty`, except that this penalty is applied equally to all tokens that have already appeared, regardless of their exact frequencies. */
+    presencePenalty?: number;
+    /**
+     * One of `GENERATION|ALL|NONE` to specify how and if the token likelihoods are returned with the response. Defaults to `NONE`.
+     *
+     * If `GENERATION` is selected, the token likelihoods will only be provided for generated text.
+     *
+     * If `ALL` is selected, the token likelihoods will be provided both for the prompt and the generated text.
+     */
+    returnLikelihoods?: Cohere.GenerateStreamRequestReturnLikelihoods;
+    /**
+     * Used to prevent the model from generating unwanted tokens or to incentivize it to include desired tokens. The format is `{token_id: bias}` where bias is a float between -10 and 10. Tokens can be obtained from text using [Tokenize](/reference/tokenize).
+     *
+     * For example, if the value `{'11': -10}` is provided, the model will be very unlikely to include the token 11 (`"\n"`, the newline character) anywhere in the generated text. In contrast `{'11': 10}` will result in generations that nearly only contain that token. Values between -10 and 10 will proportionally affect the likelihood of the token appearing in the generated text.
+     *
+     * Note: logit bias may not be supported for all custom models.
+     */
+    logitBias?: Record<string, number>;
+}
diff --git a/src/api/client/requests/index.ts b/src/api/client/requests/index.ts
@@ -1,5 +1,6 @@
 export { ChatStreamRequest } from "./ChatStreamRequest";
 export { ChatRequest } from "./ChatRequest";
+export { GenerateStreamRequest } from "./GenerateStreamRequest";
 export { GenerateRequest } from "./GenerateRequest";
 export { EmbedRequest } from "./EmbedRequest";
 export { RerankRequest } from "./RerankRequest";

diff --git a/src/api/index.ts b/src/api/index.ts
@@ -1,4 +1,4 @@
+export * from "./resources";
 export * from "./types";
 export * from "./errors";
-export * from "./resources";
 export * from "./client";