Merge pull request #58 from vlm-run/sh/add-dataset
add datasets endpoint
Showing 8 changed files with 499 additions and 48 deletions.
package.json

@@ -1,6 +1,6 @@
 {
   "name": "vlmrun",
-  "version": "0.2.6",
+  "version": "0.2.7",
   "description": "The official TypeScript library for the VlmRun API",
   "author": "VlmRun <[email protected]>",
   "main": "dist/index.js",
@@ -26,9 +26,10 @@
   "dependencies": {
     "axios": "^1.7.9",
     "dotenv": "^16.4.7",
+    "mime-types": "^2.1.35",
     "path": "^0.12.7",
+    "tar": "^7.4.3",
     "zod": "~3.24.2",
-    "mime-types": "^2.1.35",
     "zod-to-json-schema": "~3.24.1"
   },
   "devDependencies": {
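The new tar dependency backs the createArchive helper that the Datasets client below uses to bundle a dataset directory before upload. That helper's implementation is not part of this diff; the following is a minimal sketch of how node-tar could be used for it, assuming the archive is written to the OS temp directory (the output path and the top-level-entry listing are assumptions, not the SDK's actual behavior).

import * as fs from "fs";
import * as os from "os";
import * as path from "path";
import * as tar from "tar";

// Hypothetical sketch of a createArchive(directory, name) helper: it packs
// the contents of `directory` into a gzipped tarball in the OS temp
// directory and resolves with the archive path. The real helper lives in
// ../utils and may differ in naming and output location.
async function createArchive(directory: string, name: string): Promise<string> {
  const outPath = path.join(os.tmpdir(), `${name}.tar.gz`);
  await tar.create(
    { gzip: true, file: outPath, cwd: directory }, // write a .tar.gz rooted at `directory`
    fs.readdirSync(directory)                      // include every top-level entry
  );
  return outPath;
}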
@@ -0,0 +1,102 @@
import { Client, APIRequestor } from "./base_requestor";
import { DatasetResponse, DatasetCreateParams, DatasetListParams } from "./types";
import { createArchive } from "../utils";
import * as fs from "fs";
import * as path from "path";
import { Files } from "../index";

export class Datasets {
  private requestor: APIRequestor;
  private files: Files;

  constructor(client: Client) {
    this.requestor = new APIRequestor({
      ...client,
      baseURL: `${client.baseURL}/datasets`,
    });

    this.files = new Files(client);
  }

  /**
   * Create a dataset from a directory of files.
   *
   * @param params Dataset creation parameters.
   * @returns The dataset creation response.
   */
  async create(params: DatasetCreateParams): Promise<DatasetResponse> {
    const validTypes = ["images", "videos", "documents"];
    if (!validTypes.includes(params.datasetType)) {
      throw new Error("dataset_type must be one of: images, videos, documents");
    }

    // Create tar.gz archive of the dataset directory.
    const tarPath = await createArchive(params.datasetDirectory, params.datasetName);
    const tarSizeMB = (fs.statSync(tarPath).size / 1024 / 1024).toFixed(2);
    console.debug(`Created tar.gz file [path=${tarPath}, size=${tarSizeMB} MB]`);

    // Upload the tar.gz file using the client's file upload method.
    const fileResponse = await this.files.upload({
      filePath: tarPath,
      purpose: "datasets",
    });
    const fileSizeMB = (fileResponse.bytes / 1024 / 1024).toFixed(2);
    console.debug(
      `Uploaded tar.gz file [path=${tarPath}, file_id=${fileResponse.id}, size=${fileSizeMB} MB]`
    );

    // Create the dataset by sending a POST request.
    const [response] = await this.requestor.request<DatasetResponse>(
      "POST",
      "create",
      undefined, // No query parameters
      {
        file_id: fileResponse.id,
        domain: params.domain,
        dataset_name: params.datasetName,
        dataset_type: params.datasetType,
        wandb_base_url: params.wandbBaseUrl,
        wandb_project_name: params.wandbProjectName,
        wandb_api_key: params.wandbApiKey,
      }
    );

    return response;
  }

  /**
   * Get dataset information by its ID.
   *
   * @param datasetId The ID of the dataset to retrieve.
   * @returns The dataset information.
   */
  async get(datasetId: string): Promise<DatasetResponse> {
    const [response] = await this.requestor.request<DatasetResponse>(
      "GET",
      datasetId
    );
    return response;
  }

  /**
   * List datasets with pagination support.
   *
   * @param params Optional pagination parameters (skip, limit).
   * @returns A list of dataset responses.
   */
  async list(params?: DatasetListParams): Promise<DatasetResponse[]> {
    const [items] = await this.requestor.request<DatasetResponse[]>(
      "GET",
      "",
      {
        skip: params?.skip ?? 0,
        limit: params?.limit ?? 10,
      }
    );
    if (!Array.isArray(items)) {
      throw new Error("Expected array response");
    }
    return items;
  }
}
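For reference, a usage sketch of the new datasets endpoint, assuming the SDK exposes the resource as client.datasets on a top-level VlmRun client; the client construction, the example domain, the local directory, and the dataset ID below are illustrative assumptions, while the parameter names and the valid dataset types come from the diff above.

import { VlmRun } from "vlmrun";

// Assumed client entry point; only the Datasets methods themselves are
// defined in this diff.
const client = new VlmRun({ apiKey: process.env.VLMRUN_API_KEY! });

async function demo() {
  // Archive ./data/invoices (hypothetical path), upload it, and register
  // it as a dataset. datasetType must be "images", "videos", or "documents".
  const created = await client.datasets.create({
    datasetDirectory: "./data/invoices",
    datasetName: "invoices-v1",
    datasetType: "images",
    domain: "document.invoice", // example domain
  });
  console.debug("Created dataset:", created);

  // Fetch a dataset by ID and list existing datasets with pagination.
  // Response field names are defined in ./types and not shown in this diff,
  // so a placeholder ID is used here.
  const dataset = await client.datasets.get("dataset_abc123");
  const datasets = await client.datasets.list({ skip: 0, limit: 5 });
  console.debug(dataset, datasets.length);
}

demo().catch(console.error);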