Skip to content

Commit

Permalink
Merge pull request #58 from vlm-run/sh/add-dataset
Browse files Browse the repository at this point in the history
add datasets endpoint
  • Loading branch information
shahrear33 authored Feb 18, 2025
2 parents ae5ea56 + 720fb8f commit f054afb
Show file tree
Hide file tree
Showing 8 changed files with 499 additions and 48 deletions.
136 changes: 93 additions & 43 deletions package-lock.json

Large diffs are not rendered by default.

5 changes: 3 additions & 2 deletions package.json
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
{
"name": "vlmrun",
"version": "0.2.6",
"version": "0.2.7",
"description": "The official TypeScript library for the VlmRun API",
"author": "VlmRun <[email protected]>",
"main": "dist/index.js",
Expand All @@ -26,9 +26,10 @@
"dependencies": {
"axios": "^1.7.9",
"dotenv": "^16.4.7",
"mime-types": "^2.1.35",
"path": "^0.12.7",
"tar": "^7.4.3",
"zod": "~3.24.2",
"mime-types": "^2.1.35",
"zod-to-json-schema": "~3.24.1"
},
"devDependencies": {
Expand Down
102 changes: 102 additions & 0 deletions src/client/datasets.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,102 @@
import { Client, APIRequestor } from "./base_requestor";
import { DatasetResponse, DatasetCreateParams, DatasetListParams } from "./types";
import { createArchive } from "../utils";
import * as fs from "fs";
import * as path from "path";
import { Files } from "../index";

/**
 * Client for the `/datasets` endpoints: creating a dataset from a local
 * directory, fetching one by ID, and listing datasets.
 */
export class Datasets {
  private readonly requestor: APIRequestor;
  private readonly files: Files;

  /**
   * @param client Shared client configuration. Requests made by this class are
   *   issued against `${client.baseURL}/datasets`; file uploads go through a
   *   `Files` instance built from the same client.
   */
  constructor(client: Client) {
    this.requestor = new APIRequestor({
      ...client,
      baseURL: `${client.baseURL}/datasets`,
    });

    this.files = new Files(client);
  }

  /**
   * Create a dataset from a directory of files.
   *
   * The directory is packed into a temporary tar.gz archive, uploaded with
   * purpose `"datasets"`, and then registered through a `POST create` request.
   * The temporary archive is removed before this method settles, whether it
   * succeeds or fails.
   *
   * @param params Dataset creation parameters.
   * @returns The dataset creation response.
   * @throws Error if `params.datasetType` is not one of `images`, `videos`,
   *   `documents`; errors from archiving, uploading, or the API request are
   *   propagated unchanged.
   */
  async create(params: DatasetCreateParams): Promise<DatasetResponse> {
    // Runtime guard for callers that bypass the compile-time union (e.g. plain JS).
    const validTypes = ["images", "videos", "documents"];
    if (!validTypes.includes(params.datasetType)) {
      throw new Error("dataset_type must be one of: images, videos, documents");
    }

    // Create tar.gz archive of the dataset directory.
    const tarPath = await createArchive(params.datasetDirectory, params.datasetName);

    try {
      const tarSizeMB = (fs.statSync(tarPath).size / 1024 / 1024).toFixed(2);
      console.debug(`Created tar.gz file [path=${tarPath}, size=${tarSizeMB} MB]`);

      // Upload the tar.gz file using the client's file upload method.
      const fileResponse = await this.files.upload({
        filePath: tarPath,
        purpose: "datasets",
      });
      const fileSizeMB = (fileResponse.bytes / 1024 / 1024).toFixed(2);
      console.debug(
        `Uploaded tar.gz file [path=${tarPath}, file_id=${fileResponse.id}, size=${fileSizeMB} MB]`
      );

      // Create the dataset by sending a POST request.
      const [response] = await this.requestor.request<DatasetResponse>(
        "POST",
        "create",
        undefined, // No query parameters
        {
          file_id: fileResponse.id,
          domain: params.domain,
          dataset_name: params.datasetName,
          dataset_type: params.datasetType,
          wandb_base_url: params.wandbBaseUrl,
          wandb_project_name: params.wandbProjectName,
          wandb_api_key: params.wandbApiKey,
        }
      );

      return response;
    } finally {
      // The archive is only an upload staging file; without this it would
      // accumulate in the temp directory on every call.
      try {
        fs.unlinkSync(tarPath);
      } catch {
        // Deliberately best-effort: a cleanup failure must not mask the
        // result (or the original error) of the create call.
      }
    }
  }

  /**
   * Get dataset information by its ID.
   *
   * @param datasetId The ID of the dataset to retrieve; used as the request path.
   * @returns The dataset information.
   */
  async get(datasetId: string): Promise<DatasetResponse> {
    const [response] = await this.requestor.request<DatasetResponse>(
      "GET",
      datasetId
    );
    return response;
  }

  /**
   * List datasets with pagination support.
   *
   * @param params Optional pagination options: `skip` (number of datasets to
   *   skip, defaults to 0) and `limit` (maximum number of datasets to return,
   *   defaults to 10).
   * @returns A list of dataset responses.
   * @throws Error if the response body is not an array.
   */
  async list(params?: DatasetListParams): Promise<DatasetResponse[]> {
    const [items] = await this.requestor.request<DatasetResponse[]>(
      "GET",
      "",
      {
        skip: params?.skip ?? 0,
        limit: params?.limit ?? 10,
      }
    );
    if (!Array.isArray(items)) {
      throw new Error("Expected array response");
    }
    return items;
  }
}
2 changes: 1 addition & 1 deletion src/client/fine_tuning.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { Client, APIRequestor } from "./base_requestor";
import { FinetuningResponse, FinetuningProvisionResponse, FinetuningGenerateParams, FinetuningListParams, PredictionResponse, FinetuningCreateParams, FinetuningProvisionParams } from "./types";
import { encodeImage, processImage } from "../utils";
import { processImage } from "../utils";

export class Finetuning {
private requestor: APIRequestor;
Expand Down
30 changes: 28 additions & 2 deletions src/client/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -247,6 +247,34 @@ export interface FinetuningListParams {
limit?: number;
}

/**
 * Pagination options accepted by `Datasets.list`.
 */
export interface DatasetListParams {
/** Number of datasets to skip; `Datasets.list` sends 0 when omitted. */
skip?: number;
/** Maximum number of datasets to return; `Datasets.list` sends 10 when omitted. */
limit?: number;
}

/**
 * Dataset record returned by the `Datasets` client (`create`, `get`, `list`).
 *
 * NOTE(review): field names are snake_case, presumably mirroring the API's
 * JSON payload — confirm against the API reference.
 */
export interface DatasetResponse {
/** Identifier of the dataset (presumably the value accepted by `Datasets.get` — confirm). */
id: string;
/** Creation time as a string; the exact format is not specified here — TODO confirm. */
created_at: string;
/** Completion time as a string; may be absent (presumably while processing is unfinished — confirm). */
completed_at?: string;
/** Processing status, expressed with the shared `JobStatus` type. */
status: JobStatus;
/** Domain associated with the dataset. */
domain: string;
/** Name of the dataset. */
dataset_name: string;
/** Kind of data in the dataset. */
dataset_type: "images" | "videos" | "documents";
/** ID of a file associated with the dataset (presumably the uploaded archive — confirm). */
file_id: string;
/** Optional URL; presumably a Weights & Biases link — TODO confirm. */
wandb_url?: string;
/** Optional free-form message accompanying the record. */
message?: string;
}

/**
 * Input for `Datasets.create`.
 */
export interface DatasetCreateParams {
/** Local directory whose contents are packed into a tar.gz archive and uploaded. */
datasetDirectory: string;
/** Sent to the API as `domain`. */
domain: string;
/** Sent to the API as `dataset_name`; also used as the base name of the temporary archive file. */
datasetName: string;
/** Kind of data in the dataset; `Datasets.create` rejects any other value at runtime. */
datasetType: "images" | "videos" | "documents";
/** Optional; sent to the API as `wandb_base_url`. */
wandbBaseUrl?: string;
/** Optional; sent to the API as `wandb_project_name`. */
wandbProjectName?: string;
/** Optional; sent to the API as `wandb_api_key`. */
wandbApiKey?: string;
}

export class APIError extends Error {
constructor(
message: string,
Expand All @@ -263,5 +291,3 @@ export interface VlmRunError extends Error {
code?: string;
cause?: Error;
}


3 changes: 3 additions & 0 deletions src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ import {
} from "./client/predictions";
import { Feedback } from "./client/feedback";
import { Finetuning } from "./client/fine_tuning";
import { Datasets } from "./client/datasets";

export * from "./client/types";
export * from "./client/base_requestor";
Expand Down Expand Up @@ -39,6 +40,7 @@ export class VlmRun {
readonly web: WebPredictions;
readonly feedback: Feedback;
readonly finetuning: Finetuning;
readonly dataset: Datasets;

constructor(config: VlmRunConfig) {
this.client = {
Expand All @@ -56,5 +58,6 @@ export class VlmRun {
this.web = new WebPredictions(this.client);
this.feedback = new Feedback(this.client);
this.finetuning = new Finetuning(this.client);
this.dataset = new Datasets(this.client)
}
}
31 changes: 31 additions & 0 deletions src/utils/file.ts
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,34 @@ export const readFileFromPathAsFile = async (filePath: string): Promise<File> =>
throw new Error(`Error reading file at ${filePath}: ${error.message}`);
}
};

/**
 * Pack the top-level entries of a directory into a gzip-compressed tar
 * archive written to the OS temp directory.
 *
 * Only works where `window` is undefined (i.e. not in a browser); the Node
 * modules it needs are loaded lazily inside the function.
 *
 * @param directory Directory whose entries are archived. Entry paths inside
 *   the archive are relative to this directory.
 * @param archiveName Base name of the archive; the file is written to
 *   `<os.tmpdir()>/<archiveName>.tar.gz`.
 * @returns Path of the created `.tar.gz` file.
 * @throws Error with the message `Error creating archive for <directory>: <reason>`
 *   when running in a browser, when the directory cannot be read or is empty,
 *   or when writing the archive fails.
 */
export const createArchive = async (directory: string, archiveName: string): Promise<string> => {
  try {
    if (typeof window !== 'undefined') {
      throw new Error("createArchive is not supported in a browser environment.");
    }

    const fs = require('fs');
    const path = require('path');
    const os = require('os');

    const tarPath: string = path.join(os.tmpdir(), `${archiveName}.tar.gz`);

    const entries: string[] = fs.readdirSync(directory);
    if (entries.length === 0) {
      // tar.create rejects an empty path list with a generic error; fail
      // early with a message that names the actual problem.
      throw new Error("directory contains no files to archive");
    }

    // Loaded only once there is something to archive.
    const tar = require('tar');

    await tar.create(
      {
        gzip: true,
        file: tarPath,
        cwd: directory,
      },
      // node-tar treats a path starting with "@" as an existing archive to
      // merge in; prefixing "./" makes it add the file itself instead.
      entries.map((entry) => (entry.startsWith('@') ? `./${entry}` : entry))
    );

    return tarPath;
  } catch (error: unknown) {
    // Non-Error throwables have no `.message`; stringify them instead of
    // reporting "undefined".
    const reason = error instanceof Error ? error.message : String(error);
    throw new Error(`Error creating archive for ${directory}: ${reason}`);
  }
};

Loading

0 comments on commit f054afb

Please sign in to comment.