From 720fb8f631198f14182a175e981e66982f1b5662 Mon Sep 17 00:00:00 2001 From: shahrear33 Date: Tue, 18 Feb 2025 20:32:07 +0600 Subject: [PATCH] feat: add datasets endpoint --- package-lock.json | 136 +++++++++++------ package.json | 5 +- src/client/datasets.ts | 102 +++++++++++++ src/client/fine_tuning.ts | 2 +- src/client/types.ts | 30 +++- src/index.ts | 3 + src/utils/file.ts | 31 ++++ tests/unit/client/datasets.test.ts | 238 +++++++++++++++++++++++++++++ 8 files changed, 499 insertions(+), 48 deletions(-) create mode 100644 src/client/datasets.ts create mode 100644 tests/unit/client/datasets.test.ts diff --git a/package-lock.json b/package-lock.json index 0b993d9..46c0266 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,18 +1,19 @@ { "name": "vlmrun", - "version": "0.2.5", + "version": "0.2.6", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "vlmrun", - "version": "0.2.5", + "version": "0.2.6", "license": "Apache-2.0", "dependencies": { "axios": "^1.7.9", "dotenv": "^16.4.7", "mime-types": "^2.1.35", "path": "^0.12.7", + "tar": "^7.4.3", "zod": "~3.24.2", "zod-to-json-schema": "~3.24.1" }, @@ -928,7 +929,6 @@ "version": "8.0.2", "resolved": "https://registry.npmjs.org/@isaacs/cliui/-/cliui-8.0.2.tgz", "integrity": "sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==", - "dev": true, "license": "ISC", "dependencies": { "string-width": "^5.1.2", @@ -942,6 +942,18 @@ "node": ">=12" } }, + "node_modules/@isaacs/fs-minipass": { + "version": "4.0.1", + "resolved": "https://registry.npmjs.org/@isaacs/fs-minipass/-/fs-minipass-4.0.1.tgz", + "integrity": "sha512-wgm9Ehl2jpeqP3zw/7mo3kRHFp5MEDhqAdwy1fTGkHAwnkGOVsgpvQhL8B5n1qlb01jV3n/bI0ZfZp5lWA1k4w==", + "license": "ISC", + "dependencies": { + "minipass": "^7.0.4" + }, + "engines": { + "node": ">=18.0.0" + } + }, "node_modules/@istanbuljs/load-nyc-config": { "version": "1.1.0", "resolved": "https://registry.npmjs.org/@istanbuljs/load-nyc-config/-/load-nyc-config-1.1.0.tgz", @@ -1387,7 +1399,6 @@ "version": "0.11.0", "resolved": "https://registry.npmjs.org/@pkgjs/parseargs/-/parseargs-0.11.0.tgz", "integrity": "sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==", - "dev": true, "license": "MIT", "optional": true, "engines": { @@ -1825,7 +1836,6 @@ "version": "6.1.0", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-6.1.0.tgz", "integrity": "sha512-7HSX4QQb4CspciLpVFwyRe79O3xsIZDDLER21kERQ71oaPodF8jL725AgJMFAYbooIqolJoRLuM81SpeUkpkvA==", - "dev": true, "license": "MIT", "engines": { "node": ">=12" @@ -1838,7 +1848,6 @@ "version": "6.2.1", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-6.2.1.tgz", "integrity": "sha512-bN798gFfQX+viw3R7yrGWRqnrN2oRkEkUjjl4JNn4E8GxxbjtG3FbrEIIY3l8/hrwUwIeCZvi4QuOTP4MErVug==", - "dev": true, "license": "MIT", "engines": { "node": ">=12" @@ -2025,14 +2034,12 @@ "version": "1.0.2", "resolved": "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz", "integrity": "sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw==", - "dev": true, "license": "MIT" }, "node_modules/brace-expansion": { "version": "2.0.1", "resolved": "https://registry.npmjs.org/brace-expansion/-/brace-expansion-2.0.1.tgz", "integrity": "sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA==", - "dev": true, "license": "MIT", "dependencies": { "balanced-match": "^1.0.0" @@ -2229,6 +2236,15 @@ "url": "https://paulmillr.com/funding/" } }, + "node_modules/chownr": { + "version": "3.0.0", + "resolved": "https://registry.npmjs.org/chownr/-/chownr-3.0.0.tgz", + "integrity": "sha512-+IxzY9BZOQd/XuYPRmrvEVjF/nqj5kgT4kEq7VofrDoM1MxoRjEWkrCC3EtLi59TVawxTAn+orJwFQcrqEN1+g==", + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=18" + } + }, "node_modules/ci-info": { "version": "3.9.0", "resolved": "https://registry.npmjs.org/ci-info/-/ci-info-3.9.0.tgz", @@ -2357,7 +2373,6 @@ "version": "2.0.1", "resolved": "https://registry.npmjs.org/color-convert/-/color-convert-2.0.1.tgz", "integrity": "sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==", - "dev": true, "license": "MIT", "dependencies": { "color-name": "~1.1.4" @@ -2370,7 +2385,6 @@ "version": "1.1.4", "resolved": "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz", "integrity": "sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==", - "dev": true, "license": "MIT" }, "node_modules/combined-stream": { @@ -2442,7 +2456,6 @@ "version": "7.0.6", "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", - "dev": true, "license": "MIT", "dependencies": { "path-key": "^3.1.0", @@ -2537,7 +2550,6 @@ "version": "0.2.0", "resolved": "https://registry.npmjs.org/eastasianwidth/-/eastasianwidth-0.2.0.tgz", "integrity": "sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==", - "dev": true, "license": "MIT" }, "node_modules/ejs": { @@ -2577,7 +2589,6 @@ "version": "9.2.2", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-9.2.2.tgz", "integrity": "sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==", - "dev": true, "license": "MIT" }, "node_modules/error-ex": { @@ -2815,7 +2826,6 @@ "version": "3.3.0", "resolved": "https://registry.npmjs.org/foreground-child/-/foreground-child-3.3.0.tgz", "integrity": "sha512-Ld2g8rrAyMYFXBhEqMz8ZAHBi4J4uS1i/CxGMDnjyFWddMXLVcDp051DZfu+t7+ab7Wv6SMqpWmyFIj5UbfFvg==", - "dev": true, "license": "ISC", "dependencies": { "cross-spawn": "^7.0.0", @@ -2915,7 +2925,6 @@ "version": "10.4.5", "resolved": "https://registry.npmjs.org/glob/-/glob-10.4.5.tgz", "integrity": "sha512-7Bv8RF0k6xjo7d4A/PxYLbUCfb6c+Vpd2/mB2yRDlew7Jb5hEXiCD9ibfO7wpk8i4sevK6DFny9h7EYbM3/sHg==", - "dev": true, "license": "ISC", "dependencies": { "foreground-child": "^3.1.0", @@ -3053,7 +3062,6 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/is-fullwidth-code-point/-/is-fullwidth-code-point-3.0.0.tgz", "integrity": "sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==", - "dev": true, "license": "MIT", "engines": { "node": ">=8" @@ -3093,7 +3101,6 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/isexe/-/isexe-2.0.0.tgz", "integrity": "sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==", - "dev": true, "license": "ISC" }, "node_modules/istanbul-lib-coverage": { @@ -3187,7 +3194,6 @@ "version": "3.4.3", "resolved": "https://registry.npmjs.org/jackspeak/-/jackspeak-3.4.3.tgz", "integrity": "sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==", - "dev": true, "license": "BlueOak-1.0.0", "dependencies": { "@isaacs/cliui": "^8.0.2" @@ -4040,7 +4046,6 @@ "version": "10.4.3", "resolved": "https://registry.npmjs.org/lru-cache/-/lru-cache-10.4.3.tgz", "integrity": "sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==", - "dev": true, "license": "ISC" }, "node_modules/make-dir": { @@ -4150,7 +4155,6 @@ "version": "9.0.5", "resolved": "https://registry.npmjs.org/minimatch/-/minimatch-9.0.5.tgz", "integrity": "sha512-G6T0ZX48xgozx7587koeX9Ys2NYy6Gmv//P89sEte9V9whIapMNF4idKxnW2QtCcLiTWlb/wfCabAtAFWhhBow==", - "dev": true, "license": "ISC", "dependencies": { "brace-expansion": "^2.0.1" @@ -4166,12 +4170,39 @@ "version": "7.1.2", "resolved": "https://registry.npmjs.org/minipass/-/minipass-7.1.2.tgz", "integrity": "sha512-qOOzS1cBTWYF4BH8fVePDBOO9iptMnGUEZwNc/cMWnTV2nVLZ7VoNWEPHkYczZA0pdoA7dl6e7FL659nX9S2aw==", - "dev": true, "license": "ISC", "engines": { "node": ">=16 || 14 >=14.17" } }, + "node_modules/minizlib": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/minizlib/-/minizlib-3.0.1.tgz", + "integrity": "sha512-umcy022ILvb5/3Djuu8LWeqUa8D68JaBzlttKeMWen48SjabqS3iY5w/vzeMzMUNhLDifyhbOwKDSznB1vvrwg==", + "license": "MIT", + "dependencies": { + "minipass": "^7.0.4", + "rimraf": "^5.0.5" + }, + "engines": { + "node": ">= 18" + } + }, + "node_modules/mkdirp": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/mkdirp/-/mkdirp-3.0.1.tgz", + "integrity": "sha512-+NsyUUAZDmo6YVHzL/stxSu3t9YS1iljliy3BSDrXJ/dkn1KYdmtZODGGjLcc9XLgVVpH4KshHB8XmZgMhaBXg==", + "license": "MIT", + "bin": { + "mkdirp": "dist/cjs/src/bin.js" + }, + "engines": { + "node": ">=10" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, "node_modules/ms": { "version": "2.1.3", "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", @@ -4319,7 +4350,6 @@ "version": "1.0.1", "resolved": "https://registry.npmjs.org/package-json-from-dist/-/package-json-from-dist-1.0.1.tgz", "integrity": "sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==", - "dev": true, "license": "BlueOak-1.0.0" }, "node_modules/parse-json": { @@ -4372,7 +4402,6 @@ "version": "3.1.1", "resolved": "https://registry.npmjs.org/path-key/-/path-key-3.1.1.tgz", "integrity": "sha512-ojmeN0qd+y0jszEtoY48r0Peq5dwMEkIlCOu6Q5f41lfkswXuKtYrhgoTpLnyIcHm24Uhqx+5Tqm2InSwLhE6Q==", - "dev": true, "license": "MIT", "engines": { "node": ">=8" @@ -4388,7 +4417,6 @@ "version": "1.11.1", "resolved": "https://registry.npmjs.org/path-scurry/-/path-scurry-1.11.1.tgz", "integrity": "sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==", - "dev": true, "license": "BlueOak-1.0.0", "dependencies": { "lru-cache": "^10.2.0", @@ -4646,6 +4674,21 @@ "node": ">=10" } }, + "node_modules/rimraf": { + "version": "5.0.10", + "resolved": "https://registry.npmjs.org/rimraf/-/rimraf-5.0.10.tgz", + "integrity": "sha512-l0OE8wL34P4nJH/H2ffoaniAokM2qSmrtXHmlpvYr5AVVX8msAyW0l8NVJFDxlSK4u3Uh/f41cQheDVdnYijwQ==", + "license": "ISC", + "dependencies": { + "glob": "^10.3.7" + }, + "bin": { + "rimraf": "dist/esm/bin.mjs" + }, + "funding": { + "url": "https://github.com/sponsors/isaacs" + } + }, "node_modules/rollup": { "version": "4.34.0", "resolved": "https://registry.npmjs.org/rollup/-/rollup-4.34.0.tgz", @@ -4698,7 +4741,6 @@ "version": "2.0.0", "resolved": "https://registry.npmjs.org/shebang-command/-/shebang-command-2.0.0.tgz", "integrity": "sha512-kHxr2zZpYtdmrN1qDjrrX/Z1rR1kG8Dx+gkpK1G4eXmvXswmcE1hTWBWYUzlraYw1/yZp6YuDY77YtvbN0dmDA==", - "dev": true, "license": "MIT", "dependencies": { "shebang-regex": "^3.0.0" @@ -4711,7 +4753,6 @@ "version": "3.0.0", "resolved": "https://registry.npmjs.org/shebang-regex/-/shebang-regex-3.0.0.tgz", "integrity": "sha512-7++dFhtcx3353uBaq8DDR4NuxBetBzC7ZQOhmTQInHEd6bSrXdiEyzCvG07Z44UYdLShWUyXt5M/yhz8ekcb1A==", - "dev": true, "license": "MIT", "engines": { "node": ">=8" @@ -4721,7 +4762,6 @@ "version": "4.1.0", "resolved": "https://registry.npmjs.org/signal-exit/-/signal-exit-4.1.0.tgz", "integrity": "sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==", - "dev": true, "license": "ISC", "engines": { "node": ">=14" @@ -4833,7 +4873,6 @@ "version": "5.1.2", "resolved": "https://registry.npmjs.org/string-width/-/string-width-5.1.2.tgz", "integrity": "sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==", - "dev": true, "license": "MIT", "dependencies": { "eastasianwidth": "^0.2.0", @@ -4852,7 +4891,6 @@ "version": "4.2.3", "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dev": true, "license": "MIT", "dependencies": { "emoji-regex": "^8.0.0", @@ -4867,7 +4905,6 @@ "version": "5.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "dev": true, "license": "MIT", "engines": { "node": ">=8" @@ -4877,14 +4914,12 @@ "version": "8.0.0", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true, "license": "MIT" }, "node_modules/string-width-cjs/node_modules/strip-ansi": { "version": "6.0.1", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, "license": "MIT", "dependencies": { "ansi-regex": "^5.0.1" @@ -4897,7 +4932,6 @@ "version": "7.1.0", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-7.1.0.tgz", "integrity": "sha512-iq6eVVI64nQQTRYq2KtEg2d2uU7LElhTJwsH4YzIHZshxlgZms/wIc4VoDQTlG/IvVIrBKG06CrZnp0qv7hkcQ==", - "dev": true, "license": "MIT", "dependencies": { "ansi-regex": "^6.0.1" @@ -4914,7 +4948,6 @@ "version": "6.0.1", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, "license": "MIT", "dependencies": { "ansi-regex": "^5.0.1" @@ -4927,7 +4960,6 @@ "version": "5.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "dev": true, "license": "MIT", "engines": { "node": ">=8" @@ -5010,6 +5042,32 @@ "url": "https://github.com/sponsors/ljharb" } }, + "node_modules/tar": { + "version": "7.4.3", + "resolved": "https://registry.npmjs.org/tar/-/tar-7.4.3.tgz", + "integrity": "sha512-5S7Va8hKfV7W5U6g3aYxXmlPoZVAwUMy9AOKyF2fVuZa2UD3qZjg578OrLRt8PcNN1PleVaL/5/yYATNL0ICUw==", + "license": "ISC", + "dependencies": { + "@isaacs/fs-minipass": "^4.0.0", + "chownr": "^3.0.0", + "minipass": "^7.1.2", + "minizlib": "^3.0.1", + "mkdirp": "^3.0.1", + "yallist": "^5.0.0" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/tar/node_modules/yallist": { + "version": "5.0.0", + "resolved": "https://registry.npmjs.org/yallist/-/yallist-5.0.0.tgz", + "integrity": "sha512-YgvUTfwqyc7UXVMrB+SImsVYSmTS8X/tSrtdNZMImM+n7+QTriRXyXim0mBrTXNeqzVF0KWGgHPeiyViFFrNDw==", + "license": "BlueOak-1.0.0", + "engines": { + "node": ">=18" + } + }, "node_modules/test-exclude": { "version": "6.0.0", "resolved": "https://registry.npmjs.org/test-exclude/-/test-exclude-6.0.0.tgz", @@ -5395,7 +5453,6 @@ "version": "2.0.2", "resolved": "https://registry.npmjs.org/which/-/which-2.0.2.tgz", "integrity": "sha512-BLI3Tl1TW3Pvl70l3yq3Y64i+awpwXqsGBYWkkqMtnbXgrMD+yj7rhW0kuEDxzJaYXGjEW5ogapKNMEKNMjibA==", - "dev": true, "license": "ISC", "dependencies": { "isexe": "^2.0.0" @@ -5411,7 +5468,6 @@ "version": "8.1.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-8.1.0.tgz", "integrity": "sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==", - "dev": true, "license": "MIT", "dependencies": { "ansi-styles": "^6.1.0", @@ -5430,7 +5486,6 @@ "version": "7.0.0", "resolved": "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz", "integrity": "sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==", - "dev": true, "license": "MIT", "dependencies": { "ansi-styles": "^4.0.0", @@ -5448,7 +5503,6 @@ "version": "5.0.1", "resolved": "https://registry.npmjs.org/ansi-regex/-/ansi-regex-5.0.1.tgz", "integrity": "sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==", - "dev": true, "license": "MIT", "engines": { "node": ">=8" @@ -5458,7 +5512,6 @@ "version": "4.3.0", "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-4.3.0.tgz", "integrity": "sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==", - "dev": true, "license": "MIT", "dependencies": { "color-convert": "^2.0.1" @@ -5474,14 +5527,12 @@ "version": "8.0.0", "resolved": "https://registry.npmjs.org/emoji-regex/-/emoji-regex-8.0.0.tgz", "integrity": "sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==", - "dev": true, "license": "MIT" }, "node_modules/wrap-ansi-cjs/node_modules/string-width": { "version": "4.2.3", "resolved": "https://registry.npmjs.org/string-width/-/string-width-4.2.3.tgz", "integrity": "sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==", - "dev": true, "license": "MIT", "dependencies": { "emoji-regex": "^8.0.0", @@ -5496,7 +5547,6 @@ "version": "6.0.1", "resolved": "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz", "integrity": "sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==", - "dev": true, "license": "MIT", "dependencies": { "ansi-regex": "^5.0.1" diff --git a/package.json b/package.json index 597b72f..a2e1628 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "vlmrun", - "version": "0.2.6", + "version": "0.2.7", "description": "The official TypeScript library for the VlmRun API", "author": "VlmRun ", "main": "dist/index.js", @@ -26,9 +26,10 @@ "dependencies": { "axios": "^1.7.9", "dotenv": "^16.4.7", + "mime-types": "^2.1.35", "path": "^0.12.7", + "tar": "^7.4.3", "zod": "~3.24.2", - "mime-types": "^2.1.35", "zod-to-json-schema": "~3.24.1" }, "devDependencies": { diff --git a/src/client/datasets.ts b/src/client/datasets.ts new file mode 100644 index 0000000..63c5c33 --- /dev/null +++ b/src/client/datasets.ts @@ -0,0 +1,102 @@ +import { Client, APIRequestor } from "./base_requestor"; +import { DatasetResponse, DatasetCreateParams, DatasetListParams } from "./types"; +import { createArchive } from "../utils"; +import * as fs from "fs"; +import * as path from "path"; +import { Files } from "../index"; + +export class Datasets { + private requestor: APIRequestor; + private files: Files; + + constructor(client: Client) { + this.requestor = new APIRequestor({ + ...client, + baseURL: `${client.baseURL}/datasets`, + }); + + this.files = new Files(client); + } + + /** + * Create a dataset from a directory of files. + * + * @param params Dataset creation parameters. + * @returns The dataset creation response. + */ + async create(params: DatasetCreateParams): Promise { + const validTypes = ["images", "videos", "documents"]; + if (!validTypes.includes(params.datasetType)) { + throw new Error("dataset_type must be one of: images, videos, documents"); + } + + // Create tar.gz archive of the dataset directory. + const tarPath = await createArchive(params.datasetDirectory, params.datasetName); + const tarSizeMB = (fs.statSync(tarPath).size / 1024 / 1024).toFixed(2); + console.debug(`Created tar.gz file [path=${tarPath}, size=${tarSizeMB} MB]`); + + // Upload the tar.gz file using the client's file upload method. + const fileResponse = await this.files.upload({ + filePath: tarPath, + purpose: "datasets", + }); + const fileSizeMB = (fileResponse.bytes / 1024 / 1024).toFixed(2); + console.debug( + `Uploaded tar.gz file [path=${tarPath}, file_id=${fileResponse.id}, size=${fileSizeMB} MB]` + ); + + // Create the dataset by sending a POST request. + const [response] = await this.requestor.request( + "POST", + "create", + undefined, // No query parameters + { + file_id: fileResponse.id, + domain: params.domain, + dataset_name: params.datasetName, + dataset_type: params.datasetType, + wandb_base_url: params.wandbBaseUrl, + wandb_project_name: params.wandbProjectName, + wandb_api_key: params.wandbApiKey, + } + ); + + return response; + } + + /** + * Get dataset information by its ID. + * + * @param datasetId The ID of the dataset to retrieve. + * @returns The dataset information. + */ + async get(datasetId: string): Promise { + const [response] = await this.requestor.request( + "GET", + datasetId + ); + return response; + } + + /** + * List all datasets with pagination support. + * + * @param skip Number of datasets to skip. + * @param limit Maximum number of datasets to return. + * @returns A list of dataset responses. + */ + async list(params?: DatasetListParams): Promise { + const [items] = await this.requestor.request( + "GET", + "", + { + skip: params?.skip ?? 0, + limit: params?.limit ?? 10, + } + ); + if (!Array.isArray(items)) { + throw new Error("Expected array response"); + } + return items; + } +} diff --git a/src/client/fine_tuning.ts b/src/client/fine_tuning.ts index 95bd5e6..a7be784 100644 --- a/src/client/fine_tuning.ts +++ b/src/client/fine_tuning.ts @@ -1,6 +1,6 @@ import { Client, APIRequestor } from "./base_requestor"; import { FinetuningResponse, FinetuningProvisionResponse, FinetuningGenerateParams, FinetuningListParams, PredictionResponse, FinetuningCreateParams, FinetuningProvisionParams } from "./types"; -import { encodeImage, processImage } from "../utils"; +import { processImage } from "../utils"; export class Finetuning { private requestor: APIRequestor; diff --git a/src/client/types.ts b/src/client/types.ts index 2773f0b..0a2dcb2 100644 --- a/src/client/types.ts +++ b/src/client/types.ts @@ -247,6 +247,34 @@ export interface FinetuningListParams { limit?: number; } +export interface DatasetListParams { + skip?: number; + limit?: number; +} + +export interface DatasetResponse { + id: string; + created_at: string; + completed_at?: string; + status: JobStatus; + domain: string; + dataset_name: string; + dataset_type: "images" | "videos" | "documents"; + file_id: string; + wandb_url?: string; + message?: string; +} + +export interface DatasetCreateParams { + datasetDirectory: string; + domain: string; + datasetName: string; + datasetType: "images" | "videos" | "documents"; + wandbBaseUrl?: string; + wandbProjectName?: string; + wandbApiKey?: string; +} + export class APIError extends Error { constructor( message: string, @@ -263,5 +291,3 @@ export interface VlmRunError extends Error { code?: string; cause?: Error; } - - diff --git a/src/index.ts b/src/index.ts index 608a46f..323d0be 100644 --- a/src/index.ts +++ b/src/index.ts @@ -11,6 +11,7 @@ import { } from "./client/predictions"; import { Feedback } from "./client/feedback"; import { Finetuning } from "./client/fine_tuning"; +import { Datasets } from "./client/datasets"; export * from "./client/types"; export * from "./client/base_requestor"; @@ -39,6 +40,7 @@ export class VlmRun { readonly web: WebPredictions; readonly feedback: Feedback; readonly finetuning: Finetuning; + readonly dataset: Datasets; constructor(config: VlmRunConfig) { this.client = { @@ -56,5 +58,6 @@ export class VlmRun { this.web = new WebPredictions(this.client); this.feedback = new Feedback(this.client); this.finetuning = new Finetuning(this.client); + this.dataset = new Datasets(this.client) } } diff --git a/src/utils/file.ts b/src/utils/file.ts index daa451a..0bb88a3 100644 --- a/src/utils/file.ts +++ b/src/utils/file.ts @@ -17,3 +17,34 @@ export const readFileFromPathAsFile = async (filePath: string): Promise => throw new Error(`Error reading file at ${filePath}: ${error.message}`); } }; + +export const createArchive = async (directory: string, archiveName: string): Promise => { + try { + if (typeof window === 'undefined') { + const fs = require('fs'); + const path = require('path'); + const os = require('os'); + const tar = require('tar'); + + const tarPath = path.join(os.tmpdir(), `${archiveName}.tar.gz`); + + const files = fs.readdirSync(directory); + + await tar.create( + { + gzip: true, + file: tarPath, + cwd: directory, + }, + files + ); + + return tarPath; + } else { + throw new Error("createArchive is not supported in a browser environment."); + } + } catch (error: any) { + throw new Error(`Error creating archive for ${directory}: ${error.message}`); + } + }; + \ No newline at end of file diff --git a/tests/unit/client/datasets.test.ts b/tests/unit/client/datasets.test.ts new file mode 100644 index 0000000..ef9cfc0 --- /dev/null +++ b/tests/unit/client/datasets.test.ts @@ -0,0 +1,238 @@ +import { Client } from '../../../src/client/base_requestor'; +import { Datasets } from '../../../src/client/datasets'; +import { DatasetResponse, FileResponse } from '../../../src/client/types'; +import { Files } from '../../../src/client/files'; + +jest.mock('../../../src/client/base_requestor'); +jest.mock('../../../src/client/files'); +jest.mock('../../../src/utils/file', () => ({ + createArchive: jest.fn().mockResolvedValue('/tmp/test-dataset.tar.gz'), +})); + +jest.mock('fs', () => ({ + ...jest.requireActual('fs'), + statSync: jest.fn().mockReturnValue({ size: 1024 * 1024 }), // 1 MB mock file +})); + +jest.mock('../../../src/client/files', () => ({ + Files: jest.fn().mockImplementation(() => ({ + upload: jest.fn().mockResolvedValue({ + id: 'file_123', + filename: 'test-dataset.tar.gz', + bytes: 1024 * 1024, // Mocking a 1 MB file + purpose: 'datasets', + created_at: new Date().toISOString(), + object: 'file', + }), + })), +})); + + +describe('Datasets', () => { + let client: jest.Mocked; + let datasets: Datasets; + let mockFiles: jest.Mocked; + + beforeEach(() => { + client = { + apiKey: 'test-api-key', + baseURL: 'https://api.example.com', + } as jest.Mocked; + + mockFiles = { + upload: jest.fn(), + } as unknown as jest.Mocked; + + datasets = new Datasets(client); + // @ts-ignore - Accessing private property for testing + datasets.vlmClient = { files: mockFiles }; + }); + + describe('create', () => { + it('should create a dataset with minimal parameters', async () => { + const mockFileResponse: FileResponse = { + id: 'file_123', + filename: 'test-dataset.tar.gz', + bytes: 1024, + purpose: 'datasets', + created_at: new Date().toISOString(), + object: 'file', + }; + + const mockDatasetResponse: DatasetResponse = { + id: 'ds_123', + status: 'running', + domain: 'test-domain', + dataset_name: 'test-dataset', + dataset_type: 'images', + file_id: 'file_123', + created_at: new Date().toISOString(), + }; + + mockFiles.upload.mockResolvedValue(mockFileResponse); + jest.spyOn(datasets['requestor'], 'request').mockResolvedValue([mockDatasetResponse, 200, {}]); + + const result = await datasets.create({ + datasetDirectory: '/path/to/dataset', + domain: 'test-domain', + datasetName: 'test-dataset', + datasetType: 'images', + }); + + expect(result).toEqual(mockDatasetResponse); + expect(datasets['requestor'].request).toHaveBeenCalledWith( + 'POST', + 'create', + undefined, + { + file_id: 'file_123', + domain: 'test-domain', + dataset_name: 'test-dataset', + dataset_type: 'images', + wandb_base_url: undefined, + wandb_project_name: undefined, + wandb_api_key: undefined, + } + ); + }); + + it('should create a dataset with minimal parameters', async () => { + mockFiles.upload.mockResolvedValueOnce({ + id: 'file_123', + filename: 'test-dataset.tar.gz', + bytes: 1024 * 1024, // Ensure 'bytes' field is present + purpose: 'datasets', + created_at: new Date().toISOString(), + object: 'file', + }); + + const mockDatasetResponse: DatasetResponse = { + id: 'ds_123', + status: 'running', + domain: 'test-domain', + dataset_name: 'test-dataset', + dataset_type: 'images', + file_id: 'file_123', + created_at: new Date().toISOString(), + }; + + jest.spyOn(datasets['requestor'], 'request').mockResolvedValue([mockDatasetResponse, 200, {}]); + + const result = await datasets.create({ + datasetDirectory: '/path/to/dataset', + domain: 'test-domain', + datasetName: 'test-dataset', + datasetType: 'images', + }); + + expect(result).toEqual(mockDatasetResponse); + }); + + it('should throw error for invalid dataset type', async () => { + await expect(datasets.create({ + datasetDirectory: '/path/to/dataset', + domain: 'test-domain', + datasetName: 'test-dataset', + datasetType: 'invalid' as any, + })).rejects.toThrow('dataset_type must be one of: images, videos, documents'); + }); + }); + + describe('get', () => { + it('should get dataset by ID', async () => { + const mockResponse: DatasetResponse = { + id: 'ds_123', + status: 'completed', + domain: 'test-domain', + dataset_name: 'test-dataset', + dataset_type: 'images', + file_id: 'file_123', + created_at: new Date().toISOString(), + completed_at: new Date().toISOString(), + }; + + jest.spyOn(datasets['requestor'], 'request').mockResolvedValue([mockResponse, 200, {}]); + + const result = await datasets.get('ds_123'); + + expect(result).toEqual(mockResponse); + expect(datasets['requestor'].request).toHaveBeenCalledWith( + 'GET', + 'ds_123' + ); + }); + }); + + describe('list', () => { + it('should list datasets with default pagination', async () => { + const mockResponse: DatasetResponse[] = [ + { + id: 'ds_123', + status: 'completed', + domain: 'test-domain', + dataset_name: 'test-dataset-1', + dataset_type: 'images', + file_id: 'file_123', + created_at: new Date().toISOString(), + }, + { + id: 'ds_456', + status: 'running', + domain: 'test-domain', + dataset_name: 'test-dataset-2', + dataset_type: 'videos', + file_id: 'file_456', + created_at: new Date().toISOString(), + }, + ]; + + jest.spyOn(datasets['requestor'], 'request').mockResolvedValue([mockResponse, 200, {}]); + + const result = await datasets.list(); + + expect(result).toEqual(mockResponse); + expect(datasets['requestor'].request).toHaveBeenCalledWith( + 'GET', + '', + { + skip: 0, + limit: 10, + } + ); + }); + + it('should list datasets with custom pagination', async () => { + const mockResponse: DatasetResponse[] = [ + { + id: 'ds_789', + status: 'completed', + domain: 'test-domain', + dataset_name: 'test-dataset-3', + dataset_type: 'documents', + file_id: 'file_789', + created_at: new Date().toISOString(), + }, + ]; + + jest.spyOn(datasets['requestor'], 'request').mockResolvedValue([mockResponse, 200, {}]); + + const result = await datasets.list({ skip: 2, limit: 1 }); + + expect(result).toEqual(mockResponse); + expect(datasets['requestor'].request).toHaveBeenCalledWith( + 'GET', + '', + { + skip: 2, + limit: 1, + } + ); + }); + + it('should throw error for non-array response', async () => { + jest.spyOn(datasets['requestor'], 'request').mockResolvedValue([{}, 200, {}]); + + await expect(datasets.list()).rejects.toThrow('Expected array response'); + }); + }); +});