Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

💥 Switch gguf & tasks interdependency #1004

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 4 additions & 0 deletions .github/workflows/gguf-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,10 @@ jobs:
node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');"
git commit . -m "🔖 @huggingface/gguf $BUMPED_VERSION"
git tag "gguf-v$BUMPED_VERSION"

- name: "Check Deps are published before publishing this package"
run: pnpm -w check-deps tasks

- run: pnpm publish --no-git-checks .
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
Expand Down
3 changes: 0 additions & 3 deletions .github/workflows/tasks-publish.yml
Original file line number Diff line number Diff line change
Expand Up @@ -50,9 +50,6 @@ jobs:
git commit . -m "🔖 @huggingface/tasks $BUMPED_VERSION"
git tag "tasks-v$BUMPED_VERSION"

- name: "Check Deps are published before publishing this package"
run: pnpm -w check-deps gguf

- run: pnpm publish --no-git-checks .
env:
NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ jobs:
run: |
sleep 3
pnpm i --filter root --filter inference... --filter hub... --frozen-lockfile
pnpm --filter inference --filter hub --filter tasks --filter gguf publish --force --no-git-checks --registry http://localhost:4874/
pnpm --filter inference --filter hub --filter tasks publish --force --no-git-checks --registry http://localhost:4874/

- name: E2E test - test yarn install
working-directory: e2e/ts
Expand Down
4 changes: 3 additions & 1 deletion packages/gguf/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,6 @@
},
"source": "index.ts",
"scripts": {
"prepare": "pnpm run build",
"lint": "eslint --quiet --fix --ext .cjs,.ts .",
"lint:check": "eslint --ext .cjs,.ts .",
"format": "prettier --write .",
Expand All @@ -50,6 +49,9 @@
],
"author": "Hugging Face",
"license": "MIT",
"dependencies": {
"@huggingface/tasks": "workspace:^"
},
"devDependencies": {
"@types/node": "^20.12.8"
}
Expand Down
5 changes: 5 additions & 0 deletions packages/gguf/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

12 changes: 2 additions & 10 deletions packages/gguf/src/gguf.ts
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
import type { MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
import { GGMLQuantizationType, GGUFValueType } from "./types";
import { GGUFValueType } from "./types";
import { isBackend } from "./utils/isBackend";
import { promisesQueue } from "./utils/promisesQueue";

export type { MetadataBaseValue, MetadataValue, Version, GGUFMetadata, GGUFTensorInfo, GGUFParseOutput } from "./types";
export { GGUFValueType, GGMLFileQuantizationType, GGMLQuantizationType, Architecture } from "./types";
export { GGUF_QUANT_DESCRIPTIONS } from "./quant-descriptions";
export { parseGGUFQuantLabel, GGUF_QUANT_RE, GGUF_QUANT_RE_GLOBAL } from "@huggingface/tasks";

export const RE_GGUF_FILE = /\.gguf$/;
export const RE_GGUF_SHARD_FILE = /^(?<prefix>.*?)-(?<shard>\d{5})-of-(?<total>\d{5})\.gguf$/;
Expand All @@ -29,15 +30,6 @@ export function parseGgufShardFilename(filename: string): GgufShardFileInfo | nu
return null;
}

// Collect the member names of GGMLQuantizationType. The runtime object of a numeric
// enum also carries the reverse (number -> name) mapping, so only string values are kept.
const quantTypeNames: string[] = [];
for (const member of Object.values(GGMLQuantizationType)) {
	if (typeof member === "string") {
		quantTypeNames.push(member);
	}
}

/**
 * Matches a quantization label: one of the GGMLQuantizationType member names (named group
 * `quant`), optionally followed by `_` and a run of upper-case letters (named group `sizeVariation`).
 */
export const GGUF_QUANT_RE = new RegExp(`(?<quant>${quantTypeNames.join("|")})(_(?<sizeVariation>[A-Z]+))?`);

/** Same pattern as GGUF_QUANT_RE, compiled with the global flag so every occurrence can be collected. */
export const GGUF_QUANT_RE_GLOBAL = new RegExp(GGUF_QUANT_RE.source, "g");

/**
 * Extracts a quantization label from a file name.
 *
 * The name is upper-cased before matching, so the returned label is upper-case.
 * If there are multiple quant substrings in the name, the last one is preferred.
 *
 * @param fname - The file name to search.
 * @returns The last matched label, or `undefined` when nothing matches.
 */
export function parseGGUFQuantLabel(fname: string): string | undefined {
	const candidates = fname.toUpperCase().match(GGUF_QUANT_RE_GLOBAL);
	if (candidates === null) {
		return undefined;
	}
	return candidates[candidates.length - 1];
}

const isVersion = (version: number): version is Version => version === 1 || version === 2 || version === 3;

/**
Expand Down
34 changes: 2 additions & 32 deletions packages/gguf/src/types.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import type { TransformerLLM } from "./transformer-llm";
import { LLM_ARCHITECTURES } from "./transformer-llm";
import type { GGMLQuantizationType } from "@huggingface/tasks";
export { GGMLQuantizationType } from "@huggingface/tasks";

export type MetadataBaseValue = string | number | bigint | boolean;
export type MetadataValue = MetadataBaseValue | MetadataBaseValue[] | MetadataValue[]; /// recursive as arrays can be nested.
Expand Down Expand Up @@ -45,38 +47,6 @@ export enum GGMLFileQuantizationType {
MOSTLY_Q4_0_8_8 = 35,
}

/**
 * Numeric identifiers for GGML quantization types.
 *
 * Being a numeric enum, the runtime object maps both name -> number and number -> name.
 * The values 4 and 5 are not assigned to any member.
 * NOTE(review): presumably these ids mirror the upstream ggml type ids — confirm before adding members.
 */
export enum GGMLQuantizationType {
F32 = 0,
F16 = 1,
Q4_0 = 2,
Q4_1 = 3,
Q5_0 = 6,
Q5_1 = 7,
Q8_0 = 8,
Q8_1 = 9,
Q2_K = 10,
Q3_K = 11,
Q4_K = 12,
Q5_K = 13,
Q6_K = 14,
Q8_K = 15,
IQ2_XXS = 16,
IQ2_XS = 17,
IQ3_XXS = 18,
IQ1_S = 19,
IQ4_NL = 20,
IQ3_S = 21,
IQ2_S = 22,
IQ4_XS = 23,
I8 = 24,
I16 = 25,
I32 = 26,
I64 = 27,
F64 = 28,
IQ1_M = 29,
BF16 = 30,
}

export enum GGUFValueType {
UINT8 = 0,
INT8 = 1,
Expand Down
3 changes: 0 additions & 3 deletions packages/tasks/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -51,8 +51,5 @@
"@types/node": "^20.11.5",
"quicktype-core": "https://github.com/huggingface/quicktype/raw/pack-18.0.17/packages/quicktype-core/quicktype-core-18.0.17.tgz",
"type-fest": "^3.13.1"
},
"dependencies": {
"@huggingface/gguf": "workspace:^"
}
}
5 changes: 0 additions & 5 deletions packages/tasks/pnpm-lock.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

40 changes: 40 additions & 0 deletions packages/tasks/src/gguf.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/**
 * Numeric identifiers for GGML quantization types.
 *
 * Being a numeric enum, the runtime object maps both name -> number and number -> name;
 * the member names are what this module joins into its quantization-label regular expression.
 * The values 4 and 5 are not assigned to any member.
 * NOTE(review): presumably these ids mirror the upstream ggml type ids — confirm before adding members.
 */
export enum GGMLQuantizationType {
F32 = 0,
F16 = 1,
Q4_0 = 2,
Q4_1 = 3,
Q5_0 = 6,
Q5_1 = 7,
Q8_0 = 8,
Q8_1 = 9,
Q2_K = 10,
Q3_K = 11,
Q4_K = 12,
Q5_K = 13,
Q6_K = 14,
Q8_K = 15,
IQ2_XXS = 16,
IQ2_XS = 17,
IQ3_XXS = 18,
IQ1_S = 19,
IQ4_NL = 20,
IQ3_S = 21,
IQ2_S = 22,
IQ4_XS = 23,
I8 = 24,
I16 = 25,
I32 = 26,
I64 = 27,
F64 = 28,
IQ1_M = 29,
BF16 = 30,
}

// Collect the member names of GGMLQuantizationType. The runtime object of a numeric
// enum also carries the reverse (number -> name) mapping, so only string values are kept.
const quantTypeNames: string[] = [];
for (const member of Object.values(GGMLQuantizationType)) {
	if (typeof member === "string") {
		quantTypeNames.push(member);
	}
}

/**
 * Matches a quantization label: one of the GGMLQuantizationType member names (named group
 * `quant`), optionally followed by `_` and a run of upper-case letters (named group `sizeVariation`).
 */
export const GGUF_QUANT_RE = new RegExp(`(?<quant>${quantTypeNames.join("|")})(_(?<sizeVariation>[A-Z]+))?`);

/** Same pattern as GGUF_QUANT_RE, compiled with the global flag so every occurrence can be collected. */
export const GGUF_QUANT_RE_GLOBAL = new RegExp(GGUF_QUANT_RE.source, "g");

/**
 * Extracts a quantization label from a file name.
 *
 * The name is upper-cased before matching, so the returned label is upper-case.
 * If there are multiple quant substrings in the name, the last one is preferred.
 *
 * @param fname - The file name to search.
 * @returns The last matched label, or `undefined` when nothing matches.
 */
export function parseGGUFQuantLabel(fname: string): string | undefined {
	const candidates = fname.toUpperCase().match(GGUF_QUANT_RE_GLOBAL);
	if (candidates === null) {
		return undefined;
	}
	return candidates[candidates.length - 1];
}
2 changes: 2 additions & 0 deletions packages/tasks/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,8 @@ export type {
export { SPECIAL_TOKENS_ATTRIBUTES } from "./tokenizer-data";

import * as snippets from "./snippets";
export * from "./gguf";

export { snippets };

export { SKUS, DEFAULT_MEMORY_OPTIONS } from "./hardware";
Expand Down
2 changes: 1 addition & 1 deletion packages/tasks/src/local-apps.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import { parseGGUFQuantLabel } from "./gguf";
import type { ModelData } from "./model-data";
import type { PipelineType } from "./pipelines";
import { parseGGUFQuantLabel } from "@huggingface/gguf";

export interface LocalAppSnippet {
/**
Expand Down
Loading