
Commit 8cf1a86

update Nscale provider

2 parents: 83f8e2d + f9e360f

334 files changed: +16286 -11521 lines


.github/workflows/test.yml (+5 -3)

@@ -61,11 +61,13 @@ jobs:
           echo "SINCE=$(git merge-base origin/${{ github.event.pull_request.base.ref }} ${{ github.sha }})" >> $GITHUB_OUTPUT
         fi

+      - run: google-chrome --version
+
       - run: npm install -g corepack@latest && corepack enable

       - uses: actions/setup-node@v3
         with:
-          node-version: "20"
+          node-version: "22"
           cache: "pnpm"
           cache-dependency-path: "**/pnpm-lock.yaml"
       - run: |
@@ -105,7 +107,7 @@ jobs:
         run: |
           sleep 3
           pnpm i --filter root --filter inference... --filter hub... --filter tasks-gen --frozen-lockfile
-          pnpm --filter inference --filter hub --filter tasks publish --force --no-git-checks --registry http://localhost:4874/
+          pnpm --filter inference --filter hub --filter tasks --filter jinja publish --force --no-git-checks --registry http://localhost:4874/

       - name: E2E test - test yarn install
         working-directory: e2e/ts
@@ -136,7 +138,7 @@ jobs:
           deno-version: vx.x.x
       - name: E2E test - deno import from npm
         working-directory: e2e/deno
-        run: deno run --allow-net --allow-env=HF_TOKEN index.ts
+        run: deno run --allow-read --allow-net --allow-env=HF_TOKEN index.ts
         env:
           NPM_CONFIG_REGISTRY: http://localhost:4874/
           HF_TOKEN: ${{ secrets.HF_TOKEN }}

CODEOWNERS (+1 -1)

@@ -1,6 +1,6 @@
 # Ownership for the Inference Package

-/packages/inference/ @julien-c @hanouticelina @SBrandeis @coyotte508
+/packages/inference/ @julien-c @hanouticelina @SBrandeis

 # Ownership for the Tasks Package

CONTRIBUTING.md (+6 -2)

@@ -8,7 +8,7 @@ We use `pnpm` as our package manager. You need to use it, eg `pnpm install` instead of `npm install`.

 If you want to format the whole codebase, you can do `pnpm -r format` at the root.

-Other than that, we avoid runtime dependencies unless they're strictly needed. For example, our only dependency is `hash-wasm`, and it's only in the browser context and when uploaded files are > 10MB.
+Other than that, we avoid runtime dependencies unless they're strictly needed. Even then, we prefer vendoring the code.

 ## Pull requests

@@ -18,7 +18,11 @@ It's not a hard requirement, but please consider using an icon from [Gitmoji](https://gitmoji.dev/)

 ## Tests

-If you want to run only specific tests, you can do `pnpm test -- -t "test name"`
+If you want to run only specific tests, you can do `pnpm test -- -t "test name"`.
+
+You can also do `npx vitest ./packages/hub/src/utils/XetBlob.spec.ts` to run a specific test file.
+
+Or `cd packages/hub && npx vitest --browser.name=chrome --browser.headless --config vitest-browser.config.mts ./src/utils/XetBlob.spec.ts` to run browser tests on a specific file.

 ## Adding a package

README.md (+12 -11)

@@ -10,7 +10,7 @@
 </p>

 ```ts
-// Programatically interact with the Hub
+// Programmatically interact with the Hub

 await createRepo({
   repo: { type: "model", name: "my-user/nlp-model" },
@@ -31,6 +31,7 @@ await uploadFile({

 await inference.chatCompletion({
   model: "meta-llama/Llama-3.1-8B-Instruct",
+  provider: "sambanova", // or together, fal-ai, replicate, cohere …
   messages: [
     {
       role: "user",
@@ -39,11 +40,11 @@ await inference.chatCompletion({
   ],
   max_tokens: 512,
   temperature: 0.5,
-  provider: "sambanova", // or together, fal-ai, replicate, cohere …
 });

 await inference.textToImage({
   model: "black-forest-labs/FLUX.1-dev",
+  provider: "replicate",
   inputs: "a picture of a green bird",
 });

@@ -54,7 +55,7 @@ await inference.textToImage({

 This is a collection of JS libraries to interact with the Hugging Face API, with TS types included.

-- [@huggingface/inference](packages/inference/README.md): Use HF Inference API (serverless), Inference Endpoints (dedicated) and third-party Inference Providers to make calls to 100,000+ Machine Learning models
+- [@huggingface/inference](packages/inference/README.md): Use HF Inference API (serverless), Inference Endpoints (dedicated) and all supported Inference Providers to make calls to 100,000+ Machine Learning models
 - [@huggingface/hub](packages/hub/README.md): Interact with huggingface.co to create or delete repos and commit / download files
 - [@huggingface/agents](packages/agents/README.md): Interact with HF models through a natural language interface
 - [@huggingface/gguf](packages/gguf/README.md): A GGUF parser that works on remotely hosted files.
@@ -84,7 +85,7 @@ npm install @huggingface/agents
 Then import the libraries in your code:

 ```ts
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 import { HfAgent } from "@huggingface/agents";
 import { createRepo, commit, deleteRepo, listFiles } from "@huggingface/hub";
 import type { RepoId } from "@huggingface/hub";
@@ -96,21 +97,21 @@ You can run our packages with vanilla JS, without any bundler, by using a CDN

 ```html
 <script type="module">
-  import { HfInference } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@3.5.1/+esm';
-  import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/hub@1.0.2/+esm";
+  import { InferenceClient } from 'https://cdn.jsdelivr.net/npm/@huggingface/inference@3.7.0/+esm';
+  import { createRepo, commit, deleteRepo, listFiles } from "https://cdn.jsdelivr.net/npm/@huggingface/hub@1.1.2/+esm";
 </script>
 ```

 ### Deno

 ```ts
 // esm.sh
-import { HfInference } from "https://esm.sh/@huggingface/inference"
+import { InferenceClient } from "https://esm.sh/@huggingface/inference"
 import { HfAgent } from "https://esm.sh/@huggingface/agents";

 import { createRepo, commit, deleteRepo, listFiles } from "https://esm.sh/@huggingface/hub"
 // or npm:
-import { HfInference } from "npm:@huggingface/inference"
+import { InferenceClient } from "npm:@huggingface/inference"
 import { HfAgent } from "npm:@huggingface/agents";

 import { createRepo, commit, deleteRepo, listFiles } from "npm:@huggingface/hub"
@@ -123,11 +124,11 @@ Get your HF access token in your [account settings](https://huggingface.co/settings/tokens)

 ### @huggingface/inference examples

 ```ts
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";

 const HF_TOKEN = "hf_...";

-const inference = new HfInference(HF_TOKEN);
+const inference = new InferenceClient(HF_TOKEN);

 // Chat completion API
 const out = await inference.chatCompletion({
@@ -179,7 +180,7 @@ await inference.imageToText({

 // Using your own dedicated inference endpoint: https://hf.co/docs/inference-endpoints/
 const gpt2 = inference.endpoint('https://xyz.eu-west-1.aws.endpoints.huggingface.cloud/gpt2');
-const { generated_text } = await gpt2.textGeneration({inputs: 'The answer to the universe is'});
+const { generated_text } = await gpt2.textGeneration({ inputs: 'The answer to the universe is' });

 // Chat Completion
 const llamaEndpoint = inference.endpoint(
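The change that recurs throughout this commit is the rename of `HfInference` to `InferenceClient`. As orientation for downstream users, here is a minimal before/after sketch based on the README examples above (model and provider values are taken from those examples; whether the old `HfInference` name remains as a deprecated alias is not visible in this diff):

```ts
// Before (pre-rename usage):
// import { HfInference } from "@huggingface/inference";
// const client = new HfInference("hf_...");

// After:
import { InferenceClient } from "@huggingface/inference";

const client = new InferenceClient("hf_..."); // constructor still takes an access token

const out = await client.chatCompletion({
	model: "meta-llama/Llama-3.1-8B-Instruct",
	provider: "sambanova", // the README now places `provider` right after `model`
	messages: [{ role: "user", content: "Hello, nice to meet you!" }],
	max_tokens: 512,
});
console.log(out.choices[0].message.content);
```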

e2e/deno/index.ts (+2 -2)

@@ -1,4 +1,4 @@
-import { HfInference } from "npm:@huggingface/inference@*";
+import { InferenceClient } from "npm:@huggingface/inference@*";
 import { whoAmI, listFiles } from "npm:@huggingface/hub@*";

 const info = await whoAmI({ credentials: { accessToken: "hf_hub.js" }, hubUrl: "https://hub-ci.huggingface.co" });
@@ -10,7 +10,7 @@ for await (const file of listFiles({ repo: "gpt2" })) {

 const token = Deno.env.get("HF_TOKEN");
 if (token) {
-	const hf = new HfInference(token);
+	const hf = new InferenceClient(token);

 	const tokenInfo = await whoAmI({ credentials: { accessToken: token } });
 	console.log(tokenInfo);

e2e/svelte/src/routes/+page.svelte (+2 -2)

@@ -1,8 +1,8 @@
 <script>
 	import { whoAmI, listFiles } from "@huggingface/hub";
-	import { HfInference } from "@huggingface/inference";
+	import { InferenceClient } from "@huggingface/inference";

-	const hf = new HfInference();
+	const hf = new InferenceClient();

 	const test = async () => {
 		const info = await whoAmI({ credentials: { accessToken: "hf_hub.js" }, hubUrl: "https://hub-ci.huggingface.co" });

e2e/ts/src/index.ts (+2 -2)

@@ -1,9 +1,9 @@
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 import { whoAmI } from "@huggingface/hub";

 const hfToken = process.env.token;

-const hf = new HfInference(hfToken);
+const hf = new InferenceClient(hfToken);

 (async () => {
 	const info = await whoAmI({ credentials: { accessToken: "hf_hub.js" }, hubUrl: "https://hub-ci.huggingface.co" });

packages/agents/README.md (+1 -1)

@@ -58,7 +58,7 @@ const agent = new HfAgent(

 #### From your own endpoints
-You can also specify your own endpoint, as long as it implements the same API, for exemple using [text generation inference](https://github.com/huggingface/text-generation-inference) and [Inference Endpoints](https://huggingface.co/inference-endpoints).
+You can also specify your own endpoint, as long as it implements the same API, for example using [text generation inference](https://github.com/huggingface/text-generation-inference) and [Inference Endpoints](https://huggingface.co/inference-endpoints).

 ```ts
 import { HfAgent, LLMFromEndpoint } from "@huggingface/agents";

packages/agents/package.json (+1 -1)

@@ -56,6 +56,6 @@
     "@types/node": "^18.13.0"
   },
   "dependencies": {
-    "@huggingface/inference": "^2.6.1"
+    "@huggingface/inference": "workspace:^"
   }
 }

packages/agents/pnpm-lock.yaml (+2 -13)

(Generated lockfile; diff not rendered by default.)

packages/agents/src/lib/evalBuilder.ts (+2 -2)

@@ -1,4 +1,4 @@
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";
 import type { Data, Tool } from "../types";

 // this function passes the tools & files to the context before calling eval
@@ -17,7 +17,7 @@ export async function evalBuilder(

 	// add tools to context
 	for (const tool of tools) {
-		const toolCall = (input: Promise<Data>) => tool.call?.(input, new HfInference(accessToken ?? ""));
+		const toolCall = (input: Promise<Data>) => tool.call?.(input, new InferenceClient(accessToken ?? ""));
 		// @ts-expect-error adding to the scope
 		globalThis[tool.name] = toolCall;
 	}

packages/agents/src/llms/LLMHF.ts (+3 -3)

@@ -1,8 +1,8 @@
 import type { LLM } from "../types";
-import { HfInference } from "@huggingface/inference";
+import { InferenceClient } from "@huggingface/inference";

 export function LLMFromHub(accessToken?: string, model?: string): LLM {
-	const inference = new HfInference(accessToken);
+	const inference = new InferenceClient(accessToken);

 	return async (prompt: string): Promise<string> => {
 		const formattedPrompt = "<|user|>" + prompt + "<|end|><|assistant|>";
@@ -20,7 +20,7 @@ export function LLMFromHub(accessToken?: string, model?: string): LLM {
 }

 export function LLMFromEndpoint(accessToken: string, endpoint: string): LLM {
-	const inference = new HfInference(accessToken).endpoint(endpoint);
+	const inference = new InferenceClient(accessToken).endpoint(endpoint);
 	return async (prompt: string): Promise<string> => {
 		const formattedPrompt = "<|user|>" + prompt + "<|end|><|assistant|>";


packages/agents/src/tools/imageToText.ts (+7 -4)

@@ -15,9 +15,12 @@ export const imageToTextTool: Tool = {
 		if (typeof data === "string") throw "Input must be a blob.";

 		return (
-			await inference.imageToText({
-				data,
-			})
-		).generated_text;
+			// eslint-disable-next-line @typescript-eslint/no-non-null-assertion
+			(
+				await inference.imageToText({
+					data,
+				})
+			).generated_text!
+		);
 	},
 };

packages/agents/src/types.d.ts (+2 -2)

@@ -1,12 +1,12 @@
-import type { HfInference } from "@huggingface/inference";
+import type { InferenceClient } from "@huggingface/inference";

 export type Data = string | Blob | ArrayBuffer;

 export interface Tool {
 	name: string;
 	description: string;
 	examples: Array<Example>;
-	call?: (input: Promise<Data>, inference: HfInference) => Promise<Data>;
+	call?: (input: Promise<Data>, inference: InferenceClient) => Promise<Data>;
 }

 export interface Example {

packages/agents/test/HfAgent.spec.ts (+2 -2)

@@ -1,7 +1,7 @@
 import { describe, expect, it } from "vitest";
 import { HfAgent, defaultTools, LLMFromHub, LLMFromEndpoint } from "../src";
 import type { Data } from "../src/types";
-import type { HfInference } from "@huggingface/inference";
+import type { InferenceClient } from "@huggingface/inference";

 const env = import.meta.env;
 if (!env.HF_TOKEN) {
@@ -33,7 +33,7 @@ describe("HfAgent", () => {
 			},
 		],
 		// eslint-disable-next-line @typescript-eslint/no-unused-vars
-		call: async (input: Promise<Data>, inference: HfInference): Promise<Data> => {
+		call: async (input: Promise<Data>, inference: InferenceClient): Promise<Data> => {
 			const data = await input;
 			if (typeof data !== "string") {
 				throw new Error("Input must be a string");

packages/gguf/src/cli.ts (+2 -1)

@@ -1,6 +1,7 @@
 #!/usr/bin/env node

-import { GGMLQuantizationType, gguf, ggufAllShards, GGUFParseOutput } from ".";
+import type { GGUFParseOutput } from ".";
+import { GGMLQuantizationType, ggufAllShards } from ".";
 import { GGML_QUANT_SIZES } from "./quant-descriptions";

 interface PrintColumnHeader {
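The CLI change above splits the type-only `GGUFParseOutput` import from the value imports, so the type never appears in the emitted JavaScript. A self-contained illustration of the same pattern on a Node built-in (not code from this repo):

```ts
// `import type` is erased at compile time; only the value import survives
// in the emitted JS. This matters for bundle size and for settings like
// isolatedModules that require type-only imports to be unambiguous.
import type { Stats } from "node:fs";
import { statSync } from "node:fs";

export function sizeOf(path: string): number {
	const s: Stats = statSync(path); // Stats is used purely as a type annotation
	return s.size;
}
```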

packages/hub/.eslintignore (+1)

@@ -1,2 +1,3 @@
 dist
 sha256.js
+src/vendor

packages/hub/README.md (+1)

@@ -174,3 +174,4 @@ Under the hood, `@huggingface/hub` uses a lazy blob implementation to load the file.
 ## Dependencies

 - `@huggingface/tasks` : Typings only
+- `@huggingface/lz4` : LZ4 decompression (vendored)

packages/hub/package.json (+1 -1)

@@ -1,7 +1,7 @@
 {
 	"name": "@huggingface/hub",
 	"packageManager": "pnpm@…",
-	"version": "1.0.2",
+	"version": "1.1.2",
 	"description": "Utilities to interact with the Hugging Face hub",
 	"repository": "https://github.com/huggingface/huggingface.js.git",
 	"publishConfig": {

packages/hub/src/index.ts (+1)

@@ -22,3 +22,4 @@ export { HubApiError, InvalidApiResponseFormatError } from "./error";
  * Only exported for E2Es convenience
  */
 export { sha256 as __internal_sha256 } from "./utils/sha256";
+export { XetBlob as __internal_XetBlob } from "./utils/XetBlob";

packages/hub/src/lib/download-file-to-cache-dir.spec.ts (+7 -2)

@@ -7,6 +7,7 @@ import type { Stats } from "node:fs";
 import { getHFHubCachePath, getRepoFolderName } from "./cache-management";
 import { toRepoId } from "../utils/toRepoId";
 import { downloadFileToCacheDir } from "./download-file-to-cache-dir";
+import { createSymlink } from "../utils/symlink";

 vi.mock("node:fs/promises", () => ({
 	writeFile: vi.fn(),
@@ -21,6 +22,10 @@ vi.mock("./paths-info", () => ({
 	pathsInfo: vi.fn(),
 }));

+vi.mock("../utils/symlink", () => ({
+	createSymlink: vi.fn(),
+}));
+
 const DUMMY_REPO: RepoId = {
 	name: "hello-world",
 	type: "model",
@@ -196,7 +201,7 @@ describe("downloadFileToCacheDir", () => {
 		expect(vi.mocked(lstat).mock.calls[0][0]).toBe(expectedBlob);

 		// symlink should have been created
-		expect(symlink).toHaveBeenCalledOnce();
+		expect(createSymlink).toHaveBeenCalledOnce();
 		// no download done
 		expect(fetchMock).not.toHaveBeenCalled();

@@ -283,6 +288,6 @@ describe("downloadFileToCacheDir", () => {
 		// 2. should rename the incomplete to the blob expected name
 		expect(rename).toHaveBeenCalledWith(incomplete, expectedBlob);
 		// 3. should create symlink pointing to blob
-		expect(symlink).toHaveBeenCalledWith(expectedBlob, expectPointer);
+		expect(createSymlink).toHaveBeenCalledWith({ sourcePath: expectedBlob, finalPath: expectPointer });
 	});
 });
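The spec now mocks a `createSymlink` helper (taking a `{ sourcePath, finalPath }` object) rather than `node:fs`'s `symlink` directly. A hypothetical sketch of such a wrapper, assuming only the call signature visible in the assertions above; the real `src/utils/symlink.ts` may well differ (e.g. a Windows fallback to copying):

```ts
// Hypothetical sketch, NOT the actual packages/hub/src/utils/symlink.ts.
// Only the { sourcePath, finalPath } signature is taken from the spec above.
import { rm, symlink } from "node:fs/promises";
import * as path from "node:path";

export async function createSymlink(params: { sourcePath: string; finalPath: string }): Promise<void> {
	// Remove any stale file or link first so the operation is idempotent (assumption).
	await rm(params.finalPath, { force: true });
	// Link relative to the pointer's directory so the cache stays relocatable (assumption).
	const target = path.relative(path.dirname(params.finalPath), params.sourcePath);
	await symlink(target, params.finalPath);
}
```

Because the module is stubbed with `vi.mock`, the assertions exercise only this call contract, not the filesystem.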
