
Commit f9e360f

Update nscale-cloud provider

1 parent 64e5cdf commit f9e360f

7 files changed: +229 -0 lines changed

packages/inference/README.md (+2)

@@ -52,6 +52,7 @@ Currently, we support the following providers:
 - [Hyperbolic](https://hyperbolic.xyz)
 - [Nebius](https://studio.nebius.ai)
 - [Novita](https://novita.ai/?utm_source=github_huggingface&utm_medium=github_readme&utm_campaign=link)
+- [Nscale](https://nscale.com)
 - [Replicate](https://replicate.com)
 - [Sambanova](https://sambanova.ai)
 - [Together](https://together.xyz)

@@ -79,6 +80,7 @@ Only a subset of models are supported when requesting third-party providers. You
 - [Fireworks AI supported models](https://huggingface.co/api/partners/fireworks-ai/models)
 - [Hyperbolic supported models](https://huggingface.co/api/partners/hyperbolic/models)
 - [Nebius supported models](https://huggingface.co/api/partners/nebius/models)
+- [Nscale supported models](https://huggingface.co/api/partners/nscale-cloud/models)
 - [Replicate supported models](https://huggingface.co/api/partners/replicate/models)
 - [Sambanova supported models](https://huggingface.co/api/partners/sambanova/models)
 - [Together supported models](https://huggingface.co/api/partners/together/models)
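With these README entries in place, the new provider can be selected like any other. A minimal usage sketch, mirroring the calls exercised by the new tests in this commit; the token is a placeholder and the wrapper function name is illustrative:

import { InferenceClient } from "@huggingface/inference";

async function demoNscale() {
	// Placeholder token: use a real Hugging Face access token (or Nscale key).
	const client = new InferenceClient("hf_xxx");

	// Conversational task routed through the new "nscale-cloud" provider.
	const chat = await client.chatCompletion({
		model: "meta-llama/Llama-3.1-8B-Instruct",
		provider: "nscale-cloud",
		messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
	});
	console.log(chat.choices[0]?.message?.content);

	// Text-to-image task; the new tests assert that the result is a Blob.
	const image = await client.textToImage({
		model: "black-forest-labs/FLUX.1-schnell",
		provider: "nscale-cloud",
		inputs: "An astronaut riding a horse",
	});
	console.log(image instanceof Blob);
}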

packages/inference/src/lib/getProviderHelper.ts (+5)

@@ -8,6 +8,7 @@ import * as HFInference from "../providers/hf-inference";
 import * as Hyperbolic from "../providers/hyperbolic";
 import * as Nebius from "../providers/nebius";
 import * as Novita from "../providers/novita";
+import * as Nscale from "../providers/nscale-cloud";
 import * as OpenAI from "../providers/openai";
 import type {
 	AudioClassificationTaskHelper,

@@ -109,6 +110,10 @@ export const PROVIDERS: Record<InferenceProvider, Partial<Record<InferenceTask,
 		conversational: new Novita.NovitaConversationalTask(),
 		"text-generation": new Novita.NovitaTextGenerationTask(),
 	},
+	"nscale-cloud": {
+		"text-to-image": new Nscale.NscaleCloudTextToImageTask(),
+		conversational: new Nscale.NscaleCloudConversationalTask(),
+	},
 	openai: {
 		conversational: new OpenAI.OpenAIConversationalTask(),
 	},
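For orientation: PROVIDERS maps each provider ID to the task helpers it supports, and getProviderHelper resolves a helper for a provider/task pair. The snippet below is a simplified sketch of that lookup, not the literal implementation; resolveHelper is a hypothetical name and it assumes the PROVIDERS record and the InferenceProvider, InferenceTask, and TaskProviderHelper types already in scope in this file:

// Simplified sketch of the provider/task dispatch (illustrative only).
function resolveHelper(provider: InferenceProvider, task: InferenceTask): TaskProviderHelper {
	const helpersForProvider = PROVIDERS[provider];
	const helper = helpersForProvider?.[task];
	if (!helper) {
		// e.g. "nscale-cloud" only registers "conversational" and "text-to-image" above
		throw new Error(`Task "${task}" is not supported for provider "${provider}"`);
	}
	return helper;
}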

packages/inference/src/providers/consts.ts (+1)

@@ -25,6 +25,7 @@ export const HARDCODED_MODEL_ID_MAPPING: Record<InferenceProvider, Record<ModelI
 	hyperbolic: {},
 	nebius: {},
 	novita: {},
+	"nscale-cloud": {"meta-llama/Llama-3.1-8B-Instruct": "meta-llama/Llama-3.1-8B-Instruct", "black-forest-labs/FLUX.1-schnell": "black-forest-labs/FLUX.1-schnell"},
 	openai: {},
 	replicate: {},
 	sambanova: {},
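As the header comment of the new provider module notes, this hardcoded mapping is a development escape hatch: a model that is not yet registered on huggingface.co can be mapped by hand while testing locally. A hypothetical dev-only addition, using the same mutation pattern the new test file uses; the model ID below is a placeholder, not a registered mapping:

// Dev-only, hypothetical entry; the real mapping should come from the model mapping API on huggingface.co.
HARDCODED_MODEL_ID_MAPPING["nscale-cloud"] = {
	...HARDCODED_MODEL_ID_MAPPING["nscale-cloud"],
	// HF model ID          =>  provider-side model ID (placeholder)
	"my-org/my-new-model": "my-org/my-new-model",
};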
packages/inference/src/providers/nscale-cloud.ts (new file, +95)

@@ -0,0 +1,95 @@
/**
 * See the registered mapping of HF model ID => Nscale model ID here:
 *
 * https://huggingface.co/api/partners/nscale/models
 *
 * This is a publicly available mapping.
 *
 * If you want to try to run inference for a new model locally before it's registered on huggingface.co,
 * you can add it to the dictionary "HARDCODED_MODEL_ID_MAPPING" in consts.ts, for dev purposes.
 *
 * - If you work at Nscale and want to update this mapping, please use the model mapping API we provide on huggingface.co
 * - If you're a community member and want to add a new supported HF model to Nscale, please open an issue on the present repo
 *   and we will tag Nscale team members.
 *
 * Thanks!
 */
import { InferenceOutputError } from "../lib/InferenceOutputError";
import type { BodyParams, UrlParams } from "../types";
import { omit } from "../utils/omit";
import {
	BaseConversationalTask,
	BaseTextGenerationTask,
	TaskProviderHelper,
	type TextToImageTaskHelper,
} from "./providerHelper";

const NSCALE_API_BASE_URL = "https://inference.api.nscale.com";

interface NscaleCloudBase64ImageGeneration {
	data: Array<{
		b64_json: string;
	}>;
}

export class NscaleCloudConversationalTask extends BaseConversationalTask {
	constructor() {
		super("nscale-cloud", NSCALE_API_BASE_URL);
	}
}

export class NscaleCloudTextGenerationTask extends BaseTextGenerationTask {
	constructor() {
		super("nscale-cloud", NSCALE_API_BASE_URL);
	}

	override makeRoute(): string {
		return "v1/chat/completions";
	}
}

export class NscaleCloudTextToImageTask extends TaskProviderHelper implements TextToImageTaskHelper {
	constructor() {
		super("nscale-cloud", NSCALE_API_BASE_URL);
	}

	preparePayload(params: BodyParams): Record<string, unknown> {
		return {
			...omit(params.args, ["inputs", "parameters"]),
			...(params.args.parameters as Record<string, unknown>),
			response_format: "b64_json",
			prompt: params.args.inputs,
			model: params.model,
		};
	}

	makeRoute(params: UrlParams): string {
		void params;
		return "v1/images/generations";
	}

	async getResponse(
		response: NscaleCloudBase64ImageGeneration,
		url?: string,
		headers?: HeadersInit,
		outputType?: "url" | "blob"
	): Promise<string | Blob> {
		if (
			typeof response === "object" &&
			"data" in response &&
			Array.isArray(response.data) &&
			response.data.length > 0 &&
			"b64_json" in response.data[0] &&
			typeof response.data[0].b64_json === "string"
		) {
			const base64Data = response.data[0].b64_json;
			if (outputType === "url") {
				return `data:image/jpeg;base64,${base64Data}`;
			}
			return fetch(`data:image/jpeg;base64,${base64Data}`).then((res) => res.blob());
		}

		throw new InferenceOutputError("Expected Nscale text-to-image response format");
	}
}
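Concretely, for the textToImage call recorded in the test tapes, preparePayload produces the request body shown below, and getResponse then turns the returned b64_json into a Blob (or a data: URL when outputType is "url"). The exampleBody object and the b64JsonToBlob helper are illustrative names for this sketch, not part of the committed code:

// Request body produced by NscaleCloudTextToImageTask.preparePayload for the recorded test call.
const exampleBody = {
	response_format: "b64_json",
	prompt: "An astronaut riding a horse",
	model: "black-forest-labs/FLUX.1-schnell",
};

// Standalone equivalent of the base64 -> Blob conversion done in getResponse
// (requires a runtime whose fetch() accepts data: URLs, e.g. browsers or Node 18+).
async function b64JsonToBlob(b64: string): Promise<Blob> {
	const res = await fetch(`data:image/jpeg;base64,${b64}`);
	return res.blob();
}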

packages/inference/src/types.ts (+1)

@@ -46,6 +46,7 @@ export const INFERENCE_PROVIDERS = [
 	"hyperbolic",
 	"nebius",
 	"novita",
+	"nscale-cloud",
 	"openai",
 	"replicate",
 	"sambanova",

packages/inference/test/InferenceClient.spec.ts (+51)

@@ -1392,4 +1392,55 @@ describe.concurrent("InferenceClient", () => {
 		},
 		TIMEOUT
 	);
+	describe.concurrent(
+		"Nscale",
+		() => {
+			const client = new InferenceClient(env.HF_NSCALE_CLOUD_KEY ?? "dummy");
+
+			HARDCODED_MODEL_ID_MAPPING["nscale-cloud"] = {
+				"meta-llama/Llama-3.1-8B-Instruct": "meta-llama/Llama-3.1-8B-Instruct",
+				"black-forest-labs/FLUX.1-schnell": "black-forest-labs/FLUX.1-schnell",
+			};
+
+			it("chatCompletion", async () => {
+				const res = await client.chatCompletion({
+					model: "meta-llama/Llama-3.1-8B-Instruct",
+					provider: "nscale-cloud",
+					messages: [{ role: "user", content: "Complete this sentence with words, one plus one is equal " }],
+				});
+				if (res.choices && res.choices.length > 0) {
+					const completion = res.choices[0].message?.content;
+					expect(completion).toContain("two");
+				}
+			});
+			it("chatCompletion stream", async () => {
+				const stream = client.chatCompletionStream({
+					model: "meta-llama/Llama-3.1-8B-Instruct",
+					provider: "nscale-cloud",
+					messages: [{ role: "user", content: "Say 'this is a test'" }],
+					stream: true,
+				}) as AsyncGenerator<ChatCompletionStreamOutput>;
+				let fullResponse = "";
+				for await (const chunk of stream) {
+					if (chunk.choices && chunk.choices.length > 0) {
+						const content = chunk.choices[0].delta?.content;
+						if (content) {
+							fullResponse += content;
+						}
+					}
+				}
+				expect(fullResponse).toBeTruthy();
+				expect(fullResponse.length).toBeGreaterThan(0);
+			});
+			it("textToImage", async () => {
+				const res = await client.textToImage({
+					model: "black-forest-labs/FLUX.1-schnell",
+					provider: "nscale-cloud",
+					inputs: "An astronaut riding a horse",
+				});
+				expect(res).toBeInstanceOf(Blob);
+			});
+		},
+		TIMEOUT
+	);
 });

packages/inference/test/tapes.json (+74)

@@ -7155,5 +7155,79 @@
"vary": "Origin, Access-Control-Request-Method, Access-Control-Request-Headers"
}
}
},
"02f73b287b8500fe0103b36a7eaac523a994d628f1077e7e25595d570ace13e7": {
"url": "https://inference.api.nscale.com/v1/chat/completions",
"init": {
"headers": {
"Content-Type": "application/json"
},
"method": "POST",
"body": "{\"messages\":[{\"role\":\"user\",\"content\":\"Say 'this is a test'\"}],\"stream\":true,\"model\":\"meta-llama/Llama-3.1-8B-Instruct\"}"
},
"response": {
"body": "data: {\"choices\":[{\"delta\":{\"content\":\"\",\"role\":\"assistant\"},\"finish_reason\":null,\"index\":0,\"logprobs\":null}],\"created\":1744199718,\"id\":\"chatcmpl-147e143e-28d0-43de-96b4-c005351d6ada\",\"model\":\"meta-llama/Llama-3.1-8B-Instruct\",\"object\":\"chat.completion.chunk\",\"usage\":null}\n\ndata: {\"choices\":[{\"delta\":{\"content\":\"This\",\"role\":null},\"finish_reason\":null,\"index\":0,\"logprobs\":null}],\"created\":1744199718,\"id\":\"chatcmpl-147e143e-28d0-43de-96b4-c005351d6ada\",\"model\":\"meta-llama/Llama-3.1-8B-Instruct\",\"object\":\"chat.completion.chunk\",\"usage\":null}\n\ndata: {\"choices\":[{\"delta\":{\"content\":\" is\",\"role\":null},\"finish_reason\":null,\"index\":0,\"logprobs\":null}],\"created\":1744199718,\"id\":\"chatcmpl-147e143e-28d0-43de-96b4-c005351d6ada\",\"model\":\"meta-llama/Llama-3.1-8B-Instruct\",\"object\":\"chat.completion.chunk\",\"usage\":null}\n\ndata: {\"choices\":[{\"delta\":{\"content\":\" a\",\"role\":null},\"finish_reason\":null,\"index\":0,\"logprobs\":null}],\"created\":1744199718,\"id\":\"chatcmpl-147e143e-28d0-43de-96b4-c005351d6ada\",\"model\":\"meta-llama/Llama-3.1-8B-Instruct\",\"object\":\"chat.completion.chunk\",\"usage\":null}\n\ndata: {\"choices\":[{\"delta\":{\"content\":\" test\",\"role\":null},\"finish_reason\":null,\"index\":0,\"logprobs\":null}],\"created\":1744199718,\"id\":\"chatcmpl-147e143e-28d0-43de-96b4-c005351d6ada\",\"model\":\"meta-llama/Llama-3.1-8B-Instruct\",\"object\":\"chat.completion.chunk\",\"usage\":null}\n\ndata: {\"choices\":[{\"delta\":{\"content\":\".\",\"role\":null},\"finish_reason\":null,\"index\":0,\"logprobs\":null}],\"created\":1744199718,\"id\":\"chatcmpl-147e143e-28d0-43de-96b4-c005351d6ada\",\"model\":\"meta-llama/Llama-3.1-8B-Instruct\",\"object\":\"chat.completion.chunk\",\"usage\":null}\n\ndata: {\"choices\":[{\"delta\":{\"content\":\"\",\"role\":null},\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null}],\"created\":1744199718,\"id\":\"chatcmpl-147e143e-28d0-43de-96b4-c005351d6ada\",\"model\":\"meta-llama/Llama-3.1-8B-Instruct\",\"object\":\"chat.completion.chunk\",\"usage\":null}\n\ndata: {\"choices\":[],\"created\":1744199718,\"id\":\"chatcmpl-147e143e-28d0-43de-96b4-c005351d6ada\",\"model\":\"meta-llama/Llama-3.1-8B-Instruct\",\"object\":\"chat.completion.chunk\",\"usage\":{\"completion_tokens\":6,\"prompt_tokens\":42,\"total_tokens\":48}}\n\n",
"status": 201,
"statusText": "Created",
"headers": {
"cache-control": "no-cache",
"connection": "keep-alive",
"content-type": "text/event-stream",
"strict-transport-security": "max-age=31536000; includeSubDomains",
"transfer-encoding": "chunked"
}
}
},
"0fb086d4e10baa1a1faa65605858c8c3dc83a43e986a2f7f6c0846ac13c43991": {
"url": "https://inference.api.nscale.com/v1/chat/completions",
"init": {
"headers": {
"Content-Type": "application/json"
},
"method": "POST",
"body": "{\"messages\":[{\"role\":\"user\",\"content\":\"Complete this sentence with words, one plus one is equal \"}],\"model\":\"meta-llama/Llama-3.1-8B-Instruct\"}"
},
"response": {
"body": "{\"choices\":[{\"finish_reason\":\"stop\",\"index\":0,\"logprobs\":null,\"message\":{\"content\":\"...two. One plus one is equal to two.\",\"role\":\"assistant\"},\"stop_reason\":null}],\"created\":1744199718,\"id\":\"chatcmpl-cf0fc8ff-6e27-4c4c-8daa-f3ea8b8c0558\",\"model\":\"meta-llama/Llama-3.1-8B-Instruct\",\"object\":\"chat.completion\",\"prompt_logprobs\":null,\"usage\":{\"completion_tokens\":12,\"prompt_tokens\":46,\"total_tokens\":58}}",
"status": 200,
"statusText": "OK",
"headers": {
"connection": "keep-alive",
"content-type": "application/json; charset=utf-8",
"strict-transport-security": "max-age=31536000; includeSubDomains"
}
}
},
"ae0d3a9ca0e29820cd41575cb77fdabb1aa017ca5b9c6ae2febcafb876de1171": {
"url": "https://inference.api.nscale.com/v1/images/generations",
"init": {
"headers": {
"Content-Type": "application/json"
},
"method": "POST",
"body": "{\"response_format\":\"b64_json\",\"prompt\":\"An astronaut riding a horse\",\"model\":\"black-forest-labs/FLUX.1-schnell\"}"
},
"response": {
"body": "{\"created\":1744199720,\"data\":[{\"b64_json\":\"iVBORw0KGgoAAAANSUhEUgAAAgAAAAIACAIAAAB7GkOtAAAG/XRFWHRwcm9tcHQAeyI1IjogeyJpbnB1dHMiOiB7IndpZHRoIjogNTEyLCAiaGVpZ2h0IjogNTEyLCAiYmF0Y2hfc2l6ZSI6IDF9LCAiY2xhc3NfdHlwZSI6ICJFbXB0eUxhdGVudEltYWdlIiwgIl9tZXRhIjogeyJ0aXRsZSI6ICJFbXB0eSBMYXRlbnQgSW1hZ2UifX0sICI2IjogeyJpbnB1dHMiOiB7InRleHQiOiAicGhvdG9yZWFsaXN0aWMsIHJlYWxpc20sIG5hdHVyYWwgcGljdHVyZSwgQW4gYXN0cm9uYXV0IHJpZGluZyBhIGhvcnNlIiwgImNsaXAiOiBbIjExIiwgMF19LCAiY2xhc3NfdHlwZSI6ICJDTElQVGV4dEVuY29kZSIsICJfbWV0YSI6IHsidGl0bGUiOiAiQ0xJUCBUZXh0IEVuY29kZSAoUHJvbXB0KSJ9fSwgIjgiOiB7ImlucHV0cyI6IHsic2FtcGxlcyI6IFsiMTMiLCAwXSwgInZhZSI6IFsiMTAiLCAwXX0sICJjbGFzc190eXBlIjogIlZBRURlY29kZSIsICJfbWV0YSI6IHsidGl0bGUiOiAiVkFFIERlY29kZSJ9fSwgIjkiOiB7ImlucHV0cyI6IHsiZmlsZW5hbWVfcHJlZml4IjogIkNvbWZ5VUkiLCAiaW1hZ2VzIjogWyI4IiwgMF19LCAiY2xhc3NfdHlwZSI6ICJTYXZlSW1hZ2UiLCAiX21ldGEiOiB7InRpdGxlIjogIlNhdmUgSW1hZ2UifX0sICIxMCI6IHsiaW5wdXRzIjogeyJ2YWVfbmFtZSI6ICJhZS5zYWZldGVuc29ycyJ9LCAiY2xhc3NfdHlwZSI6ICJWQUVMb2FkZXIiLCAiX21ldGEiOiB7InRpdGxlIjogIkxvYWQgVkFFIn19LCAi\"}]}",
"status": 200,
"statusText": "OK",
"headers": {
"connection": "keep-alive",
"content-type": "application/json; charset=utf-8",
"strict-transport-security": "max-age=31536000; includeSubDomains"
}
}
},
"517465310e43388100ec05182659dd72409b2bc64a58eee092a4c7a402d220a2": {
"url": "data:image/jpeg;base64,iVBORw0KGgoAAAANSUhEUgAAAgAAAAIACAIAAAB7GkOtAAAG/XRFWHRwcm9tcHQAeyI1IjogeyJpbnB1dHMiOiB7IndpZHRoIjogNTEyLCAiaGVpZ2h0IjogNTEyLCAiYmF0Y2hfc2l6ZSI6IDF9LCAiY2xhc3NfdHlwZSI6ICJFbXB0eUxhdGVudEltYWdlIiwgIl9tZXRhIjogeyJ0aXRsZSI6ICJFbXB0eSBMYXRlbnQgSW1hZ2UifX0sICI2IjogeyJpbnB1dHMiOiB7InRleHQiOiAicGhvdG9yZWFsaXN0aWMsIHJlYWxpc20sIG5hdHVyYWwgcGljdHVyZSwgQW4gYXN0cm9uYXV0IHJpZGluZyBhIGhvcnNlIiwgImNsaXAiOiBbIjExIiwgMF19LCAiY2xhc3NfdHlwZSI6ICJDTElQVGV4dEVuY29kZSIsICJfbWV0YSI6IHsidGl0bGUiOiAiQ0xJUCBUZXh0IEVuY29kZSAoUHJvbXB0KSJ9fSwgIjgiOiB7ImlucHV0cyI6IHsic2FtcGxlcyI6IFsiMTMiLCAwXSwgInZhZSI6IFsiMTAiLCAwXX0sICJjbGFzc190eXBlIjogIlZBRURlY29kZSIsICJfbWV0YSI6IHsidGl0bGUiOiAiVkFFIERlY29kZSJ9fSwgIjkiOiB7ImlucHV0cyI6IHsiZmlsZW5hbWVfcHJlZml4IjogIkNvbWZ5VUkiLCAiaW1hZ2VzIjogWyI4IiwgMF19LCAiY2xhc3NfdHlwZSI6ICJTYXZlSW1hZ2UiLCAiX21ldGEiOiB7InRpdGxlIjogIlNhdmUgSW1hZ2UifX0sICIxMCI6IHsiaW5wdXRzIjogeyJ2YWVfbmFtZSI6ICJhZS5zYWZldGVuc29ycyJ9LCAiY2xhc3NfdHlwZSI6ICJWQUVMb2FkZXIiLCAiX21ldGEiOiB7InRpdGxlIjogIkxvYWQgVkFFIn19LCAi",
"init": {},
"response": {
"body": "",
"status": 200,
"statusText": "OK",
"headers": {
"content-type": "image/jpeg"
}
}
}
}
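The four entries added above are recorded request/response fixtures that the test suite replays instead of hitting the live Nscale endpoints: one streaming chat completion, one non-streaming chat completion, one image generation, and one fetch of the resulting data: URL. Each entry is keyed by a hash of the request and stores just enough to replay it. A rough shape for one entry, inferred from the JSON above (the Tape name is ours, not an official type):

// Inferred shape of a single tapes.json entry (sketch based on the fixtures above).
interface Tape {
	url: string;                      // request URL, or the data: URL being fetched
	init: {
		headers?: Record<string, string>;
		method?: string;
		body?: string;                  // serialized JSON request body
	};
	response: {
		body: string;                   // raw response body: JSON, an SSE stream, or empty
		status: number;
		statusText: string;
		headers: Record<string, string>;
	};
}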
