Skip to content

Commit 9d35e97

Browse files
authored
[inference provider] Wavespeed.ai Adds Image-to-Video Generation (#1818)
**What’s in this PR**

Wavespeed.ai Adds Image-to-Video Generation

**Test**

```
pnpm --filter @huggingface/inference test "test/InferenceClient.spec.ts" -t "Wavespeed AI"

> @huggingface/inference@x.y.z test /Users/shanliu/work/huggingface.js/packages/inference
> vitest run --config vitest.config.mts test/InferenceClient.spec.ts -t 'Wavespeed AI'

 RUN  v0.34.6 /Users/shanliu/work/huggingface.js/packages/inference

 ✓ test/InferenceClient.spec.ts (126) 113515ms
   ✓ InferenceClient (126) 113514ms
     ↓ backward compatibility (1) [skipped]
       ↓ works with old HfInference name [skipped]
     ↓ HF Inference (50) [skipped]
     ***
     ***
     ✓ Wavespeed AI (6) 113514ms
       ✓ textToImage - black-forest-labs/FLUX.1-schnell 6774ms
       ✓ textToImage - openfree/flux-chatgpt-ghibli-lora 11526ms
       ✓ textToImage - linoyts/yarn_art_Flux_LoRA 9074ms
       ✓ textToVideo - Wan-AI/Wan2.1-T2V-14B 113484ms
       ✓ imageToImage - HiDream-ai/HiDream-E1-Full 13074ms
       ✓ imageToVideo - Wan-AI/Wan2.1-I2V-14B-480P 54512ms
     ↓ PublicAI (2) [skipped]
       ↓ chatCompletion [skipped]
       ↓ chatCompletion stream [skipped]
     ↓ Baseten (2) [skipped]
       ↓ chatCompletion - Qwen3 235B Instruct [skipped]
       ↓ chatCompletion stream - Qwen3 235B [skipped]
     ↓ clarifai (2) [skipped]
       ↓ chatCompletion - DeepSeek-V3_1 [skipped]
       ↓ chatCompletion stream - DeepSeek-V3_1 [skipped]

 Test Files  1 passed (1)
      Tests  6 passed | 120 skipped (126)
```
1 parent 071a0ba commit 9d35e97

File tree

3 files changed

+48
-2
lines changed

3 files changed

+48
-2
lines changed

packages/inference/src/lib/getProviderHelper.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -178,6 +178,7 @@ export const PROVIDERS: Record<InferenceProvider, Partial<Record<InferenceTask,
178178
"text-to-image": new Wavespeed.WavespeedAITextToImageTask(),
179179
"text-to-video": new Wavespeed.WavespeedAITextToVideoTask(),
180180
"image-to-image": new Wavespeed.WavespeedAIImageToImageTask(),
181+
"image-to-video": new Wavespeed.WavespeedAIImageToVideoTask(),
181182
},
182183
"zai-org": {
183184
conversational: new Zai.ZaiConversationalTask(),

packages/inference/src/providers/wavespeed.ts

Lines changed: 26 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,17 @@
11
import type { TextToImageArgs } from "../tasks/cv/textToImage.js";
22
import type { ImageToImageArgs } from "../tasks/cv/imageToImage.js";
33
import type { TextToVideoArgs } from "../tasks/cv/textToVideo.js";
4+
import type { ImageToVideoArgs } from "../tasks/cv/imageToVideo.js";
45
import type { BodyParams, RequestArgs, UrlParams } from "../types.js";
56
import { delay } from "../utils/delay.js";
67
import { omit } from "../utils/omit.js";
78
import { base64FromBytes } from "../utils/base64FromBytes.js";
8-
import type { TextToImageTaskHelper, TextToVideoTaskHelper, ImageToImageTaskHelper } from "./providerHelper.js";
9+
import type {
10+
TextToImageTaskHelper,
11+
TextToVideoTaskHelper,
12+
ImageToImageTaskHelper,
13+
ImageToVideoTaskHelper,
14+
} from "./providerHelper.js";
915
import { TaskProviderHelper } from "./providerHelper.js";
1016
import {
1117
InferenceClientInputError,
@@ -72,7 +78,9 @@ abstract class WavespeedAITask extends TaskProviderHelper {
7278
return `/api/v3/${params.model}`;
7379
}
7480

75-
preparePayload(params: BodyParams<ImageToImageArgs | TextToImageArgs | TextToVideoArgs>): Record<string, unknown> {
81+
preparePayload(
82+
params: BodyParams<ImageToImageArgs | TextToImageArgs | TextToVideoArgs | ImageToVideoArgs>
83+
): Record<string, unknown> {
7684
const payload: Record<string, unknown> = {
7785
...omit(params.args, ["inputs", "parameters"]),
7886
...params.args.parameters,
@@ -189,3 +197,19 @@ export class WavespeedAIImageToImageTask extends WavespeedAITask implements Imag
189197
};
190198
}
191199
}
200+
201+
export class WavespeedAIImageToVideoTask extends WavespeedAITask implements ImageToVideoTaskHelper {
202+
constructor() {
203+
super(WAVESPEEDAI_API_BASE_URL);
204+
}
205+
206+
async preparePayloadAsync(args: ImageToVideoArgs): Promise<RequestArgs> {
207+
return {
208+
...args,
209+
inputs: args.parameters?.prompt,
210+
image: base64FromBytes(
211+
new Uint8Array(args.inputs instanceof ArrayBuffer ? args.inputs : await (args.inputs as Blob).arrayBuffer())
212+
),
213+
};
214+
}
215+
}

packages/inference/test/InferenceClient.spec.ts

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2336,6 +2336,13 @@ describe.skip("InferenceClient", () => {
23362336
adapter: "lora",
23372337
adapterWeightsPath: "pytorch_lora_weights.safetensors",
23382338
},
2339+
"Wan-AI/Wan2.1-I2V-14B-480P": {
2340+
provider: "wavespeed",
2341+
hfModelId: "Wan-AI/Wan2.1-I2V-14B-480P",
2342+
providerId: "wavespeed-ai/wan-2.1/i2v-480p",
2343+
status: "live",
2344+
task: "image-to-video",
2345+
},
23392346
};
23402347
it(`textToImage - black-forest-labs/FLUX.1-schnell`, async () => {
23412348
const res = await client.textToImage({
@@ -2400,6 +2407,20 @@ describe.skip("InferenceClient", () => {
24002407
});
24012408
expect(res).toBeInstanceOf(Blob);
24022409
});
2410+
it(`imageToVideo - Wan-AI/Wan2.1-I2V-14B-480P`, async () => {
2411+
const res = await client.imageToVideo({
2412+
model: "Wan-AI/Wan2.1-I2V-14B-480P",
2413+
provider: "wavespeed",
2414+
inputs: new Blob([readTestFile("cheetah.png")], { type: "image/png" }),
2415+
parameters: {
2416+
prompt: "The leopard chases its prey",
2417+
guidance_scale: 5,
2418+
num_inference_steps: 29,
2419+
seed: -1,
2420+
},
2421+
});
2422+
expect(res).toBeInstanceOf(Blob);
2423+
});
24032424
},
24042425
TIMEOUT
24052426
);

0 commit comments

Comments
 (0)