192 changes: 181 additions & 11 deletions ort-perf-models.js
@@ -1,6 +1,6 @@

const models = [
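// Entry fields (inferred from how they are used in this file; not a formal schema):
//   n: display name, g: gendata group used to build the input feed,
//   m: model path under models/, o: extra URL query options (e.g. "&seqlen=1"),
//   e: tag grouping runs ("fp16", "tjs-demo", "sd", "new", "error", ...),
//   p: restrict to one provider, nowasm/device: per-entry execution hints.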
{ n: "mobilenet-v2", g: "img224", m: "mobilenet_v2/model-12.onnx" },
{ n: "mobilenet-v2", g: "img224", m: "mobilenetv2-12/mobilenetv2-12.onnx" },
{ e: "fp16", n: "mobilenet-v2-fp16", g: "img224-fp16", m: "mobilenet_v2/model-12-fp16.onnx" },
{ n: "albert-base-v2", g: "bert64", m: "tjs/albert-base-v2/onnx/model.onnx" },
{ n: "bert-base-uncased", g: "bert64", m: "tjs/bert-base-uncased/onnx/model.onnx" },
@@ -12,9 +12,26 @@ const models = [
{ n: "t5-decoder-seq1", g: "t5-decoder", m: "tjs/t5-small/onnx/decoder_model_merged.onnx", o: "&seqlen=1" },
{ n: "t5-decoder-iob-seq1", g: "t5-decoder", m: "tjs/t5-small/onnx/decoder_model_merged.onnx", o: "&seqlen=1&io_binding=1" },
{ n: "t5-v1.1-encoder", g: "t5-encoder", m: "tjs/google/t5-v1_1-small/onnx/encoder_model.onnx" },
{ n: "t5-v1.1-decoder-seq1", g: "flan-t5-decoder", m: "tjs/google/t5-v1_1-small/onnx/decoder_model_merged.onnx", o: "&seqlen=1" },
{ n: "swin2SR-lightweight-x2-64", g: "swin2SR-lightweight-x2-64", m: "swin2SR-lightweight-x2-64/model.onnx" },
{
n: "superb/wav2vec2-base-superb-ks",
g: "superb/wav2vec2-base-superb-ks",
m: "superb/wav2vec2-base-superb-ks/onnx/model.onnx"
},

{
n: "t5-v1.1-decoder-seq1",
g: "flan-t5-decoder",
m: "tjs/google/t5-v1_1-small/onnx/decoder_model_merged.onnx",
o: "&seqlen=1"
},
{ n: "flan-t5-encoder", g: "t5-encoder", m: "tjs/google/flan-t5-small/onnx/encoder_model.onnx" },
{ n: "flan-t5-decoder-seq1", g: "flan-t5-decoder", m: "tjs/google/flan-t5-small/onnx/decoder_model_merged.onnx", o: "&seqlen=1" },
{
n: "flan-t5-decoder-seq1",
g: "flan-t5-decoder",
m: "tjs/google/flan-t5-small/onnx/decoder_model_merged.onnx",
o: "&seqlen=1"
},

{ n: "gpt-neo-125m-seq1", g: "llm-decoder", m: "tjs/EleutherAI/gpt-neo-125M/onnx/decoder_model_merged.onnx", o: "&seqlen=1" },
{ n: "distilgpt2-seq1", g: "llm-decoder", m: "tjs/distilgpt2/onnx/decoder_model_merged.onnx", o: "&seqlen=1" },
@@ -36,11 +53,31 @@ const models = [
{ n: "sam-h-decoder-static", g: "sam-decoder", m: "sam/segment-anything-vit-h-static-shapes-static.onnx" },
{ n: "sam-mobile-encoder", g: "sam-mobile-encoder", m: "sam/mobile_sam-encoder.onnx" },
{ n: "bart-large-encoder", g: "bert64", m: "tjs/facebook/bart-large-cnn/onnx/encoder_model.onnx" },
{ n: "distilbert-base-uncased-mnli", g: "bert64", m: "tjs/distilbert-base-uncased-mnli/onnx/model.onnx", o: "&seqlen=50" },
{
n: "distilbert-base-uncased-mnli",
g: "bert64",
m: "tjs/distilbert-base-uncased-mnli/onnx/model.onnx",
o: "&seqlen=50"
},
{ n: "distilbart-cnn-6-6-encoder", g: "bert64", m: "tjs/distilbart-cnn-6-6/onnx/encoder_model.onnx", o: "&seqlen=168" },
{ n: "distilbart-cnn-6-6-decoder", g: "bart-cnn", m: "tjs/distilbart-cnn-6-6/onnx/decoder_model_merged.onnx", o: "&seqlen=168&min_query_count=5" },
{ n: "vit-gpt2-image-captioning-encoder", g: "img224", m: "tjs/vit-gpt2-image-captioning/onnx/encoder_model.onnx", o: "&seqlen=168" },
{ n: "vit-gpt2-image-captioning-decoder", g: "bart-large-12", m: "tjs/vit-gpt2-image-captioning/onnx/decoder_model_merged.onnx", o: "&seqlen=168" },
{
n: "distilbart-cnn-6-6-decoder",
g: "bart-cnn",
m: "tjs/distilbart-cnn-6-6/onnx/decoder_model_merged.onnx",
o: "&seqlen=168"
},
{
n: "vit-gpt2-image-captioning-encoder",
g: "img224",
m: "tjs/vit-gpt2-image-captioning/onnx/encoder_model.onnx",
o: "&seqlen=168"
},
{
n: "vit-gpt2-image-captioning-decoder",
g: "bart-large-12",
m: "tjs/vit-gpt2-image-captioning/onnx/decoder_model_merged.onnx",
o: "&seqlen=168"
},
{ n: "yolo-small", g: "img640x480", m: "tjs/hustvl/yolos-small/onnx/model.onnx" },
{ n: "detr-resnet-50", g: "detr", m: "tjs/facebook/detr-resnet-50/onnx/model.onnx" },
{ n: "detr-resnet-50-fp16", g: "detr", m: "tjs/facebook/detr-resnet-50/onnx/model-fp16.onnx" },
@@ -84,18 +121,136 @@ const models = [
{ n: "sd-turbo-unet-opt", g: "sd-turbo-unet", m: "sd-opt/unet/model.onnx", nowasm: true },
{ n: "sd-turbo-vae-opt", g: "sd-turbo-vae", m: "sd-opt/vae_decoder/model.onnx", nowasm: true },

{ n: "segformer", g: "img640x640", m: "partya/SegFormer/segformer-b5-finetuned-ade-640-640.onnx" },
{
n: "Xenova/vitmatte-small-distinctions-646",
g: "img640x640",
m: "tjs/Xenova/vitmatte-small-distinctions-646/onnx/model.onnx",
device: "cpu"
},
{
n: "Xenova/TinyLlama-1.1B-Chat-v1.0",
g: "llama",
m: "tjs/Xenova/TinyLlama-1.1B-Chat-v1.0/onnx/quantized_model.onnx"
},
{
n: "TinyLlama-1.1B-Chat-v1.0-int4",
g: "llama",
m: "tjs/TinyLlama-1.1B-Chat-v1.0-int4/onnx/decoder_model_merged.onnx"
},
{
n: "Xenova/slimsam-77-uniform-decoder",
g: "slimsam-decoder",
m: "slimsam-77-uniform/onnx/prompt_encoder_mask_decoder.onnx"
},
{
n: "phi2-int4",
g: "phi2",
m: "tjs/phi2-int4/onnx/decoder_model_merged.onnx"
},
{
n: "Xenova/yolov9-c",
g: "img640x640",
m: "tjs/Xenova/yolov9-c/onnx/model.onnx"
},
{ p: "wasm", n: "NbitMatMul", g: "NbitMatMul", m: "nbitmatmul/model-int4-combined.onnx" },
{ p: "wasm", n: "NBitMatMulFP16", g: "NbitMatMulFP16", m: "nbitmatmul_fp_quantize/model.onnx" },
{ n: "-", g: "-", m: "p-" },
{ n: "-", g: "-", m: "p-" },
{ n: "-", g: "-", m: "p-" },

// need to test
{ e: "new", n: "mms-tts-eng", g: "bert64", m: "tjs/mms-tts-eng/onnx/model.onnx" },
// transformers.js demo example
{ e: "tjs-demo", n: "t5-encoder", g: "t5-encoder", m: "tjs/t5-small/onnx/encoder_model.onnx", o: "&seqlen=128" },
{
e: "tjs-demo",
n: "t5-decoder-seq1",
g: "t5-decoder",
m: "tjs/t5-small/onnx/decoder_model_merged.onnx",
o: "&seqlen=1&enc_seqlen=128"
},
{
e: "tjs-demo",
n: "distilgpt2",
g: "llm-decoder",
m: "tjs/distilgpt2/onnx/decoder_model_merged.onnx",
o: "&seqlen=16"
},
{ e: "tjs-demo", n: "bert-base-cased", g: "bert64", m: "tjs/bert-base-cased/onnx/model.onnx", o: "&seqlen=9" },
{
e: "tjs-demo",
n: "bert-base-sentiment",
g: "bert64",
m: "tjs/bert-base-multilingual-uncased-sentiment/onnx/model.onnx",
o: "&seqlen=63"
},
{
e: "tjs-demo",
n: "distilbert-base-uncased-mnli",
g: "bert64",
m: "tjs/distilbert-base-uncased-mnli/onnx/model.onnx",
o: "&seqlen=50"
},
{
e: "tjs-demo",
n: "distilbert-distilled-squad",
g: "bert64",
m: "tjs/distilbert-base-cased-distilled-squad/onnx/model.onnx",
o: "&seqlen=262"
},
{
e: "tjs-demo",
n: "distilbart-cnn-6-6-encoder",
g: "bert64",
m: "tjs/distilbart-cnn-6-6/onnx/encoder_model.onnx",
o: "&seqlen=168"
},
{
e: "tjs-demo",
n: "distilbart-cnn-6-6-decoder",
g: "bart-cnn",
m: "tjs/distilbart-cnn-6-6/onnx/decoder_model_merged.onnx",
o: "&seqlen=168"
},
{
e: "tjs-demo",
n: "whisper-decoder-seq1",
g: "whisper-decoder",
m: "tjs/openai/whisper-tiny/onnx/decoder_model_merged.onnx",
o: "&seqlen=1"
},
{ e: "tjs-demo", n: "whisper-encoder", g: "whisper-encoder", m: "tjs/openai/whisper-tiny/onnx/encoder_model.onnx" },
{
e: "tjs-demo",
n: "vit-gpt2-image-captioning-encoder",
g: "img224",
m: "tjs/vit-gpt2-image-captioning/onnx/encoder_model.onnx",
o: "&seqlen=168"
},
{
e: "tjs-demo",
n: "vit-gpt2-image-captioning-decoder",
g: "bart-large-12",
m: "tjs/vit-gpt2-image-captioning/onnx/decoder_model_merged.onnx",
o: "&seqlen=168"
},
{ e: "tjs-demo", n: "vit-base-patch16-224", g: "img224", m: "tjs/google/vit-base-patch16-224/onnx/model.onnx" },
{ e: "tjs-demo", n: "clip-vit-base-patch16", g: "clip", m: "tjs/openai/clip-vit-base-patch16/onnx/model.onnx" },
{ e: "tjs-demo", n: "detr-resnet-50", g: "detr", m: "tjs/facebook/detr-resnet-50/onnx/model.onnx" },

// stable-diffusion
{ e: "sd", n: "sd-unet-fp16", g: "sd-unet", m: "sd-fp16/unet/model.onnx" },
{ e: "sd", n: "sd-vae-fp16", g: "sd-vae", m: "sd-fp16/vae_decoder/model.onnx" },
{ e: "sd", n: "lcm-vae", g: "sd-vae-fp32", m: "lcm/vae_decoder/model.onnx" },
{ e: "sd", n: "lcm-unet", g: "sd-unet-fp32", m: "lcm/unet/model.onnx" },
{
e: "sd",
n: "sd-win-unet-fp16",
g: "sd-unet",
m: "sd-win/Stable-Diffusion-v1.5-unet-fixed-size-batch-1-float16-no-shape-ops-embedded-weights.onnx"
},
{ e: "sd", n: "sd-win-vae-fp16", g: "sd-vae", m: "sd-win/sd2.1-inpainting-vae-decoder-float16-zeroed-weights.onnx" },

// ----------- not working -----------

@@ -111,19 +266,34 @@ const models = [
{ e: "error", n: "mobilevit", g: "mobilevit", m: "tjs/apple/mobilevit-small/onnx/model.onnx" },

// matmul fails
{ e: "error", n: "tiny_starcoder_py", g: "starcoder", m: "tjs/bigcode/tiny_starcoder_py/onnx/decoder_model_merged.onnx" },
{
e: "error",
n: "tiny_starcoder_py",
g: "starcoder",
m: "tjs/bigcode/tiny_starcoder_py/onnx/decoder_model_merged.onnx"
},

// decoder: "Gather" failed. Error: no GPU data for input: 1238119040
{ e: "error", n: "bart-large-decoder", g: "bart-large", m: "tjs/facebook/bart-large-cnn/onnx/decoder_model_merged.ort" },
{
e: "error",
n: "bart-large-decoder",
g: "bart-large",
m: "tjs/facebook/bart-large-cnn/onnx/decoder_model_merged.ort"
},
{ e: "error", n: "bart-large-cnn", g: "bart-large", m: "tjs/facebook/bart-large-cnn/onnx/decoder_model_merged.ort" },

// OOM
{ e: "error", n: "codegen-350M-mono", g: "llm-decoder", m: "tjs/Salesforce/codegen-350M-mono/onnx/decoder_model_merged.ort" },
{
e: "error",
n: "codegen-350M-mono",
g: "llm-decoder",
m: "tjs/Salesforce/codegen-350M-mono/onnx/decoder_model_merged.ort"
},

// Gather fails
{ e: "error", n: "xlm-roberta-base", g: "bert64", m: "tjs/xlm-roberta-base/onnx/model.ort" },
];
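
// Hypothetical usage sketch: pick an entry and compose a benchmark URL from
// its fields. The query parameter names below are an assumption for
// illustration, not this harness's documented URL scheme.
const example = models.find((x) => x.n === "t5-decoder-seq1");
const exampleUrl = `ort-perf.html?name=${example.n}&gen=${example.g}&model=${example.m}${example.o || ""}`;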

if (typeof module !== 'undefined' && typeof module.exports !== 'undefined') {
module.exports = { models };
}
51 changes: 47 additions & 4 deletions ort-perf.html
@@ -562,8 +562,47 @@ <h1>ort-web perf</h1>
feed['use_cache_branch'] = new ort.Tensor("bool", [false], [1]);
return feed;
}
throw new Error(`unknown gendata ${gen}`);
}
if (gen == "NbitMatMul") {
feed['X'] = fillTensor([512, 512], "float32", 0);
return feed;
}
if (gen == "NbitMatMulFP16") {
feed['X'] = fillTensor([512, 512], "float16", 0);
return feed;
}
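// The LLM feeds below (llama, phi2) measure the prefill step: each
// past_key_values tensor is created with a zero-length sequence dimension
// (shape [batch, kv_heads, 0, head_dim]), so the merged decoder runs with an
// empty KV cache on its first pass.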
if (gen == "llama") {
const tokens = [24446n, 502n, 546n, 262n, 46371n, 286n, 257n, 2588n, 392n, 7496n];
feed['input_ids'] = new ort.Tensor(new BigInt64Array(tokens), [1, tokens.length]);
feed['attention_mask'] = fillTensor([1, 1], "int64", 1n);
feed['position_ids'] = fillTensor([1, tokens.length], "int64", 1n);
const decoder_shape = [1, 4, 0, 64];
for (var i = 0; i < 22; i++) {
feed['past_key_values.' + i + '.key'] = fillTensor(decoder_shape, 'float16', 1);
feed['past_key_values.' + i + '.value'] = fillTensor(decoder_shape, 'float16', 1);
}
return feed;
}
if (gen == "phi2") {
const tokens = [24446n, 502n, 546n, 262n, 46371n, 286n, 257n, 2588n, 392n, 7496n];
feed['input_ids'] = new ort.Tensor(new BigInt64Array(tokens), [1, tokens.length]);
feed['attention_mask'] = fillTensor([1, 1], "int64", 1n);
feed['position_ids'] = fillTensor([1, tokens.length], "int64", 1n);
const decoder_shape = [1, 32, 0, 80];
for (var i = 0; i < 32; i++) {
feed['past_key_values.' + i + '.key'] = fillTensor(decoder_shape, 'float16', 1);
feed['past_key_values.' + i + '.value'] = fillTensor(decoder_shape, 'float16', 1);
}
return feed;
}
if (gen == "slimsam-decoder") {
feed["image_embeddings"] = fillTensor([1, 256, 64, 64], "float32", 0.5);
feed["image_positional_embeddings"] = fillTensor([1, 256, 64, 64], "float32", 0.5);
feed["input_labels"] = new ort.Tensor(new BigInt64Array([1n]), [1, 1, 1]);
feed["input_points"] = new ort.Tensor(new Float32Array([400., 500.]), [1, 1, 1, 2]);;
return feed;
}
throw new Error(`unknown gendata ${gen}`);
}
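
// fillTensor is defined elsewhere in this file; the sketch below only
// illustrates the (shape, type, value) signature the gendata cases above
// assume. It is a sketch under that assumption, not the file's actual code.
function fillTensorSketch(shape, type, value) {
const size = shape.reduce((a, b) => a * b, 1);
if (type === "int64") {
return new ort.Tensor(type, new BigInt64Array(size).fill(BigInt(value)), shape);
}
if (type === "float16") {
// ort-web backs fp16 tensors with a Uint16Array of raw half-precision bits.
return new ort.Tensor(type, new Uint16Array(size).fill(float16Bits(value)), shape);
}
return new ort.Tensor(type, new Float32Array(size).fill(value), shape);
}

// Truncating float32 -> float16 bit conversion (denormals flush to zero).
function float16Bits(v) {
const bits = new Uint32Array(new Float32Array([v]).buffer)[0];
const sign = (bits >>> 16) & 0x8000;
const exp = ((bits >>> 23) & 0xff) - 127 + 15;
if (exp <= 0) return sign; // underflow / zero
if (exp >= 31) return sign | 0x7c00; // overflow -> infinity
return sign | (exp << 10) | ((bits >>> 13) & 0x3ff);
}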

function create_download_link(cons_cout) {
let link = document.getElementById('download').childNodes[0];
Expand Down Expand Up @@ -720,9 +759,12 @@ <h1>ort-web perf</h1>
use_ort_model_bytes_directly: "1",
use_ort_model_bytes_for_initializers: "1",
disable_cpu_ep_fallback: config.prevent_fallback,
log_severity_level: config.verbose,
enableProfiling: config.profiler > 0,
graph_optimization_level: 99
}
},
freeDimensionOverrides: { batch_size: 1, },
// freeDimensionOverrides: { batch_size: 1, },
};
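// Note: log_severity_level above uses ONNX Runtime's convention
// (0 = verbose, 1 = info, 2 = warning, 3 = error, 4 = fatal), so
// config.verbose doubles as the severity knob.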

if (config.verbose) {
@@ -746,6 +788,7 @@ <h1>ort-web perf</h1>
name: "webgpu",
preferredLayout: config.preferred_layout
}];
opt.logLevel = "verbose";
if (config.profiler) {
ort.env.webgpu.profilingMode = 'default';
}
@@ -772,7 +815,7 @@
}

// opt.optimizedModelFilePath = 'opt.onnx';
// opt.graphOptimizationLevel = "disabled";
opt.graphOptimizationLevel = "disabled";

log(`loading... ${config.name}, ${config.provider}`);
const model_bytes = await fetchAndCache("models/" + config.model);