|
| 1 | +# Copyright (c) 2021 - present / Neuralmagic, Inc. All Rights Reserved. |
| 2 | +# |
| 3 | +# Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | +# you may not use this file except in compliance with the License. |
| 5 | +# You may obtain a copy of the License at |
| 6 | +# |
| 7 | +# http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | +# |
| 9 | +# Unless required by applicable law or agreed to in writing, |
| 10 | +# software distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | +# See the License for the specific language governing permissions and |
| 13 | +# limitations under the License. |
| 14 | + |
| 15 | +""" |
| 16 | +Analysis script for ONNX models with the DeepSparse engine. |
| 17 | +
|
| 18 | +########## |
| 19 | +Command help: |
| 20 | +usage: deepsparse.analyze [-h] [-wi NUM_WARMUP_ITERATIONS] |
| 21 | + [-bi NUM_ITERATIONS] [-ncores NUM_CORES] |
| 22 | + [-b BATCH_SIZE] [-ks KERNEL_SPARSITY] |
| 23 | + [-ksf KERNEL_SPARSITY_FILE] |
| 24 | + [--optimization OPTIMIZATION] [-i INPUT_SHAPES] [-q] |
| 25 | + [-x EXPORT_PATH] |
| 26 | + model_path |
| 27 | +
|
| 28 | +Analyze ONNX models in the DeepSparse Engine |
| 29 | +
|
| 30 | +positional arguments: |
| 31 | + model_path Path to an ONNX model file or SparseZoo model stub |
| 32 | +
|
| 33 | +optional arguments: |
| 34 | + -h, --help show this help message and exit |
| 35 | + -wi NUM_WARMUP_ITERATIONS, --num_warmup_iterations NUM_WARMUP_ITERATIONS |
| 36 | + The number of warmup runs that will be executed before |
| 37 | + the actual benchmarking |
| 38 | + -bi NUM_ITERATIONS, --num_iterations NUM_ITERATIONS |
| 39 | + The number of times the benchmark will be run |
| 40 | + -ncores NUM_CORES, --num_cores NUM_CORES |
| 41 | + The number of physical cores to run the analysis on, |
| 42 | + defaults to all physical cores available on the system |
| 43 | + -b BATCH_SIZE, --batch_size BATCH_SIZE |
| 44 | + The number of inputs that will run through the model |
| 45 | + at a time |
| 46 | + -ks KERNEL_SPARSITY, --kernel_sparsity KERNEL_SPARSITY |
| 47 | + Impose kernel sparsity for all convolutions. [0.0-1.0] |
| 48 | + -ksf KERNEL_SPARSITY_FILE, --kernel_sparsity_file KERNEL_SPARSITY_FILE |
| 49 | + Filepath to per-layer kernel sparsities JSON |
| 50 | + --optimization OPTIMIZATION |
| 51 | + To enable or disable optimizations (Tensor Columns) |
| 52 | + -i INPUT_SHAPES, --input_shapes INPUT_SHAPES |
| 53 | + Override the shapes of the inputs, i.e. -shapes |
| 54 | + "[1,2,3],[4,5,6],[7,8,9]" results in input0=[1,2,3] |
| 55 | + input1=[4,5,6] input2=[7,8,9] |
| 56 | + -q, --quiet Lower logging verbosity |
| 57 | + -x EXPORT_PATH, --export_path EXPORT_PATH |
| 58 | + Store results into a JSON file |
| 59 | +""" |
| 60 | + |
| 61 | +import argparse |
| 62 | +import json |
| 63 | +import os |
| 64 | + |
| 65 | +from deepsparse import analyze_model |
| 66 | +from deepsparse.utils import ( |
| 67 | + generate_random_inputs, |
| 68 | + model_to_path, |
| 69 | + override_onnx_input_shapes, |
| 70 | + parse_input_shapes, |
| 71 | +) |
| 72 | + |
| 73 | + |
def _str2bool(value):
    """Convert a command-line string into a real bool for argparse.

    argparse's ``type=bool`` is a trap: any non-empty string is truthy, so
    ``--optimization False`` evaluated to ``True`` and optimizations could
    never be disabled from the CLI. Accept the common spellings explicitly.

    :param value: raw argument string (or an actual bool, passed through)
    :return: the parsed boolean
    :raises argparse.ArgumentTypeError: if the value is not a recognized boolean
    """
    if isinstance(value, bool):
        return value
    normalized = value.strip().lower()
    if normalized in ("1", "t", "true", "y", "yes"):
        return True
    if normalized in ("0", "f", "false", "n", "no"):
        return False
    raise argparse.ArgumentTypeError(
        "expected a boolean value (true/false), got {!r}".format(value)
    )


def parse_args(argv=None):
    """Parse command-line arguments for DeepSparse model analysis.

    :param argv: optional list of argument strings to parse instead of
        ``sys.argv[1:]``; the default (None) preserves the original
        behavior of reading the process arguments
    :return: the populated ``argparse.Namespace``
    """
    parser = argparse.ArgumentParser(
        description="Analyze ONNX models in the DeepSparse Engine"
    )

    parser.add_argument(
        "model_path",
        type=str,
        help="Path to an ONNX model file or SparseZoo model stub",
    )
    parser.add_argument(
        "-wi",
        "--num_warmup_iterations",
        # The original used a backslash continuation that embedded a run of
        # indentation spaces in the help text; argparse collapses whitespace
        # when rendering help, so this rewrite is display-equivalent.
        help="The number of warmup runs that will be executed before the "
        "actual benchmarking",
        type=int,
        default=5,
    )
    parser.add_argument(
        "-bi",
        "--num_iterations",
        help="The number of times the benchmark will be run",
        type=int,
        default=5,
    )
    parser.add_argument(
        "-ncores",
        "--num_cores",
        type=int,
        default=None,
        help=(
            "The number of physical cores to run the analysis on, "
            "defaults to all physical cores available on the system"
        ),
    )
    parser.add_argument(
        "-b",
        "--batch_size",
        help="The number of inputs that will run through the model at a time",
        type=int,
        default=1,
    )
    parser.add_argument(
        "-ks",
        "--kernel_sparsity",
        help="Impose kernel sparsity for all convolutions. [0.0-1.0]",
        type=float,
    )
    parser.add_argument(
        "-ksf",
        "--kernel_sparsity_file",
        help="Filepath to per-layer kernel sparsities JSON",
        type=str,
    )
    parser.add_argument(
        "--optimization",
        help="To enable or disable optimizations (Tensor Columns)",
        # BUG FIX: was ``type=bool``, which turns every non-empty string
        # (including "False" and "0") into True.
        type=_str2bool,
        default=True,
    )
    parser.add_argument(
        "-i",
        "--input_shapes",
        help="Override the shapes of the inputs, "
        'i.e. -shapes "[1,2,3],[4,5,6],[7,8,9]" results in '
        "input0=[1,2,3] input1=[4,5,6] input2=[7,8,9]",
        type=str,
        default="",
    )
    parser.add_argument(
        "-q",
        "--quiet",
        help="Lower logging verbosity",
        action="store_true",
        default=False,
    )
    parser.add_argument(
        "-x",
        "--export_path",
        help="Store results into a JSON file",
        type=str,
        default=None,
    )

    return parser.parse_args(argv)
| 159 | + |
| 160 | + |
def layer_info_to_string(li, format_str):
    """Render one layer-info dict as a single table row via ``format_str``.

    The synthetic "sub_pyramid" entry carries no measurements, so it is
    rendered with placeholder values rather than reading keys it lacks.

    :param li: layer-info dict produced by the analysis result
    :param format_str: a ``str.format`` template expecting nine fields
    :return: the formatted row string
    """
    if li["name"] == "sub_pyramid":
        return format_str.format(li["name"], "[]", "[]", "[]", 0, 0, 0, 0, "")

    fields = (
        li["name"],
        str(list(li["output_dims"].values())),
        str(list(li["kernel_dims"].values())),
        str(list(li["strides"].values())),
        li["activation_sparsity"],
        li["average_run_time_in_ms"],
        100.0 * li["average_utilization"],
        li["average_teraflops_per_second"],
        li["canonical_name"],
    )
    return format_str.format(*fields)
| 176 | + |
| 177 | + |
def construct_layer_table(result):
    """Build the human-readable per-layer timing table from an analysis result.

    Emits a header row, one row per layer (sub-layers indented beneath their
    parent), then totals: total time, items/second, batch size, thread count.

    :param result: analysis result dict with "layer_info" and summary fields
    :return: the complete table as a newline-terminated string
    """
    header = (
        "Name | OutDims | "
        "KerDims | Strides | ActSpars | "
        "Time(ms) | Util(%) | TFLOPS | Canonical Name\n"
    )
    row_fmt = (
        "{:26} | {:26} | {:12} | {: >#08.4f} | "
        "{: >#08.4f} | {: >#08.4f} | {: >#08.4f} | {:12}"
    )

    pieces = [header]
    for layer in result["layer_info"]:
        pieces.append(layer_info_to_string(layer, "{:28}| " + row_fmt + "\n"))
        # Sub-layers are indented two spaces under their parent row.
        for sub_layer in layer["sub_layer_info"]:
            pieces.append(layer_info_to_string(sub_layer, "  {:26}| " + row_fmt + "\n"))

    pieces.append("Total Time(MS): {:05f}\n".format(result["average_total_time"]))
    pieces.append("Items per second: {:05f}\n".format(result["items_per_second"]))
    pieces.append("Batch Size: {}\n".format(result["batch_size"]))
    pieces.append("Number of threads: {}\n".format(result["num_threads"]))

    return "".join(pieces)
| 205 | + |
| 206 | + |
def process_line_item(total_layer_time, detailed_layer_time, li, strip_name):
    """Accumulate one layer's average runtime into the per-type totals.

    Mutates ``total_layer_time`` and ``detailed_layer_time`` in place.
    Entries without an "average_run_time_in_ms" measurement are skipped.

    :param total_layer_time: dict mapping layer type -> summed time (ms)
    :param detailed_layer_time: dict mapping "<type>|kernel=<dims>" -> summed time
    :param li: a layer-info dict from the analysis result
    :param strip_name: when True, strip the trailing "_<number>" and "-<ks>"
        suffixes from the name so timings aggregate by layer type
    """
    if "average_run_time_in_ms" not in li:
        # No timing recorded for this entry; nothing to accumulate.
        return

    layer_type = li["name"]
    if strip_name:
        # Drop the "_<unique number>" suffix, then the "-<ks percentage>" suffix.
        layer_type = layer_type.rsplit("_", 1)[0].rsplit("-", 1)[0]

    avg_layer_time = li["average_run_time_in_ms"]
    total_layer_time[layer_type] = (
        total_layer_time.get(layer_type, 0.0) + avg_layer_time
    )

    # Also bucket by kernel shape when one is present and non-empty.
    kernel_dims = li.get("kernel_dims")
    if kernel_dims:
        dims = list(kernel_dims.values())
        if dims:
            detailed_key = f"{layer_type}|kernel={dims}"
            detailed_layer_time[detailed_key] = (
                detailed_layer_time.get(detailed_key, 0.0) + avg_layer_time
            )
| 235 | + |
| 236 | + |
def construct_layer_statistics(result):
    """Summarize per-layer-type runtimes as a percentage-breakdown table.

    Layers with sub-layers are aggregated from their sub-layers (raw names);
    leaf layers are aggregated by stripped type name. Percentages are taken
    against the sum of all aggregated layer times.

    :param result: analysis result dict with "layer_info" and "batch_size"
    :return: the breakdown table as a newline-terminated string
    """
    total_layer_time = {}
    detailed_layer_time = {}
    for layer in result["layer_info"]:
        sub_layers = layer["sub_layer_info"]
        if sub_layers:
            for sub in sub_layers:
                process_line_item(total_layer_time, detailed_layer_time, sub, False)
        else:
            process_line_item(total_layer_time, detailed_layer_time, layer, True)

    summed_total_time = sum(total_layer_time.values())

    lines = ["== Layer Breakdown ==\n", "Name | Summed Time | Percent Taken\n"]
    for name, elapsed in total_layer_time.items():
        # Summary row for this layer type.
        lines.append(
            "{:30} | {:8.3f} | {:4.2f}%\n".format(
                name, elapsed, (elapsed / summed_total_time) * 100.0
            )
        )

        # Indented rows for any recorded kernel-shape sub-types (may be none).
        for key, sub_elapsed in detailed_layer_time.items():
            prefix, _, suffix = key.partition("|")
            if prefix == name:
                lines.append(
                    "  {:28} | {:8.3f} | {:4.2f}%\n".format(
                        suffix, sub_elapsed, (sub_elapsed / summed_total_time) * 100.0
                    )
                )

    batch_size = int(result["batch_size"])
    lines.append("== Summed Total Time: {:.4f} ms\n".format(summed_total_time))
    lines.append(
        "== Items per second: {:.4f}\n".format(
            (1000.0 / summed_total_time) * batch_size
        )
    )

    return "".join(lines)
| 278 | + |
| 279 | + |
def main():
    """CLI entry point: analyze an ONNX model with the DeepSparse engine.

    Parses arguments, prepares random inputs (optionally with overridden
    input shapes), runs the analysis, prints the per-layer tables unless
    ``--quiet`` was given, and optionally exports the raw results as JSON.
    """
    args = parse_args()

    input_shapes = parse_input_shapes(args.input_shapes)

    if args.optimization:
        # NOTE(review): purpose of WAND_ENABLE_SP_BENCH is not visible here —
        # presumably it enables the engine's per-layer benchmark mode when
        # optimizations are on; confirm against the engine docs.
        os.environ["WAND_ENABLE_SP_BENCH"] = "1"

    # Imposed KS can take either a float or a file, so the file (when given)
    # overrides the scalar sparsity value.
    imposed_kernel_sparsity = args.kernel_sparsity
    if args.kernel_sparsity_file:
        imposed_kernel_sparsity = args.kernel_sparsity_file

    orig_model_path = args.model_path
    model_path = model_to_path(args.model_path)

    print("Analyzing model: {}".format(orig_model_path))

    if input_shapes:
        # Shape overrides require a temporary rewritten model file.
        with override_onnx_input_shapes(model_path, input_shapes) as tmp_path:
            input_list = generate_random_inputs(tmp_path, args.batch_size)
    else:
        input_list = generate_random_inputs(model_path, args.batch_size)

    result = analyze_model(
        model_path,
        input_list,
        batch_size=args.batch_size,
        num_cores=args.num_cores,
        num_iterations=args.num_iterations,
        num_warmup_iterations=args.num_warmup_iterations,
        optimization_level=args.optimization,
        imposed_ks=imposed_kernel_sparsity,
        input_shapes=input_shapes,
    )

    # BUG FIX: the condition was inverted ("if args.quiet"), so the verbose
    # layer table and statistics were printed ONLY when the user asked for
    # lower verbosity. --quiet must suppress this output, not enable it.
    if not args.quiet:
        print(construct_layer_table(result))
        print(construct_layer_statistics(result))

    if args.export_path:
        # Export raw analysis results for downstream tooling.
        print("Saving analysis results to JSON file at {}".format(args.export_path))
        with open(args.export_path, "w") as out:
            json.dump(result, out, indent=2)
| 325 | + |
| 326 | + |
| 327 | +if __name__ == "__main__": |
| 328 | + main() |
0 commit comments