diff --git a/.ci/docker/requirements.txt b/.ci/docker/requirements.txt index f1f105122e..10dceb772d 100644 --- a/.ci/docker/requirements.txt +++ b/.ci/docker/requirements.txt @@ -15,7 +15,6 @@ pypandoc==1.15 pandocfilters==1.5.1 markdown==3.8.2 - # PyTorch Theme -e git+https://github.com/pytorch/pytorch_sphinx_theme.git@pytorch_sphinx_theme2#egg=pytorch_sphinx_theme2 @@ -51,12 +50,8 @@ onnxruntime evaluate accelerate>=0.20.1 - importlib-metadata==6.8.0 -# PyTorch Theme --e git+https://github.com/pytorch/pytorch_sphinx_theme.git#egg=pytorch_sphinx_theme - ipython sphinxcontrib.katex diff --git a/.gitignore b/.gitignore index b84ca258ca..3f1f927ee3 100644 --- a/.gitignore +++ b/.gitignore @@ -5,6 +5,8 @@ advanced pytorch_basics /recipes prototype +/unstable +sg_execution_times.rst #data things _data/ diff --git a/.jenkins/download_data.py b/.jenkins/download_data.py index cc07c72561..939e63fc7a 100644 --- a/.jenkins/download_data.py +++ b/.jenkins/download_data.py @@ -12,7 +12,7 @@ BEGINNER_DATA_DIR = REPO_BASE_DIR / "beginner_source" / "data" INTERMEDIATE_DATA_DIR = REPO_BASE_DIR / "intermediate_source" / "data" ADVANCED_DATA_DIR = REPO_BASE_DIR / "advanced_source" / "data" -PROTOTYPE_DATA_DIR = REPO_BASE_DIR / "prototype_source" / "data" +PROTOTYPE_DATA_DIR = REPO_BASE_DIR / "unstable_source" / "data" FILES_TO_RUN = os.getenv("FILES_TO_RUN") @@ -106,7 +106,7 @@ def download_lenet_mnist() -> None: ) def download_gpu_quantization_torchao() -> None: - # Download SAM model checkpoint for prototype_source/gpu_quantization_torchao_tutorial.py + # Download SAM model checkpoint for unstable_source/gpu_quantization_torchao_tutorial.py download_url_to_file("https://dl.fbaipublicfiles.com/segment_anything/sam_vit_h_4b8939.pth", prefix=PROTOTYPE_DATA_DIR, dst="sam_vit_h_4b8939.pth", diff --git a/.jenkins/validate_tutorials_built.py b/.jenkins/validate_tutorials_built.py index d34cf62b49..d31a2ec1e4 100644 --- a/.jenkins/validate_tutorials_built.py +++ 
b/.jenkins/validate_tutorials_built.py @@ -18,28 +18,17 @@ "beginner_source/examples_nn/polynomial_module", "beginner_source/examples_nn/dynamic_net", "beginner_source/examples_nn/polynomial_optim", - "beginner_source/former_torchies/autograd_tutorial_old", - "beginner_source/former_torchies/tensor_tutorial_old", "beginner_source/examples_autograd/polynomial_autograd", "beginner_source/examples_autograd/polynomial_custom_function", "intermediate_source/mnist_train_nas", # used by ax_multiobjective_nas_tutorial.py "intermediate_source/torch_compile_conv_bn_fuser", "intermediate_source/_torch_export_nightly_tutorial", # does not work on release "advanced_source/usb_semisup_learn", # fails with CUDA OOM error, should try on a different worker - "prototype_source/fx_graph_mode_ptq_dynamic", - "prototype_source/vmap_recipe", - "prototype_source/torchscript_freezing", - "prototype_source/nestedtensor", - "prototype_source/gpu_direct_storage", # requires specific filesystem + GPUDirect Storage to be set up - "recipes_source/recipes/saving_and_loading_models_for_inference", - "recipes_source/recipes/saving_multiple_models_in_one_file", + "unstable_source/gpu_direct_storage", # requires specific filesystem + GPUDirect Storage to be set up "recipes_source/recipes/tensorboard_with_pytorch", "recipes_source/recipes/what_is_state_dict", "recipes_source/recipes/profiler_recipe", - "recipes_source/recipes/save_load_across_devices", "recipes_source/recipes/warmstarting_model_using_parameters_from_a_different_model", - "recipes_source/recipes/dynamic_quantization", - "recipes_source/recipes/saving_and_loading_a_general_checkpoint", "recipes_source/recipes/benchmark", "recipes_source/recipes/tuning_guide", "recipes_source/recipes/zeroing_out_gradients", @@ -47,7 +36,6 @@ "recipes_source/recipes/timer_quick_start", "recipes_source/recipes/amp_recipe", "recipes_source/recipes/Captum_Recipe", - "intermediate_source/text_to_speech_with_torchaudio", 
"intermediate_source/tensorboard_profiler_tutorial", # reenable after 2.0 release. "advanced_source/semi_structured_sparse", # reenable after 3303 is fixed. "intermediate_source/torchrec_intro_tutorial.py", #failing with 2.8 reenable after 3498 diff --git a/.lintrunner.toml b/.lintrunner.toml index af93353ef8..0e4bf4aa46 100644 --- a/.lintrunner.toml +++ b/.lintrunner.toml @@ -66,15 +66,15 @@ exclude_patterns = [ "intermediate_source/tiatoolbox_tutorial.rst", "intermediate_source/torch_compile_tutorial.py", "intermediate_source/transformer_building_blocks.py", - "prototype_source/README.md", - "prototype_source/README.txt", - "prototype_source/backend_config_tutorial.rst", - "prototype_source/gpu_direct_storage.py", - "prototype_source/inductor_cpp_wrapper_tutorial.rst", - "prototype_source/inductor_windows.rst", - "prototype_source/maskedtensor_advanced_semantics.py", - "prototype_source/max_autotune_on_CPU_tutorial.rst", - "prototype_source/vmap_recipe.py", + "unstable_source/README.md", + "unstable_source/README.txt", + "unstable_source/backend_config_tutorial.rst", + "unstable_source/gpu_direct_storage.py", + "unstable_source/inductor_cpp_wrapper_tutorial.rst", + "unstable_source/inductor_windows.rst", + "unstable_source/maskedtensor_advanced_semantics.py", + "unstable_source/max_autotune_on_CPU_tutorial.rst", + "unstable_source/vmap_recipe.py", "recipes_source/README.txt", "recipes_source/amx.rst", "recipes_source/compiling_optimizer.rst", @@ -150,7 +150,7 @@ exclude_patterns = [ "intermediate_source/README.txt", "intermediate_source/TP_tutorial.rst", "intermediate_source/inductor_debug_cpu.py", - "prototype_source/README.txt", + "unstable_source/README.txt", "recipes_source/README.txt", "recipes_source/recipes/README.txt", "recipes_source/xeon_run_cpu.rst", diff --git a/conf.py b/conf.py index 4ad30e0e8b..6b50bf4ce4 100644 --- a/conf.py +++ b/conf.py @@ -170,9 +170,9 @@ def wrapper(*args, **kwargs): "intermediate_source", "advanced_source", "recipes_source", 
- "prototype_source", + "unstable_source", ], - "gallery_dirs": ["beginner", "intermediate", "advanced", "recipes", "prototype"], + "gallery_dirs": ["beginner", "intermediate", "advanced", "recipes", "unstable"], "filename_pattern": re.compile(SPHINX_SHOULD_RUN), "promote_jupyter_magic": True, "backreferences_dir": None, @@ -202,7 +202,6 @@ def wrapper(*args, **kwargs): html_theme_options = { "navigation_with_keys": False, "analytics_id": "GTM-T8XT4PS", - "pytorch_project": "tutorials", "logo": { "text": "", }, @@ -233,6 +232,7 @@ def wrapper(*args, **kwargs): "navbar_start": ["pytorch_version"], "navbar_center": "navbar-nav", "display_version": True, + "pytorch_project": "tutorials", } theme_variables = pytorch_sphinx_theme2.get_theme_variables() @@ -247,7 +247,7 @@ def wrapper(*args, **kwargs): "github_version": "main", "doc_path": ".", "library_links": theme_variables.get("library_links", []), - "pytorch_project": "tutorials", + #"pytorch_project": "tutorials", } diff --git a/index.rst b/index.rst index d305716992..d9a1356861 100644 --- a/index.rst +++ b/index.rst @@ -831,10 +831,10 @@ Additional Resources :maxdepth: 1 :hidden: - recipes/recipes_index + recipes_index .. toctree:: :maxdepth: 1 :hidden: - prototype/prototype_index + unstable_index diff --git a/recipes_source/recipes_index.rst b/recipes_index.rst similarity index 59% rename from recipes_source/recipes_index.rst rename to recipes_index.rst index 1c528ed289..53239633b6 100644 --- a/recipes_source/recipes_index.rst +++ b/recipes_index.rst @@ -33,114 +33,114 @@ from our full-length tutorials. .. customcarditem:: :header: Defining a Neural Network :card_description: Learn how to use PyTorch's torch.nn package to create and define a neural network for the MNIST dataset. 
- :image: ../_static/img/thumbnails/cropped/defining-a-network.PNG - :link: ../recipes/recipes/defining_a_neural_network.html + :image: _static/img/thumbnails/cropped/defining-a-network.PNG + :link: recipes/recipes/defining_a_neural_network.html :tags: Basics .. customcarditem:: :header: What is a state_dict in PyTorch :card_description: Learn how state_dict objects and Python dictionaries are used in saving or loading models from PyTorch. - :image: ../_static/img/thumbnails/cropped/what-is-a-state-dict.PNG - :link: ../recipes/recipes/what_is_state_dict.html + :image: _static/img/thumbnails/cropped/what-is-a-state-dict.PNG + :link: recipes/recipes/what_is_state_dict.html :tags: Basics .. customcarditem:: :header: Warmstarting model using parameters from a different model in PyTorch :card_description: Learn how warmstarting the training process by partially loading a model or loading a partial model can help your model converge much faster than training from scratch. - :image: ../_static/img/thumbnails/cropped/warmstarting-models.PNG - :link: ../recipes/recipes/warmstarting_model_using_parameters_from_a_different_model.html + :image: _static/img/thumbnails/cropped/warmstarting-models.PNG + :link: recipes/recipes/warmstarting_model_using_parameters_from_a_different_model.html :tags: Basics .. customcarditem:: :header: Zeroing out gradients in PyTorch :card_description: Learn when you should zero out gradients and how doing so can help increase the accuracy of your model. - :image: ../_static/img/thumbnails/cropped/zeroing-out-gradients.PNG - :link: ../recipes/recipes/zeroing_out_gradients.html + :image: _static/img/thumbnails/cropped/zeroing-out-gradients.PNG + :link: recipes/recipes/zeroing_out_gradients.html :tags: Basics .. 
customcarditem:: :header: PyTorch Benchmark :card_description: Learn how to use PyTorch's benchmark module to measure and compare the performance of your code - :image: ../_static/img/thumbnails/cropped/profiler.png - :link: ../recipes/recipes/benchmark.html + :image: _static/img/thumbnails/cropped/profiler.png + :link: recipes/recipes/benchmark.html :tags: Basics .. customcarditem:: :header: PyTorch Benchmark (quick start) :card_description: Learn how to measure snippet run times and collect instructions. - :image: ../_static/img/thumbnails/cropped/profiler.png - :link: ../recipes/recipes/timer_quick_start.html + :image: _static/img/thumbnails/cropped/profiler.png + :link: recipes/recipes/timer_quick_start.html :tags: Basics .. customcarditem:: :header: PyTorch Profiler :card_description: Learn how to use PyTorch's profiler to measure operators time and memory consumption - :image: ../_static/img/thumbnails/cropped/profiler.png - :link: ../recipes/recipes/profiler_recipe.html + :image: _static/img/thumbnails/cropped/profiler.png + :link: recipes/recipes/profiler_recipe.html :tags: Basics .. customcarditem:: :header: PyTorch Profiler with Instrumentation and Tracing Technology API (ITT API) support :card_description: Learn how to use PyTorch's profiler with Instrumentation and Tracing Technology API (ITT API) to visualize operators labeling in Intel® VTune™ Profiler GUI - :image: ../_static/img/thumbnails/cropped/profiler.png - :link: ../recipes/profile_with_itt.html + :image: _static/img/thumbnails/cropped/profiler.png + :link: recipes/profile_with_itt.html :tags: Basics .. customcarditem:: :header: Torch Compile IPEX Backend :card_description: Learn how to use torch.compile IPEX backend - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/torch_compile_backend_ipex.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/torch_compile_backend_ipex.html :tags: Basics .. 
customcarditem:: :header: Dynamic Compilation Control with ``torch.compiler.set_stance`` :card_description: Learn how to use torch.compiler.set_stance - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/torch_compiler_set_stance_tutorial.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/torch_compiler_set_stance_tutorial.html :tags: Compiler .. customcarditem:: :header: Reasoning about Shapes in PyTorch :card_description: Learn how to use the meta device to reason about shapes in your model. - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/recipes/reasoning_about_shapes.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/recipes/reasoning_about_shapes.html :tags: Basics .. customcarditem:: :header: Tips for Loading an nn.Module from a Checkpoint :card_description: Learn tips for loading an nn.Module from a checkpoint. - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/recipes/module_load_state_dict_tips.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/recipes/module_load_state_dict_tips.html :tags: Basics .. customcarditem:: :header: (beta) Using TORCH_LOGS to observe torch.compile :card_description: Learn how to use the torch logging APIs to observe the compilation process. - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/torch_logs.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/torch_logs.html :tags: Basics .. customcarditem:: :header: Extension points in nn.Module for loading state_dict and tensor subclasses :card_description: New extension points in nn.Module. 
- :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/recipes/swap_tensors.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/recipes/swap_tensors.html :tags: Basics .. customcarditem:: :header: torch.export AOTInductor Tutorial for Python runtime :card_description: Learn an end-to-end example of how to use AOTInductor for python runtime. - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/torch_export_aoti_python.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/torch_export_aoti_python.html :tags: Basics .. customcarditem:: :header: Demonstration of torch.export flow, common challenges and the solutions to address them :card_description: Learn how to export models for popular usecases - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/torch_export_challenges_solutions.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/torch_export_challenges_solutions.html :tags: Compiler,TorchCompile .. Interpretability @@ -148,15 +148,15 @@ from our full-length tutorials. .. customcarditem:: :header: Model Interpretability using Captum :card_description: Learn how to use Captum attribute the predictions of an image classifier to their corresponding image features and visualize the attribution results. - :image: ../_static/img/thumbnails/cropped/model-interpretability-using-captum.png - :link: ../recipes/recipes/Captum_Recipe.html + :image: _static/img/thumbnails/cropped/model-interpretability-using-captum.png + :link: recipes/recipes/Captum_Recipe.html :tags: Interpretability,Captum .. 
customcarditem:: :header: How to use TensorBoard with PyTorch :card_description: Learn basic usage of TensorBoard with PyTorch, and how to visualize data in TensorBoard UI - :image: ../_static/img/thumbnails/tensorboard_scalars.png - :link: ../recipes/recipes/tensorboard_with_pytorch.html + :image: _static/img/thumbnails/tensorboard_scalars.png + :link: recipes/recipes/tensorboard_with_pytorch.html :tags: Visualization,TensorBoard .. Automatic Mixed Precision @@ -164,8 +164,8 @@ from our full-length tutorials. .. customcarditem:: :header: Automatic Mixed Precision :card_description: Use torch.cuda.amp to reduce runtime and save memory on NVIDIA GPUs. - :image: ../_static/img/thumbnails/cropped/amp.png - :link: ../recipes/recipes/amp_recipe.html + :image: _static/img/thumbnails/cropped/amp.png + :link: recipes/recipes/amp_recipe.html :tags: Model-Optimization .. Performance @@ -173,22 +173,22 @@ from our full-length tutorials. .. customcarditem:: :header: Performance Tuning Guide :card_description: Tips for achieving optimal performance. - :image: ../_static/img/thumbnails/cropped/profiler.png - :link: ../recipes/recipes/tuning_guide.html + :image: _static/img/thumbnails/cropped/profiler.png + :link: recipes/recipes/tuning_guide.html :tags: Model-Optimization .. customcarditem:: :header: Optimizing CPU Performance on Intel® Xeon® with run_cpu Script :card_description: How to use run_cpu script for optimal runtime configurations on Intel® Xeon CPUs. - :image: ../_static/img/thumbnails/cropped/profiler.png - :link: ../recipes/xeon_run_cpu.html + :image: _static/img/thumbnails/cropped/profiler.png + :link: recipes/xeon_run_cpu.html :tags: Model-Optimization .. 
customcarditem:: :header: PyTorch Inference Performance Tuning on AWS Graviton Processors :card_description: Tips for achieving the best inference performance on AWS Graviton CPUs - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/inference_tuning_on_aws_graviton.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/inference_tuning_on_aws_graviton.html :tags: Model-Optimization .. Leverage Advanced Matrix Extensions @@ -196,8 +196,8 @@ from our full-length tutorials. .. customcarditem:: :header: Leverage Intel® Advanced Matrix Extensions :card_description: Learn to leverage Intel® Advanced Matrix Extensions. - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/amx.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/amx.html :tags: Model-Optimization .. (beta) Utilizing Torch Function modes with torch.compile @@ -205,8 +205,8 @@ from our full-length tutorials. .. customcarditem:: :header: (beta) Utilizing Torch Function modes with torch.compile :card_description: Override torch operators with Torch Function modes and torch.compile - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/torch_compile_torch_function_modes.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/torch_compile_torch_function_modes.html :tags: Model-Optimization .. (beta) Compiling the Optimizer with torch.compile @@ -214,8 +214,8 @@ from our full-length tutorials. .. customcarditem:: :header: (beta) Compiling the Optimizer with torch.compile :card_description: Speed up the optimizer using torch.compile - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/compiling_optimizer.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/compiling_optimizer.html :tags: Model-Optimization .. 
(beta) Running the compiled optimizer with an LR Scheduler @@ -223,16 +223,16 @@ from our full-length tutorials. .. customcarditem:: :header: (beta) Running the compiled optimizer with an LR Scheduler :card_description: Speed up training with LRScheduler and torch.compiled optimizer - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/compiling_optimizer_lr_scheduler.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/compiling_optimizer_lr_scheduler.html :tags: Model-Optimization .. (beta) Explicit horizontal fusion with foreach_map and torch.compile .. customcarditem:: :header: (beta) Explicit horizontal fusion with foreach_map and torch.compile :card_description: Horizontally fuse pointwise ops with torch.compile - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/foreach_map.py + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/foreach_map.html :tags: Model-Optimization .. Using User-Defined Triton Kernels with ``torch.compile`` @@ -240,8 +240,8 @@ from our full-length tutorials. .. customcarditem:: :header: Using User-Defined Triton Kernels with ``torch.compile`` :card_description: Learn how to use user-defined kernels with ``torch.compile`` - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/torch_compile_user_defined_triton_kernel_tutorial.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/torch_compile_user_defined_triton_kernel_tutorial.html :tags: Model-Optimization .. Compile Time Caching in ``torch.compile`` @@ -249,8 +249,8 @@ from our full-length tutorials. .. 
customcarditem:: :header: Compile Time Caching in ``torch.compile`` :card_description: Learn how to use compile time caching in ``torch.compile`` - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/torch_compile_caching_tutorial.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/torch_compile_caching_tutorial.html :tags: Model-Optimization .. Compile Time Caching Configurations @@ -258,8 +258,8 @@ from our full-length tutorials. .. customcarditem:: :header: Compile Time Caching Configurations :card_description: Learn how to configure compile time caching in ``torch.compile`` - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/torch_compile_caching_configuration_tutorial.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/torch_compile_caching_configuration_tutorial.html :tags: Model-Optimization .. Reducing Cold Start Compilation Time with Regional Compilation @@ -267,8 +267,8 @@ from our full-length tutorials. .. customcarditem:: :header: Reducing torch.compile cold start compilation time with regional compilation :card_description: Learn how to use regional compilation to control cold start compile time - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/regional_compilation.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/regional_compilation.html :tags: Model-Optimization .. Intel(R) Extension for PyTorch* @@ -276,8 +276,8 @@ from our full-length tutorials. .. customcarditem:: :header: Intel® Extension for PyTorch* :card_description: Introduction of Intel® Extension for PyTorch* - :image: ../_static/img/thumbnails/cropped/profiler.png - :link: ../recipes/intel_extension_for_pytorch.html + :image: _static/img/thumbnails/cropped/profiler.png + :link: recipes/intel_extension_for_pytorch.html :tags: Model-Optimization .. 
Intel(R) Neural Compressor for PyTorch* @@ -285,8 +285,8 @@ from our full-length tutorials. .. customcarditem:: :header: Intel® Neural Compressor for PyTorch :card_description: Ease-of-use quantization for PyTorch with Intel® Neural Compressor. - :image: ../_static/img/thumbnails/cropped/profiler.png - :link: ../recipes/intel_neural_compressor_for_pytorch.html + :image: _static/img/thumbnails/cropped/profiler.png + :link: recipes/intel_neural_compressor_for_pytorch.html :tags: Quantization,Model-Optimization .. Distributed Training @@ -294,43 +294,43 @@ from our full-length tutorials. .. customcarditem:: :header: Getting Started with DeviceMesh :card_description: Learn how to use DeviceMesh - :image: ../_static/img/thumbnails/cropped/profiler.png - :link: ../recipes/distributed_device_mesh.html + :image: _static/img/thumbnails/cropped/profiler.png + :link: recipes/distributed_device_mesh.html :tags: Distributed-Training .. customcarditem:: :header: Shard Optimizer States with ZeroRedundancyOptimizer :card_description: How to use ZeroRedundancyOptimizer to reduce memory consumption. - :image: ../_static/img/thumbnails/cropped/profiler.png - :link: ../recipes/zero_redundancy_optimizer.html + :image: _static/img/thumbnails/cropped/profiler.png + :link: recipes/zero_redundancy_optimizer.html :tags: Distributed-Training .. customcarditem:: :header: Direct Device-to-Device Communication with TensorPipe RPC :card_description: How to use RPC with direct GPU-to-GPU communication. - :image: ../_static/img/thumbnails/cropped/profiler.png - :link: ../recipes/cuda_rpc.html + :image: _static/img/thumbnails/cropped/profiler.png + :link: recipes/cuda_rpc.html :tags: Distributed-Training .. customcarditem:: :header: Getting Started with Distributed Checkpoint (DCP) :card_description: Learn how to checkpoint distributed models with Distributed Checkpoint package. 
- :image: ../_static/img/thumbnails/cropped/Getting-Started-with-DCP.png - :link: ../recipes/distributed_checkpoint_recipe.html + :image: _static/img/thumbnails/cropped/Getting-Started-with-DCP.png + :link: recipes/distributed_checkpoint_recipe.html :tags: Distributed-Training .. customcarditem:: :header: Asynchronous Checkpointing (DCP) :card_description: Learn how to checkpoint distributed models with Distributed Checkpoint package. - :image: ../_static/img/thumbnails/cropped/Getting-Started-with-DCP.png - :link: ../recipes/distributed_async_checkpoint_recipe.html + :image: _static/img/thumbnails/cropped/Getting-Started-with-DCP.png + :link: recipes/distributed_async_checkpoint_recipe.html :tags: Distributed-Training .. customcarditem:: :header: Getting Started with CommDebugMode :card_description: Learn how to use CommDebugMode for DTensors - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../recipes/distributed_comm_debug_mode.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: recipes/distributed_comm_debug_mode.html :tags: Distributed-Training .. End of tutorial card section @@ -342,44 +342,44 @@ from our full-length tutorials. 
:maxdepth: 2 :hidden: - /recipes/recipes/defining_a_neural_network - /recipes/torch_logs - /recipes/recipes/what_is_state_dict - /recipes/recipes/warmstarting_model_using_parameters_from_a_different_model - /recipes/recipes/zeroing_out_gradients - /recipes/recipes/profiler_recipe - /recipes/recipes/profile_with_itt - /recipes/recipes/Captum_Recipe - /recipes/recipes/tensorboard_with_pytorch - /recipes/recipes/dynamic_quantization - /recipes/recipes/amp_recipe - /recipes/recipes/tuning_guide - /recipes/recipes/xeon_run_cpu - /recipes/recipes/intel_extension_for_pytorch - /recipes/compiling_optimizer - /recipes/recipes/timer_quick_start - /recipes/torch_compile_backend_ipex - /recipes/zero_redundancy_optimizer - /recipes/cuda_rpc - /recipes/distributed_comm_debug_mode - /recipes/torch_export_challenges_solutions - /recipes/recipes/benchmark - /recipes/recipes/module_load_state_dict_tips - /recipes/recipes/reasoning_about_shapes - /recipes/recipes/swap_tensors - /recipes/torch_export_aoti_python - /recipes/recipes/tensorboard_with_pytorch - /recipes/inference_tuning_on_aws_graviton - /recipes/amx - /recipes/torch_compile_torch_function_modes - /recipes/compiling_optimizer_lr_scheduler - /recipes/foreach_map - /recipes/torch_compile_user_defined_triton_kernel_tutorial - /recipes/torch_compile_caching_tutorial - /recipes/torch_compile_caching_configuration_tutorial - /recipes/regional_compilation - /recipes/intel_extension_for_pytorch.html - /recipes/intel_neural_compressor_for_pytorch - /recipes/distributed_device_mesh - /recipes/distributed_checkpoint_recipe - /recipes/distributed_async_checkpoint_recipe + recipes/recipes/defining_a_neural_network + recipes/torch_logs + recipes/recipes/what_is_state_dict + recipes/recipes/warmstarting_model_using_parameters_from_a_different_model + recipes/recipes/zeroing_out_gradients + recipes/recipes/profiler_recipe + recipes/recipes/profile_with_itt + recipes/recipes/Captum_Recipe + recipes/recipes/tensorboard_with_pytorch + 
recipes/recipes/dynamic_quantization + recipes/recipes/amp_recipe + recipes/recipes/tuning_guide + recipes/recipes/xeon_run_cpu + recipes/recipes/intel_extension_for_pytorch + recipes/compiling_optimizer + recipes/recipes/timer_quick_start + recipes/torch_compile_backend_ipex + recipes/zero_redundancy_optimizer + recipes/cuda_rpc + recipes/distributed_comm_debug_mode + recipes/torch_export_challenges_solutions + recipes/recipes/benchmark + recipes/recipes/module_load_state_dict_tips + recipes/recipes/reasoning_about_shapes + recipes/recipes/swap_tensors + recipes/torch_export_aoti_python + recipes/recipes/tensorboard_with_pytorch + recipes/inference_tuning_on_aws_graviton + recipes/amx + recipes/torch_compile_torch_function_modes + recipes/compiling_optimizer_lr_scheduler + recipes/foreach_map + recipes/torch_compile_user_defined_triton_kernel_tutorial + recipes/torch_compile_caching_tutorial + recipes/torch_compile_caching_configuration_tutorial + recipes/regional_compilation + recipes/intel_extension_for_pytorch + recipes/intel_neural_compressor_for_pytorch + recipes/distributed_device_mesh + recipes/distributed_checkpoint_recipe + recipes/distributed_async_checkpoint_recipe diff --git a/prototype_source/prototype_index.rst b/unstable_index.rst similarity index 65% rename from prototype_source/prototype_index.rst rename to unstable_index.rst index fbe3744c52..1c10999632 100644 --- a/prototype_source/prototype_index.rst +++ b/unstable_index.rst @@ -41,8 +41,8 @@ decide if we want to upgrade the level of commitment or to fail fast. .. customcarditem:: :header: (prototype) Accelerating BERT with semi-structured (2:4) sparsity :card_description: Prune BERT to be 2:4 sparse and accelerate for inference. - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: prototype/semi_structured_sparse.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: unstable/semi_structured_sparse.html :tags: Model-Optimiziation .. 
Modules @@ -50,8 +50,8 @@ decide if we want to upgrade the level of commitment or to fail fast. .. customcarditem:: :header: Skipping Module Parameter Initialization in PyTorch 1.10 :card_description: Describes skipping parameter initialization during module construction in PyTorch 1.10, avoiding wasted computation. - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../prototype/skip_param_init.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: unstable/skip_param_init.html :tags: Modules .. vmap @@ -59,8 +59,8 @@ decide if we want to upgrade the level of commitment or to fail fast. .. customcarditem:: :header: Using torch.vmap :card_description: Learn about torch.vmap, an autovectorizer for PyTorch operations. - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../prototype/vmap_recipe.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: unstable/vmap_recipe.html :tags: vmap .. NestedTensor @@ -68,8 +68,8 @@ decide if we want to upgrade the level of commitment or to fail fast. .. customcarditem:: :header: Nested Tensor :card_description: Learn about nested tensors, the new way to batch heterogeneous-length data - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../prototype/nestedtensor.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: unstable/nestedtensor.html :tags: NestedTensor .. MaskedTensor @@ -77,29 +77,29 @@ decide if we want to upgrade the level of commitment or to fail fast. .. customcarditem:: :header: MaskedTensor Overview :card_description: Learn about masked tensors, the source of truth for specified and unspecified values - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../prototype/maskedtensor_overview.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: unstable/maskedtensor_overview.html :tags: MaskedTensor .. 
customcarditem:: :header: Masked Tensor Sparsity :card_description: Learn about how to leverage sparse layouts (e.g. COO and CSR) in MaskedTensor - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../prototype/maskedtensor_sparsity.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: unstable/maskedtensor_sparsity.html :tags: MaskedTensor .. customcarditem:: :header: Masked Tensor Advanced Semantics :card_description: Learn more about Masked Tensor's advanced semantics (reductions and comparing vs. NumPy's MaskedArray) - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../prototype/maskedtensor_advanced_semantics.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: unstable/maskedtensor_advanced_semantics.html :tags: MaskedTensor .. customcarditem:: :header: MaskedTensor: Simplifying Adagrad Sparse Semantics :card_description: See a showcase on how masked tensors can enable sparse semantics and provide for a cleaner dev experience - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../prototype/maskedtensor_adagrad.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: unstable/maskedtensor_adagrad.html :tags: MaskedTensor .. Model-Optimization @@ -107,53 +107,53 @@ decide if we want to upgrade the level of commitment or to fail fast. .. customcarditem:: :header: Inductor Cpp Wrapper Tutorial :card_description: Speed up your models with Inductor Cpp Wrapper - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../prototype/inductor_cpp_wrapper_tutorial.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: unstable/inductor_cpp_wrapper_tutorial.html :tags: Model-Optimization .. 
customcarditem:: :header: Inductor Windows CPU Tutorial :card_description: Speed up your models with Inductor On Windows CPU - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../prototype/inductor_windows.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: unstable/inductor_windows.html :tags: Model-Optimization .. customcarditem:: :header: Use max-autotune compilation on CPU to gain additional performance boost :card_description: Tutorial for max-autotune mode on CPU to gain additional performance boost - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../prototype/max_autotune_on_CPU_tutorial.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: unstable/max_autotune_on_CPU_tutorial.html :tags: Model-Optimization .. Distributed .. customcarditem:: :header: Flight Recorder Tutorial :card_description: Debug stuck jobs easily with Flight Recorder - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../prototype/flight_recorder_tutorial.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: unstable/flight_recorder_tutorial.html :tags: Distributed, Debugging, FlightRecorder .. customcarditem:: :header: Context Parallel Tutorial :card_description: Parallelize the attention computation along sequence dimension - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../prototype/context_parallel.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: unstable/context_parallel.html :tags: Distributed, Context Parallel .. Integration .. customcarditem:: :header: Out-of-tree extension autoloading in Python :card_description: Learn how to improve the seamless integration of out-of-tree extension with PyTorch based on the autoloading mechanism. 
- :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../prototype/python_extension_autoload.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: unstable/python_extension_autoload.html :tags: Extending-PyTorch, Frontend-APIs .. GPUDirect Storage .. customcarditem:: :header: (prototype) Using GPUDirect Storage :card_description: Learn how to use GPUDirect Storage in PyTorch. - :image: ../_static/img/thumbnails/cropped/generic-pytorch-logo.png - :link: ../prototype/gpu_direct_storage.html + :image: _static/img/thumbnails/cropped/generic-pytorch-logo.png + :link: unstable/gpu_direct_storage.html :tags: GPUDirect-Storage .. End of tutorial card section @@ -166,18 +166,18 @@ decide if we want to upgrade the level of commitment or to fail fast. :maxdepth: 2 :hidden: - /prototype/context_parallel - /prototype/flight_recorder_tutorial - /prototype/inductor_cpp_wrapper_tutorial - /prototype/inductor_windows - /prototype/vmap_recipe - /prototype/vulkan_workflow - /prototype/nestedtensor - /prototype/maskedtensor_overview - /prototype/maskedtensor_sparsity - /prototype/maskedtensor_advanced_semantics - /prototype/maskedtensor_adagrad - /prototype/python_extension_autoload - /prototype/gpu_direct_storage.html - /prototype/max_autotune_on_CPU_tutorial - /prototype/skip_param_init.html + unstable/context_parallel + unstable/flight_recorder_tutorial + unstable/inductor_cpp_wrapper_tutorial + unstable/inductor_windows + unstable/vmap_recipe + unstable/vulkan_workflow + unstable/nestedtensor + unstable/maskedtensor_overview + unstable/maskedtensor_sparsity + unstable/maskedtensor_advanced_semantics + unstable/maskedtensor_adagrad + unstable/python_extension_autoload + unstable/gpu_direct_storage + unstable/max_autotune_on_CPU_tutorial + unstable/skip_param_init diff --git a/prototype_source/README.md b/unstable_source/README.md similarity index 100% rename from prototype_source/README.md rename to
unstable_source/README.md diff --git a/prototype_source/README.txt b/unstable_source/README.txt similarity index 100% rename from prototype_source/README.txt rename to unstable_source/README.txt diff --git a/prototype_source/backend_config_tutorial.rst b/unstable_source/backend_config_tutorial.rst similarity index 100% rename from prototype_source/backend_config_tutorial.rst rename to unstable_source/backend_config_tutorial.rst diff --git a/prototype_source/context_parallel.rst b/unstable_source/context_parallel.rst similarity index 100% rename from prototype_source/context_parallel.rst rename to unstable_source/context_parallel.rst diff --git a/prototype_source/distributed_rpc_profiling.rst b/unstable_source/distributed_rpc_profiling.rst similarity index 100% rename from prototype_source/distributed_rpc_profiling.rst rename to unstable_source/distributed_rpc_profiling.rst diff --git a/prototype_source/flight_recorder_tutorial.rst b/unstable_source/flight_recorder_tutorial.rst similarity index 99% rename from prototype_source/flight_recorder_tutorial.rst rename to unstable_source/flight_recorder_tutorial.rst index 2e643b133c..e029659890 100644 --- a/prototype_source/flight_recorder_tutorial.rst +++ b/unstable_source/flight_recorder_tutorial.rst @@ -1,4 +1,4 @@ -(prototype) Flight Recorder for Debugging Stuck Jobs +Flight Recorder for Debugging Stuck Jobs ==================================================== **Author**: `Chirag Pandya `_, `Junjie Wang `_ diff --git a/prototype_source/gpu_direct_storage.py b/unstable_source/gpu_direct_storage.py similarity index 100% rename from prototype_source/gpu_direct_storage.py rename to unstable_source/gpu_direct_storage.py diff --git a/prototype_source/gpu_quantization_torchao_tutorial.py b/unstable_source/gpu_quantization_torchao_tutorial.py similarity index 100% rename from prototype_source/gpu_quantization_torchao_tutorial.py rename to unstable_source/gpu_quantization_torchao_tutorial.py diff --git 
a/prototype_source/inductor_cpp_wrapper_tutorial.rst b/unstable_source/inductor_cpp_wrapper_tutorial.rst similarity index 100% rename from prototype_source/inductor_cpp_wrapper_tutorial.rst rename to unstable_source/inductor_cpp_wrapper_tutorial.rst diff --git a/prototype_source/inductor_windows.rst b/unstable_source/inductor_windows.rst similarity index 100% rename from prototype_source/inductor_windows.rst rename to unstable_source/inductor_windows.rst diff --git a/prototype_source/inductor_windows_cpu.rst b/unstable_source/inductor_windows_cpu.rst similarity index 100% rename from prototype_source/inductor_windows_cpu.rst rename to unstable_source/inductor_windows_cpu.rst diff --git a/prototype_source/ios_coreml_workflow.rst b/unstable_source/ios_coreml_workflow.rst similarity index 100% rename from prototype_source/ios_coreml_workflow.rst rename to unstable_source/ios_coreml_workflow.rst diff --git a/prototype_source/ios_gpu_workflow.rst b/unstable_source/ios_gpu_workflow.rst similarity index 100% rename from prototype_source/ios_gpu_workflow.rst rename to unstable_source/ios_gpu_workflow.rst diff --git a/prototype_source/lite_interpreter.rst b/unstable_source/lite_interpreter.rst similarity index 100% rename from prototype_source/lite_interpreter.rst rename to unstable_source/lite_interpreter.rst diff --git a/prototype_source/maskedtensor_adagrad.py b/unstable_source/maskedtensor_adagrad.py similarity index 99% rename from prototype_source/maskedtensor_adagrad.py rename to unstable_source/maskedtensor_adagrad.py index 445da1e0e2..d4eca31c5c 100644 --- a/prototype_source/maskedtensor_adagrad.py +++ b/unstable_source/maskedtensor_adagrad.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ -(Prototype) Efficiently writing "sparse" semantics for Adagrad with MaskedTensor +Efficiently writing "sparse" semantics for Adagrad with MaskedTensor ================================================================================ """ diff --git 
a/prototype_source/maskedtensor_advanced_semantics.py b/unstable_source/maskedtensor_advanced_semantics.py similarity index 99% rename from prototype_source/maskedtensor_advanced_semantics.py rename to unstable_source/maskedtensor_advanced_semantics.py index 7a02330421..3517691611 100644 --- a/prototype_source/maskedtensor_advanced_semantics.py +++ b/unstable_source/maskedtensor_advanced_semantics.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ -(Prototype) MaskedTensor Advanced Semantics +MaskedTensor Advanced Semantics =========================================== """ diff --git a/prototype_source/maskedtensor_overview.py b/unstable_source/maskedtensor_overview.py similarity index 99% rename from prototype_source/maskedtensor_overview.py rename to unstable_source/maskedtensor_overview.py index 2882869367..955268e0d7 100644 --- a/prototype_source/maskedtensor_overview.py +++ b/unstable_source/maskedtensor_overview.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ -(Prototype) MaskedTensor Overview +MaskedTensor Overview ********************************* """ diff --git a/prototype_source/maskedtensor_sparsity.py b/unstable_source/maskedtensor_sparsity.py similarity index 99% rename from prototype_source/maskedtensor_sparsity.py rename to unstable_source/maskedtensor_sparsity.py index 1985135714..a1353805f1 100644 --- a/prototype_source/maskedtensor_sparsity.py +++ b/unstable_source/maskedtensor_sparsity.py @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- """ -(Prototype) MaskedTensor Sparsity +MaskedTensor Sparsity ================================= """ diff --git a/prototype_source/max_autotune_on_CPU_tutorial.rst b/unstable_source/max_autotune_on_CPU_tutorial.rst similarity index 100% rename from prototype_source/max_autotune_on_CPU_tutorial.rst rename to unstable_source/max_autotune_on_CPU_tutorial.rst diff --git a/prototype_source/nestedtensor.py b/unstable_source/nestedtensor.py similarity index 100% rename from prototype_source/nestedtensor.py rename to 
unstable_source/nestedtensor.py diff --git a/prototype_source/nnapi_mobilenetv2.rst b/unstable_source/nnapi_mobilenetv2.rst similarity index 100% rename from prototype_source/nnapi_mobilenetv2.rst rename to unstable_source/nnapi_mobilenetv2.rst diff --git a/prototype_source/openvino_quantizer.rst b/unstable_source/openvino_quantizer.rst similarity index 100% rename from prototype_source/openvino_quantizer.rst rename to unstable_source/openvino_quantizer.rst diff --git a/prototype_source/python_extension_autoload.rst b/unstable_source/python_extension_autoload.rst similarity index 100% rename from prototype_source/python_extension_autoload.rst rename to unstable_source/python_extension_autoload.rst diff --git a/prototype_source/semi_structured_sparse.rst b/unstable_source/semi_structured_sparse.rst similarity index 100% rename from prototype_source/semi_structured_sparse.rst rename to unstable_source/semi_structured_sparse.rst diff --git a/prototype_source/skip_param_init.rst b/unstable_source/skip_param_init.rst similarity index 100% rename from prototype_source/skip_param_init.rst rename to unstable_source/skip_param_init.rst diff --git a/prototype_source/tracing_based_selective_build.rst b/unstable_source/tracing_based_selective_build.rst similarity index 100% rename from prototype_source/tracing_based_selective_build.rst rename to unstable_source/tracing_based_selective_build.rst diff --git a/prototype_source/vmap_recipe.py b/unstable_source/vmap_recipe.py similarity index 100% rename from prototype_source/vmap_recipe.py rename to unstable_source/vmap_recipe.py diff --git a/prototype_source/vulkan_workflow.rst b/unstable_source/vulkan_workflow.rst similarity index 97% rename from prototype_source/vulkan_workflow.rst rename to unstable_source/vulkan_workflow.rst index c7fbe34f5c..4fc8fdc7c9 100644 --- a/prototype_source/vulkan_workflow.rst +++ b/unstable_source/vulkan_workflow.rst @@ -199,8 +199,6 @@ For Android API to run model on Vulkan backend we have to 
specify this during mo In this case, all inputs will be transparently copied from CPU to the Vulkan device, and model will be run on Vulkan device, the output will be copied transparently to CPU. -The example of using Vulkan backend can be found in test application within the PyTorch repository: -https://github.com/pytorch/pytorch/blob/master/android/test_app/app/src/main/java/org/pytorch/testapp/MainActivity.java#L133 Building android test app with Vulkan -------------------------------------