Add examples for instance extras #728

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open: wants to merge 14 commits into main
59 changes: 45 additions & 14 deletions docs/backends.rst
@@ -45,6 +45,7 @@ It also works out of the box, because the wgpu-native DLL is shipped with wgpu-p
The wgpu_native backend provides a few extra functionalities:

.. py:function:: wgpu.backends.wgpu_native.request_device_sync(adapter, trace_path, *, label="", required_features, required_limits, default_queue)

An alternative to :func:`wgpu.GPUAdapter.request_device_sync`, that streams a trace
of all low-level calls to disk, so the session can be replayed (also on other systems),
investigated, and debugged.
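
For example, a minimal sketch (assuming the function is importable from
``wgpu.backends.wgpu_native.extras`` like the other extras, and using a
hypothetical trace directory):

.. code-block:: py

    import wgpu
    from wgpu.backends.wgpu_native.extras import request_device_sync

    adapter = wgpu.gpu.request_adapter_sync(power_preference="high-performance")
    # All subsequent API calls on this device are recorded into ./wgpu_trace
    device = request_device_sync(adapter, "./wgpu_trace", label="traced device")
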
@@ -166,9 +167,9 @@ they reduce driver overhead on the CPU.

The first two require that you enable the feature ``"multi-draw-indirect"``.

.. py:function:: wgpu.backends.wgpu_native.multi_draw_indirect(render_pass_encoder, buffer, *, offset=0, count)

Equivalent to::

for i in range(count):
render_pass_encoder.draw_indirect(buffer, offset + i * 16)

@@ -179,9 +180,9 @@ The first two require that you enable the feature ``"multi-draw-indirect"``.
Must be a multiple of 4.
:param count: The number of draw operations to perform.

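For example, a minimal sketch (assuming a ``device`` created with the
``"multi-draw-indirect"`` feature and an active ``render_pass`` that already has
a pipeline and vertex buffer set):

.. code-block:: py

    import numpy as np
    import wgpu
    from wgpu.backends.wgpu_native.extras import multi_draw_indirect

    # Three draws, each encoded as 4 uint32 values:
    # (vertex_count, instance_count, first_vertex, first_instance)
    draw_args = np.array(
        [[3, 1, 0, 0], [3, 1, 3, 0], [3, 1, 6, 0]],
        dtype=np.uint32,
    )
    indirect_buffer = device.create_buffer_with_data(
        data=draw_args, usage=wgpu.BufferUsage.INDIRECT
    )

    # Issue all three draws with a single call
    multi_draw_indirect(render_pass, indirect_buffer, offset=0, count=3)
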
.. py:function:: wgpu.backends.wgpu_native.multi_draw_indexed_indirect(render_pass_encoder, buffer, *, offset=0, count)

Equivalent to::

for i in range(count):
render_pass_encoder.draw_indexed_indirect(buffer, offset + i * 20)
@@ -199,9 +200,9 @@ They are identical to the previous two, except that the ``count`` argument is re
three arguments. The value at ``count_buffer_offset`` in ``count_buffer`` is treated as
an unsigned 32-bit integer. The ``count`` is the minimum of this value and ``max_count``.

.. py:function:: wgpu.backends.wgpu_native.multi_draw_indirect_count(render_pass_encoder, buffer, *, offset=0, count_buffer, count_offset=0, max_count)

Equivalent to::

count = min(<u32 at count_buffer_offset in count_buffer>, max_count)
for i in range(count):
@@ -217,9 +218,9 @@ an unsigned 32-bit integer. The ``count`` is the minimum of this value and ``max
Must be a multiple of 4.
:param max_count: The maximum number of draw operations to perform.

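A sketch of the counted variant, reusing ``indirect_buffer`` from the sketch
above (the count variants are assumed to additionally require the
``"multi-draw-indirect-count"`` feature):

.. code-block:: py

    import numpy as np
    import wgpu
    from wgpu.backends.wgpu_native.extras import multi_draw_indirect_count

    # A single uint32 holding the number of draws to actually perform (2 here)
    count_buffer = device.create_buffer_with_data(
        data=np.array([2], dtype=np.uint32),
        usage=wgpu.BufferUsage.INDIRECT,
    )

    # Performs min(2, max_count) draws from indirect_buffer
    multi_draw_indirect_count(
        render_pass, indirect_buffer, count_buffer=count_buffer, max_count=3
    )
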
.. py:function:: wgpu.backends.wgpu_native.multi_draw_indexed_indirect_count(render_pass_encoder, buffer, *, offset=0, count_buffer, count_offset=0, max_count)

Equivalent to::

count = min(<u32 at count_buffer_offset in count_buffer>, max_count)
for i in range(count):
@@ -246,13 +247,13 @@ both enabled.

When ``write_timestamp`` is called with a render pass or compute pass as its first
argument, a timestamp is written to the indicated query set at the indicated index at
that point in this queue. This usage requires
that the features ``"timestamp-query"`` and ``"timestamp-query-inside-passes"`` are
both enabled.
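
As a minimal sketch of the command-encoder usage (only the ``"timestamp-query"``
feature is needed; resolving and reading back the values uses the regular query
API):

.. code-block:: py

    import wgpu
    from wgpu.backends.wgpu_native.extras import write_timestamp

    query_set = device.create_query_set(type=wgpu.QueryType.timestamp, count=2)
    resolve_buffer = device.create_buffer(
        size=2 * 8,  # two 64-bit timestamps
        usage=wgpu.BufferUsage.QUERY_RESOLVE | wgpu.BufferUsage.COPY_SRC,
    )

    command_encoder = device.create_command_encoder()
    write_timestamp(command_encoder, query_set, 0)
    # ... encode the work to be measured ...
    write_timestamp(command_encoder, query_set, 1)
    command_encoder.resolve_query_set(query_set, 0, 2, resolve_buffer, 0)
    device.queue.submit([command_encoder.finish()])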

.. py:function:: wgpu.backends.wgpu_native.write_timestamp(encoder, query_set, query_index)

Writes a timestamp to the timestamp query set at the indicated index.

:param encoder: The ComputePassEncoder, RenderPassEncoder, or CommandEncoder.
:param query_set: The timestamp query set into which to save the result.
@@ -293,7 +294,7 @@ the number of statistics chosen.
The statistics are always output to the query set in the order above, even if they are
given in a different order in the list.

.. py:function:: wgpu.backends.wgpu_native.create_statistics_query_set(device, count, statistics)

Create a query set that can hold ``count`` entries for the specified statistics.
The statistics are specified as a list of strings.
@@ -302,20 +303,50 @@ given in a different order in the list.
:param count: Number of entries that go into the query set.
:param statistics: A sequence of strings giving the desired statistics.

.. py:function:: wgpu.backends.wgpu_native.begin_pipeline_statistics_query(encoder, query_set, index)

Start collecting statistics.

:param encoder: The ComputePassEncoder or RenderPassEncoder.
:param query_set: The query set into which to save the result.
:param index: The index of the query set into which to write the result.

.. py:function:: wgpu.backends.wgpu_native.end_pipeline_statistics_query(encoder, query_set, index)

Stop collecting statistics and write them into the query set.

:param encoder: The ComputePassEncoder or RenderPassEncoder.

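A sketch of how these could be combined in a compute pass (``device``,
``command_encoder`` and ``compute_pipeline`` are assumed to exist, the device is
assumed to have the ``"pipeline-statistics-query"`` feature, and the call
signatures follow the directives above):

.. code-block:: py

    from wgpu.backends.wgpu_native.extras import (
        begin_pipeline_statistics_query,
        create_statistics_query_set,
        end_pipeline_statistics_query,
    )

    statistics = [...]  # one or more of the statistic names listed above
    query_set = create_statistics_query_set(device, count=1, statistics=statistics)

    compute_pass = command_encoder.begin_compute_pass()
    begin_pipeline_statistics_query(compute_pass, query_set, 0)
    compute_pass.set_pipeline(compute_pipeline)
    compute_pass.dispatch_workgroups(64, 1, 1)
    end_pipeline_statistics_query(compute_pass, query_set, 0)
    compute_pass.end()
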
.. py:function:: wgpu.backends.wgpu_native.set_instance_extras(backends, flags, dx12_compiler, gles3_minor_version, fence_behavior, dxil_path, dxc_path, dxc_max_shader_model)

Sets extras on the global instance. Must be called before the instance is created, which happens on the first call to ``enumerate_adapters`` or ``request_adapter``.

:param backends: bitflags as list[str], which backends to enable at the instance level. Defaults to ``["All"]``. Can be any combination of ``["Vulkan", "GL", "Metal", "DX12", "BrowserWebGPU"]``, or one of the premade combinations ``["All", "Primary", "Secondary"]``. Note that your system needs to support the selected backends; for detailed information see https://docs.rs/wgpu/latest/wgpu/struct.Backends.html
:param flags: bitflags as list[str], debug flags for the instance. Defaults to ``["Default"]``; can be any combination of ``["Debug", "Validation", "DiscardHalLabels"]``.
:param dx12_compiler: enum/str, either "Fxc", "Dxc" or "Undefined". Defaults to "Fxc", which is also what "Undefined" resolves to. "Dxc" requires additional library files.
:param gles3_minor_version: enum/int, 0, 1 or 2. Defaults to "Atomic" (the minor version is then picked by the driver).
:param fence_behavior: enum/int, "Normal" or "AutoFinish". Defaults to "Normal".
:param dxil_path: str, path to dxil.dll. Defaults to ``None``, in which case the resource directory is searched.
:param dxc_path: str, path to dxcompiler.dll. Defaults to ``None``, in which case the resource directory is searched.
:param dxc_max_shader_model: float between 6.0 and 6.7, the maximum shader model the given DLL supports. Defaults to 6.5.

Use it like the following, before the instance is created:

.. code-block:: py

import wgpu
from wgpu.backends.wgpu_native.extras import set_instance_extras
set_instance_extras(
backends=["Vulkan"],
flags=["Debug"],
)

# ...

for a in wgpu.gpu.enumerate_adapters_sync():
print(a.summary)

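As a further sketch, the DXC shader compiler could be selected on Windows by
also pointing to the compiler DLLs (the paths below are placeholders):

.. code-block:: py

    set_instance_extras(
        backends=["DX12"],
        dx12_compiler="Dxc",
        dxil_path="path/to/dxil.dll",  # placeholder path
        dxc_path="path/to/dxcompiler.dll",  # placeholder path
        dxc_max_shader_model=6.5,
    )
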
For additional usage examples see ``extras_dxc.py`` and ``extras_debug.py`` in the examples directory.

The js_webgpu backend
---------------------
108 changes: 84 additions & 24 deletions examples/cube.py
@@ -30,12 +30,16 @@ def setup_drawing_sync(canvas, power_preference="high-performance", limits=None)
"""

adapter = wgpu.gpu.request_adapter_sync(power_preference=power_preference)
device = adapter.request_device_sync(required_limits=limits)
device = adapter.request_device_sync(
required_limits=limits, label="Cube Example device"
)

pipeline_layout, uniform_buffer, bind_groups = create_pipeline_layout(device)
pipeline_kwargs = get_render_pipeline_kwargs(canvas, device, pipeline_layout)

render_pipeline = device.create_render_pipeline(**pipeline_kwargs)
render_pipeline = device.create_render_pipeline(
**pipeline_kwargs, label="Cube Example render pipeline"
)

return get_draw_function(
canvas, device, render_pipeline, uniform_buffer, bind_groups, asynchronous=False
@@ -50,12 +54,16 @@ async def setup_drawing_async(canvas, limits=None):
"""

adapter = await wgpu.gpu.request_adapter_async(power_preference="high-performance")
device = await adapter.request_device_async(required_limits=limits)
device = await adapter.request_device_async(
required_limits=limits, label="Cube Example async device"
)

pipeline_layout, uniform_buffer, bind_groups = create_pipeline_layout(device)
pipeline_kwargs = get_render_pipeline_kwargs(canvas, device, pipeline_layout)

render_pipeline = await device.create_render_pipeline_async(**pipeline_kwargs)
render_pipeline = await device.create_render_pipeline_async(
**pipeline_kwargs, label="Cube Example async render pipeline"
)

return get_draw_function(
canvas, device, render_pipeline, uniform_buffer, bind_groups, asynchronous=True
@@ -65,12 +73,18 @@ async def setup_drawing_async(canvas, limits=None):
# %% Functions to create wgpu objects


def get_render_pipeline_kwargs(canvas, device, pipeline_layout):
context = canvas.get_context("wgpu")
def get_render_pipeline_kwargs(
canvas: wgpu.WgpuCanvasInterface,
device: wgpu.GPUDevice,
pipeline_layout: wgpu.GPUPipelineLayout,
) -> dict:
context: wgpu.GPUCanvasContext = canvas.get_context("wgpu")
render_texture_format = context.get_preferred_format(device.adapter)
context.configure(device=device, format=render_texture_format)

shader = device.create_shader_module(code=shader_source)
shader = device.create_shader_module(
code=shader_source, label="Cube Example shader module"
)

return dict(
layout=pipeline_layout,
@@ -119,17 +133,19 @@ def get_render_pipeline_kwargs(canvas, device, pipeline_layout):
)


def create_pipeline_layout(device):
def create_pipeline_layout(device: wgpu.GPUDevice):
# Create uniform buffer - data is uploaded each frame
uniform_buffer = device.create_buffer(
size=uniform_data.nbytes,
usage=wgpu.BufferUsage.UNIFORM | wgpu.BufferUsage.COPY_DST,
label="Cube Example uniform buffer",
)

# Create another buffer to copy data to it (by mapping it and then copying the data)
uniform_buffer.copy_buffer = device.create_buffer(
size=uniform_data.nbytes,
usage=wgpu.BufferUsage.MAP_WRITE | wgpu.BufferUsage.COPY_SRC,
label="Cube Example uniform buffer copy buffer",
)

# Create texture, and upload data
@@ -140,8 +156,9 @@ def create_pipeline_layout(device):
format=wgpu.TextureFormat.r8unorm,
mip_level_count=1,
sample_count=1,
label="Cube Example texture",
)
texture_view = texture.create_view()
texture_view = texture.create_view(label="Cube Example texture view")

device.queue.write_texture(
{
@@ -158,7 +175,7 @@ def create_pipeline_layout(device):
)

# Create a sampler
sampler = device.create_sampler()
sampler = device.create_sampler(label="Cube Example sampler")

# We always have two bind groups, so we can play distributing our
Contributor Author:
is this comment still accurate? It seems to be from over 5 years ago when this example was first added

# resources over these two groups in different configurations.
@@ -206,30 +223,45 @@ def create_pipeline_layout(device):
bind_groups = []

for entries, layout_entries in zip(bind_groups_entries, bind_groups_layout_entries):
bind_group_layout = device.create_bind_group_layout(entries=layout_entries)
bind_group_layout = device.create_bind_group_layout(
entries=layout_entries,
label="Cube Example bind group layout",
)
bind_group_layouts.append(bind_group_layout)
bind_groups.append(
device.create_bind_group(layout=bind_group_layout, entries=entries)
device.create_bind_group(
layout=bind_group_layout,
entries=entries,
label=f"Cube Example bind group with {len(entries)} entries",
)
)

pipeline_layout = device.create_pipeline_layout(
bind_group_layouts=bind_group_layouts
bind_group_layouts=bind_group_layouts, label="Cube Example pipeline layout"
)

return pipeline_layout, uniform_buffer, bind_groups


def get_draw_function(
canvas, device, render_pipeline, uniform_buffer, bind_groups, *, asynchronous
canvas: wgpu.WgpuCanvasInterface,
device: wgpu.GPUDevice,
render_pipeline: wgpu.GPURenderPipeline,
uniform_buffer: wgpu.GPUBuffer,
bind_groups: list[wgpu.GPUBindGroup],
*,
asynchronous: bool,
):
# Create vertex buffer, and upload data
vertex_buffer = device.create_buffer_with_data(
data=vertex_data, usage=wgpu.BufferUsage.VERTEX
data=vertex_data,
usage=wgpu.BufferUsage.VERTEX,
label="Cube Example vertex buffer",
)

# Create index buffer, and upload data
index_buffer = device.create_buffer_with_data(
data=index_data, usage=wgpu.BufferUsage.INDEX
data=index_data, usage=wgpu.BufferUsage.INDEX, label="Cube Example index buffer"
)

def update_transform():
@@ -273,28 +305,48 @@ def upload_uniform_buffer_sync():
tmp_buffer = device.create_buffer_with_data(
data=uniform_data, usage=wgpu.BufferUsage.COPY_SRC
)
command_encoder = device.create_command_encoder()
command_encoder = device.create_command_encoder(
label="Cube Example uniform buffer upload command encoder"
)
command_encoder.copy_buffer_to_buffer(
tmp_buffer, 0, uniform_buffer, 0, uniform_data.nbytes
)
device.queue.submit([command_encoder.finish()])
device.queue.submit(
[
command_encoder.finish(
label="Cube Example uniform buffer upload command buffer"
)
]
)

async def upload_uniform_buffer_async():
tmp_buffer = uniform_buffer.copy_buffer
await tmp_buffer.map_async(wgpu.MapMode.WRITE)
tmp_buffer.write_mapped(uniform_data)
tmp_buffer.unmap()
command_encoder = device.create_command_encoder()
command_encoder = device.create_command_encoder(
label="Cube Example uniform buffer upload async command encoder"
)
command_encoder.copy_buffer_to_buffer(
tmp_buffer, 0, uniform_buffer, 0, uniform_data.nbytes
)
device.queue.submit([command_encoder.finish()])
device.queue.submit(
[
command_encoder.finish(
label="Cube Example uniform buffer upload async command buffer"
)
]
)

def draw_frame():
current_texture_view = (
canvas.get_context("wgpu").get_current_texture().create_view()
canvas.get_context("wgpu")
.get_current_texture()
.create_view(label="Cube Example current surface texture view")
)
command_encoder = device.create_command_encoder(
label="Cube Example render pass command encoder"
)
command_encoder = device.create_command_encoder()
render_pass = command_encoder.begin_render_pass(
color_attachments=[
{
Expand All @@ -305,17 +357,26 @@ def draw_frame():
"store_op": wgpu.StoreOp.store,
}
],
label="Cube Example render pass",
)

# debug groups and markers can optionally be added to help debugging.
render_pass.push_debug_group("Cube Example Debug Group")
render_pass.set_pipeline(render_pipeline)
render_pass.set_index_buffer(index_buffer, wgpu.IndexFormat.uint32)
render_pass.set_vertex_buffer(0, vertex_buffer)
for bind_group_id, bind_group in enumerate(bind_groups):
render_pass.set_bind_group(bind_group_id, bind_group)
render_pass.insert_debug_marker(
f"Cube Example bind group {bind_group_id=} set"
)
render_pass.draw_indexed(index_data.size, 1, 0, 0, 0)
render_pass.pop_debug_group()
render_pass.end()

device.queue.submit([command_encoder.finish()])
device.queue.submit(
[command_encoder.finish(label="Cube Example render pass command buffer")]
)

def draw_frame_sync():
update_transform()
@@ -459,7 +520,6 @@ async def draw_frame_async():
for a in wgpu.gpu.enumerate_adapters_sync():
print(a.summary)


if __name__ == "__main__":
canvas = RenderCanvas(
size=(640, 480),