@@ -618,6 +618,10 @@ class FP8Linear(nn.Module):
618618
619619 This layer can hold FP8 weights with per-layer scale factors and
620620 automatically dequantizes during forward pass for compatibility.
621+
622+ The FP8 weight is stored as `fp8_weight_storage` to avoid nn.Module's
623+ special handling of 'weight'. Access via `.weight` property is provided
624+ for compatibility with LoRA loaders.
621625 """
622626
623627 def __init__ (
@@ -627,6 +631,7 @@ def __init__(
627631 bias : bool = True ,
628632 device : Optional [torch .device ] = None ,
629633 dtype : Optional [torch .dtype ] = None ,
634+ compute_dtype : Optional [torch .dtype ] = None ,
630635 ):
631636 """
632637 Initialize FP8 linear layer.
@@ -636,24 +641,49 @@ def __init__(
636641 out_features: Output feature dimension
637642 bias: Whether to include bias
638643 device: Target device
639- dtype: Compute dtype (for activations)
644+ dtype: Compute dtype (for activations) - deprecated, use compute_dtype
645+ compute_dtype: Compute dtype (for activations)
640646 """
641647 super ().__init__ ()
642648
643649 self .in_features = in_features
644650 self .out_features = out_features
645- self .compute_dtype = dtype or torch .bfloat16
651+ self .compute_dtype = compute_dtype or dtype or torch .bfloat16
646652
647- # Initialize as empty - will be filled during checkpoint loading
648- self .weight : Optional [QuantizedTensor ] = None
649- self .bias : Optional [ torch . Tensor ] = None
653+ # Use unique name to avoid nn.Module intercepting 'weight'
654+ self .fp8_weight_storage : Optional [QuantizedTensor ] = None
655+ self ._has_bias = bias
650656
651657 if bias :
652658 self .register_buffer (
653659 '_bias' ,
654660 torch .zeros (out_features , device = device , dtype = self .compute_dtype )
655661 )
656662
663+ @property
664+ def weight (self ) -> Optional [QuantizedTensor ]:
665+ """Get the FP8 quantized weight (for LoRA compatibility)."""
666+ return self .fp8_weight_storage
667+
668+ @weight .setter
669+ def weight (self , value : Optional [QuantizedTensor ]):
670+ """Set the FP8 quantized weight."""
671+ self .fp8_weight_storage = value
672+
673+ @property
674+ def bias (self ) -> Optional [torch .Tensor ]:
675+ """Get bias tensor."""
676+ return self ._bias if self ._has_bias and hasattr (self , '_bias' ) else None
677+
678+ @bias .setter
679+ def bias (self , value : Optional [torch .Tensor ]):
680+ """Set bias tensor."""
681+ if value is not None and self ._has_bias :
682+ if hasattr (self , '_bias' ):
683+ self ._bias .copy_ (value )
684+ else :
685+ self .register_buffer ('_bias' , value )
686+
657687 def set_fp8_weight (
658688 self ,
659689 fp8_weight : torch .Tensor ,
@@ -668,7 +698,7 @@ def set_fp8_weight(
668698 scale: Per-layer scale factor
669699 orig_dtype: Original dtype for dequantization
670700 """
671- self .weight = QuantizedTensor .from_fp8_with_scale (
701+ self .fp8_weight_storage = QuantizedTensor .from_fp8_with_scale (
672702 fp8_weight , scale , orig_dtype
673703 )
674704
@@ -682,13 +712,13 @@ def forward(self, x: torch.Tensor) -> torch.Tensor:
682712 Returns:
683713 Output tensor
684714 """
685- if self .weight is None :
715+ if self .fp8_weight_storage is None :
686716 raise RuntimeError ("Weight not set. Call set_fp8_weight first." )
687717
688718 # Dequantize weight for forward pass
689- weight = self .weight .dequantize ().to (x .dtype )
719+ weight = self .fp8_weight_storage .dequantize ().to (x .dtype )
690720
691- bias = self .bias if hasattr (self , '_bias' ) and self . _bias is not None else None
721+ bias = self ._bias if self . _has_bias and hasattr (self , '_bias' ) else None
692722 if bias is not None :
693723 bias = bias .to (x .dtype )
694724
@@ -748,169 +778,3 @@ def is_fp8_scaled_checkpoint(state_dict: Dict[str, torch.Tensor]) -> bool:
748778 return True
749779
750780 return has_fp8 and has_scale
751-
752-
class FP8Linear(nn.Module):
    """
    Linear layer that stores weights in FP8 format and dequantizes on forward.

    This allows ~50% memory savings compared to BF16 while maintaining
    numerical accuracy through per-tensor scaling.

    The FP8 payload lives in two registered buffers ('weight_fp8' and
    'weight_scale'), so both appear in the module's state_dict under those
    exact names.

    Args:
        in_features: Input feature dimension
        out_features: Output feature dimension
        bias: Whether to include bias
        device: Target device
        compute_dtype: Dtype for computation (default: bfloat16)
    """

    def __init__(
        self,
        in_features: int,
        out_features: int,
        bias: bool = True,
        device: Optional[torch.device] = None,
        compute_dtype: torch.dtype = torch.bfloat16,
    ):
        super().__init__()
        self.in_features = in_features
        self.out_features = out_features
        self.compute_dtype = compute_dtype

        # FP8 weight storage; zeros until set_fp8_weight/from_linear fills it.
        self.register_buffer(
            'weight_fp8',
            torch.zeros(out_features, in_features, dtype=torch.float8_e4m3fn, device=device)
        )
        # Scale is always a scalar but store as 0-dim tensor so it travels
        # with the module (device moves, state_dict) like any buffer.
        self.register_buffer(
            'weight_scale',
            torch.tensor(1.0, dtype=torch.float32, device=device)
        )

        # Optional bias kept as a trainable Parameter in compute dtype;
        # register_parameter(None) keeps 'bias' present-but-None when absent.
        if bias:
            self.bias = nn.Parameter(
                torch.zeros(out_features, dtype=compute_dtype, device=device)
            )
        else:
            self.register_parameter('bias', None)

    def set_fp8_weight(
        self,
        weight_fp8: torch.Tensor,
        scale: torch.Tensor,
    ) -> None:
        """
        Set the FP8 weight and scale in place (shapes must already match).

        Args:
            weight_fp8: Weight tensor in FP8 format
            scale: Scale factor for dequantization (scalar)
        """
        self.weight_fp8.copy_(weight_fp8)
        # Handle scalar scale - extract the value if it's a tensor.
        # NOTE(review): a non-scalar scale is silently collapsed to its first
        # element, discarding any per-channel factors — confirm callers only
        # ever pass per-tensor scales.
        if scale.numel() == 1:
            self.weight_scale.fill_(scale.item())
        else:
            self.weight_scale.fill_(scale.flatten()[0].item())

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """
        Forward pass with on-the-fly dequantization.

        Args:
            x: Input tensor

        Returns:
            Output tensor (in compute_dtype, since x is cast before F.linear)
        """
        # Dequantize weight: fp8 * scale -> compute_dtype
        weight = self.weight_fp8.to(self.compute_dtype) * self.weight_scale.to(self.compute_dtype)

        # Convert input to compute dtype if needed
        if x.dtype != self.compute_dtype:
            x = x.to(self.compute_dtype)

        # Standard linear operation
        return F.linear(x, weight, self.bias)

    @classmethod
    def from_linear(
        cls,
        linear: nn.Linear,
        device: Optional[torch.device] = None,
    ) -> 'FP8Linear':
        """
        Create FP8Linear from an existing nn.Linear layer.

        Quantizes weights to FP8 format with computed scale.

        Args:
            linear: Source linear layer
            device: Target device (defaults to the source weight's device)

        Returns:
            FP8Linear layer with quantized weights
        """
        device = device or linear.weight.device
        has_bias = linear.bias is not None
        # Compute dtype mirrors the source layer so outputs stay comparable.
        compute_dtype = linear.weight.dtype

        fp8_linear = cls(
            linear.in_features,
            linear.out_features,
            bias=has_bias,
            device=device,
            compute_dtype=compute_dtype,
        )

        # Quantize weight.
        # NOTE(review): scale="recalculate" presumably asks the layout helper
        # to derive a fresh per-tensor scale from the data — confirm against
        # TensorCoreFP8Layout.quantize's contract.
        qdata, params = TensorCoreFP8Layout.quantize(
            linear.weight.detach(),
            scale="recalculate",
        )
        fp8_linear.set_fp8_weight(qdata.to(device), params['scale'].to(device))

        # Copy bias
        if has_bias:
            fp8_linear.bias.data.copy_(linear.bias.data)

        return fp8_linear

    def extra_repr(self) -> str:
        # Shown by print(module); mirrors nn.Linear's extra_repr format.
        return f'in_features={self.in_features}, out_features={self.out_features}, bias={self.bias is not None}'
885-
def convert_linear_to_fp8(
    module: nn.Module,
    device: Optional[torch.device] = None,
    skip_patterns: Optional[list] = None,
) -> nn.Module:
    """
    Recursively convert all nn.Linear layers to FP8Linear.

    Children whose name contains any of the skip patterns are left as-is
    (the search recurses into them instead). The module is modified in
    place and also returned.

    Args:
        module: Module to convert
        device: Target device
        skip_patterns: List of name patterns to skip

    Returns:
        Module with converted layers
    """
    patterns = skip_patterns or []

    for child_name, child in list(module.named_children()):
        matches_skip = any(pat in child_name for pat in patterns)

        if isinstance(child, nn.Linear) and not matches_skip:
            # Replace this Linear with its FP8 equivalent.
            setattr(module, child_name, FP8Linear.from_linear(child, device=device))
        else:
            # Not a convertible Linear: descend into its children.
            convert_linear_to_fp8(child, device=device, skip_patterns=patterns)

    return module
0 commit comments