
Commit aa4ec59

ilanbria authored and psychedelicious committed
cr fixes 2
1 parent 6566ef7 commit aa4ec59

File tree

14 files changed: +103 -73 lines


invokeai/app/invocations/bria_decoder.py

Lines changed: 23 additions & 1 deletion
@@ -29,11 +29,19 @@ class BriaDecoderInvocation(BaseInvocation):
         description=FieldDescriptions.latents,
         input=Input.Connection,
     )
+    height: int = InputField(
+        title="Height",
+        description="The height of the output image",
+    )
+    width: int = InputField(
+        title="Width",
+        description="The width of the output image",
+    )

     @torch.no_grad()
     def invoke(self, context: InvocationContext) -> ImageOutput:
         latents = context.tensors.load(self.latents.latents_name)
-        latents = latents.view(1, 64, 64, 4, 2, 2).permute(0, 3, 1, 4, 2, 5).reshape(1, 4, 128, 128)
+        latents = _unpack_latents(latents, self.height, self.width)

         with context.models.load(self.vae.vae) as vae:
             assert isinstance(vae, AutoencoderKL)
@@ -48,3 +56,17 @@ def invoke(self, context: InvocationContext) -> ImageOutput:
         img = Image.fromarray(image)
         image_dto = context.images.save(image=img)
         return ImageOutput.build(image_dto)
+
+
+def _unpack_latents(latents, height, width, vae_scale_factor=16):
+    batch_size, num_patches, channels = latents.shape
+
+    height = height // vae_scale_factor
+    width = width // vae_scale_factor
+
+    latents = latents.view(batch_size, height, width, channels // 4, 2, 2)
+    latents = latents.permute(0, 3, 1, 4, 2, 5)
+
+    latents = latents.reshape(batch_size, channels // (2 * 2), height * 2, width * 2)
+
+    return latents
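The helper generalizes the old hardcoded reshape (batch 1, 1024x1024) to arbitrary resolutions. A minimal sketch of the shape round-trip, assuming the denoiser emits 4 latent channels packed into 2x2 spatial patches (16 values per patch token) and a VAE scale factor of 16:

import torch

# Hypothetical walk-through of the unpacking above for a 1024x1024 image.
batch, height, width, vae_scale_factor = 1, 1024, 1024, 16
h, w = height // vae_scale_factor, width // vae_scale_factor  # 64 x 64 patch grid

packed = torch.randn(batch, h * w, 16)              # (1, 4096, 16) packed latents
latents = packed.view(batch, h, w, 4, 2, 2)         # split each token: 4 channels x 2x2 pixels
latents = latents.permute(0, 3, 1, 4, 2, 5)         # (1, 4, 64, 2, 64, 2)
latents = latents.reshape(batch, 4, h * 2, w * 2)   # (1, 4, 128, 128), ready for the VAE
print(latents.shape)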

invokeai/app/invocations/bria_denoiser.py

Lines changed: 3 additions & 3 deletions
@@ -22,6 +22,8 @@
 @invocation_output("bria_denoise_output")
 class BriaDenoiseInvocationOutput(BaseInvocationOutput):
     latents: LatentsField = OutputField(description=FieldDescriptions.latents)
+    height: int = OutputField(description="The height of the output image")
+    width: int = OutputField(description="The width of the output image")


 @invocation(
@@ -144,7 +146,6 @@ def invoke(self, context: InvocationContext) -> BriaDenoiseInvocationOutput:
             height=self.height,
             controlnet_conditioning_scale=control_scales,
             num_inference_steps=self.num_steps,
-            max_sequence_length=128,
             guidance_scale=self.guidance_scale,
             latents=latents,
             latent_image_ids=latent_image_ids,
@@ -158,7 +159,7 @@ def invoke(self, context: InvocationContext) -> BriaDenoiseInvocationOutput:

         assert isinstance(output_latents, torch.Tensor)
         saved_input_latents_tensor = context.tensors.save(output_latents)
-        return BriaDenoiseInvocationOutput(latents=LatentsField(latents_name=saved_input_latents_tensor))
+        return BriaDenoiseInvocationOutput(latents=LatentsField(latents_name=saved_input_latents_tensor), height=self.height, width=self.width)

     def _prepare_multi_control(
         self, context: InvocationContext, vae: AutoencoderKL, width: int, height: int, device: torch.device
@@ -191,7 +192,6 @@ def _prepare_multi_control(

 def _build_step_callback(context: InvocationContext) -> Callable[[PipelineIntermediateState], None]:
     def step_callback(state: PipelineIntermediateState) -> None:
-        return
         context.util.sd_step_callback(state, BaseModelType.Bria)

     return step_callback

invokeai/app/invocations/bria_latent_noise.py

Lines changed: 2 additions & 2 deletions
@@ -26,8 +26,8 @@ class BriaLatentNoiseOutput(BaseModel):
 class BriaLatentNoiseInvocationOutput(BaseInvocationOutput):
     """Base class for nodes that output Bria latent tensors."""
     latent_noise: BriaLatentNoiseOutput = OutputField(description="The latent noise, containing latents and latent image ids.")
-    height: int = OutputField(description="The height of the output image", default=1024)
-    width: int = OutputField(description="The width of the output image", default=1024)
+    height: int = OutputField(description="The height of the output image")
+    width: int = OutputField(description="The width of the output image")

 @invocation(
     "bria_latent_noise",

invokeai/app/invocations/bria_text_encoder.py

Lines changed: 4 additions & 2 deletions
@@ -19,6 +19,7 @@
     invocation_output,
 )

+DEFAULT_NEGATIVE_PROMPT = "Logo,Watermark,Text,Ugly,Morbid,Extra fingers,Poorly drawn hands,Mutation,Blurry,Extra limbs,Gross proportions,Missing arms,Mutated hands,Long neck,Duplicate"

 @invocation_output("bria_text_encoder_output")
 class BriaTextEncoderInvocationOutput(BaseInvocationOutput):
@@ -48,7 +49,7 @@ class BriaTextEncoderInvocation(BaseInvocation):
     negative_prompt: Optional[str] = InputField(
         title="Negative Prompt",
         description="The negative prompt to encode",
-        default="Logo,Watermark,Text,Ugly,Morbid,Extra fingers,Poorly drawn hands,Mutation,Blurry,Extra limbs,Gross proportions,Missing arms,Mutated hands,Long neck,Duplicate",
+        default="",
     )
     max_length: int = InputField(
         default=256,
@@ -74,11 +75,12 @@ def invoke(self, context: InvocationContext) -> BriaTextEncoderInvocationOutput:
         assert isinstance(tokenizer, T5TokenizerFast)
         assert isinstance(text_encoder, T5EncoderModel)

+        negative_prompt = f"{DEFAULT_NEGATIVE_PROMPT}, {self.negative_prompt}"
         prompt_embeds, negative_prompt_embeds = encode_prompt(
             prompt=self.prompt,
             tokenizer=tokenizer,
             text_encoder=text_encoder,
-            negative_prompt=self.negative_prompt,
+            negative_prompt=negative_prompt,
             device=text_encoder.device,
             num_images_per_prompt=1,
             max_sequence_length=self.max_length,
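With this change the built-in negative prompt is always prepended and the user's field is appended to it, instead of being the editable default itself. A small sketch of the resulting string, assuming DEFAULT_NEGATIVE_PROMPT as defined above (truncated here for brevity):

DEFAULT_NEGATIVE_PROMPT = "Logo,Watermark,Text,Ugly,Morbid"  # truncated for the example

user_negative = "low contrast, oversaturated"
print(f"{DEFAULT_NEGATIVE_PROMPT}, {user_negative}")
# Logo,Watermark,Text,Ugly,Morbid, low contrast, oversaturated

print(f"{DEFAULT_NEGATIVE_PROMPT}, {''}")
# Logo,Watermark,Text,Ugly,Morbid,   <- an empty field still yields the built-in list, plus a trailing ", "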

invokeai/app/util/step_callback.py

Lines changed: 4 additions & 1 deletion
@@ -94,7 +94,10 @@
 ]

 BRIA_LATENT_RGB_FACTORS = [
-
+    [0.31115174, 0.38229316, 0.43620577],
+    [-0.26867455, 0.05353606, 0.1088054],
+    [0.09892498, 0.17854956, -0.12029117],
+    [-0.37774912, -0.17128916, -0.25255626],
 ]

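These factors appear to follow the same pattern as the other bases in this file: a 4x3 matrix projecting the 4 latent channels to RGB for low-resolution progress previews. A rough illustration of that projection, not the exact preview code:

import torch

BRIA_LATENT_RGB_FACTORS = [
    [0.31115174, 0.38229316, 0.43620577],
    [-0.26867455, 0.05353606, 0.1088054],
    [0.09892498, 0.17854956, -0.12029117],
    [-0.37774912, -0.17128916, -0.25255626],
]

latents = torch.randn(4, 128, 128)                        # (latent channels, H, W)
factors = torch.tensor(BRIA_LATENT_RGB_FACTORS)           # (4 latent channels, 3 RGB channels)
preview = torch.einsum("chw,cr->rhw", latents, factors)   # (3, 128, 128) rough RGB estimate
print(preview.shape)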

invokeai/backend/bria/pipeline_bria.py

Lines changed: 0 additions & 3 deletions
@@ -236,7 +236,6 @@ def check_inputs(
         prompt_embeds=None,
         negative_prompt_embeds=None,
         callback_on_step_end_tensor_inputs=None,
-        max_sequence_length=None,
     ):
         if height % 8 != 0 or width % 8 != 0:
             raise ValueError(f"`height` and `width` have to be divisible by 8 but are {height} and {width}.")
@@ -266,8 +265,6 @@ def check_inputs(
                 f" {negative_prompt_embeds}. Please make sure to only forward one of the two."
             )

-        if max_sequence_length is not None and max_sequence_length > 512:
-            raise ValueError(f"`max_sequence_length` cannot be greater than 512 but is {max_sequence_length}")

     def to(self, *args, **kwargs):
         DiffusionPipeline.to(self, *args, **kwargs)

invokeai/backend/bria/pipeline_bria_controlnet.py

Lines changed: 18 additions & 20 deletions
@@ -251,7 +251,6 @@ def __call__(
         joint_attention_kwargs: Optional[Dict[str, Any]] = None,
         callback_on_step_end: Optional[Callable[[int, int, Dict], None]] = None,
         callback_on_step_end_tensor_inputs: Optional[List[str]] = None,
-        max_sequence_length: int = 128,
         step_callback: Callable[[PipelineIntermediateState], None] = None,
     ):
         r"""
@@ -342,7 +341,6 @@ def __call__(
             prompt_embeds=prompt_embeds,
             negative_prompt_embeds=negative_prompt_embeds,
             callback_on_step_end_tensor_inputs=callback_on_step_end_tensor_inputs,
-            max_sequence_length=max_sequence_length,
         )

         self._guidance_scale = guidance_scale
@@ -416,15 +414,15 @@ def __call__(
             prompt_embeds = torch.cat([negative_prompt_embeds, prompt_embeds], dim=0)

         # Init Invoke step callback
-        # step_callback(
-        #     PipelineIntermediateState(
-        #         step=0,
-        #         order=1,
-        #         total_steps=num_inference_steps,
-        #         timestep=int(timesteps[0]),
-        #         latents=latents,
-        #     ),
-        # )
+        step_callback(
+            PipelineIntermediateState(
+                step=0,
+                order=1,
+                total_steps=num_inference_steps,
+                timestep=int(timesteps[0]),
+                latents=latents.view(1, 64, 64, 4, 2, 2).permute(0, 3, 1, 4, 2, 5).reshape(1, 4, 128, 128),
+            ),
+        )

         # EYAL - added the CFG loop
         # 7. Denoising loop
@@ -513,15 +511,15 @@ def __call__(
                 # call the callback, if provided
                 if i == len(timesteps) - 1 or ((i + 1) > num_warmup_steps and (i + 1) % self.scheduler.order == 0):
                     progress_bar.update()
-                    # step_callback(
-                    #     PipelineIntermediateState(
-                    #         step=i + 1,
-                    #         order=1,
-                    #         total_steps=num_inference_steps,
-                    #         timestep=int(t),
-                    #         latents=latents,
-                    #     ),
-                    # )
+                    step_callback(
+                        PipelineIntermediateState(
+                            step=i + 1,
+                            order=1,
+                            total_steps=num_inference_steps,
+                            timestep=int(t),
+                            latents=latents.view(1, 64, 64, 4, 2, 2).permute(0, 3, 1, 4, 2, 5).reshape(1, 4, 128, 128),
+                        ),
+                    )

         if output_type == "latent":
             image = latents
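Note that the latents handed to the step callback are unpacked inline with shapes hardcoded to the batch-1, 1024x1024 case; this mirrors _unpack_latents from bria_decoder.py at the default resolution. A quick equivalence check, assuming that helper is in scope:

import torch

packed = torch.randn(1, 64 * 64, 16)
inline = packed.view(1, 64, 64, 4, 2, 2).permute(0, 3, 1, 4, 2, 5).reshape(1, 4, 128, 128)
helper = _unpack_latents(packed, height=1024, width=1024)  # helper defined in bria_decoder.py above
assert torch.equal(inline, helper)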

invokeai/backend/model_manager/config.py

Lines changed: 29 additions & 21 deletions
@@ -442,27 +442,6 @@ def parse(cls, mod: ModelOnDisk) -> dict[str, Any]:
             "base": cls.base_model(mod),
         }

-class BriaDiffusersConfig(LoRAConfigBase, ModelConfigBase):
-    """Model config for Bria/Diffusers models."""
-
-    format: Literal[ModelFormat.Diffusers] = ModelFormat.Diffusers
-
-    @classmethod
-    def matches(cls, mod: ModelOnDisk) -> bool:
-        if mod.path.is_file():
-            return cls.flux_lora_format(mod) == FluxLoRAFormat.Diffusers
-
-        suffixes = ["bin", "safetensors"]
-        weight_files = [mod.path / f"pytorch_lora_weights.{sfx}" for sfx in suffixes]
-        return any(wf.exists() for wf in weight_files)
-
-    @classmethod
-    def parse(cls, mod: ModelOnDisk) -> dict[str, Any]:
-        return {
-            "base": cls.base_model(mod),
-        }
-

 class VAECheckpointConfig(CheckpointConfigBase, LegacyProbeMixin, ModelConfigBase):
     """Model config for standalone VAE models."""
@@ -540,6 +519,35 @@ class MainDiffusersConfig(DiffusersConfigBase, MainConfigBase, LegacyProbeMixin,

     pass

+class BriaDiffusersConfig(DiffusersConfigBase, MainConfigBase, ModelConfigBase):
+    """Model config for Bria/Diffusers models."""
+
+    format: Literal[ModelFormat.Diffusers] = ModelFormat.Diffusers
+    base: Literal[BaseModelType.Bria] = BaseModelType.Bria
+
+    @classmethod
+    def matches(cls, mod: ModelOnDisk) -> bool:
+        if mod.path.is_file():
+            return False
+
+        config_path = mod.path / "transformer" / "config.json"
+        if config_path.exists():
+            with open(config_path) as file:
+                transformer_conf = json.load(file)
+                if transformer_conf["_class_name"] == "BriaTransformer2DModel":
+                    return True
+
+        return False
+
+    @classmethod
+    def parse(cls, mod: ModelOnDisk) -> dict[str, Any]:
+        return {}
+
+    @classmethod
+    def get_tag(cls) -> Tag:
+        return Tag(f"{ModelType.Main.value}.{ModelFormat.Diffusers.value}.{BaseModelType.Bria.value}")
+


 class IPAdapterConfigBase(ABC, BaseModel):
     type: Literal[ModelType.IPAdapter] = ModelType.IPAdapter
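The rewritten config registers Bria as a main/diffusers model and detects it from the transformer's config.json rather than from LoRA weight files. A rough illustration of the detection rule; the directory path and the extra "in_channels" key are made up for the example:

import json
from pathlib import Path

# matches() treats a diffusers-style folder as a Bria main model when
# transformer/config.json declares _class_name == "BriaTransformer2DModel".
model_dir = Path("/tmp/example-bria-model")  # hypothetical path
(model_dir / "transformer").mkdir(parents=True, exist_ok=True)
(model_dir / "transformer" / "config.json").write_text(
    json.dumps({"_class_name": "BriaTransformer2DModel", "in_channels": 4})  # "in_channels" is illustrative
)

conf = json.loads((model_dir / "transformer" / "config.json").read_text())
print(conf["_class_name"] == "BriaTransformer2DModel")  # True -> identified as a Bria main model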

invokeai/backend/model_manager/taxonomy.py

Lines changed: 1 addition & 1 deletion
@@ -30,7 +30,7 @@ class BaseModelType(str, Enum):
     Imagen4 = "imagen4"
     ChatGPT4o = "chatgpt-4o"
     FluxKontext = "flux-kontext"
-    Bria = "bria"
+    Bria = "bria-3"


 class ModelType(str, Enum):

invokeai/frontend/web/src/features/modelManagerV2/subpanels/ModelManagerPanel/ModelBaseBadge.tsx

Lines changed: 1 addition & 1 deletion
@@ -20,7 +20,7 @@ export const BASE_COLOR_MAP: Record<BaseModelType, string> = {
   imagen4: 'pink',
   'chatgpt-4o': 'pink',
   'flux-kontext': 'pink',
-  bria: 'purple',
+  'bria-3': 'purple',
 };

 const ModelBaseBadge = ({ base }: Props) => {
