Update Cascade Tests (#7324)

DN6 · web-flow · commit 4974b84564d2 · 2024-03-14T20:51:22.000+05:30
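* Load Stable Cascade weights from the main `stabilityai/stable-cascade` and `stabilityai/stable-cascade-prior` repos (`variant="bf16"`) instead of pinned PR revisions and the `diffusers/StableCascade-*` repos

* Run the slow prior and decoder pipeline tests with `num_inference_steps=2` and `output_type="np"`

* Compare outputs against new `.npy` references using `numpy_cosine_similarity_distance` (< 1e-4) instead of `np.allclose`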
diff --git a/tests/models/unets/test_models_unet_stable_cascade.py b/tests/models/unets/test_models_unet_stable_cascade.py
@@ -50,9 +50,7 @@ def test_stable_cascade_unet_prior_single_file_components(self):
         gc.collect()
         torch.cuda.empty_cache()
 
-        unet = StableCascadeUNet.from_pretrained(
-            "stabilityai/stable-cascade-prior", subfolder="prior", revision="refs/pr/2", variant="bf16"
-        )
+        unet = StableCascadeUNet.from_pretrained("stabilityai/stable-cascade-prior", subfolder="prior", variant="bf16")
         unet_config = unet.config
         del unet
         gc.collect()
@@ -74,9 +72,7 @@ def test_stable_cascade_unet_decoder_single_file_components(self):
         gc.collect()
         torch.cuda.empty_cache()
 
-        unet = StableCascadeUNet.from_pretrained(
-            "stabilityai/stable-cascade", subfolder="decoder", revision="refs/pr/44", variant="bf16"
-        )
+        unet = StableCascadeUNet.from_pretrained("stabilityai/stable-cascade", subfolder="decoder", variant="bf16")
         unet_config = unet.config
         del unet
         gc.collect()
diff --git a/tests/pipelines/stable_cascade/test_stable_cascade_decoder.py b/tests/pipelines/stable_cascade/test_stable_cascade_decoder.py
@@ -21,13 +21,13 @@
 from transformers import CLIPTextConfig, CLIPTextModelWithProjection, CLIPTokenizer
 
 from diffusers import DDPMWuerstchenScheduler, StableCascadeDecoderPipeline
-from diffusers.image_processor import VaeImageProcessor
 from diffusers.models import StableCascadeUNet
 from diffusers.pipelines.wuerstchen import PaellaVQModel
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
-    load_image,
+    load_numpy,
     load_pt,
+    numpy_cosine_similarity_distance,
     require_torch_gpu,
     skip_mps,
     slow,
@@ -258,7 +258,7 @@ def tearDown(self):
 
     def test_stable_cascade_decoder(self):
         pipe = StableCascadeDecoderPipeline.from_pretrained(
-            "diffusers/StableCascade-decoder", torch_dtype=torch.bfloat16
+            "stabilityai/stable-cascade", variant="bf16", torch_dtype=torch.bfloat16
         )
         pipe.enable_model_cpu_offload()
         pipe.set_progress_bar_config(disable=None)
@@ -271,18 +271,16 @@ def test_stable_cascade_decoder(self):
         )
 
         image = pipe(
-            prompt=prompt, image_embeddings=image_embedding, num_inference_steps=10, generator=generator
+            prompt=prompt,
+            image_embeddings=image_embedding,
+            output_type="np",
+            num_inference_steps=2,
+            generator=generator,
         ).images[0]
 
-        assert image.size == (1024, 1024)
-
-        expected_image = load_image(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/t2i.png"
+        assert image.shape == (1024, 1024, 3)
+        expected_image = load_numpy(
+            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/stable_cascade_decoder_image.npy"
         )
-
-        image_processor = VaeImageProcessor()
-
-        image_np = image_processor.pil_to_numpy(image)
-        expected_image_np = image_processor.pil_to_numpy(expected_image)
-
-        self.assertTrue(np.allclose(image_np, expected_image_np, atol=53e-2))
+        max_diff = numpy_cosine_similarity_distance(image.flatten(), expected_image.flatten())
+        assert max_diff < 1e-4
diff --git a/tests/pipelines/stable_cascade/test_stable_cascade_prior.py b/tests/pipelines/stable_cascade/test_stable_cascade_prior.py
@@ -29,7 +29,8 @@
 from diffusers.utils.import_utils import is_peft_available
 from diffusers.utils.testing_utils import (
     enable_full_determinism,
-    load_pt,
+    load_numpy,
+    numpy_cosine_similarity_distance,
     require_peft_backend,
     require_torch_gpu,
     skip_mps,
@@ -319,25 +320,22 @@ def tearDown(self):
         torch.cuda.empty_cache()
 
     def test_stable_cascade_prior(self):
-        pipe = StableCascadePriorPipeline.from_pretrained("diffusers/StableCascade-prior", torch_dtype=torch.bfloat16)
+        pipe = StableCascadePriorPipeline.from_pretrained(
+            "stabilityai/stable-cascade-prior", variant="bf16", torch_dtype=torch.bfloat16
+        )
         pipe.enable_model_cpu_offload()
         pipe.set_progress_bar_config(disable=None)
 
         prompt = "A photograph of the inside of a subway train. There are raccoons sitting on the seats. One of them is reading a newspaper. The window shows the city in the background."
 
         generator = torch.Generator(device="cpu").manual_seed(0)
 
-        output = pipe(prompt, num_inference_steps=10, generator=generator)
+        output = pipe(prompt, num_inference_steps=2, output_type="np", generator=generator)
         image_embedding = output.image_embeddings
-
-        expected_image_embedding = load_pt(
-            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/image_embedding.pt"
+        expected_image_embedding = load_numpy(
+            "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/stable_cascade/stable_cascade_prior_image_embeddings.npy"
         )
-
         assert image_embedding.shape == (1, 16, 24, 24)
 
-        self.assertTrue(
-            np.allclose(
-                image_embedding.cpu().float().numpy(), expected_image_embedding.cpu().float().numpy(), atol=5e-2
-            )
-        )
+        max_diff = numpy_cosine_similarity_distance(image_embedding.flatten(), expected_image_embedding.flatten())
+        assert max_diff < 1e-4