diff --git a/src/diffusers/utils/__init__.py b/src/diffusers/utils/__init__.py index 12d731128385..7395f4edfa26 100644 --- a/src/diffusers/utils/__init__.py +++ b/src/diffusers/utils/__init__.py @@ -42,6 +42,7 @@ if is_torch_available(): from .testing_utils import ( floats_tensor, + load_hf_numpy, load_image, load_numpy, parse_flag_from_env, diff --git a/src/diffusers/utils/testing_utils.py b/src/diffusers/utils/testing_utils.py index bd3b08d54a1c..bf398e5b6fe5 100644 --- a/src/diffusers/utils/testing_utils.py +++ b/src/diffusers/utils/testing_utils.py @@ -139,6 +139,29 @@ def require_onnxruntime(test_case): return unittest.skipUnless(is_onnx_available(), "test requires onnxruntime")(test_case) +def load_numpy(arry: Union[str, np.ndarray]) -> np.ndarray: + if isinstance(arry, str): + if arry.startswith("http://") or arry.startswith("https://"): + response = requests.get(arry) + response.raise_for_status() + arry = np.load(BytesIO(response.content)) + elif os.path.isfile(arry): + arry = np.load(arry) + else: + raise ValueError( + f"Incorrect path or url, URLs must start with `http://` or `https://`, and {arry} is not a valid path" + ) + elif isinstance(arry, np.ndarray): + pass + else: + raise ValueError( + "Incorrect format used for numpy ndarray. Should be an url linking to an image, a local path, or a" + " ndarray." + ) + + return arry + + def load_image(image: Union[str, PIL.Image.Image]) -> PIL.Image.Image: """ Args: @@ -168,17 +191,13 @@ def load_image(image: Union[str, PIL.Image.Image]) -> PIL.Image.Image: return image -def load_numpy(path) -> np.ndarray: +def load_hf_numpy(path) -> np.ndarray: if not path.startswith("http://") or path.startswith("https://"): path = os.path.join( "https://huggingface.co/datasets/fusing/diffusers-testing/resolve/main", urllib.parse.quote(path) ) - response = requests.get(path) - response.raise_for_status() - array = np.load(BytesIO(response.content)) - - return array + return load_numpy(path) # --- pytest conf functions --- # diff --git a/tests/models/test_models_unet_2d.py b/tests/models/test_models_unet_2d.py index 548588918c88..20371708a4d8 100644 --- a/tests/models/test_models_unet_2d.py +++ b/tests/models/test_models_unet_2d.py @@ -21,7 +21,15 @@ import torch from diffusers import UNet2DConditionModel, UNet2DModel -from diffusers.utils import floats_tensor, load_numpy, logging, require_torch_gpu, slow, torch_all_close, torch_device +from diffusers.utils import ( + floats_tensor, + load_hf_numpy, + logging, + require_torch_gpu, + slow, + torch_all_close, + torch_device, +) from parameterized import parameterized from ..test_modeling_common import ModelTesterMixin @@ -423,7 +431,7 @@ def tearDown(self): def get_latents(self, seed=0, shape=(4, 4, 64, 64), fp16=False): dtype = torch.float16 if fp16 else torch.float32 - image = torch.from_numpy(load_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype) + image = torch.from_numpy(load_hf_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype) return image def get_unet_model(self, fp16=False, model_id="CompVis/stable-diffusion-v1-4"): @@ -439,7 +447,7 @@ def get_unet_model(self, fp16=False, model_id="CompVis/stable-diffusion-v1-4"): def get_encoder_hidden_states(self, seed=0, shape=(4, 77, 768), fp16=False): dtype = torch.float16 if fp16 else torch.float32 - hidden_states = torch.from_numpy(load_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype) + hidden_states = torch.from_numpy(load_hf_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype) return hidden_states @parameterized.expand( diff --git a/tests/models/test_models_vae.py b/tests/models/test_models_vae.py index f6333d6cd906..3da7b50e34f3 100644 --- a/tests/models/test_models_vae.py +++ b/tests/models/test_models_vae.py @@ -20,7 +20,7 @@ from diffusers import AutoencoderKL from diffusers.modeling_utils import ModelMixin -from diffusers.utils import floats_tensor, load_numpy, require_torch_gpu, slow, torch_all_close, torch_device +from diffusers.utils import floats_tensor, load_hf_numpy, require_torch_gpu, slow, torch_all_close, torch_device from parameterized import parameterized from ..test_modeling_common import ModelTesterMixin @@ -147,7 +147,7 @@ def tearDown(self): def get_sd_image(self, seed=0, shape=(4, 3, 512, 512), fp16=False): dtype = torch.float16 if fp16 else torch.float32 - image = torch.from_numpy(load_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype) + image = torch.from_numpy(load_hf_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype) return image def get_sd_vae_model(self, model_id="CompVis/stable-diffusion-v1-4", fp16=False): diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py index 0a373ada68bc..f5a8b3cf9ecc 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py @@ -28,7 +28,7 @@ UNet2DModel, VQModel, ) -from diffusers.utils import floats_tensor, load_image, slow, torch_device +from diffusers.utils import floats_tensor, load_image, load_numpy, slow, torch_device from diffusers.utils.testing_utils import require_torch_gpu from PIL import Image from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer @@ -278,11 +278,10 @@ def test_stable_diffusion_inpaint_pipeline(self): "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" ) - expected_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/in_paint/yellow_cat_sitting_on_a_park_bench.png" + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint" + "/yellow_cat_sitting_on_a_park_bench.npy" ) - expected_image = np.array(expected_image, dtype=np.float32) / 255.0 model_id = "runwayml/stable-diffusion-inpainting" pipe = StableDiffusionInpaintPipeline.from_pretrained( @@ -307,7 +306,7 @@ def test_stable_diffusion_inpaint_pipeline(self): image = output.images[0] assert image.shape == (512, 512, 3) - assert np.abs(expected_image - image).max() < 1e-2 + assert np.abs(expected_image - image).max() < 1e-3 def test_stable_diffusion_inpaint_pipeline_fp16(self): init_image = load_image( @@ -318,11 +317,10 @@ def test_stable_diffusion_inpaint_pipeline_fp16(self): "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" ) - expected_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/in_paint/yellow_cat_sitting_on_a_park_bench_fp16.png" + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint" + "/yellow_cat_sitting_on_a_park_bench_fp16.npy" ) - expected_image = np.array(expected_image, dtype=np.float32) / 255.0 model_id = "runwayml/stable-diffusion-inpainting" pipe = StableDiffusionInpaintPipeline.from_pretrained( @@ -360,11 +358,10 @@ def test_stable_diffusion_inpaint_pipeline_pndm(self): "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" ) - expected_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/in_paint/yellow_cat_sitting_on_a_park_bench_pndm.png" + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint" + "/yellow_cat_sitting_on_a_park_bench_pndm.npy" ) - expected_image = np.array(expected_image, dtype=np.float32) / 255.0 model_id = "runwayml/stable-diffusion-inpainting" pndm = PNDMScheduler.from_config(model_id, subfolder="scheduler") @@ -388,4 +385,4 @@ def test_stable_diffusion_inpaint_pipeline_pndm(self): image = output.images[0] assert image.shape == (512, 512, 3) - assert np.abs(expected_image - image).max() < 1e-2 + assert np.abs(expected_image - image).max() < 1e-3 diff --git a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint_legacy.py b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint_legacy.py index d25342a35aea..81deba67f274 100644 --- a/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint_legacy.py +++ b/tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint_legacy.py @@ -31,7 +31,7 @@ VQModel, ) from diffusers.utils import floats_tensor, load_image, slow, torch_device -from diffusers.utils.testing_utils import require_torch_gpu +from diffusers.utils.testing_utils import load_numpy, require_torch_gpu from PIL import Image from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer @@ -358,11 +358,10 @@ def test_stable_diffusion_inpaint_legacy_pipeline(self): "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" ) - expected_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/in_paint/red_cat_sitting_on_a_park_bench.png" + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint" + "/red_cat_sitting_on_a_park_bench.npy" ) - expected_image = np.array(expected_image, dtype=np.float32) / 255.0 model_id = "CompVis/stable-diffusion-v1-4" pipe = StableDiffusionInpaintPipeline.from_pretrained( @@ -389,7 +388,7 @@ def test_stable_diffusion_inpaint_legacy_pipeline(self): image = output.images[0] assert image.shape == (512, 512, 3) - assert np.abs(expected_image - image).max() < 1e-2 + assert np.abs(expected_image - image).max() < 1e-3 def test_stable_diffusion_inpaint_legacy_pipeline_k_lms(self): # TODO(Anton, Patrick) - I think we can remove this test soon @@ -401,11 +400,10 @@ def test_stable_diffusion_inpaint_legacy_pipeline_k_lms(self): "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" "/in_paint/overture-creations-5sI6fQgYIuo_mask.png" ) - expected_image = load_image( - "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main" - "/in_paint/red_cat_sitting_on_a_park_bench_k_lms.png" + expected_image = load_numpy( + "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint" + "/red_cat_sitting_on_a_park_bench_k_lms.npy" ) - expected_image = np.array(expected_image, dtype=np.float32) / 255.0 model_id = "CompVis/stable-diffusion-v1-4" lms = LMSDiscreteScheduler.from_config(model_id, subfolder="scheduler") @@ -434,7 +432,7 @@ def test_stable_diffusion_inpaint_legacy_pipeline_k_lms(self): image = output.images[0] assert image.shape == (512, 512, 3) - assert np.abs(expected_image - image).max() < 1e-2 + assert np.abs(expected_image - image).max() < 1e-3 def test_stable_diffusion_inpaint_legacy_intermediate_state(self): number_of_steps = 0