
Integration tests precision improvement for inpainting #1052

Merged: 8 commits, Nov 2, 2022
1 change: 1 addition & 0 deletions src/diffusers/utils/__init__.py
@@ -42,6 +42,7 @@
if is_torch_available():
    from .testing_utils import (
        floats_tensor,
        load_hf_numpy,
        load_image,
        load_numpy,
        parse_flag_from_env,
31 changes: 25 additions & 6 deletions src/diffusers/utils/testing_utils.py
@@ -139,6 +139,29 @@ def require_onnxruntime(test_case):
return unittest.skipUnless(is_onnx_available(), "test requires onnxruntime")(test_case)


def load_numpy(arry: Union[str, np.ndarray]) -> np.ndarray:
    if isinstance(arry, str):
        if arry.startswith("http://") or arry.startswith("https://"):
            response = requests.get(arry)
            response.raise_for_status()
            arry = np.load(BytesIO(response.content))
        elif os.path.isfile(arry):
            arry = np.load(arry)
        else:
            raise ValueError(
                f"Incorrect path or url, URLs must start with `http://` or `https://`, and {arry} is not a valid path"
            )
    elif isinstance(arry, np.ndarray):
        pass
    else:
        raise ValueError(
            "Incorrect format used for numpy ndarray. Should be an url linking to an image, a local path, or a"
            " ndarray."
        )

    return arry


def load_image(image: Union[str, PIL.Image.Image]) -> PIL.Image.Image:
"""
Args:
@@ -168,17 +191,13 @@ def load_image(image: Union[str, PIL.Image.Image]) -> PIL.Image.Image:
return image


def load_numpy(path) -> np.ndarray:
def load_hf_numpy(path) -> np.ndarray:
    if not path.startswith("http://") and not path.startswith("https://"):
        path = os.path.join(
            "https://huggingface.co/datasets/fusing/diffusers-testing/resolve/main", urllib.parse.quote(path)
        )

    response = requests.get(path)
    response.raise_for_status()
    array = np.load(BytesIO(response.content))

    return array
    return load_numpy(path)


# --- pytest conf functions --- #
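As a quick orientation (not part of the diff): after this change, `load_numpy` accepts a full URL, a local file path, or an already-loaded array, while `load_hf_numpy` only resolves a relative file name against the `fusing/diffusers-testing` dataset on the Hub and then delegates to `load_numpy`. A minimal usage sketch, assuming the helpers are imported from `diffusers.utils` as in the tests below; the relative file name in the commented call is hypothetical:

```python
import numpy as np

from diffusers.utils import load_hf_numpy, load_numpy

# A full URL is downloaded and deserialized with np.load
# (this .npy file is the reference used by the inpainting test further down).
expected = load_numpy(
    "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
    "/in_paint/yellow_cat_sitting_on_a_park_bench.npy"
)
print(expected.shape, expected.dtype)  # e.g. (512, 512, 3), float32

# An already-loaded ndarray passes straight through unchanged.
arr = load_numpy(np.zeros((2, 2), dtype=np.float32))

# A relative name is prefixed with the fusing/diffusers-testing dataset URL
# before being fetched (hypothetical file name, shown for illustration only):
# latents = load_hf_numpy("gaussian_noise_s=0_shape=4_4_64_64.npy")
```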
14 changes: 11 additions & 3 deletions tests/models/test_models_unet_2d.py
@@ -21,7 +21,15 @@
import torch

from diffusers import UNet2DConditionModel, UNet2DModel
from diffusers.utils import floats_tensor, load_numpy, logging, require_torch_gpu, slow, torch_all_close, torch_device
from diffusers.utils import (
    floats_tensor,
    load_hf_numpy,
    logging,
    require_torch_gpu,
    slow,
    torch_all_close,
    torch_device,
)
from parameterized import parameterized

from ..test_modeling_common import ModelTesterMixin
@@ -423,7 +431,7 @@ def tearDown(self):

def get_latents(self, seed=0, shape=(4, 4, 64, 64), fp16=False):
dtype = torch.float16 if fp16 else torch.float32
image = torch.from_numpy(load_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype)
image = torch.from_numpy(load_hf_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype)
return image

def get_unet_model(self, fp16=False, model_id="CompVis/stable-diffusion-v1-4"):
@@ -439,7 +447,7 @@ def get_unet_model(self, fp16=False, model_id="CompVis/stable-diffusion-v1-4"):

def get_encoder_hidden_states(self, seed=0, shape=(4, 77, 768), fp16=False):
dtype = torch.float16 if fp16 else torch.float32
hidden_states = torch.from_numpy(load_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype)
hidden_states = torch.from_numpy(load_hf_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype)
return hidden_states

@parameterized.expand(
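For readers skimming the diff: these model tests pull their fixed latents and encoder hidden states from pre-generated arrays on the Hub via `load_hf_numpy`, so every CI run compares against identical inputs. A rough, standalone sketch of that pattern; the file-naming helper below is an assumption, since the real `get_file_format` is collapsed in this diff:

```python
import torch

from diffusers.utils import load_hf_numpy, torch_device


def get_file_format(seed, shape):
    # Assumed naming scheme for the stored reference files (illustrative only).
    return f"gaussian_noise_s={seed}_shape={'_'.join(str(s) for s in shape)}.npy"


def get_latents(seed=0, shape=(4, 4, 64, 64), fp16=False):
    # Deterministic test inputs come from stored arrays rather than a local RNG.
    dtype = torch.float16 if fp16 else torch.float32
    return torch.from_numpy(load_hf_numpy(get_file_format(seed, shape))).to(torch_device).to(dtype)
```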
4 changes: 2 additions & 2 deletions tests/models/test_models_vae.py
@@ -20,7 +20,7 @@

from diffusers import AutoencoderKL
from diffusers.modeling_utils import ModelMixin
from diffusers.utils import floats_tensor, load_numpy, require_torch_gpu, slow, torch_all_close, torch_device
from diffusers.utils import floats_tensor, load_hf_numpy, require_torch_gpu, slow, torch_all_close, torch_device
from parameterized import parameterized

from ..test_modeling_common import ModelTesterMixin
@@ -147,7 +147,7 @@ def tearDown(self):

def get_sd_image(self, seed=0, shape=(4, 3, 512, 512), fp16=False):
dtype = torch.float16 if fp16 else torch.float32
image = torch.from_numpy(load_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype)
image = torch.from_numpy(load_hf_numpy(self.get_file_format(seed, shape))).to(torch_device).to(dtype)
return image

def get_sd_vae_model(self, model_id="CompVis/stable-diffusion-v1-4", fp16=False):
27 changes: 12 additions & 15 deletions tests/pipelines/stable_diffusion/test_stable_diffusion_inpaint.py
@@ -28,7 +28,7 @@
UNet2DModel,
VQModel,
)
from diffusers.utils import floats_tensor, load_image, slow, torch_device
from diffusers.utils import floats_tensor, load_image, load_numpy, slow, torch_device
from diffusers.utils.testing_utils import require_torch_gpu
from PIL import Image
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer
@@ -278,11 +278,10 @@ def test_stable_diffusion_inpaint_pipeline(self):
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/in_paint/overture-creations-5sI6fQgYIuo_mask.png"
)
expected_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/in_paint/yellow_cat_sitting_on_a_park_bench.png"
expected_image = load_numpy(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint"
"/yellow_cat_sitting_on_a_park_bench.npy"
)
expected_image = np.array(expected_image, dtype=np.float32) / 255.0

model_id = "runwayml/stable-diffusion-inpainting"
pipe = StableDiffusionInpaintPipeline.from_pretrained(
@@ -307,7 +306,7 @@ def test_stable_diffusion_inpaint_pipeline(self):
image = output.images[0]

assert image.shape == (512, 512, 3)
assert np.abs(expected_image - image).max() < 1e-2
assert np.abs(expected_image - image).max() < 1e-3

def test_stable_diffusion_inpaint_pipeline_fp16(self):
init_image = load_image(
@@ -318,11 +317,10 @@ def test_stable_diffusion_inpaint_pipeline_fp16(self):
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/in_paint/overture-creations-5sI6fQgYIuo_mask.png"
)
expected_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/in_paint/yellow_cat_sitting_on_a_park_bench_fp16.png"
expected_image = load_numpy(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint"
"/yellow_cat_sitting_on_a_park_bench_fp16.npy"
)
expected_image = np.array(expected_image, dtype=np.float32) / 255.0

model_id = "runwayml/stable-diffusion-inpainting"
pipe = StableDiffusionInpaintPipeline.from_pretrained(
@@ -360,11 +358,10 @@ def test_stable_diffusion_inpaint_pipeline_pndm(self):
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/in_paint/overture-creations-5sI6fQgYIuo_mask.png"
)
expected_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/in_paint/yellow_cat_sitting_on_a_park_bench_pndm.png"
expected_image = load_numpy(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint"
"/yellow_cat_sitting_on_a_park_bench_pndm.npy"
)
expected_image = np.array(expected_image, dtype=np.float32) / 255.0

model_id = "runwayml/stable-diffusion-inpainting"
pndm = PNDMScheduler.from_config(model_id, subfolder="scheduler")
@@ -388,4 +385,4 @@ def test_stable_diffusion_inpaint_pipeline_pndm(self):
image = output.images[0]

assert image.shape == (512, 512, 3)
assert np.abs(expected_image - image).max() < 1e-2
assert np.abs(expected_image - image).max() < 1e-3
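The tolerance can move from `1e-2` to `1e-3` here because the reference is no longer a PNG: an 8-bit PNG quantizes every channel to steps of 1/255 (about 3.9e-3), which on its own can exceed a 1e-3 threshold, whereas the `.npy` files store the pipeline's float32 output exactly. A hedged sketch of how such a reference array could be regenerated; the init image path, prompt, seed, and generator placement are assumptions based on the surrounding test rather than lines shown in this diff:

```python
import numpy as np
import torch

from diffusers import StableDiffusionInpaintPipeline
from diffusers.utils import load_image

init_image = load_image(
    "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
    "/in_paint/overture-creations-5sI6fQgYIuo.png"  # assumed init image
)
mask_image = load_image(
    "https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
    "/in_paint/overture-creations-5sI6fQgYIuo_mask.png"
)

pipe = StableDiffusionInpaintPipeline.from_pretrained("runwayml/stable-diffusion-inpainting")
pipe = pipe.to("cuda")

generator = torch.Generator(device="cuda").manual_seed(0)  # assumed seed/device
output = pipe(
    prompt="Face of a yellow cat, high resolution, sitting on a park bench",  # assumed prompt
    image=init_image,
    mask_image=mask_image,
    generator=generator,
    output_type="np",  # keep the raw float32 array instead of converting to PIL
)

# Store the unquantized float32 output as the new golden file.
np.save("yellow_cat_sitting_on_a_park_bench.npy", output.images[0])
```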
@@ -31,7 +31,7 @@
VQModel,
)
from diffusers.utils import floats_tensor, load_image, slow, torch_device
from diffusers.utils.testing_utils import require_torch_gpu
from diffusers.utils.testing_utils import load_numpy, require_torch_gpu
from PIL import Image
from transformers import CLIPTextConfig, CLIPTextModel, CLIPTokenizer

@@ -358,11 +358,10 @@ def test_stable_diffusion_inpaint_legacy_pipeline(self):
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/in_paint/overture-creations-5sI6fQgYIuo_mask.png"
)
expected_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/in_paint/red_cat_sitting_on_a_park_bench.png"
expected_image = load_numpy(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint"
"/red_cat_sitting_on_a_park_bench.npy"
)
expected_image = np.array(expected_image, dtype=np.float32) / 255.0

model_id = "CompVis/stable-diffusion-v1-4"
pipe = StableDiffusionInpaintPipeline.from_pretrained(
@@ -389,7 +388,7 @@ def test_stable_diffusion_inpaint_legacy_pipeline(self):
image = output.images[0]

assert image.shape == (512, 512, 3)
assert np.abs(expected_image - image).max() < 1e-2
assert np.abs(expected_image - image).max() < 1e-3

def test_stable_diffusion_inpaint_legacy_pipeline_k_lms(self):
# TODO(Anton, Patrick) - I think we can remove this test soon
@@ -401,11 +400,10 @@ def test_stable_diffusion_inpaint_legacy_pipeline_k_lms(self):
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/in_paint/overture-creations-5sI6fQgYIuo_mask.png"
)
expected_image = load_image(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main"
"/in_paint/red_cat_sitting_on_a_park_bench_k_lms.png"
expected_image = load_numpy(
"https://huggingface.co/datasets/hf-internal-testing/diffusers-images/resolve/main/in_paint"
"/red_cat_sitting_on_a_park_bench_k_lms.npy"
)
expected_image = np.array(expected_image, dtype=np.float32) / 255.0

model_id = "CompVis/stable-diffusion-v1-4"
lms = LMSDiscreteScheduler.from_config(model_id, subfolder="scheduler")
@@ -434,7 +432,7 @@ def test_stable_diffusion_inpaint_legacy_pipeline_k_lms(self):
image = output.images[0]

assert image.shape == (512, 512, 3)
assert np.abs(expected_image - image).max() < 1e-2
assert np.abs(expected_image - image).max() < 1e-3

def test_stable_diffusion_inpaint_legacy_intermediate_state(self):
number_of_steps = 0