Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Introduce the copy mechanism #924

Merged
merged 6 commits into from
Oct 20, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions .github/workflows/pr_quality.yml
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,20 @@ jobs:
isort --check-only examples tests src utils scripts
flake8 examples tests src utils scripts
doc-builder style src/diffusers docs/source --max_len 119 --check_only --path_to_docs docs/source

check_repository_consistency:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v3
- name: Set up Python
uses: actions/setup-python@v4
with:
python-version: "3.7"
- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install .[quality]
- name: Check quality
run: |
python utils/check_copies.py
python utils/check_dummies.py
Comment on lines +47 to +50
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Finally some PR tests for both the dummies and the copies!

1 change: 1 addition & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ fixup: modified_only_fixup extra_style_checks autogenerate_code repo-consistency
# Make marked copies of snippets of codes conform to the original

fix-copies:
python utils/check_copies.py --fix_and_overwrite
python utils/check_dummies.py --fix_and_overwrite

# Run tests for the library
Expand Down
1 change: 1 addition & 0 deletions src/diffusers/schedulers/scheduling_ddim.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@


@dataclass
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->DDIM
class DDIMSchedulerOutput(BaseOutput):
Comment on lines +31 to 32
Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Quick demo here as well :)

"""
Output class for the scheduler's step function output.
Expand Down
1 change: 1 addition & 0 deletions src/diffusers/schedulers/scheduling_lms_discrete.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@


@dataclass
# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->LMSDiscrete
class LMSDiscreteSchedulerOutput(BaseOutput):
"""
Output class for the scheduler's step function output.
Expand Down
120 changes: 120 additions & 0 deletions tests/repo_utils/test_check_copies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import re
import shutil
import sys
import tempfile
import unittest

import black


git_repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
sys.path.append(os.path.join(git_repo_path, "utils"))

import check_copies # noqa: E402


# This is the reference code that will be used in the tests.
# If DDPMSchedulerOutput is changed in scheduling_ddpm.py, this code needs to be manually updated.
REFERENCE_CODE = """ \"""
Output class for the scheduler's step function output.

Args:
prev_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
Computed sample (x_{t-1}) of previous timestep. `prev_sample` should be used as next model input in the
denoising loop.
pred_original_sample (`torch.FloatTensor` of shape `(batch_size, num_channels, height, width)` for images):
The predicted denoised sample (x_{0}) based on the model output from the current timestep.
`pred_original_sample` can be used to preview progress or for guidance.
\"""

prev_sample: torch.FloatTensor
pred_original_sample: Optional[torch.FloatTensor] = None
"""


class CopyCheckTester(unittest.TestCase):
def setUp(self):
self.diffusers_dir = tempfile.mkdtemp()
os.makedirs(os.path.join(self.diffusers_dir, "schedulers/"))
check_copies.DIFFUSERS_PATH = self.diffusers_dir
shutil.copy(
os.path.join(git_repo_path, "src/diffusers/schedulers/scheduling_ddpm.py"),
os.path.join(self.diffusers_dir, "schedulers/scheduling_ddpm.py"),
)

def tearDown(self):
check_copies.DIFFUSERS_PATH = "src/diffusers"
shutil.rmtree(self.diffusers_dir)

def check_copy_consistency(self, comment, class_name, class_code, overwrite_result=None):
code = comment + f"\nclass {class_name}(nn.Module):\n" + class_code
if overwrite_result is not None:
expected = comment + f"\nclass {class_name}(nn.Module):\n" + overwrite_result
mode = black.Mode(target_versions={black.TargetVersion.PY35}, line_length=119)
code = black.format_str(code, mode=mode)
fname = os.path.join(self.diffusers_dir, "new_code.py")
with open(fname, "w", newline="\n") as f:
f.write(code)
if overwrite_result is None:
self.assertTrue(len(check_copies.is_copy_consistent(fname)) == 0)
else:
check_copies.is_copy_consistent(f.name, overwrite=True)
with open(fname, "r") as f:
self.assertTrue(f.read(), expected)

def test_find_code_in_diffusers(self):
code = check_copies.find_code_in_diffusers("schedulers.scheduling_ddpm.DDPMSchedulerOutput")
self.assertEqual(code, REFERENCE_CODE)

def test_is_copy_consistent(self):
# Base copy consistency
self.check_copy_consistency(
"# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput",
"DDPMSchedulerOutput",
REFERENCE_CODE + "\n",
)

# With no empty line at the end
self.check_copy_consistency(
"# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput",
"DDPMSchedulerOutput",
REFERENCE_CODE,
)

# Copy consistency with rename
self.check_copy_consistency(
"# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->Test",
"TestSchedulerOutput",
re.sub("DDPM", "Test", REFERENCE_CODE),
)

# Copy consistency with a really long name
long_class_name = "TestClassWithAReallyLongNameBecauseSomePeopleLikeThatForSomeReason"
self.check_copy_consistency(
f"# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->{long_class_name}",
f"{long_class_name}SchedulerOutput",
re.sub("Bert", long_class_name, REFERENCE_CODE),
)

# Copy consistency with overwrite
self.check_copy_consistency(
"# Copied from diffusers.schedulers.scheduling_ddpm.DDPMSchedulerOutput with DDPM->Test",
"TestSchedulerOutput",
REFERENCE_CODE,
overwrite_result=re.sub("DDPM", "Test", REFERENCE_CODE),
)
124 changes: 124 additions & 0 deletions tests/repo_utils/test_check_dummies.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,124 @@
# Copyright 2022 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
import sys
import unittest


git_repo_path = os.path.abspath(os.path.dirname(os.path.dirname(os.path.dirname(__file__))))
sys.path.append(os.path.join(git_repo_path, "utils"))

import check_dummies
from check_dummies import create_dummy_files, create_dummy_object, find_backend, read_init # noqa: E402


# Align TRANSFORMERS_PATH in check_dummies with the current path
check_dummies.PATH_TO_DIFFUSERS = os.path.join(git_repo_path, "src", "diffusers")


class CheckDummiesTester(unittest.TestCase):
def test_find_backend(self):
simple_backend = find_backend(" if not is_torch_available():")
self.assertEqual(simple_backend, "torch")

# backend_with_underscore = find_backend(" if not is_tensorflow_text_available():")
# self.assertEqual(backend_with_underscore, "tensorflow_text")

double_backend = find_backend(" if not (is_torch_available() and is_transformers_available()):")
self.assertEqual(double_backend, "torch_and_transformers")

# double_backend_with_underscore = find_backend(
# " if not (is_sentencepiece_available() and is_tensorflow_text_available()):"
# )
# self.assertEqual(double_backend_with_underscore, "sentencepiece_and_tensorflow_text")

triple_backend = find_backend(
" if not (is_torch_available() and is_transformers_available() and is_onnx_available()):"
)
self.assertEqual(triple_backend, "torch_and_transformers_and_onnx")

def test_read_init(self):
objects = read_init()
# We don't assert on the exact list of keys to allow for smooth grow of backend-specific objects
self.assertIn("torch", objects)
self.assertIn("torch_and_transformers", objects)
self.assertIn("flax_and_transformers", objects)
self.assertIn("torch_and_transformers_and_onnx", objects)

# Likewise, we can't assert on the exact content of a key
self.assertIn("UNet2DModel", objects["torch"])
self.assertIn("FlaxUNet2DConditionModel", objects["flax"])
self.assertIn("StableDiffusionPipeline", objects["torch_and_transformers"])
self.assertIn("FlaxStableDiffusionPipeline", objects["flax_and_transformers"])
self.assertIn("LMSDiscreteScheduler", objects["torch_and_scipy"])
self.assertIn("OnnxStableDiffusionPipeline", objects["torch_and_transformers_and_onnx"])

def test_create_dummy_object(self):
dummy_constant = create_dummy_object("CONSTANT", "'torch'")
self.assertEqual(dummy_constant, "\nCONSTANT = None\n")

dummy_function = create_dummy_object("function", "'torch'")
self.assertEqual(
dummy_function, "\ndef function(*args, **kwargs):\n requires_backends(function, 'torch')\n"
)

expected_dummy_class = """
class FakeClass(metaclass=DummyObject):
_backends = 'torch'

def __init__(self, *args, **kwargs):
requires_backends(self, 'torch')

@classmethod
def from_config(cls, *args, **kwargs):
requires_backends(cls, 'torch')

@classmethod
def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, 'torch')
"""
dummy_class = create_dummy_object("FakeClass", "'torch'")
self.assertEqual(dummy_class, expected_dummy_class)

def test_create_dummy_files(self):
expected_dummy_pytorch_file = """# This file is autogenerated by the command `make fix-copies`, do not edit.
# flake8: noqa

from ..utils import DummyObject, requires_backends


CONSTANT = None


def function(*args, **kwargs):
requires_backends(function, ["torch"])


class FakeClass(metaclass=DummyObject):
_backends = ["torch"]

def __init__(self, *args, **kwargs):
requires_backends(self, ["torch"])

@classmethod
def from_config(cls, *args, **kwargs):
requires_backends(cls, ["torch"])

@classmethod
def from_pretrained(cls, *args, **kwargs):
requires_backends(cls, ["torch"])
"""
dummy_files = create_dummy_files({"torch": ["CONSTANT", "function", "FakeClass"]})
self.assertEqual(dummy_files["torch"], expected_dummy_pytorch_file)
Loading