Update transformers tests generation util v4.45.2 #1441

Merged: 23 commits merged on Dec 11, 2024
Changes from 20 commits
141 changes: 140 additions & 1 deletion conftest.py
@@ -1,3 +1,88 @@
# coding=utf-8
# Copyright 2020 The HuggingFace Team. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# tests directory-specific settings - this file is run automatically
# by pytest before any tests are run
import doctest
import sys
import warnings
from os.path import abspath, dirname, join

import _pytest
import pytest
from transformers.testing_utils import HfDoctestModule, HfDocTestParser


NOT_DEVICE_TESTS = {
    "test_tokenization",
    "test_processor",
    "test_processing",
    "test_beam_constraints",
    "test_configuration_utils",
    "test_data_collator",
    "test_trainer_callback",
    "test_trainer_utils",
    "test_feature_extraction",
    "test_image_processing",
    "test_image_processor",
    "test_image_transforms",
    "test_optimization",
    "test_retrieval",
    "test_config",
    "test_from_pretrained_no_checkpoint",
    "test_keep_in_fp32_modules",
    "test_gradient_checkpointing_backward_compatibility",
    "test_gradient_checkpointing_enable_disable",
    "test_save_load_fast_init_from_base",
    "test_fast_init_context_manager",
    "test_fast_init_tied_embeddings",
    "test_save_load_fast_init_to_base",
    "test_torch_save_load",
    "test_initialization",
    "test_forward_signature",
    "test_model_get_set_embeddings",
    "test_model_main_input_name",
    "test_correct_missing_keys",
    "test_tie_model_weights",
    "test_can_use_safetensors",
    "test_load_save_without_tied_weights",
    "test_tied_weights_keys",
    "test_model_weights_reload_no_missing_tied_weights",
    "test_pt_tf_model_equivalence",
    "test_mismatched_shapes_have_properly_initialized_weights",
    "test_matched_shapes_have_loaded_weights_when_some_mismatched_shapes_exist",
    "test_model_is_small",
    "test_tf_from_pt_safetensors",
    "test_flax_from_pt_safetensors",
    "ModelTest::test_pipeline_",  # None of the pipeline tests from PipelineTesterMixin (of which XxxModelTest inherits from) are running on device
    "ModelTester::test_pipeline_",
    "/repo_utils/",
    "/utils/",
    "/agents/",
}

# allow having multiple repository checkouts and not needing to remember to rerun
# `pip install -e '.[dev]'` when switching between checkouts and running tests.
git_repo_path = abspath(join(dirname(__file__), "src"))
sys.path.insert(1, git_repo_path)

# silence FutureWarning warnings in tests since often we can't act on them until
# they become normal warnings - i.e. the tests still need to test the current functionality
warnings.simplefilter(action="ignore", category=FutureWarning)


class Secret:
    """
    Taken from: https://stackoverflow.com/a/67393351
@@ -13,8 +98,44 @@ def __str___(self):
        return "*******"


def pytest_configure(config):
    config.addinivalue_line(
        "markers", "is_pt_tf_cross_test: mark test to run only when PT and TF interactions are tested"
    )
    config.addinivalue_line(
        "markers", "is_pt_flax_cross_test: mark test to run only when PT and FLAX interactions are tested"
    )
    config.addinivalue_line("markers", "is_pipeline_test: mark test to run only when pipelines are tested")
    config.addinivalue_line("markers", "is_staging_test: mark test to run only in the staging environment")
    config.addinivalue_line("markers", "accelerate_tests: mark test that require accelerate")
    config.addinivalue_line("markers", "agent_tests: mark the agent tests that are run on their specific schedule")
    config.addinivalue_line("markers", "not_device_test: mark the tests always running on cpu")

def pytest_collection_modifyitems(items):
    for item in items:
        if any(test_name in item.nodeid for test_name in NOT_DEVICE_TESTS):
            item.add_marker(pytest.mark.not_device_test)


def pytest_addoption(parser):
    parser.addoption("--token", action="store", default=None)
    from transformers.testing_utils import pytest_addoption_shared

    pytest_addoption_shared(parser)


def pytest_terminal_summary(terminalreporter):
    from transformers.testing_utils import pytest_terminal_summary_main

    make_reports = terminalreporter.config.getoption("--make-reports")
    if make_reports:
        pytest_terminal_summary_main(terminalreporter, id=make_reports)


def pytest_sessionfinish(session, exitstatus):
    # If no tests are collected, pytest exits with code 5, which makes the CI fail.
    if exitstatus == 5:
        session.exitstatus = 0


def pytest_generate_tests(metafunc):
@@ -23,3 +144,21 @@ def pytest_generate_tests(metafunc):
    option_value = Secret(metafunc.config.option.token)
    if "token" in metafunc.fixturenames:
        metafunc.parametrize("token", [option_value])


# Doctest custom flag to ignore output.
IGNORE_RESULT = doctest.register_optionflag("IGNORE_RESULT")

OutputChecker = doctest.OutputChecker


class CustomOutputChecker(OutputChecker):
    def check_output(self, want, got, optionflags):
        if IGNORE_RESULT & optionflags:
            return True
        return OutputChecker.check_output(self, want, got, optionflags)


doctest.OutputChecker = CustomOutputChecker
_pytest.doctest.DoctestModule = HfDoctestModule
doctest.DocTestParser = HfDocTestParser
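
For context, a minimal sketch of how these conftest.py hooks are consumed from a test file. The file and test names below are hypothetical and not part of this PR; only the `token` fixture, the `Secret` wrapper, and the `IGNORE_RESULT` flag come from the code above.

    # hypothetical tests/test_conftest_hooks.py, illustrative only
    def test_uses_hub_token(token):
        # `token` is parametrized by pytest_generate_tests from the --token CLI
        # option and wrapped in Secret, so the raw value is not printed in
        # pytest's parametrization ids or reports.
        assert token is not None  # always a Secret instance, even without --token

    # A doctest collected through the new --doctest-glob option (see the
    # pyproject.toml change below) can opt out of output checking entirely:
    #
    #   >>> print({"generated": "text"})  # doctest: +IGNORE_RESULT
    #   any expected output is accepted, because check_output() returns True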
29 changes: 17 additions & 12 deletions optimum/habana/transformers/generation/utils.py
@@ -199,19 +199,20 @@ def _prepare_decoder_input_ids_for_generation(
# 2. `decoder_start_token_id` must have shape (batch_size, 1)
if device is None:
device = self.device
if token_idx is None:
if decoder_start_token_id.ndim == 1:
if decoder_start_token_id.shape[0] != batch_size:
raise ValueError(
f"`decoder_start_token_id` expected to have length {batch_size} but got {decoder_start_token_id.shape[0]}"
)
decoder_start_token_id = decoder_start_token_id.view(-1, 1)
else:
decoder_start_token_id = (
torch.ones((batch_size, 1), dtype=torch.long, device=device) * decoder_start_token_id
if decoder_start_token_id.ndim == 1:
if decoder_start_token_id.shape[0] != batch_size:
raise ValueError(
f"`decoder_start_token_id` expected to have length {batch_size} but got {decoder_start_token_id.shape[0]}"
)
decoder_start_token_id = decoder_start_token_id.view(-1, 1)
else:
# creating padded decoder_input_ids to achieve static shapes. Later new tokens once generated are copied in to decoder_input_ids based on token_idx
decoder_start_token_id = (
torch.ones((batch_size, 1), dtype=torch.long, device=device) * decoder_start_token_id
)

if token_idx is not None:
Contributor:

@malkomes this is a bit confusing! Are you repeating the same ops again here? Why did you remove the if condition in the first place if you want to repeat the same operation again?

            decoder_start_token_id = (
                torch.ones((batch_size, 1), dtype=torch.long, device=device) * decoder_start_token_id
            )

Contributor (@yafshar, Oct 25, 2024):

It should only have the max_length and padding when token_idx is not None.

Contributor (Author):

Here we are adding logic to handle the case where decoder_start_token_id holds a batch of tokens, and there are indeed different ways to implement this. In the other PR I did something a little "more cute", handling the decoder_start_token_id batch case and the static shape together, with more code changes, different variable names, and so on.

This time I realized it is much easier to maintain the code if we keep it close to the transformers code. It so happens that the max_length padding works for both the batch and non-batch cases if we do it afterwards.

I was hoping my comments in malkomes#2 would explain it; let me know whether that helps. Maybe we should also add notes to the code, since it is a little confusing.

Contributor:

Thanks for the explanation. I agree that for readability and maintainability your separation makes sense!

# creating padded decoder_input_ids to achieve static shapes.
# Later new tokens once generated are copied in to decoder_input_ids based on token_idx
max_length = max_new_tokens + 1 if max_new_tokens is not None else self.generation_config.max_length
decoder_start_token_id = (
torch.ones((batch_size, 1), dtype=torch.long, device=device) * decoder_start_token_id
@@ -2952,7 +2953,8 @@ def expand_if_needed(tensor, new_size, value, dim=-1):
if self.generation_config.early_stopping:
num_eos_tokens.add_(beam_tokens[0:num_beams].eq(self.config.eos_token_id).sum())

beam_scores.add_(torch.where(beam_tokens.eq(self.config.eos_token_id), float("-inf"), 0.0))
if self.config.eos_token_id is not None:
beam_scores.add_(torch.where(beam_tokens.eq(self.config.eos_token_id), float("-inf"), 0.0))
beam_scores = beam_scores.view(batch_size, -1).unsqueeze(0)
_, selected = torch.topk(beam_scores, k=num_beams, dim=-1, largest=True, sorted=True)
offset = torch.arange(0, torch.numel(beam_scores), beam_scores.shape[-1]).unsqueeze(-1)
@@ -3089,6 +3091,9 @@ def move(obj, device):
if not output_scores:
sequence_outputs["sequence_scores"] = None

if self.generation_config.static_shapes:
raise NotImplementedError("sequence_scores is not implemented for static_shapes")

if self.config.is_encoder_decoder:
return GenerateBeamEncoderDecoderOutput(
sequences=sequence_outputs["sequences"],
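To make the review discussion above concrete, here is a standalone sketch of how the start-token normalization and the token_idx padding compose. It is simplified (the function name, zero padding value, and error message are illustrative), not the PR code itself:

    import torch

    def build_decoder_input_ids(decoder_start_token_id, batch_size, token_idx, max_length, device="cpu"):
        # 1) Normalize the start token(s) to shape (batch_size, 1), whether a
        #    scalar or a per-example 1-D tensor was provided.
        if torch.is_tensor(decoder_start_token_id) and decoder_start_token_id.ndim == 1:
            if decoder_start_token_id.shape[0] != batch_size:
                raise ValueError("decoder_start_token_id must have one entry per batch element")
            decoder_input_ids = decoder_start_token_id.view(-1, 1)
        else:
            decoder_input_ids = torch.ones((batch_size, 1), dtype=torch.long, device=device) * decoder_start_token_id

        # 2) Only afterwards, and only in the static-shape path (token_idx given),
        #    pad up to max_length; newly generated tokens are later written into
        #    the padded buffer at position token_idx.
        if token_idx is not None:
            pad = torch.zeros((batch_size, max_length - 1), dtype=torch.long, device=device)
            decoder_input_ids = torch.cat([decoder_input_ids, pad], dim=-1)
        return decoder_input_ids

Doing the shape handling first and the padding second is what lets the same padding code serve both the scalar and the batched decoder_start_token_id cases.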
4 changes: 3 additions & 1 deletion optimum/habana/transformers/models/bart/modeling_bart.py
@@ -458,7 +458,9 @@ def gaudi_BartDecoder_forward(

# past_key_values_length
past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0
tensor_past_key_values_length = token_idx - 1 if use_cache else torch.tensor(past_key_values_length)
tensor_past_key_values_length = (
token_idx - 1 if (use_cache and token_idx is not None) else torch.tensor(past_key_values_length)
)

if inputs_embeds is None:
inputs_embeds = self.embed_tokens(input)
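A minimal standalone sketch of the guarded computation (the function name is illustrative; the assumption, as in the static-shape generation path, is that token_idx points at the next position to be filled):

    import torch

    def effective_past_length(past_key_values, use_cache, token_idx):
        # Dynamic path: read the past length from the cached key tensor, or use 0
        # when there is no cache yet.
        past_key_values_length = past_key_values[0][0].shape[2] if past_key_values is not None else 0
        # Static-shape path: only taken when both use_cache and token_idx are set;
        # since token_idx is the next slot to fill, the past length is token_idx - 1.
        if use_cache and token_idx is not None:
            return token_idx - 1
        return torch.tensor(past_key_values_length)

The added `token_idx is not None` check prevents the previous `token_idx - 1` expression from failing when use_cache is true but no token_idx is supplied.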
9 changes: 9 additions & 0 deletions pyproject.toml
@@ -41,3 +41,12 @@ skip-magic-trailing-comma = false

# Like Black, automatically detect the appropriate line ending.
line-ending = "auto"

[tool.pytest.ini_options]
addopts = "--doctest-glob='**/*.md'"
doctest_optionflags="NUMBER NORMALIZE_WHITESPACE ELLIPSIS"
markers = [
"flash_attn_test: marks tests related to flash attention (deselect with '-m \"not flash_attn_test\"')",
"bitsandbytes: select (or deselect with `not`) bitsandbytes integration tests",
"generate: marks tests that use the GenerationTesterMixin"
]
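
For reference, a small sketch of how the new markers can be selected or deselected programmatically; the CLI equivalents are `-m 'not flash_attn_test'`, `-m generate`, and so on (the `tests` path below is an assumption about where the suite lives):

    # hypothetical helper script, equivalent to passing -m on the command line
    import pytest

    # Deselect the flash-attention tests declared with the new marker;
    # use "-m generate" instead to run only the GenerationTesterMixin tests.
    raise SystemExit(pytest.main(["tests", "-m", "not flash_attn_test"]))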