Add tests for qwen + allow uninitialized weights in Llama model #8552
base: jz/export_qwen
Changes from all commits: 9258a68, 1b7de2f, 5422420, 12d4073, 955b991, 9b5516b
```diff
@@ -236,14 +236,23 @@ def __init__(self, **kwargs):
             eviction_batch_size=eviction_batch_size,
         )

-        # assign=True: load params/buffers by assignment instead of performing an in-place copy.
-        # Because we are using device="meta", tensors do not have memory associated with them
-        # and an in-place copy is a no-op. Use assign=True in load_state_dict for this scenario.
-        missing, unexpected = self.model_.load_state_dict(
-            checkpoint,
-            strict=False,
-            assign=True,
-        )  # self.model_ = Transformer(gptconf)
+        missing, unexpected = None, None
+        try:
+            # assign=True: load params/buffers by assignment instead of performing an in-place copy.
+            # Because we are using device="meta", tensors do not have memory associated with them
+            # and an in-place copy is a no-op. Use assign=True in load_state_dict for this scenario.
+            missing, unexpected = self.model_.load_state_dict(
+                checkpoint,
+                strict=False,
+                assign=True,
+            )  # self.model_ = Transformer(gptconf)
+        except RuntimeError as e:
+            print(
+                "Could not load checkpoint into model, defaulting to random uninitialized weights."
+            )
+            print(f"Error: {e}")
+            # Need to provide concrete (empty) values for meta-initialized tensors for quantization.
+            self.model_.to_empty(device="cpu")

         if missing:
             missing_weights = [fqn for fqn in missing if fqn.endswith(".weight")]
```

**Review comment on lines +240 to +249:** Why is this needed?

**Reply:** So it doesn't error out when loading.
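For context on why `assign=True` and the `to_empty` fallback work together, here is a standalone sketch of the same pattern using a toy `nn.Linear` in place of the real `Transformer`. The module and the deliberately wrong-shaped checkpoint are illustrative, not taken from this PR:

```python
import torch
import torch.nn as nn

# Build the model on the meta device: parameters exist but own no storage.
with torch.device("meta"):
    model = nn.Linear(4, 4)

# Contrived shape mismatch to force the RuntimeError fallback path.
bad_checkpoint = {"weight": torch.randn(2, 2)}

try:
    # assign=True swaps the meta tensors for the checkpoint tensors; an
    # in-place copy would be a no-op because meta tensors own no memory.
    missing, unexpected = model.load_state_dict(
        bad_checkpoint, strict=False, assign=True
    )
except RuntimeError as e:
    print(f"Could not load checkpoint, using uninitialized weights: {e}")
    # Materialize the meta tensors with real (uninitialized) CPU storage so
    # later stages such as quantization have concrete values to operate on.
    model.to_empty(device="cpu")
```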
```diff
@@ -0,0 +1,14 @@
+# This source code is licensed under the BSD-style license found in the
+# LICENSE file in the root directory of this source tree.
+
+from executorch.examples.models.llama.model import Llama2Model
+
+
+class Qwen2_5Model(Llama2Model):
+    def __init__(self, **kwargs):
+        super().__init__(**kwargs)
+
+
+__all__ = [
+    "Qwen2_5Model",
+]
```
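Since `Qwen2_5Model` only forwards `**kwargs` to `Llama2Model`, it would be constructed the same way as the other llama-family models. A hypothetical usage sketch; the module path and keyword names below are assumptions for illustration, not verified against the `Llama2Model` signature:

```python
# Hypothetical usage; module path and kwarg names are assumptions.
from executorch.examples.models.qwen2_5 import Qwen2_5Model

model = Qwen2_5Model(
    checkpoint="path/to/qwen2_5.pth",  # assumed kwarg name
    params="path/to/params.json",      # assumed kwarg name
)
```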
**Review comment:** Sorry, I accidentally deleted the original comment about ordering, but I was going to say that I think it's clearer to list all the llama models first.