rm gc collect in packing #438

Status: merged · 6 commits · Feb 13, 2025
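Context for the change: the packing path called a clear_memory() helper after packing each layer, and this PR removes those per-layer calls. A minimal sketch of what that helper presumably does, modeled on the AutoAWQ-style utility (the exact body in auto_round/export/export_to_awq/utils.py may differ):

```python
# Assumed implementation of the removed helper, shown only to explain
# what the per-layer call was doing; not taken verbatim from the repo.
import gc

import torch


def clear_memory(tensor=None):
    if tensor is not None:
        del tensor  # drop the local reference before collecting
    gc.collect()  # full pass over every GC-tracked object in the process
    torch.cuda.empty_cache()  # release cached CUDA blocks back to the driver
```

Because gc.collect() walks the entire set of tracked objects, its cost grows with heap size; paying it once per quantized layer can dominate packing time on large models, which is presumably the motivation for this PR.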
auto_round/export/export_to_autogptq/export.py — 2 changes: 0 additions & 2 deletions

@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-
 # MIT License
 #
 # Copyright (c) 2023 潘其威(William)
@@ -117,7 +116,6 @@ def pack_layer(name, model, layer_config, backend, pbar):
         qlayer.to(device)
         pbar.update(1)
 
-
 def save_quantized_as_autogptq(output_dir, inplace=True, backend="auto_gptq:exllamav2",
                                **kwargs):
     """Export the model to autogptq format to easily leverage cuda kernel."""
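The imports touched in these files (ThreadPoolExecutor, tqdm, threadpoolctl) indicate that pack_layer runs once per quantized layer under a progress bar, so any fixed per-call cost is multiplied by the layer count. A hypothetical sketch of such a driver loop (pack_model and workers are illustrative names, not auto_round API; pack_layer is the per-layer function from the diffs):

```python
# Hypothetical driver loop, for illustration only: pack_layer is invoked
# once per layer, so a gc.collect() inside it would run hundreds of times
# on a typical LLM.
from concurrent.futures import ThreadPoolExecutor

from tqdm import tqdm


def pack_model(model, layer_config, backend, workers=4):
    names = list(layer_config)
    with tqdm(total=len(names)) as pbar:
        with ThreadPoolExecutor(max_workers=workers) as executor:
            for name in names:
                # pack_layer as shown in the diffs above/below
                executor.submit(pack_layer, name, model, layer_config, backend, pbar)
    return model
```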
auto_round/export/export_to_autoround/export.py — 5 changes: 1 addition & 4 deletions

@@ -21,15 +21,13 @@
 import torch.nn as nn
 import transformers
-
 from auto_round.export.register import register_format
 from auto_round.utils import get_layer_names_in_block, get_module, logger, set_module
 import threadpoolctl as tctl
 import inspect
 from tqdm import tqdm
 from concurrent.futures import ThreadPoolExecutor
 from auto_round.utils import get_autogptq_packing_qlinear
 
-
 def check_neq_config(config, data_type, bits, group_size, sym):
     """
     Checks if the provided configuration parameters are not equal to the values in the config dictionary.
@@ -133,7 +131,6 @@ def pack_layer(name, model, layer_config, backend, pbar):
             qlayer.pack(layer, scale, zero, None)
             qlayer.to(device)
         else:
-            from ..export_to_awq.utils import clear_memory
             scale, zp = layer_config[name]["scale"].to(torch.float32), layer_config[name]["zp"].to(torch.float32)
             scale = scale.t().contiguous()
             zp = zp.t().contiguous()
@@ -149,7 +146,7 @@ def pack_layer(name, model, layer_config, backend, pbar):
             )
             qlayer.to(device)
             set_module(model, name, qlayer)
-            clear_memory()
+
     pbar.update(1)
 
 
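Here the AWQ-format branch of pack_layer imported and called clear_memory() for every layer; both the local import and the call are removed. If cached memory still needs reclaiming, a cheaper pattern is to collect once after the whole loop rather than per layer; a hypothetical sketch (pack_all is an illustrative name, not something this PR adds):

```python
# Illustrative pattern, not part of this PR: amortize one GC pass over the
# whole packing run instead of paying it once per layer.
import gc

import torch


def pack_all(names, model, layer_config, backend, pbar):
    for name in names:
        # per-layer packing as in the diffs, with no clear_memory() inside
        pack_layer(name, model, layer_config, backend, pbar)
    gc.collect()  # single full collection after all layers are packed
    if torch.cuda.is_available():
        torch.cuda.empty_cache()
```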
auto_round/export/export_to_awq/export.py — 3 changes: 1 addition & 2 deletions

@@ -31,12 +31,11 @@
                                extract_block_names_to_str)
 import copy
 import json
-from .utils import WQLinear_GEMM, clear_memory
+from .utils import WQLinear_GEMM
 from concurrent.futures import ThreadPoolExecutor
 import threadpoolctl as tctl
 from tqdm import tqdm
 
-
 def pack_layer(name, model, layer_config, backend, pbar):
     with tctl.threadpool_limits(limits=1):
         pbar.set_description(f"packing {name}")