Skip to content

Commit

Permalink
fix OOM
Browse files (browse the repository at this point in the history)
  • Loading branch information
AllentDan committed Jan 7, 2025
1 parent 8952e80 commit fd40edb
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 2 deletions.
4 changes: 4 additions & 0 deletions lmdeploy/lite/apis/smooth_quant.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,8 @@ def smooth_quant(model: str,
parent = model.get_submodule(parent_name)
setattr(parent, child_name, q_linear)
linear.to('cpu')
q_linear.to('cpu')
torch.cuda.empty_cache()

for name, norm in rmsnorms.items():
if skipped_module(name):
Expand All @@ -99,6 +101,8 @@ def smooth_quant(model: str,
parent = model.get_submodule(parent_name)
setattr(parent, child_name, q_norm)
norm.to('cpu')
q_linear.to('cpu')
torch.cuda.empty_cache()

if vl_model:
from .auto_awq import save_vl_model
Expand Down
11 changes: 9 additions & 2 deletions lmdeploy/lite/quantization/awq.py
Original file line number Diff line number Diff line change
Expand Up @@ -325,6 +325,7 @@ def quant_weights(model, fcs, bits, symmetry, group_size=-1, device='cuda'):
scales, zeros))
setattr(parent, child_name, q_linear)
fc.to('cpu')
torch.cuda.empty_cache()

print(f'{name} weight {pack_or_skip}.')

Expand Down Expand Up @@ -366,7 +367,10 @@ def smooth_layers(layers,
smooth_fc_fcs(fc, fcs, a_scales[a_name], group_size)

layer.to('cpu')
print(f'{l_name} smooth weight done.')
torch.cuda.empty_cache()
max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024 / 1024
print(f'{l_name} smooth weight done.'
f' max gpu memory: {max_memory:.2f} GB')


def pseudo_quantize_tensor(w,
Expand Down Expand Up @@ -434,4 +438,7 @@ def awq_layers(layers,
smooth_fc_fcs(fc, fcs, a_scales[a_name], group_size, ratio)

layer.to('cpu')
print(f'{l_name} smooth weight done.')
torch.cuda.empty_cache()
max_memory = torch.cuda.max_memory_allocated() / 1024 / 1024 / 1024
print(f'{l_name} smooth weight done.'
f' max gpu memory: {max_memory:.2f} GB')

0 comments on commit fd40edb

Please sign in to comment.