You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
File "/code/app/fam/llm/fast_inference.py", line 95, in __init__
self.model, self.tokenizer, self.smodel, self.model_size = build_model(
File "/code/app/fam/llm/fast_inference_utils.py", line 377, in build_model
y = generate(
File "/code/.venv/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/code/app/fam/llm/fast_inference_utils.py", line 213, in generate
next_token = prefill(model, prompt.view(1, -1).repeat(2, 1), spk_emb, input_pos, **sampling_kwargs)
File "/code/.venv/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 489, in _fn
return fn(*args, **kwargs)
File "/code/app/fam/llm/fast_inference_utils.py", line 125, in prefill
def prefill(
File "/code/.venv/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 489, in _fn
return fn(*args, **kwargs)
File "/code/.venv/lib/python3.10/site-packages/torch/_dynamo/external_utils.py", line 17, in inner
return fn(*args, **kwargs)
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 901, in forward
return compiled_fn(full_args)
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/utils.py", line 81, in g
return f(args)
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 94, in runtime_wrapper
all_outs = call_func_at_runtime_with_args(
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/utils.py", line 105, in call_func_at_runtime_with_args
out = normalize_as_list(f(args))
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/jit_compile_runtime_wrappers.py", line 118, in rng_functionalization_wrapper
return compiled_fw(args)
File "/code/.venv/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 864, in __call__
return self.get_current_callable()(inputs)
File "/code/.venv/lib/python3.10/site-packages/torch/_inductor/compile_fx.py", line 611, in run
return model(new_inputs)
File "/code/.venv/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 892, in _run_from_cache
return compiled_graph.compiled_artifact(inputs)
File "/tmp/torchinductor_root/g3/cg3mdezu7x5r2zrzfflwlnu7c5fvdkrn6llc3crugk2alrhelevl.py", line 1212, in call
extern_kernels.mm(reinterpret_tensor(buf3, (2*s0, 2048), (2048, 1), 0), reinterpret_tensor(arg52_1, (2048, 6144), (1, 2048), 0), out=buf4)
RuntimeError: expected mat1 and mat2 to have the same dtype, but got: float != c10::Half
The text was updated successfully, but these errors were encountered:
File "/code/app/fam/llm/fast_inference.py", line 95, in __init__
self.model, self.tokenizer, self.smodel, self.model_size = build_model(
File "/code/app/fam/llm/fast_inference_utils.py", line 377, in build_model
y = generate(
File "/code/.venv/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/code/app/fam/llm/fast_inference_utils.py", line 213, in generate
next_token = prefill(model, prompt.view(1, -1).repeat(2, 1), spk_emb, input_pos, **sampling_kwargs)
File "/code/.venv/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 489, in _fn
return fn(*args, **kwargs)
File "/code/app/fam/llm/fast_inference_utils.py", line 125, in prefill
def prefill(
File "/code/.venv/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 489, in _fn
return fn(*args, **kwargs)
File "/code/.venv/lib/python3.10/site-packages/torch/_dynamo/external_utils.py", line 17, in inner
return fn(*args, **kwargs)
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 901, in forward
return compiled_fn(full_args)
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/utils.py", line 81, in g
return f(args)
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 94, in runtime_wrapper
all_outs = call_func_at_runtime_with_args(
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/utils.py", line 105, in call_func_at_runtime_with_args
out = normalize_as_list(f(args))
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/jit_compile_runtime_wrappers.py", line 118, in rng_functionalization_wrapper
return compiled_fw(args)
File "/code/.venv/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 864, in __call__
return self.get_current_callable()(inputs)
File "/code/.venv/lib/python3.10/site-packages/torch/_inductor/compile_fx.py", line 611, in run
return model(new_inputs)
File "/code/.venv/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 892, in _run_from_cache
return compiled_graph.compiled_artifact(inputs)
File "/tmp/torchinductor_root/g3/cg3mdezu7x5r2zrzfflwlnu7c5fvdkrn6llc3crugk2alrhelevl.py", line 1212, in call
extern_kernels.mm(reinterpret_tensor(buf3, (2*s0, 2048), (2048, 1), 0), reinterpret_tensor(arg52_1, (2048, 6144), (1, 2048), 0), out=buf4)
RuntimeError: expected mat1 and mat2 to have the same dtype, but got: float != c10::Half
The text was updated successfully, but these errors were encountered: