You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
File "/code/app/fam/llm/fast_inference.py", line 95, in __init__
self.model, self.tokenizer, self.smodel, self.model_size = build_model(
File "/code/app/fam/llm/fast_inference_utils.py", line 377, in build_model
y = generate(
File "/code/.venv/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/code/app/fam/llm/fast_inference_utils.py", line 213, in generate
next_token = prefill(model, prompt.view(1, -1).repeat(2, 1), spk_emb, input_pos, **sampling_kwargs)
File "/code/.venv/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 489, in _fn
return fn(*args, **kwargs)
File "/code/app/fam/llm/fast_inference_utils.py", line 125, in prefill
def prefill(
File "/code/.venv/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 489, in _fn
return fn(*args, **kwargs)
File "/code/.venv/lib/python3.10/site-packages/torch/_dynamo/external_utils.py", line 17, in inner
return fn(*args, **kwargs)
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 901, in forward
return compiled_fn(full_args)
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/utils.py", line 81, in g
return f(args)
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 94, in runtime_wrapper
all_outs = call_func_at_runtime_with_args(
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/utils.py", line 105, in call_func_at_runtime_with_args
out = normalize_as_list(f(args))
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/jit_compile_runtime_wrappers.py", line 118, in rng_functionalization_wrapper
return compiled_fw(args)
File "/code/.venv/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 864, in __call__
return self.get_current_callable()(inputs)
File "/code/.venv/lib/python3.10/site-packages/torch/_inductor/compile_fx.py", line 611, in run
return model(new_inputs)
File "/code/.venv/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 892, in _run_from_cache
return compiled_graph.compiled_artifact(inputs)
File "/tmp/torchinductor_root/g3/cg3mdezu7x5r2zrzfflwlnu7c5fvdkrn6llc3crugk2alrhelevl.py", line 1212, in call
extern_kernels.mm(reinterpret_tensor(buf3, (2*s0, 2048), (2048, 1), 0), reinterpret_tensor(arg52_1, (2048, 6144), (1, 2048), 0), out=buf4)
RuntimeError: expected mat1 and mat2 to have the same dtype, but got: float != c10::Half
The text was updated successfully, but these errors were encountered:
File "/code/app/fam/llm/fast_inference.py", line 95, in __init__
self.model, self.tokenizer, self.smodel, self.model_size = build_model(
File "/code/app/fam/llm/fast_inference_utils.py", line 377, in build_model
y = generate(
File "/code/.venv/lib/python3.10/site-packages/torch/utils/_contextlib.py", line 115, in decorate_context
return func(*args, **kwargs)
File "/code/app/fam/llm/fast_inference_utils.py", line 213, in generate
next_token = prefill(model, prompt.view(1, -1).repeat(2, 1), spk_emb, input_pos, **sampling_kwargs)
File "/code/.venv/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 489, in _fn
return fn(*args, **kwargs)
File "/code/app/fam/llm/fast_inference_utils.py", line 125, in prefill
def prefill(
File "/code/.venv/lib/python3.10/site-packages/torch/_dynamo/eval_frame.py", line 489, in _fn
return fn(*args, **kwargs)
File "/code/.venv/lib/python3.10/site-packages/torch/_dynamo/external_utils.py", line 17, in inner
return fn(*args, **kwargs)
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/aot_autograd.py", line 901, in forward
return compiled_fn(full_args)
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/utils.py", line 81, in g
return f(args)
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/runtime_wrappers.py", line 94, in runtime_wrapper
all_outs = call_func_at_runtime_with_args(
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/utils.py", line 105, in call_func_at_runtime_with_args
out = normalize_as_list(f(args))
File "/code/.venv/lib/python3.10/site-packages/torch/_functorch/_aot_autograd/jit_compile_runtime_wrappers.py", line 118, in rng_functionalization_wrapper
return compiled_fw(args)
File "/code/.venv/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 864, in __call__
return self.get_current_callable()(inputs)
File "/code/.venv/lib/python3.10/site-packages/torch/_inductor/compile_fx.py", line 611, in run
return model(new_inputs)
File "/code/.venv/lib/python3.10/site-packages/torch/_inductor/codecache.py", line 892, in _run_from_cache
return compiled_graph.compiled_artifact(inputs)
File "/tmp/torchinductor_root/g3/cg3mdezu7x5r2zrzfflwlnu7c5fvdkrn6llc3crugk2alrhelevl.py", line 1212, in call
extern_kernels.mm(reinterpret_tensor(buf3, (2*s0, 2048), (2048, 1), 0), reinterpret_tensor(arg52_1, (2048, 6144), (1, 2048), 0), out=buf4)
RuntimeError: expected mat1 and mat2 to have the same dtype, but got: float != c10::Half
The text was updated successfully, but these errors were encountered: