You signed in with another tab or window. Reload to refresh your session.You signed out in another tab or window. Reload to refresh your session.You switched accounts on another tab or window. Reload to refresh your session.Dismiss alert
2024-08-05 10:03:15,594 xinference.api.restful_api 63143 ERROR [address=0.0.0.0:43027, pid=65671] shape '[-1, 6, 128, 4096]' is invalid for input of size 4096
Traceback (most recent call last):
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xinference/api/restful_api.py", line 847, in launch_model
model_uid = await (await self._get_supervisor_ref()).launch_builtin_model(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/backends/context.py", line 227, in send
return self._process_result_message(result)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/backends/pool.py", line 659, in send
result = await self._run_coro(message.message_id, coro)
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/backends/pool.py", line 370, in _run_coro
return await coro
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
^^^^^^^^^^^^^^^^^
File "xoscar/core.pyx", line 558, in on_receive
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
^^^^^^^^^^^^^^^^^
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
^^^^^^^^^^^^^^^^^
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xinference/core/supervisor.py", line 988, in launch_builtin_model
await _launch_model()
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xinference/core/supervisor.py", line 952, in _launch_model
await _launch_one_model(rep_model_uid)
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xinference/core/supervisor.py", line 932, in _launch_one_model
await worker_ref.launch_builtin_model(
^^^^^^^^^^^^^^^^^
File "xoscar/core.pyx", line 284, in __pyx_actor_method_wrapper
async with lock:
File "xoscar/core.pyx", line 287, in xoscar.core.__pyx_actor_method_wrapper
result = await result
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xinference/core/utils.py", line 45, in wrapped
ret = await func(*args, **kwargs)
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xinference/core/worker.py", line 841, in launch_builtin_model
await model_ref.load()
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/backends/context.py", line 227, in send
return self._process_result_message(result)
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/backends/pool.py", line 659, in send
result = await self._run_coro(message.message_id, coro)
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/backends/pool.py", line 370, in _run_coro
return await coro
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
^^^^^^^^^^^^^^^^^
File "xoscar/core.pyx", line 558, in on_receive
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
^^^^^^^^^^^^^^^^^
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
^^^^^^^^^^^^^^^^^
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xinference/core/model.py", line 295, in load
self._model.load()
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xinference/model/llm/vllm/core.py", line 233, in load
self._engine = AsyncLLMEngine.from_engine_args(engine_args)
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 386, in from_engine_args
engine = cls(
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 340, in __init__
self.engine = self._init_engine(*args, **kwargs)
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 462, in _init_engine
return engine_class(*args, **kwargs)
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/engine/llm_engine.py", line 222, in __init__
self.model_executor = executor_class(
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/executor/executor_base.py", line 41, in __init__
self._init_executor()
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/executor/gpu_executor.py", line 24, in _init_executor
self.driver_worker.load_model()
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/worker/worker.py", line 121, in load_model
self.model_runner.load_model()
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/worker/model_runner.py", line 134, in load_model
self.model = get_model(
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/model_executor/model_loader/__init__.py", line 21, in get_model
return loader.load_model(model_config=model_config,
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/model_executor/model_loader/loader.py", line 243, in load_model
model.load_weights(
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/model_executor/models/internlm2.py", line 316, in load_weights
loaded_weight = loaded_weight.view(-1, 2 + kv_groups,
^^^^^^^^^^^^^^^^^
RuntimeError: [address=0.0.0.0:43027, pid=65671] shape '[-1, 6, 128, 4096]' is invalid for input of size 4096
Expected behavior / 期待表现
期望模型能够正常加载并运行 (the model should load and run successfully)
The text was updated successfully, but these errors were encountered:
System Info / 系統信息
Ubuntu 20.04
Running Xinference with Docker? / 是否使用 Docker 运行 Xinference?
Version info / 版本信息
0.13.2
The command used to start Xinference / 用以启动 xinference 的命令
XINFERENCE_MODEL_SRC=modelscope xinference-local --host 0.0.0.0 --port 9997
Reproduction / 复现过程
2024-08-05 10:03:15,594 xinference.api.restful_api 63143 ERROR [address=0.0.0.0:43027, pid=65671] shape '[-1, 6, 128, 4096]' is invalid for input of size 4096
Traceback (most recent call last):
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xinference/api/restful_api.py", line 847, in launch_model
model_uid = await (await self._get_supervisor_ref()).launch_builtin_model(
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/backends/context.py", line 227, in send
return self._process_result_message(result)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/backends/pool.py", line 659, in send
result = await self._run_coro(message.message_id, coro)
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/backends/pool.py", line 370, in _run_coro
return await coro
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
^^^^^^^^^^^^^^^^^
File "xoscar/core.pyx", line 558, in on_receive
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
^^^^^^^^^^^^^^^^^
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
^^^^^^^^^^^^^^^^^
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xinference/core/supervisor.py", line 988, in launch_builtin_model
await _launch_model()
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xinference/core/supervisor.py", line 952, in _launch_model
await _launch_one_model(rep_model_uid)
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xinference/core/supervisor.py", line 932, in _launch_one_model
await worker_ref.launch_builtin_model(
^^^^^^^^^^^^^^^^^
File "xoscar/core.pyx", line 284, in __pyx_actor_method_wrapper
async with lock:
File "xoscar/core.pyx", line 287, in xoscar.core.__pyx_actor_method_wrapper
result = await result
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xinference/core/utils.py", line 45, in wrapped
ret = await func(*args, **kwargs)
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xinference/core/worker.py", line 841, in launch_builtin_model
await model_ref.load()
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/backends/context.py", line 227, in send
return self._process_result_message(result)
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/backends/context.py", line 102, in _process_result_message
raise message.as_instanceof_cause()
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/backends/pool.py", line 659, in send
result = await self._run_coro(message.message_id, coro)
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/backends/pool.py", line 370, in _run_coro
return await coro
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xoscar/api.py", line 384, in on_receive
return await super().on_receive(message) # type: ignore
^^^^^^^^^^^^^^^^^
File "xoscar/core.pyx", line 558, in on_receive
raise ex
File "xoscar/core.pyx", line 520, in xoscar.core._BaseActor.on_receive
async with self._lock:
^^^^^^^^^^^^^^^^^
File "xoscar/core.pyx", line 521, in xoscar.core._BaseActor.on_receive
with debug_async_timeout('actor_lock_timeout',
^^^^^^^^^^^^^^^^^
File "xoscar/core.pyx", line 526, in xoscar.core._BaseActor.on_receive
result = await result
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xinference/core/model.py", line 295, in load
self._model.load()
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/xinference/model/llm/vllm/core.py", line 233, in load
self._engine = AsyncLLMEngine.from_engine_args(engine_args)
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 386, in from_engine_args
engine = cls(
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 340, in __init__
self.engine = self._init_engine(*args, **kwargs)
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/engine/async_llm_engine.py", line 462, in _init_engine
return engine_class(*args, **kwargs)
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/engine/llm_engine.py", line 222, in __init__
self.model_executor = executor_class(
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/executor/executor_base.py", line 41, in __init__
self._init_executor()
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/executor/gpu_executor.py", line 24, in _init_executor
self.driver_worker.load_model()
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/worker/worker.py", line 121, in load_model
self.model_runner.load_model()
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/worker/model_runner.py", line 134, in load_model
self.model = get_model(
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/model_executor/model_loader/__init__.py", line 21, in get_model
return loader.load_model(model_config=model_config,
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/model_executor/model_loader/loader.py", line 243, in load_model
model.load_weights(
^^^^^^^^^^^^^^^^^
File "/root/miniconda3/envs/xinference/lib/python3.11/site-packages/vllm/model_executor/models/internlm2.py", line 316, in load_weights
loaded_weight = loaded_weight.view(-1, 2 + kv_groups,
^^^^^^^^^^^^^^^^^
RuntimeError: [address=0.0.0.0:43027, pid=65671] shape '[-1, 6, 128, 4096]' is invalid for input of size 4096
Expected behavior / 期待表现
期望模型能够正常加载并运行 (the model should load and run successfully)
The text was updated successfully, but these errors were encountered: