-
Notifications
You must be signed in to change notification settings - Fork 5.5k
Ollama with API token #15164
Replies: 1 comment · 16 replies
-
Hey @rsivanandan, I'm here to help you with any issues you encounter. I'm a bot designed to assist with bug fixes, answer questions, and help you become a contributor. Let me know how I can assist you with the RAG system using the Ollama server. To use the 'user-id:api-key' bearer token in the Ollama LLM instantiation using LlamaIndex, you need to set the `auth_type` and `auth_profile` parameters when constructing the LLM:
from llama_index.llms.oci_genai import OCIGenAI
llm = OCIGenAI(
model="MY_MODEL",
service_endpoint="https://inference.generativeai.us-chicago-1.oci.oraclecloud.com",
compartment_id="MY_OCID",
auth_type="API_KEY",
auth_profile="user-id:api-key", # replace with your actual user-id:api-key
)
resp = llm.complete("Paul Graham is ")
print(resp)
This configuration sets up the OCIGenAI interface with the necessary authentication using the bearer token [1].
Beta Was this translation helpful? Give feedback.
All reactions
-
To resolve the issue, subclass `Ollama` so that custom headers are sent with every HTTP request:
from llama_index.llms.ollama import Ollama
from llama_index.llms.base import ChatMessage, ChatResponse, CompletionResponse, MessageRole
from typing import Sequence, Any
import httpx
from httpx import Timeout
class CustomOllama(Ollama):
def __init__(self, *args, **kwargs):
    """Create the wrapper, accepting an extra ``headers`` keyword argument.

    ``headers`` (a mapping of HTTP header names to values, e.g. an
    ``Authorization`` bearer token) is removed from ``kwargs``; everything
    else is forwarded unchanged to the parent ``__init__``.  A missing or
    ``None`` value becomes an empty dict.
    """
    headers = kwargs.pop('headers', None) or {}
    # Initialise the parent before storing anything on the instance.
    super().__init__(*args, **kwargs)
    # NOTE(review): ``headers`` is not a declared field of the parent class, so
    # it is written with ``object.__setattr__`` to bypass any field validation
    # in the base ``__setattr__`` -- confirm against the installed
    # llama_index / pydantic versions.
    object.__setattr__(self, "headers", headers)
def _get_client(self):
    """Build a fresh ``httpx.Client`` with the configured timeout and headers."""
    timeout = Timeout(self.request_timeout)
    return httpx.Client(timeout=timeout, headers=self.headers)
def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
    """POST ``messages`` to ``/api/chat`` and return the non-streamed reply.

    Extra ``kwargs`` are merged into the top level of the request payload.
    An HTTP error status is raised by ``response.raise_for_status()``.
    """
    serialized = []
    for msg in messages:
        serialized.append(
            {
                "role": msg.role.value,
                "content": msg.content,
                **msg.additional_kwargs,
            }
        )
    request_body = {
        "model": self.model,
        "messages": serialized,
        "options": self._model_kwargs,
        "stream": False,
        **kwargs,
    }
    endpoint = f"{self.base_url}/api/chat"
    with self._get_client() as http:
        reply = http.post(url=endpoint, json=request_body)
        reply.raise_for_status()
        body = reply.json()
        answer = body["message"]
        # "content" and "role" become first-class fields; everything else in
        # the message is passed through get_additional_kwargs.
        chat_message = ChatMessage(
            content=answer.get("content"),
            role=MessageRole(answer.get("role")),
            additional_kwargs=get_additional_kwargs(answer, ("content", "role")),
        )
        return ChatResponse(
            message=chat_message,
            raw=body,
            additional_kwargs=get_additional_kwargs(body, ("message",)),
        )
def stream_chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponseGen:
    """Stream a chat reply from ``/api/chat``, yielding one response per chunk.

    Every yielded ``ChatResponse`` carries the text accumulated so far in
    ``message.content`` and the newest fragment in ``delta``.  Iteration
    stops at the first chunk whose ``done`` flag is truthy.  Extra
    ``kwargs`` are merged into the top level of the request payload.
    """
    # Local import: the import lines shown with this snippet do not bring
    # ``json`` into scope, yet the chunks must be decoded with it.
    import json

    payload = {
        "model": self.model,
        "messages": [
            {
                "role": message.role.value,
                "content": message.content,
                **message.additional_kwargs,
            }
            for message in messages
        ],
        "options": self._model_kwargs,
        "stream": True,
        **kwargs,
    }
    with self._get_client() as client:
        with client.stream(
            method="POST",
            url=f"{self.base_url}/api/chat",
            json=payload,
        ) as response:
            response.raise_for_status()
            text = ""
            for line in response.iter_lines():
                if not line:
                    continue
                chunk = json.loads(line)
                if chunk.get("done"):
                    break
                message = chunk["message"]
                # A chunk without "content" is treated as an empty fragment so
                # the accumulation below cannot raise TypeError on None.
                delta = message.get("content") or ""
                text += delta
                yield ChatResponse(
                    message=ChatMessage(
                        content=text,
                        role=MessageRole(message.get("role")),
                        additional_kwargs=get_additional_kwargs(
                            message, ("content", "role")
                        ),
                    ),
                    delta=delta,
                    raw=chunk,
                    additional_kwargs=get_additional_kwargs(chunk, ("message",)),
                )
def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
    """POST ``prompt`` to ``/api/generate`` and return the full completion.

    Extra ``kwargs`` are merged into the top level of the request payload.
    An HTTP error status is raised by ``response.raise_for_status()``.
    """
    request_body = {
        self.prompt_key: prompt,
        "model": self.model,
        "options": self._model_kwargs,
        "stream": False,
        **kwargs,
    }
    endpoint = f"{self.base_url}/api/generate"
    with self._get_client() as http:
        reply = http.post(url=endpoint, json=request_body)
        reply.raise_for_status()
        body = reply.json()
        extras = get_additional_kwargs(body, ("response",))
        return CompletionResponse(
            text=body.get("response"),
            raw=body,
            additional_kwargs=extras,
        )
def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
payload = {
self.prompt_key: prompt,
"model": self.model,
"options": self._model_kwargs,
"stream": True,
**kwargs,
}
with self._get_client() as client:
with client.stream(
method="POST",
url=f"{self.base_url}/api/generate",
json=payload,
) as response:
response.raise_for_status()
text = ""
for line in response.iter_lines():
if line:
chunk = json.loads(line)
delta = chunk.get("response")
text += delta
yield CompletionResponse(
delta=delta,
text=text,
raw=chunk,
additional_kwargs=get_additional_kwargs(
chunk, ("response",)
),
)
This modification ensures that the headers, including the authorization key, are included in the HTTP requests, which should help resolve the issue.
Beta Was this translation helpful? Give feedback.
All reactions
-
@dosu, I finally got it to work with the following changes, up to llama_index 0.10.68. Now that I have upgraded llama_index to version 0.11.4, I am getting an assertion error. What could be the issue?
2024-09-04 10:41:09.024 Uncaught app exception
The code for the CustomOllama class, and how I instantiate it, is below:
class CustomOllama():
@st.cache_resource(show_spinner=False)
Beta Was this translation helpful? Give feedback.
All reactions
-
To resolve this, you need to ensure that `CustomOllama` inherits from `LLM`, as in the following code:
from llama_index.llms.ollama import Ollama
from llama_index.llms.base import ChatMessage, ChatResponse, CompletionResponse, MessageRole, LLM, LLMMetadata
from typing import Sequence, Any, Dict
import httpx
from httpx import Timeout
class CustomOllama(LLM):
"""Wrapper around Ollama to handle custom headers."""
def __init__(
    self,
    model: str,
    request_timeout: float,
    base_url: str,
    headers: Dict[str, str] = None,
    **kwargs,
):
    """Wrap an ``Ollama`` instance and remember extra HTTP headers.

    Args:
        model: Forwarded to ``Ollama``.
        request_timeout: Forwarded to ``Ollama``.
        base_url: Forwarded to ``Ollama``.
        headers: Optional HTTP headers (e.g. an ``Authorization`` bearer
            token) passed to every ``httpx.Client`` this wrapper creates;
            ``None`` is stored as an empty dict.
        **kwargs: Forwarded to ``Ollama``.
    """
    # Initialise the base class before touching instance state; the call was
    # previously missing.
    super().__init__()
    # NOTE(review): ``headers`` and ``ollama`` are not declared fields of the
    # base class, so they are written with ``object.__setattr__`` to bypass
    # any field validation in the base ``__setattr__`` -- confirm against the
    # installed llama_index / pydantic versions.
    object.__setattr__(self, "headers", headers or {})
    object.__setattr__(
        self,
        "ollama",
        Ollama(
            model=model, request_timeout=request_timeout, base_url=base_url, **kwargs
        ),
    )
@property
def metadata(self) -> LLMMetadata:
    """Return LLM metadata derived from the wrapped ``Ollama`` instance.

    ``context_window`` and ``model_name`` are read from ``self.ollama``;
    ``num_output`` is a fixed 30 and ``is_chat_model`` is always ``True``.
    """
    # The docstring must be the first statement of the body to become
    # ``__doc__``; placed after this assignment it was a no-op string
    # expression.
    DEFAULT_NUM_OUTPUTS = 30
    return LLMMetadata(
        context_window=self.ollama.context_window,
        num_output=DEFAULT_NUM_OUTPUTS,
        model_name=self.ollama.model,
        is_chat_model=True,
    )
def _get_client(self):
    """Build a fresh ``httpx.Client`` with the wrapped timeout and stored headers."""
    timeout = Timeout(self.ollama.request_timeout)
    return httpx.Client(timeout=timeout, headers=self.headers)
def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
    """POST ``messages`` to ``/api/chat`` and return the non-streamed reply.

    Model name, options, base URL and JSON mode are taken from the wrapped
    ``self.ollama``.  Extra ``kwargs`` are merged into the top level of the
    request payload.  An HTTP error status is raised by
    ``response.raise_for_status()``.
    """
    backend = self.ollama
    serialized = []
    for msg in messages:
        serialized.append(
            {
                "role": msg.role.value,
                "content": msg.content,
                **msg.additional_kwargs,
            }
        )
    request_body = {
        "model": backend.model,
        "messages": serialized,
        "options": backend._model_kwargs,
        "stream": False,
        **kwargs,
    }
    if backend.json_mode:
        request_body["format"] = "json"
    endpoint = f"{backend.base_url}/api/chat"
    with self._get_client() as http:
        reply = http.post(url=endpoint, json=request_body)
        reply.raise_for_status()
        body = reply.json()
        answer = body["message"]
        # "content" and "role" become first-class fields; everything else in
        # the message is passed through get_additional_kwargs.
        chat_message = ChatMessage(
            content=answer.get("content"),
            role=MessageRole(answer.get("role")),
            additional_kwargs=get_additional_kwargs(answer, ("content", "role")),
        )
        return ChatResponse(
            message=chat_message,
            raw=body,
            additional_kwargs=get_additional_kwargs(body, ("message",)),
        )
def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
    """POST ``prompt`` to ``/api/generate`` and return the full completion.

    Prompt key, model name, options, base URL and JSON mode are taken from
    the wrapped ``self.ollama``.  Extra ``kwargs`` are merged into the top
    level of the request payload.
    """
    backend = self.ollama
    request_body = {
        backend.prompt_key: prompt,
        "model": backend.model,
        "options": backend._model_kwargs,
        "stream": False,
        **kwargs,
    }
    if backend.json_mode:
        request_body["format"] = "json"
    endpoint = f"{backend.base_url}/api/generate"
    with self._get_client() as http:
        reply = http.post(url=endpoint, json=request_body)
        reply.raise_for_status()
        body = reply.json()
        return CompletionResponse(text=body.get("response"), raw=body)
def stream_chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponseGen:
    """Stream a chat reply from ``/api/chat``, yielding one response per chunk.

    Every yielded ``ChatResponse`` carries the text accumulated so far in
    ``message.content`` and the newest fragment in ``delta``.  Iteration
    stops at the first chunk whose ``done`` flag is truthy.  Extra
    ``kwargs`` are merged into the top level of the request payload.
    """
    # Local import: the import lines shown with this snippet do not bring
    # ``json`` into scope, yet the chunks must be decoded with it.
    import json

    payload = {
        "model": self.ollama.model,
        "messages": [
            {
                "role": message.role.value,
                "content": message.content,
                **message.additional_kwargs,
            }
            for message in messages
        ],
        "options": self.ollama._model_kwargs,
        "stream": True,
        **kwargs,
    }
    # Honour JSON mode here as well, matching the non-streaming chat/complete
    # methods of this class.
    if self.ollama.json_mode:
        payload["format"] = "json"
    with self._get_client() as client:
        with client.stream(
            method="POST",
            url=f"{self.ollama.base_url}/api/chat",
            json=payload,
        ) as response:
            response.raise_for_status()
            text = ""
            for line in response.iter_lines():
                if not line:
                    continue
                chunk = json.loads(line)
                if chunk.get("done"):
                    break
                message = chunk["message"]
                # A chunk without "content" is treated as an empty fragment so
                # the accumulation below cannot raise TypeError on None.
                delta = message.get("content") or ""
                text += delta
                yield ChatResponse(
                    message=ChatMessage(
                        content=text,
                        role=MessageRole(message.get("role")),
                        additional_kwargs=get_additional_kwargs(
                            message, ("content", "role")
                        ),
                    ),
                    delta=delta,
                    raw=chunk,
                    additional_kwargs=get_additional_kwargs(chunk, ("message",)),
                )
def stream_complete(self, prompt: str, **kwargs: Any) -> CompletionResponseGen:
payload = {
self.ollama.prompt_key: prompt,
"model": self.ollama.model,
"options": self.ollama._model_kwargs,
"stream": True,
**kwargs,
}
with self._get_client() as client:
with client.stream(
method="POST",
url=f"{self.ollama.base_url}/api/generate",
json=payload,
) as response:
response.raise_for_status()
text = ""
for line in response.iter_lines():
if line:
chunk = json.loads(line)
delta = chunk.get("response")
text += delta
yield CompletionResponse(
delta=delta,
text=text,
raw=chunk,
additional_kwargs=get_additional_kwargs(
chunk, ("response",)
),
)
This modification ensures that
Beta Was this translation helpful? Give feedback.
All reactions
-
@dosu, with this CustomOllama as below, I am trying out HyDEQueryTransform and I get the following exception:
File "C:\Python311\Lib\site-packages\llama_index\core\instrumentation\dispatcher.py", line 260, in wrapper
class CustomOllama():
|
Beta Was this translation helpful? Give feedback.
All reactions
-
To resolve the issue, add a `predict` method to the `CustomOllama` wrapper:
from llama_index.llms.ollama import Ollama
from llama_index.llms.base import ChatMessage, ChatResponse, CompletionResponse, MessageRole, LLM, LLMMetadata
from typing import Sequence, Any, Dict
import httpx
from httpx import Timeout
class CustomOllama(LLM):
"""Wrapper around Ollama to handle custom headers."""
def __init__(
    self,
    model: str,
    request_timeout: float,
    base_url: str,
    headers: Dict[str, str] = None,
    **kwargs,
):
    """Wrap an ``Ollama`` instance and remember extra HTTP headers.

    Args:
        model: Forwarded to ``Ollama``.
        request_timeout: Forwarded to ``Ollama``.
        base_url: Forwarded to ``Ollama``.
        headers: Optional HTTP headers (e.g. an ``Authorization`` bearer
            token) passed to every ``httpx.Client`` this wrapper creates;
            ``None`` is stored as an empty dict.
        **kwargs: Forwarded to ``Ollama``.
    """
    # Initialise the base class before touching instance state; the call was
    # previously missing.
    super().__init__()
    # NOTE(review): ``headers`` and ``ollama`` are not declared fields of the
    # base class, so they are written with ``object.__setattr__`` to bypass
    # any field validation in the base ``__setattr__`` -- confirm against the
    # installed llama_index / pydantic versions.
    object.__setattr__(self, "headers", headers or {})
    object.__setattr__(
        self,
        "ollama",
        Ollama(
            model=model, request_timeout=request_timeout, base_url=base_url, **kwargs
        ),
    )
@property
def metadata(self) -> LLMMetadata:
    """Return LLM metadata derived from the wrapped ``Ollama`` instance.

    ``context_window`` and ``model_name`` are read from ``self.ollama``;
    ``num_output`` is a fixed 30 and ``is_chat_model`` is always ``True``.
    """
    # The docstring must be the first statement of the body to become
    # ``__doc__``; placed after this assignment it was a no-op string
    # expression.
    DEFAULT_NUM_OUTPUTS = 30
    return LLMMetadata(
        context_window=self.ollama.context_window,
        num_output=DEFAULT_NUM_OUTPUTS,
        model_name=self.ollama.model,
        is_chat_model=True,
    )
def _get_client(self):
    """Build a fresh ``httpx.Client`` with the wrapped timeout and stored headers."""
    timeout = Timeout(self.ollama.request_timeout)
    return httpx.Client(timeout=timeout, headers=self.headers)
def chat(self, messages: Sequence[ChatMessage], **kwargs: Any) -> ChatResponse:
    """POST ``messages`` to ``/api/chat`` and return the non-streamed reply.

    Model name, options, base URL and JSON mode are taken from the wrapped
    ``self.ollama``.  Extra ``kwargs`` are merged into the top level of the
    request payload.  An HTTP error status is raised by
    ``response.raise_for_status()``.
    """
    backend = self.ollama
    serialized = []
    for msg in messages:
        serialized.append(
            {
                "role": msg.role.value,
                "content": msg.content,
                **msg.additional_kwargs,
            }
        )
    request_body = {
        "model": backend.model,
        "messages": serialized,
        "options": backend._model_kwargs,
        "stream": False,
        **kwargs,
    }
    if backend.json_mode:
        request_body["format"] = "json"
    endpoint = f"{backend.base_url}/api/chat"
    with self._get_client() as http:
        reply = http.post(url=endpoint, json=request_body)
        reply.raise_for_status()
        body = reply.json()
        answer = body["message"]
        # "content" and "role" become first-class fields; everything else in
        # the message is passed through get_additional_kwargs.
        chat_message = ChatMessage(
            content=answer.get("content"),
            role=MessageRole(answer.get("role")),
            additional_kwargs=get_additional_kwargs(answer, ("content", "role")),
        )
        return ChatResponse(
            message=chat_message,
            raw=body,
            additional_kwargs=get_additional_kwargs(body, ("message",)),
        )
def complete(self, prompt: str, **kwargs: Any) -> CompletionResponse:
    """POST ``prompt`` to ``/api/generate`` and return the full completion.

    Prompt key, model name, options, base URL and JSON mode are taken from
    the wrapped ``self.ollama``.  Extra ``kwargs`` are merged into the top
    level of the request payload.
    """
    backend = self.ollama
    request_body = {
        backend.prompt_key: prompt,
        "model": backend.model,
        "options": backend._model_kwargs,
        "stream": False,
        **kwargs,
    }
    if backend.json_mode:
        request_body["format"] = "json"
    endpoint = f"{backend.base_url}/api/generate"
    with self._get_client() as http:
        reply = http.post(url=endpoint, json=request_body)
        reply.raise_for_status()
        body = reply.json()
        return CompletionResponse(text=body.get("response"), raw=body)
def predict(self, prompt: str, **kwargs: Any) -> str:
response = self.complete(prompt, **kwargs)
return response.text
This
Beta Was this translation helpful? Give feedback.
-
Hi, I am trying to build a RAG system using an Ollama server that is provided to us. It also uses an API key (bearer token) in the format 'user-id:api-key'.
How do we use this in the Ollama LLM instantiation using llamaindex? Would appreciate if someone can help with this one.
Beta Was this translation helpful? Give feedback.
All reactions