Skip to content

Commit

Permalink
Merge pull request #1760 from SciPhi-AI/feature/configurable-api-base
Browse files Browse the repository at this point in the history
Feature/configurable api base
  • Loading branch information
emrgnt-cmplxty authored Jan 7, 2025
2 parents 155503c + d2338aa commit 4763c32
Show file tree
Hide file tree
Showing 15 changed files with 1,639 additions and 1,809 deletions.
3,048 changes: 1,279 additions & 1,769 deletions js/sdk/pnpm-lock.yaml

Large diffs are not rendered by default.

57 changes: 57 additions & 0 deletions js/sdk/src/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -376,3 +376,60 @@ export type WrappedServerStatsResponse = ResultsWrapper<ServerStats>;
export type WrappedTokenResponse = ResultsWrapper<TokenResponse>;
export type WrappedUserResponse = ResultsWrapper<User>;
export type WrappedUsersResponse = PaginatedResultsWrapper<User[]>;



/**
 * The "base" shape for an R2R results wrapper: the payload travels under
 * the `results` key.
 *
 * @typeParam T - Type of the payload stored in `results`.
 */
export interface R2RResults<T> {
/** The wrapped payload. */
results: T;
// Potentially other fields, e.g. "info", "status", etc.
}

/**
 * Paginated results wrapper: everything in {@link R2RResults} plus a `meta`
 * object carrying `total_entries`.
 *
 * @typeParam T - Type of the payload stored in `results`.
 */
export interface PaginatedR2RResult<T> extends R2RResults<T> {
/** Pagination metadata that accompanies `results`. */
meta: {
// NOTE(review): presumably the total number of entries across all pages,
// not just the ones in this response — confirm against the server.
total_entries: number;
};
}

// ---------------------------
// API Key Models
// ---------------------------

/**
 * Full API Key model (includes the private `api_key` which is only
 * returned ONCE at creation time).
 *
 * See `ApiKeyNoPriv` for the variant without the private key.
 */
export interface ApiKey {
/** The public part of the key. */
public_key: string;
/** The private key, only returned during creation. */
api_key: string;
/** Identifier of this key. */
key_id: string;
/** Optional name attached to the key. */
name?: string;
}

/**
 * API Key model that omits the private `api_key`. Typically used
 * for listing user keys.
 */
export interface ApiKeyNoPriv {
/** The public part of the key. */
public_key: string;
/** Identifier of this key. */
key_id: string;
/** Optional name attached to the key. */
name?: string;
// Typed as a string here; the exact timestamp format is not specified in
// this file (presumably ISO 8601 — TODO confirm).
updated_at: string; // or `Date` if your code auto-parses
}

/**
 * Wrapped response that contains one newly created API key, i.e. an
 * `ApiKey` (private `api_key` included) under `results`.
 */
export type WrappedAPIKeyResponse = R2RResults<ApiKey>;

/**
 * Wrapped response that contains a list of existing API keys (no private keys):
 * an `ApiKeyNoPriv[]` under `results` plus the pagination `meta` object.
 */
export type WrappedAPIKeysResponse = PaginatedR2RResult<ApiKeyNoPriv[]>;
43 changes: 43 additions & 0 deletions js/sdk/src/v3/clients/users.ts
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
import { feature } from "../../feature";
import { r2rClient } from "../../r2rClient";
import {
WrappedAPIKeyResponse,
WrappedAPIKeysResponse,
WrappedBooleanResponse,
WrappedGenericMessageResponse,
WrappedCollectionsResponse,
Expand Down Expand Up @@ -458,4 +460,45 @@ export class UsersClient {
downloadBlob(blob, options.filename);
}
}

/**
 * Creates a new API key for a user by issuing
 * `POST users/{id}/api-keys`.
 *
 * Only superusers or the user themselves may create an API key.
 *
 * @param options.id ID of the user for whom to create an API key
 * @returns WrappedAPIKeyResponse
 */
@feature("users.createApiKey")
async createApiKey(options: { id: string }): Promise<WrappedAPIKeyResponse> {
  const { id } = options;
  const endpoint = `users/${id}/api-keys`;
  return this.client.makeRequest("POST", endpoint);
}

/**
 * Lists the API keys of a user by issuing
 * `GET users/{id}/api-keys`.
 *
 * Only superusers or the user themselves may list the API keys.
 *
 * @param options.id ID of the user whose API keys to list
 * @returns WrappedAPIKeysResponse
 */
@feature("users.listApiKeys")
async listApiKeys(options: { id: string }): Promise<WrappedAPIKeysResponse> {
  const { id } = options;
  const endpoint = `users/${id}/api-keys`;
  return this.client.makeRequest("GET", endpoint);
}

/**
 * Deletes one API key of a user by issuing
 * `DELETE users/{id}/api-keys/{keyId}`.
 *
 * Only superusers or the user themselves may delete the API key.
 *
 * @param options.id ID of the user
 * @param options.keyId ID of the API key to delete
 * @returns WrappedBooleanResponse
 */
@feature("users.deleteApiKey")
async deleteApiKey(options: {
  id: string;
  keyId: string;
}): Promise<WrappedBooleanResponse> {
  const userId = options.id;
  const apiKeyId = options.keyId;
  const endpoint = `users/${userId}/api-keys/${apiKeyId}`;
  return this.client.makeRequest("DELETE", endpoint);
}

}
2 changes: 1 addition & 1 deletion py/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ COPY pyproject.toml /app/py/pyproject.toml

# Install dependencies
RUN poetry config virtualenvs.create false \
&& poetry install --extras "core ingestion-bundle" --no-dev --no-root \
&& poetry install --extras "core ingestion-bundle" --no-root \
&& pip install --no-cache-dir gunicorn uvicorn

# Create the final image
Expand Down
38 changes: 24 additions & 14 deletions py/cli/command_group.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import json
import os
import types
from functools import wraps
from pathlib import Path
Expand Down Expand Up @@ -142,18 +143,32 @@ def exit(self, code: int = 0) -> Never:
raise SystemExit(code)


def initialize_client(base_url: str) -> R2RAsyncClient:
def initialize_client() -> R2RAsyncClient:
"""Initialize R2R client with API key from config if available."""
client = R2RAsyncClient()

try:
config = load_config()
if api_key := config.get("api_key"):
client.set_api_key(api_key)
if not client.api_key:
console.print(
"[yellow]Warning: API key not properly set in client[/yellow]"
)

env_api_base = os.getenv("R2R_API_BASE")
config_api_base = config.get("api_base")
if env_api_base:
api_base = env_api_base
elif config_api_base:
api_base = config_api_base
else:
api_base = "https://cloud.sciphi.ai"
client.set_base_url(api_base)

env_api_key = os.getenv("R2R_API_KEY")
config_api_key = config.get("api_key")
if env_api_key:
api_key = env_api_key
elif config_api_key:
api_key = config_api_key
else:
api_key = None
client.set_api_key(api_key)

except Exception as e:
console.print(
Expand All @@ -165,12 +180,7 @@ def initialize_client(base_url: str) -> R2RAsyncClient:


@click.group(cls=CustomGroup)
@click.option(
"--base-url",
default="https://cloud.sciphi.ai",
help="Base URL for the API",
)
@pass_context
async def cli(ctx: click.Context, base_url: str) -> None:
async def cli(ctx: click.Context) -> None:
"""R2R CLI for all core operations."""
ctx.obj = initialize_client(base_url)
ctx.obj = initialize_client()
1 change: 0 additions & 1 deletion py/cli/commands/retrieval.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ async def search(ctx: click.Context, query, **kwargs):

client: R2RAsyncClient = ctx.obj

print("client.base_url = ", client.base_url)
try:
with timer():
results = await client.retrieval.search(
Expand Down
20 changes: 20 additions & 0 deletions py/cli/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,26 @@ async def set_api_key(ctx, api_key: str):
console.print("[red]Failed to set API key:[/red]", str(e))


# Commands for Setting / Retrieving Base URL
#
@cli.command("set-api-base", short_help="Set your R2R API base URL")
@click.argument("base_url", required=True, type=str)
@click.pass_context
async def set_api_base(ctx, base_url: str):
    """
    Store your R2R API base URL locally so you don’t have to pass it on every command.
    Example usage:
    r2r set-api-base https://api.example.com
    """
    # The docstring above doubles as the command's help text, so it is kept
    # verbatim.
    try:
        # Read-modify-write of the local config: only "api_base" is touched.
        stored_settings = load_config()
        stored_settings["api_base"] = base_url
        save_config(stored_settings)
        console.print("[green]API base URL set successfully![/green]")
    except Exception as err:
        # Best effort: report the failure on the console instead of raising.
        console.print("[red]Failed to set API base:[/red]", str(err))


@cli.command("get-api", short_help="Get your stored R2R API key")
@click.pass_context
async def get_api(ctx):
Expand Down
39 changes: 39 additions & 0 deletions py/core/base/providers/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,45 @@ class AppConfig(R2RSerializable):
default_max_documents_per_user: Optional[int] = 100
default_max_chunks_per_user: Optional[int] = 100_000
default_max_collections_per_user: Optional[int] = 10
default_max_upload_size: int = 2_000_000 # e.g. ~2 MB

# File extension to max-size mapping
# These are examples; adjust sizes as needed.
max_upload_size_by_type: dict[str, int] = {
# Common text-based formats
"txt": 2_000_000,
"md": 2_000_000,
"tsv": 2_000_000,
"csv": 5_000_000,
"xml": 2_000_000,
"html": 5_000_000,
# Office docs
"doc": 10_000_000,
"docx": 10_000_000,
"ppt": 20_000_000,
"pptx": 20_000_000,
"xls": 10_000_000,
"xlsx": 10_000_000,
"odt": 5_000_000,
# PDFs can expand quite a bit when converted to text
"pdf": 30_000_000,
# E-mail
"eml": 5_000_000,
"msg": 5_000_000,
"p7s": 5_000_000,
# Images
"bmp": 5_000_000,
"heic": 5_000_000,
"jpeg": 5_000_000,
"jpg": 5_000_000,
"png": 5_000_000,
"tiff": 5_000_000,
# Others
"epub": 10_000_000,
"rtf": 5_000_000,
"rst": 5_000_000,
"org": 5_000_000,
}

@classmethod
def create(cls, *args, **kwargs):
Expand Down
16 changes: 16 additions & 0 deletions py/core/main/api/v3/documents_router.py
Original file line number Diff line number Diff line change
Expand Up @@ -471,6 +471,22 @@ async def create_document(
if file:
file_data = await self._process_file(file)
content_length = len(file_data["content"])
file_ext = file.filename.split(".")[
-1
] # e.g. "pdf", "txt"
max_allowed_size = await self.services.management.get_max_upload_size_by_type(
user_id=auth_user.id, file_type_or_ext=file_ext
)

if content_length > max_allowed_size:
raise R2RException(
status_code=413, # HTTP 413: Payload Too Large
message=(
f"File size exceeds maximum of {max_allowed_size} bytes "
f"for extension '{file_ext}'."
),
)

file_content = BytesIO(
base64.b64decode(file_data["content"])
)
Expand Down
68 changes: 68 additions & 0 deletions py/core/main/services/management_service.py
Original file line number Diff line number Diff line change
Expand Up @@ -841,10 +841,78 @@ async def delete_conversation(
)

async def get_user_max_documents(self, user_id: UUID) -> int | None:
# Fetch the user to see if they have any overrides stored
user = await self.providers.database.users_handler.get_user_by_id(
user_id
)
if user.limits_overrides and "max_documents" in user.limits_overrides:
return user.limits_overrides["max_documents"]
return self.config.app.default_max_documents_per_user

async def get_user_max_chunks(self, user_id: UUID) -> int | None:
user = await self.providers.database.users_handler.get_user_by_id(
user_id
)
if user.limits_overrides and "max_chunks" in user.limits_overrides:
return user.limits_overrides["max_chunks"]
return self.config.app.default_max_chunks_per_user

async def get_user_max_collections(self, user_id: UUID) -> int | None:
user = await self.providers.database.users_handler.get_user_by_id(
user_id
)
if (
user.limits_overrides
and "max_collections" in user.limits_overrides
):
return user.limits_overrides["max_collections"]
return self.config.app.default_max_collections_per_user

async def get_max_upload_size_by_type(
    self, user_id: UUID, file_type_or_ext: str
) -> int:
    """
    Return the maximum allowed upload size (in bytes) for the given user's
    file type/extension.

    Resolution order (first match wins):
      1. user override ``limits_overrides["max_file_size_by_type"][ext]``
      2. user override ``limits_overrides["max_file_size"]``
      3. system config ``config.app.max_upload_size_by_type[ext]``
      4. system config ``config.app.default_max_upload_size``

    A user-level value that is missing *or* ``None`` is treated as "not
    set" and resolution continues with the next step, so the result always
    comes from a configured number.

    Example of the user overrides this method reads::

        limits_overrides = {
            "max_file_size": 20_000_000,
            "max_file_size_by_type": {
                "pdf": 50_000_000,
                "docx": 30_000_000,
            },
        }

    Args:
        user_id: ID of the user whose overrides are looked up.
        file_type_or_ext: File extension, with or without leading dots and
            in any letter case (e.g. ``"pdf"``, ``".PDF"``).

    Returns:
        The size limit in bytes.
    """
    # 1. Normalize extension so ".PDF" and "pdf" hit the same entry.
    ext = file_type_or_ext.lower().lstrip(".")

    # 2. Fetch user from DB to see if we have any overrides.
    user = await self.providers.database.users_handler.get_user_by_id(
        user_id
    )
    user_overrides = user.limits_overrides or {}

    # 3. Per-type user override. `or {}` (rather than a `.get` default)
    #    also covers the key being present with a null value, which would
    #    otherwise make the membership test raise TypeError.
    user_file_type_limits = (
        user_overrides.get("max_file_size_by_type") or {}
    )
    per_type_limit = user_file_type_limits.get(ext)
    if per_type_limit is not None:
        return per_type_limit

    # 4. User-level fallback for every file type. A null value is skipped
    #    so callers never receive None where an int is promised.
    user_default_limit = user_overrides.get("max_file_size")
    if user_default_limit is not None:
        return user_default_limit

    # 5. No usable user-level value: consult the system-wide per-type map.
    system_type_limits = self.config.app.max_upload_size_by_type
    if ext in system_type_limits:
        return system_type_limits[ext]

    # 6. Otherwise, return the global default.
    return self.config.app.default_max_upload_size
Loading

0 comments on commit 4763c32

Please sign in to comment.