diff --git a/.github/actions/run-script-zerox-tests/action.yml b/.github/actions/run-script-zerox-tests/action.yml
deleted file mode 100644
index ea15d49e1..000000000
--- a/.github/actions/run-script-zerox-tests/action.yml
+++ /dev/null
@@ -1,14 +0,0 @@
-name: 'Run SDK Auth Tests'
-description: 'Runs SDK authentication tests for R2R'
-runs:
-  using: "composite"
-  steps:
-    - name: Ingest zerox document
-      working-directory: ./py
-      shell: bash
-      run: poetry run python core/examples/scripts/run_ingest_with_zerox.py
-
-    - name: Test ingested zerox document
-      working-directory: ./py
-      shell: bash
-      run: poetry run python tests/integration/runner_scripts.py test_ingested_zerox_document
diff --git a/.github/actions/run-sdk-prompt-management-tests/action.yml b/.github/actions/run-sdk-prompt-management-tests/action.yml
new file mode 100644
index 000000000..782347658
--- /dev/null
+++ b/.github/actions/run-sdk-prompt-management-tests/action.yml
@@ -0,0 +1,42 @@
+name: 'Run SDK Prompt Management Tests'
+description: 'Runs SDK prompt management tests for R2R'
+runs:
+  using: "composite"
+  steps:
+    # First run basic prompt operations
+    - name: Add prompt test (SDK)
+      working-directory: ./py
+      shell: bash
+      run: poetry run python tests/integration/runner_sdk.py test_add_prompt
+
+    - name: Get prompt test (SDK)
+      working-directory: ./py
+      shell: bash
+      run: poetry run python tests/integration/runner_sdk.py test_get_prompt
+
+    - name: Get all prompts test (SDK)
+      working-directory: ./py
+      shell: bash
+      run: poetry run python tests/integration/runner_sdk.py test_get_all_prompts
+
+    - name: Update prompt test (SDK)
+      working-directory: ./py
+      shell: bash
+      run: poetry run python tests/integration/runner_sdk.py test_update_prompt
+
+    # Then run error handling and access control tests
+    - name: Prompt error handling test (SDK)
+      working-directory: ./py
+      shell: bash
+      run: poetry run python tests/integration/runner_sdk.py test_prompt_error_handling
+
+    - name: Prompt access control test (SDK)
+      working-directory: ./py
+      shell: bash
+      run: poetry run python tests/integration/runner_sdk.py test_prompt_access_control
+
+    # Finally run deletion test
+    - name: Delete prompt test (SDK)
+      working-directory: ./py
+      shell: bash
+      run: poetry run python tests/integration/runner_sdk.py test_delete_prompt
diff --git a/.github/workflows/r2r-full-integration-deep-dive-tests.yml b/.github/workflows/r2r-full-integration-deep-dive-tests.yml
index 4021478d4..a4391820d 100644
--- a/.github/workflows/r2r-full-integration-deep-dive-tests.yml
+++ b/.github/workflows/r2r-full-integration-deep-dive-tests.yml
@@ -36,6 +36,3 @@ jobs:
 
       - name: Start R2R Full server
         uses: ./.github/actions/start-r2r-full
-
-      - name: Run Test Zerox
-        uses: ./.github/actions/run-script-zerox-tests
diff --git a/.github/workflows/r2r-full-py-integration-tests-graphrag.yml b/.github/workflows/r2r-full-py-integration-tests-graphrag.yml
index a0cf644e9..29c0b3111 100644
--- a/.github/workflows/r2r-full-py-integration-tests-graphrag.yml
+++ b/.github/workflows/r2r-full-py-integration-tests-graphrag.yml
@@ -16,6 +16,7 @@ on:
 jobs:
   test:
     runs-on: ${{ matrix.os }}
+    continue-on-error: true
 
     strategy:
       matrix:
diff --git a/.github/workflows/r2r-full-py-integration-tests-mac-and-windows.yml b/.github/workflows/r2r-full-py-integration-tests-mac-and-windows.yml
index cf36afd67..e24a9df84 100644
--- a/.github/workflows/r2r-full-py-integration-tests-mac-and-windows.yml
+++ b/.github/workflows/r2r-full-py-integration-tests-mac-and-windows.yml
@@ -6,6 +6,7 @@ on:
 jobs:
   test:
     runs-on: ${{ matrix.os }}
+    continue-on-error: true
 
     strategy:
       matrix:
diff --git a/.github/workflows/r2r-full-py-integration-tests.yml b/.github/workflows/r2r-full-py-integration-tests.yml
index 1e10f9414..55dd6ac7e 100644
--- a/.github/workflows/r2r-full-py-integration-tests.yml
+++ b/.github/workflows/r2r-full-py-integration-tests.yml
@@ -16,6 +16,7 @@ on:
 jobs:
   test:
     runs-on: ${{ matrix.os }}
+    continue-on-error: true
 
     strategy:
       matrix:
@@ -27,6 +28,7 @@ jobs:
           - sdk-retrieval
           - sdk-auth
           - sdk-collections
+          - sdk-prompts
     env:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       TELEMETRY_ENABLED: 'false'
@@ -56,29 +58,27 @@ jobs:
       - name: Run CLI Ingestion Tests
         if: matrix.test_category == 'cli-ingestion'
         uses: ./.github/actions/run-cli-ingestion-tests
-        continue-on-error: true
 
       - name: Run CLI Retrieval Tests
         if: matrix.test_category == 'cli-retrieval'
         uses: ./.github/actions/run-cli-retrieval-tests
-        continue-on-error: true
 
       - name: Run SDK Ingestion Tests
         if: matrix.test_category == 'sdk-ingestion'
         uses: ./.github/actions/run-sdk-ingestion-tests
-        continue-on-error: true
 
       - name: Run SDK Retrieval Tests
         if: matrix.test_category == 'sdk-retrieval'
         uses: ./.github/actions/run-sdk-retrieval-tests
-        continue-on-error: true
 
       - name: Run SDK Auth Tests
         if: matrix.test_category == 'sdk-auth'
         uses: ./.github/actions/run-sdk-auth-tests
-        continue-on-error: true
 
       - name: Run SDK Collections Tests
         if: matrix.test_category == 'sdk-collections'
         uses: ./.github/actions/run-sdk-collections-tests
-        continue-on-error: true
+
+      - name: Run SDK Prompt Tests
+        if: matrix.test_category == 'sdk-prompts'
+        uses: ./.github/actions/run-sdk-prompt-management-tests
diff --git a/.github/workflows/r2r-light-py-integration-tests-graphrag.yml b/.github/workflows/r2r-light-py-integration-tests-graphrag.yml
index cba939154..b105214f0 100644
--- a/.github/workflows/r2r-light-py-integration-tests-graphrag.yml
+++ b/.github/workflows/r2r-light-py-integration-tests-graphrag.yml
@@ -18,6 +18,7 @@ on:
 jobs:
   test:
     runs-on: ${{ matrix.os }}
+    continue-on-error: true
 
     strategy:
       matrix:
diff --git a/.github/workflows/r2r-light-py-integration-tests-mac-and-windows.yml b/.github/workflows/r2r-light-py-integration-tests-mac-and-windows.yml
index 70a3d6314..509d80520 100644
--- a/.github/workflows/r2r-light-py-integration-tests-mac-and-windows.yml
+++ b/.github/workflows/r2r-light-py-integration-tests-mac-and-windows.yml
@@ -8,6 +8,7 @@ on:
 jobs:
   test:
     runs-on: ${{ matrix.os }}
+    continue-on-error: true
 
     strategy:
       matrix:
diff --git a/.github/workflows/r2r-light-py-integration-tests.yml b/.github/workflows/r2r-light-py-integration-tests.yml
index b4ac669a5..eb7d7b04b 100644
--- a/.github/workflows/r2r-light-py-integration-tests.yml
+++ b/.github/workflows/r2r-light-py-integration-tests.yml
@@ -18,6 +18,7 @@ on:
 jobs:
   test:
     runs-on: ${{ matrix.os }}
+    continue-on-error: true
 
     strategy:
       matrix:
@@ -29,6 +30,7 @@ jobs:
           - sdk-retrieval
           - sdk-auth
           - sdk-collections
+          - sdk-prompts
     env:
       OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
       TELEMETRY_ENABLED: 'false'
@@ -59,29 +61,27 @@ jobs:
       - name: Run CLI Ingestion Tests
         if: matrix.test_category == 'cli-ingestion'
         uses: ./.github/actions/run-cli-ingestion-tests
-        continue-on-error: true
 
       - name: Run CLI Retrieval Tests
         if: matrix.test_category == 'cli-retrieval'
         uses: ./.github/actions/run-cli-retrieval-tests
-        continue-on-error: true
 
       - name: Run SDK Ingestion Tests
         if: matrix.test_category == 'sdk-ingestion'
         uses: ./.github/actions/run-sdk-ingestion-tests
-        continue-on-error: true
 
       - name: Run SDK Retrieval
Tests if: matrix.test_category == 'sdk-retrieval' uses: ./.github/actions/run-sdk-retrieval-tests - continue-on-error: true - name: Run SDK Auth Tests if: matrix.test_category == 'sdk-auth' uses: ./.github/actions/run-sdk-auth-tests - continue-on-error: true - name: Run SDK Collections Tests if: matrix.test_category == 'sdk-collections' uses: ./.github/actions/run-sdk-collections-tests - continue-on-error: true + + - name: Run SDK Prompt Tests + if: matrix.test_category == 'sdk-prompts' + uses: ./.github/actions/run-sdk-prompt-management-tests diff --git a/docs/api-reference/openapi.json b/docs/api-reference/openapi.json index 643611fc3..fe7791b66 100644 --- a/docs/api-reference/openapi.json +++ b/docs/api-reference/openapi.json @@ -1 +1 @@ -{"openapi":"3.1.0","info":{"title":"R2R Application API","version":"1.0.0"},"paths":{"/v2/ingest_files":{"post":{"summary":"Ingest Files App","description":"Ingest files into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest files for their own access. More expansive collection permissioning is under development.","operationId":"ingest_files_app_v2_ingest_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_ingest_files_app_v2_ingest_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_IngestionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_files(\n file_paths=[\"pg_essay_1.html\", \"got.txt\"],\n metadatas=[{\"metadata_1\":\"some random metadata\"}, {\"metadata_2\": \"some other random metadata\"}],\n document_ids=None\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1.html;type=text/html\" \\\n -F \"file=@got.txt;type=text/plain\" \\\n -F 'metadatas=[{},{}]' \\\n -F 'document_ids=null'\n"}]}},"/v2/update_files":{"post":{"summary":"Update Files App","description":"Update existing files in the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to update files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only update their own files. 
More expansive collection permissioning is under development.","operationId":"update_files_app_v2_update_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_update_files_app_v2_update_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdateResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.update_files(\n file_paths=[\"pg_essay_1_v2.txt\"],\n document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/update_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1_v2.txt;type=text/plain\" \\\n -F 'document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]'\n"}]}},"/v2/ingest_chunks":{"post":{"summary":"Ingest Chunks App","description":"Ingest text chunks into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest pre-parsed text chunks into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest chunks for their own access. More expansive collection permissioning is under development.","operationId":"ingest_chunks_app_v2_ingest_chunks_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_ingest_chunks_app_v2_ingest_chunks_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_IngestionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_chunks(\n chunks=[\n {\n \"text\": \"Another chunk of text\",\n },\n {\n \"text\": \"Yet another chunk of text\",\n },\n {\n \"text\": \"A chunk of text\",\n },\n ],\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_chunks\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"chunks\": [\n {\n \"text\": \"Another chunk of text\"\n },\n {\n \"text\": \"Yet another chunk of text\"\n },\n {\n \"text\": \"A chunk of text\"\n }\n ],\n \"document_id\": \"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\",\n \"metadata\": {}\n }'\n"}]}},"/v2/update_chunk/{document_id}/{extraction_id}":{"put":{"summary":"Update Chunk App","operationId":"update_chunk_app_v2_update_chunk__document_id___extraction_id__put","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","description":"The document ID of the chunk to update","title":"Document Id"},"description":"The document ID of the chunk to update"},{"name":"extraction_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","description":"The extraction ID of the chunk to update","title":"Extraction 
Id"},"description":"The extraction ID of the chunk to update"}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_chunk_app_v2_update_chunk__document_id___extraction_id__put"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdateResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_vector_index":{"post":{"summary":"Create Vector Index App","description":"Create a vector index for a given table.","operationId":"create_vector_index_app_v2_create_vector_index_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_vector_index_app_v2_create_vector_index_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CreateVectorIndexResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.create_vector_index(\n table_name=\"vectors\",\n index_method=\"hnsw\",\n index_measure=\"cosine_distance\",\n index_arguments={\"m\": 16, \"ef_construction\": 64},\n concurrently=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"http://localhost:7276/v2/create_vector_index\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\n \"table_name\": \"vectors\",\n \"index_method\": \"hnsw\",\n \"index_measure\": \"cosine_distance\",\n \"index_arguments\": {\n \"m\": 16,\n \"ef_construction\": 64\n },\n \"concurrently\": true\n }'\n"}]}},"/v2/list_vector_indices":{"get":{"summary":"List Vector Indices App","operationId":"list_vector_indices_app_v2_list_vector_indices_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"table_name","in":"query","required":false,"schema":{"anyOf":[{"$ref":"#/components/schemas/VectorTableName"},{"type":"null"}],"description":"The table to create the index on. Default: vectors","default":"vectors","title":"Table Name"},"description":"The table to create the index on. 
Default: vectors"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ListVectorIndicesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.create_vector_index(\n table_name=\"vectors\",\n index_method=\"hnsw\",\n index_measure=\"cosine_distance\",\n index_arguments={\"m\": 16, \"ef_construction\": 64},\n concurrently=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"http://localhost:7276/v2/create_vector_index\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\n \"table_name\": \"vectors\",\n \"index_method\": \"hnsw\",\n \"index_measure\": \"cosine_distance\",\n \"index_arguments\": {\n \"m\": 16,\n \"ef_construction\": 64\n },\n \"concurrently\": true\n }'\n"}]}},"/v2/delete_vector_index":{"delete":{"summary":"Delete Vector Index App","operationId":"delete_vector_index_app_v2_delete_vector_index_delete","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_vector_index_app_v2_delete_vector_index_delete"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_DeleteVectorIndexResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.delete_vector_index(\n index_name=\"ix_vector_cosine_ops_hnsw__20241021211541\",\n table_name=\"vectors\",\n concurrently=True\n)\n"},{"lang":"Shell","source":"curl -X DELETE \"http://localhost:7276/v2/delete_vector_index\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\n \"index_name\": \"ix_vector_cosine_ops_hnsw__20241021211541\",\n \"table_name\": \"vectors\",\n \"concurrently\": true\n }'\n"}]}},"/v2/health":{"get":{"summary":"Health Check","operationId":"health_check_v2_health_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/v2/server_stats":{"get":{"summary":"Server Stats","operationId":"server_stats_v2_server_stats_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ServerStats_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/update_prompt":{"post":{"summary":"Update Prompt App","operationId":"update_prompt_app_v2_update_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_prompt_app_v2_update_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/add_prompt":{"post":{"summary":"Add Prompt 
App","operationId":"add_prompt_app_v2_add_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_prompt_app_v2_add_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_prompt/{prompt_name}":{"get":{"summary":"Get Prompt App","operationId":"get_prompt_app_v2_get_prompt__prompt_name__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"},{"name":"inputs","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"description":"JSON-encoded prompt inputs","title":"Inputs"},"description":"JSON-encoded prompt inputs"},{"name":"prompt_override","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Prompt override","title":"Prompt Override"},"description":"Prompt override"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_all_prompts":{"get":{"summary":"Get All Prompts App","operationId":"get_all_prompts_app_v2_get_all_prompts_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AllPromptsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_prompt/{prompt_name}":{"delete":{"summary":"Delete Prompt App","operationId":"delete_prompt_app_v2_delete_prompt__prompt_name__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/analytics":{"get":{"summary":"Get Analytics App","operationId":"get_analytics_app_v2_analytics_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filter_criteria","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Filter Criteria"}},{"name":"analysis_types","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Analysis Types"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AnalyticsResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logs":{"get":{"summary":"Logs 
App","operationId":"logs_app_v2_logs_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"run_type_filter","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"default":"","title":"Run Type Filter"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_LogResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/app_settings":{"get":{"summary":"App Settings","operationId":"app_settings_v2_app_settings_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AppSettingsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/users_overview":{"get":{"summary":"Users Overview App","operationId":"users_overview_app_v2_users_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"default":[],"title":"User Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_UserOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete":{"delete":{"summary":"Delete App","operationId":"delete_app_v2_delete_delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filters","in":"query","required":true,"schema":{"type":"string","description":"JSON-encoded filters","title":"Filters"},"description":"JSON-encoded filters"}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/download_file/{document_id}":{"get":{"summary":"Download File App","description":"Download a file by its document ID as a stream.","operationId":"download_file_app_v2_download_file__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"}],"responses":{"200":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/documents_overview":{"get":{"summary":"Documents Overview App","operationId":"documents_overview_app_v2_documents_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_ids","in":"query","required":false,"schema":{"type":"array","items":{"type":"string"},"default":[],"title":"Document 
Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","minimum":-1,"description":"Number of items to return. Use -1 to return all items.","default":100,"title":"Limit"},"description":"Number of items to return. Use -1 to return all items."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/document_chunks/{document_id}":{"get":{"summary":"Document Chunks App","operationId":"document_chunks_app_v2_document_chunks__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","title":"Document Id"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":100,"title":"Limit"}},{"name":"include_vectors","in":"query","required":false,"schema":{"anyOf":[{"type":"boolean"},{"type":"null"}],"default":false,"title":"Include Vectors"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentChunkResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collections_overview":{"get":{"summary":"Collections Overview App","operationId":"collections_overview_app_v2_collections_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"title":"Collection Ids"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","maximum":1000,"minimum":1},{"type":"null"}],"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_collection":{"post":{"summary":"Create Collection App","operationId":"create_collection_app_v2_create_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_collection_app_v2_create_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_collection/{collection_id}":{"get":{"summary":"Get Collection 
App","operationId":"get_collection_app_v2_get_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/update_collection":{"put":{"summary":"Update Collection App","operationId":"update_collection_app_v2_update_collection_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_collection_app_v2_update_collection_put"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_collection/{collection_id}":{"delete":{"summary":"Delete Collection App","operationId":"delete_collection_app_v2_delete_collection__collection_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/list_collections":{"get":{"summary":"List Collections App","operationId":"list_collections_app_v2_list_collections_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/add_user_to_collection":{"post":{"summary":"Add User To Collection App","operationId":"add_user_to_collection_app_v2_add_user_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_user_to_collection_app_v2_add_user_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_user_from_collection":{"post":{"summary":"Remove User From Collection 
App","operationId":"remove_user_from_collection_app_v2_remove_user_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_users_in_collection/{collection_id}":{"get":{"summary":"Get Users In Collection App","operationId":"get_users_in_collection_app_v2_get_users_in_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_UserResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user_collections/{user_id}":{"get":{"summary":"Get Collections For User App","operationId":"get_collections_for_user_app_v2_user_collections__user_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"User ID","title":"User Id"},"description":"User ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/assign_document_to_collection":{"post":{"summary":"Assign Document To Collection App","operationId":"assign_document_to_collection_app_v2_assign_document_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_document_from_collection":{"post":{"summary":"Remove Document From Collection 
App","operationId":"remove_document_from_collection_app_v2_remove_document_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/document_collections/{document_id}":{"get":{"summary":"Document Collections App","operationId":"document_collections_app_v2_document_collections__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collection/{collection_id}/documents":{"get":{"summary":"Documents In Collection App","operationId":"documents_in_collection_app_v2_collection__collection_id__documents_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/conversations_overview":{"get":{"summary":"Conversations Overview App","operationId":"conversations_overview_app_v2_conversations_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_ids","in":"query","required":false,"schema":{"type":"array","items":{"type":"string"},"default":[],"title":"Conversation Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_ConversationOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_conversation/{conversation_id}":{"get":{"summary":"Get 
Conversation","operationId":"get_conversation_v2_get_conversation__conversation_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"},{"name":"branch_id","in":"query","required":false,"schema":{"type":"string","description":"Branch ID","title":"Branch Id"},"description":"Branch ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_Tuple_str__Message___"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_conversation":{"post":{"summary":"Create Conversation","operationId":"create_conversation_v2_create_conversation_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"object","title":"Response Create Conversation V2 Create Conversation Post"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/add_message/{conversation_id}":{"post":{"summary":"Add Message","operationId":"add_message_v2_add_message__conversation_id__post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_message_v2_add_message__conversation_id__post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"object","title":"Response Add Message V2 Add Message Conversation Id Post"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/update_message/{message_id}":{"put":{"summary":"Edit Message","operationId":"edit_message_v2_update_message__message_id__put","security":[{"HTTPBearer":[]}],"parameters":[{"name":"message_id","in":"path","required":true,"schema":{"type":"string","description":"Message ID","title":"Message Id"},"description":"Message ID"}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"string","description":"New content","title":"Message"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"object","title":"Response Edit Message V2 Update Message Message Id Put"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/branches_overview/{conversation_id}":{"get":{"summary":"Branches Overview","operationId":"branches_overview_v2_branches_overview__conversation_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"object","title":"Response Branches Overview V2 Branches Overview Conversation Id Get"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete_conversation/{conversation_id}":{"delete":{"summary":"Delete 
Conversation","operationId":"delete_conversation_v2_delete_conversation__conversation_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/search":{"post":{"summary":"Search App","description":"Perform a search query on the vector database and knowledge graph.\n\nThis endpoint allows for complex filtering of search results using PostgreSQL-based queries.\nFilters can be applied to various fields such as document_id, and internal metadata values.\n\n\nAllowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.","operationId":"search_app_v2_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_search_app_v2_search_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_SearchResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.search(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/search\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n }'\n"}]}},"/v2/rag":{"post":{"summary":"Rag App","description":"Execute a RAG (Retrieval-Augmented Generation) query.\n\nThis endpoint combines search results with language model generation.\nIt supports the same filtering capabilities as the search endpoint,\nallowing for 
precise control over the retrieved context.\n\nThe generation process can be customized using the rag_generation_config parameter.","operationId":"rag_app_v2_rag_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_rag_app_v2_rag_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.rag(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True,\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/rag\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n }'\n"}]}},"/v2/agent":{"post":{"summary":"Agent App","description":"Implement an agent-based interaction for complex query processing.\n\nThis endpoint supports multi-turn conversations and can handle complex queries\nby breaking them down into sub-tasks. 
It uses the same filtering capabilities\nas the search and RAG endpoints for retrieving relevant information.\n\nThe agent's behavior can be customized using the rag_generation_config and\ntask_prompt_override parameters.","operationId":"agent_app_v2_agent_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_agent_app_v2_agent_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGAgentResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.agent(\n messages=[\n {\"role\": \"user\", \"content\": \"Who is the greatest philospher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philospher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n include_title_if_available=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/agent\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"messages\": [\n {\"role\": \"user\", \"content\": \"Who is the greatest philospher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philospher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": false # to enable this, please read the graphrag cookbook\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n \"include_title_if_available\": true\n }'\n"}]}},"/v2/completion":{"post":{"summary":"Completion","description":"Generate completions for a list of messages.\n\nThis endpoint uses the language model to generate completions for the provided messages.\nThe generation process can be customized using the generation_config parameter.","operationId":"completion_v2_completion_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_completion_v2_completion_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/register":{"post":{"summary":"Register App","description":"Register a new user with the given email and 
password.","operationId":"register_app_v2_register_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_register_app_v2_register_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/verify_email":{"post":{"summary":"Verify Email App","description":"Verify a user's email address.\n\nThis endpoint is used to confirm a user's email address using the verification code\nsent to their email after registration.","operationId":"verify_email_app_v2_verify_email_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_verify_email_app_v2_verify_email_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/login":{"post":{"summary":"Login App","description":"Authenticate a user and provide access tokens.\n\nThis endpoint authenticates a user using their email (username) and password,\nand returns access and refresh tokens upon successful authentication.","operationId":"login_app_v2_login_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/Body_login_app_v2_login_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logout":{"post":{"summary":"Logout App","description":"Log out the current user.\n\nThis endpoint invalidates the user's current access token, effectively logging them out.","operationId":"logout_app_v2_logout_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}}},"security":[{"OAuth2PasswordBearer":[]},{"HTTPBearer":[]}]}},"/v2/user":{"get":{"summary":"Get User App","description":"Get the current user's profile information.\n\nThis endpoint returns the profile information of the currently authenticated user.","operationId":"get_user_app_v2_user_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}}},"security":[{"HTTPBearer":[]}]},"put":{"summary":"Put User App","description":"Update the current user's profile information.\n\nThis endpoint allows the authenticated user to update their profile information.","operationId":"put_user_app_v2_user_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_put_user_app_v2_user_put"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/refresh_access_token":{"post":{"summary":"Refresh Access Token App","description":"Refresh the access token using a refresh token.\n\nThis endpoint allows users to obtain a new access token using their refresh token.","operationId":"refresh_access_token_app_v2_refresh_access_token_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","title":"Refresh Token","description":"Refresh token"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/change_password":{"post":{"summary":"Change Password App","description":"Change the authenticated user's password.\n\nThis endpoint allows users to change their password by providing their current password\nand a new password.","operationId":"change_password_app_v2_change_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_change_password_app_v2_change_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/request_password_reset":{"post":{"summary":"Request Password Reset App","description":"Request a password reset for a user.\n\nThis endpoint initiates the password reset process by sending a reset link\nto the specified email address.","operationId":"request_password_reset_app_v2_request_password_reset_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","format":"email","title":"Email","description":"User's email address"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/reset_password":{"post":{"summary":"Reset Password App","operationId":"reset_password_app_v2_reset_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_reset_password_app_v2_reset_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user/{user_id}":{"delete":{"summary":"Delete User App","description":"Delete a user account.\n\nThis endpoint allows users to delete their own account or, for superusers,\nto delete any user account.","operationId":"delete_user_app_v2_user__user_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"ID of the user to delete","title":"User Id"},"description":"ID of the user to 
delete"}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_user_app_v2_user__user_id__delete"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_graph":{"post":{"summary":"Create Graph","description":"Creating a graph on your documents. This endpoint takes input a list of document ids and KGCreationSettings.\nIf document IDs are not provided, the graph will be created on all documents in the system.\nThis step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information.\nIn order to do GraphRAG, you will need to run the enrich_graph endpoint.","operationId":"create_graph_v2_create_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_graph_v2_create_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_Union_KGCreationResponse__KGCreationEstimationResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/enrich_graph":{"post":{"summary":"Enrich Graph","description":"This endpoint enriches the graph with additional information.\nIt creates communities of nodes based on their similarity and adds embeddings to the graph.\nThis step is necessary for GraphRAG to work.","operationId":"enrich_graph_v2_enrich_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_enrich_graph_v2_enrich_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_Union_KGEnrichmentResponse__KGEnrichmentEstimationResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/entities":{"get":{"summary":"Get Entities","description":"Retrieve entities from the knowledge graph.","operationId":"get_entities_v2_entities_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"entity_level","in":"query","required":false,"schema":{"anyOf":[{"$ref":"#/components/schemas/EntityLevel"},{"type":"null"}],"description":"Type of entities to retrieve. Options are: raw, dedup_document, dedup_collection.","default":"document","title":"Entity Level"},"description":"Type of entities to retrieve. 
Options are: raw, dedup_document, dedup_collection."},{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve entities from.","title":"Collection Id"},"description":"Collection ID to retrieve entities from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"entity_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Entity IDs to filter by.","title":"Entity Ids"},"description":"Entity IDs to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGEntitiesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/triples":{"get":{"summary":"Get Triples","description":"Retrieve triples from the knowledge graph.","operationId":"get_triples_v2_triples_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve triples from.","title":"Collection Id"},"description":"Collection ID to retrieve triples from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"entity_names","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Entity names to filter by.","title":"Entity Names"},"description":"Entity names to filter by."},{"name":"triple_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Triple IDs to filter by.","title":"Triple Ids"},"description":"Triple IDs to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGTriplesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/communities":{"get":{"summary":"Get Communities","description":"Retrieve communities from the knowledge graph.","operationId":"get_communities_v2_communities_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve communities from.","title":"Collection Id"},"description":"Collection ID to retrieve communities from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for 
pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"levels","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"integer"}},{"type":"null"}],"description":"Levels to filter by.","title":"Levels"},"description":"Levels to filter by."},{"name":"community_numbers","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"integer"}},{"type":"null"}],"description":"Community numbers to filter by.","title":"Community Numbers"},"description":"Community numbers to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGCommunitiesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/deduplicate_entities":{"post":{"summary":"Deduplicate Entities","description":"Deduplicate entities in the knowledge graph.","operationId":"deduplicate_entities_v2_deduplicate_entities_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_deduplicate_entities_v2_deduplicate_entities_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_Union_KGEntityDeduplicationResponse__KGDeduplicationEstimationResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/tuned_prompt":{"get":{"summary":"Get Tuned Prompt","description":"Auto-tune the prompt for a specific collection.","operationId":"get_tuned_prompt_v2_tuned_prompt_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"query","required":true,"schema":{"type":"string","description":"The name of the prompt to tune. Valid options are 'kg_triples_extraction_prompt', 'kg_entity_description_prompt' and 'community_reports_prompt'.","title":"Prompt Name"},"description":"The name of the prompt to tune. 
Valid options are 'kg_triples_extraction_prompt', 'kg_entity_description_prompt' and 'community_reports_prompt'."},{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve communities from.","title":"Collection Id"},"description":"Collection ID to retrieve communities from."},{"name":"documents_offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer"},{"type":"null"}],"description":"Offset for document pagination.","default":0,"title":"Documents Offset"},"description":"Offset for document pagination."},{"name":"documents_limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer"},{"type":"null"}],"description":"Limit for document pagination.","default":100,"title":"Documents Limit"},"description":"Limit for document pagination."},{"name":"chunks_offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer"},{"type":"null"}],"description":"Offset for chunk pagination.","default":0,"title":"Chunks Offset"},"description":"Offset for chunk pagination."},{"name":"chunks_limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer"},{"type":"null"}],"description":"Limit for chunk pagination.","default":100,"title":"Chunks Limit"},"description":"Limit for chunk pagination."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGTunePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete_graph_for_collection":{"delete":{"summary":"Delete Graph For Collection","description":"Delete the graph for a given collection. Note that this endpoint may delete a large amount of data created by the KG pipeline, this deletion is irreversible, and recreating the graph may be an expensive operation.\n\nNotes:\nThe endpoint deletes all communities for a given collection. If the cascade flag is set to true, the endpoint also deletes all the entities and triples associated with the collection.\n\nWARNING: Setting this flag to true will delete entities and triples for documents that are shared across multiple collections. 
Do not set this flag unless you are absolutely sure that you want to delete the entities and triples for all documents in the collection.","operationId":"delete_graph_for_collection_v2_delete_graph_for_collection_delete","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_graph_for_collection_v2_delete_graph_for_collection_delete"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/openapi_spec":{"get":{"summary":"Openapi Spec","operationId":"openapi_spec_v2_openapi_spec_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AllPromptsResponse":{"properties":{"prompts":{"additionalProperties":{"$ref":"#/components/schemas/PromptResponse"},"type":"object","title":"Prompts"}},"type":"object","required":["prompts"],"title":"AllPromptsResponse"},"AnalyticsResponse":{"properties":{"analytics_data":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Analytics Data"},"filtered_logs":{"type":"object","title":"Filtered Logs"}},"type":"object","required":["filtered_logs"],"title":"AnalyticsResponse"},"AppSettingsResponse":{"properties":{"config":{"type":"object","title":"Config"},"prompts":{"type":"object","title":"Prompts"}},"type":"object","required":["config","prompts"],"title":"AppSettingsResponse"},"BaseModel":{"properties":{},"type":"object","title":"BaseModel"},"Body_add_message_v2_add_message__conversation_id__post":{"properties":{"message":{"$ref":"#/components/schemas/Message","description":"Message content"},"parent_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Parent Id","description":"Parent message ID"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata","description":"Metadata"}},"type":"object","required":["message"],"title":"Body_add_message_v2_add_message__conversation_id__post"},"Body_add_prompt_app_v2_add_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"type":"string","title":"Template","description":"Prompt template"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name","template"],"title":"Body_add_prompt_app_v2_add_prompt_post"},"Body_add_user_to_collection_app_v2_add_user_to_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_add_user_to_collection_app_v2_add_user_to_collection_post"},"Body_agent_app_v2_agent_post":{"properties":{"message":{"anyOf":[{"$ref":"#/components/schemas/Message"},{"type":"null"}],"description":"Message object"},"messages":{"anyOf":[{"items":{"$ref":"#/components/schemas/Message"},"type":"array"},{"type":"null"}],"title":"Messages","description":"List of message objects (deprecated, use message instead)","deprecated":true},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search 
settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":true},"conversation_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Conversation Id","description":"The ID of the conversation, a new ID is assigned if not provided"},"branch_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Branch Id","description":"The ID of the branch"}},"type":"object","title":"Body_agent_app_v2_agent_post"},"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"},"Body_change_password_app_v2_change_password_post":{"properties":{"current_password":{"type":"string","title":"Current Password","description":"Current password"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["current_password","new_password"],"title":"Body_change_password_app_v2_change_password_post"},"Body_completion_v2_completion_post":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"The messages to complete"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"The generation config"}},"type":"object","required":["messages"],"title":"Body_completion_v2_completion_post"},"Body_create_collection_app_v2_create_collection_post":{"properties":{"name":{"type":"string","title":"Name","description":"Collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Collection description","default":""}},"type":"object","required":["name"],"title":"Body_create_collection_app_v2_create_collection_post"},"Body_create_graph_v2_create_graph_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to create graph for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph creation process."},"kg_creation_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Creation Settings","description":"Settings for the graph creation process."}},"type":"object","title":"Body_create_graph_v2_create_graph_post"},"Body_create_vector_index_app_v2_create_vector_index_post":{"properties":{"table_name":{"anyOf":[{"$ref":"#/components/schemas/VectorTableName"},{"type":"null"}],"description":"The table to create the index on. Default: vectors","default":"vectors"},"index_method":{"$ref":"#/components/schemas/IndexMethod","description":"The indexing method to use. Options: hnsw, ivfflat, auto. Default: hnsw","default":"hnsw"},"index_measure":{"$ref":"#/components/schemas/IndexMeasure","description":"Distance measure for vector comparisons. Options: cosine_distance, l2_distance, max_inner_product. 
Default: cosine_distance","default":"cosine_distance"},"index_arguments":{"anyOf":[{"$ref":"#/components/schemas/IndexArgsIVFFlat"},{"$ref":"#/components/schemas/IndexArgsHNSW"},{"type":"null"}],"title":"Index Arguments","description":"Configuration parameters for the chosen index method. For HNSW: {m: int, ef_construction: int}. For IVFFlat: {n_lists: int}"},"index_name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Index Name","description":"Optional custom name for the index. If not provided, one will be auto-generated"},"concurrently":{"type":"boolean","title":"Concurrently","description":"Whether to create the index concurrently. Default: true","default":true}},"type":"object","title":"Body_create_vector_index_app_v2_create_vector_index_post"},"Body_deduplicate_entities_v2_deduplicate_entities_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to deduplicate entities for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the deduplication process."},"deduplication_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Deduplication Settings","description":"Settings for the deduplication process."}},"type":"object","title":"Body_deduplicate_entities_v2_deduplicate_entities_post"},"Body_delete_graph_for_collection_v2_delete_graph_for_collection_delete":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id","description":"Collection ID to delete graph for."},"cascade":{"type":"boolean","title":"Cascade","description":"Whether to cascade the deletion, and delete entities and triples belonging to the collection.","default":false}},"type":"object","required":["collection_id"],"title":"Body_delete_graph_for_collection_v2_delete_graph_for_collection_delete"},"Body_delete_user_app_v2_user__user_id__delete":{"properties":{"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password","description":"User's current password"},"delete_vector_data":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Delete Vector Data","description":"Whether to delete the user's vector data","default":false}},"type":"object","title":"Body_delete_user_app_v2_user__user_id__delete"},"Body_delete_vector_index_app_v2_delete_vector_index_delete":{"properties":{"index_name":{"type":"string","title":"Index Name","description":"The name of the index to delete"},"table_name":{"anyOf":[{"$ref":"#/components/schemas/VectorTableName"},{"type":"null"}],"description":"The name of the table containing the index. Default: vectors","default":"vectors"},"concurrently":{"type":"boolean","title":"Concurrently","description":"Whether to delete the index concurrently. 
Default: true","default":true}},"type":"object","required":["index_name"],"title":"Body_delete_vector_index_app_v2_delete_vector_index_delete"},"Body_enrich_graph_v2_enrich_graph_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to enrich graph for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph enrichment process.","default":"estimate"},"kg_enrichment_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Enrichment Settings","description":"Settings for the graph enrichment process."}},"type":"object","title":"Body_enrich_graph_v2_enrich_graph_post"},"Body_ingest_chunks_app_v2_ingest_chunks_post":{"properties":{"chunks":{"items":{"$ref":"#/components/schemas/RawChunk"},"type":"array","title":"Chunks","description":"A list of text chunks to ingest into the system.","default":{}},"document_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document Id","description":"An optional document id to associate the chunks with. If not provided, a unique document id will be generated."},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"},"run_with_orchestration":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Run With Orchestration","description":"Whether or not ingestion runs with orchestration, default is `True`. When set to `False`, the ingestion process will run synchronous and directly return the result.","default":true}},"type":"object","title":"Body_ingest_chunks_app_v2_ingest_chunks_post"},"Body_ingest_files_app_v2_ingest_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to ingest into the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."},"run_with_orchestration":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Run With Orchestration","description":"Whether or not ingestion runs with orchestration, default is `True`. 
When set to `False`, the ingestion process will run synchronous and directly return the result.","default":true}},"type":"object","required":["files"],"title":"Body_ingest_files_app_v2_ingest_files_post"},"Body_login_app_v2_login_post":{"properties":{"grant_type":{"anyOf":[{"type":"string","pattern":"password"},{"type":"null"}],"title":"Grant Type"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"scope":{"type":"string","title":"Scope","default":""},"client_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Id"},"client_secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Secret"}},"type":"object","required":["username","password"],"title":"Body_login_app_v2_login_post"},"Body_put_user_app_v2_user_put":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id","description":"ID of the user to update"},"email":{"anyOf":[{"type":"string","format":"email"},{"type":"null"}],"title":"Email","description":"Updated email address"},"is_superuser":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Is Superuser","description":"Updated superuser status"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated user name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio","description":"Updated user bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture","description":"Updated profile picture URL"}},"type":"object","title":"Body_put_user_app_v2_user_put"},"Body_rag_app_v2_rag_post":{"properties":{"query":{"type":"string","title":"Query","description":"RAG query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":false}},"type":"object","required":["query"],"title":"Body_rag_app_v2_rag_post"},"Body_register_app_v2_register_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"password":{"type":"string","title":"Password","description":"User's password"}},"type":"object","required":["email","password"],"title":"Body_register_app_v2_register_post"},"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"},"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection 
ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"},"Body_reset_password_app_v2_reset_password_post":{"properties":{"reset_token":{"type":"string","title":"Reset Token","description":"Password reset token"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["reset_token","new_password"],"title":"Body_reset_password_app_v2_reset_password_post"},"Body_search_app_v2_search_post":{"properties":{"query":{"type":"string","title":"Query","description":"Search query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"}},"type":"object","required":["query"],"title":"Body_search_app_v2_search_post"},"Body_update_chunk_app_v2_update_chunk__document_id___extraction_id__put":{"properties":{"text":{"type":"string","title":"Text","description":"The new text content for the chunk"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional updated metadata"},"run_with_orchestration":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Run With Orchestration","default":true}},"type":"object","required":["text"],"title":"Body_update_chunk_app_v2_update_chunk__document_id___extraction_id__put"},"Body_update_collection_app_v2_update_collection_put":{"properties":{"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Updated collection description"}},"type":"object","required":["collection_id"],"title":"Body_update_collection_app_v2_update_collection_put"},"Body_update_files_app_v2_update_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to update in the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."},"run_with_orchestration":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Run With Orchestration","description":"Whether or not ingestion runs with orchestration, default is `True`. 
When set to `False`, the ingestion process will run synchronously and directly return the result.","default":true}},"type":"object","required":["files"],"title":"Body_update_files_app_v2_update_files_post"},"Body_update_prompt_app_v2_update_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Template","description":"Prompt template"},"input_types":{"anyOf":[{"additionalProperties":{"type":"string"},"type":"object"},{"type":"null"}],"title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name"],"title":"Body_update_prompt_app_v2_update_prompt_post"},"Body_verify_email_app_v2_verify_email_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"verification_code":{"type":"string","title":"Verification Code","description":"Email verification code"}},"type":"object","required":["email","verification_code"],"title":"Body_verify_email_app_v2_verify_email_post"},"CollectionOverviewResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"user_count":{"type":"integer","title":"User Count"},"document_count":{"type":"integer","title":"Document Count"}},"type":"object","required":["collection_id","name","description","created_at","updated_at","user_count","document_count"],"title":"CollectionOverviewResponse"},"CollectionResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"}},"type":"object","required":["collection_id","name","description","created_at","updated_at"],"title":"CollectionResponse"},"CommunityReport":{"properties":{"community_number":{"type":"integer","title":"Community Number"},"level":{"type":"integer","title":"Level"},"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name","default":""},"summary":{"type":"string","title":"Summary","default":""},"findings":{"items":{"type":"string"},"type":"array","title":"Findings","default":[]},"rating":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Rating"},"rating_explanation":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rating Explanation"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Embedding"},"attributes":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Attributes"}},"type":"object","required":["community_number","level","collection_id"],"title":"CommunityReport"},"ConversationOverviewResponse":{"properties":{"conversation_id":{"type":"string","format":"uuid","title":"Conversation Id"},"created_at":{"type":"string","format":"date-time","title":"Created At"}},"type":"object","required":["conversation_id","created_at"],"title":"ConversationOverviewResponse"},"CreateVectorIndexResponse":{"properties":{"message":{"type":"string","title":"Message","description":"Vector index creation queued 
successfully."}},"type":"object","required":["message"],"title":"CreateVectorIndexResponse"},"DeleteVectorIndexResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"DeleteVectorIndexResponse"},"DocumentChunkResponse":{"properties":{"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"},"vector":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Vector"}},"type":"object","required":["extraction_id","document_id","user_id","collection_ids","text","metadata"],"title":"DocumentChunkResponse"},"DocumentOverviewResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"title":{"type":"string","title":"Title"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"type":{"type":"string","title":"Type"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"ingestion_status":{"type":"string","title":"Ingestion Status"},"kg_extraction_status":{"type":"string","title":"Kg Extraction Status"},"version":{"type":"string","title":"Version"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["id","title","user_id","type","created_at","updated_at","ingestion_status","kg_extraction_status","version","collection_ids","metadata"],"title":"DocumentOverviewResponse"},"Entity":{"properties":{"name":{"type":"string","title":"Name"},"id":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Id"},"category":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Category"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"description_embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"string"},{"type":"null"}],"title":"Description Embedding"},"community_numbers":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Community Numbers"},"extraction_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Extraction Ids"},"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id"},"document_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Document Id"},"document_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Document Ids"},"attributes":{"anyOf":[{"type":"object"},{"type":"string"},{"type":"null"}],"title":"Attributes"}},"type":"object","required":["name"],"title":"Entity","description":"An entity extracted from a document."},"EntityLevel":{"type":"string","enum":["collection","document","chunk"],"title":"EntityLevel"},"GenerationConfig":{"properties":{"model":{"type":"string","title":"Model"},"temperature":{"type":"number","title":"Temperature"},"top_p":{"type":"number","title":"Top P"},"max_tokens_to_sample":{"type":"integer","title":"Max Tokens To 
Sample"},"stream":{"type":"boolean","title":"Stream"},"functions":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Functions"},"tools":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tools"},"add_generation_kwargs":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Add Generation Kwargs"},"api_base":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api Base"},"response_format":{"anyOf":[{"type":"object"},{"$ref":"#/components/schemas/BaseModel"},{"type":"null"}],"title":"Response Format"}},"type":"object","title":"GenerationConfig","max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"GenericMessageResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"GenericMessageResponse"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"HybridSearchSettings":{"properties":{"full_text_weight":{"type":"number","title":"Full Text Weight","description":"Weight to apply to full text search","default":1.0},"semantic_weight":{"type":"number","title":"Semantic Weight","description":"Weight to apply to semantic search","default":5.0},"full_text_limit":{"type":"integer","title":"Full Text Limit","description":"Maximum number of results to return from full text search","default":200},"rrf_k":{"type":"integer","title":"Rrf K","description":"K-value for RRF (Rank Reciprocal Fusion)","default":50}},"type":"object","title":"HybridSearchSettings"},"IndexArgsHNSW":{"properties":{"m":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"M","default":16},"ef_construction":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Ef Construction","default":64}},"type":"object","title":"IndexArgsHNSW","description":"A class for arguments that can optionally be supplied to the index creation\nmethod when building an HNSW type index.\n\nRef: https://github.com/pgvector/pgvector#index-options\n\nBoth attributes are Optional in case the user only wants to specify one and\nleave the other as default\n\nAttributes:\n m (int): Maximum number of connections per node per layer (default: 16)\n ef_construction (int): Size of the dynamic candidate list for\n constructing the graph (default: 64)"},"IndexArgsIVFFlat":{"properties":{"n_lists":{"type":"integer","title":"N Lists"}},"type":"object","required":["n_lists"],"title":"IndexArgsIVFFlat","description":"A class for arguments that can optionally be supplied to the index creation\nmethod when building an IVFFlat type index.\n\nAttributes:\n nlist (int): The number of IVF centroids that the index should use"},"IndexMeasure":{"type":"string","enum":["cosine_distance","l2_distance","max_inner_product"],"title":"IndexMeasure","description":"An enum representing the types of distance measures available for indexing.\n\nAttributes:\n cosine_distance (str): The cosine distance measure for indexing.\n l2_distance (str): The Euclidean (L2) distance measure for indexing.\n max_inner_product (str): The maximum inner product measure for indexing."},"IndexMethod":{"type":"string","enum":["auto","ivfflat","hnsw"],"title":"IndexMethod","description":"An enum representing the index methods available.\n\nThis class currently only supports the 'ivfflat' method but may\nexpand in the future.\n\nAttributes:\n auto (str): Automatically choose the best available index method.\n ivfflat (str): The 
ivfflat index method.\n hnsw (str): The hnsw index method."},"IngestionResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Task Id","description":"The task ID of the ingestion request."},"document_id":{"type":"string","format":"uuid","title":"Document Id","description":"The ID of the document that was ingested."}},"type":"object","required":["message","document_id"],"title":"IngestionResponse","example":{"document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","message":"Ingestion task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGCommunitiesResponse":{"properties":{"communities":{"items":{"$ref":"#/components/schemas/CommunityReport"},"type":"array","title":"Communities","description":"The list of communities in the graph for the collection."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of communities in the graph."}},"type":"object","required":["communities","total_entries"],"title":"KGCommunitiesResponse","description":"Response for knowledge graph communities.","example":{"communities":[{"collection_ids":["122fdf6a-e116-546b-a8f6-e4cb2e2c0a09"],"community_number":1,"findings":["finding1","finding2"],"id":"1","level":0,"name":"community name","rating":"8","rating_explanation":"rating explanation","summary":"community summary"}],"total_count":1}},"KGCommunityResult":{"properties":{"name":{"type":"string","title":"Name"},"summary":{"type":"string","title":"Summary"},"rating":{"type":"number","title":"Rating"},"rating_explanation":{"type":"string","title":"Rating Explanation"},"findings":{"items":{"type":"string"},"type":"array","title":"Findings"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","summary","rating","rating_explanation","findings"],"title":"KGCommunityResult","findings":["Finding 1","Finding 2"],"metadata":{},"name":"Community Name","rating":9,"rating_explanation":"Rating Explanation","summary":"Community Summary"},"KGCreationEstimationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user.","default":""},"document_count":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Document Count","description":"The number of documents in the collection."},"number_of_jobs_created":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Number Of Jobs Created","description":"The number of jobs created for the graph creation process."},"total_chunks":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Chunks","description":"The estimated total number of chunks."},"estimated_entities":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Entities","description":"The estimated number of entities in the graph."},"estimated_triples":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Triples","description":"The estimated number of triples in the graph."},"estimated_llm_calls":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Llm Calls","description":"The estimated number of LLM calls in millions."},"estimated_total_in_out_tokens_in_millions":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total In Out Tokens In Millions","description":"The estimated total number of input and output tokens in 
millions."},"estimated_total_time_in_minutes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total Time In Minutes","description":"The estimated total time to run the graph creation process in minutes."},"estimated_cost_in_usd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Cost In Usd","description":"The estimated cost to run the graph creation process in USD."}},"type":"object","title":"KGCreationEstimationResponse","description":"Response for knowledge graph creation estimation."},"KGCreationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the KG creation request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the KG creation request."}},"type":"object","required":["message","task_id"],"title":"KGCreationResponse"},"KGDeduplicationEstimationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user.","default":""},"num_entities":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Entities","description":"The number of entities in the collection."},"estimated_llm_calls":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Llm Calls","description":"The estimated number of LLM calls."},"estimated_total_in_out_tokens_in_millions":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total In Out Tokens In Millions","description":"The estimated total number of input and output tokens in millions."},"estimated_cost_in_usd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Cost In Usd","description":"The estimated cost in USD."},"estimated_total_time_in_minutes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total Time In Minutes","description":"The estimated time in minutes."}},"type":"object","title":"KGDeduplicationEstimationResponse","description":"Response for knowledge graph deduplication estimation."},"KGEnrichmentEstimationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user.","default":""},"total_entities":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Entities","description":"The total number of entities in the graph."},"total_triples":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Total Triples","description":"The total number of triples in the graph."},"estimated_llm_calls":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Llm Calls","description":"The estimated number of LLM calls."},"estimated_total_in_out_tokens_in_millions":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total In Out Tokens In Millions","description":"The estimated total number of input and output tokens in millions."},"estimated_cost_in_usd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Cost In Usd","description":"The estimated cost to run the graph enrichment process."},"estimated_total_time_in_minutes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total Time In Minutes","description":"The estimated total time to run the graph enrichment process."}},"type":"object","title":"KGEnrichmentEstimationResponse","description":"Response for knowledge graph enrichment estimation."},"KGEnrichmentResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the KG enrichment request."},"task_id":{"type":"string","format":"uuid","title":"Task 
Id","description":"The task ID of the KG enrichment request."}},"type":"object","required":["message","task_id"],"title":"KGEnrichmentResponse","example":{"message":"Graph enrichment queued successfuly.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGEntitiesResponse":{"properties":{"entities":{"items":{"$ref":"#/components/schemas/Entity"},"type":"array","title":"Entities","description":"The list of entities in the graph."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of entities in the graph for the collection or document."}},"type":"object","required":["entities","total_entries"],"title":"KGEntitiesResponse","description":"Response for knowledge graph entities.","example":{"entities":[{"description":"Description 1","id":"1","name":"Entity 1"},{"description":"Description 2","id":"2","name":"Entity 2"}],"total_entries":2}},"KGEntityDeduplicationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user."}},"type":"object","required":["message"],"title":"KGEntityDeduplicationResponse","description":"Response for knowledge graph entity deduplication.","example":{"message":"Entity deduplication queued successfully."}},"KGEntityResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGEntityResult","description":"Entity Description","metadata":{},"name":"Entity Name"},"KGGlobalResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGGlobalResult","description":"Global Result Description","metadata":{},"name":"Global Result Name"},"KGRelationshipResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGRelationshipResult","description":"Relationship Description","metadata":{},"name":"Relationship Name"},"KGRunType":{"type":"string","enum":["estimate","run"],"title":"KGRunType","description":"Type of KG run."},"KGSearchMethod":{"type":"string","enum":["local"],"const":"local","title":"KGSearchMethod"},"KGSearchResult":{"properties":{"method":{"$ref":"#/components/schemas/KGSearchMethod"},"content":{"anyOf":[{"$ref":"#/components/schemas/KGEntityResult"},{"$ref":"#/components/schemas/KGRelationshipResult"},{"$ref":"#/components/schemas/KGCommunityResult"},{"$ref":"#/components/schemas/KGGlobalResult"}],"title":"Content"},"result_type":{"anyOf":[{"$ref":"#/components/schemas/KGSearchResultType"},{"type":"null"}]},"extraction_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Extraction Ids"},"metadata":{"type":"object","title":"Metadata","default":{}}},"type":"object","required":["method","content"],"title":"KGSearchResult","content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of 
France?"},"method":"local","result_type":"entity"},"KGSearchResultType":{"type":"string","enum":["entity","relationship","community"],"title":"KGSearchResultType"},"KGSearchSettings":{"properties":{"entities_level":{"$ref":"#/components/schemas/EntityLevel","description":"The level of entities to search for","default":"document"},"filters":{"type":"object","title":"Filters","description":"Alias for search_filters","deprecated":true},"search_filters":{"type":"object","title":"Search Filters","description":"Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.\n\n Commonly seen filters include operations include the following:\n\n `{\"document_id\": {\"$eq\": \"9fbe403b-...\"}}`\n\n `{\"document_id\": {\"$in\": [\"9fbe403b-...\", \"3e157b3a-...\"]}}`\n\n `{\"collection_ids\": {\"$overlap\": [\"122fdf6a-...\", \"...\"]}}`\n\n `{\"$and\": {\"$document_id\": ..., \"collection_ids\": ...}}`"},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"graphrag_map_system":{"type":"string","title":"Graphrag Map System","description":"The system prompt for the graphrag map prompt.","default":"graphrag_map_system"},"graphrag_reduce_system":{"type":"string","title":"Graphrag Reduce System","description":"The system prompt for the graphrag reduce prompt.","default":"graphrag_reduce_system"},"use_kg_search":{"type":"boolean","title":"Use Kg Search","description":"Whether to use KG search","default":false},"kg_search_type":{"type":"string","title":"Kg Search Type","description":"KG search type","default":"local"},"kg_search_level":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kg Search Level","description":"KG search level"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"Configuration for text generation during graph search."},"max_community_description_length":{"type":"integer","title":"Max Community Description Length","default":65536},"max_llm_queries_for_global_search":{"type":"integer","title":"Max Llm Queries For Global Search","default":250},"local_search_limits":{"additionalProperties":{"type":"integer"},"type":"object","title":"Local Search Limits","default":{"__Entity__":20,"__Relationship__":20,"__Community__":20}}},"type":"object","title":"KGSearchSettings","generation_config":{"max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"kg_search_level":"0","kg_search_type":"local","local_search_limits":{"__Community__":20,"__Entity__":20,"__Relationship__":20},"max_community_description_length":65536,"max_llm_queries_for_global_search":250,"use_kg_search":true},"KGTriplesResponse":{"properties":{"triples":{"items":{"$ref":"#/components/schemas/Triple"},"type":"array","title":"Triples","description":"The list of triples in the graph."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of triples in the graph for the collection or document."}},"type":"object","required":["triples","total_entries"],"title":"KGTriplesResponse","description":"Response for knowledge graph triples.","example":{"total_entries":2,"triples":[{"description":"Paris is the capital of France","object":"France","predicate":"is capital of","subject":"Paris"}]}},"KGTunePromptResponse":{"properties":{"tuned_prompt":{"type":"string","title":"Tuned Prompt","description":"The updated 
prompt."}},"type":"object","required":["tuned_prompt"],"title":"KGTunePromptResponse","description":"Response containing just the tuned prompt string.","example":{"tuned_prompt":"The updated prompt"}},"ListVectorIndicesResponse":{"properties":{"indices":{"items":{"type":"object"},"type":"array","title":"Indices"}},"type":"object","required":["indices"],"title":"ListVectorIndicesResponse"},"LogEntry":{"properties":{"key":{"type":"string","title":"Key"},"value":{"title":"Value"},"timestamp":{"type":"string","format":"date-time","title":"Timestamp"}},"type":"object","required":["key","value","timestamp"],"title":"LogEntry"},"LogResponse":{"properties":{"run_id":{"type":"string","format":"uuid","title":"Run Id"},"run_type":{"type":"string","title":"Run Type"},"entries":{"items":{"$ref":"#/components/schemas/LogEntry"},"type":"array","title":"Entries"},"timestamp":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Timestamp"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"}},"type":"object","required":["run_id","run_type","entries","timestamp","user_id"],"title":"LogResponse"},"Message":{"properties":{"role":{"anyOf":[{"$ref":"#/components/schemas/MessageType"},{"type":"string"}],"title":"Role"},"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"function_call":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Function Call"},"tool_calls":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"type":"object","required":["role"],"title":"Message","content":"This is a test message.","role":"user"},"MessageType":{"type":"string","enum":["system","user","assistant","function","tool"],"title":"MessageType"},"PaginatedResultsWrapper_list_CollectionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[CollectionResponse]]"},"PaginatedResultsWrapper_list_ConversationOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/ConversationOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[ConversationOverviewResponse]]"},"PaginatedResultsWrapper_list_DocumentChunkResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentChunkResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[DocumentChunkResponse]]"},"PaginatedResultsWrapper_list_DocumentOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[DocumentOverviewResponse]]"},"PaginatedResultsWrapper_list_UserOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/UserOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total 
Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[UserOverviewResponse]]"},"PaginatedResultsWrapper_list_UserResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/shared__api__models__management__responses__UserResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[UserResponse]]"},"PromptResponse":{"properties":{"name":{"type":"string","title":"Name"},"template":{"type":"string","title":"Template"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types"}},"type":"object","required":["name","template","created_at","updated_at","input_types"],"title":"PromptResponse"},"RAGAgentResponse":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"List of messages in the RAG agent response"},"conversation_id":{"type":"string","title":"Conversation Id","description":"The conversation ID for the RAG agent response"}},"type":"object","required":["messages","conversation_id"],"title":"RAGAgentResponse","example":[{"content":"## You are a helpful assistant that can search for information.\n\nWhen asked a question, perform a search to find relevant information and provide a response.\n\nThe response should contain line-item attributions to relevent search results, and be as informative if possible.\nIf no relevant results are found, then state that no results were found.\nIf no obvious question is present, then do not carry out a search, and instead ask for clarification.","role":"system"},{"content":"You are a helpful assistant.","role":"system"},{"content":"Who is the greatest philospher of all time?","role":"user"},{"content":"Aristotle is widely considered the greatest philospher of all time.","role":"assistant"},{"content":"Can you tell me more about him?","role":"user"},{"function_call":{"arguments":"{\"query\":\"Aristotle biography\"}","name":"search"},"role":"assistant"},{"content":"1. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n2. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n3. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. 
While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n4. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n5. Life\nIn general, the details of Aristotle's life are not well-established. The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\n","name":"search","role":"function"},{"content":"Aristotle (384–322 BC) was an Ancient Greek philosopher and polymath whose contributions have had a profound impact on various fields of knowledge. Here are some key points about his life and work:\n\n1. **Early Life**: Aristotle was born in 384 BC in Stagira, Chalcidice, which is near modern-day Thessaloniki, Greece. His father, Nicomachus, was the personal physician to King Amyntas of Macedon, which exposed Aristotle to medical and biological knowledge from a young age [C].\n\n2. **Education and Career**: After the death of his parents, Aristotle was sent to Athens to study at Plato's Academy, where he remained for about 20 years. After Plato's death, Aristotle left Athens and eventually became the tutor of Alexander the Great [C].\n\n3. **Philosophical Contributions**: Aristotle founded the Lyceum in Athens, where he established the Peripatetic school of philosophy. His works cover a wide range of subjects, including metaphysics, ethics, politics, logic, biology, and aesthetics. His writings laid the groundwork for many modern scientific and philosophical inquiries [A].\n\n4. **Legacy**: Aristotle's influence extends beyond philosophy to the natural sciences, linguistics, economics, and psychology. 
His method of systematic observation and analysis has been foundational to the development of modern science [A].\n\nAristotle's comprehensive approach to knowledge and his systematic methodology have earned him a lasting legacy as one of the greatest philosophers of all time.\n\nSources:\n- [A] Aristotle's broad range of writings and influence on modern science.\n- [C] Details about Aristotle's early life and education.","role":"assistant"}]},"RAGResponse":{"properties":{"completion":{"title":"Completion","description":"The generated completion from the RAG process"},"search_results":{"$ref":"#/components/schemas/SearchResponse","description":"The search results used for the RAG process"}},"type":"object","required":["completion","search_results"],"title":"RAGResponse","example":{"completion":{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Paris is the capital of France.","role":"assistant"}}],"id":"chatcmpl-example123"},"search_results":{"kg_search_results":[{"content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}}},"RawChunk":{"properties":{"text":{"type":"string","title":"Text"}},"type":"object","required":["text"],"title":"RawChunk"},"ResultsWrapper_AllPromptsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AllPromptsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AllPromptsResponse]"},"ResultsWrapper_AnalyticsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AnalyticsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AnalyticsResponse]"},"ResultsWrapper_AppSettingsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AppSettingsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AppSettingsResponse]"},"ResultsWrapper_CollectionResponse_":{"properties":{"results":{"$ref":"#/components/schemas/CollectionResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[CollectionResponse]"},"ResultsWrapper_CreateVectorIndexResponse_":{"properties":{"results":{"$ref":"#/components/schemas/CreateVectorIndexResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[CreateVectorIndexResponse]"},"ResultsWrapper_DeleteVectorIndexResponse_":{"properties":{"results":{"$ref":"#/components/schemas/DeleteVectorIndexResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[DeleteVectorIndexResponse]"},"ResultsWrapper_GenericMessageResponse_":{"properties":{"results":{"$ref":"#/components/schemas/GenericMessageResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[GenericMessageResponse]"},"ResultsWrapper_KGCommunitiesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGCommunitiesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGCommunitiesResponse]"},"ResultsWrapper_KGEntitiesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGEntitiesResponse"}},"type":"object","required":["results"],"title":"
ResultsWrapper[KGEntitiesResponse]"},"ResultsWrapper_KGTriplesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGTriplesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGTriplesResponse]"},"ResultsWrapper_KGTunePromptResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGTunePromptResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGTunePromptResponse]"},"ResultsWrapper_ListVectorIndicesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/ListVectorIndicesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[ListVectorIndicesResponse]"},"ResultsWrapper_NoneType_":{"properties":{"results":{"type":"null","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[NoneType]"},"ResultsWrapper_RAGAgentResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGAgentResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGAgentResponse]"},"ResultsWrapper_RAGResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGResponse]"},"ResultsWrapper_SearchResponse_":{"properties":{"results":{"$ref":"#/components/schemas/SearchResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[SearchResponse]"},"ResultsWrapper_ServerStats_":{"properties":{"results":{"$ref":"#/components/schemas/ServerStats"}},"type":"object","required":["results"],"title":"ResultsWrapper[ServerStats]"},"ResultsWrapper_TokenResponse_":{"properties":{"results":{"$ref":"#/components/schemas/TokenResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[TokenResponse]"},"ResultsWrapper_Union_KGCreationResponse__KGCreationEstimationResponse__":{"properties":{"results":{"anyOf":[{"$ref":"#/components/schemas/KGCreationResponse"},{"$ref":"#/components/schemas/KGCreationEstimationResponse"}],"title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[Union[KGCreationResponse, KGCreationEstimationResponse]]"},"ResultsWrapper_Union_KGEnrichmentResponse__KGEnrichmentEstimationResponse__":{"properties":{"results":{"anyOf":[{"$ref":"#/components/schemas/KGEnrichmentResponse"},{"$ref":"#/components/schemas/KGEnrichmentEstimationResponse"}],"title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[Union[KGEnrichmentResponse, KGEnrichmentEstimationResponse]]"},"ResultsWrapper_Union_KGEntityDeduplicationResponse__KGDeduplicationEstimationResponse__":{"properties":{"results":{"anyOf":[{"$ref":"#/components/schemas/KGEntityDeduplicationResponse"},{"$ref":"#/components/schemas/KGDeduplicationEstimationResponse"}],"title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[Union[KGEntityDeduplicationResponse, 
KGDeduplicationEstimationResponse]]"},"ResultsWrapper_UpdatePromptResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdatePromptResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdatePromptResponse]"},"ResultsWrapper_UpdateResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdateResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdateResponse]"},"ResultsWrapper_UserResponse_":{"properties":{"results":{"$ref":"#/components/schemas/shared__api__models__auth__responses__UserResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UserResponse]"},"ResultsWrapper_list_CollectionOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionOverviewResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[CollectionOverviewResponse]]"},"ResultsWrapper_list_CollectionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[CollectionResponse]]"},"ResultsWrapper_list_IngestionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/IngestionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[IngestionResponse]]"},"ResultsWrapper_list_LogResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/LogResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[LogResponse]]"},"ResultsWrapper_list_Tuple_str__Message___":{"properties":{"results":{"items":{"prefixItems":[{"type":"string"},{"$ref":"#/components/schemas/Message"}],"type":"array","maxItems":2,"minItems":2},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[Tuple[str, Message]]]"},"SearchResponse":{"properties":{"vector_search_results":{"items":{"$ref":"#/components/schemas/VectorSearchResult"},"type":"array","title":"Vector Search Results","description":"List of vector search results"},"kg_search_results":{"anyOf":[{"items":{"$ref":"#/components/schemas/KGSearchResult"},"type":"array"},{"type":"null"}],"title":"Kg Search Results","description":"Knowledge graph search results, if applicable"}},"type":"object","required":["vector_search_results"],"title":"SearchResponse","example":{"kg_search_results":[{"content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}},"ServerStats":{"properties":{"start_time":{"type":"string","format":"date-time","title":"Start Time"},"uptime_seconds":{"type":"number","title":"Uptime Seconds"},"cpu_usage":{"type":"number","title":"Cpu Usage"},"memory_usage":{"type":"number","title":"Memory 
Usage"}},"type":"object","required":["start_time","uptime_seconds","cpu_usage","memory_usage"],"title":"ServerStats"},"Token":{"properties":{"token":{"type":"string","title":"Token"},"token_type":{"type":"string","title":"Token Type"}},"type":"object","required":["token","token_type"],"title":"Token"},"TokenResponse":{"properties":{"access_token":{"$ref":"#/components/schemas/Token"},"refresh_token":{"$ref":"#/components/schemas/Token"}},"type":"object","required":["access_token","refresh_token"],"title":"TokenResponse"},"Triple":{"properties":{"id":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Id"},"subject":{"type":"string","title":"Subject"},"predicate":{"type":"string","title":"Predicate"},"object":{"type":"string","title":"Object"},"weight":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Weight","default":1.0},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"predicate_embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Predicate Embedding"},"extraction_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Extraction Ids","default":[]},"document_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Document Id"},"attributes":{"anyOf":[{"type":"object"},{"type":"string"}],"title":"Attributes","default":{}}},"type":"object","required":["subject","predicate","object"],"title":"Triple","description":"A relationship between two entities. This is a generic relationship, and can be used to represent any type of relationship between any two entities."},"UpdatePromptResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"UpdatePromptResponse"},"UpdateResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Task Id","description":"The task ID of the ingestion request."},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids","description":"The ID of the document that was ingested."}},"type":"object","required":["message","document_ids"],"title":"UpdateResponse","example":{"document_ids":["9fbe403b-c11c-5aae-8ade-ef22980c3ad1"],"message":"Update task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"UserOverviewResponse":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id"},"num_files":{"type":"integer","title":"Num Files"},"total_size_in_bytes":{"type":"integer","title":"Total Size In Bytes"},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids"}},"type":"object","required":["user_id","num_files","total_size_in_bytes","document_ids"],"title":"UserOverviewResponse"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"VectorSearchResult":{"properties":{"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection 
Ids"},"score":{"type":"number","title":"Score"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["extraction_id","document_id","user_id","collection_ids","score","text","metadata"],"title":"VectorSearchResult","description":"Result of a search operation.","collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"},"VectorSearchSettings":{"properties":{"use_vector_search":{"type":"boolean","title":"Use Vector Search","description":"Whether to use vector search","default":true},"use_hybrid_search":{"type":"boolean","title":"Use Hybrid Search","description":"Whether to perform a hybrid search (combining vector and keyword search)","default":false},"filters":{"type":"object","title":"Filters","description":"Alias for search_filters","deprecated":true},"search_filters":{"type":"object","title":"Search Filters","description":"Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.\n\n Commonly seen filters include operations include the following:\n\n `{\"document_id\": {\"$eq\": \"9fbe403b-...\"}}`\n\n `{\"document_id\": {\"$in\": [\"9fbe403b-...\", \"3e157b3a-...\"]}}`\n\n `{\"collection_ids\": {\"$overlap\": [\"122fdf6a-...\", \"...\"]}}`\n\n `{\"$and\": {\"$document_id\": ..., \"collection_ids\": ...}}`"},"search_limit":{"type":"integer","maximum":1000.0,"minimum":1.0,"title":"Search Limit","description":"Maximum number of results to return","default":10},"offset":{"type":"integer","minimum":0.0,"title":"Offset","description":"Offset to paginate search results","default":0},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"index_measure":{"$ref":"#/components/schemas/IndexMeasure","description":"The distance measure to use for indexing","default":"cosine_distance"},"include_values":{"type":"boolean","title":"Include Values","description":"Whether to include search score values in the search results","default":true},"include_metadatas":{"type":"boolean","title":"Include Metadatas","description":"Whether to include element metadata in the search results","default":true},"probes":{"type":"integer","title":"Probes","description":"Number of ivfflat index lists to query. Higher increases accuracy but decreases speed.","default":10},"ef_search":{"type":"integer","title":"Ef Search","description":"Size of the dynamic candidate list for HNSW index search. 
Higher increases accuracy but decreases speed.","default":40},"hybrid_search_settings":{"$ref":"#/components/schemas/HybridSearchSettings","description":"Settings for hybrid search","default":{"full_text_weight":1.0,"semantic_weight":5.0,"full_text_limit":200,"rrf_k":50}},"search_strategy":{"type":"string","title":"Search Strategy","description":"Search strategy to use (e.g., 'default', 'query_fusion', 'hyde')","default":"vanilla"}},"type":"object","title":"VectorSearchSettings","ef_search":40,"filters":{"category":"technology"},"hybrid_search_settings":{"full_text_limit":200,"full_text_weight":1.0,"rrf_k":50,"semantic_weight":5.0},"include_metadata":true,"index_measure":"cosine_distance","limit":20,"offset":0,"probes":10,"selected_collection_ids":["2acb499e-8428-543b-bd85-0d9098718220","3e157b3a-8469-51db-90d9-52e7d896b49b"],"use_hybrid_search":true,"use_vector_search":true},"VectorTableName":{"type":"string","enum":["vectors","document_entity","collection_entity","community_report"],"title":"VectorTableName","description":"This enum represents the different tables where we store vectors."},"shared__api__models__auth__responses__UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-10-25T09:43:53.444456"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-10-25T09:43:53.444465"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"},"shared__api__models__management__responses__UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-10-25T09:43:53.460296"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-10-25T09:43:53.460300"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile 
Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"}},"securitySchemes":{"HTTPBearer":{"type":"http","scheme":"bearer"},"OAuth2PasswordBearer":{"type":"oauth2","flows":{"password":{"scopes":{},"tokenUrl":"token"}}}}}} +{"openapi":"3.1.0","info":{"title":"R2R Application API","version":"1.0.0"},"paths":{"/v2/ingest_files":{"post":{"summary":"Ingest Files App","description":"Ingest files into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest files for their own access. More expansive collection permissioning is under development.","operationId":"ingest_files_app_v2_ingest_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_ingest_files_app_v2_ingest_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_IngestionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_files(\n file_paths=[\"pg_essay_1.html\", \"got.txt\"],\n metadatas=[{\"metadata_1\":\"some random metadata\"}, {\"metadata_2\": \"some other random metadata\"}],\n document_ids=None\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1.html;type=text/html\" \\\n -F \"file=@got.txt;type=text/plain\" \\\n -F 'metadatas=[{},{}]' \\\n -F 'document_ids=null'\n"}]}},"/v2/update_files":{"post":{"summary":"Update Files App","description":"Update existing files in the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to update files and their associated metadatas into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only update their own files. 
More expansive collection permissioning is under development.","operationId":"update_files_app_v2_update_files_post","requestBody":{"content":{"multipart/form-data":{"schema":{"$ref":"#/components/schemas/Body_update_files_app_v2_update_files_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdateResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.update_files(\n file_paths=[\"pg_essay_1_v2.txt\"],\n document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/update_files\" \\\n -H \"Content-Type: multipart/form-data\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1_v2.txt;type=text/plain\" \\\n -F 'document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]'\n"}]}},"/v2/ingest_chunks":{"post":{"summary":"Ingest Chunks App","description":"Ingest text chunks into the system.\n\nThis endpoint supports multipart/form-data requests, enabling you to ingest pre-parsed text chunks into R2R.\n\nA valid user authentication token is required to access this endpoint, as regular users can only ingest chunks for their own access. More expansive collection permissioning is under development.","operationId":"ingest_chunks_app_v2_ingest_chunks_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_ingest_chunks_app_v2_ingest_chunks_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_IngestionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.ingest_chunks(\n chunks=[\n {\n \"text\": \"Another chunk of text\",\n },\n {\n \"text\": \"Yet another chunk of text\",\n },\n {\n \"text\": \"A chunk of text\",\n },\n ],\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/ingest_chunks\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"chunks\": [\n {\n \"text\": \"Another chunk of text\"\n },\n {\n \"text\": \"Yet another chunk of text\"\n },\n {\n \"text\": \"A chunk of text\"\n }\n ],\n \"document_id\": \"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\",\n \"metadata\": {}\n }'\n"}]}},"/v2/update_chunk/{document_id}/{extraction_id}":{"put":{"summary":"Update Chunk App","operationId":"update_chunk_app_v2_update_chunk__document_id___extraction_id__put","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","description":"The document ID of the chunk to update","title":"Document Id"},"description":"The document ID of the chunk to update"},{"name":"extraction_id","in":"path","required":true,"schema":{"type":"string","format":"uuid","description":"The extraction ID of the chunk to update","title":"Extraction 
Id"},"description":"The extraction ID of the chunk to update"}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_chunk_app_v2_update_chunk__document_id___extraction_id__put"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdateResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_vector_index":{"post":{"summary":"Create Vector Index App","description":"Create a vector index for a given table.","operationId":"create_vector_index_app_v2_create_vector_index_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_vector_index_app_v2_create_vector_index_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CreateVectorIndexResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.create_vector_index(\n table_name=\"vectors\",\n index_method=\"hnsw\",\n index_measure=\"cosine_distance\",\n index_arguments={\"m\": 16, \"ef_construction\": 64},\n concurrently=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"http://localhost:7276/v2/create_vector_index\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\n \"table_name\": \"vectors\",\n \"index_method\": \"hnsw\",\n \"index_measure\": \"cosine_distance\",\n \"index_arguments\": {\n \"m\": 16,\n \"ef_construction\": 64\n },\n \"concurrently\": true\n }'\n"}]}},"/v2/list_vector_indices":{"get":{"summary":"List Vector Indices App","operationId":"list_vector_indices_app_v2_list_vector_indices_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"table_name","in":"query","required":false,"schema":{"anyOf":[{"$ref":"#/components/schemas/VectorTableName"},{"type":"null"}],"description":"The table to create the index on. Default: vectors","default":"vectors","title":"Table Name"},"description":"The table to create the index on. 
Default: vectors"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ListVectorIndicesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.create_vector_index(\n table_name=\"vectors\",\n index_method=\"hnsw\",\n index_measure=\"cosine_distance\",\n index_arguments={\"m\": 16, \"ef_construction\": 64},\n concurrently=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"http://localhost:7276/v2/create_vector_index\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\n \"table_name\": \"vectors\",\n \"index_method\": \"hnsw\",\n \"index_measure\": \"cosine_distance\",\n \"index_arguments\": {\n \"m\": 16,\n \"ef_construction\": 64\n },\n \"concurrently\": true\n }'\n"}]}},"/v2/delete_vector_index":{"delete":{"summary":"Delete Vector Index App","operationId":"delete_vector_index_app_v2_delete_vector_index_delete","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_vector_index_app_v2_delete_vector_index_delete"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_DeleteVectorIndexResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.delete_vector_index(\n index_name=\"ix_vector_cosine_ops_hnsw__20241021211541\",\n table_name=\"vectors\",\n concurrently=True\n)\n"},{"lang":"Shell","source":"curl -X DELETE \"http://localhost:7276/v2/delete_vector_index\" \\\n -H \"Content-Type: application/json\" \\\n -d '{\n \"index_name\": \"ix_vector_cosine_ops_hnsw__20241021211541\",\n \"table_name\": \"vectors\",\n \"concurrently\": true\n }'\n"}]}},"/v2/health":{"get":{"summary":"Health Check","operationId":"health_check_v2_health_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/v2/server_stats":{"get":{"summary":"Server Stats","operationId":"server_stats_v2_server_stats_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ServerStats_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/update_prompt":{"post":{"summary":"Update Prompt App","operationId":"update_prompt_app_v2_update_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_prompt_app_v2_update_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/add_prompt":{"post":{"summary":"Add Prompt 
App","operationId":"add_prompt_app_v2_add_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_prompt_app_v2_add_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_prompt/{prompt_name}":{"get":{"summary":"Get Prompt App","operationId":"get_prompt_app_v2_get_prompt__prompt_name__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"},{"name":"inputs","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"description":"JSON-encoded prompt inputs","title":"Inputs"},"description":"JSON-encoded prompt inputs"},{"name":"prompt_override","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Prompt override","title":"Prompt Override"},"description":"Prompt override"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_all_prompts":{"get":{"summary":"Get All Prompts App","operationId":"get_all_prompts_app_v2_get_all_prompts_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AllPromptsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_prompt/{prompt_name}":{"delete":{"summary":"Delete Prompt App","operationId":"delete_prompt_app_v2_delete_prompt__prompt_name__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/analytics":{"get":{"summary":"Get Analytics App","operationId":"get_analytics_app_v2_analytics_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filter_criteria","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Filter Criteria"}},{"name":"analysis_types","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Analysis Types"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AnalyticsResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logs":{"get":{"summary":"Logs 
App","operationId":"logs_app_v2_logs_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"run_type_filter","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"default":"","title":"Run Type Filter"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_LogResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/app_settings":{"get":{"summary":"App Settings","operationId":"app_settings_v2_app_settings_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AppSettingsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/users_overview":{"get":{"summary":"Users Overview App","operationId":"users_overview_app_v2_users_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"default":[],"title":"User Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_UserOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete":{"delete":{"summary":"Delete App","operationId":"delete_app_v2_delete_delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filters","in":"query","required":true,"schema":{"type":"string","description":"JSON-encoded filters","title":"Filters"},"description":"JSON-encoded filters"}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/download_file/{document_id}":{"get":{"summary":"Download File App","description":"Download a file by its document ID as a stream.","operationId":"download_file_app_v2_download_file__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"}],"responses":{"200":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/documents_overview":{"get":{"summary":"Documents Overview App","operationId":"documents_overview_app_v2_documents_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_ids","in":"query","required":false,"schema":{"type":"array","items":{"type":"string"},"default":[],"title":"Document 
Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","minimum":-1,"description":"Number of items to return. Use -1 to return all items.","default":100,"title":"Limit"},"description":"Number of items to return. Use -1 to return all items."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/document_chunks/{document_id}":{"get":{"summary":"Document Chunks App","operationId":"document_chunks_app_v2_document_chunks__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","title":"Document Id"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":100,"title":"Limit"}},{"name":"include_vectors","in":"query","required":false,"schema":{"anyOf":[{"type":"boolean"},{"type":"null"}],"default":false,"title":"Include Vectors"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentChunkResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collections_overview":{"get":{"summary":"Collections Overview App","operationId":"collections_overview_app_v2_collections_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"title":"Collection Ids"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","maximum":1000,"minimum":1},{"type":"null"}],"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_collection":{"post":{"summary":"Create Collection App","operationId":"create_collection_app_v2_create_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_collection_app_v2_create_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_collection/{collection_id}":{"get":{"summary":"Get Collection 
App","operationId":"get_collection_app_v2_get_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/update_collection":{"put":{"summary":"Update Collection App","operationId":"update_collection_app_v2_update_collection_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_collection_app_v2_update_collection_put"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_CollectionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_collection/{collection_id}":{"delete":{"summary":"Delete Collection App","operationId":"delete_collection_app_v2_delete_collection__collection_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/list_collections":{"get":{"summary":"List Collections App","operationId":"list_collections_app_v2_list_collections_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/add_user_to_collection":{"post":{"summary":"Add User To Collection App","operationId":"add_user_to_collection_app_v2_add_user_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_user_to_collection_app_v2_add_user_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_user_from_collection":{"post":{"summary":"Remove User From Collection 
App","operationId":"remove_user_from_collection_app_v2_remove_user_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_users_in_collection/{collection_id}":{"get":{"summary":"Get Users In Collection App","operationId":"get_users_in_collection_app_v2_get_users_in_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_UserResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user_collections/{user_id}":{"get":{"summary":"Get Collections For User App","operationId":"get_collections_for_user_app_v2_user_collections__user_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"User ID","title":"User Id"},"description":"User ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/assign_document_to_collection":{"post":{"summary":"Assign Document To Collection App","operationId":"assign_document_to_collection_app_v2_assign_document_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_document_from_collection":{"post":{"summary":"Remove Document From Collection 
App","operationId":"remove_document_from_collection_app_v2_remove_document_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/document_collections/{document_id}":{"get":{"summary":"Document Collections App","operationId":"document_collections_app_v2_document_collections__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_CollectionResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collection/{collection_id}/documents":{"get":{"summary":"Documents In Collection App","operationId":"documents_in_collection_app_v2_collection__collection_id__documents_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/conversations_overview":{"get":{"summary":"Conversations Overview App","operationId":"conversations_overview_app_v2_conversations_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_ids","in":"query","required":false,"schema":{"type":"array","items":{"type":"string"},"default":[],"title":"Conversation Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/PaginatedResultsWrapper_list_ConversationOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_conversation/{conversation_id}":{"get":{"summary":"Get 
Conversation","operationId":"get_conversation_v2_get_conversation__conversation_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"},{"name":"branch_id","in":"query","required":false,"schema":{"type":"string","description":"Branch ID","title":"Branch Id"},"description":"Branch ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_list_Tuple_str__Message___"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_conversation":{"post":{"summary":"Create Conversation","operationId":"create_conversation_v2_create_conversation_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"object","title":"Response Create Conversation V2 Create Conversation Post"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/add_message/{conversation_id}":{"post":{"summary":"Add Message","operationId":"add_message_v2_add_message__conversation_id__post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_message_v2_add_message__conversation_id__post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"object","title":"Response Add Message V2 Add Message Conversation Id Post"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/update_message/{message_id}":{"put":{"summary":"Edit Message","operationId":"edit_message_v2_update_message__message_id__put","security":[{"HTTPBearer":[]}],"parameters":[{"name":"message_id","in":"path","required":true,"schema":{"type":"string","description":"Message ID","title":"Message Id"},"description":"Message ID"}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"type":"string","description":"New content","title":"Message"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"object","title":"Response Edit Message V2 Update Message Message Id Put"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/branches_overview/{conversation_id}":{"get":{"summary":"Branches Overview","operationId":"branches_overview_v2_branches_overview__conversation_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"type":"object","title":"Response Branches Overview V2 Branches Overview Conversation Id Get"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete_conversation/{conversation_id}":{"delete":{"summary":"Delete 
Conversation","operationId":"delete_conversation_v2_delete_conversation__conversation_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"conversation_id","in":"path","required":true,"schema":{"type":"string","description":"Conversation ID","title":"Conversation Id"},"description":"Conversation ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_NoneType_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/search":{"post":{"summary":"Search App","description":"Perform a search query on the vector database and knowledge graph.\n\nThis endpoint allows for complex filtering of search results using PostgreSQL-based queries.\nFilters can be applied to various fields such as document_id, and internal metadata values.\n\n\nAllowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.","operationId":"search_app_v2_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_search_app_v2_search_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_SearchResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.search(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/search\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n }'\n"}]}},"/v2/rag":{"post":{"summary":"Rag App","description":"Execute a RAG (Retrieval-Augmented Generation) query.\n\nThis endpoint combines search results with language model generation.\nIt supports the same filtering capabilities as the search endpoint,\nallowing for 
precise control over the retrieved context.\n\nThe generation process can be customized using the rag_generation_config parameter.","operationId":"rag_app_v2_rag_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_rag_app_v2_rag_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.rag(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True,\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/rag\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true,\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n }'\n"}]}},"/v2/agent":{"post":{"summary":"Agent App","description":"Implement an agent-based interaction for complex query processing.\n\nThis endpoint supports multi-turn conversations and can handle complex queries\nby breaking them down into sub-tasks. 
It uses the same filtering capabilities\nas the search and RAG endpoints for retrieving relevant information.\n\nThe agent's behavior can be customized using the rag_generation_config and\ntask_prompt_override parameters.","operationId":"agent_app_v2_agent_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_agent_app_v2_agent_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGAgentResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.agent(\n messages=[\n {\"role\": \"user\", \"content\": \"Who is the greatest philosopher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philosopher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"$eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n include_title_if_available=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/agent\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"messages\": [\n {\"role\": \"user\", \"content\": \"Who is the greatest philosopher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philosopher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"$eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": false\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n \"include_title_if_available\": true\n }'\n"}]}},"/v2/completion":{"post":{"summary":"Completion","description":"Generate completions for a list of messages.\n\nThis endpoint uses the language model to generate completions for the provided messages.\nThe generation process can be customized using the generation_config parameter.","operationId":"completion_v2_completion_post","security":[{"HTTPBearer":[]}],"parameters":[{"name":"response_model","in":"query","required":false,"schema":{"title":"Response Model"}}],"requestBody":{"required":true,"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_completion_v2_completion_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/register":{"post":{"summary":"Register App","description":"Register a new user with the given email and 
password.","operationId":"register_app_v2_register_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_register_app_v2_register_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/verify_email":{"post":{"summary":"Verify Email App","description":"Verify a user's email address.\n\nThis endpoint is used to confirm a user's email address using the verification code\nsent to their email after registration.","operationId":"verify_email_app_v2_verify_email_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_verify_email_app_v2_verify_email_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/login":{"post":{"summary":"Login App","description":"Authenticate a user and provide access tokens.\n\nThis endpoint authenticates a user using their email (username) and password,\nand returns access and refresh tokens upon successful authentication.","operationId":"login_app_v2_login_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/Body_login_app_v2_login_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logout":{"post":{"summary":"Logout App","description":"Log out the current user.\n\nThis endpoint invalidates the user's current access token, effectively logging them out.","operationId":"logout_app_v2_logout_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}}},"security":[{"OAuth2PasswordBearer":[]},{"HTTPBearer":[]}]}},"/v2/user":{"get":{"summary":"Get User App","description":"Get the current user's profile information.\n\nThis endpoint returns the profile information of the currently authenticated user.","operationId":"get_user_app_v2_user_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}}},"security":[{"HTTPBearer":[]}]},"put":{"summary":"Put User App","description":"Update the current user's profile information.\n\nThis endpoint allows the authenticated user to update their profile information.","operationId":"put_user_app_v2_user_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_put_user_app_v2_user_put"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/refresh_access_token":{"post":{"summary":"Refresh Access Token App","description":"Refresh the access token using a refresh token.\n\nThis endpoint allows users to obtain a new access token using their refresh token.","operationId":"refresh_access_token_app_v2_refresh_access_token_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","title":"Refresh Token","description":"Refresh token"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/change_password":{"post":{"summary":"Change Password App","description":"Change the authenticated user's password.\n\nThis endpoint allows users to change their password by providing their current password\nand a new password.","operationId":"change_password_app_v2_change_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_change_password_app_v2_change_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/request_password_reset":{"post":{"summary":"Request Password Reset App","description":"Request a password reset for a user.\n\nThis endpoint initiates the password reset process by sending a reset link\nto the specified email address.","operationId":"request_password_reset_app_v2_request_password_reset_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","format":"email","title":"Email","description":"User's email address"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/reset_password":{"post":{"summary":"Reset Password App","operationId":"reset_password_app_v2_reset_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_reset_password_app_v2_reset_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user/{user_id}":{"delete":{"summary":"Delete User App","description":"Delete a user account.\n\nThis endpoint allows users to delete their own account or, for superusers,\nto delete any user account.","operationId":"delete_user_app_v2_user__user_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"ID of the user to delete","title":"User Id"},"description":"ID of the user to 
delete"}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_user_app_v2_user__user_id__delete"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_graph":{"post":{"summary":"Create Graph","description":"Creating a graph on your documents. This endpoint takes input a list of document ids and KGCreationSettings.\nIf document IDs are not provided, the graph will be created on all documents in the system.\nThis step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information.\nIn order to do GraphRAG, you will need to run the enrich_graph endpoint.","operationId":"create_graph_v2_create_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_graph_v2_create_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/enrich_graph":{"post":{"summary":"Enrich Graph","description":"This endpoint enriches the graph with additional information.\nIt creates communities of nodes based on their similarity and adds embeddings to the graph.\nThis step is necessary for GraphRAG to work.","operationId":"enrich_graph_v2_enrich_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_enrich_graph_v2_enrich_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/entities":{"get":{"summary":"Get Entities","description":"Retrieve entities from the knowledge graph.","operationId":"get_entities_v2_entities_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"entity_level","in":"query","required":false,"schema":{"anyOf":[{"$ref":"#/components/schemas/EntityLevel"},{"type":"null"}],"description":"Type of entities to retrieve. Options are: raw, dedup_document, dedup_collection.","default":"document","title":"Entity Level"},"description":"Type of entities to retrieve. 
Options are: raw, dedup_document, dedup_collection."},{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve entities from.","title":"Collection Id"},"description":"Collection ID to retrieve entities from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"entity_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Entity IDs to filter by.","title":"Entity Ids"},"description":"Entity IDs to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGEntitiesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/triples":{"get":{"summary":"Get Triples","description":"Retrieve triples from the knowledge graph.","operationId":"get_triples_v2_triples_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve triples from.","title":"Collection Id"},"description":"Collection ID to retrieve triples from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"entity_names","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Entity names to filter by.","title":"Entity Names"},"description":"Entity names to filter by."},{"name":"triple_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"description":"Triple IDs to filter by.","title":"Triple Ids"},"description":"Triple IDs to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGTriplesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/communities":{"get":{"summary":"Get Communities","description":"Retrieve communities from the knowledge graph.","operationId":"get_communities_v2_communities_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve communities from.","title":"Collection Id"},"description":"Collection ID to retrieve communities from."},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Offset for 
pagination.","default":0,"title":"Offset"},"description":"Offset for pagination."},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Limit for pagination.","default":100,"title":"Limit"},"description":"Limit for pagination."},{"name":"levels","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"integer"}},{"type":"null"}],"description":"Levels to filter by.","title":"Levels"},"description":"Levels to filter by."},{"name":"community_numbers","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"integer"}},{"type":"null"}],"description":"Community numbers to filter by.","title":"Community Numbers"},"description":"Community numbers to filter by."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGCommunitiesResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/deduplicate_entities":{"post":{"summary":"Deduplicate Entities","description":"Deduplicate entities in the knowledge graph.","operationId":"deduplicate_entities_v2_deduplicate_entities_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_deduplicate_entities_v2_deduplicate_entities_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_Union_KGEntityDeduplicationResponse__KGDeduplicationEstimationResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/tuned_prompt":{"get":{"summary":"Get Tuned Prompt","description":"Auto-tune the prompt for a specific collection.","operationId":"get_tuned_prompt_v2_tuned_prompt_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"query","required":true,"schema":{"type":"string","description":"The name of the prompt to tune. Valid options are 'kg_triples_extraction_prompt', 'kg_entity_description_prompt' and 'community_reports_prompt'.","title":"Prompt Name"},"description":"The name of the prompt to tune. 
Valid options are 'kg_triples_extraction_prompt', 'kg_entity_description_prompt' and 'community_reports_prompt'."},{"name":"collection_id","in":"query","required":false,"schema":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"description":"Collection ID to retrieve communities from.","title":"Collection Id"},"description":"Collection ID to retrieve communities from."},{"name":"documents_offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer"},{"type":"null"}],"description":"Offset for document pagination.","default":0,"title":"Documents Offset"},"description":"Offset for document pagination."},{"name":"documents_limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer"},{"type":"null"}],"description":"Limit for document pagination.","default":100,"title":"Documents Limit"},"description":"Limit for document pagination."},{"name":"chunks_offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer"},{"type":"null"}],"description":"Offset for chunk pagination.","default":0,"title":"Chunks Offset"},"description":"Offset for chunk pagination."},{"name":"chunks_limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer"},{"type":"null"}],"description":"Limit for chunk pagination.","default":100,"title":"Chunks Limit"},"description":"Limit for chunk pagination."}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGTunePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete_graph_for_collection":{"delete":{"summary":"Delete Graph For Collection","description":"Delete the graph for a given collection. Note that this endpoint may delete a large amount of data created by the KG pipeline, this deletion is irreversible, and recreating the graph may be an expensive operation.\n\nNotes:\nThe endpoint deletes all communities for a given collection. If the cascade flag is set to true, the endpoint also deletes all the entities and triples associated with the collection.\n\nWARNING: Setting this flag to true will delete entities and triples for documents that are shared across multiple collections. 
Do not set this flag unless you are absolutely sure that you want to delete the entities and triples for all documents in the collection.","operationId":"delete_graph_for_collection_v2_delete_graph_for_collection_delete","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_graph_for_collection_v2_delete_graph_for_collection_delete"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/openapi_spec":{"get":{"summary":"Openapi Spec","operationId":"openapi_spec_v2_openapi_spec_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AllPromptsResponse":{"properties":{"prompts":{"additionalProperties":{"$ref":"#/components/schemas/PromptResponse"},"type":"object","title":"Prompts"}},"type":"object","required":["prompts"],"title":"AllPromptsResponse"},"AnalyticsResponse":{"properties":{"analytics_data":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Analytics Data"},"filtered_logs":{"type":"object","title":"Filtered Logs"}},"type":"object","required":["filtered_logs"],"title":"AnalyticsResponse"},"AppSettingsResponse":{"properties":{"config":{"type":"object","title":"Config"},"prompts":{"type":"object","title":"Prompts"}},"type":"object","required":["config","prompts"],"title":"AppSettingsResponse"},"BaseModel":{"properties":{},"type":"object","title":"BaseModel"},"Body_add_message_v2_add_message__conversation_id__post":{"properties":{"message":{"$ref":"#/components/schemas/Message","description":"Message content"},"parent_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Parent Id","description":"Parent message ID"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata","description":"Metadata"}},"type":"object","required":["message"],"title":"Body_add_message_v2_add_message__conversation_id__post"},"Body_add_prompt_app_v2_add_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"type":"string","title":"Template","description":"Prompt template"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name","template"],"title":"Body_add_prompt_app_v2_add_prompt_post"},"Body_add_user_to_collection_app_v2_add_user_to_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_add_user_to_collection_app_v2_add_user_to_collection_post"},"Body_agent_app_v2_agent_post":{"properties":{"message":{"anyOf":[{"$ref":"#/components/schemas/Message"},{"type":"null"}],"description":"Message object"},"messages":{"anyOf":[{"items":{"$ref":"#/components/schemas/Message"},"type":"array"},{"type":"null"}],"title":"Messages","description":"List of message objects (deprecated, use message instead)","deprecated":true},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search 
settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":true},"conversation_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Conversation Id","description":"The ID of the conversation, a new ID is assigned if not provided"},"branch_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Branch Id","description":"The ID of the branch"}},"type":"object","title":"Body_agent_app_v2_agent_post"},"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"},"Body_change_password_app_v2_change_password_post":{"properties":{"current_password":{"type":"string","title":"Current Password","description":"Current password"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["current_password","new_password"],"title":"Body_change_password_app_v2_change_password_post"},"Body_completion_v2_completion_post":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"The messages to complete"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"The generation config"}},"type":"object","required":["messages"],"title":"Body_completion_v2_completion_post"},"Body_create_collection_app_v2_create_collection_post":{"properties":{"name":{"type":"string","title":"Name","description":"Collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Collection description","default":""}},"type":"object","required":["name"],"title":"Body_create_collection_app_v2_create_collection_post"},"Body_create_graph_v2_create_graph_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to create graph for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph creation process."},"kg_creation_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Creation Settings","description":"Settings for the graph creation process."}},"type":"object","title":"Body_create_graph_v2_create_graph_post"},"Body_create_vector_index_app_v2_create_vector_index_post":{"properties":{"table_name":{"anyOf":[{"$ref":"#/components/schemas/VectorTableName"},{"type":"null"}],"description":"The table to create the index on. Default: vectors","default":"vectors"},"index_method":{"$ref":"#/components/schemas/IndexMethod","description":"The indexing method to use. Options: hnsw, ivfflat, auto. Default: hnsw","default":"hnsw"},"index_measure":{"$ref":"#/components/schemas/IndexMeasure","description":"Distance measure for vector comparisons. Options: cosine_distance, l2_distance, max_inner_product. 
Default: cosine_distance","default":"cosine_distance"},"index_arguments":{"anyOf":[{"$ref":"#/components/schemas/IndexArgsIVFFlat"},{"$ref":"#/components/schemas/IndexArgsHNSW"},{"type":"null"}],"title":"Index Arguments","description":"Configuration parameters for the chosen index method. For HNSW: {m: int, ef_construction: int}. For IVFFlat: {n_lists: int}"},"index_name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Index Name","description":"Optional custom name for the index. If not provided, one will be auto-generated"},"index_column":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Index Column","description":"The column containing the vectors to index. Default: `vec`, or `vec_binary` when using hamming or jaccard distance."},"concurrently":{"type":"boolean","title":"Concurrently","description":"Whether to create the index concurrently. Default: true","default":true}},"type":"object","title":"Body_create_vector_index_app_v2_create_vector_index_post"},"Body_deduplicate_entities_v2_deduplicate_entities_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to deduplicate entities for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the deduplication process."},"deduplication_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Deduplication Settings","description":"Settings for the deduplication process."}},"type":"object","title":"Body_deduplicate_entities_v2_deduplicate_entities_post"},"Body_delete_graph_for_collection_v2_delete_graph_for_collection_delete":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id","description":"Collection ID to delete graph for."},"cascade":{"type":"boolean","title":"Cascade","description":"Whether to cascade the deletion, and delete entities and triples belonging to the collection.","default":false}},"type":"object","required":["collection_id"],"title":"Body_delete_graph_for_collection_v2_delete_graph_for_collection_delete"},"Body_delete_user_app_v2_user__user_id__delete":{"properties":{"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password","description":"User's current password"},"delete_vector_data":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Delete Vector Data","description":"Whether to delete the user's vector data","default":false}},"type":"object","title":"Body_delete_user_app_v2_user__user_id__delete"},"Body_delete_vector_index_app_v2_delete_vector_index_delete":{"properties":{"index_name":{"type":"string","title":"Index Name","description":"The name of the index to delete"},"table_name":{"anyOf":[{"$ref":"#/components/schemas/VectorTableName"},{"type":"null"}],"description":"The name of the table containing the index. Default: vectors","default":"vectors"},"concurrently":{"type":"boolean","title":"Concurrently","description":"Whether to delete the index concurrently. 
Default: true","default":true}},"type":"object","required":["index_name"],"title":"Body_delete_vector_index_app_v2_delete_vector_index_delete"},"Body_enrich_graph_v2_enrich_graph_post":{"properties":{"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id","description":"Collection ID to enrich graph for."},"run_type":{"anyOf":[{"$ref":"#/components/schemas/KGRunType"},{"type":"null"}],"description":"Run type for the graph enrichment process.","default":"estimate"},"kg_enrichment_settings":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Kg Enrichment Settings","description":"Settings for the graph enrichment process."}},"type":"object","title":"Body_enrich_graph_v2_enrich_graph_post"},"Body_ingest_chunks_app_v2_ingest_chunks_post":{"properties":{"chunks":{"items":{"$ref":"#/components/schemas/RawChunk"},"type":"array","title":"Chunks","description":"A list of text chunks to ingest into the system.","default":{}},"document_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Document Id","description":"An optional document id to associate the chunks with. If not provided, a unique document id will be generated."},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"},"run_with_orchestration":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Run With Orchestration","description":"Whether or not ingestion runs with orchestration, default is `True`. When set to `False`, the ingestion process will run synchronous and directly return the result.","default":true}},"type":"object","title":"Body_ingest_chunks_app_v2_ingest_chunks_post"},"Body_ingest_files_app_v2_ingest_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to ingest into the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."},"run_with_orchestration":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Run With Orchestration","description":"Whether or not ingestion runs with orchestration, default is `True`. 
When set to `False`, the ingestion process will run synchronously and directly return the result.","default":true}},"type":"object","required":["files"],"title":"Body_ingest_files_app_v2_ingest_files_post"},"Body_login_app_v2_login_post":{"properties":{"grant_type":{"anyOf":[{"type":"string","pattern":"password"},{"type":"null"}],"title":"Grant Type"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"scope":{"type":"string","title":"Scope","default":""},"client_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Id"},"client_secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Secret"}},"type":"object","required":["username","password"],"title":"Body_login_app_v2_login_post"},"Body_put_user_app_v2_user_put":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id","description":"ID of the user to update"},"email":{"anyOf":[{"type":"string","format":"email"},{"type":"null"}],"title":"Email","description":"Updated email address"},"is_superuser":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Is Superuser","description":"Updated superuser status"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated user name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio","description":"Updated user bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture","description":"Updated profile picture URL"}},"type":"object","title":"Body_put_user_app_v2_user_put"},"Body_rag_app_v2_rag_post":{"properties":{"query":{"type":"string","title":"Query","description":"RAG query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":false}},"type":"object","required":["query"],"title":"Body_rag_app_v2_rag_post"},"Body_register_app_v2_register_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"password":{"type":"string","title":"Password","description":"User's password"}},"type":"object","required":["email","password"],"title":"Body_register_app_v2_register_post"},"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"},"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection 
ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"},"Body_reset_password_app_v2_reset_password_post":{"properties":{"reset_token":{"type":"string","title":"Reset Token","description":"Password reset token"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["reset_token","new_password"],"title":"Body_reset_password_app_v2_reset_password_post"},"Body_search_app_v2_search_post":{"properties":{"query":{"type":"string","title":"Query","description":"Search query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"}},"type":"object","required":["query"],"title":"Body_search_app_v2_search_post"},"Body_update_chunk_app_v2_update_chunk__document_id___extraction_id__put":{"properties":{"text":{"type":"string","title":"Text","description":"The new text content for the chunk"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata","description":"Optional updated metadata"},"run_with_orchestration":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Run With Orchestration","default":true}},"type":"object","required":["text"],"title":"Body_update_chunk_app_v2_update_chunk__document_id___extraction_id__put"},"Body_update_collection_app_v2_update_collection_put":{"properties":{"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Updated collection description"}},"type":"object","required":["collection_id"],"title":"Body_update_collection_app_v2_update_collection_put"},"Body_update_files_app_v2_update_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to update in the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"ingestion_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"title":"Ingestion Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."},"run_with_orchestration":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Run With Orchestration","description":"Whether or not ingestion runs with orchestration, default is `True`. 
When set to `False`, the ingestion process will run synchronously and directly return the result.","default":true}},"type":"object","required":["files"],"title":"Body_update_files_app_v2_update_files_post"},"Body_update_prompt_app_v2_update_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Template","description":"Prompt template"},"input_types":{"anyOf":[{"additionalProperties":{"type":"string"},"type":"object"},{"type":"null"}],"title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name"],"title":"Body_update_prompt_app_v2_update_prompt_post"},"Body_verify_email_app_v2_verify_email_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"verification_code":{"type":"string","title":"Verification Code","description":"Email verification code"}},"type":"object","required":["email","verification_code"],"title":"Body_verify_email_app_v2_verify_email_post"},"CollectionOverviewResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"user_count":{"type":"integer","title":"User Count"},"document_count":{"type":"integer","title":"Document Count"}},"type":"object","required":["collection_id","name","description","created_at","updated_at","user_count","document_count"],"title":"CollectionOverviewResponse"},"CollectionResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"}},"type":"object","required":["collection_id","name","description","created_at","updated_at"],"title":"CollectionResponse"},"CommunityReport":{"properties":{"community_number":{"type":"integer","title":"Community Number"},"level":{"type":"integer","title":"Level"},"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name","default":""},"summary":{"type":"string","title":"Summary","default":""},"findings":{"items":{"type":"string"},"type":"array","title":"Findings","default":[]},"rating":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Rating"},"rating_explanation":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Rating Explanation"},"embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Embedding"},"attributes":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Attributes"}},"type":"object","required":["community_number","level","collection_id"],"title":"CommunityReport"},"ConversationOverviewResponse":{"properties":{"conversation_id":{"type":"string","format":"uuid","title":"Conversation Id"},"created_at":{"type":"string","format":"date-time","title":"Created At"}},"type":"object","required":["conversation_id","created_at"],"title":"ConversationOverviewResponse"},"CreateVectorIndexResponse":{"properties":{"message":{"type":"string","title":"Message","description":"Vector index creation queued 
successfully."}},"type":"object","required":["message"],"title":"CreateVectorIndexResponse"},"DeleteVectorIndexResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"DeleteVectorIndexResponse"},"DocumentChunkResponse":{"properties":{"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"},"vector":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Vector"}},"type":"object","required":["extraction_id","document_id","user_id","collection_ids","text","metadata"],"title":"DocumentChunkResponse"},"DocumentOverviewResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"title":{"type":"string","title":"Title"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"document_type":{"type":"string","title":"Document Type"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"ingestion_status":{"type":"string","title":"Ingestion Status"},"kg_extraction_status":{"type":"string","title":"Kg Extraction Status"},"version":{"type":"string","title":"Version"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["id","title","user_id","document_type","created_at","updated_at","ingestion_status","kg_extraction_status","version","collection_ids","metadata"],"title":"DocumentOverviewResponse"},"Entity":{"properties":{"name":{"type":"string","title":"Name"},"id":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Id"},"category":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Category"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"description_embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"string"},{"type":"null"}],"title":"Description Embedding"},"community_numbers":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Community Numbers"},"extraction_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Extraction Ids"},"collection_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Collection Id"},"document_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Document Id"},"document_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Document Ids"},"attributes":{"anyOf":[{"type":"object"},{"type":"string"},{"type":"null"}],"title":"Attributes"}},"type":"object","required":["name"],"title":"Entity","description":"An entity extracted from a document."},"EntityLevel":{"type":"string","enum":["collection","document","chunk"],"title":"EntityLevel"},"GenerationConfig":{"properties":{"model":{"type":"string","title":"Model"},"temperature":{"type":"number","title":"Temperature"},"top_p":{"type":"number","title":"Top P"},"max_tokens_to_sample":{"type":"integer","title":"Max Tokens To 
Sample"},"stream":{"type":"boolean","title":"Stream"},"functions":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Functions"},"tools":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tools"},"add_generation_kwargs":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Add Generation Kwargs"},"api_base":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api Base"},"response_format":{"anyOf":[{"type":"object"},{"$ref":"#/components/schemas/BaseModel"},{"type":"null"}],"title":"Response Format"}},"type":"object","title":"GenerationConfig","max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"GenericMessageResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"GenericMessageResponse"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"HybridSearchSettings":{"properties":{"full_text_weight":{"type":"number","title":"Full Text Weight","description":"Weight to apply to full text search","default":1.0},"semantic_weight":{"type":"number","title":"Semantic Weight","description":"Weight to apply to semantic search","default":5.0},"full_text_limit":{"type":"integer","title":"Full Text Limit","description":"Maximum number of results to return from full text search","default":200},"rrf_k":{"type":"integer","title":"Rrf K","description":"K-value for RRF (Rank Reciprocal Fusion)","default":50}},"type":"object","title":"HybridSearchSettings"},"IndexArgsHNSW":{"properties":{"m":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"M","default":16},"ef_construction":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Ef Construction","default":64}},"type":"object","title":"IndexArgsHNSW","description":"A class for arguments that can optionally be supplied to the index creation\nmethod when building an HNSW type index.\n\nRef: https://github.com/pgvector/pgvector#index-options\n\nBoth attributes are Optional in case the user only wants to specify one and\nleave the other as default\n\nAttributes:\n m (int): Maximum number of connections per node per layer (default: 16)\n ef_construction (int): Size of the dynamic candidate list for\n constructing the graph (default: 64)"},"IndexArgsIVFFlat":{"properties":{"n_lists":{"type":"integer","title":"N Lists"}},"type":"object","required":["n_lists"],"title":"IndexArgsIVFFlat","description":"A class for arguments that can optionally be supplied to the index creation\nmethod when building an IVFFlat type index.\n\nAttributes:\n nlist (int): The number of IVF centroids that the index should use"},"IndexMeasure":{"type":"string","enum":["l2_distance","max_inner_product","cosine_distance","l1_distance","hamming_distance","jaccard_distance"],"title":"IndexMeasure","description":"An enum representing the types of distance measures available for indexing.\n\nAttributes:\n cosine_distance (str): The cosine distance measure for indexing.\n l2_distance (str): The Euclidean (L2) distance measure for indexing.\n max_inner_product (str): The maximum inner product measure for indexing."},"IndexMethod":{"type":"string","enum":["auto","ivfflat","hnsw"],"title":"IndexMethod","description":"An enum representing the index methods available.\n\nThis class currently only supports the 'ivfflat' method but may\nexpand in the future.\n\nAttributes:\n auto (str): Automatically choose the 
best available index method.\n ivfflat (str): The ivfflat index method.\n hnsw (str): The hnsw index method."},"IngestionResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Task Id","description":"The task ID of the ingestion request."},"document_id":{"type":"string","format":"uuid","title":"Document Id","description":"The ID of the document that was ingested."}},"type":"object","required":["message","document_id"],"title":"IngestionResponse","example":{"document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","message":"Ingestion task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGCommunitiesResponse":{"properties":{"communities":{"items":{"$ref":"#/components/schemas/CommunityReport"},"type":"array","title":"Communities","description":"The list of communities in the graph for the collection."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of communities in the graph."}},"type":"object","required":["communities","total_entries"],"title":"KGCommunitiesResponse","description":"Response for knowledge graph communities.","example":{"communities":[{"collection_ids":["122fdf6a-e116-546b-a8f6-e4cb2e2c0a09"],"community_number":1,"findings":["finding1","finding2"],"id":"1","level":0,"name":"community name","rating":"8","rating_explanation":"rating explanation","summary":"community summary"}],"total_count":1}},"KGCommunityResult":{"properties":{"name":{"type":"string","title":"Name"},"summary":{"type":"string","title":"Summary"},"rating":{"type":"number","title":"Rating"},"rating_explanation":{"type":"string","title":"Rating Explanation"},"findings":{"items":{"type":"string"},"type":"array","title":"Findings"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","summary","rating","rating_explanation","findings"],"title":"KGCommunityResult","findings":["Finding 1","Finding 2"],"metadata":{},"name":"Community Name","rating":9,"rating_explanation":"Rating Explanation","summary":"Community Summary"},"KGDeduplicationEstimationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user.","default":""},"num_entities":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Num Entities","description":"The number of entities in the collection."},"estimated_llm_calls":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Llm Calls","description":"The estimated number of LLM calls."},"estimated_total_in_out_tokens_in_millions":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total In Out Tokens In Millions","description":"The estimated total number of input and output tokens in millions."},"estimated_cost_in_usd":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Cost In Usd","description":"The estimated cost in USD."},"estimated_total_time_in_minutes":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Estimated Total Time In Minutes","description":"The estimated time in minutes."}},"type":"object","title":"KGDeduplicationEstimationResponse","description":"Response for knowledge graph deduplication estimation."},"KGEntitiesResponse":{"properties":{"entities":{"items":{"$ref":"#/components/schemas/Entity"},"type":"array","title":"Entities","description":"The list of entities in the 
graph."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of entities in the graph for the collection or document."}},"type":"object","required":["entities","total_entries"],"title":"KGEntitiesResponse","description":"Response for knowledge graph entities.","example":{"entities":[{"description":"Description 1","id":"1","name":"Entity 1"},{"description":"Description 2","id":"2","name":"Entity 2"}],"total_entries":2}},"KGEntityDeduplicationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"The message to display to the user."}},"type":"object","required":["message"],"title":"KGEntityDeduplicationResponse","description":"Response for knowledge graph entity deduplication.","example":{"message":"Entity deduplication queued successfully."}},"KGEntityResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGEntityResult","description":"Entity Description","metadata":{},"name":"Entity Name"},"KGGlobalResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGGlobalResult","description":"Global Result Description","metadata":{},"name":"Global Result Name"},"KGRelationshipResult":{"properties":{"name":{"type":"string","title":"Name"},"description":{"type":"string","title":"Description"},"metadata":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Metadata"}},"type":"object","required":["name","description"],"title":"KGRelationshipResult","description":"Relationship Description","metadata":{},"name":"Relationship Name"},"KGRunType":{"type":"string","enum":["estimate","run"],"title":"KGRunType","description":"Type of KG run."},"KGSearchMethod":{"type":"string","enum":["local"],"const":"local","title":"KGSearchMethod"},"KGSearchResult":{"properties":{"method":{"$ref":"#/components/schemas/KGSearchMethod"},"content":{"anyOf":[{"$ref":"#/components/schemas/KGEntityResult"},{"$ref":"#/components/schemas/KGRelationshipResult"},{"$ref":"#/components/schemas/KGCommunityResult"},{"$ref":"#/components/schemas/KGGlobalResult"}],"title":"Content"},"result_type":{"anyOf":[{"$ref":"#/components/schemas/KGSearchResultType"},{"type":"null"}]},"extraction_ids":{"anyOf":[{"items":{"type":"string","format":"uuid"},"type":"array"},{"type":"null"}],"title":"Extraction Ids"},"metadata":{"type":"object","title":"Metadata","default":{}}},"type":"object","required":["method","content"],"title":"KGSearchResult","content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"},"KGSearchResultType":{"type":"string","enum":["entity","relationship","community"],"title":"KGSearchResultType"},"KGSearchSettings":{"properties":{"entities_level":{"$ref":"#/components/schemas/EntityLevel","description":"The level of entities to search for","default":"document"},"filters":{"type":"object","title":"Filters","description":"Alias for search_filters","deprecated":true},"search_filters":{"type":"object","title":"Search Filters","description":"Filters to apply to the vector search. 
Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.\n\n Commonly seen filters include operations include the following:\n\n `{\"document_id\": {\"$eq\": \"9fbe403b-...\"}}`\n\n `{\"document_id\": {\"$in\": [\"9fbe403b-...\", \"3e157b3a-...\"]}}`\n\n `{\"collection_ids\": {\"$overlap\": [\"122fdf6a-...\", \"...\"]}}`\n\n `{\"$and\": {\"$document_id\": ..., \"collection_ids\": ...}}`"},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"graphrag_map_system":{"type":"string","title":"Graphrag Map System","description":"The system prompt for the graphrag map prompt.","default":"graphrag_map_system"},"graphrag_reduce_system":{"type":"string","title":"Graphrag Reduce System","description":"The system prompt for the graphrag reduce prompt.","default":"graphrag_reduce_system"},"use_kg_search":{"type":"boolean","title":"Use Kg Search","description":"Whether to use KG search","default":false},"kg_search_type":{"type":"string","title":"Kg Search Type","description":"KG search type","default":"local"},"kg_search_level":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kg Search Level","description":"KG search level"},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"Configuration for text generation during graph search."},"max_community_description_length":{"type":"integer","title":"Max Community Description Length","default":65536},"max_llm_queries_for_global_search":{"type":"integer","title":"Max Llm Queries For Global Search","default":250},"local_search_limits":{"additionalProperties":{"type":"integer"},"type":"object","title":"Local Search Limits","default":{"__Entity__":20,"__Relationship__":20,"__Community__":20}}},"type":"object","title":"KGSearchSettings","generation_config":{"max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"kg_search_level":"0","kg_search_type":"local","local_search_limits":{"__Community__":20,"__Entity__":20,"__Relationship__":20},"max_community_description_length":65536,"max_llm_queries_for_global_search":250,"use_kg_search":true},"KGTriplesResponse":{"properties":{"triples":{"items":{"$ref":"#/components/schemas/Triple"},"type":"array","title":"Triples","description":"The list of triples in the graph."},"total_entries":{"type":"integer","title":"Total Entries","description":"The total number of triples in the graph for the collection or document."}},"type":"object","required":["triples","total_entries"],"title":"KGTriplesResponse","description":"Response for knowledge graph triples.","example":{"total_entries":2,"triples":[{"description":"Paris is the capital of France","object":"France","predicate":"is capital of","subject":"Paris"}]}},"KGTunePromptResponse":{"properties":{"tuned_prompt":{"type":"string","title":"Tuned Prompt","description":"The updated prompt."}},"type":"object","required":["tuned_prompt"],"title":"KGTunePromptResponse","description":"Response containing just the tuned prompt string.","example":{"tuned_prompt":"The updated 
prompt"}},"ListVectorIndicesResponse":{"properties":{"indices":{"items":{"type":"object"},"type":"array","title":"Indices"}},"type":"object","required":["indices"],"title":"ListVectorIndicesResponse"},"LogEntry":{"properties":{"key":{"type":"string","title":"Key"},"value":{"title":"Value"},"timestamp":{"type":"string","format":"date-time","title":"Timestamp"}},"type":"object","required":["key","value","timestamp"],"title":"LogEntry"},"LogResponse":{"properties":{"run_id":{"type":"string","format":"uuid","title":"Run Id"},"run_type":{"type":"string","title":"Run Type"},"entries":{"items":{"$ref":"#/components/schemas/LogEntry"},"type":"array","title":"Entries"},"timestamp":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Timestamp"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"}},"type":"object","required":["run_id","run_type","entries","timestamp","user_id"],"title":"LogResponse"},"Message":{"properties":{"role":{"anyOf":[{"$ref":"#/components/schemas/MessageType"},{"type":"string"}],"title":"Role"},"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"function_call":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Function Call"},"tool_calls":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"type":"object","required":["role"],"title":"Message","content":"This is a test message.","role":"user"},"MessageType":{"type":"string","enum":["system","user","assistant","function","tool"],"title":"MessageType"},"PaginatedResultsWrapper_list_CollectionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[CollectionResponse]]"},"PaginatedResultsWrapper_list_ConversationOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/ConversationOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[ConversationOverviewResponse]]"},"PaginatedResultsWrapper_list_DocumentChunkResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentChunkResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[DocumentChunkResponse]]"},"PaginatedResultsWrapper_list_DocumentOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[DocumentOverviewResponse]]"},"PaginatedResultsWrapper_list_UserOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/UserOverviewResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total 
Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[UserOverviewResponse]]"},"PaginatedResultsWrapper_list_UserResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/shared__api__models__management__responses__UserResponse"},"type":"array","title":"Results"},"total_entries":{"type":"integer","title":"Total Entries"}},"type":"object","required":["results","total_entries"],"title":"PaginatedResultsWrapper[list[UserResponse]]"},"PromptResponse":{"properties":{"prompt_id":{"type":"string","format":"uuid","title":"Prompt Id"},"name":{"type":"string","title":"Name"},"template":{"type":"string","title":"Template"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types"}},"type":"object","required":["prompt_id","name","template","created_at","updated_at","input_types"],"title":"PromptResponse"},"RAGAgentResponse":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message"},"type":"array","title":"Messages","description":"List of messages in the RAG agent response"},"conversation_id":{"type":"string","title":"Conversation Id","description":"The conversation ID for the RAG agent response"}},"type":"object","required":["messages","conversation_id"],"title":"RAGAgentResponse","example":[{"content":"## You are a helpful assistant that can search for information.\n\nWhen asked a question, perform a search to find relevant information and provide a response.\n\nThe response should contain line-item attributions to relevent search results, and be as informative if possible.\nIf no relevant results are found, then state that no results were found.\nIf no obvious question is present, then do not carry out a search, and instead ask for clarification.","role":"system"},{"content":"You are a helpful assistant.","role":"system"},{"content":"Who is the greatest philospher of all time?","role":"user"},{"content":"Aristotle is widely considered the greatest philospher of all time.","role":"assistant"},{"content":"Can you tell me more about him?","role":"user"},{"function_call":{"arguments":"{\"query\":\"Aristotle biography\"}","name":"search"},"role":"assistant"},{"content":"1. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n2. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n3. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. 
While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n4. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n5. Life\nIn general, the details of Aristotle's life are not well-established. The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\n","name":"search","role":"function"},{"content":"Aristotle (384–322 BC) was an Ancient Greek philosopher and polymath whose contributions have had a profound impact on various fields of knowledge. Here are some key points about his life and work:\n\n1. **Early Life**: Aristotle was born in 384 BC in Stagira, Chalcidice, which is near modern-day Thessaloniki, Greece. His father, Nicomachus, was the personal physician to King Amyntas of Macedon, which exposed Aristotle to medical and biological knowledge from a young age [C].\n\n2. **Education and Career**: After the death of his parents, Aristotle was sent to Athens to study at Plato's Academy, where he remained for about 20 years. After Plato's death, Aristotle left Athens and eventually became the tutor of Alexander the Great [C].\n\n3. **Philosophical Contributions**: Aristotle founded the Lyceum in Athens, where he established the Peripatetic school of philosophy. His works cover a wide range of subjects, including metaphysics, ethics, politics, logic, biology, and aesthetics. His writings laid the groundwork for many modern scientific and philosophical inquiries [A].\n\n4. **Legacy**: Aristotle's influence extends beyond philosophy to the natural sciences, linguistics, economics, and psychology. 
His method of systematic observation and analysis has been foundational to the development of modern science [A].\n\nAristotle's comprehensive approach to knowledge and his systematic methodology have earned him a lasting legacy as one of the greatest philosophers of all time.\n\nSources:\n- [A] Aristotle's broad range of writings and influence on modern science.\n- [C] Details about Aristotle's early life and education.","role":"assistant"}]},"RAGResponse":{"properties":{"completion":{"title":"Completion","description":"The generated completion from the RAG process"},"search_results":{"$ref":"#/components/schemas/SearchResponse","description":"The search results used for the RAG process"}},"type":"object","required":["completion","search_results"],"title":"RAGResponse","example":{"completion":{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Paris is the capital of France.","role":"assistant"}}],"id":"chatcmpl-example123"},"search_results":{"kg_search_results":[{"content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}}},"RawChunk":{"properties":{"text":{"type":"string","title":"Text"}},"type":"object","required":["text"],"title":"RawChunk"},"ResultsWrapper_AllPromptsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AllPromptsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AllPromptsResponse]"},"ResultsWrapper_AnalyticsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AnalyticsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AnalyticsResponse]"},"ResultsWrapper_AppSettingsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AppSettingsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AppSettingsResponse]"},"ResultsWrapper_CollectionResponse_":{"properties":{"results":{"$ref":"#/components/schemas/CollectionResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[CollectionResponse]"},"ResultsWrapper_CreateVectorIndexResponse_":{"properties":{"results":{"$ref":"#/components/schemas/CreateVectorIndexResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[CreateVectorIndexResponse]"},"ResultsWrapper_DeleteVectorIndexResponse_":{"properties":{"results":{"$ref":"#/components/schemas/DeleteVectorIndexResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[DeleteVectorIndexResponse]"},"ResultsWrapper_GenericMessageResponse_":{"properties":{"results":{"$ref":"#/components/schemas/GenericMessageResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[GenericMessageResponse]"},"ResultsWrapper_KGCommunitiesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGCommunitiesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGCommunitiesResponse]"},"ResultsWrapper_KGEntitiesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGEntitiesResponse"}},"type":"object","required":["results"],"title":"
ResultsWrapper[KGEntitiesResponse]"},"ResultsWrapper_KGTriplesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGTriplesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGTriplesResponse]"},"ResultsWrapper_KGTunePromptResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGTunePromptResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGTunePromptResponse]"},"ResultsWrapper_ListVectorIndicesResponse_":{"properties":{"results":{"$ref":"#/components/schemas/ListVectorIndicesResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[ListVectorIndicesResponse]"},"ResultsWrapper_NoneType_":{"properties":{"results":{"type":"null","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[NoneType]"},"ResultsWrapper_RAGAgentResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGAgentResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGAgentResponse]"},"ResultsWrapper_RAGResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGResponse]"},"ResultsWrapper_SearchResponse_":{"properties":{"results":{"$ref":"#/components/schemas/SearchResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[SearchResponse]"},"ResultsWrapper_ServerStats_":{"properties":{"results":{"$ref":"#/components/schemas/ServerStats"}},"type":"object","required":["results"],"title":"ResultsWrapper[ServerStats]"},"ResultsWrapper_TokenResponse_":{"properties":{"results":{"$ref":"#/components/schemas/TokenResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[TokenResponse]"},"ResultsWrapper_Union_KGEntityDeduplicationResponse__KGDeduplicationEstimationResponse__":{"properties":{"results":{"anyOf":[{"$ref":"#/components/schemas/KGEntityDeduplicationResponse"},{"$ref":"#/components/schemas/KGDeduplicationEstimationResponse"}],"title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[Union[KGEntityDeduplicationResponse, 
KGDeduplicationEstimationResponse]]"},"ResultsWrapper_UpdatePromptResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdatePromptResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdatePromptResponse]"},"ResultsWrapper_UpdateResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdateResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdateResponse]"},"ResultsWrapper_UserResponse_":{"properties":{"results":{"$ref":"#/components/schemas/shared__api__models__auth__responses__UserResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UserResponse]"},"ResultsWrapper_list_CollectionOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionOverviewResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[CollectionOverviewResponse]]"},"ResultsWrapper_list_CollectionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/CollectionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[CollectionResponse]]"},"ResultsWrapper_list_IngestionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/IngestionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[IngestionResponse]]"},"ResultsWrapper_list_LogResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/LogResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[LogResponse]]"},"ResultsWrapper_list_Tuple_str__Message___":{"properties":{"results":{"items":{"prefixItems":[{"type":"string"},{"$ref":"#/components/schemas/Message"}],"type":"array","maxItems":2,"minItems":2},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[Tuple[str, Message]]]"},"SearchResponse":{"properties":{"vector_search_results":{"items":{"$ref":"#/components/schemas/VectorSearchResult"},"type":"array","title":"Vector Search Results","description":"List of vector search results"},"kg_search_results":{"anyOf":[{"items":{"$ref":"#/components/schemas/KGSearchResult"},"type":"array"},{"type":"null"}],"title":"Kg Search Results","description":"Knowledge graph search results, if applicable"}},"type":"object","required":["vector_search_results"],"title":"SearchResponse","example":{"kg_search_results":[{"content":{"description":"Entity Description","metadata":{},"name":"Entity Name"},"extraction_ids":["c68dc72e-fc23-5452-8f49-d7bd46088a96"],"metadata":{"associated_query":"What is the capital of France?"},"method":"local","result_type":"entity"}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}},"ServerStats":{"properties":{"start_time":{"type":"string","format":"date-time","title":"Start Time"},"uptime_seconds":{"type":"number","title":"Uptime Seconds"},"cpu_usage":{"type":"number","title":"Cpu Usage"},"memory_usage":{"type":"number","title":"Memory 
Usage"}},"type":"object","required":["start_time","uptime_seconds","cpu_usage","memory_usage"],"title":"ServerStats"},"Token":{"properties":{"token":{"type":"string","title":"Token"},"token_type":{"type":"string","title":"Token Type"}},"type":"object","required":["token","token_type"],"title":"Token"},"TokenResponse":{"properties":{"access_token":{"$ref":"#/components/schemas/Token"},"refresh_token":{"$ref":"#/components/schemas/Token"}},"type":"object","required":["access_token","refresh_token"],"title":"TokenResponse"},"Triple":{"properties":{"id":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Id"},"subject":{"type":"string","title":"Subject"},"predicate":{"type":"string","title":"Predicate"},"object":{"type":"string","title":"Object"},"weight":{"anyOf":[{"type":"number"},{"type":"null"}],"title":"Weight","default":1.0},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"predicate_embedding":{"anyOf":[{"items":{"type":"number"},"type":"array"},{"type":"null"}],"title":"Predicate Embedding"},"extraction_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Extraction Ids","default":[]},"document_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Document Id"},"attributes":{"anyOf":[{"type":"object"},{"type":"string"}],"title":"Attributes","default":{}}},"type":"object","required":["subject","predicate","object"],"title":"Triple","description":"A relationship between two entities. This is a generic relationship, and can be used to represent any type of relationship between any two entities."},"UpdatePromptResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"UpdatePromptResponse"},"UpdateResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"Task Id","description":"The task ID of the ingestion request."},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids","description":"The ID of the document that was ingested."}},"type":"object","required":["message","document_ids"],"title":"UpdateResponse","example":{"document_ids":["9fbe403b-c11c-5aae-8ade-ef22980c3ad1"],"message":"Update task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"UserOverviewResponse":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id"},"num_files":{"type":"integer","title":"Num Files"},"total_size_in_bytes":{"type":"integer","title":"Total Size In Bytes"},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids"}},"type":"object","required":["user_id","num_files","total_size_in_bytes","document_ids"],"title":"UserOverviewResponse"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"VectorSearchResult":{"properties":{"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection 
Ids"},"score":{"type":"number","title":"Score"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["extraction_id","document_id","user_id","collection_ids","score","text","metadata"],"title":"VectorSearchResult","description":"Result of a search operation.","collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"},"VectorSearchSettings":{"properties":{"use_vector_search":{"type":"boolean","title":"Use Vector Search","description":"Whether to use vector search","default":true},"use_hybrid_search":{"type":"boolean","title":"Use Hybrid Search","description":"Whether to perform a hybrid search (combining vector and keyword search)","default":false},"filters":{"type":"object","title":"Filters","description":"Alias for search_filters","deprecated":true},"search_filters":{"type":"object","title":"Search Filters","description":"Filters to apply to the vector search. Allowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.\n\n Commonly seen filters include operations include the following:\n\n `{\"document_id\": {\"$eq\": \"9fbe403b-...\"}}`\n\n `{\"document_id\": {\"$in\": [\"9fbe403b-...\", \"3e157b3a-...\"]}}`\n\n `{\"collection_ids\": {\"$overlap\": [\"122fdf6a-...\", \"...\"]}}`\n\n `{\"$and\": {\"$document_id\": ..., \"collection_ids\": ...}}`"},"search_limit":{"type":"integer","maximum":1000.0,"minimum":1.0,"title":"Search Limit","description":"Maximum number of results to return","default":10},"offset":{"type":"integer","minimum":0.0,"title":"Offset","description":"Offset to paginate search results","default":0},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"index_measure":{"$ref":"#/components/schemas/IndexMeasure","description":"The distance measure to use for indexing","default":"cosine_distance"},"include_values":{"type":"boolean","title":"Include Values","description":"Whether to include search score values in the search results","default":true},"include_metadatas":{"type":"boolean","title":"Include Metadatas","description":"Whether to include element metadata in the search results","default":true},"probes":{"type":"integer","title":"Probes","description":"Number of ivfflat index lists to query. Higher increases accuracy but decreases speed.","default":10},"ef_search":{"type":"integer","title":"Ef Search","description":"Size of the dynamic candidate list for HNSW index search. 
Higher increases accuracy but decreases speed.","default":40},"hybrid_search_settings":{"$ref":"#/components/schemas/HybridSearchSettings","description":"Settings for hybrid search","default":{"full_text_weight":1.0,"semantic_weight":5.0,"full_text_limit":200,"rrf_k":50}},"search_strategy":{"type":"string","title":"Search Strategy","description":"Search strategy to use (e.g., 'default', 'query_fusion', 'hyde')","default":"vanilla"}},"type":"object","title":"VectorSearchSettings","ef_search":40,"filters":{"category":"technology"},"hybrid_search_settings":{"full_text_limit":200,"full_text_weight":1.0,"rrf_k":50,"semantic_weight":5.0},"include_metadata":true,"index_measure":"cosine_distance","limit":20,"offset":0,"probes":10,"selected_collection_ids":["2acb499e-8428-543b-bd85-0d9098718220","3e157b3a-8469-51db-90d9-52e7d896b49b"],"use_hybrid_search":true,"use_vector_search":true},"VectorTableName":{"type":"string","enum":["vectors","document_entity","collection_entity","community_report"],"title":"VectorTableName","description":"This enum represents the different tables where we store vectors."},"shared__api__models__auth__responses__UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-10-29T09:47:24.038501"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-10-29T09:47:24.038510"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"},"shared__api__models__management__responses__UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-10-29T09:47:24.055195"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-10-29T09:47:24.055198"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile 
Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"}},"securitySchemes":{"HTTPBearer":{"type":"http","scheme":"bearer"},"OAuth2PasswordBearer":{"type":"oauth2","flows":{"password":{"scopes":{},"tokenUrl":"token"}}}}}} diff --git a/docs/cookbooks/graphrag.mdx b/docs/cookbooks/graphrag.mdx index 0d8437edf..6ebc6a14c 100644 --- a/docs/cookbooks/graphrag.mdx +++ b/docs/cookbooks/graphrag.mdx @@ -99,9 +99,6 @@ excluded_parsers = ["mp4"] semantic_similarity_threshold = 0.7 generation_config = { model = "openai/gpt-4o-mini" } - [ingestion.extra_parsers] - pdf = "zerox" - [database] provider = "postgres" batch_size = 256 @@ -204,9 +201,6 @@ max_characters = 1_024 combine_under_n_chars = 128 overlap = 256 - [ingestion.extra_parsers] - pdf = "zerox" - [orchestration] provider = "hatchet" kg_creation_concurrency_lipmit = 32 diff --git a/js/sdk/package-lock.json b/js/sdk/package-lock.json index 959185296..b9d95ef49 100644 --- a/js/sdk/package-lock.json +++ b/js/sdk/package-lock.json @@ -1,6 +1,6 @@ { "name": "r2r-js", - "version": "0.3.11", + "version": "0.3.12", "lockfileVersion": 3, "requires": true, "packages": { diff --git a/js/sdk/package.json b/js/sdk/package.json index 734db3ea3..827425ed4 100644 --- a/js/sdk/package.json +++ b/js/sdk/package.json @@ -1,6 +1,6 @@ { "name": "r2r-js", - "version": "0.3.11", + "version": "0.3.12", "description": "", "main": "dist/index.js", "browser": "dist/index.browser.js", diff --git a/py/cli/command_group.py b/py/cli/command_group.py index efa27d936..49c560f88 100644 --- a/py/cli/command_group.py +++ b/py/cli/command_group.py @@ -2,7 +2,7 @@ from asyncclick import pass_context from asyncclick.exceptions import Exit -from r2r import R2RAsyncClient +from sdk import R2RAsyncClient @click.group() diff --git a/py/cli/commands/ingestion.py b/py/cli/commands/ingestion.py index e6d09e88a..bd28823f4 100644 --- a/py/cli/commands/ingestion.py +++ b/py/cli/commands/ingestion.py @@ -11,7 +11,7 @@ from cli.command_group import cli from cli.utils.param_types import JSON from cli.utils.timer import timer -from core.base.abstractions import IndexMeasure, IndexMethod, VectorTableName +from shared.abstractions import IndexMeasure, IndexMethod, VectorTableName async def ingest_files_from_urls(client, urls): @@ -243,6 +243,7 @@ async def create_vector_index( index_measure, index_arguments, index_name, + index_column, no_concurrent, ): """Create a vector index for similarity search.""" @@ -254,6 +255,7 @@ async def create_vector_index( index_measure=index_measure, index_arguments=index_arguments, index_name=index_name, + index_column=index_column, concurrently=not no_concurrent, ) click.echo(json.dumps(response, indent=2)) diff --git a/py/core/__init__.py b/py/core/__init__.py index 9d8917895..504206166 100644 --- a/py/core/__init__.py +++ b/py/core/__init__.py @@ -134,6 +134,9 @@ # Crypto provider "CryptoConfig", "CryptoProvider", + # Email provider + "EmailConfig", + "EmailProvider", # Database providers "DatabaseConfig", "DatabaseProvider", @@ -192,9 +195,9 @@ "AudioParser", "DOCXParser", "ImageParser", - "PDFParser", + "VLMPDFParser", + "BasicPDFParser", "PDFParserUnstructured", - "PDFParserMarker", "PPTParser", # Structured parsers "CSVParser", @@ -233,6 +236,7 @@ # Embeddings "LiteLLMEmbeddingProvider", "OpenAIEmbeddingProvider", + "OllamaEmbeddingProvider", # LLM "OpenAICompletionProvider", "LiteLLMCompletionProvider", diff --git a/py/core/base/__init__.py b/py/core/base/__init__.py index 7cf762533..81a4cdbef 100644 --- a/py/core/base/__init__.py +++ 
b/py/core/base/__init__.py @@ -106,6 +106,9 @@ # Crypto provider "CryptoConfig", "CryptoProvider", + # Email provider + "EmailConfig", + "EmailProvider", # Database providers "DatabaseConfig", "DatabaseProvider", diff --git a/py/core/base/parsers/base_parser.py b/py/core/base/parsers/base_parser.py index d0bc8633c..1de600404 100644 --- a/py/core/base/parsers/base_parser.py +++ b/py/core/base/parsers/base_parser.py @@ -3,14 +3,11 @@ from abc import ABC, abstractmethod from typing import AsyncGenerator, Generic, TypeVar -from ..abstractions import DataType - T = TypeVar("T") class AsyncParser(ABC, Generic[T]): + @abstractmethod - async def ingest( - self, data: T, **kwargs - ) -> AsyncGenerator[DataType, None]: + async def ingest(self, data: T, **kwargs) -> AsyncGenerator[str, None]: pass diff --git a/py/core/base/providers/__init__.py b/py/core/base/providers/__init__.py index 8234f5f35..37af2b8f3 100644 --- a/py/core/base/providers/__init__.py +++ b/py/core/base/providers/__init__.py @@ -16,6 +16,7 @@ UserHandler, VectorHandler, ) +from .email import EmailConfig, EmailProvider from .embedding import EmbeddingConfig, EmbeddingProvider from .ingestion import ChunkingStrategy, IngestionConfig, IngestionProvider from .llm import CompletionConfig, CompletionProvider @@ -36,6 +37,9 @@ # Crypto provider "CryptoConfig", "CryptoProvider", + # Email provider + "EmailConfig", + "EmailProvider", # Database providers "DatabaseConnectionManager", "DocumentHandler", diff --git a/py/core/base/providers/auth.py b/py/core/base/providers/auth.py index 37fc95b83..6ed1a9338 100644 --- a/py/core/base/providers/auth.py +++ b/py/core/base/providers/auth.py @@ -10,6 +10,8 @@ from ..api.models import UserResponse from .base import Provider, ProviderConfig from .crypto import CryptoProvider +from .database import DatabaseProvider +from .email import EmailProvider logger = logging.getLogger() @@ -33,8 +35,17 @@ def validate_config(self) -> None: class AuthProvider(Provider, ABC): security = HTTPBearer(auto_error=False) - - def __init__(self, config: AuthConfig, crypto_provider: CryptoProvider): + crypto_provider: CryptoProvider + email_provider: EmailProvider + database_provider: DatabaseProvider + + def __init__( + self, + config: AuthConfig, + crypto_provider: CryptoProvider, + database_provider: DatabaseProvider, + email_provider: EmailProvider, + ): if not isinstance(config, AuthConfig): raise ValueError( "AuthProvider must be initialized with an AuthConfig" @@ -43,6 +54,8 @@ def __init__(self, config: AuthConfig, crypto_provider: CryptoProvider): self.admin_email = config.default_admin_email self.admin_password = config.default_admin_password self.crypto_provider = crypto_provider + self.database_provider = database_provider + self.email_provider = email_provider super().__init__(config) self.config: AuthConfig = config # for type hinting diff --git a/py/core/base/providers/database.py b/py/core/base/providers/database.py index 02dd4c87e..a035ea639 100644 --- a/py/core/base/providers/database.py +++ b/py/core/base/providers/database.py @@ -571,6 +571,7 @@ async def create_index( Union[IndexArgsIVFFlat, IndexArgsHNSW] ] = None, index_name: Optional[str] = None, + index_column: Optional[str] = None, concurrently: bool = True, ) -> None: pass @@ -1452,6 +1453,7 @@ async def create_index( Union[IndexArgsIVFFlat, IndexArgsHNSW] ] = None, index_name: Optional[str] = None, + index_column: Optional[str] = None, concurrently: bool = True, ) -> None: return await self.vector_handler.create_index( @@ -1460,6 +1462,7 @@ 
async def create_index( index_method, index_arguments, index_name, + index_column, concurrently, ) diff --git a/py/core/base/providers/email.py b/py/core/base/providers/email.py new file mode 100644 index 000000000..3aca8f5d8 --- /dev/null +++ b/py/core/base/providers/email.py @@ -0,0 +1,71 @@ +# email_provider.py +import logging +from abc import ABC, abstractmethod +from typing import Optional + +from .base import Provider, ProviderConfig + + +class EmailConfig(ProviderConfig): + smtp_server: Optional[str] = None + smtp_port: Optional[int] = None + smtp_username: Optional[str] = None + smtp_password: Optional[str] = None + from_email: Optional[str] = None + use_tls: Optional[bool] = True + + @property + def supported_providers(self) -> list[str]: + return [ + "smtp", + "console", + ] # Could add more providers like AWS SES, SendGrid etc. + + def validate_config(self) -> None: + pass + # if self.provider == "smtp": + # if not all( + # [ + # self.smtp_server, + # self.smtp_port, + # self.smtp_username, + # self.smtp_password, + # self.from_email, + # ] + # ): + # raise ValueError("SMTP configuration is incomplete") + + +logger = logging.getLogger(__name__) + + +class EmailProvider(Provider, ABC): + def __init__(self, config: EmailConfig): + if not isinstance(config, EmailConfig): + raise ValueError( + "EmailProvider must be initialized with an EmailConfig" + ) + super().__init__(config) + self.config: EmailConfig = config # for type hinting + + @abstractmethod + async def send_email( + self, + to_email: str, + subject: str, + body: str, + html_body: Optional[str] = None, + ) -> None: + pass + + @abstractmethod + async def send_verification_email( + self, to_email: str, verification_code: str + ) -> None: + pass + + @abstractmethod + async def send_password_reset_email( + self, to_email: str, reset_token: str + ) -> None: + pass diff --git a/py/core/base/providers/embedding.py b/py/core/base/providers/embedding.py index 091408cea..66f686011 100644 --- a/py/core/base/providers/embedding.py +++ b/py/core/base/providers/embedding.py @@ -1,5 +1,6 @@ import asyncio import logging +import random import time from abc import abstractmethod from enum import Enum @@ -43,7 +44,7 @@ def validate_config(self) -> None: @property def supported_providers(self) -> list[str]: - return ["litellm", "openai"] + return ["litellm", "openai", "ollama"] class EmbeddingProvider(Provider): @@ -79,7 +80,7 @@ async def _execute_with_backoff_async(self, task: dict[str, Any]): retries += 1 if retries == self.config.max_retries: raise - await asyncio.sleep(backoff) + await asyncio.sleep(random.uniform(0, backoff)) backoff = min(backoff * 2, self.config.max_backoff) def _execute_with_backoff_sync(self, task: dict[str, Any]): @@ -97,7 +98,7 @@ def _execute_with_backoff_sync(self, task: dict[str, Any]): retries += 1 if retries == self.config.max_retries: raise - time.sleep(backoff) + time.sleep(random.uniform(0, backoff)) backoff = min(backoff * 2, self.config.max_backoff) @abstractmethod diff --git a/py/core/base/providers/ingestion.py b/py/core/base/providers/ingestion.py index 6e80b51e8..865757326 100644 --- a/py/core/base/providers/ingestion.py +++ b/py/core/base/providers/ingestion.py @@ -1,10 +1,13 @@ import logging from abc import ABC from enum import Enum +from typing import Optional from core.base.abstractions import ChunkEnrichmentSettings from .base import Provider, ProviderConfig +from .database import DatabaseProvider +from .llm import CompletionProvider logger = logging.getLogger() @@ -17,6 +20,14 @@ class 
IngestionConfig(ProviderConfig): ) extra_parsers: dict[str, str] = {} + audio_transcription_model: str = "openai/whisper-1" + + vision_img_prompt_name: str = "vision_img" + vision_img_model: str = "openai/gpt-4-mini" + + vision_pdf_prompt_name: str = "vision_pdf" + vision_pdf_model: str = "openai/gpt-4-mini" + @property def supported_providers(self) -> list[str]: return ["r2r", "unstructured_local", "unstructured_api"] @@ -27,7 +38,21 @@ def validate_config(self) -> None: class IngestionProvider(Provider, ABC): - pass + + config: IngestionConfig + database_provider: DatabaseProvider + llm_provider: CompletionProvider + + def __init__( + self, + config: IngestionConfig, + database_provider: DatabaseProvider, + llm_provider: CompletionProvider, + ): + super().__init__(config) + self.config: IngestionConfig = config + self.llm_provider = llm_provider + self.database_provider = database_provider class ChunkingStrategy(str, Enum): diff --git a/py/core/base/providers/llm.py b/py/core/base/providers/llm.py index 41cb00a21..66beede8f 100644 --- a/py/core/base/providers/llm.py +++ b/py/core/base/providers/llm.py @@ -1,5 +1,6 @@ import asyncio import logging +import random import time from abc import abstractmethod from concurrent.futures import ThreadPoolExecutor @@ -67,7 +68,7 @@ async def _execute_with_backoff_async(self, task: dict[str, Any]): retries += 1 if retries == self.config.max_retries: raise - await asyncio.sleep(backoff) + await asyncio.sleep(random.uniform(0, backoff)) backoff = min(backoff * 2, self.config.max_backoff) async def _execute_with_backoff_async_stream( @@ -90,7 +91,7 @@ async def _execute_with_backoff_async_stream( retries += 1 if retries == self.config.max_retries: raise - await asyncio.sleep(backoff) + await asyncio.sleep(random.uniform(0, backoff)) backoff = min(backoff * 2, self.config.max_backoff) def _execute_with_backoff_sync(self, task: dict[str, Any]): @@ -106,7 +107,7 @@ def _execute_with_backoff_sync(self, task: dict[str, Any]): retries += 1 if retries == self.config.max_retries: raise - time.sleep(backoff) + time.sleep(random.uniform(0, backoff)) backoff = min(backoff * 2, self.config.max_backoff) def _execute_with_backoff_sync_stream( @@ -125,7 +126,7 @@ def _execute_with_backoff_sync_stream( retries += 1 if retries == self.config.max_retries: raise - time.sleep(backoff) + time.sleep(random.uniform(0, backoff)) backoff = min(backoff * 2, self.config.max_backoff) @abstractmethod @@ -147,6 +148,8 @@ async def aget_completion( "generation_config": generation_config, "kwargs": kwargs, } + if modalities := kwargs.get("modalities"): + task["modalities"] = modalities response = await self._execute_with_backoff_async(task) return LLMChatCompletion(**response.dict()) diff --git a/py/core/configs/full.toml b/py/core/configs/full.toml index 3d397527e..daa7d3e4f 100644 --- a/py/core/configs/full.toml +++ b/py/core/configs/full.toml @@ -6,9 +6,8 @@ new_after_n_chars = 512 max_characters = 1_024 combine_under_n_chars = 128 overlap = 256 - - [ingestion.extra_parsers] - pdf = "zerox" + [ingestion.extra_parsers] + pdf = "basic" [orchestration] provider = "hatchet" diff --git a/py/core/configs/full_local_llm.toml b/py/core/configs/full_local_llm.toml index 1414b51f4..49fdc3eb8 100644 --- a/py/core/configs/full_local_llm.toml +++ b/py/core/configs/full_local_llm.toml @@ -33,6 +33,11 @@ new_after_n_chars = 512 max_characters = 1_024 combine_under_n_chars = 128 overlap = 20 +vision_img_model = "ollama/llama3.2-vision" +vision_pdf_model = "ollama/llama3.2-vision" + 
[ingestion.extra_parsers] + pdf = "basic" + [orchestration] provider = "hatchet" diff --git a/py/core/configs/local_llm.toml b/py/core/configs/local_llm.toml index d16bbe951..7f372680b 100644 --- a/py/core/configs/local_llm.toml +++ b/py/core/configs/local_llm.toml @@ -27,3 +27,10 @@ concurrent_request_limit = 2 [orchestration] provider = "simple" + + +[ingestion] +vision_img_model = "ollama/llama3.2-vision" +vision_pdf_model = "ollama/llama3.2-vision" + [ingestion.extra_parsers] + pdf = "basic" diff --git a/py/core/configs/r2r_aws_bedrock.toml b/py/core/configs/r2r_aws_bedrock.toml deleted file mode 100644 index 148bfdc0e..000000000 --- a/py/core/configs/r2r_aws_bedrock.toml +++ /dev/null @@ -1,62 +0,0 @@ -[auth] -provider = "r2r" -access_token_lifetime_in_minutes = 60 -refresh_token_lifetime_in_days = 7 -require_authentication = false -require_email_verification = false -default_admin_email = "admin@example.com" -default_admin_password = "change_me_immediately" - -[ingestion] -provider = "unstructured_local" -strategy = "auto" -chunking_strategy = "by_title" -new_after_n_chars = 512 -max_characters = 1_024 -combine_under_n_chars = 128 -overlap = 20 - -[completion] -provider = "litellm" -concurrent_request_limit = 256 - - [completion.generation_config] - model = "bedrock/anthropic.claude-v2" - - -[crypto] -provider = "bcrypt" - -[database] -provider = "postgres" - -[embedding] -provider = "litellm" -base_model = "amazon.titan-embed-text-v1" -base_dimension = 512 -batch_size = 128 -add_title_as_prefix = false -[database] -provider = "None" - -[logging] -provider = "local" -log_table = "logs" -log_info_table = "log_info" - -[parsing] -provider = "unstructured_local" -excluded_parsers = ["mp4"] - -[prompt] -provider = "r2r" - -[agent] -system_instruction_name = "rag_agent" -tool_names = ["search"] - -[file] -provider = "postgres" - -[orchestration] -provider = "hatchet" diff --git a/py/core/examples/scripts/run_auth_workflow.py b/py/core/examples/scripts/run_auth_workflow.py index 53841769b..df9d09d0e 100644 --- a/py/core/examples/scripts/run_auth_workflow.py +++ b/py/core/examples/scripts/run_auth_workflow.py @@ -6,7 +6,7 @@ ) # Replace with your R2R deployment URL # Register a new user - user_result = client.register("user11123@test.com", "password123") + user_result = client.register("owen@sciphi.ai", "password123") print(user_result) # # Uncomment when running with authentication diff --git a/py/core/examples/scripts/run_ingest_with_zerox.py b/py/core/examples/scripts/run_ingest_with_zerox.py deleted file mode 100644 index 41aba6adf..000000000 --- a/py/core/examples/scripts/run_ingest_with_zerox.py +++ /dev/null @@ -1,15 +0,0 @@ -import os -import time - -from r2r import R2RClient - -if __name__ == "__main__": - client = R2RClient(base_url="http://localhost:7272") - script_path = os.path.dirname(__file__) - sample_file = os.path.join(script_path, "..", "data", "graphrag.pdf") - - ingest_response = client.ingest_files( - file_paths=[sample_file], - ingestion_config={"parser_overrides": {"pdf": "zerox"}}, - ) - time.sleep(60) diff --git a/py/core/main/abstractions.py b/py/core/main/abstractions.py index 63bba5cd1..4a7b71f28 100644 --- a/py/core/main/abstractions.py +++ b/py/core/main/abstractions.py @@ -6,9 +6,12 @@ from core.base.pipes import AsyncPipe from core.pipelines import RAGPipeline, SearchPipeline from core.providers import ( + AsyncSMTPEmailProvider, + ConsoleMockEmailProvider, HatchetOrchestrationProvider, LiteLLMCompletionProvider, LiteLLMEmbeddingProvider, + 
OllamaEmbeddingProvider, OpenAICompletionProvider, OpenAIEmbeddingProvider, PostgresDBProvider, @@ -25,12 +28,17 @@ class R2RProviders(BaseModel): auth: Union[R2RAuthProvider, SupabaseAuthProvider] database: PostgresDBProvider ingestion: Union[R2RIngestionProvider, UnstructuredIngestionProvider] - embedding: Union[LiteLLMEmbeddingProvider, OpenAIEmbeddingProvider] + embedding: Union[ + LiteLLMEmbeddingProvider, + OpenAIEmbeddingProvider, + OllamaEmbeddingProvider, + ] llm: Union[LiteLLMCompletionProvider, OpenAICompletionProvider] orchestration: Union[ HatchetOrchestrationProvider, SimpleOrchestrationProvider ] logging: SqlitePersistentLoggingProvider + email: Union[AsyncSMTPEmailProvider, ConsoleMockEmailProvider] class Config: arbitrary_types_allowed = True diff --git a/py/core/main/api/data/ingestion_router_openapi.yml b/py/core/main/api/data/ingestion_router_openapi.yml index 283ad58da..d494ca1f6 100644 --- a/py/core/main/api/data/ingestion_router_openapi.yml +++ b/py/core/main/api/data/ingestion_router_openapi.yml @@ -172,6 +172,7 @@ create_vector_index: index_method: "The indexing method to use. Options: hnsw, ivfflat, auto. Default: hnsw" index_measure: "Distance measure for vector comparisons. Options: cosine_distance, l2_distance, max_inner_product. Default: cosine_distance" index_name: "Optional custom name for the index. If not provided, one will be auto-generated" + index_column: "The column containing the vectors to index. Default: `vec`, or `vec_binary` when using hamming or jaccard distance." index_arguments: "Configuration parameters for the chosen index method. For HNSW: {m: int, ef_construction: int}. For IVFFlat: {n_lists: int}" concurrently: "Whether to create the index concurrently. Default: true" diff --git a/py/core/main/api/ingestion_router.py b/py/core/main/api/ingestion_router.py index 76f7b8ae4..3a90e4c81 100644 --- a/py/core/main/api/ingestion_router.py +++ b/py/core/main/api/ingestion_router.py @@ -138,11 +138,11 @@ async def ingest_files_app( auth_user=Depends(self.service.providers.auth.auth_wrapper), ) -> WrappedIngestionResponse: # type: ignore """ - Ingest files into the system. + Ingests files into R2R, resulting in stored `Document` objects. Each document has corresponding `Chunk` objects which are used in vector indexing and search. - This endpoint supports multipart/form-data requests, enabling you to ingest files and their associated metadatas into R2R. + This endpoint supports multipart/form-data requests. - A valid user authentication token is required to access this endpoint, as regular users can only ingest files for their own access. More expansive collection permissioning is under development. + A valid user authentication token is required to access this endpoint, as regular users can only ingest files for their own access. """ # Check if the user is a superuser if not auth_user.is_superuser: @@ -257,11 +257,11 @@ async def update_files_app( auth_user=Depends(self.service.providers.auth.auth_wrapper), ) -> WrappedUpdateResponse: """ - Update existing files in the system. + Ingests updated files into R2R, updating the corresponding `Document` and `Chunk` objects from previous ingestion. This endpoint supports multipart/form-data requests, enabling you to update files and their associated metadatas into R2R. - A valid user authentication token is required to access this endpoint, as regular users can only update their own files. More expansive collection permissioning is under development. 
+ A valid user authentication token is required to access this endpoint, as regular users can only update their own files. """ if not auth_user.is_superuser: for metadata in metadatas or []: @@ -366,11 +366,9 @@ async def ingest_chunks_app( auth_user=Depends(self.service.providers.auth.auth_wrapper), ) -> WrappedIngestionResponse: """ - Ingest text chunks into the system. + Ingests `Chunk` objects into the system as raw text and associated metadata. - This endpoint supports multipart/form-data requests, enabling you to ingest pre-parsed text chunks into R2R. - - A valid user authentication token is required to access this endpoint, as regular users can only ingest chunks for their own access. More expansive collection permissioning is under development. + A valid user authentication token is required to access this endpoint, as regular users can only ingest chunks for their own access. """ if document_id: try: @@ -437,6 +435,12 @@ async def update_chunk_app( run_with_orchestration: Optional[bool] = Body(True), auth_user=Depends(self.service.providers.auth.auth_wrapper), ) -> WrappedUpdateResponse: + """ + Updates a previously ingested `Chunk` object into the system as raw text and associated metadata. + + A valid user authentication token is required to access this endpoint, as regular users can only ingest chunks for their own access. + """ + try: workflow_input = { "document_id": str(document_id), @@ -509,6 +513,10 @@ async def create_vector_index_app( None, description=create_vector_descriptions.get("index_name"), ), + index_column: Optional[str] = Body( + None, + description=create_vector_descriptions.get("index_column"), + ), concurrently: bool = Body( default=True, description=create_vector_descriptions.get("concurrently"), @@ -532,6 +540,7 @@ async def create_vector_index_app( "index_method": index_method, "index_measure": index_measure, "index_name": index_name, + "index_column": index_column, "index_arguments": index_arguments, "concurrently": concurrently, }, diff --git a/py/core/main/api/kg_router.py b/py/core/main/api/kg_router.py index 1014cbffc..2d1f02617 100644 --- a/py/core/main/api/kg_router.py +++ b/py/core/main/api/kg_router.py @@ -102,7 +102,7 @@ async def create_graph( description="Settings for the graph creation process.", ), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedKGCreationResponse: # type: ignore + ): # -> WrappedKGCreationResponse: # type: ignore """ Creating a graph on your documents. This endpoint takes input a list of document ids and KGCreationSettings. If document IDs are not provided, the graph will be created on all documents in the system. @@ -170,7 +170,7 @@ async def enrich_graph( description="Settings for the graph enrichment process.", ), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedKGEnrichmentResponse: + ): # -> WrappedKGEnrichmentResponse: """ This endpoint enriches the graph with additional information. It creates communities of nodes based on their similarity and adds embeddings to the graph. 
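For reference, the retry changes to py/core/base/providers/llm.py above swap a fixed exponential-backoff sleep for a "full jitter" sleep, random.uniform(0, backoff), in both the async and sync retry loops. A minimal standalone sketch of that pattern follows; the names call_api, MAX_RETRIES, INITIAL_BACKOFF, and MAX_BACKOFF are illustrative assumptions, not part of the patch.

import asyncio
import random

MAX_RETRIES = 3        # illustrative values, not taken from the patch
INITIAL_BACKOFF = 1.0
MAX_BACKOFF = 64.0

async def call_with_full_jitter(call_api):
    """Retry an async call, sleeping a random duration in [0, backoff) between attempts."""
    backoff = INITIAL_BACKOFF
    for attempt in range(MAX_RETRIES):
        try:
            return await call_api()
        except Exception:
            if attempt == MAX_RETRIES - 1:
                raise
            # Full jitter desynchronizes concurrent clients retrying against the same API,
            # which is why the patch applies it to both the async and sync execution paths.
            await asyncio.sleep(random.uniform(0, backoff))
            backoff = min(backoff * 2, MAX_BACKOFF)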
diff --git a/py/core/main/app.py b/py/core/main/app.py index 287c26238..5fc6ec16c 100644 --- a/py/core/main/app.py +++ b/py/core/main/app.py @@ -1,12 +1,11 @@ from typing import Union from fastapi import FastAPI, Request -from fastapi.responses import JSONResponse from fastapi.middleware.cors import CORSMiddleware from fastapi.openapi.utils import get_openapi +from fastapi.responses import JSONResponse from core.base import R2RException - from core.providers import ( HatchetOrchestrationProvider, SimpleOrchestrationProvider, diff --git a/py/core/main/assembly/factory.py b/py/core/main/assembly/factory.py index fede0f34d..8ba3efc76 100644 --- a/py/core/main/assembly/factory.py +++ b/py/core/main/assembly/factory.py @@ -10,6 +10,7 @@ CompletionProvider, CryptoConfig, DatabaseConfig, + EmailConfig, EmbeddingConfig, EmbeddingProvider, IngestionConfig, @@ -24,11 +25,14 @@ logger = logging.getLogger() from core.providers import ( + AsyncSMTPEmailProvider, BCryptConfig, BCryptProvider, + ConsoleMockEmailProvider, HatchetOrchestrationProvider, LiteLLMCompletionProvider, LiteLLMEmbeddingProvider, + OllamaEmbeddingProvider, OpenAICompletionProvider, OpenAIEmbeddingProvider, PostgresDBProvider, @@ -49,21 +53,24 @@ def __init__(self, config: R2RConfig): @staticmethod async def create_auth_provider( auth_config: AuthConfig, - database_provider: PostgresDBProvider, crypto_provider: BCryptProvider, + database_provider: PostgresDBProvider, + email_provider: Union[ + AsyncSMTPEmailProvider, ConsoleMockEmailProvider + ], *args, **kwargs, ) -> Union[R2RAuthProvider, SupabaseAuthProvider]: if auth_config.provider == "r2r": r2r_auth = R2RAuthProvider( - auth_config, crypto_provider, database_provider + auth_config, crypto_provider, database_provider, email_provider ) await r2r_auth.initialize() return r2r_auth elif auth_config.provider == "supabase": return SupabaseAuthProvider( - auth_config, crypto_provider, database_provider + auth_config, crypto_provider, database_provider, email_provider ) else: raise ValueError( @@ -83,7 +90,13 @@ def create_crypto_provider( @staticmethod def create_ingestion_provider( - ingestion_config: IngestionConfig, *args, **kwargs + ingestion_config: IngestionConfig, + database_provider: PostgresDBProvider, + llm_provider: Union[ + LiteLLMCompletionProvider, OpenAICompletionProvider + ], + *args, + **kwargs, ) -> Union[R2RIngestionProvider, UnstructuredIngestionProvider]: config_dict = ( @@ -98,7 +111,9 @@ def create_ingestion_provider( r2r_ingestion_config = R2RIngestionConfig( **config_dict, **extra_fields ) - return R2RIngestionProvider(r2r_ingestion_config) + return R2RIngestionProvider( + r2r_ingestion_config, database_provider, llm_provider + ) elif config_dict["provider"] in [ "unstructured_local", "unstructured_api", @@ -108,7 +123,7 @@ def create_ingestion_provider( ) return UnstructuredIngestionProvider( - unstructured_ingestion_config, + unstructured_ingestion_config, database_provider, llm_provider ) else: raise ValueError( @@ -167,7 +182,11 @@ async def create_database_provider( @staticmethod def create_embedding_provider( embedding: EmbeddingConfig, *args, **kwargs - ) -> Union[LiteLLMEmbeddingProvider, OpenAIEmbeddingProvider]: + ) -> Union[ + LiteLLMEmbeddingProvider, + OllamaEmbeddingProvider, + OpenAIEmbeddingProvider, + ]: embedding_provider: Optional[EmbeddingProvider] = None if embedding.provider == "openai": @@ -184,6 +203,11 @@ def create_embedding_provider( embedding_provider = LiteLLMEmbeddingProvider(embedding) + elif embedding.provider == "ollama": + 
from core.providers import OllamaEmbeddingProvider + + embedding_provider = OllamaEmbeddingProvider(embedding) + else: raise ValueError( f"Embedding provider {embedding.provider} not supported" @@ -208,6 +232,25 @@ def create_llm_provider( raise ValueError("Language model provider not found") return llm_provider + @staticmethod + async def create_email_provider( + email_config: Optional[EmailConfig] = None, *args, **kwargs + ) -> Union[AsyncSMTPEmailProvider, ConsoleMockEmailProvider]: + """Creates an email provider based on configuration.""" + if not email_config: + raise ValueError( + f"No email configuration provided for email provider, please add `[email]` to your `r2r.toml`." + ) + + if email_config.provider == "smtp": + return AsyncSMTPEmailProvider(email_config) + elif email_config.provider == "console_mock": + return ConsoleMockEmailProvider(email_config) + else: + raise ValueError( + f"Email provider {email_config.provider} not supported." + ) + async def create_providers( self, auth_provider_override: Optional[ @@ -215,8 +258,15 @@ async def create_providers( ] = None, crypto_provider_override: Optional[BCryptProvider] = None, database_provider_override: Optional[PostgresDBProvider] = None, + email_provider_override: Optional[ + Union[AsyncSMTPEmailProvider, ConsoleMockEmailProvider] + ] = None, embedding_provider_override: Optional[ - Union[LiteLLMEmbeddingProvider, OpenAIEmbeddingProvider] + Union[ + LiteLLMEmbeddingProvider, + OpenAIEmbeddingProvider, + OllamaEmbeddingProvider, + ] ] = None, ingestion_provider_override: Optional[ Union[R2RIngestionProvider, UnstructuredIngestionProvider] @@ -237,21 +287,14 @@ async def create_providers( self.config.embedding, *args, **kwargs ) ) - ingestion_provider = ( - ingestion_provider_override - or self.create_ingestion_provider( - self.config.ingestion, *args, **kwargs - ) - ) - llm_provider = llm_provider_override or self.create_llm_provider( self.config.completion, *args, **kwargs ) + crypto_provider = ( crypto_provider_override or self.create_crypto_provider(self.config.crypto, *args, **kwargs) ) - database_provider = ( database_provider_override or await self.create_database_provider( @@ -259,12 +302,31 @@ async def create_providers( ) ) + ingestion_provider = ( + ingestion_provider_override + or self.create_ingestion_provider( + self.config.ingestion, + database_provider, + llm_provider, + *args, + **kwargs, + ) + ) + + email_provider = ( + email_provider_override + or await self.create_email_provider( + self.config.email, crypto_provider, *args, **kwargs + ) + ) + auth_provider = ( auth_provider_override or await self.create_auth_provider( self.config.auth, - database_provider, crypto_provider, + database_provider, + email_provider, *args, **kwargs, ) @@ -287,6 +349,7 @@ async def create_providers( embedding=embedding_provider, ingestion=ingestion_provider, llm=llm_provider, + email=email_provider, orchestration=orchestration_provider, logging=logging_provider, ) diff --git a/py/core/main/config.py b/py/core/main/config.py index 8853b476a..4b914d6da 100644 --- a/py/core/main/config.py +++ b/py/core/main/config.py @@ -1,7 +1,6 @@ import logging import os from enum import Enum -from pathlib import Path from typing import Any, Optional import toml @@ -14,6 +13,7 @@ from ..base.providers.auth import AuthConfig from ..base.providers.crypto import CryptoConfig from ..base.providers.database import DatabaseConfig +from ..base.providers.email import EmailConfig from ..base.providers.embedding import EmbeddingConfig from 
..base.providers.ingestion import IngestionConfig from ..base.providers.llm import CompletionConfig @@ -41,6 +41,7 @@ class R2RConfig: "app": [], "completion": ["provider"], "crypto": ["provider"], + "email": ["provider"], "auth": ["provider"], "embedding": [ "provider", @@ -63,6 +64,7 @@ class R2RConfig: crypto: CryptoConfig database: DatabaseConfig embedding: EmbeddingConfig + email: EmailConfig ingestion: IngestionConfig logging: PersistentLoggingConfig agent: AgentConfig @@ -113,6 +115,7 @@ def __init__(self, config_data: dict[str, Any]): self.auth = AuthConfig.create(**self.auth, app=self.app) # type: ignore self.completion = CompletionConfig.create(**self.completion, app=self.app) # type: ignore self.crypto = CryptoConfig.create(**self.crypto, app=self.app) # type: ignore + self.email = EmailConfig.create(**self.email, app=self.app) # type: ignore self.database = DatabaseConfig.create(**self.database, app=self.app) # type: ignore self.embedding = EmbeddingConfig.create(**self.embedding, app=self.app) # type: ignore self.ingestion = IngestionConfig.create(**self.ingestion, app=self.app) # type: ignore diff --git a/py/core/main/services/ingestion_service.py b/py/core/main/services/ingestion_service.py index ab222737e..72637b1c0 100644 --- a/py/core/main/services/ingestion_service.py +++ b/py/core/main/services/ingestion_service.py @@ -660,6 +660,7 @@ def parse_create_vector_index_input(data: dict) -> dict: "index_method": IndexMethod(data["index_method"]), "index_measure": IndexMeasure(data["index_measure"]), "index_name": data["index_name"], + "index_column": data["index_column"], "index_arguments": data["index_arguments"], "concurrently": data["concurrently"], } diff --git a/py/core/parsers/__init__.py b/py/core/parsers/__init__.py index 0439c320b..2915f1ab2 100644 --- a/py/core/parsers/__init__.py +++ b/py/core/parsers/__init__.py @@ -7,9 +7,10 @@ "AudioParser", "DOCXParser", "ImageParser", - "PDFParser", + "VLMPDFParser", + "BasicPDFParser", "PDFParserUnstructured", - "PDFParserMarker", + "VLMPDFParser", "PPTParser", # Structured parsers "CSVParser", diff --git a/py/core/parsers/media/__init__.py b/py/core/parsers/media/__init__.py index 71c9bf0df..38881e171 100644 --- a/py/core/parsers/media/__init__.py +++ b/py/core/parsers/media/__init__.py @@ -2,10 +2,9 @@ from .docx_parser import DOCXParser from .img_parser import ImageParser from .pdf_parser import ( # type: ignore - PDFParser, - PDFParserMarker, + BasicPDFParser, PDFParserUnstructured, - ZeroxPDFParser, + VLMPDFParser, ) from .ppt_parser import PPTParser @@ -13,9 +12,8 @@ "AudioParser", "DOCXParser", "ImageParser", - "PDFParser", + "VLMPDFParser", + "BasicPDFParser", "PDFParserUnstructured", - "ZeroxPDFParser", - "PDFParserMarker", "PPTParser", ] diff --git a/py/core/parsers/media/audio_parser.py b/py/core/parsers/media/audio_parser.py index a8026c1af..1d4421d37 100644 --- a/py/core/parsers/media/audio_parser.py +++ b/py/core/parsers/media/audio_parser.py @@ -1,35 +1,81 @@ +import base64 +import logging import os +import tempfile from typing import AsyncGenerator from core.base.parsers.base_parser import AsyncParser -from core.parsers.media.openai_helpers import process_audio_with_openai +from core.base.providers import ( + CompletionProvider, + DatabaseProvider, + IngestionConfig, +) + +logger = logging.getLogger() class AudioParser(AsyncParser[bytes]): - """A parser for audio data.""" + """A parser for audio data using Whisper transcription.""" def __init__( - self, api_base: str = 
"https://api.openai.com/v1/audio/transcriptions" + self, + config: IngestionConfig, + database_provider: DatabaseProvider, + llm_provider: CompletionProvider, ): - self.api_base = api_base - self.openai_api_key = os.environ.get("OPENAI_API_KEY") + self.database_provider = database_provider + self.llm_provider = llm_provider + self.config = config + try: + from litellm import atranscription + + self.atranscription = atranscription + except ImportError: + logger.error("Failed to import LiteLLM transcription") + raise ImportError( + "Please install the `litellm` package to use the AudioParser." + ) async def ingest( # type: ignore - self, data: bytes, chunk_size: int = 1024, *args, **kwargs + self, data: bytes, **kwargs ) -> AsyncGenerator[str, None]: - """Ingest audio data and yield a transcription.""" - temp_audio_path = "temp_audio.wav" - with open(temp_audio_path, "wb") as f: - f.write(data) + """ + Ingest audio data and yield a transcription using Whisper via LiteLLM. + + Args: + data: Raw audio bytes + chunk_size: Size of text chunks to yield + model: The model to use for transcription (default is whisper-1) + *args, **kwargs: Additional arguments passed to the transcription call + + Yields: + Chunks of transcribed text + """ try: - transcription_text = process_audio_with_openai( - open(temp_audio_path, "rb"), self.openai_api_key # type: ignore + # Create a temporary file to store the audio data + with tempfile.NamedTemporaryFile( + suffix=".wav", delete=False + ) as temp_file: + temp_file.write(data) + temp_file_path = temp_file.name + + # Call Whisper transcription + response = await self.atranscription( + model=self.config.audio_transcription_model, + file=open(temp_file_path, "rb"), + **kwargs, ) - # split text into small chunks and yield them - for i in range(0, len(transcription_text), chunk_size): - text = transcription_text[i : i + chunk_size] - if text and text != "": - yield text + # The response should contain the transcribed text directly + yield response.text + + except Exception as e: + logger.error(f"Error processing audio with Whisper: {str(e)}") + raise + finally: - os.remove(temp_audio_path) + # Clean up the temporary file + try: + os.unlink(temp_file_path) + except Exception as e: + logger.warning(f"Failed to delete temporary file: {str(e)}") diff --git a/py/core/parsers/media/docx_parser.py b/py/core/parsers/media/docx_parser.py index 21272e1b2..86c242115 100644 --- a/py/core/parsers/media/docx_parser.py +++ b/py/core/parsers/media/docx_parser.py @@ -3,12 +3,26 @@ from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser +from core.base.providers import ( + CompletionProvider, + DatabaseProvider, + IngestionConfig, +) class DOCXParser(AsyncParser[DataType]): """A parser for DOCX data.""" - def __init__(self): + def __init__( + self, + config: IngestionConfig, + database_provider: DatabaseProvider, + llm_provider: CompletionProvider, + ): + self.database_provider = database_provider + self.llm_provider = llm_provider + self.config = config + try: from docx import Document diff --git a/py/core/parsers/media/img_parser.py b/py/core/parsers/media/img_parser.py index 206c9160f..719440a4c 100644 --- a/py/core/parsers/media/img_parser.py +++ b/py/core/parsers/media/img_parser.py @@ -1,50 +1,109 @@ import base64 import logging -import os from typing import AsyncGenerator -from core.base.abstractions import DataType +from core.base.abstractions import DataType, GenerationConfig from core.base.parsers.base_parser import AsyncParser -from 
core.parsers.media.openai_helpers import process_frame_with_openai +from core.base.providers import ( + CompletionProvider, + DatabaseProvider, + IngestionConfig, +) logger = logging.getLogger() class ImageParser(AsyncParser[DataType]): - """A parser for image data.""" + """A parser for image data using vision models.""" def __init__( self, - model: str = "gpt-4o-mini", - max_tokens: int = 2_048, - api_base: str = "https://api.openai.com/v1/chat/completions", - max_image_size: int = 1 * 1024 * 1024, # 4MB limit + config: IngestionConfig, + database_provider: DatabaseProvider, + llm_provider: CompletionProvider, ): - self.model = model - self.max_tokens = max_tokens - self.openai_api_key = os.environ.get("OPENAI_API_KEY") - self.api_base = api_base - self.max_image_size = max_image_size + self.database_provider = database_provider + self.llm_provider = llm_provider + self.config = config + self.vision_prompt_text = None + + try: + from litellm import supports_vision + + self.supports_vision = supports_vision + except ImportError: + logger.error("Failed to import LiteLLM vision support") + raise ImportError( + "Please install the `litellm` package to use the ImageParser." + ) async def ingest( # type: ignore - self, data: DataType, chunk_size: int = 1024, *args, **kwargs + self, data: DataType, **kwargs ) -> AsyncGenerator[str, None]: - """Ingest image data and yield a description.""" - - if isinstance(data, bytes): - # Encode to base64 - data = base64.b64encode(data).decode("utf-8") - - openai_text = process_frame_with_openai( - data, # type: ignore - self.openai_api_key, # type: ignore - self.model, - self.max_tokens, - self.api_base, - ) - - # split text into small chunks and yield them - for i in range(0, len(openai_text), chunk_size): - text = openai_text[i : i + chunk_size] - if text and text != "": - yield text + """ + Ingest image data and yield a description using vision model. 
+ + Args: + data: Image data (bytes or base64 string) + chunk_size: Size of text chunks to yield + *args, **kwargs: Additional arguments passed to the completion call + + Yields: + Chunks of image description text + """ + if not self.vision_prompt_text: + self.vision_prompt_text = await self.database_provider.get_prompt( # type: ignore + prompt_name=self.config.vision_img_prompt_name + ) + try: + # Verify model supports vision + if not self.supports_vision(model=self.config.vision_img_model): + raise ValueError( + f"Model {self.config.vision_img_model} does not support vision" + ) + + # Encode image data if needed + if isinstance(data, bytes): + image_data = base64.b64encode(data).decode("utf-8") + else: + image_data = data + + # Configure the generation parameters + generation_config = GenerationConfig( + model=self.config.vision_img_model, + stream=False, + ) + + # Prepare message with image + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": self.vision_prompt_text}, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{image_data}" + }, + }, + ], + } + ] + + # Get completion from LiteLLM provider + response = await self.llm_provider.aget_completion( + messages=messages, generation_config=generation_config + ) + + # Extract description from response + if response.choices and response.choices[0].message: + content = response.choices[0].message.content + if not content: + raise ValueError("No content in response") + yield content + else: + raise ValueError("No response content") + + except Exception as e: + logger.error(f"Error processing image with vision model: {str(e)}") + raise diff --git a/py/core/parsers/media/openai_helpers.py b/py/core/parsers/media/openai_helpers.py deleted file mode 100644 index 729426a63..000000000 --- a/py/core/parsers/media/openai_helpers.py +++ /dev/null @@ -1,63 +0,0 @@ -"""Implementations of parsers for different data types.""" - -import logging - -import requests - -logger = logging.getLogger() - - -def process_frame_with_openai( - data: bytes, - api_key: str, - model: str = "gpt-4o", - max_tokens: int = 2_048, - api_base: str = "https://api.openai.com/v1/chat/completions", -) -> str: - headers = { - "Content-Type": "application/json", - "Authorization": f"Bearer {api_key}", - } - - payload = { - "model": model, - "messages": [ - { - "role": "user", - "content": [ - { - "type": "text", - "text": "First, provide a title for the image, then explain everything that you see. Be very thorough in your analysis as a user will need to understand the image without seeing it. If it is possible to transcribe the image to text directly, then do so. 
The more detail you provide, the better the user will understand the image.", - }, - { - "type": "image_url", - "image_url": {"url": f"data:image/jpeg;base64,{data}"}, # type: ignore - }, - ], - } - ], - "max_tokens": max_tokens, - } - - response = requests.post(api_base, headers=headers, json=payload) - response_json = response.json() - return response_json["choices"][0]["message"]["content"] - - -def process_audio_with_openai( - audio_file, - api_key: str, - audio_api_base: str = "https://api.openai.com/v1/audio/transcriptions", -) -> str: - headers = {"Authorization": f"Bearer {api_key}"} - - transcription_response = requests.post( - audio_api_base, - headers=headers, - files={"file": audio_file}, - data={"model": "whisper-1"}, - ) - - transcription = transcription_response.json() - - return transcription["text"] diff --git a/py/core/parsers/media/pdf_parser.py b/py/core/parsers/media/pdf_parser.py index 2372c7fc8..976ed5168 100644 --- a/py/core/parsers/media/pdf_parser.py +++ b/py/core/parsers/media/pdf_parser.py @@ -1,5 +1,6 @@ # type: ignore import asyncio +import base64 import logging import os import string @@ -7,17 +8,216 @@ from io import BytesIO from typing import AsyncGenerator -from core.base.abstractions import DataType +import aiofiles +from pdf2image import convert_from_path + +from core.base.abstractions import DataType, GenerationConfig from core.base.parsers.base_parser import AsyncParser +from core.base.providers import ( + CompletionProvider, + DatabaseProvider, + IngestionConfig, +) logger = logging.getLogger() -ZEROX_DEFAULT_MODEL = "openai/gpt-4o-mini" -class PDFParser(AsyncParser[DataType]): +class VLMPDFParser(AsyncParser[DataType]): + """A parser for PDF documents using vision models for page processing.""" + + def __init__( + self, + config: IngestionConfig, + database_provider: DatabaseProvider, + llm_provider: CompletionProvider, + ): + self.database_provider = database_provider + self.llm_provider = llm_provider + self.config = config + self.vision_prompt_text = None + + try: + from litellm import supports_vision + + self.supports_vision = supports_vision + except ImportError: + logger.error("Failed to import LiteLLM vision support") + raise ImportError( + "Please install the `litellm` package to use the VLMPDFParser." 
+ ) + + async def convert_pdf_to_images( + self, pdf_path: str, temp_dir: str + ) -> list[str]: + """Convert PDF pages to images asynchronously.""" + options = { + "pdf_path": pdf_path, + "output_folder": temp_dir, + "dpi": 300, # Configurable via config if needed + "fmt": "jpeg", + "thread_count": 4, + "paths_only": True, + } + try: + image_paths = await asyncio.to_thread(convert_from_path, **options) + return image_paths + except Exception as err: + logger.error(f"Error converting PDF to images: {err}") + raise + + async def process_page( + self, image_path: str, page_num: int + ) -> dict[str, str]: + """Process a single PDF page using the vision model.""" + + try: + # Read and encode image + async with aiofiles.open(image_path, "rb") as image_file: + image_data = await image_file.read() + image_base64 = base64.b64encode(image_data).decode("utf-8") + + # Verify model supports vision + if not self.supports_vision(model=self.config.vision_pdf_model): + raise ValueError( + f"Model {self.config.vision_pdf_model} does not support vision" + ) + + # Configure generation parameters + generation_config = GenerationConfig( + model=self.config.vision_pdf_model, + stream=False, + ) + + # Prepare message with image + messages = [ + { + "role": "user", + "content": [ + {"type": "text", "text": self.vision_prompt_text}, + { + "type": "image_url", + "image_url": { + "url": f"data:image/jpeg;base64,{image_base64}" + }, + }, + ], + } + ] + + # Get completion from LiteLLM provider + response = await self.llm_provider.aget_completion( + messages=messages, generation_config=generation_config + ) + + if response.choices and response.choices[0].message: + content = response.choices[0].message.content + if not content: + raise ValueError("No content in response") + return {"page": str(page_num), "content": content} + else: + raise ValueError("No response content") + + except Exception as e: + logger.error( + f"Error processing page {page_num} with vision model: {str(e)}" + ) + raise + + async def ingest( + self, data: DataType, maintain_order: bool = False, **kwargs + ) -> AsyncGenerator[dict[str, str], None]: + """ + Ingest PDF data and yield descriptions for each page using vision model. + + Args: + data: PDF file path or bytes + maintain_order: If True, yields results in page order. If False, yields as completed. 
+ **kwargs: Additional arguments passed to the completion call + + Yields: + Dict containing page number and content for each processed page + """ + if not self.vision_prompt_text: + self.vision_prompt_text = await self.database_provider.get_prompt( # type: ignore + prompt_name=self.config.vision_pdf_prompt_name + ) + + temp_dir = None + try: + # Create temporary directory for image processing + temp_dir = os.path.join(os.getcwd(), "temp_pdf_images") + os.makedirs(temp_dir, exist_ok=True) + + # Handle both file path and bytes input + if isinstance(data, bytes): + pdf_path = os.path.join(temp_dir, "temp.pdf") + async with aiofiles.open(pdf_path, "wb") as f: + await f.write(data) + else: + pdf_path = data + + # Convert PDF to images + image_paths = await self.convert_pdf_to_images(pdf_path, temp_dir) + # Create tasks for all pages + tasks = { + asyncio.create_task( + self.process_page(image_path, page_num) + ): page_num + for page_num, image_path in enumerate(image_paths, 1) + } + + if maintain_order: + # Store results in order + pending = set(tasks.keys()) + results = {} + next_page = 1 + + while pending: + # Get next completed task + done, pending = await asyncio.wait( + pending, return_when=asyncio.FIRST_COMPLETED + ) + + # Process completed tasks + for task in done: + result = await task + page_num = int(result["page"]) + results[page_num] = result + + # Yield results in order + while next_page in results: + yield results.pop(next_page)["content"] + next_page += 1 + else: + # Yield results as they complete + for coro in asyncio.as_completed(tasks.keys()): + result = await coro + yield result["content"] + + except Exception as e: + logger.error(f"Error processing PDF: {str(e)}") + raise + + finally: + # Cleanup temporary files + if temp_dir and os.path.exists(temp_dir): + for file in os.listdir(temp_dir): + os.remove(os.path.join(temp_dir, file)) + os.rmdir(temp_dir) + + +class BasicPDFParser(AsyncParser[DataType]): """A parser for PDF data.""" - def __init__(self): + def __init__( + self, + config: IngestionConfig, + database_provider: DatabaseProvider, + llm_provider: CompletionProvider, + ): + self.database_provider = database_provider + self.llm_provider = llm_provider + self.config = config try: from pypdf import PdfReader @@ -65,54 +265,16 @@ async def ingest( yield page_text -class PDFParserSix(AsyncParser[DataType]): - """A parser for PDF data.""" - - def __init__(self): - try: - from pdfminer.high_level import extract_text_to_fp - from pdfminer.layout import LAParams - - self.extract_text_to_fp = extract_text_to_fp - self.LAParams = LAParams - except ImportError: - raise ValueError( - "Error, `pdfminer.six` is required to run `PDFParser`. Please install it using `pip install pdfminer.six`." 
- ) - - async def ingest(self, data: bytes, **kwargs) -> AsyncGenerator[str, None]: - """Ingest PDF data and yield text from each page.""" - if not isinstance(data, bytes): - raise ValueError("PDF data must be in bytes format.") - - pdf_file = BytesIO(data) - - async def process_page(page_number): - output = BytesIO() - await asyncio.to_thread( - self.extract_text_to_fp, - pdf_file, - output, - page_numbers=[page_number], - laparams=self.LAParams(), - ) - page_text = output.getvalue().decode("utf-8") - return "".join(filter(lambda x: x in string.printable, page_text)) - - from pdfminer.pdfdocument import PDFDocument - from pdfminer.pdfparser import PDFParser as pdfminer_PDFParser - - parser = pdfminer_PDFParser(pdf_file) - document = PDFDocument(parser) - - for page_number in range(len(list(document.get_pages()))): - page_text = await process_page(page_number) - if page_text: - yield page_text - - class PDFParserUnstructured(AsyncParser[DataType]): - def __init__(self): + def __init__( + self, + config: IngestionConfig, + database_provider: DatabaseProvider, + llm_provider: CompletionProvider, + ): + self.database_provider = database_provider + self.llm_provider = llm_provider + self.config = config try: from unstructured.partition.pdf import partition_pdf @@ -141,79 +303,3 @@ async def ingest( ) for element in elements: yield element.text - - -class PDFParserMarker(AsyncParser[DataType]): - model_refs = None - - def __init__(self): - try: - from marker.convert import convert_single_pdf - from marker.models import load_all_models - - self.convert_single_pdf = convert_single_pdf - if PDFParserMarker.model_refs is None: - PDFParserMarker.model_refs = load_all_models() - - except ImportError as e: - raise ValueError( - f"Error, marker is not installed {e}, please install using `pip install marker-pdf` " - ) - - async def ingest( - self, data: DataType, **kwargs - ) -> AsyncGenerator[str, None]: - if isinstance(data, str): - raise ValueError("PDF data must be in bytes format.") - - text, _, _ = self.convert_single_pdf( - BytesIO(data), PDFParserMarker.model_refs - ) - yield text - - -class ZeroxPDFParser(AsyncParser[DataType]): - """An advanced PDF parser using zerox.""" - - def __init__(self): - """ - Use the zerox library to parse PDF data. - - Args: - cleanup (bool, optional): Whether to clean up temporary files after processing. Defaults to True. - concurrency (int, optional): The number of concurrent processes to run. Defaults to 10. - file_data (Optional[str], optional): The file data to process. Defaults to an empty string. - maintain_format (bool, optional): Whether to maintain the format from the previous page. Defaults to False. - model (str, optional): The model to use for generating completions. Defaults to "gpt-4o-mini". Refer to LiteLLM Providers for the correct model name, as it may differ depending on the provider. - temp_dir (str, optional): The directory to store temporary files, defaults to some named folder in system's temp directory. If already exists, the contents will be deleted before zerox uses it. - custom_system_prompt (str, optional): The system prompt to use for the model, this overrides the default system prompt of zerox.Generally it is not required unless you want some specific behaviour. When set, it will raise a friendly warning. Defaults to None. - kwargs (dict, optional): Additional keyword arguments to pass to the litellm.completion method. Refer to the LiteLLM Documentation and Completion Input for details. 
- - """ - try: - # from pyzerox import zerox - from .pyzerox import zerox - - self.zerox = zerox - - except ImportError as e: - raise ValueError( - f"Error, zerox installation failed with Error='{e}', please install through the R2R ingestion bundle with `pip install r2r -E ingestion-bundle` " - ) - - async def ingest( - self, data: DataType, **kwargs - ) -> AsyncGenerator[str, None]: - if isinstance(data, str): - raise ValueError("PDF data must be in bytes format.") - - model = kwargs.get("zerox_parsing_model", ZEROX_DEFAULT_MODEL) - model = model.split("/")[-1] # remove the provider prefix - result = await self.zerox( - file_data=data, - model=model, - verbose=True, - ) - - for page in result.pages: - yield page.content diff --git a/py/core/parsers/media/ppt_parser.py b/py/core/parsers/media/ppt_parser.py index 5f19a0171..6fa8f52e9 100644 --- a/py/core/parsers/media/ppt_parser.py +++ b/py/core/parsers/media/ppt_parser.py @@ -3,12 +3,25 @@ from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser +from core.base.providers import ( + CompletionProvider, + DatabaseProvider, + IngestionConfig, +) class PPTParser(AsyncParser[DataType]): """A parser for PPT data.""" - def __init__(self): + def __init__( + self, + config: IngestionConfig, + database_provider: DatabaseProvider, + llm_provider: CompletionProvider, + ): + self.database_provider = database_provider + self.llm_provider = llm_provider + self.config = config try: from pptx import Presentation diff --git a/py/core/parsers/media/pyzerox/__init__.py b/py/core/parsers/media/pyzerox/__init__.py deleted file mode 100644 index 18cd95ac3..000000000 --- a/py/core/parsers/media/pyzerox/__init__.py +++ /dev/null @@ -1,10 +0,0 @@ -from .constants.prompts import Prompts -from .zerox_core import zerox - -DEFAULT_SYSTEM_PROMPT = Prompts.DEFAULT_SYSTEM_PROMPT - -__all__ = [ - "zerox", - "Prompts", - "DEFAULT_SYSTEM_PROMPT", -] diff --git a/py/core/parsers/media/pyzerox/constants/__init__.py b/py/core/parsers/media/pyzerox/constants/__init__.py deleted file mode 100644 index 4378b38e0..000000000 --- a/py/core/parsers/media/pyzerox/constants/__init__.py +++ /dev/null @@ -1,9 +0,0 @@ -from .conversion import PDFConversionDefaultOptions -from .messages import Messages -from .prompts import Prompts - -__all__ = [ - "PDFConversionDefaultOptions", - "Messages", - "Prompts", -] diff --git a/py/core/parsers/media/pyzerox/constants/conversion.py b/py/core/parsers/media/pyzerox/constants/conversion.py deleted file mode 100644 index 4320e3484..000000000 --- a/py/core/parsers/media/pyzerox/constants/conversion.py +++ /dev/null @@ -1,8 +0,0 @@ -class PDFConversionDefaultOptions: - """Default options for converting PDFs to images""" - - DPI = 300 - FORMAT = "png" - SIZE = (None, 1056) - THREAD_COUNT = 4 - USE_PDFTOCAIRO = True diff --git a/py/core/parsers/media/pyzerox/constants/messages.py b/py/core/parsers/media/pyzerox/constants/messages.py deleted file mode 100644 index ffa3f68ec..000000000 --- a/py/core/parsers/media/pyzerox/constants/messages.py +++ /dev/null @@ -1,56 +0,0 @@ -class Messages: - """User-facing messages""" - - MISSING_ENVIRONMENT_VARIABLES = """ - Required environment variable (keys) from the model are Missing. Please set the required environment variables for the model provider. - Refer: https://docs.litellm.ai/docs/providers - """ - - NON_VISION_MODEL = """ - The provided model is not a vision model. Please provide a vision model. 
- """ - - MODEL_ACCESS_ERROR = """ - Your provided model can't be accessed. Please make sure you have access to the model and also required environment variables are setup correctly including valid api key(s). - Refer: https://docs.litellm.ai/docs/providers - """ - - CUSTOM_SYSTEM_PROMPT_WARNING = """ - Custom system prompt was provided which overrides the default system prompt. We assume that you know what you are doing. - """ - - MAINTAIN_FORMAT_SELECTED_PAGES_WARNING = """ - The maintain_format flag is set to True in conjunction with select_pages input given. This may result in unexpected behavior. - """ - - PAGE_NUMBER_OUT_OF_BOUND_ERROR = """ - The page number(s) provided is out of bound. Please provide a valid page number(s). - """ - - NON_200_RESPONSE = """ - Model API returned status code {status_code}: {data} - - Please check the litellm documentation for more information. https://docs.litellm.ai/docs/exception_mapping. - """ - - COMPLETION_ERROR = """ - Error in Completion Response. Error: {0} - Please check the status of your model provider API status. - """ - - PDF_CONVERSION_FAILED = """ - Error during PDF conversion: {0} - Please check the PDF file and try again. For more information: https://github.com/Belval/pdf2image - """ - - FILE_UNREACHAGBLE = """ - File not found or unreachable. Status Code: {0} - """ - - FILE_PATH_MISSING = """ - File path is invalid or missing. - """ - - FAILED_TO_SAVE_FILE = """Failed to save file to local drive""" - - FAILED_TO_PROCESS_IMAGE = """Failed to process image""" diff --git a/py/core/parsers/media/pyzerox/constants/patterns.py b/py/core/parsers/media/pyzerox/constants/patterns.py deleted file mode 100644 index 6be1a77e1..000000000 --- a/py/core/parsers/media/pyzerox/constants/patterns.py +++ /dev/null @@ -1,6 +0,0 @@ -class Patterns: - """Regex patterns for markdown and code blocks""" - - MATCH_MARKDOWN_BLOCKS = r"^```[a-z]*\n([\s\S]*?)\n```$" - - MATCH_CODE_BLOCKS = r"^```\n([\s\S]*?)\n```$" diff --git a/py/core/parsers/media/pyzerox/constants/prompts.py b/py/core/parsers/media/pyzerox/constants/prompts.py deleted file mode 100644 index a59680a37..000000000 --- a/py/core/parsers/media/pyzerox/constants/prompts.py +++ /dev/null @@ -1,8 +0,0 @@ -class Prompts: - """Class for storing prompts for the Zerox system.""" - - DEFAULT_SYSTEM_PROMPT = """ - Convert the following PDF page to markdown. - Return only the markdown with no explanation text. - Do not exclude any content from the page. 
- """ diff --git a/py/core/parsers/media/pyzerox/errors/__init__.py b/py/core/parsers/media/pyzerox/errors/__init__.py deleted file mode 100644 index 7fa7bedd3..000000000 --- a/py/core/parsers/media/pyzerox/errors/__init__.py +++ /dev/null @@ -1,21 +0,0 @@ -from .exceptions import ( - FailedToProcessFile, - FailedToSaveFile, - FileUnavailable, - MissingEnvironmentVariables, - ModelAccessError, - NotAVisionModel, - PageNumberOutOfBoundError, - ResourceUnreachableException, -) - -__all__ = [ - "NotAVisionModel", - "ModelAccessError", - "PageNumberOutOfBoundError", - "MissingEnvironmentVariables", - "ResourceUnreachableException", - "FileUnavailable", - "FailedToSaveFile", - "FailedToProcessFile", -] diff --git a/py/core/parsers/media/pyzerox/errors/base.py b/py/core/parsers/media/pyzerox/errors/base.py deleted file mode 100644 index f1e761141..000000000 --- a/py/core/parsers/media/pyzerox/errors/base.py +++ /dev/null @@ -1,21 +0,0 @@ -from typing import Optional - - -class CustomException(Exception): - """ - Base class for custom exceptions - """ - - def __init__( - self, - message: Optional[str] = None, - extra_info: Optional[dict] = None, - ): - self.message = message - self.extra_info = extra_info - super().__init__(self.message) - - def __str__(self): - if self.extra_info: - return f"{self.message} (Extra Info: {self.extra_info})" - return self.message diff --git a/py/core/parsers/media/pyzerox/errors/exceptions.py b/py/core/parsers/media/pyzerox/errors/exceptions.py deleted file mode 100644 index ee90873d6..000000000 --- a/py/core/parsers/media/pyzerox/errors/exceptions.py +++ /dev/null @@ -1,93 +0,0 @@ -from typing import Dict, Optional - -# Package Imports -from ..constants import Messages -from .base import CustomException - - -class MissingEnvironmentVariables(CustomException): - """Exception raised when the model provider environment variables, API key(s) are missing. 
Refer: https://docs.litellm.ai/docs/providers""" - - def __init__( - self, - message: str = Messages.MISSING_ENVIRONMENT_VARIABLES, - extra_info: Optional[Dict] = None, - ): - super().__init__(message, extra_info) - - -class NotAVisionModel(CustomException): - """Exception raised when the provided model is not a vision model.""" - - def __init__( - self, - message: str = Messages.NON_VISION_MODEL, - extra_info: Optional[Dict] = None, - ): - super().__init__(message, extra_info) - - -class ModelAccessError(CustomException): - """Exception raised when the provided model can't be accessed due to incorrect credentials/keys or incorrect environent variables setup.""" - - def __init__( - self, - message: str = Messages.MODEL_ACCESS_ERROR, - extra_info: Optional[Dict] = None, - ): - super().__init__(message, extra_info) - - -class PageNumberOutOfBoundError(CustomException): - """Exception invalid page number(s) provided.""" - - def __init__( - self, - message: str = Messages.PAGE_NUMBER_OUT_OF_BOUND_ERROR, - extra_info: Optional[Dict] = None, - ): - super().__init__(message, extra_info) - - -class ResourceUnreachableException(CustomException): - """Exception raised when a resource is unreachable.""" - - def __init__( - self, - message: str = Messages.FILE_UNREACHAGBLE, - extra_info: Optional[Dict] = None, - ): - super().__init__(message, extra_info) - - -class FileUnavailable(CustomException): - """Exception raised when a file is unavailable.""" - - def __init__( - self, - message: str = Messages.FILE_PATH_MISSING, - extra_info: Optional[Dict] = None, - ): - super().__init__(message, extra_info) - - -class FailedToSaveFile(CustomException): - """Exception raised when a file fails to save.""" - - def __init__( - self, - message: str = Messages.FAILED_TO_SAVE_FILE, - extra_info: Optional[Dict] = None, - ): - super().__init__(message, extra_info) - - -class FailedToProcessFile(CustomException): - """Exception raised when a file fails to process.""" - - def __init__( - self, - message: str = Messages.FAILED_TO_PROCESS_IMAGE, - extra_info: Optional[Dict] = None, - ): - super().__init__(message, extra_info) diff --git a/py/core/parsers/media/pyzerox/processor/__init__.py b/py/core/parsers/media/pyzerox/processor/__init__.py deleted file mode 100644 index 1124805e8..000000000 --- a/py/core/parsers/media/pyzerox/processor/__init__.py +++ /dev/null @@ -1,14 +0,0 @@ -from .image import encode_image_to_base64, save_image -from .pdf import convert_pdf_to_images, process_page, process_pages_in_batches -from .text import format_markdown -from .utils import download_file - -__all__ = [ - "save_image", - "encode_image_to_base64", - "convert_pdf_to_images", - "format_markdown", - "download_file", - "process_page", - "process_pages_in_batches", -] diff --git a/py/core/parsers/media/pyzerox/processor/image.py b/py/core/parsers/media/pyzerox/processor/image.py deleted file mode 100644 index 8ad973f4f..000000000 --- a/py/core/parsers/media/pyzerox/processor/image.py +++ /dev/null @@ -1,27 +0,0 @@ -import base64 -import io - -import aiofiles - - -async def encode_image_to_base64(image_path: str) -> str: - """Encode an image to base64 asynchronously.""" - async with aiofiles.open(image_path, "rb") as image_file: - image_data = await image_file.read() - return base64.b64encode(image_data).decode("utf-8") - - -async def save_image(image, image_path: str): - """Save an image to a file asynchronously.""" - # Convert PIL Image to BytesIO object - with io.BytesIO() as buffer: - image.save( - buffer, format=image.format - ) # 
Save the image to the BytesIO object - image_data = ( - buffer.getvalue() - ) # Get the image data from the BytesIO object - - # Write image data to file asynchronously - async with aiofiles.open(image_path, "wb") as f: - await f.write(image_data) diff --git a/py/core/parsers/media/pyzerox/processor/pdf.py b/py/core/parsers/media/pyzerox/processor/pdf.py deleted file mode 100644 index 5bc874382..000000000 --- a/py/core/parsers/media/pyzerox/processor/pdf.py +++ /dev/null @@ -1,115 +0,0 @@ -import asyncio -import logging -import os -from typing import TYPE_CHECKING, List, Optional, Tuple - -from pdf2image import convert_from_path - -from ..constants import Messages, PDFConversionDefaultOptions - -if TYPE_CHECKING: - from ..zerox_models import litellmmodel - -# Package Imports -from .image import save_image -from .text import format_markdown - - -async def convert_pdf_to_images(local_path: str, temp_dir: str) -> List[str]: - """Converts a PDF file to a series of images in the temp_dir. Returns a list of image paths in page order.""" - options = { - "pdf_path": local_path, - "output_folder": temp_dir, - "dpi": PDFConversionDefaultOptions.DPI, - "fmt": PDFConversionDefaultOptions.FORMAT, - "size": PDFConversionDefaultOptions.SIZE, - "thread_count": PDFConversionDefaultOptions.THREAD_COUNT, - "use_pdftocairo": PDFConversionDefaultOptions.USE_PDFTOCAIRO, - "paths_only": True, - } - - try: - image_paths = await asyncio.to_thread(convert_from_path, **options) - return image_paths - except Exception as err: - logging.error(f"Error converting PDF to images: {err}") - - -async def process_page( - image: str, - model: "litellmmodel", - temp_directory: str = "", - input_token_count: int = 0, - output_token_count: int = 0, - prior_page: str = "", - semaphore: Optional[asyncio.Semaphore] = None, -) -> Tuple[str, int, int, str]: - """Process a single page of a PDF""" - - # If semaphore is provided, acquire it before processing the page - if semaphore: - async with semaphore: - return await process_page( - image, - model, - temp_directory, - input_token_count, - output_token_count, - prior_page, - ) - - image_path = os.path.join(temp_directory, image) - - # Get the completion from LiteLLM - try: - completion = await model.completion( - image_path=image_path, - maintain_format=True, - prior_page=prior_page, - ) - - formatted_markdown = format_markdown(completion.content) - input_token_count += completion.input_tokens - output_token_count += completion.output_tokens - prior_page = formatted_markdown - - return ( - formatted_markdown, - input_token_count, - output_token_count, - prior_page, - ) - - except Exception as error: - logging.error(f"{Messages.FAILED_TO_PROCESS_IMAGE} Error:{error}") - return "", input_token_count, output_token_count, "" - - -async def process_pages_in_batches( - images: List[str], - concurrency: int, - model: "litellmmodel", - temp_directory: str = "", - input_token_count: int = 0, - output_token_count: int = 0, - prior_page: str = "", -): - # Create a semaphore to limit the number of concurrent tasks - semaphore = asyncio.Semaphore(concurrency) - - # Process each page in parallel - tasks = [ - process_page( - image, - model, - temp_directory, - input_token_count, - output_token_count, - prior_page, - semaphore, - ) - for image in images - ] - - # Wait for all tasks to complete - return await asyncio.gather(*tasks) diff --git a/py/core/parsers/media/pyzerox/processor/text.py b/py/core/parsers/media/pyzerox/processor/text.py deleted file mode 100644 index 524033e6e..000000000 --- 
a/py/core/parsers/media/pyzerox/processor/text.py +++ /dev/null @@ -1,14 +0,0 @@ -import re - -# Package imports -from ..constants.patterns import Patterns - - -def format_markdown(text: str) -> str: - """Format markdown text by removing markdown and code blocks""" - - formatted_markdown = re.sub(Patterns.MATCH_MARKDOWN_BLOCKS, r"\1", text) - formatted_markdown = re.sub( - Patterns.MATCH_CODE_BLOCKS, r"\1", formatted_markdown - ) - return formatted_markdown diff --git a/py/core/parsers/media/pyzerox/processor/utils.py b/py/core/parsers/media/pyzerox/processor/utils.py deleted file mode 100644 index da703240d..000000000 --- a/py/core/parsers/media/pyzerox/processor/utils.py +++ /dev/null @@ -1,52 +0,0 @@ -import os -import re -from typing import Iterable, Optional, Union -from urllib.parse import urlparse - -import aiofiles -import aiohttp -from PyPDF2 import PdfReader, PdfWriter - -from ..constants.messages import Messages - -# Package Imports -from ..errors.exceptions import ( - PageNumberOutOfBoundError, - ResourceUnreachableException, -) - - -async def download_file( - file_path: str, - temp_dir: str, -) -> Optional[str]: - """Downloads a file from a URL or local path to a temporary directory.""" - - local_pdf_path = os.path.join(temp_dir, os.path.basename(file_path)) - if is_valid_url(file_path): - async with aiohttp.ClientSession() as session: - async with session.get(file_path) as response: - if response.status != 200: - raise ResourceUnreachableException() - async with aiofiles.open(local_pdf_path, "wb") as f: - await f.write(await response.read()) - else: - async with ( - aiofiles.open(file_path, "rb") as src, - aiofiles.open(local_pdf_path, "wb") as dst, - ): - await dst.write(await src.read()) - return local_pdf_path - - -def is_valid_url(string: str) -> bool: - """Checks if a string is a valid URL.""" - - try: - result = urlparse(string) - return all([result.scheme, result.netloc]) and result.scheme in [ - "http", - "https", - ] - except ValueError: - return False diff --git a/py/core/parsers/media/pyzerox/zerox_core/__init__.py b/py/core/parsers/media/pyzerox/zerox_core/__init__.py deleted file mode 100644 index 825ed3f77..000000000 --- a/py/core/parsers/media/pyzerox/zerox_core/__init__.py +++ /dev/null @@ -1,5 +0,0 @@ -from .zerox import zerox - -__all__ = [ - "zerox", -] diff --git a/py/core/parsers/media/pyzerox/zerox_core/types.py b/py/core/parsers/media/pyzerox/zerox_core/types.py deleted file mode 100644 index 8474a5524..000000000 --- a/py/core/parsers/media/pyzerox/zerox_core/types.py +++ /dev/null @@ -1,42 +0,0 @@ -from dataclasses import dataclass, field -from typing import Any, Dict, Iterable, List, Optional, Union - - -@dataclass -class ZeroxArgs: - """ - Dataclass to store the arguments for the Zerox class. - """ - - file_path: str - cleanup: bool = True - concurrency: int = 10 - maintain_format: bool = False - model: str = ("gpt-4o-mini",) - output_dir: Optional[str] = None - temp_dir: Optional[str] = None - custom_system_prompt: Optional[str] = None - kwargs: Dict[str, Any] = field(default_factory=dict) - - -@dataclass -class Page: - """ - Dataclass to store the page content. - """ - - content: str - content_length: int - page: int - - -@dataclass -class ZeroxOutput: - """ - Dataclass to store the output of the Zerox class. 
- """ - - completion_time: float - input_tokens: int - output_tokens: int - pages: List[Page] diff --git a/py/core/parsers/media/pyzerox/zerox_core/zerox.py b/py/core/parsers/media/pyzerox/zerox_core/zerox.py deleted file mode 100644 index b89a20e54..000000000 --- a/py/core/parsers/media/pyzerox/zerox_core/zerox.py +++ /dev/null @@ -1,151 +0,0 @@ -import asyncio -import os -import tempfile -import warnings -from datetime import datetime -from typing import Iterable, List, Optional, Union - -import aiofiles -import aiofiles.os as async_os -import aioshutil as async_shutil - -from ..constants.messages import Messages -from ..errors import FileUnavailable - -# Package Imports -from ..processor import ( - convert_pdf_to_images, - process_page, - process_pages_in_batches, -) -from ..zerox_models import litellmmodel -from .types import Page, ZeroxOutput - - -async def zerox( - cleanup: bool = True, - concurrency: int = 10, - file_data: Optional[bytes] = None, - maintain_format: bool = False, - model: str = "gpt-4o-mini", - temp_dir: Optional[str] = None, - custom_system_prompt: Optional[str] = None, - **kwargs, -) -> ZeroxOutput: - """ - API to perform OCR to markdown using Vision models. - Please setup the environment variables for the model and model provider before using this API. Refer: https://docs.litellm.ai/docs/providers - - :param cleanup: Whether to cleanup the temporary files after processing, defaults to True - :type cleanup: bool, optional - :param concurrency: The number of concurrent processes to run, defaults to 10 - :type concurrency: int, optional - :param file_path: The path or URL to the PDF file to process. - :type file_path: str, optional - :param maintain_format: Whether to maintain the format from the previous page, defaults to False - :type maintain_format: bool, optional - :param model: The model to use for generating completions, defaults to "gpt-4o-mini". Note - Refer: https://docs.litellm.ai/docs/providers to pass correct model name as according to provider it might be different from actual name. - :type model: str, optional - :param temp_dir: The directory to store temporary files, defaults to some named folder in system's temp directory. If already exists, the contents will be deleted for zerox uses it. - :type temp_dir: str, optional - :param custom_system_prompt: The system prompt to use for the model, this overrides the default system prompt of zerox. Generally it is not required unless you want some specific behaviour. When set, it will raise a friendly warning, defaults to None - :type custom_system_prompt: str, optional - - :param kwargs: Additional keyword arguments to pass to the model.completion -> litellm.completion method. Refer: https://docs.litellm.ai/docs/providers and https://docs.litellm.ai/docs/completion/input - :return: The markdown content generated by the model. 
- """ - - input_token_count = 0 - output_token_count = 0 - prior_page = "" - aggregated_markdown: List[str] = [] - start_time = datetime.now() - # File Data Validators - if not file_data: - raise FileUnavailable() - - # Create an instance of the litellm model interface - vision_model = litellmmodel(model=model, **kwargs) - - # override the system prompt if a custom prompt is provided - if custom_system_prompt: - vision_model.system_prompt = custom_system_prompt - - if temp_dir: - if os.path.exists(temp_dir): - await async_shutil.rmtree(temp_dir) - await async_os.makedirs(temp_dir, exist_ok=True) - - # Create a temporary directory to store the PDF and images - with tempfile.TemporaryDirectory() as temp_dir_: - - if temp_dir: - ## use the user provided temp directory - temp_directory = temp_dir - else: - ## use the system temp directory - temp_directory = temp_dir_ - - local_path = os.path.join(temp_directory, "input.pdf") - async with aiofiles.open(local_path, "wb") as f: - await f.write(file_data) - - # Convert the file to a series of images, below function returns a list of image paths in page order - images = await convert_pdf_to_images( - local_path=local_path, temp_dir=temp_directory - ) - - if maintain_format: - for image in images: - result, input_token_count, output_token_count, prior_page = ( - await process_page( - image, - vision_model, - temp_directory, - input_token_count, - output_token_count, - prior_page, - ) - ) - - if result: - aggregated_markdown.append(result) - else: - results = await process_pages_in_batches( - images, - concurrency, - vision_model, - temp_directory, - input_token_count, - output_token_count, - prior_page, - ) - - aggregated_markdown = [ - result[0] for result in results if isinstance(result[0], str) - ] - - ## add token usage - input_token_count += sum([result[1] for result in results]) - output_token_count += sum([result[2] for result in results]) - - # Cleanup the downloaded PDF file - if cleanup and os.path.exists(temp_directory): - await async_shutil.rmtree(temp_directory) - - # Format JSON response - end_time = datetime.now() - completion_time = (end_time - start_time).total_seconds() * 1000 - - # Default behavior when no is provided - formatted_pages = [ - Page(content=content, page=i + 1, content_length=len(content)) - for i, content in enumerate(aggregated_markdown) - ] - - return ZeroxOutput( - completion_time=completion_time, - input_tokens=input_token_count, - output_tokens=output_token_count, - pages=formatted_pages, - ) diff --git a/py/core/parsers/media/pyzerox/zerox_models/__init__.py b/py/core/parsers/media/pyzerox/zerox_models/__init__.py deleted file mode 100644 index f19d77392..000000000 --- a/py/core/parsers/media/pyzerox/zerox_models/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -from .modellitellm import litellmmodel -from .types import CompletionResponse - -__all__ = [ - "litellmmodel", - "CompletionResponse", -] diff --git a/py/core/parsers/media/pyzerox/zerox_models/base.py b/py/core/parsers/media/pyzerox/zerox_models/base.py deleted file mode 100644 index 4e85dc344..000000000 --- a/py/core/parsers/media/pyzerox/zerox_models/base.py +++ /dev/null @@ -1,43 +0,0 @@ -from abc import ABC, abstractmethod -from typing import TYPE_CHECKING, Dict, Optional, Type, TypeVar - -if TYPE_CHECKING: - from ..zerox_models import CompletionResponse - -T = TypeVar("T", bound="BaseModel") - - -class BaseModel(ABC): - """ - Base class for all models. 
- """ - - @abstractmethod - async def completion( - self, - ) -> "CompletionResponse": - raise NotImplementedError("Subclasses must implement this method") - - @abstractmethod - def validate_access( - self, - ) -> None: - raise NotImplementedError("Subclasses must implement this method") - - @abstractmethod - def validate_model( - self, - ) -> None: - raise NotImplementedError("Subclasses must implement this method") - - def __init__( - self, - model: Optional[str] = None, - **kwargs, - ): - self.model = model - self.kwargs = kwargs - - ## validations - # self.validate_model() - # self.validate_access() diff --git a/py/core/parsers/media/pyzerox/zerox_models/modellitellm.py b/py/core/parsers/media/pyzerox/zerox_models/modellitellm.py deleted file mode 100644 index 02c5fe792..000000000 --- a/py/core/parsers/media/pyzerox/zerox_models/modellitellm.py +++ /dev/null @@ -1,169 +0,0 @@ -import os -import warnings -from typing import Any, Dict, List, Optional - -import aiohttp -import litellm - -from ..constants.messages import Messages -from ..constants.prompts import Prompts -from ..errors import ( - MissingEnvironmentVariables, - ModelAccessError, - NotAVisionModel, -) -from ..processor.image import encode_image_to_base64 - -# Package Imports -from .base import BaseModel -from .types import CompletionResponse - -DEFAULT_SYSTEM_PROMPT = Prompts.DEFAULT_SYSTEM_PROMPT - - -class litellmmodel(BaseModel): - ## setting the default system prompt - _system_prompt = DEFAULT_SYSTEM_PROMPT - - def __init__( - self, - model: Optional[str] = None, - **kwargs, - ): - """ - Initializes the Litellm model interface. - :param model: The model to use for generating completions, defaults to "gpt-4o-mini". Refer: https://docs.litellm.ai/docs/providers - :type model: str, optional - - :param kwargs: Additional keyword arguments to pass to self.completion -> litellm.completion. Refer: https://docs.litellm.ai/docs/providers and https://docs.litellm.ai/docs/completion/input - """ - super().__init__(model=model, **kwargs) - - ## calling custom methods to validate the environment and model - self.validate_environment() - self.validate_model() - self.validate_access() - - @property - def system_prompt(self) -> str: - """Returns the system prompt for the model.""" - return self._system_prompt - - @system_prompt.setter - def system_prompt(self, prompt: str) -> None: - """ - Sets/overrides the system prompt for the model. - Will raise a friendly warning to notify the user. - """ - warnings.warn( - f"{Messages.CUSTOM_SYSTEM_PROMPT_WARNING}. 
Default prompt for zerox is:\n {DEFAULT_SYSTEM_PROMPT}" - ) - self._system_prompt = prompt - - ## custom method on top of BaseModel - def validate_environment(self) -> None: - """Validates the environment variables required for the model.""" - env_config = litellm.validate_environment(model=self.model) - - if not env_config["keys_in_environment"]: - raise MissingEnvironmentVariables(extra_info=env_config) - - def validate_model(self) -> None: - """Validates the model to ensure it is a vision model.""" - if not litellm.supports_vision(model=self.model): - raise NotAVisionModel(extra_info={"model": self.model}) - - def validate_access(self) -> None: - """Validates access to the model -> if environment variables are set correctly with correct values.""" - if not litellm.check_valid_key(model=self.model, api_key=None): - raise ModelAccessError(extra_info={"model": self.model}) - - async def completion( - self, - image_path: str, - maintain_format: bool, - prior_page: str, - ) -> CompletionResponse: - """LitellM completion for image to markdown conversion. - - :param image_path: Path to the image file. - :type image_path: str - :param maintain_format: Whether to maintain the format from the previous page. - :type maintain_format: bool - :param prior_page: The markdown content of the previous page. - :type prior_page: str - - :return: The markdown content generated by the model. - """ - messages = await self._prepare_messages( - image_path=image_path, - maintain_format=maintain_format, - prior_page=prior_page, - ) - - try: - response = await litellm.acompletion( - model=self.model, messages=messages, **self.kwargs - ) - - ## completion response - response = CompletionResponse( - content=response["choices"][0]["message"]["content"], - input_tokens=response["usage"]["prompt_tokens"], - output_tokens=response["usage"]["completion_tokens"], - ) - return response - - except Exception as err: - raise Exception(Messages.COMPLETION_ERROR.format(err)) - - async def _prepare_messages( - self, - image_path: str, - maintain_format: bool, - prior_page: str, - ) -> List[Dict[str, Any]]: - """Prepares the messages to send to the LiteLLM Completion API. - - :param image_path: Path to the image file. - :type image_path: str - :param maintain_format: Whether to maintain the format from the previous page. - :type maintain_format: bool - :param prior_page: The markdown content of the previous page. - :type prior_page: str - """ - # Default system message - messages: List[Dict[str, Any]] = [ - { - "role": "system", - "content": self._system_prompt, - }, - ] - - # If content has already been generated, add it to context. - # This helps maintain the same format across pages. 
- if maintain_format and prior_page: - messages.append( - { - "role": "system", - "content": f'Markdown must maintain consistent formatting with the following page: \n\n """{prior_page}"""', - }, - ) - - # Add Image to request - base64_image = await encode_image_to_base64(image_path) - messages.append( - { - "role": "user", - "content": [ - { - "type": "image_url", - "image_url": { - "url": f"data:image/png;base64,{base64_image}" - }, - }, - ], - } - ) - - return messages diff --git a/py/core/parsers/media/pyzerox/zerox_models/types.py b/py/core/parsers/media/pyzerox/zerox_models/types.py deleted file mode 100644 index 0eea3e2ee..000000000 --- a/py/core/parsers/media/pyzerox/zerox_models/types.py +++ /dev/null @@ -1,12 +0,0 @@ -from dataclasses import dataclass - - -@dataclass -class CompletionResponse: - """ - A class representing the response of a completion. - """ - - content: str - input_tokens: int - output_tokens: int diff --git a/py/core/parsers/structured/csv_parser.py b/py/core/parsers/structured/csv_parser.py index ab1e55e0d..c8418f5a1 100644 --- a/py/core/parsers/structured/csv_parser.py +++ b/py/core/parsers/structured/csv_parser.py @@ -3,12 +3,26 @@ from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser +from core.base.providers import ( + CompletionProvider, + DatabaseProvider, + IngestionConfig, +) class CSVParser(AsyncParser[DataType]): """A parser for CSV data.""" - def __init__(self): + def __init__( + self, + config: IngestionConfig, + database_provider: DatabaseProvider, + llm_provider: CompletionProvider, + ): + self.database_provider = database_provider + self.llm_provider = llm_provider + self.config = config + import csv from io import StringIO @@ -29,7 +43,12 @@ async def ingest( class CSVParserAdvanced(AsyncParser[DataType]): """A parser for CSV data.""" - def __init__(self): + def __init__( + self, config: IngestionConfig, llm_provider: CompletionProvider + ): + self.llm_provider = llm_provider + self.config = config + import csv from io import StringIO diff --git a/py/core/parsers/structured/json_parser.py b/py/core/parsers/structured/json_parser.py index aedb2482c..1efe29c78 100644 --- a/py/core/parsers/structured/json_parser.py +++ b/py/core/parsers/structured/json_parser.py @@ -5,11 +5,26 @@ from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser +from core.base.providers import ( + CompletionProvider, + DatabaseProvider, + IngestionConfig, +) class JSONParser(AsyncParser[DataType]): """A parser for JSON data.""" + def __init__( + self, + config: IngestionConfig, + database_provider: DatabaseProvider, + llm_provider: CompletionProvider, + ): + self.database_provider = database_provider + self.llm_provider = llm_provider + self.config = config + async def ingest( self, data: DataType, *args, **kwargs ) -> AsyncGenerator[str, None]: diff --git a/py/core/parsers/structured/xlsx_parser.py b/py/core/parsers/structured/xlsx_parser.py index 5237439ea..e06a22d73 100644 --- a/py/core/parsers/structured/xlsx_parser.py +++ b/py/core/parsers/structured/xlsx_parser.py @@ -4,12 +4,25 @@ from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser +from core.base.providers import ( + CompletionProvider, + DatabaseProvider, + IngestionConfig, +) class XLSXParser(AsyncParser[DataType]): """A parser for XLSX data.""" - def __init__(self): + def __init__( + self, + config: IngestionConfig, + database_provider: DatabaseProvider, + llm_provider: 
CompletionProvider, + ): + self.database_provider = database_provider + self.llm_provider = llm_provider + self.config = config try: from openpyxl import load_workbook @@ -36,7 +49,11 @@ class XLSXParserAdvanced(AsyncParser[DataType]): """A parser for XLSX data.""" # identifies connected components in the excel graph and extracts data from each component - def __init__(self): + def __init__( + self, config: IngestionConfig, llm_provider: CompletionProvider + ): + self.llm_provider = llm_provider + self.config = config try: import networkx as nx import numpy as np diff --git a/py/core/parsers/text/html_parser.py b/py/core/parsers/text/html_parser.py index c2e893120..6f3e146c6 100644 --- a/py/core/parsers/text/html_parser.py +++ b/py/core/parsers/text/html_parser.py @@ -5,11 +5,26 @@ from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser +from core.base.providers import ( + CompletionProvider, + DatabaseProvider, + IngestionConfig, +) class HTMLParser(AsyncParser[DataType]): """A parser for HTML data.""" + def __init__( + self, + config: IngestionConfig, + database_provider: DatabaseProvider, + llm_provider: CompletionProvider, + ): + self.database_provider = database_provider + self.llm_provider = llm_provider + self.config = config + async def ingest( self, data: DataType, *args, **kwargs ) -> AsyncGenerator[str, None]: diff --git a/py/core/parsers/text/md_parser.py b/py/core/parsers/text/md_parser.py index 725ae5724..2a181fbf9 100644 --- a/py/core/parsers/text/md_parser.py +++ b/py/core/parsers/text/md_parser.py @@ -5,12 +5,26 @@ from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser +from core.base.providers import ( + CompletionProvider, + DatabaseProvider, + IngestionConfig, +) class MDParser(AsyncParser[DataType]): """A parser for Markdown data.""" - def __init__(self): + def __init__( + self, + config: IngestionConfig, + database_provider: DatabaseProvider, + llm_provider: CompletionProvider, + ): + self.database_provider = database_provider + self.llm_provider = llm_provider + self.config = config + import markdown self.markdown = markdown diff --git a/py/core/parsers/text/text_parser.py b/py/core/parsers/text/text_parser.py index da72ea85f..791f0783c 100644 --- a/py/core/parsers/text/text_parser.py +++ b/py/core/parsers/text/text_parser.py @@ -3,11 +3,26 @@ from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser +from core.base.providers import ( + CompletionProvider, + DatabaseProvider, + IngestionConfig, +) class TextParser(AsyncParser[DataType]): """A parser for raw text data.""" + def __init__( + self, + config: IngestionConfig, + database_provider: DatabaseProvider, + llm_provider: CompletionProvider, + ): + self.database_provider = database_provider + self.llm_provider = llm_provider + self.config = config + async def ingest( self, data: DataType, *args, **kwargs ) -> AsyncGenerator[DataType, None]: diff --git a/py/core/pipelines/search_pipeline.py b/py/core/pipelines/search_pipeline.py index 541c58025..7c120c3d2 100644 --- a/py/core/pipelines/search_pipeline.py +++ b/py/core/pipelines/search_pipeline.py @@ -103,9 +103,9 @@ async def enqueue_requests(): await enqueue_task vector_search_results = ( - await vector_search_task if use_vector_search else None + await vector_search_task if use_vector_search else [] ) - kg_results = await kg_task if do_kg else None + kg_results = await kg_task if do_kg else [] return AggregateSearchResult( 
vector_search_results=vector_search_results, diff --git a/py/core/pipes/kg/community_summary.py b/py/core/pipes/kg/community_summary.py index 955fc125e..0afb3050d 100644 --- a/py/core/pipes/kg/community_summary.py +++ b/py/core/pipes/kg/community_summary.py @@ -192,9 +192,13 @@ async def process_community( break except Exception as e: if attempt == 2: - raise ValueError( - f"Failed to generate a summary for community {community_number} at level {community_level}." - ) from e + logger.error( + f"KGCommunitySummaryPipe: Error generating community summary for community {community_number}: {e}" + ) + return { + "community_number": community_number, + "error": str(e), + } community_report = CommunityReport( community_number=community_number, @@ -267,11 +271,28 @@ async def _run_logic( # type: ignore ) ) + total_jobs = len(community_summary_jobs) + total_errors = 0 completed_community_summary_jobs = 0 for community_summary in asyncio.as_completed(community_summary_jobs): + + summary = await community_summary completed_community_summary_jobs += 1 if completed_community_summary_jobs % 50 == 0: logger.info( - f"KGCommunitySummaryPipe: {completed_community_summary_jobs}/{len(community_summary_jobs)} community summaries completed, elapsed time: {time.time() - start_time:.2f} seconds" + f"KGCommunitySummaryPipe: {completed_community_summary_jobs}/{total_jobs} community summaries completed, elapsed time: {time.time() - start_time:.2f} seconds" + ) + + if "error" in summary: + logger.error( + f"KGCommunitySummaryPipe: Error generating community summary for community {summary['community_number']}: {summary['error']}" ) - yield await community_summary + total_errors += 1 + continue + + yield summary + + if total_errors > 0: + raise ValueError( + f"KGCommunitySummaryPipe: Failed to generate community summaries for {total_errors} out of {total_jobs} communities. Please rerun the job if there are too many failures." 
+ ) diff --git a/py/core/pipes/kg/deduplication.py b/py/core/pipes/kg/deduplication.py index d8eecd0d1..ff956fb01 100644 --- a/py/core/pipes/kg/deduplication.py +++ b/py/core/pipes/kg/deduplication.py @@ -8,6 +8,7 @@ from core.providers import ( LiteLLMCompletionProvider, LiteLLMEmbeddingProvider, + OllamaEmbeddingProvider, OpenAICompletionProvider, OpenAIEmbeddingProvider, PostgresDBProvider, @@ -26,7 +27,9 @@ def __init__( OpenAICompletionProvider, LiteLLMCompletionProvider ], embedding_provider: Union[ - LiteLLMEmbeddingProvider, OpenAIEmbeddingProvider + LiteLLMEmbeddingProvider, + OpenAIEmbeddingProvider, + OllamaEmbeddingProvider, ], logging_provider: SqlitePersistentLoggingProvider, **kwargs, diff --git a/py/core/pipes/kg/deduplication_summary.py b/py/core/pipes/kg/deduplication_summary.py index fad7893f3..0e60c211c 100644 --- a/py/core/pipes/kg/deduplication_summary.py +++ b/py/core/pipes/kg/deduplication_summary.py @@ -9,6 +9,7 @@ from core.providers import ( LiteLLMCompletionProvider, LiteLLMEmbeddingProvider, + OllamaEmbeddingProvider, OpenAICompletionProvider, OpenAIEmbeddingProvider, PostgresDBProvider, @@ -30,7 +31,9 @@ def __init__( LiteLLMCompletionProvider, OpenAICompletionProvider ], embedding_provider: Union[ - LiteLLMEmbeddingProvider, OpenAIEmbeddingProvider + LiteLLMEmbeddingProvider, + OpenAIEmbeddingProvider, + OllamaEmbeddingProvider, ], config: AsyncPipe.PipeConfig, logging_provider: SqlitePersistentLoggingProvider, diff --git a/py/core/pipes/kg/entity_description.py b/py/core/pipes/kg/entity_description.py index f7b9749e8..f480625a6 100644 --- a/py/core/pipes/kg/entity_description.py +++ b/py/core/pipes/kg/entity_description.py @@ -120,7 +120,10 @@ async def process_entity( .message.content ) - # will do more requests, but it is simpler + if not out_entity.description: + logger.error(f"No description for entity {out_entity.name}") + return out_entity.name + out_entity.description_embedding = ( await self.embedding_provider.async_get_embeddings( [out_entity.description] diff --git a/py/core/providers/__init__.py b/py/core/providers/__init__.py index 04520e415..a970f83eb 100644 --- a/py/core/providers/__init__.py +++ b/py/core/providers/__init__.py @@ -1,7 +1,12 @@ from .auth import R2RAuthProvider, SupabaseAuthProvider from .crypto import BCryptConfig, BCryptProvider from .database import PostgresDBProvider -from .embeddings import LiteLLMEmbeddingProvider, OpenAIEmbeddingProvider +from .email import AsyncSMTPEmailProvider, ConsoleMockEmailProvider +from .embeddings import ( + LiteLLMEmbeddingProvider, + OllamaEmbeddingProvider, + OpenAIEmbeddingProvider, +) from .ingestion import ( # type: ignore R2RIngestionConfig, R2RIngestionProvider, @@ -31,7 +36,11 @@ "PostgresDBProvider", # Embeddings "LiteLLMEmbeddingProvider", + "OllamaEmbeddingProvider", "OpenAIEmbeddingProvider", + # Email + "AsyncSMTPEmailProvider", + "ConsoleMockEmailProvider", # Orchestration "HatchetOrchestrationProvider", "SimpleOrchestrationProvider", diff --git a/py/core/providers/auth/r2r_auth.py b/py/core/providers/auth/r2r_auth.py index f1fb9f88f..3c2fb5642 100644 --- a/py/core/providers/auth/r2r_auth.py +++ b/py/core/providers/auth/r2r_auth.py @@ -12,6 +12,7 @@ AuthProvider, CryptoProvider, DatabaseProvider, + EmailProvider, R2RException, Token, TokenData, @@ -33,11 +34,12 @@ def __init__( config: AuthConfig, crypto_provider: CryptoProvider, database_provider: DatabaseProvider, + email_provider: EmailProvider, ): - super().__init__(config, crypto_provider) + super().__init__( + config, 
crypto_provider, database_provider, email_provider + ) logger.debug(f"Initializing R2RAuthProvider with config: {config}") - self.crypto_provider = crypto_provider - self.database_provider = database_provider self.secret_key = ( config.secret_key or os.getenv("R2R_SECRET_KEY") or DEFAULT_R2R_SK ) @@ -157,7 +159,10 @@ async def register(self, email: str, password: str) -> UserResponse: ) new_user.verification_code_expiry = expiry # TODO - Integrate email provider(s) - # self.providers.email.send_verification_email(new_user.email, verification_code) + + await self.email_provider.send_verification_email( + new_user.email, verification_code + ) else: expiry = datetime.now(timezone.utc) + timedelta(hours=366 * 10) @@ -301,7 +306,7 @@ async def request_password_reset(self, email: str) -> Dict[str, str]: ) # TODO: Integrate with email provider to send reset link - # self.email_provider.send_reset_email(email, reset_token) + await self.email_provider.send_password_reset_email(email, reset_token) return {"message": "If the email exists, a reset link has been sent"} diff --git a/py/core/providers/auth/supabase.py b/py/core/providers/auth/supabase.py index d959f4a33..c5cfbba71 100644 --- a/py/core/providers/auth/supabase.py +++ b/py/core/providers/auth/supabase.py @@ -10,6 +10,7 @@ AuthProvider, CryptoProvider, DatabaseProvider, + EmailProvider, R2RException, Token, TokenData, @@ -29,8 +30,11 @@ def __init__( config: AuthConfig, crypto_provider: CryptoProvider, database_provider: DatabaseProvider, + email_provider: EmailProvider, ): - super().__init__(config, crypto_provider) + super().__init__( + config, crypto_provider, database_provider, email_provider + ) self.supabase_url = config.extra_fields.get( "supabase_url", None ) or os.getenv("SUPABASE_URL") diff --git a/py/core/providers/database/postgres.py b/py/core/providers/database/postgres.py index 5db7c0fef..655473192 100644 --- a/py/core/providers/database/postgres.py +++ b/py/core/providers/database/postgres.py @@ -152,6 +152,7 @@ def __init__( self.project_name, self.connection_manager, self.dimension, + self.quantization_type, self.enable_fts, ) self.kg_handler = PostgresKGHandler( diff --git a/py/core/providers/database/prompts/vision_img.yaml b/py/core/providers/database/prompts/vision_img.yaml new file mode 100644 index 000000000..4a1aa4777 --- /dev/null +++ b/py/core/providers/database/prompts/vision_img.yaml @@ -0,0 +1,4 @@ +vision_img: + template: > + First, provide a title for the image, then explain everything that you see. Be very thorough in your analysis as a user will need to understand the image without seeing it. If it is possible to transcribe the image to text directly, then do so. The more detail you provide, the better the user will understand the image. + input_types: {} diff --git a/py/core/providers/database/prompts/vision_pdf.yaml b/py/core/providers/database/prompts/vision_pdf.yaml new file mode 100644 index 000000000..350ead2d9 --- /dev/null +++ b/py/core/providers/database/prompts/vision_pdf.yaml @@ -0,0 +1,42 @@ +vision_pdf: + template: > + Convert this PDF page to markdown format, preserving all content and formatting. 
Follow these guidelines: + + Text: + - Maintain the original text hierarchy (headings, paragraphs, lists) + - Preserve any special formatting (bold, italic, underline) + - Include all footnotes, citations, and references + - Keep text in its original reading order + + Tables: + - Recreate tables using markdown table syntax + - Preserve all headers, rows, and columns + - Maintain alignment and formatting where possible + - Include any table captions or notes + + Equations: + - Convert mathematical equations using LaTeX notation + - Preserve equation numbers if present + - Include any surrounding context or references + + Images: + - Enclose image descriptions within [FIG] and [/FIG] tags + - Include detailed descriptions of: + * Main subject matter + * Text overlays or captions + * Charts, graphs, or diagrams + * Relevant colors, patterns, or visual elements + - Maintain image placement relative to surrounding text + + Additional Elements: + - Include page numbers if visible + - Preserve headers and footers + - Maintain sidebars or callout boxes + - Keep any special symbols or characters + + Quality Requirements: + - Ensure 100% content preservation + - Maintain logical document flow + - Verify all markdown syntax is valid + - Double-check completeness before submitting + input_types: {} diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index 6fca3bee2..90ca5377b 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -6,6 +6,8 @@ from typing import Any, Optional, Tuple, TypedDict, Union from uuid import UUID +import numpy as np + from core.base import ( IndexArgsHNSW, IndexArgsIVFFlat, @@ -33,6 +35,33 @@ def index_measure_to_ops( return _decorate_vector_type(measure.ops, quantization_type) +def quantize_vector_to_binary( + vector: Union[list[float], np.ndarray], threshold: float = 0.0 +) -> bytes: + """ + Quantizes a float vector to a binary vector string for PostgreSQL bit type. + Used when quantization_type is INT1. + + Args: + vector (Union[List[float], np.ndarray]): Input vector of floats + threshold (float, optional): Threshold for binarization. Defaults to 0.0. + + Returns: + str: Binary string representation for PostgreSQL bit type + """ + # Convert input to numpy array if it isn't already + if not isinstance(vector, np.ndarray): + vector = np.array(vector) + + # Convert to binary (1 where value > threshold, 0 otherwise) + binary_vector = (vector > threshold).astype(int) + + # Convert to string of 1s and 0s + # Convert to string of 1s and 0s, then to bytes + binary_string = "".join(map(str, binary_vector)) + return binary_string.encode("ascii") + + class HybridSearchIntermediateResult(TypedDict): semantic_rank: int full_text_rank: int @@ -55,10 +84,12 @@ def __init__( project_name: str, connection_manager: PostgresConnectionManager, dimension: int, + quantization_type: VectorQuantizationType, enable_fts: bool = False, ): super().__init__(project_name, connection_manager) self.dimension = dimension + self.quantization_type = quantization_type self.enable_fts = enable_fts async def create_tables(self): @@ -82,8 +113,12 @@ async def create_tables(self): "your database schema to the new version." 
) - # TODO - Move ids to `UUID` type - # Create the vector table if it doesn't exist + binary_col = ( + "" + if self.quantization_type != VectorQuantizationType.INT1 + else f"vec_binary bit({self.dimension})," + ) + query = f""" CREATE TABLE IF NOT EXISTS {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} ( extraction_id UUID PRIMARY KEY, @@ -91,6 +126,7 @@ async def create_tables(self): user_id UUID, collection_ids UUID[], vec vector({self.dimension}), + {binary_col} text TEXT, metadata JSONB {",fts tsvector GENERATED ALWAYS AS (to_tsvector('english', text)) STORED" if self.enable_fts else ""} @@ -108,57 +144,134 @@ async def create_tables(self): await self.connection_manager.execute_query(query) async def upsert(self, entry: VectorEntry) -> None: - query = f""" - INSERT INTO {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} - (extraction_id, document_id, user_id, collection_ids, vec, text, metadata) - VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (extraction_id) DO UPDATE SET - document_id = EXCLUDED.document_id, - user_id = EXCLUDED.user_id, - collection_ids = EXCLUDED.collection_ids, - vec = EXCLUDED.vec, - text = EXCLUDED.text, - metadata = EXCLUDED.metadata; """ - await self.connection_manager.execute_query( - query, - ( - entry.extraction_id, - entry.document_id, - entry.user_id, - entry.collection_ids, - str(entry.vector.data), - entry.text, - json.dumps(entry.metadata), - ), - ) + Upsert function that handles vector quantization only when quantization_type is INT1. + Matches the table schema where vec_binary column only exists for INT1 quantization. + """ + # Check the quantization type to determine which columns to use + if self.quantization_type == VectorQuantizationType.INT1: + # For quantized vectors, use vec_binary column + query = f""" + INSERT INTO {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + (extraction_id, document_id, user_id, collection_ids, vec, vec_binary, text, metadata) + VALUES ($1, $2, $3, $4, $5, $6::bit({self.dimension}), $7, $8) + ON CONFLICT (extraction_id) DO UPDATE SET + document_id = EXCLUDED.document_id, + user_id = EXCLUDED.user_id, + collection_ids = EXCLUDED.collection_ids, + vec = EXCLUDED.vec, + vec_binary = EXCLUDED.vec_binary, + text = EXCLUDED.text, + metadata = EXCLUDED.metadata; + """ + await self.connection_manager.execute_query( + query, + ( + entry.extraction_id, + entry.document_id, + entry.user_id, + entry.collection_ids, + str(entry.vector.data), + quantize_vector_to_binary( + entry.vector.data + ), # Convert to binary + entry.text, + json.dumps(entry.metadata), + ), + ) + else: + # For regular vectors, use vec column only + query = f""" + INSERT INTO {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + (extraction_id, document_id, user_id, collection_ids, vec, text, metadata) + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT (extraction_id) DO UPDATE SET + document_id = EXCLUDED.document_id, + user_id = EXCLUDED.user_id, + collection_ids = EXCLUDED.collection_ids, + vec = EXCLUDED.vec, + text = EXCLUDED.text, + metadata = EXCLUDED.metadata; + """ + + await self.connection_manager.execute_query( + query, + ( + entry.extraction_id, + entry.document_id, + entry.user_id, + entry.collection_ids, + str(entry.vector.data), + entry.text, + json.dumps(entry.metadata), + ), + ) async def upsert_entries(self, entries: list[VectorEntry]) -> None: - query = f""" - INSERT INTO {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} - (extraction_id, document_id, user_id, collection_ids, vec, text, metadata) - 
VALUES ($1, $2, $3, $4, $5, $6, $7) - ON CONFLICT (extraction_id) DO UPDATE SET - document_id = EXCLUDED.document_id, - user_id = EXCLUDED.user_id, - collection_ids = EXCLUDED.collection_ids, - vec = EXCLUDED.vec, - text = EXCLUDED.text, - metadata = EXCLUDED.metadata; """ - params = [ - ( - entry.extraction_id, - entry.document_id, - entry.user_id, - entry.collection_ids, - str(entry.vector.data), - entry.text, - json.dumps(entry.metadata), - ) - for entry in entries - ] - await self.connection_manager.execute_many(query, params) + Batch upsert function that handles vector quantization only when quantization_type is INT1. + Matches the table schema where vec_binary column only exists for INT1 quantization. + """ + if self.quantization_type == VectorQuantizationType.INT1: + # For quantized vectors, use vec_binary column + query = f""" + INSERT INTO {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + (extraction_id, document_id, user_id, collection_ids, vec, vec_binary, text, metadata) + VALUES ($1, $2, $3, $4, $5, $6::bit({self.dimension}), $7, $8) + ON CONFLICT (extraction_id) DO UPDATE SET + document_id = EXCLUDED.document_id, + user_id = EXCLUDED.user_id, + collection_ids = EXCLUDED.collection_ids, + vec = EXCLUDED.vec, + vec_binary = EXCLUDED.vec_binary, + text = EXCLUDED.text, + metadata = EXCLUDED.metadata; + """ + bin_params = [ + ( + entry.extraction_id, + entry.document_id, + entry.user_id, + entry.collection_ids, + str(entry.vector.data), + quantize_vector_to_binary( + entry.vector.data + ), # Convert to binary + entry.text, + json.dumps(entry.metadata), + ) + for entry in entries + ] + await self.connection_manager.execute_many(query, bin_params) + + else: + # For regular vectors, use vec column only + query = f""" + INSERT INTO {self._get_table_name(PostgresVectorHandler.TABLE_NAME)} + (extraction_id, document_id, user_id, collection_ids, vec, text, metadata) + VALUES ($1, $2, $3, $4, $5, $6, $7) + ON CONFLICT (extraction_id) DO UPDATE SET + document_id = EXCLUDED.document_id, + user_id = EXCLUDED.user_id, + collection_ids = EXCLUDED.collection_ids, + vec = EXCLUDED.vec, + text = EXCLUDED.text, + metadata = EXCLUDED.metadata; + """ + params = [ + ( + entry.extraction_id, + entry.document_id, + entry.user_id, + entry.collection_ids, + str(entry.vector.data), + entry.text, + json.dumps(entry.metadata), + ) + for entry in entries + ] + + await self.connection_manager.execute_many(query, params) async def semantic_search( self, query_vector: list[float], search_settings: VectorSearchSettings @@ -177,33 +290,110 @@ async def semantic_search( f"{table_name}.text", ] - # Use cosine distance calculation - distance_calc = f"{table_name}.vec <=> $1::vector" + params: list[Union[str, int, bytes]] = [] + # For binary vectors (INT1), implement two-stage search + if self.quantization_type == VectorQuantizationType.INT1: + # Convert query vector to binary format + binary_query = quantize_vector_to_binary(query_vector) + # TODO - Put depth multiplier in config / settings + extended_limit = ( + search_settings.search_limit * 20 + ) # Get 20x candidates for re-ranking + if ( + imeasure_obj == IndexMeasure.hamming_distance + or imeasure_obj == IndexMeasure.jaccard_distance + ): + binary_search_measure_repr = imeasure_obj.pgvector_repr + else: + binary_search_measure_repr = ( + IndexMeasure.hamming_distance.pgvector_repr + ) - if search_settings.include_values: - cols.append(f"({distance_calc}) AS distance") + # Use binary column and binary-specific distance measures for first stage + 
stage1_distance = f"{table_name}.vec_binary {binary_search_measure_repr} $1::bit({self.dimension})" + stage1_param = binary_query - if search_settings.include_metadatas: - cols.append(f"{table_name}.metadata") + cols.append( + f"{table_name}.vec" + ) # Need original vector for re-ranking + if search_settings.include_metadatas: + cols.append(f"{table_name}.metadata") - select_clause = ", ".join(cols) + select_clause = ", ".join(cols) + where_clause = "" + params.append(stage1_param) - where_clause = "" - params: list[Union[str, int]] = [str(query_vector)] - if search_settings.filters: - where_clause = self._build_filters(search_settings.filters, params) - where_clause = f"WHERE {where_clause}" + if search_settings.filters: + where_clause = self._build_filters( + search_settings.filters, params + ) + where_clause = f"WHERE {where_clause}" + + # First stage: Get candidates using binary search + query = f""" + WITH candidates AS ( + SELECT {select_clause}, + ({stage1_distance}) as binary_distance + FROM {table_name} + {where_clause} + ORDER BY {stage1_distance} + LIMIT ${len(params) + 1} + OFFSET ${len(params) + 2} + ) + -- Second stage: Re-rank using original vectors + SELECT + extraction_id, + document_id, + user_id, + collection_ids, + text, + {"metadata," if search_settings.include_metadatas else ""} + (vec <=> ${len(params) + 4}::vector({self.dimension})) as distance + FROM candidates + ORDER BY distance + LIMIT ${len(params) + 3} + """ - query = f""" - SELECT {select_clause} - FROM {table_name} - {where_clause} - ORDER BY {distance_calc} - LIMIT ${len(params) + 1} - OFFSET ${len(params) + 2} - """ + params.extend( + [ + extended_limit, # First stage limit + search_settings.offset, + search_settings.search_limit, # Final limit + str(query_vector), # For re-ranking + ] + ) + + else: + # Standard float vector handling - unchanged from original + distance_calc = f"{table_name}.vec {search_settings.index_measure.pgvector_repr} $1::vector({self.dimension})" + query_param = str(query_vector) + + if search_settings.include_values: + cols.append(f"({distance_calc}) AS distance") + if search_settings.include_metadatas: + cols.append(f"{table_name}.metadata") + + select_clause = ", ".join(cols) + where_clause = "" + params.append(query_param) + + if search_settings.filters: + where_clause = self._build_filters( + search_settings.filters, params + ) + where_clause = f"WHERE {where_clause}" - params.extend([search_settings.search_limit, search_settings.offset]) + query = f""" + SELECT {select_clause} + FROM {table_name} + {where_clause} + ORDER BY {distance_calc} + LIMIT ${len(params) + 1} + OFFSET ${len(params) + 2} + """ + params.extend( + [search_settings.search_limit, search_settings.offset] + ) results = await self.connection_manager.fetch_query(query, params) @@ -216,7 +406,7 @@ async def semantic_search( text=result["text"], score=( (1 - float(result["distance"])) - if search_settings.include_values + if "distance" in result else -1 ), metadata=( @@ -237,7 +427,7 @@ async def full_text_search( ) where_clauses = [] - params: list[Union[str, int]] = [query_text] + params: list[Union[str, int, bytes]] = [query_text] if search_settings.filters: filters_clause = self._build_filters( @@ -400,7 +590,7 @@ async def hybrid_search( async def delete( self, filters: dict[str, Any] ) -> dict[str, dict[str, str]]: - params: list[Union[str, int]] = [] + params: list[Union[str, int, bytes]] = [] where_clause = self._build_filters(filters, params) query = f""" @@ -538,6 +728,7 @@ async def create_index( 
Union[IndexArgsIVFFlat, IndexArgsHNSW] ] = None, index_name: Optional[str] = None, + index_column: Optional[str] = None, concurrently: bool = True, ) -> None: """ @@ -574,7 +765,17 @@ async def create_index( if table_name == VectorTableName.VECTORS: table_name_str = f"{self.project_name}.{VectorTableName.VECTORS}" # TODO - Fix bug in vector table naming convention - col_name = "vec" + if index_column: + col_name = index_column + else: + col_name = ( + "vec" + if ( + index_measure != IndexMeasure.hamming_distance + and index_measure != IndexMeasure.jaccard_distance + ) + else "vec_binary" + ) elif table_name == VectorTableName.ENTITIES_DOCUMENT: table_name_str = ( f"{self.project_name}.{VectorTableName.ENTITIES_DOCUMENT}" @@ -592,6 +793,7 @@ async def create_index( col_name = "embedding" else: raise ArgError("invalid table name") + if index_method not in ( IndexMethod.ivfflat, IndexMethod.hnsw, @@ -634,7 +836,7 @@ async def create_index( index_name = ( index_name - or f"ix_{ops}_{index_method}__{time.strftime('%Y%m%d%H%M%S')}" + or f"ix_{ops}_{index_method}__{col_name}_{time.strftime('%Y%m%d%H%M%S')}" ) create_index_sql = f""" @@ -661,7 +863,7 @@ async def create_index( return None def _build_filters( - self, filters: dict, parameters: list[Union[str, int]] + self, filters: dict, parameters: list[Union[str, int, bytes]] ) -> str: def parse_condition(key: str, value: Any) -> str: # type: ignore @@ -1003,28 +1205,3 @@ def _get_index_options( return "WITH (m=16, ef_construction=64)" else: return "" # No options for other methods - - def _get_index_type(self, method: IndexMethod) -> str: - if method == IndexMethod.ivfflat: - return "ivfflat" - elif method == IndexMethod.hnsw: - return "hnsw" - elif method == IndexMethod.auto: - # Here you might want to implement logic to choose between ivfflat and hnsw - return "hnsw" - - def _get_index_operator(self, measure: IndexMeasure) -> str: - if measure == IndexMeasure.l2_distance: - return "vector_l2_ops" - elif measure == IndexMeasure.max_inner_product: - return "vector_ip_ops" - elif measure == IndexMeasure.cosine_distance: - return "vector_cosine_ops" - - def _get_distance_function(self, imeasure_obj: IndexMeasure) -> str: - if imeasure_obj == IndexMeasure.cosine_distance: - return "<=>" - elif imeasure_obj == IndexMeasure.l2_distance: - return "l2_distance" - elif imeasure_obj == IndexMeasure.max_inner_product: - return "max_inner_product" diff --git a/py/core/providers/email/__init__.py b/py/core/providers/email/__init__.py new file mode 100644 index 000000000..d70f65330 --- /dev/null +++ b/py/core/providers/email/__init__.py @@ -0,0 +1,4 @@ +from .console_mock import ConsoleMockEmailProvider +from .smtp import AsyncSMTPEmailProvider + +__all__ = ["ConsoleMockEmailProvider", "AsyncSMTPEmailProvider"] diff --git a/py/core/providers/email/console_mock.py b/py/core/providers/email/console_mock.py new file mode 100644 index 000000000..3bab24723 --- /dev/null +++ b/py/core/providers/email/console_mock.py @@ -0,0 +1,56 @@ +import logging +from typing import Optional + +from core.base import EmailProvider + +logger = logging.getLogger() + + +class ConsoleMockEmailProvider(EmailProvider): + """A simple email provider that logs emails to console, useful for testing""" + + async def send_email( + self, + to_email: str, + subject: str, + body: str, + html_body: Optional[str] = None, + ) -> None: + logger.info( + f""" + -------- Email Message -------- + To: {to_email} + Subject: {subject} + Body: + {body} + ----------------------------- + """ + ) + + async 
def send_verification_email( + self, to_email: str, verification_code: str + ) -> None: + logger.info( + f""" + -------- Email Message -------- + To: {to_email} + Subject: Please verify your email address + Body: + Verification code: {verification_code} + ----------------------------- + """ + ) + + async def send_password_reset_email( + self, to_email: str, reset_token: str + ) -> None: + logger.info( + f""" + -------- Email Message -------- + To: {to_email} + Subject: Password Reset Request + Body: + Reset token: {reset_token} + ----------------------------- + """ + ) diff --git a/py/core/providers/email/smtp.py b/py/core/providers/email/smtp.py new file mode 100644 index 000000000..c45f5ff61 --- /dev/null +++ b/py/core/providers/email/smtp.py @@ -0,0 +1,150 @@ +import asyncio +import logging +import os +import smtplib +import ssl +from email.mime.multipart import MIMEMultipart +from email.mime.text import MIMEText +from typing import Optional + +from core.base import EmailConfig, EmailProvider + +logger = logging.getLogger(__name__) + + +class AsyncSMTPEmailProvider(EmailProvider): + """Email provider implementation using Brevo SMTP relay""" + + def __init__(self, config: EmailConfig): + super().__init__(config) + self.smtp_server = config.smtp_server or os.getenv("R2R_SMTP_SERVER") + if not self.smtp_server: + raise ValueError("SMTP server is required") + + self.smtp_port = config.smtp_port or os.getenv("R2R_SMTP_PORT") + if not self.smtp_port: + raise ValueError("SMTP port is required") + + self.smtp_username = config.smtp_username or os.getenv( + "R2R_SMTP_USERNAME" + ) + if not self.smtp_username: + raise ValueError("SMTP username is required") + + self.smtp_password = config.smtp_password or os.getenv( + "R2R_SMTP_PASSWORD" + ) + if not self.smtp_password: + raise ValueError("SMTP password is required") + + self.from_email: Optional[str] = ( + config.from_email + or os.getenv("R2R_FROM_EMAIL") + or self.smtp_username + ) + self.ssl_context = ssl.create_default_context() + + async def _send_email_sync(self, msg: MIMEMultipart) -> None: + """Synchronous email sending wrapped in asyncio executor""" + loop = asyncio.get_running_loop() + + def _send(): + with smtplib.SMTP_SSL( + self.smtp_server, + self.smtp_port, + context=self.ssl_context, + timeout=30, + ) as server: + logger.info("Connected to SMTP server") + server.login(self.smtp_username, self.smtp_password) + logger.info("Login successful") + server.send_message(msg) + logger.info("Message sent successfully!") + + try: + await loop.run_in_executor(None, _send) + except Exception as e: + error_msg = f"Failed to send email: {str(e)}" + logger.error(error_msg) + raise RuntimeError(error_msg) from e + + async def send_email( + self, + to_email: str, + subject: str, + body: str, + html_body: Optional[str] = None, + ) -> None: + msg = MIMEMultipart("alternative") + msg["Subject"] = subject + msg["From"] = self.from_email # type: ignore + msg["To"] = to_email + + msg.attach(MIMEText(body, "plain")) + if html_body: + msg.attach(MIMEText(html_body, "html")) + + try: + logger.info("Initializing SMTP connection...") + async with asyncio.timeout(30): # Overall timeout + await self._send_email_sync(msg) + except asyncio.TimeoutError: + error_msg = "Operation timed out while trying to send email" + logger.error(error_msg) + raise RuntimeError(error_msg) + except Exception as e: + error_msg = f"Failed to send email: {str(e)}" + logger.error(error_msg) + raise RuntimeError(error_msg) from e + + async def send_verification_email( + self, 
to_email: str, verification_code: str + ) -> None: + body = f""" + Please verify your email address by entering the following code: + + Verification code: {verification_code} + + If you did not request this verification, please ignore this email. + """ + + html_body = f""" +
+        <html>
+            <body>
+                <p>Please verify your email address by entering the following code:</p>
+                <h2>Verification code: {verification_code}</h2>
+                <p>If you did not request this verification, please ignore this email.</p>
+            </body>
+        </html>
+ """ + + await self.send_email( + to_email=to_email, + subject="Please verify your email address", + body=body, + html_body=html_body, + ) + + async def send_password_reset_email( + self, to_email: str, reset_token: str + ) -> None: + body = f""" + You have requested to reset your password. + + Reset token: {reset_token} + + If you did not request a password reset, please ignore this email. + """ + + html_body = f""" +
+        <html>
+            <body>
+                <p>You have requested to reset your password.</p>
+                <h2>Reset token: {reset_token}</h2>
+                <p>If you did not request a password reset, please ignore this email.</p>
+            </body>
+        </html>
+ """ + + await self.send_email( + to_email=to_email, + subject="Password Reset Request", + body=body, + html_body=html_body, + ) diff --git a/py/core/providers/embeddings/__init__.py b/py/core/providers/embeddings/__init__.py index ccff0f76f..3fa674425 100644 --- a/py/core/providers/embeddings/__init__.py +++ b/py/core/providers/embeddings/__init__.py @@ -1,7 +1,9 @@ from .litellm import LiteLLMEmbeddingProvider +from .ollama import OllamaEmbeddingProvider from .openai import OpenAIEmbeddingProvider __all__ = [ "LiteLLMEmbeddingProvider", "OpenAIEmbeddingProvider", + "OllamaEmbeddingProvider", ] diff --git a/py/core/providers/embeddings/ollama.py b/py/core/providers/embeddings/ollama.py new file mode 100644 index 000000000..e838b8842 --- /dev/null +++ b/py/core/providers/embeddings/ollama.py @@ -0,0 +1,185 @@ +import logging +import os +from typing import Any, List + +from ollama import AsyncClient, Client + +from core.base import ( + EmbeddingConfig, + EmbeddingProvider, + EmbeddingPurpose, + R2RException, + VectorSearchResult, +) + +logger = logging.getLogger() + + +class OllamaEmbeddingProvider(EmbeddingProvider): + def __init__(self, config: EmbeddingConfig): + super().__init__(config) + provider = config.provider + if not provider: + raise ValueError( + "Must set provider in order to initialize `OllamaEmbeddingProvider`." + ) + if provider != "ollama": + raise ValueError( + "OllamaEmbeddingProvider must be initialized with provider `ollama`." + ) + if config.rerank_model: + raise ValueError( + "OllamaEmbeddingProvider does not support separate reranking." + ) + + self.base_model = config.base_model + self.base_dimension = config.base_dimension + self.base_url = os.getenv("OLLAMA_API_BASE") + logger.info( + f"Using Ollama API base URL: {self.base_url or 'http://127.0.0.1:11434'}" + ) + self.client = Client(host=self.base_url) + self.aclient = AsyncClient(host=self.base_url) + + self.set_prefixes(config.prefixes or {}, self.base_model) + self.batch_size = config.batch_size or 32 + + def _get_embedding_kwargs(self, **kwargs): + embedding_kwargs = { + "model": self.base_model, + } + embedding_kwargs.update(kwargs) + return embedding_kwargs + + async def _execute_task(self, task: dict[str, Any]) -> List[List[float]]: + texts = task["texts"] + purpose = task.get("purpose", EmbeddingPurpose.INDEX) + kwargs = self._get_embedding_kwargs(**task.get("kwargs", {})) + + try: + embeddings = [] + for i in range(0, len(texts), self.batch_size): + batch = texts[i : i + self.batch_size] + prefixed_batch = [ + self.prefixes.get(purpose, "") + text for text in batch + ] + response = await self.aclient.embed( + input=prefixed_batch, **kwargs + ) + embeddings.extend(response["embeddings"]) + return embeddings + except Exception as e: + error_msg = f"Error getting embeddings: {str(e)}" + logger.error(error_msg) + raise R2RException(error_msg, 400) + + def _execute_task_sync(self, task: dict[str, Any]) -> List[List[float]]: + texts = task["texts"] + purpose = task.get("purpose", EmbeddingPurpose.INDEX) + kwargs = self._get_embedding_kwargs(**task.get("kwargs", {})) + + try: + embeddings = [] + for i in range(0, len(texts), self.batch_size): + batch = texts[i : i + self.batch_size] + prefixed_batch = [ + self.prefixes.get(purpose, "") + text for text in batch + ] + response = self.client.embed(input=prefixed_batch, **kwargs) + embeddings.extend(response["embeddings"]) + return embeddings + except Exception as e: + error_msg = f"Error getting embeddings: {str(e)}" + logger.error(error_msg) + raise 
R2RException(error_msg, 400) + + async def async_get_embedding( + self, + text: str, + stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, + purpose: EmbeddingPurpose = EmbeddingPurpose.INDEX, + **kwargs, + ) -> List[float]: + if stage != EmbeddingProvider.PipeStage.BASE: + raise ValueError( + "OllamaEmbeddingProvider only supports search stage." + ) + + task = { + "texts": [text], + "stage": stage, + "purpose": purpose, + "kwargs": kwargs, + } + result = await self._execute_with_backoff_async(task) + return result[0] + + def get_embedding( + self, + text: str, + stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, + purpose: EmbeddingPurpose = EmbeddingPurpose.INDEX, + **kwargs, + ) -> List[float]: + if stage != EmbeddingProvider.PipeStage.BASE: + raise ValueError( + "OllamaEmbeddingProvider only supports search stage." + ) + + task = { + "texts": [text], + "stage": stage, + "purpose": purpose, + "kwargs": kwargs, + } + result = self._execute_with_backoff_sync(task) + return result[0] + + async def async_get_embeddings( + self, + texts: List[str], + stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, + purpose: EmbeddingPurpose = EmbeddingPurpose.INDEX, + **kwargs, + ) -> List[List[float]]: + if stage != EmbeddingProvider.PipeStage.BASE: + raise ValueError( + "OllamaEmbeddingProvider only supports search stage." + ) + + task = { + "texts": texts, + "stage": stage, + "purpose": purpose, + "kwargs": kwargs, + } + return await self._execute_with_backoff_async(task) + + def get_embeddings( + self, + texts: List[str], + stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.BASE, + purpose: EmbeddingPurpose = EmbeddingPurpose.INDEX, + **kwargs, + ) -> List[List[float]]: + if stage != EmbeddingProvider.PipeStage.BASE: + raise ValueError( + "OllamaEmbeddingProvider only supports search stage." 
+ ) + + task = { + "texts": texts, + "stage": stage, + "purpose": purpose, + "kwargs": kwargs, + } + return self._execute_with_backoff_sync(task) + + def rerank( + self, + query: str, + results: list[VectorSearchResult], + stage: EmbeddingProvider.PipeStage = EmbeddingProvider.PipeStage.RERANK, + limit: int = 10, + ) -> list[VectorSearchResult]: + return results[:limit] diff --git a/py/core/providers/ingestion/r2r/base.py b/py/core/providers/ingestion/r2r/base.py index 2632644dc..3ff29718e 100644 --- a/py/core/providers/ingestion/r2r/base.py +++ b/py/core/providers/ingestion/r2r/base.py @@ -1,5 +1,6 @@ # type: ignore import logging +import shutil import time from typing import Any, AsyncGenerator, Optional, Union @@ -19,6 +20,9 @@ from core.base.abstractions import DocumentExtraction from core.utils import generate_extraction_id +from ...database import PostgresDBProvider +from ...llm import LiteLLMCompletionProvider, OpenAICompletionProvider + logger = logging.getLogger() @@ -38,7 +42,7 @@ class R2RIngestionProvider(IngestionProvider): DocumentType.HTM: parsers.HTMLParser, DocumentType.JSON: parsers.JSONParser, DocumentType.MD: parsers.MDParser, - DocumentType.PDF: parsers.PDFParser, + DocumentType.PDF: parsers.VLMPDFParser, DocumentType.PPTX: parsers.PPTParser, DocumentType.TXT: parsers.TextParser, DocumentType.XLSX: parsers.XLSXParser, @@ -47,6 +51,8 @@ class R2RIngestionProvider(IngestionProvider): DocumentType.JPG: parsers.ImageParser, DocumentType.PNG: parsers.ImageParser, DocumentType.SVG: parsers.ImageParser, + DocumentType.WEBP: parsers.ImageParser, + DocumentType.ICO: parsers.ImageParser, DocumentType.MP3: parsers.AudioParser, } @@ -54,23 +60,25 @@ class R2RIngestionProvider(IngestionProvider): DocumentType.CSV: {"advanced": parsers.CSVParserAdvanced}, DocumentType.PDF: { "unstructured": parsers.PDFParserUnstructured, - "zerox": parsers.ZeroxPDFParser, - "marker": parsers.PDFParserMarker, + "basic": parsers.BasicPDFParser, }, DocumentType.XLSX: {"advanced": parsers.XLSXParserAdvanced}, } - IMAGE_TYPES = { - DocumentType.GIF, - DocumentType.JPG, - DocumentType.JPEG, - DocumentType.PNG, - DocumentType.SVG, - } - - def __init__(self, config: R2RIngestionConfig): - super().__init__(config) + def __init__( + self, + config: R2RIngestionConfig, + database_provider: PostgresDBProvider, + llm_provider: Union[ + LiteLLMCompletionProvider, OpenAICompletionProvider + ], + ): + super().__init__(config, database_provider, llm_provider) self.config: R2RIngestionConfig = config # for type hinting + self.database_provider: PostgresDBProvider = database_provider + self.llm_provider: Union[ + LiteLLMCompletionProvider, OpenAICompletionProvider + ] = llm_provider self.parsers: dict[DocumentType, AsyncParser] = {} self.text_splitter = self._build_text_splitter() self._initialize_parsers() @@ -83,10 +91,18 @@ def _initialize_parsers(self): for doc_type, parser in self.DEFAULT_PARSERS.items(): # will choose the first parser in the list if doc_type not in self.config.excluded_parsers: - self.parsers[doc_type] = parser() + self.parsers[doc_type] = parser( + config=self.config, + database_provider=self.database_provider, + llm_provider=self.llm_provider, + ) for doc_type, doc_parser_name in self.config.extra_parsers.items(): - self.parsers[f"{doc_parser_name}_{str(doc_type)}"] = ( - R2RIngestionProvider.EXTRA_PARSERS[doc_type][doc_parser_name]() + self.parsers[ + f"{doc_parser_name}_{str(doc_type)}" + ] = R2RIngestionProvider.EXTRA_PARSERS[doc_type][doc_parser_name]( + config=self.config, + 
database_provider=self.database_provider, + llm_provider=self.llm_provider, ) def _build_text_splitter( @@ -187,23 +203,27 @@ async def parse( # type: ignore else: t0 = time.time() contents = "" - parser_overrides = ingestion_config_override.get( - "parser_overrides", {} + + def check_vlm(model_name: str) -> bool: + return "gpt-4o" in model_name + + is_not_vlm = not check_vlm( + ingestion_config_override.get("vision_pdf_model") + or self.config.vision_pdf_model ) - if document.document_type.value in parser_overrides: + + has_not_poppler = not bool( + shutil.which("pdftoppm") + ) # Check if poppler is installed + + if document.document_type == DocumentType.PDF and ( + is_not_vlm or has_not_poppler + ): logger.info( - f"Using parser_override for {document.document_type} with input value {parser_overrides[document.document_type.value]}" + f"Reverting to basic PDF parser as the provided is not a proper VLM model." ) - # TODO - Cleanup this approach to be less hardcoded - if ( - document.document_type != DocumentType.PDF - or parser_overrides[DocumentType.PDF.value] != "zerox" - ): - raise ValueError( - "Only Zerox PDF parser override is available." - ) async for text in self.parsers[ - f"zerox_{DocumentType.PDF.value}" + f"basic_{DocumentType.PDF.value}" ].ingest(file_content, **ingestion_config_override): contents += text + "\n" else: diff --git a/py/core/providers/ingestion/unstructured/base.py b/py/core/providers/ingestion/unstructured/base.py index a1d57af9e..e296782be 100644 --- a/py/core/providers/ingestion/unstructured/base.py +++ b/py/core/providers/ingestion/unstructured/base.py @@ -6,7 +6,7 @@ import time from copy import copy from io import BytesIO -from typing import Any, AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Optional, Union import httpx from unstructured_client import UnstructuredClient @@ -25,6 +25,9 @@ from core.base.providers.ingestion import IngestionConfig, IngestionProvider from core.utils import generate_extraction_id +from ...database import PostgresDBProvider +from ...llm import LiteLLMCompletionProvider, OpenAICompletionProvider + logger = logging.getLogger() @@ -83,6 +86,7 @@ class UnstructuredIngestionProvider(IngestionProvider): DocumentType.JPG: [parsers.ImageParser], DocumentType.PNG: [parsers.ImageParser], DocumentType.SVG: [parsers.ImageParser], + DocumentType.PDF: [parsers.VLMPDFParser], DocumentType.MP3: [parsers.AudioParser], DocumentType.JSON: [parsers.JSONParser], # type: ignore DocumentType.HTML: [parsers.HTMLParser], # type: ignore @@ -92,24 +96,27 @@ class UnstructuredIngestionProvider(IngestionProvider): EXTRA_PARSERS = { DocumentType.CSV: {"advanced": parsers.CSVParserAdvanced}, # type: ignore DocumentType.PDF: { - "unstructured": parsers.PDFParserUnstructured, - "zerox": parsers.ZeroxPDFParser, - "marker": parsers.PDFParserMarker, + "basic": parsers.BasicPDFParser, }, DocumentType.XLSX: {"advanced": parsers.XLSXParserAdvanced}, # type: ignore } - IMAGE_TYPES = { - DocumentType.GIF, - DocumentType.JPG, - DocumentType.JPEG, - DocumentType.PNG, - DocumentType.SVG, - } - - def __init__(self, config: UnstructuredIngestionConfig): - super().__init__(config) + def __init__( + self, + config: UnstructuredIngestionConfig, + database_provider: PostgresDBProvider, + llm_provider: Union[ + LiteLLMCompletionProvider, OpenAICompletionProvider + ], + ): + + super().__init__(config, database_provider, llm_provider) self.config: UnstructuredIngestionConfig = config + self.database_provider: PostgresDBProvider = database_provider + 
self.llm_provider: Union[ + LiteLLMCompletionProvider, OpenAICompletionProvider + ] = llm_provider + if config.provider == "unstructured_api": try: self.unstructured_api_auth = os.environ["UNSTRUCTURED_API_KEY"] @@ -142,25 +149,33 @@ def __init__(self, config: UnstructuredIngestionConfig): self.client = httpx.AsyncClient() - super().__init__(config) + super().__init__(config, database_provider, llm_provider) self.parsers: dict[DocumentType, AsyncParser] = {} self._initialize_parsers() def _initialize_parsers(self): - for doc_type, parser_infos in self.R2R_FALLBACK_PARSERS.items(): - for parser_info in parser_infos: + for doc_type, parsers in self.R2R_FALLBACK_PARSERS.items(): + for parser in parsers: if ( doc_type not in self.config.excluded_parsers and doc_type not in self.parsers ): # will choose the first parser in the list - self.parsers[doc_type] = parser_info() + self.parsers[doc_type] = parser( + config=self.config, + database_provider=self.database_provider, + llm_provider=self.llm_provider, + ) # TODO - Reduce code duplication between Unstructured & R2R for doc_type, doc_parser_name in self.config.extra_parsers.items(): - self.parsers[f"{doc_parser_name}_{str(doc_type)}"] = ( - UnstructuredIngestionProvider.EXTRA_PARSERS[doc_type][ - doc_parser_name - ]() + self.parsers[ + f"{doc_parser_name}_{str(doc_type)}" + ] = UnstructuredIngestionProvider.EXTRA_PARSERS[doc_type][ + doc_parser_name + ]( + config=self.config, + database_provider=self.database_provider, + llm_provider=self.llm_provider, ) async def parse_fallback( @@ -213,9 +228,25 @@ async def parse( ) elements = [] + # allow the user to re-override places where unstructured is overridden above + # e.g. + # "ingestion_config": { + # ..., + # "parser_overrides": { + # "pdf": "unstructured" + # } + # } + reoverride_with_unst = ( + parser_overrides.get(document.document_type.value, None) + == "unstructured" + ) + # TODO - Cleanup this approach to be less hardcoded # TODO - Remove code duplication between Unstructured & R2R - if document.document_type.value in parser_overrides: + if ( + document.document_type.value in parser_overrides + and not reoverride_with_unst + ): logger.info( f"Using parser_override for {document.document_type} with input value {parser_overrides[document.document_type.value]}" ) @@ -226,7 +257,10 @@ async def parse( ): elements.append(element) - elif document.document_type in self.R2R_FALLBACK_PARSERS.keys(): + elif ( + document.document_type in self.R2R_FALLBACK_PARSERS.keys() + and not reoverride_with_unst + ): logger.info( f"Parsing {document.document_type}: {document.id} with fallback parser" ) diff --git a/py/poetry.lock b/py/poetry.lock index b191bea0a..50fc2e603 100644 --- a/py/poetry.lock +++ b/py/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
[[package]] name = "aiofiles" @@ -15,7 +15,7 @@ files = [ name = "aiohappyeyeballs" version = "2.4.3" description = "Happy Eyeballs for asyncio" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "aiohappyeyeballs-2.4.3-py3-none-any.whl", hash = "sha256:8a7a83727b2756f394ab2895ea0765a0a8c475e3c71e98d43d76f22b4b435572"}, @@ -26,7 +26,7 @@ files = [ name = "aiohttp" version = "3.10.10" description = "Async http client/server framework (asyncio)" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "aiohttp-3.10.10-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:be7443669ae9c016b71f402e43208e13ddf00912f47f623ee5994e12fc7d4b3f"}, @@ -162,7 +162,7 @@ files = [ name = "aiosignal" version = "1.3.1" description = "aiosignal: a list of registered asynchronous callbacks" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "aiosignal-1.3.1-py3-none-any.whl", hash = "sha256:f8376fb07dd1e86a584e4fcdec80b36b7f81aac666ebc724e2c090300dd83b17"}, @@ -172,6 +172,21 @@ files = [ [package.dependencies] frozenlist = ">=1.1.0" +[[package]] +name = "aiosmtplib" +version = "3.0.2" +description = "asyncio SMTP client" +optional = false +python-versions = ">=3.8" +files = [ + {file = "aiosmtplib-3.0.2-py3-none-any.whl", hash = "sha256:8783059603a34834c7c90ca51103c3aa129d5922003b5ce98dbaa6d4440f10fc"}, + {file = "aiosmtplib-3.0.2.tar.gz", hash = "sha256:08fd840f9dbc23258025dca229e8a8f04d2ccf3ecb1319585615bfc7933f7f47"}, +] + +[package.extras] +docs = ["furo (>=2023.9.10)", "sphinx (>=7.0.0)", "sphinx-autodoc-typehints (>=1.24.0)", "sphinx-copybutton (>=0.5.0)"] +uvloop = ["uvloop (>=0.18)"] + [[package]] name = "aiosqlite" version = "0.20.0" @@ -383,7 +398,7 @@ test = ["flake8 (>=6.1,<7.0)", "uvloop (>=0.15.3)"] name = "attrs" version = "24.2.0" description = "Classes Without Boilerplate" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "attrs-24.2.0-py3-none-any.whl", hash = "sha256:81921eb96de3191c8258c199618104dd27ac608d9366f5e35d011eae1867ede2"}, @@ -1367,7 +1382,7 @@ woff = ["brotli (>=1.0.1)", "brotlicffi (>=0.8.0)", "zopfli (>=0.1.4)"] name = "frozenlist" version = "1.5.0" description = "A list-like structure which implements collections.abc.MutableSequence" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "frozenlist-1.5.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:5b6a66c18b5b9dd261ca98dffcb826a525334b2f29e7caa54e182255c5f6a65a"}, @@ -1468,7 +1483,7 @@ files = [ name = "fsspec" version = "2024.10.0" description = "File-system specification" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "fsspec-2024.10.0-py3-none-any.whl", hash = "sha256:03b9a6785766a4de40368b88906366755e2819e758b83705c88cd7cb5fe81871"}, @@ -1967,7 +1982,7 @@ zstd = ["zstandard (>=0.18.0)"] name = "huggingface-hub" version = "0.26.1" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" -optional = true +optional = false python-versions = ">=3.8.0" files = [ {file = "huggingface_hub-0.26.1-py3-none-any.whl", hash = "sha256:5927a8fc64ae68859cd954b7cc29d1c8390a5e15caba6d3d349c973be8fdacf3"}, @@ -2057,7 +2072,7 @@ all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2 name = "importlib-metadata" version = "8.5.0" description = "Read metadata from Python packages" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = 
"importlib_metadata-8.5.0-py3-none-any.whl", hash = "sha256:45e54197d28b7a7f1559e60b95e7c567032b602131fbd588f1497f47880aa68b"}, @@ -2108,7 +2123,7 @@ requirements-deprecated-finder = ["pip-api", "pipreqs"] name = "jinja2" version = "3.1.4" description = "A very fast and expressive template engine." -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "jinja2-3.1.4-py3-none-any.whl", hash = "sha256:bc5dd2abb727a5319567b7a813e6a2e7318c39f4f487cfe6c89c6f9c7d25197d"}, @@ -2240,7 +2255,7 @@ files = [ name = "jsonschema" version = "4.23.0" description = "An implementation of JSON Schema validation for Python" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "jsonschema-4.23.0-py3-none-any.whl", hash = "sha256:fbadb6f8b144a8f8cf9f0b89ba94501d143e50411a1278633f56a7acf7fd5566"}, @@ -2261,7 +2276,7 @@ format-nongpl = ["fqdn", "idna", "isoduration", "jsonpointer (>1.13)", "rfc3339- name = "jsonschema-specifications" version = "2024.10.1" description = "The JSON Schema meta-schemas and vocabularies, exposed as a Registry" -optional = true +optional = false python-versions = ">=3.9" files = [ {file = "jsonschema_specifications-2024.10.1-py3-none-any.whl", hash = "sha256:a09a0680616357d9a0ecf05c12ad234479f549239d0f5b55f3deea67475da9bf"}, @@ -2398,7 +2413,7 @@ files = [ name = "litellm" version = "1.50.4" description = "Library to easily interface with LLM API providers" -optional = true +optional = false python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" files = [ {file = "litellm-1.50.4-py3-none-any.whl", hash = "sha256:cc6992275e24a0bbb4a3b377e6842d45a8510fc85d7f255930a64bb872980a36"}, @@ -2836,7 +2851,7 @@ files = [ name = "multidict" version = "6.1.0" description = "multidict implementation" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "multidict-6.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:3380252550e372e8511d49481bd836264c009adb826b23fefcc5dd3c69692f60"}, @@ -3599,7 +3614,7 @@ virtualenv = ">=20.10.0" name = "propcache" version = "0.2.0" description = "Accelerated property cache" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "propcache-0.2.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:c5869b8fd70b81835a6f187c5fdbe67917a04d7e52b6e7cc4e5fe39d55c39d58"}, @@ -4396,7 +4411,7 @@ websockets = ">=11,<14" name = "referencing" version = "0.35.1" description = "JSON Referencing + Python" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "referencing-0.35.1-py3-none-any.whl", hash = "sha256:eda6d3234d62814d1c64e305c1331c9a3a6132da475ab6382eaa997b21ee75de"}, @@ -4411,7 +4426,7 @@ rpds-py = ">=0.7.0" name = "regex" version = "2024.9.11" description = "Alternative regular expression module, to replace re." 
-optional = true +optional = false python-versions = ">=3.8" files = [ {file = "regex-2024.9.11-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:1494fa8725c285a81d01dc8c06b55287a1ee5e0e382d8413adc0a9197aac6408"}, @@ -4549,7 +4564,7 @@ requests = ">=2.0.1,<3.0.0" name = "rpds-py" version = "0.20.0" description = "Python bindings to Rust's persistent data structures (rpds)" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "rpds_py-0.20.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:3ad0fda1635f8439cde85c700f964b23ed5fc2d28016b32b9ee5fe30da5c84e2"}, @@ -4696,11 +4711,6 @@ files = [ {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f60021ec1574e56632be2a36b946f8143bf4e5e6af4a06d85281adc22938e0dd"}, {file = "scikit_learn-1.5.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:394397841449853c2290a32050382edaec3da89e35b3e03d6cc966aebc6a8ae6"}, {file = "scikit_learn-1.5.2-cp312-cp312-win_amd64.whl", hash = "sha256:57cc1786cfd6bd118220a92ede80270132aa353647684efa385a74244a41e3b1"}, - {file = "scikit_learn-1.5.2-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:e9a702e2de732bbb20d3bad29ebd77fc05a6b427dc49964300340e4c9328b3f5"}, - {file = "scikit_learn-1.5.2-cp313-cp313-macosx_12_0_arm64.whl", hash = "sha256:b0768ad641981f5d3a198430a1d31c3e044ed2e8a6f22166b4d546a5116d7908"}, - {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:178ddd0a5cb0044464fc1bfc4cca5b1833bfc7bb022d70b05db8530da4bb3dd3"}, - {file = "scikit_learn-1.5.2-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f7284ade780084d94505632241bf78c44ab3b6f1e8ccab3d2af58e0e950f9c12"}, - {file = "scikit_learn-1.5.2-cp313-cp313-win_amd64.whl", hash = "sha256:b7b0f9a0b1040830d38c39b91b3a44e1b643f4b36e36567b80b7c6bd2202a27f"}, {file = "scikit_learn-1.5.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:757c7d514ddb00ae249832fe87100d9c73c6ea91423802872d9e74970a0e40b9"}, {file = "scikit_learn-1.5.2-cp39-cp39-macosx_12_0_arm64.whl", hash = "sha256:52788f48b5d8bca5c0736c175fa6bdaab2ef00a8f536cda698db61bd89c551c1"}, {file = "scikit_learn-1.5.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:643964678f4b5fbdc95cbf8aec638acc7aa70f5f79ee2cdad1eec3df4ba6ead8"}, @@ -5121,7 +5131,7 @@ files = [ name = "tiktoken" version = "0.8.0" description = "tiktoken is a fast BPE tokeniser for use with OpenAI's models" -optional = true +optional = false python-versions = ">=3.9" files = [ {file = "tiktoken-0.8.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:b07e33283463089c81ef1467180e3e00ab00d46c2c4bbcef0acab5f771d6695e"}, @@ -5132,7 +5142,6 @@ files = [ {file = "tiktoken-0.8.0-cp310-cp310-win_amd64.whl", hash = "sha256:d8c2d0e5ba6453a290b86cd65fc51fedf247e1ba170191715b049dac1f628005"}, {file = "tiktoken-0.8.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:d622d8011e6d6f239297efa42a2657043aaed06c4f68833550cac9e9bc723ef1"}, {file = "tiktoken-0.8.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:2efaf6199717b4485031b4d6edb94075e4d79177a172f38dd934d911b588d54a"}, - {file = "tiktoken-0.8.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5637e425ce1fc49cf716d88df3092048359a4b3bbb7da762840426e937ada06d"}, {file = "tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9fb0e352d1dbe15aba082883058b3cce9e48d33101bdaac1eccf66424feb5b47"}, {file = 
"tiktoken-0.8.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:56edfefe896c8f10aba372ab5706b9e3558e78db39dd497c940b47bf228bc419"}, {file = "tiktoken-0.8.0-cp311-cp311-win_amd64.whl", hash = "sha256:326624128590def898775b722ccc327e90b073714227175ea8febbc920ac0a99"}, @@ -5168,7 +5177,7 @@ blobfile = ["blobfile (>=2)"] name = "tokenizers" version = "0.19.0" description = "" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "tokenizers-0.19.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:8e1c2ab2e501d52c39fa61fecb1270ff5ece272beab9b893792176c6e077116a"}, @@ -5312,6 +5321,17 @@ notebook = ["ipywidgets (>=6)"] slack = ["slack-sdk"] telegram = ["requests"] +[[package]] +name = "types-aiofiles" +version = "24.1.0.20240626" +description = "Typing stubs for aiofiles" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-aiofiles-24.1.0.20240626.tar.gz", hash = "sha256:48604663e24bc2d5038eac05ccc33e75799b0779e93e13d6a8f711ddc306ac08"}, + {file = "types_aiofiles-24.1.0.20240626-py3-none-any.whl", hash = "sha256:7939eca4a8b4f9c6491b6e8ef160caee9a21d32e18534a57d5ed90aee47c66b4"}, +] + [[package]] name = "types-requests" version = "2.32.0.20241016" @@ -5731,7 +5751,7 @@ files = [ name = "yarl" version = "1.16.0" description = "Yet another URL library" -optional = true +optional = false python-versions = ">=3.9" files = [ {file = "yarl-1.16.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:32468f41242d72b87ab793a86d92f885355bcf35b3355aa650bfa846a5c60058"}, @@ -5827,7 +5847,7 @@ propcache = ">=0.2.0" name = "zipp" version = "3.20.2" description = "Backport of pathlib-compatible object wrapper for zip files" -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "zipp-3.20.2-py3-none-any.whl", hash = "sha256:a817ac80d6cf4b23bf7f2828b7cabf326f15a001bea8b1f9b49631780ba28350"}, @@ -5843,10 +5863,10 @@ test = ["big-O", "importlib-resources", "jaraco.functools", "jaraco.itertools", type = ["pytest-mypy"] [extras] -core = ["aiosqlite", "apscheduler", "asyncpg", "bcrypt", "boto3", "deepdiff", "fire", "fsspec", "future", "graspologic", "gunicorn", "hatchet-sdk", "litellm", "networkx", "ollama", "passlib", "psutil", "pydantic", "pyjwt", "python-multipart", "pyyaml", "sqlalchemy", "supabase", "tokenizers", "unstructured-client", "uvicorn", "vecs"] +core = ["aiosqlite", "apscheduler", "asyncpg", "bcrypt", "boto3", "deepdiff", "fire", "fsspec", "future", "graspologic", "gunicorn", "hatchet-sdk", "networkx", "ollama", "passlib", "psutil", "pydantic", "pyjwt", "python-multipart", "pyyaml", "sqlalchemy", "supabase", "tokenizers", "unstructured-client", "uvicorn", "vecs"] ingestion-bundle = ["aiofiles", "aioshutil", "beautifulsoup4", "bs4", "markdown", "numpy", "openpyxl", "pdf2image", "pypdf", "pypdf2", "python-docx", "python-pptx"] [metadata] lock-version = "2.0" python-versions = ">=3.11,<3.13" -content-hash = "65a86f61c7efcbc23b9fc221cdd487e8aa4f3b3129444e0b6b75b28868f34654" +content-hash = "fb41515396b9a34291521c668a4d9b889406c781731a00cf6b06ef2e6347b28a" diff --git a/py/pyproject.toml b/py/pyproject.toml index 4f19cdbd7..8b29c03b9 100644 --- a/py/pyproject.toml +++ b/py/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "r2r" readme = "README.md" -version = "3.2.35" +version = "3.2.36" description = "SciPhi R2R" authors = ["Owen Colegrove "] @@ -17,7 +17,6 @@ packages = [ { include = "shared", from = "." }, { include = "core", from = "." }, { include = "cli", from = "." 
}, - ] [tool.poetry.dependencies] @@ -29,6 +28,7 @@ asyncclick = "^8.1.7.2" click = "^8.0.0" fastapi = "^0.114.0" httpx = "^0.27.0" +litellm = "^1.42.3" # move back to optional after zerox integration is complete nest-asyncio = "^1.6.0" openai = "^1.11.1" posthog = "^3.5.0" @@ -52,7 +52,6 @@ future = { version = "^1.0.0", optional = true } graspologic = { version = "^3.4.1", optional = true } gunicorn = { version = "^21.2.0", optional = true } hatchet-sdk = { version = "^0.38.0", optional = true } -litellm = { version = "^1.42.3", optional = true } networkx = { version = "^3.3", optional = true } ollama = { version = "^0.3.1", optional = true } passlib = { version = "^1.7.4", optional = true } @@ -80,6 +79,8 @@ pypdf = { version = "^4.2.0", optional = true } pypdf2 = { version = "^3.0.1", optional = true } python-pptx = { version = "^1.0.1", optional = true } python-docx = { version = "^1.1.0", optional = true } +aiosmtplib = "^3.0.2" +types-aiofiles = "^24.1.0.20240626" [tool.poetry.extras] core = [ @@ -95,7 +96,6 @@ core = [ "graspologic", "gunicorn", "hatchet-sdk", - "litellm", "networkx", "ollama", "passlib", diff --git a/py/r2r.toml b/py/r2r.toml index b7fb67555..7ef4b5bea 100644 --- a/py/r2r.toml +++ b/py/r2r.toml @@ -15,7 +15,6 @@ require_email_verification = false default_admin_email = "admin@example.com" default_admin_password = "change_me_immediately" - [completion] provider = "litellm" concurrent_request_limit = 256 @@ -70,8 +69,17 @@ batch_size = 256 [embedding] provider = "litellm" + +# For basic applications, use `openai/text-embedding-3-small` with `base_dimension = 512` base_model = "openai/text-embedding-3-small" base_dimension = 512 + +# RECOMMENDED - For advanced applications, +# use `openai/text-embedding-3-large` with `base_dimension = 3072` and binary quantization +# base_model = "openai/text-embedding-3-large" +# base_dimension = 3072 +# quantization_settings = { quantization_type = "INT1" } + batch_size = 128 add_title_as_prefix = false rerank_model = "None" @@ -88,6 +96,12 @@ chunk_size = 1_024 chunk_overlap = 512 excluded_parsers = ["mp4"] +audio_transcription_model="openai/whisper-1" +vision_img_model = "gpt-4o-mini" +vision_pdf_model = "gpt-4o-mini" +# vision_img_prompt_name = "vision_img" # optional, default is "vision_img" +# vision_pdf_prompt_name = "vision_pdf" # optional, default is "vision_pdf" + [ingestion.chunk_enrichment_settings] enable_chunk_enrichment = false # disabled by default strategies = ["semantic", "neighborhood"] @@ -98,7 +112,7 @@ excluded_parsers = ["mp4"] generation_config = { model = "openai/gpt-4o-mini" } [ingestion.extra_parsers] - pdf = "zerox" + pdf = "basic" [logging] provider = "r2r" @@ -111,3 +125,6 @@ provider = "simple" [prompt] provider = "r2r" + +[email] +provider = "console_mock" diff --git a/py/sdk/async_client.py b/py/sdk/async_client.py index a92598be7..df48089d0 100644 --- a/py/sdk/async_client.py +++ b/py/sdk/async_client.py @@ -3,7 +3,7 @@ import httpx -from core.base.abstractions import R2RException +from shared.abstractions import R2RException from .base.base_client import BaseClient from .mixins import ( diff --git a/py/sdk/base/base_client.py b/py/sdk/base/base_client.py index 7badf13d7..096cd69aa 100644 --- a/py/sdk/base/base_client.py +++ b/py/sdk/base/base_client.py @@ -1,6 +1,6 @@ from typing import Optional -from core.base.abstractions import R2RException +from shared.abstractions import R2RException class BaseClient: diff --git a/py/sdk/mixins/ingestion.py b/py/sdk/mixins/ingestion.py index 
6c2349ba4..4aa8d8d5e 100644 --- a/py/sdk/mixins/ingestion.py +++ b/py/sdk/mixins/ingestion.py @@ -4,7 +4,7 @@ from typing import Optional, Union from uuid import UUID -from core.base import IndexMeasure, IndexMethod, VectorTableName +from shared.abstractions import IndexMeasure, IndexMethod, VectorTableName class IngestionMixins: @@ -205,6 +205,7 @@ async def create_vector_index( index_measure: IndexMeasure = IndexMeasure.cosine_distance, index_arguments: Optional[dict] = None, index_name: Optional[str] = None, + index_column: Optional[list[str]] = None, concurrently: bool = True, ) -> dict: """ @@ -227,6 +228,7 @@ async def create_vector_index( "index_measure": index_measure, "index_arguments": index_arguments, "index_name": index_name, + "index_column": index_column, "concurrently": concurrently, } return await self._make_request( # type: ignore diff --git a/py/shared/abstractions/__init__.py b/py/shared/abstractions/__init__.py index 225308646..f625255d5 100644 --- a/py/shared/abstractions/__init__.py +++ b/py/shared/abstractions/__init__.py @@ -52,10 +52,15 @@ ) from .user import Token, TokenData, UserStats from .vector import ( + IndexArgsHNSW, + IndexArgsIVFFlat, + IndexMeasure, + IndexMethod, StorageResult, Vector, VectorEntry, VectorQuantizationType, + VectorTableName, VectorType, ) @@ -124,6 +129,11 @@ "Vector", "VectorEntry", "VectorType", + "IndexMethod", + "IndexMeasure", + "IndexArgsIVFFlat", + "IndexArgsHNSW", + "VectorTableName", "VectorQuantizationType", "StorageResult", ] diff --git a/py/shared/abstractions/document.py b/py/shared/abstractions/document.py index 04629ffad..7c4daee3b 100644 --- a/py/shared/abstractions/document.py +++ b/py/shared/abstractions/document.py @@ -49,6 +49,8 @@ class DocumentType(str, Enum): TIFF = "tiff" JPG = "jpg" SVG = "svg" + WEBP = "webp" + ICO = "ico" # Markdown MD = "md" diff --git a/py/shared/abstractions/graph.py b/py/shared/abstractions/graph.py index 0b01f8dbd..049679cdc 100644 --- a/py/shared/abstractions/graph.py +++ b/py/shared/abstractions/graph.py @@ -166,35 +166,35 @@ class Community(BaseModel): id: int | None = None """The ID of the community.""" - level: int | None = None - """Community level.""" - - entity_ids: list[str] | None = None - """List of entity IDs related to the community (optional).""" + community_number: int | None = None + """The community number.""" - relationship_ids: list[str] | None = None - """List of relationship IDs related to the community (optional).""" + collection_id: uuid.UUID | None = None + """The ID of the collection this community is associated with.""" - covariate_ids: dict[str, list[str]] | None = None - """Dictionary of different types of covariates related to the community (optional), e.g. claims""" + level: int | None = None + """Community level.""" - attributes: dict[str, Any] | None = None - """A dictionary of additional attributes associated with the community (optional). To be included in the search prompt.""" + name: str = "" + """The name of the community.""" summary: str = "" """Summary of the report.""" - full_content: str = "" - """Full content of the report.""" + findings: list[str] = [] + """Findings of the report.""" + + rating: float | None = None + """Rating of the report.""" - rank: float | None = 1.0 - """Rank of the report, used for sorting (optional). Higher means more important""" + rating_explanation: str | None = None + """Explanation of the rating.""" embedding: list[float] | None = None - """The semantic (i.e. 
text) embedding of the report summary (optional).""" + """Embedding of summary and findings.""" - full_content_embedding: list[float] | None = None - """The semantic (i.e. text) embedding of the full report content (optional).""" + attributes: dict[str, Any] | None = None + """A dictionary of additional attributes associated with the community (optional). To be included in the search prompt.""" def __init__(self, **kwargs): super().__init__(**kwargs) diff --git a/py/shared/abstractions/search.py b/py/shared/abstractions/search.py index 2e8a2bad6..ad6d8d760 100644 --- a/py/shared/abstractions/search.py +++ b/py/shared/abstractions/search.py @@ -6,10 +6,10 @@ from pydantic import Field -from shared.abstractions.graph import EntityLevel - from .base import R2RSerializable +from .graph import EntityLevel from .llm import GenerationConfig +from .vector import IndexMeasure class VectorSearchResult(R2RSerializable): @@ -165,22 +165,6 @@ def as_dict(self) -> dict: } -# TODO - stop duplication of this enum, move collections primitives to 'abstractions' -class IndexMeasure(str, Enum): - """ - An enum representing the types of distance measures available for indexing. - - Attributes: - cosine_distance (str): The cosine distance measure for indexing. - l2_distance (str): The Euclidean (L2) distance measure for indexing. - max_inner_product (str): The maximum inner product measure for indexing. - """ - - cosine_distance = "cosine_distance" - l2_distance = "l2_distance" - max_inner_product = "max_inner_product" - - class HybridSearchSettings(R2RSerializable): full_text_weight: float = Field( default=1.0, description="Weight to apply to full text search" diff --git a/py/shared/abstractions/vector.py b/py/shared/abstractions/vector.py index b367edd03..7b7150f1d 100644 --- a/py/shared/abstractions/vector.py +++ b/py/shared/abstractions/vector.py @@ -44,9 +44,12 @@ class IndexMeasure(str, Enum): max_inner_product (str): The maximum inner product measure for indexing. """ - cosine_distance = "cosine_distance" l2_distance = "l2_distance" max_inner_product = "max_inner_product" + cosine_distance = "cosine_distance" + l1_distance = "l1_distance" + hamming_distance = "hamming_distance" + jaccard_distance = "jaccard_distance" def __str__(self) -> str: return self.value @@ -54,9 +57,23 @@ def __str__(self) -> str: @property def ops(self) -> str: return { - IndexMeasure.cosine_distance: "_cosine_ops", IndexMeasure.l2_distance: "_l2_ops", IndexMeasure.max_inner_product: "_ip_ops", + IndexMeasure.cosine_distance: "_cosine_ops", + IndexMeasure.l1_distance: "_l1_ops", + IndexMeasure.hamming_distance: "_hamming_ops", + IndexMeasure.jaccard_distance: "_jaccard_ops", + }[self] + + @property + def pgvector_repr(self) -> str: + return { + IndexMeasure.l2_distance: "<->", + IndexMeasure.max_inner_product: "<#>", + IndexMeasure.cosine_distance: "<=>", + IndexMeasure.l1_distance: "<+>", + IndexMeasure.hamming_distance: "<~>", + IndexMeasure.jaccard_distance: "<%>", }[self] @@ -92,13 +109,6 @@ class IndexArgsHNSW(R2RSerializable): ef_construction: Optional[int] = 64 -INDEX_MEASURE_TO_SQLA_ACC = { - IndexMeasure.cosine_distance: lambda x: x.cosine_distance, - IndexMeasure.l2_distance: lambda x: x.l2_distance, - IndexMeasure.max_inner_product: lambda x: x.max_inner_product, -} - - class VectorTableName(str, Enum): """ This enum represents the different tables where we store vectors. 
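For context on the `pgvector_repr` property added above: each IndexMeasure now maps to the pgvector distance operator used when ordering nearest-neighbour queries. A minimal sketch of how a caller might consume it, assuming a hypothetical helper and a hypothetical column name "vec"; the SQL that R2R actually emits may differ.

from shared.abstractions.vector import IndexMeasure

def order_by_clause(measure: IndexMeasure, column: str = "vec") -> str:
    # Hypothetical helper: pgvector ranks rows by applying the distance
    # operator between the stored vector column and a bound query vector.
    return f"ORDER BY {column} {measure.pgvector_repr} $1"

print(order_by_clause(IndexMeasure.cosine_distance))  # ORDER BY vec <=> $1
print(order_by_clause(IndexMeasure.l1_distance))      # ORDER BY vec <+> $1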
diff --git a/py/tests/conftest.py b/py/tests/conftest.py index badd6c8ff..af6b482dc 100644 --- a/py/tests/conftest.py +++ b/py/tests/conftest.py @@ -12,6 +12,7 @@ BCryptConfig, CompletionConfig, DatabaseConfig, + EmailConfig, EmbeddingConfig, PersistentLoggingConfig, SqlitePersistentLoggingProvider, @@ -29,6 +30,7 @@ ) from core.providers import ( BCryptProvider, + ConsoleMockEmailProvider, LiteLLMCompletionProvider, LiteLLMEmbeddingProvider, PostgresDBProvider, @@ -169,12 +171,25 @@ def auth_config(app_config): ) +@pytest.fixture(scope="function") +def email_provider(app_config): + return ConsoleMockEmailProvider( + EmailConfig(provider="console_mock", app=app_config) + ) + + @pytest.fixture(scope="function") async def r2r_auth_provider( - auth_config, crypto_provider, temporary_postgres_db_provider + auth_config, + crypto_provider, + temporary_postgres_db_provider, + email_provider, ): auth_provider = R2RAuthProvider( - auth_config, crypto_provider, temporary_postgres_db_provider + auth_config, + crypto_provider, + temporary_postgres_db_provider, + email_provider, ) await auth_provider.initialize() yield auth_provider diff --git a/py/tests/core/providers/database/relational/test_document_db.py b/py/tests/core/providers/database/relational/test_document_db.py index 89f5822ca..dde41db93 100644 --- a/py/tests/core/providers/database/relational/test_document_db.py +++ b/py/tests/core/providers/database/relational/test_document_db.py @@ -45,7 +45,7 @@ async def test_upsert_documents_overview(temporary_postgres_db_provider): assert inserted_document.id == document_info.id assert inserted_document.collection_ids == document_info.collection_ids assert inserted_document.user_id == document_info.user_id - assert inserted_document.type == document_info.document_type + assert inserted_document.document_type == document_info.document_type assert inserted_document.metadata == document_info.metadata assert inserted_document.title == document_info.title assert inserted_document.version == document_info.version diff --git a/py/tests/integration/runner_sdk.py b/py/tests/integration/runner_sdk.py index 7ffccddc3..3cf6f452a 100644 --- a/py/tests/integration/runner_sdk.py +++ b/py/tests/integration/runner_sdk.py @@ -585,8 +585,7 @@ def test_user_search_and_rag(): def test_user_password_management(): print("Testing: User password management") - # Test for duplicate user - client.login("password_mgmt@example.com", "password123") + client.login("user_test@example.com", "password123") # Change password client.change_password("password123", "new_password") @@ -597,6 +596,9 @@ def test_user_password_management(): # reset_confirm_result = client.confirm_password_reset("reset_token_here", "password123") # print(f"Reset Confirm Result:\n{reset_confirm_result}") + # Change password back to the original password + client.change_password("new_password", "password123") + print("User password management test passed") print("~" * 100) @@ -1990,6 +1992,228 @@ def test_get_prompt(): assert "You are a helpful agent." 
in response["results"]["message"] +def test_add_prompt(): + print("Testing: Add Prompt") + + # Test adding a new prompt + prompt_data = { + "name": "test_prompt", + "template": "This is a test prompt with {input_var}", + "input_types": {"input_var": "string"}, + } + + add_result = client.add_prompt( + name=prompt_data["name"], + template=prompt_data["template"], + input_types=prompt_data["input_types"], + )["results"] + + # Verify the prompt was added successfully + assert add_result["name"] == prompt_data["name"] + assert add_result["template"] == prompt_data["template"] + assert add_result["input_types"] == prompt_data["input_types"] + assert "prompt_id" in add_result + assert "created_at" in add_result + assert "updated_at" in add_result + + print("Add prompt test passed") + print("~" * 100) + + +def test_update_prompt(): + print("Testing: Update Prompt") + + # Update an existing prompt + updated_template = "This is an updated test prompt with {input_var}" + updated_input_types = {"input_var": "string", "new_var": "integer"} + + update_result = client.update_prompt( + name="test_prompt", + template=updated_template, + input_types=updated_input_types, + )["results"] + + # Verify the prompt was updated successfully + assert update_result["template"] == updated_template + assert update_result["input_types"] == updated_input_types + assert update_result["name"] == "test_prompt" + assert "updated_at" in update_result + + # Test partial updates + template_only_update = "Template only update with {input_var}" + template_update_result = client.update_prompt( + name="test_prompt", template=template_only_update + )["results"] + + assert template_update_result["template"] == template_only_update + assert template_update_result["input_types"] == updated_input_types + + print("Update prompt test passed") + print("~" * 100) + + +def test_get_prompt(): + print("Testing: Get Prompt") + + # Test getting a prompt without inputs + basic_result = client.get_prompt("test_prompt")["results"] + assert "message" in basic_result + + # Test getting a prompt with inputs + inputs = {"input_var": "test value"} + result_with_inputs = client.get_prompt("test_prompt", inputs=inputs)[ + "results" + ] + assert "message" in result_with_inputs + assert "test value" in result_with_inputs["message"] + + # Test getting a prompt with override + override_template = "Override template with {input_var}" + result_with_override = client.get_prompt( + "test_prompt", inputs=inputs, prompt_override=override_template + )["results"] + assert "message" in result_with_override + assert ( + "Override template with test value" in result_with_override["message"] + ) + + print("Get prompt test passed") + print("~" * 100) + + +def test_get_all_prompts(): + print("Testing: Get All Prompts") + + result = client.get_all_prompts()["results"] + + # Verify structure of the response + assert "prompts" in result + prompts = result["prompts"] + + # Verify our test prompt is in the list + test_prompt = prompts.get("test_prompt") + assert test_prompt is not None + assert test_prompt["name"] == "test_prompt" + assert "template" in test_prompt + assert "input_types" in test_prompt + assert "prompt_id" in test_prompt + assert "created_at" in test_prompt + assert "updated_at" in test_prompt + + # Verify required system prompts exist + required_prompts = {"default_system", "rag_agent", "hyde"} + assert all(prompt in prompts for prompt in required_prompts) + + print("Get all prompts test passed") + print("~" * 100) + + +def test_delete_prompt(): + 
print("Testing: Delete Prompt") + + # First, verify the prompt exists + all_prompts_before = client.get_all_prompts()["results"]["prompts"] + assert "test_prompt" in all_prompts_before + + # Delete the prompt + delete_result = client.delete_prompt("test_prompt")["results"] + assert delete_result["message"] == "Prompt deleted successfully" + + # Verify the prompt was deleted + all_prompts_after = client.get_all_prompts()["results"]["prompts"] + assert "test_prompt" not in all_prompts_after + + # Test deleting a non-existent prompt + try: + client.delete_prompt("non_existent_prompt") + assert False, "Expected an error when deleting non-existent prompt" + except Exception as e: + assert "not found" in str(e).lower() + + print("Delete prompt test passed") + print("~" * 100) + + +def test_prompt_error_handling(): + print("Testing: Prompt Error Handling") + + # Test adding a prompt with invalid input types + try: + client.add_prompt( + name="invalid_prompt", + template="Test template", + input_types={"var": "invalid_type"}, + ) + assert False, "Expected an error for invalid input type" + except Exception as e: + assert "invalid input type" in str(e).lower() + + # Test adding a prompt with invalid template + try: + client.add_prompt( + name="invalid_prompt", + template="Template with {undefined_var}", + input_types={"other_var": "string"}, + ) + assert False, "Expected an error for undefined template variable" + except Exception as e: + assert "undefined variable" in str(e).lower() + + # Test updating a non-existent prompt + try: + client.update_prompt( + name="non_existent_prompt", template="New template" + ) + assert False, "Expected an error when updating non-existent prompt" + except Exception as e: + assert "not found" in str(e).lower() + + print("Prompt error handling test passed") + print("~" * 100) + + +def test_prompt_access_control(): + print("Testing: Prompt Access Control") + + # Create a new non-admin user + client.register("prompt_test_user@example.com", "password123") + client.login("prompt_test_user@example.com", "password123") + + # Test that non-admin user can't add prompts + try: + client.add_prompt( + name="unauthorized_prompt", + template="Test template", + input_types={"var": "string"}, + ) + assert False, "Expected an error for unauthorized prompt creation" + except Exception as e: + assert "unauthorized" in str(e).lower() + + # Test that non-admin user can't update system prompts + try: + client.update_prompt( + name="default_system", template="Modified system prompt" + ) + assert False, "Expected an error for unauthorized prompt update" + except Exception as e: + assert "unauthorized" in str(e).lower() + + # Test that non-admin user can't delete prompts + try: + client.delete_prompt("default_system") + assert False, "Expected an error for unauthorized prompt deletion" + except Exception as e: + assert "unauthorized" in str(e).lower() + + # Verify that non-admin user can still get prompts + get_result = client.get_prompt("default_system") + assert "message" in get_result["results"] + + print("Prompt access control test passed") + print("~" * 100) + + def create_client(base_url): return R2RClient(base_url)