diff --git a/.gitignore b/.gitignore index 54aedb76f..b80511bfd 100644 --- a/.gitignore +++ b/.gitignore @@ -8,6 +8,8 @@ py/workspace/ uploads/ env/ **/__pycache__ +**/.mypy_cache +**/.pytest_cache dump/* .next node_modules diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 1a086bbdc..73010fcef 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -23,3 +23,10 @@ repos: language: system types: [python] pass_filenames: false + + - id: mypy + name: mypy + entry: bash -c 'cd py && poetry run mypy .' + language: system + types: [python] + pass_filenames: false diff --git a/docs/api-reference/openapi.json b/docs/api-reference/openapi.json index ef1a7fa88..a44a15e9d 100644 --- a/docs/api-reference/openapi.json +++ b/docs/api-reference/openapi.json @@ -1 +1 @@ - -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1_v2.txt;type=text/plain\" \\\n -F 'document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]'\n"}]}},"/v2/health":{"get":{"summary":"Health Check","operationId":"health_check_v2_health_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/v2/server_stats":{"get":{"summary":"Server Stats","operationId":"server_stats_v2_server_stats_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ServerStats_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/update_prompt":{"post":{"summary":"Update Prompt App","operationId":"update_prompt_app_v2_update_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_prompt_app_v2_update_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/add_prompt":{"post":{"summary":"Add Prompt App","operationId":"add_prompt_app_v2_add_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_prompt_app_v2_add_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_prompt/{prompt_name}":{"get":{"summary":"Get Prompt App","operationId":"get_prompt_app_v2_get_prompt__prompt_name__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"},{"name":"inputs","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"description":"JSON-encoded prompt inputs","title":"Inputs"},"description":"JSON-encoded prompt inputs"},{"name":"prompt_override","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Prompt override","title":"Prompt Override"},"description":"Prompt override"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_all_prompts":{"get":{"summary":"Get All Prompts 
App","operationId":"get_all_prompts_app_v2_get_all_prompts_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AllPromptsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_prompt/{prompt_name}":{"delete":{"summary":"Delete Prompt App","operationId":"delete_prompt_app_v2_delete_prompt__prompt_name__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/analytics":{"get":{"summary":"Get Analytics App","operationId":"get_analytics_app_v2_analytics_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filter_criteria","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Filter Criteria"}},{"name":"analysis_types","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Analysis Types"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AnalyticsResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logs":{"get":{"summary":"Logs 
App","operationId":"logs_app_v2_logs_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"run_type_filter","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"default":"","title":"Run Type Filter"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_LogResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/app_settings":{"get":{"summary":"App Settings","operationId":"app_settings_v2_app_settings_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AppSettingsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/score_completion":{"post":{"summary":"Score Completion","operationId":"score_completion_v2_score_completion_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_score_completion_v2_score_completion_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ScoreCompletionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/users_overview":{"get":{"summary":"Users Overview 
App","operationId":"users_overview_app_v2_users_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"default":[],"title":"User Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_UserOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete":{"delete":{"summary":"Delete App","operationId":"delete_app_v2_delete_delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filters","in":"query","required":true,"schema":{"type":"string","description":"JSON-encoded filters","title":"Filters"},"description":"JSON-encoded filters"}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/download_file/{document_id}":{"get":{"summary":"Download File App","description":"Download a file by its document ID as a stream.","operationId":"download_file_app_v2_download_file__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"}],"responses":{"200":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/documents_overview":{"get":{"summary":"Documents Overview 
App","operationId":"documents_overview_app_v2_documents_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_ids","in":"query","required":false,"schema":{"type":"array","items":{"type":"string"},"default":[],"title":"Document Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/document_chunks/{document_id}":{"get":{"summary":"Document Chunks App","operationId":"document_chunks_app_v2_document_chunks__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","title":"Document Id"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_DocumentChunkResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/inspect_knowledge_graph":{"get":{"summary":"Inspect Knowledge 
Graph","operationId":"inspect_knowledge_graph_v2_inspect_knowledge_graph_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"offset","in":"query","required":false,"schema":{"type":"integer","default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","default":100,"title":"Limit"}},{"name":"print_descriptions","in":"query","required":false,"schema":{"type":"boolean","default":false,"title":"Print Descriptions"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_str_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collections_overview":{"get":{"summary":"Collections Overview App","operationId":"collections_overview_app_v2_collections_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"title":"Collection Ids"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","maximum":1000,"minimum":1},{"type":"null"}],"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_GroupOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_collection":{"post":{"summary":"Create Collection 
App","operationId":"create_collection_app_v2_create_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_collection_app_v2_create_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GroupResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_collection/{collection_id}":{"get":{"summary":"Get Collection App","operationId":"get_collection_app_v2_get_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GroupResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/update_collection":{"put":{"summary":"Update Collection App","operationId":"update_collection_app_v2_update_collection_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_collection_app_v2_update_collection_put"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GroupResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_collection/{collection_id}":{"delete":{"summary":"Delete Collection 
App","operationId":"delete_collection_app_v2_delete_collection__collection_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/list_collections":{"get":{"summary":"List Collections App","operationId":"list_collections_app_v2_list_collections_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_GroupResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/add_user_to_collection":{"post":{"summary":"Add User To Collection App","operationId":"add_user_to_collection_app_v2_add_user_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_user_to_collection_app_v2_add_user_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AddUserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_user_from_collection":{"post":{"summary":"Remove User From Collection 
App","operationId":"remove_user_from_collection_app_v2_remove_user_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_users_in_collection/{collection_id}":{"get":{"summary":"Get Users In Collection App","operationId":"get_users_in_collection_app_v2_get_users_in_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user_collections/{user_id}":{"get":{"summary":"Get Collections For User App","operationId":"get_collections_for_user_app_v2_user_collections__user_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"User ID","title":"User Id"},"description":"User 
ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/assign_document_to_collection":{"post":{"summary":"Assign Document To Collection App","operationId":"assign_document_to_collection_app_v2_assign_document_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_document_from_collection":{"post":{"summary":"Remove Document From Collection App","operationId":"remove_document_from_collection_app_v2_remove_document_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/document_collections/{document_id}":{"get":{"summary":"Document Collections 
App","operationId":"document_collections_app_v2_document_collections__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_GroupResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collection/{collection_id}/documents":{"get":{"summary":"Documents In Collection App","operationId":"documents_in_collection_app_v2_collection__collection_id__documents_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/search":{"post":{"summary":"Search App","description":"Perform a search query on the vector database and knowledge graph.\n\nThis endpoint allows for complex filtering of search results using 
PostgreSQL-based queries.\nFilters can be applied to various fields such as document_id, and internal metadata values.\n\n\nAllowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.","operationId":"search_app_v2_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_search_app_v2_search_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_SearchResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.search(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"kg_search_generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/search\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n 
\"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"kg_search_generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n }'\n"}]}},"/v2/rag":{"post":{"summary":"Rag App","description":"Execute a RAG (Retrieval-Augmented Generation) query.\n\nThis endpoint combines search results with language model generation.\nIt supports the same filtering capabilities as the search endpoint,\nallowing for precise control over the retrieved context.\n\nThe generation process can be customized using the rag_generation_config parameter.","operationId":"rag_app_v2_rag_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_rag_app_v2_rag_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.rag(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True,\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"kg_search_generation_config\": {\n \"model\": \"gpt-4o-mini\",\n 
\"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/rag\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"kg_search_generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n }'\n"}]}},"/v2/agent":{"post":{"summary":"Agent App","description":"Implement an agent-based interaction for complex query processing.\n\nThis endpoint supports multi-turn conversations and can handle complex queries\nby breaking them down into sub-tasks. 
It uses the same filtering capabilities\nas the search and RAG endpoints for retrieving relevant information.\n\nThe agent's behavior can be customized using the rag_generation_config and\ntask_prompt_override parameters.","operationId":"agent_app_v2_agent_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_agent_app_v2_agent_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGAgentResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.agent(\n messages=[\n {\"role\": \"user\", \"content\": \"Who is the greatest philospher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philospher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n include_title_if_available=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/agent\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"messages\": [\n {\"role\": \"user\", \"content\": \"Who is the greatest philospher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philospher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me 
more about him?\"}\n ],\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": false # to enable this, please read the graphrag cookbook\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n \"include_title_if_available\": true\n }'\n"}]}},"/v2/register":{"post":{"summary":"Register App","description":"Register a new user with the given email and password.","operationId":"register_app_v2_register_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_register_app_v2_register_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/verify_email":{"post":{"summary":"Verify Email App","description":"Verify a user's email address.\n\nThis endpoint is used to confirm a user's email address using the verification code\nsent to their email after registration.","operationId":"verify_email_app_v2_verify_email_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_verify_email_app_v2_verify_email_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/login":{"post":{"summary":"Login App","description":"Authenticate a user and provide access tokens.\n\nThis endpoint authenticates a user using 
their email (username) and password,\nand returns access and refresh tokens upon successful authentication.","operationId":"login_app_v2_login_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/Body_login_app_v2_login_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logout":{"post":{"summary":"Logout App","description":"Log out the current user.\n\nThis endpoint invalidates the user's current access token, effectively logging them out.","operationId":"logout_app_v2_logout_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}}},"security":[{"OAuth2PasswordBearer":[]},{"HTTPBearer":[]}]}},"/v2/user":{"get":{"summary":"Get User App","description":"Get the current user's profile information.\n\nThis endpoint returns the profile information of the currently authenticated user.","operationId":"get_user_app_v2_user_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}}},"security":[{"HTTPBearer":[]}]},"put":{"summary":"Put User App","description":"Update the current user's profile information.\n\nThis endpoint allows the authenticated user to update their profile information.","operationId":"put_user_app_v2_user_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_put_user_app_v2_user_put"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/refresh_access_token":{"post":{"summary":"Refresh Access Token App","description":"Refresh the access token using a refresh token.\n\nThis endpoint allows users to obtain a new access token using their refresh token.","operationId":"refresh_access_token_app_v2_refresh_access_token_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","title":"Refresh Token","description":"Refresh token"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/change_password":{"post":{"summary":"Change Password App","description":"Change the authenticated user's password.\n\nThis endpoint allows users to change their password by providing their current password\nand a new password.","operationId":"change_password_app_v2_change_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_change_password_app_v2_change_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/request_password_reset":{"post":{"summary":"Request Password Reset App","description":"Request a password reset for a user.\n\nThis endpoint initiates the password reset process by 
sending a reset link\nto the specified email address.","operationId":"request_password_reset_app_v2_request_password_reset_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","format":"email","title":"Email","description":"User's email address"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/reset_password":{"post":{"summary":"Reset Password App","operationId":"reset_password_app_v2_reset_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_reset_password_app_v2_reset_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user/{user_id}":{"delete":{"summary":"Delete User App","description":"Delete a user account.\n\nThis endpoint allows users to delete their own account or, for superusers,\nto delete any user account.","operationId":"delete_user_app_v2_user__user_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"ID of the user to delete","title":"User Id"},"description":"ID of the user to delete"}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_user_app_v2_user__user_id__delete"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_graph":{"post":{"summary":"Create Graph","description":"Creates a graph on your documents. This endpoint takes as input a list of document IDs and KGCreationSettings. If document IDs are not provided, the graph will be created on all documents in the system.\n\nThis step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information. You can view the graph through the neo4j browser.\n\nIn order to do GraphRAG, you will need to run the enrich_graph endpoint.","operationId":"create_graph_v2_create_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_graph_v2_create_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGCreationResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/enrich_graph":{"post":{"summary":"Enrich Graph","description":"This endpoint enriches the graph with additional information. It creates communities of nodes based on their similarity and adds embeddings to the graph. 
This step is necessary for GraphRAG to work.","operationId":"enrich_graph_v2_enrich_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_enrich_graph_v2_enrich_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGEnrichmentResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/openapi_spec":{"get":{"summary":"Openapi Spec","operationId":"openapi_spec_v2_openapi_spec_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AddUserResponse":{"properties":{"result":{"type":"boolean","title":"Result"}},"type":"object","required":["result"],"title":"AddUserResponse"},"AllPromptsResponse":{"properties":{"prompts":{"additionalProperties":{"$ref":"#/components/schemas/PromptResponse"},"type":"object","title":"Prompts"}},"type":"object","required":["prompts"],"title":"AllPromptsResponse"},"AnalyticsResponse":{"properties":{"analytics_data":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Analytics Data"},"filtered_logs":{"type":"object","title":"Filtered Logs"}},"type":"object","required":["filtered_logs"],"title":"AnalyticsResponse"},"AppSettingsResponse":{"properties":{"config":{"type":"object","title":"Config"},"prompts":{"type":"object","title":"Prompts"}},"type":"object","required":["config","prompts"],"title":"AppSettingsResponse"},"Body_add_prompt_app_v2_add_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"type":"string","title":"Template","description":"Prompt template"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types","description":"Input 
types","default":{}}},"type":"object","required":["name","template"],"title":"Body_add_prompt_app_v2_add_prompt_post"},"Body_add_user_to_collection_app_v2_add_user_to_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_add_user_to_collection_app_v2_add_user_to_collection_post"},"Body_agent_app_v2_agent_post":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message-Input"},"type":"array","title":"Messages","description":"List of message objects"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":true}},"type":"object","required":["messages"],"title":"Body_agent_app_v2_agent_post"},"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"},"Body_change_password_app_v2_change_password_post":{"properties":{"current_password":{"type":"string","title":"Current Password","description":"Current 
password"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["current_password","new_password"],"title":"Body_change_password_app_v2_change_password_post"},"Body_create_collection_app_v2_create_collection_post":{"properties":{"name":{"type":"string","title":"Name","description":"Collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Collection description","default":""}},"type":"object","required":["name"],"title":"Body_create_collection_app_v2_create_collection_post"},"Body_create_graph_v2_create_graph_post":{"properties":{"document_ids":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Document Ids","description":"List of document IDs to create the graph on."},"kg_creation_settings":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"$ref":"#/components/schemas/KGCreationSettings"}},{"type":"null"}],"title":"Kg Creation Settings","description":"Settings for the graph creation process."}},"type":"object","title":"Body_create_graph_v2_create_graph_post"},"Body_delete_user_app_v2_user__user_id__delete":{"properties":{"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password","description":"User's current password"},"delete_vector_data":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Delete Vector Data","description":"Whether to delete the user's vector data","default":false}},"type":"object","title":"Body_delete_user_app_v2_user__user_id__delete"},"Body_enrich_graph_v2_enrich_graph_post":{"properties":{"skip_clustering":{"type":"boolean","title":"Skip Clustering","description":"Whether to skip leiden clustering on the graph or not.","default":false},"force_enrichment":{"type":"boolean","title":"Force Enrichment","description":"Force Enrichment step even if graph creation is still in progress for some 
documents.","default":false},"kg_enrichment_settings":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"$ref":"#/components/schemas/KGEnrichmentSettings"}},{"type":"null"}],"title":"Kg Enrichment Settings","description":"Settings for the graph enrichment process."}},"type":"object","title":"Body_enrich_graph_v2_enrich_graph_post"},"Body_ingest_files_app_v2_ingest_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to ingest into the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_user_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"chunking_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"$ref":"#/components/schemas/ChunkingConfig"}},{"type":"null"}],"title":"Chunking Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. 
If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_ingest_files_app_v2_ingest_files_post"},"Body_login_app_v2_login_post":{"properties":{"grant_type":{"anyOf":[{"type":"string","pattern":"password"},{"type":"null"}],"title":"Grant Type"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"scope":{"type":"string","title":"Scope","default":""},"client_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Id"},"client_secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Secret"}},"type":"object","required":["username","password"],"title":"Body_login_app_v2_login_post"},"Body_put_user_app_v2_user_put":{"properties":{"email":{"anyOf":[{"type":"string","format":"email"},{"type":"null"}],"title":"Email","description":"Updated email address"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated user name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio","description":"Updated user bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture","description":"Updated profile picture URL"}},"type":"object","title":"Body_put_user_app_v2_user_put"},"Body_rag_app_v2_rag_post":{"properties":{"query":{"type":"string","title":"Query","description":"RAG query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If 
Available","description":"Includes document title in chunk response, if available.","default":false}},"type":"object","required":["query"],"title":"Body_rag_app_v2_rag_post"},"Body_register_app_v2_register_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"password":{"type":"string","title":"Password","description":"User's password"}},"type":"object","required":["email","password"],"title":"Body_register_app_v2_register_post"},"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"},"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"},"Body_reset_password_app_v2_reset_password_post":{"properties":{"reset_token":{"type":"string","title":"Reset Token","description":"Password reset token"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["reset_token","new_password"],"title":"Body_reset_password_app_v2_reset_password_post"},"Body_retry_ingest_files_v2_retry_ingest_files_post":{"properties":{"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids","description":"An optional list of document ids for each file. 
If not provided, the system will generate a unique document id via the `generate_user_document_id` method."}},"type":"object","required":["document_ids"],"title":"Body_retry_ingest_files_v2_retry_ingest_files_post"},"Body_score_completion_v2_score_completion_post":{"properties":{"message_id":{"type":"string","title":"Message Id","description":"Message ID"},"score":{"type":"number","title":"Score","description":"Completion score"}},"type":"object","required":["message_id","score"],"title":"Body_score_completion_v2_score_completion_post"},"Body_search_app_v2_search_post":{"properties":{"query":{"type":"string","title":"Query","description":"Search query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"}},"type":"object","required":["query"],"title":"Body_search_app_v2_search_post"},"Body_update_collection_app_v2_update_collection_put":{"properties":{"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Updated collection description"}},"type":"object","required":["collection_id"],"title":"Body_update_collection_app_v2_update_collection_put"},"Body_update_files_app_v2_update_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to update in the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. 
If not provided, the system will generate a unique document id via the `generate_user_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"chunking_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"$ref":"#/components/schemas/ChunkingConfig"}},{"type":"null"}],"title":"Chunking Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_update_files_app_v2_update_files_post"},"Body_update_prompt_app_v2_update_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Template","description":"Prompt template"},"input_types":{"anyOf":[{"additionalProperties":{"type":"string"},"type":"object"},{"type":"null"}],"title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name"],"title":"Body_update_prompt_app_v2_update_prompt_post"},"Body_verify_email_app_v2_verify_email_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"verification_code":{"type":"string","title":"Verification Code","description":"Email verification 
code"}},"type":"object","required":["email","verification_code"],"title":"Body_verify_email_app_v2_verify_email_post"},"ChunkingConfig":{"properties":{"provider":{"type":"string"}},"type":"object","title":"ChunkingConfig","example":{"chunking_strategy":"by_title","combine_under_n_chars":128,"max_characters":1024,"new_after_n_chars":512,"overlap":20,"provider":"unstructured_local","strategy":"auto"}},"DocumentChunkResponse":{"properties":{"fragment_id":{"type":"string","format":"uuid","title":"Fragment Id"},"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["fragment_id","extraction_id","document_id","user_id","collection_ids","text","metadata"],"title":"DocumentChunkResponse"},"DocumentOverviewResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"title":{"type":"string","title":"Title"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"type":{"type":"string","title":"Type"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"ingestion_status":{"type":"string","title":"Ingestion Status"},"restructuring_status":{"type":"string","title":"Restructuring Status"},"version":{"type":"string","title":"Version"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection 
Ids"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["id","title","user_id","type","created_at","updated_at","ingestion_status","restructuring_status","version","collection_ids","metadata"],"title":"DocumentOverviewResponse"},"GenerationConfig":{"properties":{"model":{"type":"string","title":"Model"},"temperature":{"type":"number","title":"Temperature"},"top_p":{"type":"number","title":"Top P"},"max_tokens_to_sample":{"type":"integer","title":"Max Tokens To Sample"},"stream":{"type":"boolean","title":"Stream"},"functions":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Functions"},"tools":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tools"},"add_generation_kwargs":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Add Generation Kwargs"},"api_base":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api Base"}},"type":"object","title":"GenerationConfig","max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"GenericMessageResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"GenericMessageResponse"},"GroupOverviewResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"user_count":{"type":"integer","title":"User Count"},"document_count":{"type":"integer","title":"Document Count"}},"type":"object","required":["collection_id","name","description","created_at","updated_at","user_count","document_count"],"title":"GroupOverviewResponse"},"GroupResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection 
Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"}},"type":"object","required":["collection_id","name","description","created_at","updated_at"],"title":"GroupResponse"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"HybridSearchSettings":{"properties":{"full_text_weight":{"type":"number","title":"Full Text Weight","description":"Weight to apply to full text search","default":1.0},"semantic_weight":{"type":"number","title":"Semantic Weight","description":"Weight to apply to semantic search","default":5.0},"full_text_limit":{"type":"integer","title":"Full Text Limit","description":"Maximum number of results to return from full text search","default":200},"rrf_k":{"type":"integer","title":"Rrf K","description":"K-value for RRF (Reciprocal Rank Fusion)","default":50}},"type":"object","title":"HybridSearchSettings"},"IndexMeasure":{"type":"string","enum":["cosine_distance","l2_distance","max_inner_product"],"title":"IndexMeasure","description":"An enum representing the types of distance measures available for indexing.\n\nAttributes:\n cosine_distance (str): The cosine distance measure for indexing.\n l2_distance (str): The Euclidean (L2) distance measure for indexing.\n max_inner_product (str): The maximum inner product measure for indexing."},"IngestionResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the ingestion request."},"document_id":{"type":"string","format":"uuid","title":"Document Id","description":"The ID of the document 
that was ingested."}},"type":"object","required":["message","task_id","document_id"],"title":"IngestionResponse","example":{"document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","message":"Ingestion task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGCreationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the restructure request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the restructure request."}},"type":"object","required":["message","task_id"],"title":"KGCreationResponse","example":{"message":"Graph creation queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGCreationSettings":{"properties":{"entity_types":{"items":{"type":"string"},"type":"array","title":"Entity Types","description":"The types of entities to extract.","default":[]},"relation_types":{"items":{"type":"string"},"type":"array","title":"Relation Types","description":"The types of relations to extract.","default":[]},"fragment_merge_count":{"type":"integer","title":"Fragment Merge Count","description":"The number of fragments to merge into a single KG extraction.","default":4},"max_knowledge_triples":{"type":"integer","title":"Max Knowledge Triples","description":"The maximum number of knowledge triples to extract from each chunk.","default":100},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"Configuration for text generation during graph enrichment."}},"type":"object","title":"KGCreationSettings","description":"Settings for knowledge graph creation."},"KGEnrichmentResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the restructure request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the restructure 
request."}},"type":"object","required":["message","task_id"],"title":"KGEnrichmentResponse","example":{"message":"Graph enrichment queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGEnrichmentSettings":{"properties":{"max_description_input_length":{"type":"integer","title":"Max Description Input Length","description":"The maximum length of the description for a node in the graph.","default":65536},"max_summary_input_length":{"type":"integer","title":"Max Summary Input Length","description":"The maximum length of the summary for a community.","default":65536},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"Configuration for text generation during graph enrichment."},"leiden_params":{"type":"object","title":"Leiden Params","description":"Parameters for the Leiden algorithm."}},"type":"object","title":"KGEnrichmentSettings","description":"Settings for knowledge graph enrichment."},"KGGlobalSearchResult":{"properties":{"query":{"type":"string","title":"Query"},"search_result":{"items":{"type":"string"},"type":"array","title":"Search Result"}},"type":"object","required":["query","search_result"],"title":"KGGlobalSearchResult","description":"Result of a global knowledge graph search operation.","query":"What were Aristotle's key contributions to philosophy?","search_result":["### Aristotle's Key Contributions to Philosophy\n\nAristotle's extensive body of work laid the foundation for numerous fields within philosophy and beyond, significantly shaping the trajectory of Western thought. His systematic approach to data collection and analysis has had a lasting impact on modern scientific methods. Below, we explore some of his most influential contributions.\n\n#### Foundational Works and Systematic Approach\n\nAristotle's writings cover a broad spectrum of topics, including logic, biology, ethics, and political science. 
His key works such as 'Physics,' 'On the Soul,' and 'Nicomachean Ethics' delve into fundamental concepts like substance, memory, and the nature of the city [Data: Reports (1, 2, 3, 4, 5, +more)]. These texts not only provided a comprehensive framework for understanding various aspects of the natural and human world but also established methodologies that continue to influence contemporary scientific inquiry.\n\n#### Ethical and Political Philosophy\n\nIn 'Nicomachean Ethics,' Aristotle explores the concept of a virtuous character, emphasizing the importance of moral virtues and the development of good habits. His work 'Politics' further examines the structure and function of the city (polis), addressing issues related to property, trade, and governance. Aristotle's classification of political constitutions and his definition of the city as the natural political community have had a profound and enduring impact on political thought [Data: Reports (11, 12); Triples (21, 22, 23, 24, 25)].\n\n#### Theories on Memory and Perception\n\nAristotle's theories on memory and perception are articulated in his works 'On the Soul' and 'De Anima iii 3.' He defines memory as the retention of experiences shaped by sensation and discusses the faculty of imagination (phantasia). These theories have significantly influenced subsequent philosophical and psychological studies on cognition and perception [Data: Reports (13, 14); Triples (26, 27, 28, 29, 30)].\n\n#### Epistemology and Scientific Method\n\nAristotle's epistemology, known as immanent realism, is based on the study of things that exist or happen in the world. This approach emphasizes empirical observation and has been instrumental in shaping the development of scientific methods. 
His insistence on grounding knowledge in observable phenomena laid the groundwork for future empirical research [Data: Reports (3)].\n\n#### Engagement with Predecessors and Contemporaries\n\nAristotle was also known for his critical engagement with the ideas of his predecessors and contemporaries. For instance, he refuted Democritus's claim about the Milky Way and criticized Empedocles's materialist theory of 'survival of the fittest.' These critiques highlight Aristotle's active participation in the broader philosophical discourse of his time and his contributions to refining and advancing philosophical thought [Data: Reports (15, 16); Triples (31, 32, 33, 34, 35)].\n\n### Conclusion\n\nAristotle's contributions to philosophy are vast and multifaceted, encompassing ethics, politics, epistemology, and more. His works continue to be studied and revered for their depth, rigor, and enduring relevance. Through his systematic approach and critical engagement with existing ideas, Aristotle has left an indelible mark on the landscape of Western philosophy."]},"KGLocalSearchResult":{"properties":{"query":{"type":"string","title":"Query"},"entities":{"type":"object","title":"Entities"},"relationships":{"type":"object","title":"Relationships"},"communities":{"type":"object","title":"Communities"}},"type":"object","required":["query","entities","relationships","communities"],"title":"KGLocalSearchResult","description":"Result of a local knowledge graph search operation.","communities":{"0":{"summary":{"findings":[{"explanation":"Aristotle is credited with the earliest study of formal logic, and his conception of it was the dominant form of Western logic until the 19th-century advances in mathematical logic. 
His works compiled into a set of six books ...","summary":"Aristotle's Foundational Role in Logic"}],"rating":9.5,"rating_explanation":"The impact severity rating is high due to Aristotle's foundational influence on multiple disciplines and his enduring legacy in Western philosophy and science.","summary":"The community revolves around Aristotle, an ancient Greek philosopher and polymath, who made significant contributions to various fields including logic, biology, political science, and economics. His works, such as 'Politics' and 'Nicomachean Ethics', have influenced numerous disciplines and thinkers from antiquity through the Middle Ages and beyond. The relationships between his various works and the fields he contributed to highlight his profound impact on Western thought.","title":"Aristotle and His Contributions"}}},"entities":{"0":{"description":"Aristotle was an ancient Greek philosopher and polymath, recognized as the father of various fields including logic, biology, and political science. He authored significant works such as the *Nicomachean Ethics* and *Politics*, where he explored concepts of virtue, governance, and the nature of reality, while also critiquing Plato's ideas. 
His teachings and observations laid the groundwork for numerous disciplines, influencing thinkers ...","name":"Aristotle"}},"query":"Who is Aristotle?","relationships":{}},"KGSearchResult":{"properties":{"local_result":{"anyOf":[{"$ref":"#/components/schemas/KGLocalSearchResult"},{"type":"null"}]},"global_result":{"anyOf":[{"$ref":"#/components/schemas/KGGlobalSearchResult"},{"type":"null"}]}},"type":"object","title":"KGSearchResult","description":"Result of a knowledge graph search operation.","global_result":{"query":"What were Aristotle's key contributions to philosophy?","search_result":["### Aristotle's Key Contributions to Philosophy\n\nAristotle's extensive body of work laid the foundation for numerous fields within philosophy and beyond, significantly shaping the trajectory of Western thought. His systematic approach to data collection and analysis has had a lasting impact on modern scientific methods. Below, we explore some of his most influential contributions.\n\n#### Foundational Works and Systematic Approach\n\nAristotle's writings cover a broad spectrum of topics, including logic, biology, ethics, and political science. His key works such as 'Physics,' 'On the Soul,' and 'Nicomachean Ethics' delve into fundamental concepts like substance, memory, and the nature of the city [Data: Reports (1, 2, 3, 4, 5, +more)]. These texts not only provided a comprehensive framework for understanding various aspects of the natural and human world but also established methodologies that continue to influence contemporary scientific inquiry.\n\n#### Ethical and Political Philosophy\n\nIn 'Nicomachean Ethics,' Aristotle explores the concept of a virtuous character, emphasizing the importance of moral virtues and the development of good habits. His work 'Politics' further examines the structure and function of the city (polis), addressing issues related to property, trade, and governance. 
Aristotle's classification of political constitutions and his definition of the city as the natural political community have had a profound and enduring impact on political thought [Data: Reports (11, 12); Triples (21, 22, 23, 24, 25)].\n\n#### Theories on Memory and Perception\n\nAristotle's theories on memory and perception are articulated in his works 'On the Soul' and 'De Anima iii 3.' He defines memory as the retention of experiences shaped by sensation and discusses the faculty of imagination (phantasia). These theories have significantly influenced subsequent philosophical and psychological studies on cognition and perception [Data: Reports (13, 14); Triples (26, 27, 28, 29, 30)].\n\n#### Epistemology and Scientific Method\n\nAristotle's epistemology, known as immanent realism, is based on the study of things that exist or happen in the world. This approach emphasizes empirical observation and has been instrumental in shaping the development of scientific methods. His insistence on grounding knowledge in observable phenomena laid the groundwork for future empirical research [Data: Reports (3)].\n\n#### Engagement with Predecessors and Contemporaries\n\nAristotle was also known for his critical engagement with the ideas of his predecessors and contemporaries. For instance, he refuted Democritus's claim about the Milky Way and criticized Empedocles's materialist theory of 'survival of the fittest.' These critiques highlight Aristotle's active participation in the broader philosophical discourse of his time and his contributions to refining and advancing philosophical thought [Data: Reports (15, 16); Triples (31, 32, 33, 34, 35)].\n\n### Conclusion\n\nAristotle's contributions to philosophy are vast and multifaceted, encompassing ethics, politics, epistemology, and more. His works continue to be studied and revered for their depth, rigor, and enduring relevance. 
Through his systematic approach and critical engagement with existing ideas, Aristotle has left an indelible mark on the landscape of Western philosophy."]},"local_result":{"communities":{"0":{"summary":{"findings":[{"explanation":"Aristotle is credited with the earliest study of formal logic, and his conception of it was the dominant form of Western logic until the 19th-century advances in mathematical logic. His works compiled into a set of six books ...","summary":"Aristotle's Foundational Role in Logic"}],"rating":9.5,"rating_explanation":"The impact severity rating is high due to Aristotle's foundational influence on multiple disciplines and his enduring legacy in Western philosophy and science.","summary":"The community revolves around Aristotle, an ancient Greek philosopher and polymath, who made significant contributions to various fields including logic, biology, political science, and economics. His works, such as 'Politics' and 'Nicomachean Ethics', have influenced numerous disciplines and thinkers from antiquity through the Middle Ages and beyond. The relationships between his various works and the fields he contributed to highlight his profound impact on Western thought.","title":"Aristotle and His Contributions"}}},"entities":{"0":{"description":"Aristotle was an ancient Greek philosopher and polymath, recognized as the father of various fields including logic, biology, and political science. He authored significant works such as the *Nicomachean Ethics* and *Politics*, where he explored concepts of virtue, governance, and the nature of reality, while also critiquing Plato's ideas. 
His teachings and observations laid the groundwork for numerous disciplines, influencing thinkers ...","name":"Aristotle"}},"query":"Who is Aristotle?","relationships":{}}},"KGSearchSettings":{"properties":{"use_kg_search":{"type":"boolean","title":"Use Kg Search","default":false},"kg_search_type":{"type":"string","title":"Kg Search Type","default":"global"},"kg_search_level":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kg Search Level"},"kg_search_generation_config":{"anyOf":[{"$ref":"#/components/schemas/GenerationConfig"},{"type":"null"}]},"max_community_description_length":{"type":"integer","title":"Max Community Description Length","default":65536},"max_llm_queries_for_global_search":{"type":"integer","title":"Max Llm Queries For Global Search","default":250},"local_search_limits":{"additionalProperties":{"type":"integer"},"type":"object","title":"Local Search Limits","default":{"__Entity__":20,"__Relationship__":20,"__Community__":20}}},"type":"object","title":"KGSearchSettings","kg_search_generation_config":{"max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"kg_search_level":"0","kg_search_type":"global","local_search_limits":{"__Community__":20,"__Entity__":20,"__Relationship__":20},"max_community_description_length":65536,"max_llm_queries_for_global_search":250,"use_kg_search":true},"LogEntry":{"properties":{"key":{"type":"string","title":"Key"},"value":{"title":"Value"},"timestamp":{"type":"string","format":"date-time","title":"Timestamp"}},"type":"object","required":["key","value","timestamp"],"title":"LogEntry"},"LogResponse":{"properties":{"run_id":{"type":"string","format":"uuid","title":"Run Id"},"run_type":{"type":"string","title":"Run 
Type"},"entries":{"items":{"$ref":"#/components/schemas/LogEntry"},"type":"array","title":"Entries"},"timestamp":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Timestamp"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"}},"type":"object","required":["run_id","run_type","entries","timestamp","user_id"],"title":"LogResponse"},"Message-Input":{"properties":{"role":{"anyOf":[{"$ref":"#/components/schemas/MessageType"},{"type":"string"}],"title":"Role"},"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"function_call":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Function Call"},"tool_calls":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"type":"object","required":["role"],"title":"Message"},"Message-Output":{"properties":{"role":{"anyOf":[{"$ref":"#/components/schemas/MessageType"},{"type":"string"}],"title":"Role"},"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"function_call":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Function Call"},"tool_calls":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"type":"object","required":["role"],"title":"Message","content":"This is a test message.","role":"user"},"MessageType":{"type":"string","enum":["system","user","assistant","function","tool"],"title":"MessageType"},"PromptResponse":{"properties":{"name":{"type":"string","title":"Name"},"template":{"type":"string","title":"Template"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input 
Types"}},"type":"object","required":["name","template","created_at","updated_at","input_types"],"title":"PromptResponse"},"RAGAgentResponse":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message-Output"},"type":"array","title":"Messages","description":"List of messages in the RAG agent response"}},"type":"object","required":["messages"],"title":"RAGAgentResponse","example":[{"content":"## You are a helpful assistant that can search for information.\n\nWhen asked a question, perform a search to find relevant information and provide a response.\n\nThe response should contain line-item attributions to relevant search results, and be as informative if possible.\nIf no relevant results are found, then state that no results were found.\nIf no obvious question is present, then do not carry out a search, and instead ask for clarification.","role":"system"},{"content":"You are a helpful assistant.","role":"system"},{"content":"Who is the greatest philosopher of all time?","role":"user"},{"content":"Aristotle is widely considered the greatest philosopher of all time.","role":"assistant"},{"content":"Can you tell me more about him?","role":"user"},{"function_call":{"arguments":"{\"query\":\"Aristotle biography\"}","name":"search"},"role":"assistant"},{"content":"1. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n2. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. 
His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n3. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n4. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n5. Life\nIn general, the details of Aristotle's life are not well-established. The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\n","name":"search","role":"function"},{"content":"Aristotle (384–322 BC) was an Ancient Greek philosopher and polymath whose contributions have had a profound impact on various fields of knowledge. Here are some key points about his life and work:\n\n1. **Early Life**: Aristotle was born in 384 BC in Stagira, Chalcidice, which is near modern-day Thessaloniki, Greece. 
His father, Nicomachus, was the personal physician to King Amyntas of Macedon, which exposed Aristotle to medical and biological knowledge from a young age [C].\n\n2. **Education and Career**: After the death of his parents, Aristotle was sent to Athens to study at Plato's Academy, where he remained for about 20 years. After Plato's death, Aristotle left Athens and eventually became the tutor of Alexander the Great [C].\n\n3. **Philosophical Contributions**: Aristotle founded the Lyceum in Athens, where he established the Peripatetic school of philosophy. His works cover a wide range of subjects, including metaphysics, ethics, politics, logic, biology, and aesthetics. His writings laid the groundwork for many modern scientific and philosophical inquiries [A].\n\n4. **Legacy**: Aristotle's influence extends beyond philosophy to the natural sciences, linguistics, economics, and psychology. His method of systematic observation and analysis has been foundational to the development of modern science [A].\n\nAristotle's comprehensive approach to knowledge and his systematic methodology have earned him a lasting legacy as one of the greatest philosophers of all time.\n\nSources:\n- [A] Aristotle's broad range of writings and influence on modern science.\n- [C] Details about Aristotle's early life and education.","role":"assistant"}]},"RAGResponse":{"properties":{"completion":{"title":"Completion","description":"The generated completion from the RAG process"},"search_results":{"$ref":"#/components/schemas/SearchResponse","description":"The search results used for the RAG process"}},"type":"object","required":["completion","search_results"],"title":"RAGResponse","example":{"completion":{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Paris is the capital of France.","role":"assistant"}}],"id":"chatcmpl-example123"},"search_results":{"kg_search_results":[{"global_result":{"query":"What were Aristotle's key contributions to philosophy?","search_result":["### 
Aristotle's Key Contributions to Philosophy\n\nAristotle's extensive body of work laid the foundation for numerous fields within philosophy and beyond, significantly shaping the trajectory of Western thought. His systematic approach to data collection and analysis has had a lasting impact on modern scientific methods. Below, we explore some of his most influential contributions.\n\n#### Foundational Works and Systematic Approach\n\nAristotle's writings cover a broad spectrum of topics, including logic, biology, ethics, and political science. His key works such as 'Physics,' 'On the Soul,' and 'Nicomachean Ethics' delve into fundamental concepts like substance, memory, and the nature of the city [Data: Reports (1, 2, 3, 4, 5, +more)]. These texts not only provided a comprehensive framework for understanding various aspects of the natural and human world but also established methodologies that continue to influence contemporary scientific inquiry.\n\n#### Ethical and Political Philosophy\n\nIn 'Nicomachean Ethics,' Aristotle explores the concept of a virtuous character, emphasizing the importance of moral virtues and the development of good habits. His work 'Politics' further examines the structure and function of the city (polis), addressing issues related to property, trade, and governance. Aristotle's classification of political constitutions and his definition of the city as the natural political community have had a profound and enduring impact on political thought [Data: Reports (11, 12); Triples (21, 22, 23, 24, 25)].\n\n#### Theories on Memory and Perception\n\nAristotle's theories on memory and perception are articulated in his works 'On the Soul' and 'De Anima iii 3.' He defines memory as the retention of experiences shaped by sensation and discusses the faculty of imagination (phantasia). 
These theories have significantly influenced subsequent philosophical and psychological studies on cognition and perception [Data: Reports (13, 14); Triples (26, 27, 28, 29, 30)].\n\n#### Epistemology and Scientific Method\n\nAristotle's epistemology, known as immanent realism, is based on the study of things that exist or happen in the world. This approach emphasizes empirical observation and has been instrumental in shaping the development of scientific methods. His insistence on grounding knowledge in observable phenomena laid the groundwork for future empirical research [Data: Reports (3)].\n\n#### Engagement with Predecessors and Contemporaries\n\nAristotle was also known for his critical engagement with the ideas of his predecessors and contemporaries. For instance, he refuted Democritus's claim about the Milky Way and criticized Empedocles's materialist theory of 'survival of the fittest.' These critiques highlight Aristotle's active participation in the broader philosophical discourse of his time and his contributions to refining and advancing philosophical thought [Data: Reports (15, 16); Triples (31, 32, 33, 34, 35)].\n\n### Conclusion\n\nAristotle's contributions to philosophy are vast and multifaceted, encompassing ethics, politics, epistemology, and more. His works continue to be studied and revered for their depth, rigor, and enduring relevance. Through his systematic approach and critical engagement with existing ideas, Aristotle has left an indelible mark on the landscape of Western philosophy."]},"local_result":{"communities":{"0":{"summary":{"findings":[{"explanation":"Aristotle is credited with the earliest study of formal logic, and his conception of it was the dominant form of Western logic until the 19th-century advances in mathematical logic. 
His works compiled into a set of six books ...","summary":"Aristotle's Foundational Role in Logic"}],"rating":9.5,"rating_explanation":"The impact severity rating is high due to Aristotle's foundational influence on multiple disciplines and his enduring legacy in Western philosophy and science.","summary":"The community revolves around Aristotle, an ancient Greek philosopher and polymath, who made significant contributions to various fields including logic, biology, political science, and economics. His works, such as 'Politics' and 'Nicomachean Ethics', have influenced numerous disciplines and thinkers from antiquity through the Middle Ages and beyond. The relationships between his various works and the fields he contributed to highlight his profound impact on Western thought.","title":"Aristotle and His Contributions"}}},"entities":{"0":{"description":"Aristotle was an ancient Greek philosopher and polymath, recognized as the father of various fields including logic, biology, and political science. He authored significant works such as the *Nicomachean Ethics* and *Politics*, where he explored concepts of virtue, governance, and the nature of reality, while also critiquing Plato's ideas. 
His teachings and observations laid the groundwork for numerous disciplines, influencing thinkers ...","name":"Aristotle"}},"query":"Who is Aristotle?","relationships":{}}}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","fragment_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}}},"ResultsWrapper_AddUserResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AddUserResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AddUserResponse]"},"ResultsWrapper_AllPromptsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AllPromptsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AllPromptsResponse]"},"ResultsWrapper_AnalyticsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AnalyticsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AnalyticsResponse]"},"ResultsWrapper_AppSettingsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AppSettingsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AppSettingsResponse]"},"ResultsWrapper_GenericMessageResponse_":{"properties":{"results":{"$ref":"#/components/schemas/GenericMessageResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[GenericMessageResponse]"},"ResultsWrapper_GroupResponse_":{"properties":{"results":{"$ref":"#/components/schemas/GroupResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[GroupResponse]"},"ResultsWrapper_KGCreationResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGCreationResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGCreationResponse]"},"ResultsWrapper_KGEn
richmentResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGEnrichmentResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGEnrichmentResponse]"},"ResultsWrapper_List_DocumentChunkResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentChunkResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[List[DocumentChunkResponse]]"},"ResultsWrapper_List_DocumentOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentOverviewResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[List[DocumentOverviewResponse]]"},"ResultsWrapper_List_GroupOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/GroupOverviewResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[List[GroupOverviewResponse]]"},"ResultsWrapper_List_GroupResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/GroupResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[List[GroupResponse]]"},"ResultsWrapper_List_LogResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/LogResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[List[LogResponse]]"},"ResultsWrapper_List_UserOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/UserOverviewResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[List[UserOverviewResponse]]"},"ResultsWrapper_RAGAgentResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGAgentResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGAgentResponse]"},"ResultsWrapper_RAGResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGResp
onse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGResponse]"},"ResultsWrapper_ScoreCompletionResponse_":{"properties":{"results":{"$ref":"#/components/schemas/ScoreCompletionResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[ScoreCompletionResponse]"},"ResultsWrapper_SearchResponse_":{"properties":{"results":{"$ref":"#/components/schemas/SearchResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[SearchResponse]"},"ResultsWrapper_ServerStats_":{"properties":{"results":{"$ref":"#/components/schemas/ServerStats"}},"type":"object","required":["results"],"title":"ResultsWrapper[ServerStats]"},"ResultsWrapper_TokenResponse_":{"properties":{"results":{"$ref":"#/components/schemas/TokenResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[TokenResponse]"},"ResultsWrapper_UpdatePromptResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdatePromptResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdatePromptResponse]"},"ResultsWrapper_UpdateResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdateResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdateResponse]"},"ResultsWrapper_UserResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UserResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UserResponse]"},"ResultsWrapper_list_IngestionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/IngestionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[IngestionResponse]]"},"ResultsWrapper_str_":{"properties":{"results":{"type":"string","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[str]"},"ScoreCompletionResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"ScoreCompletionResponse"},"SearchResp
onse":{"properties":{"vector_search_results":{"items":{"$ref":"#/components/schemas/VectorSearchResult"},"type":"array","title":"Vector Search Results","description":"List of vector search results"},"kg_search_results":{"anyOf":[{"items":{"$ref":"#/components/schemas/KGSearchResult"},"type":"array"},{"type":"null"}],"title":"Kg Search Results","description":"Knowledge graph search results, if applicable"}},"type":"object","required":["vector_search_results"],"title":"SearchResponse","example":{"kg_search_results":[{"global_result":{"query":"What were Aristotle's key contributions to philosophy?","search_result":["### Aristotle's Key Contributions to Philosophy\n\nAristotle's extensive body of work laid the foundation for numerous fields within philosophy and beyond, significantly shaping the trajectory of Western thought. His systematic approach to data collection and analysis has had a lasting impact on modern scientific methods. Below, we explore some of his most influential contributions.\n\n#### Foundational Works and Systematic Approach\n\nAristotle's writings cover a broad spectrum of topics, including logic, biology, ethics, and political science. His key works such as 'Physics,' 'On the Soul,' and 'Nicomachean Ethics' delve into fundamental concepts like substance, memory, and the nature of the city [Data: Reports (1, 2, 3, 4, 5, +more)]. These texts not only provided a comprehensive framework for understanding various aspects of the natural and human world but also established methodologies that continue to influence contemporary scientific inquiry.\n\n#### Ethical and Political Philosophy\n\nIn 'Nicomachean Ethics,' Aristotle explores the concept of a virtuous character, emphasizing the importance of moral virtues and the development of good habits. His work 'Politics' further examines the structure and function of the city (polis), addressing issues related to property, trade, and governance. 
Aristotle's classification of political constitutions and his definition of the city as the natural political community have had a profound and enduring impact on political thought [Data: Reports (11, 12); Triples (21, 22, 23, 24, 25)].\n\n#### Theories on Memory and Perception\n\nAristotle's theories on memory and perception are articulated in his works 'On the Soul' and 'De Anima iii 3.' He defines memory as the retention of experiences shaped by sensation and discusses the faculty of imagination (phantasia). These theories have significantly influenced subsequent philosophical and psychological studies on cognition and perception [Data: Reports (13, 14); Triples (26, 27, 28, 29, 30)].\n\n#### Epistemology and Scientific Method\n\nAristotle's epistemology, known as immanent realism, is based on the study of things that exist or happen in the world. This approach emphasizes empirical observation and has been instrumental in shaping the development of scientific methods. His insistence on grounding knowledge in observable phenomena laid the groundwork for future empirical research [Data: Reports (3)].\n\n#### Engagement with Predecessors and Contemporaries\n\nAristotle was also known for his critical engagement with the ideas of his predecessors and contemporaries. For instance, he refuted Democritus's claim about the Milky Way and criticized Empedocles's materialist theory of 'survival of the fittest.' These critiques highlight Aristotle's active participation in the broader philosophical discourse of his time and his contributions to refining and advancing philosophical thought [Data: Reports (15, 16); Triples (31, 32, 33, 34, 35)].\n\n### Conclusion\n\nAristotle's contributions to philosophy are vast and multifaceted, encompassing ethics, politics, epistemology, and more. His works continue to be studied and revered for their depth, rigor, and enduring relevance. 
Through his systematic approach and critical engagement with existing ideas, Aristotle has left an indelible mark on the landscape of Western philosophy."]},"local_result":{"communities":{"0":{"summary":{"findings":[{"explanation":"Aristotle is credited with the earliest study of formal logic, and his conception of it was the dominant form of Western logic until the 19th-century advances in mathematical logic. His works compiled into a set of six books ...","summary":"Aristotle's Foundational Role in Logic"}],"rating":9.5,"rating_explanation":"The impact severity rating is high due to Aristotle's foundational influence on multiple disciplines and his enduring legacy in Western philosophy and science.","summary":"The community revolves around Aristotle, an ancient Greek philosopher and polymath, who made significant contributions to various fields including logic, biology, political science, and economics. His works, such as 'Politics' and 'Nicomachean Ethics', have influenced numerous disciplines and thinkers from antiquity through the Middle Ages and beyond. The relationships between his various works and the fields he contributed to highlight his profound impact on Western thought.","title":"Aristotle and His Contributions"}}},"entities":{"0":{"description":"Aristotle was an ancient Greek philosopher and polymath, recognized as the father of various fields including logic, biology, and political science. He authored significant works such as the *Nicomachean Ethics* and *Politics*, where he explored concepts of virtue, governance, and the nature of reality, while also critiquing Platos ideas. 
His teachings and observations laid the groundwork for numerous disciplines, influencing thinkers ...","name":"Aristotle"}},"query":"Who is Aristotle?","relationships":{}}}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","fragment_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}},"ServerStats":{"properties":{"start_time":{"type":"string","format":"date-time","title":"Start Time"},"uptime_seconds":{"type":"number","title":"Uptime Seconds"},"cpu_usage":{"type":"number","title":"Cpu Usage"},"memory_usage":{"type":"number","title":"Memory Usage"}},"type":"object","required":["start_time","uptime_seconds","cpu_usage","memory_usage"],"title":"ServerStats"},"Token":{"properties":{"token":{"type":"string","title":"Token"},"token_type":{"type":"string","title":"Token Type"}},"type":"object","required":["token","token_type"],"title":"Token"},"TokenResponse":{"properties":{"access_token":{"$ref":"#/components/schemas/Token"},"refresh_token":{"$ref":"#/components/schemas/Token"}},"type":"object","required":["access_token","refresh_token"],"title":"TokenResponse"},"UpdatePromptResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"UpdatePromptResponse"},"UpdateResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the ingestion request."},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids","description":"The ID of the document that was 
ingested."}},"type":"object","required":["message","task_id","document_ids"],"title":"UpdateResponse","example":{"document_ids":["9fbe403b-c11c-5aae-8ade-ef22980c3ad1"],"message":"Update task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"UserOverviewResponse":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id"},"num_files":{"type":"integer","title":"Num Files"},"total_size_in_bytes":{"type":"integer","title":"Total Size In Bytes"},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids"}},"type":"object","required":["user_id","num_files","total_size_in_bytes","document_ids"],"title":"UserOverviewResponse"},"UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-09-18T18:14:34.700273"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-09-18T18:14:34.700279"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile 
Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"VectorSearchResult":{"properties":{"fragment_id":{"type":"string","format":"uuid","title":"Fragment Id"},"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"score":{"type":"number","title":"Score"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["fragment_id","extraction_id","document_id","user_id","collection_ids","score","text","metadata"],"title":"VectorSearchResult","description":"Result of a search operation.","collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","fragment_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"},"VectorSearchSettings":{"properties":{"use_vector_search":{"type":"boolean","title":"Use Vector Search","description":"Whether to use vector search","default":true},"use_hybrid_search":{"type":"boolean","title":"Use Hybrid Search","description":"Whether to perform a hybrid search (combining vector and keyword search)","default":false},"filters":{"type":"object","title":"Filters","description":"Filters to apply to the vector 
search"},"search_limit":{"type":"integer","maximum":1000.0,"minimum":1.0,"title":"Search Limit","description":"Maximum number of results to return","default":10},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"index_measure":{"$ref":"#/components/schemas/IndexMeasure","description":"The distance measure to use for indexing","default":"cosine_distance"},"include_values":{"type":"boolean","title":"Include Values","description":"Whether to include search score values in the search results","default":true},"include_metadatas":{"type":"boolean","title":"Include Metadatas","description":"Whether to include element metadata in the search results","default":true},"probes":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Probes","description":"Number of ivfflat index lists to query. Higher increases accuracy but decreases speed.","default":10},"ef_search":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Ef Search","description":"Size of the dynamic candidate list for HNSW index search. 
Higher increases accuracy but decreases speed.","default":40},"hybrid_search_settings":{"anyOf":[{"$ref":"#/components/schemas/HybridSearchSettings"},{"type":"null"}],"description":"Settings for hybrid search","default":{"full_text_weight":1.0,"semantic_weight":5.0,"full_text_limit":200,"rrf_k":50}},"search_strategy":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Search Strategy","description":"Search strategy to use (e.g., 'default', 'query_fusion', 'hyde')","default":"vanilla"}},"type":"object","title":"VectorSearchSettings","ef_search":40,"filters":{"category":"technology"},"hybrid_search_settings":{"full_text_limit":200,"full_text_weight":1.0,"rrf_k":50,"semantic_weight":5.0},"include_metadata":true,"index_measure":"cosine_distance","probes":10,"search_limit":20,"selected_collection_ids":["2acb499e-8428-543b-bd85-0d9098718220","3e157b3a-8469-51db-90d9-52e7d896b49b"],"use_hybrid_search":true,"use_vector_search":true}},"securitySchemes":{"HTTPBearer":{"type":"http","scheme":"bearer"},"OAuth2PasswordBearer":{"type":"oauth2","flows":{"password":{"scopes":{},"tokenUrl":"token"}}}}}} + -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -F \"file=@pg_essay_1_v2.txt;type=text/plain\" \\\n -F 'document_ids=[\"b4ac4dd6-5f27-596e-a55b-7cf242ca30aa\"]'\n"}]}},"/v2/health":{"get":{"summary":"Health Check","operationId":"health_check_v2_health_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}},"/v2/server_stats":{"get":{"summary":"Server Stats","operationId":"server_stats_v2_server_stats_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ServerStats_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/update_prompt":{"post":{"summary":"Update Prompt 
App","operationId":"update_prompt_app_v2_update_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_prompt_app_v2_update_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/add_prompt":{"post":{"summary":"Add Prompt App","operationId":"add_prompt_app_v2_add_prompt_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_prompt_app_v2_add_prompt_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_prompt/{prompt_name}":{"get":{"summary":"Get Prompt App","operationId":"get_prompt_app_v2_get_prompt__prompt_name__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"},{"name":"inputs","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"description":"JSON-encoded prompt inputs","title":"Inputs"},"description":"JSON-encoded prompt inputs"},{"name":"prompt_override","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"description":"Prompt override","title":"Prompt Override"},"description":"Prompt override"}],"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UpdatePromptResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/get_all_prompts":{"get":{"summary":"Get All Prompts App","operationId":"get_all_prompts_app_v2_get_all_prompts_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AllPromptsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_prompt/{prompt_name}":{"delete":{"summary":"Delete Prompt App","operationId":"delete_prompt_app_v2_delete_prompt__prompt_name__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"prompt_name","in":"path","required":true,"schema":{"type":"string","description":"Prompt name","title":"Prompt Name"},"description":"Prompt name"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/analytics":{"get":{"summary":"Get Analytics App","operationId":"get_analytics_app_v2_analytics_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filter_criteria","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Filter Criteria"}},{"name":"analysis_types","in":"query","required":false,"schema":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"type":"object"}},{"type":"null"}],"default":{},"title":"Analysis Types"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AnalyticsResponse_"}}}},"422":{"description":"Validation 
Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logs":{"get":{"summary":"Logs App","operationId":"logs_app_v2_logs_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"run_type_filter","in":"query","required":false,"schema":{"anyOf":[{"type":"string"},{"type":"null"}],"default":"","title":"Run Type Filter"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_LogResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/app_settings":{"get":{"summary":"App Settings","operationId":"app_settings_v2_app_settings_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AppSettingsResponse_"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/score_completion":{"post":{"summary":"Score Completion","operationId":"score_completion_v2_score_completion_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_score_completion_v2_score_completion_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_ScoreCompletionResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/users_overview":{"get":{"summary":"Users Overview 
App","operationId":"users_overview_app_v2_users_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"default":[],"title":"User Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_UserOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/delete":{"delete":{"summary":"Delete App","operationId":"delete_app_v2_delete_delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"filters","in":"query","required":true,"schema":{"type":"string","description":"JSON-encoded filters","title":"Filters"},"description":"JSON-encoded filters"}],"responses":{"204":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/download_file/{document_id}":{"get":{"summary":"Download File App","description":"Download a file by its document ID as a stream.","operationId":"download_file_app_v2_download_file__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"}],"responses":{"200":{"description":"Successful Response"},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/documents_overview":{"get":{"summary":"Documents Overview 
App","operationId":"documents_overview_app_v2_documents_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_ids","in":"query","required":false,"schema":{"type":"array","items":{"type":"string"},"default":[],"title":"Document Ids"}},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/document_chunks/{document_id}":{"get":{"summary":"Document Chunks App","operationId":"document_chunks_app_v2_document_chunks__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","title":"Document Id"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_DocumentChunkResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/inspect_knowledge_graph":{"get":{"summary":"Inspect Knowledge 
Graph","operationId":"inspect_knowledge_graph_v2_inspect_knowledge_graph_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"offset","in":"query","required":false,"schema":{"type":"integer","default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","default":100,"title":"Limit"}},{"name":"print_descriptions","in":"query","required":false,"schema":{"type":"boolean","default":false,"title":"Print Descriptions"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_str_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collections_overview":{"get":{"summary":"Collections Overview App","operationId":"collections_overview_app_v2_collections_overview_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_ids","in":"query","required":false,"schema":{"anyOf":[{"type":"array","items":{"type":"string"}},{"type":"null"}],"title":"Collection Ids"}},{"name":"offset","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","minimum":0},{"type":"null"}],"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"anyOf":[{"type":"integer","maximum":1000,"minimum":1},{"type":"null"}],"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_GroupOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_collection":{"post":{"summary":"Create Collection 
App","operationId":"create_collection_app_v2_create_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_collection_app_v2_create_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GroupResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_collection/{collection_id}":{"get":{"summary":"Get Collection App","operationId":"get_collection_app_v2_get_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GroupResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/update_collection":{"put":{"summary":"Update Collection App","operationId":"update_collection_app_v2_update_collection_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_update_collection_app_v2_update_collection_put"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GroupResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/delete_collection/{collection_id}":{"delete":{"summary":"Delete Collection 
App","operationId":"delete_collection_app_v2_delete_collection__collection_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/list_collections":{"get":{"summary":"List Collections App","operationId":"list_collections_app_v2_list_collections_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_GroupResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/add_user_to_collection":{"post":{"summary":"Add User To Collection App","operationId":"add_user_to_collection_app_v2_add_user_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_add_user_to_collection_app_v2_add_user_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_AddUserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_user_from_collection":{"post":{"summary":"Remove User From Collection 
App","operationId":"remove_user_from_collection_app_v2_remove_user_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/get_users_in_collection/{collection_id}":{"get":{"summary":"Get Users In Collection App","operationId":"get_users_in_collection_app_v2_get_users_in_collection__collection_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user_collections/{user_id}":{"get":{"summary":"Get Collections For User App","operationId":"get_collections_for_user_app_v2_user_collections__user_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"User ID","title":"User Id"},"description":"User 
ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"description":"Pagination offset","default":0,"title":"Offset"},"description":"Pagination offset"},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"description":"Pagination limit","default":100,"title":"Limit"},"description":"Pagination limit"}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/assign_document_to_collection":{"post":{"summary":"Assign Document To Collection App","operationId":"assign_document_to_collection_app_v2_assign_document_to_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/remove_document_from_collection":{"post":{"summary":"Remove Document From Collection App","operationId":"remove_document_from_collection_app_v2_remove_document_from_collection_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/document_collections/{document_id}":{"get":{"summary":"Document Collections 
App","operationId":"document_collections_app_v2_document_collections__document_id__get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"document_id","in":"path","required":true,"schema":{"type":"string","description":"Document ID","title":"Document Id"},"description":"Document ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_GroupResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/collection/{collection_id}/documents":{"get":{"summary":"Documents In Collection App","operationId":"documents_in_collection_app_v2_collection__collection_id__documents_get","security":[{"HTTPBearer":[]}],"parameters":[{"name":"collection_id","in":"path","required":true,"schema":{"type":"string","description":"Collection ID","title":"Collection Id"},"description":"Collection ID"},{"name":"offset","in":"query","required":false,"schema":{"type":"integer","minimum":0,"default":0,"title":"Offset"}},{"name":"limit","in":"query","required":false,"schema":{"type":"integer","maximum":1000,"minimum":1,"default":100,"title":"Limit"}}],"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_List_DocumentOverviewResponse__"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/search":{"post":{"summary":"Search App","description":"Perform a search query on the vector database and knowledge graph.\n\nThis endpoint allows for complex filtering of search results using 
PostgreSQL-based queries.\nFilters can be applied to various fields such as document_id, and internal metadata values.\n\n\nAllowed operators include `eq`, `neq`, `gt`, `gte`, `lt`, `lte`, `like`, `ilike`, `in`, and `nin`.","operationId":"search_app_v2_search_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_search_app_v2_search_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_SearchResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.search(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/search\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n 
\"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n }\n }'\n"}]}},"/v2/rag":{"post":{"summary":"Rag App","description":"Execute a RAG (Retrieval-Augmented Generation) query.\n\nThis endpoint combines search results with language model generation.\nIt supports the same filtering capabilities as the search endpoint,\nallowing for precise control over the retrieved context.\n\nThe generation process can be customized using the rag_generation_config parameter.","operationId":"rag_app_v2_rag_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_rag_app_v2_rag_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.rag(\n query=\"Who is Aristotle?\",\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n kg_search_settings={\n \"use_kg_search\": True,\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7,\n },\n 
\"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/rag\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"query\": \"Who is Aristotle?\",\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"eq\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": true, # graph needs to be constructed first\n \"kg_search_type\": \"local\",\n \"kg_search_level\": \"0\",\n \"generation_config\": {\n \"model\": \"gpt-4o-mini\",\n \"temperature\": 0.7\n },\n \"local_search_limits\": {\n \"__Entity__\": 20,\n \"__Relationship__\": 20,\n \"__Community__\": 20,\n },\n \"max_community_description_length\": 65536,\n \"max_llm_queries_for_global_search\": 250\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 150\n }\n }'\n"}]}},"/v2/agent":{"post":{"summary":"Agent App","description":"Implement an agent-based interaction for complex query processing.\n\nThis endpoint supports multi-turn conversations and can handle complex queries\nby breaking them down into sub-tasks. 
It uses the same filtering capabilities\nas the search and RAG endpoints for retrieving relevant information.\n\nThe agent's behavior can be customized using the rag_generation_config and\ntask_prompt_override parameters.","operationId":"agent_app_v2_agent_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_agent_app_v2_agent_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_RAGAgentResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}],"x-codeSamples":[{"lang":"Python","source":"from r2r import R2RClient\n\nclient = R2RClient(\"http://localhost:7272\")\n# when using auth, do client.login(...)\n\nresult = client.agent(\n messages=[\n {\"role\": \"user\", \"content\": \"Who is the greatest philosopher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philosopher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me more about him?\"}\n ],\n vector_search_settings={\n \"use_vector_search\": True,\n \"filters\": {\"document_id\": {\"eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": True\n },\n rag_generation_config={\n \"stream\": False,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n include_title_if_available=True\n)\n"},{"lang":"Shell","source":"curl -X POST \"https://api.example.com/agent\" \\\n -H \"Content-Type: application/json\" \\\n -H \"Authorization: Bearer YOUR_API_KEY\" \\\n -d '{\n \"messages\": [\n {\"role\": \"user\", \"content\": \"Who is the greatest philosopher of all time?\"},\n {\"role\": \"assistant\", \"content\": \"Aristotle is widely considered the greatest philosopher of all time.\"},\n {\"role\": \"user\", \"content\": \"Can you tell me 
more about him?\"}\n ],\n \"vector_search_settings\": {\n \"use_vector_search\": true,\n \"filters\": {\"document_id\": {\"eq\": \"5e157b3a-8469-51db-90d9-52e7d896b49b\"}},\n \"search_limit\": 20,\n \"use_hybrid_search\": true\n },\n \"kg_search_settings\": {\n \"use_kg_search\": false # to enable this, please read the graphrag cookbook\n },\n \"rag_generation_config\": {\n \"stream\": false,\n \"temperature\": 0.7,\n \"max_tokens\": 200\n },\n \"include_title_if_available\": true\n }'\n"}]}},"/v2/register":{"post":{"summary":"Register App","description":"Register a new user with the given email and password.","operationId":"register_app_v2_register_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_register_app_v2_register_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/verify_email":{"post":{"summary":"Verify Email App","description":"Verify a user's email address.\n\nThis endpoint is used to confirm a user's email address using the verification code\nsent to their email after registration.","operationId":"verify_email_app_v2_verify_email_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_verify_email_app_v2_verify_email_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/login":{"post":{"summary":"Login App","description":"Authenticate a user and provide access tokens.\n\nThis endpoint authenticates a user using 
their email (username) and password,\nand returns access and refresh tokens upon successful authentication.","operationId":"login_app_v2_login_post","requestBody":{"content":{"application/x-www-form-urlencoded":{"schema":{"$ref":"#/components/schemas/Body_login_app_v2_login_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/logout":{"post":{"summary":"Logout App","description":"Log out the current user.\n\nThis endpoint invalidates the user's current access token, effectively logging them out.","operationId":"logout_app_v2_logout_post","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}}},"security":[{"OAuth2PasswordBearer":[]},{"HTTPBearer":[]}]}},"/v2/user":{"get":{"summary":"Get User App","description":"Get the current user's profile information.\n\nThis endpoint returns the profile information of the currently authenticated user.","operationId":"get_user_app_v2_user_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}}},"security":[{"HTTPBearer":[]}]},"put":{"summary":"Put User App","description":"Update the current user's profile information.\n\nThis endpoint allows the authenticated user to update their profile information.","operationId":"put_user_app_v2_user_put","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_put_user_app_v2_user_put"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_UserResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/refresh_access_token":{"post":{"summary":"Refresh Access Token App","description":"Refresh the access token using a refresh token.\n\nThis endpoint allows users to obtain a new access token using their refresh token.","operationId":"refresh_access_token_app_v2_refresh_access_token_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","title":"Refresh Token","description":"Refresh token"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_TokenResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/change_password":{"post":{"summary":"Change Password App","description":"Change the authenticated user's password.\n\nThis endpoint allows users to change their password by providing their current password\nand a new password.","operationId":"change_password_app_v2_change_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_change_password_app_v2_change_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/request_password_reset":{"post":{"summary":"Request Password Reset App","description":"Request a password reset for a user.\n\nThis endpoint initiates the password reset process by 
sending a reset link\nto the specified email address.","operationId":"request_password_reset_app_v2_request_password_reset_post","requestBody":{"content":{"application/json":{"schema":{"type":"string","format":"email","title":"Email","description":"User's email address"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/reset_password":{"post":{"summary":"Reset Password App","operationId":"reset_password_app_v2_reset_password_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_reset_password_app_v2_reset_password_post"}}},"required":true},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/user/{user_id}":{"delete":{"summary":"Delete User App","description":"Delete a user account.\n\nThis endpoint allows users to delete their own account or, for superusers,\nto delete any user account.","operationId":"delete_user_app_v2_user__user_id__delete","security":[{"HTTPBearer":[]}],"parameters":[{"name":"user_id","in":"path","required":true,"schema":{"type":"string","description":"ID of the user to delete","title":"User Id"},"description":"ID of the user to delete"}],"requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_delete_user_app_v2_user__user_id__delete"}}}},"responses":{"200":{"description":"Successful 
Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_GenericMessageResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}}}},"/v2/create_graph":{"post":{"summary":"Create Graph","description":"Creates a graph on your documents. This endpoint takes as input a list of document IDs and KGCreationSettings. If document IDs are not provided, the graph will be created on all documents in the system.\n\nThis step extracts the relevant entities and relationships from the documents and creates a graph based on the extracted information. You can view the graph through the Neo4j browser.\n\nIn order to do GraphRAG, you will need to run the enrich_graph endpoint.","operationId":"create_graph_v2_create_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_create_graph_v2_create_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGCreationResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/enrich_graph":{"post":{"summary":"Enrich Graph","description":"This endpoint enriches the graph with additional information. It creates communities of nodes based on their similarity and adds embeddings to the graph. 
This step is necessary for GraphRAG to work.","operationId":"enrich_graph_v2_enrich_graph_post","requestBody":{"content":{"application/json":{"schema":{"$ref":"#/components/schemas/Body_enrich_graph_v2_enrich_graph_post"}}}},"responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{"$ref":"#/components/schemas/ResultsWrapper_KGEnrichmentResponse_"}}}},"422":{"description":"Validation Error","content":{"application/json":{"schema":{"$ref":"#/components/schemas/HTTPValidationError"}}}}},"security":[{"HTTPBearer":[]}]}},"/v2/openapi_spec":{"get":{"summary":"Openapi Spec","operationId":"openapi_spec_v2_openapi_spec_get","responses":{"200":{"description":"Successful Response","content":{"application/json":{"schema":{}}}}}}}},"components":{"schemas":{"AddUserResponse":{"properties":{"result":{"type":"boolean","title":"Result"}},"type":"object","required":["result"],"title":"AddUserResponse"},"AllPromptsResponse":{"properties":{"prompts":{"additionalProperties":{"$ref":"#/components/schemas/PromptResponse"},"type":"object","title":"Prompts"}},"type":"object","required":["prompts"],"title":"AllPromptsResponse"},"AnalyticsResponse":{"properties":{"analytics_data":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Analytics Data"},"filtered_logs":{"type":"object","title":"Filtered Logs"}},"type":"object","required":["filtered_logs"],"title":"AnalyticsResponse"},"AppSettingsResponse":{"properties":{"config":{"type":"object","title":"Config"},"prompts":{"type":"object","title":"Prompts"}},"type":"object","required":["config","prompts"],"title":"AppSettingsResponse"},"Body_add_prompt_app_v2_add_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"type":"string","title":"Template","description":"Prompt template"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input Types","description":"Input 
types","default":{}}},"type":"object","required":["name","template"],"title":"Body_add_prompt_app_v2_add_prompt_post"},"Body_add_user_to_collection_app_v2_add_user_to_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_add_user_to_collection_app_v2_add_user_to_collection_post"},"Body_agent_app_v2_agent_post":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message-Input"},"type":"array","title":"Messages","description":"List of message objects"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If Available","description":"Includes document title in chunk response, if available.","default":true}},"type":"object","required":["messages"],"title":"Body_agent_app_v2_agent_post"},"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_assign_document_to_collection_app_v2_assign_document_to_collection_post"},"Body_change_password_app_v2_change_password_post":{"properties":{"current_password":{"type":"string","title":"Current Password","description":"Current 
password"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["current_password","new_password"],"title":"Body_change_password_app_v2_change_password_post"},"Body_create_collection_app_v2_create_collection_post":{"properties":{"name":{"type":"string","title":"Name","description":"Collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Collection description","default":""}},"type":"object","required":["name"],"title":"Body_create_collection_app_v2_create_collection_post"},"Body_create_graph_v2_create_graph_post":{"properties":{"document_ids":{"anyOf":[{"items":{"type":"string"},"type":"array"},{"type":"null"}],"title":"Document Ids","description":"List of document IDs to create the graph on."},"kg_creation_settings":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"$ref":"#/components/schemas/KGCreationSettings"}},{"type":"null"}],"title":"Kg Creation Settings","description":"Settings for the graph creation process."}},"type":"object","title":"Body_create_graph_v2_create_graph_post"},"Body_delete_user_app_v2_user__user_id__delete":{"properties":{"password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Password","description":"User's current password"},"delete_vector_data":{"anyOf":[{"type":"boolean"},{"type":"null"}],"title":"Delete Vector Data","description":"Whether to delete the user's vector data","default":false}},"type":"object","title":"Body_delete_user_app_v2_user__user_id__delete"},"Body_enrich_graph_v2_enrich_graph_post":{"properties":{"skip_clustering":{"type":"boolean","title":"Skip Clustering","description":"Whether to skip leiden clustering on the graph or not.","default":false},"force_enrichment":{"type":"boolean","title":"Force Enrichment","description":"Force Enrichment step even if graph creation is still in progress for some 
documents.","default":false},"kg_enrichment_settings":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"$ref":"#/components/schemas/KGEnrichmentSettings"}},{"type":"null"}],"title":"Kg Enrichment Settings","description":"Settings for the graph enrichment process."}},"type":"object","title":"Body_enrich_graph_v2_enrich_graph_post"},"Body_ingest_files_app_v2_ingest_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to ingest into the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. If not provided, the system will generate a unique document id via the `generate_user_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"chunking_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"$ref":"#/components/schemas/ChunkingConfig"}},{"type":"null"}],"title":"Chunking Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. 
If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_ingest_files_app_v2_ingest_files_post"},"Body_login_app_v2_login_post":{"properties":{"grant_type":{"anyOf":[{"type":"string","pattern":"password"},{"type":"null"}],"title":"Grant Type"},"username":{"type":"string","title":"Username"},"password":{"type":"string","title":"Password"},"scope":{"type":"string","title":"Scope","default":""},"client_id":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Id"},"client_secret":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Client Secret"}},"type":"object","required":["username","password"],"title":"Body_login_app_v2_login_post"},"Body_put_user_app_v2_user_put":{"properties":{"email":{"anyOf":[{"type":"string","format":"email"},{"type":"null"}],"title":"Email","description":"Updated email address"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated user name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio","description":"Updated user bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile Picture","description":"Updated profile picture URL"}},"type":"object","title":"Body_put_user_app_v2_user_put"},"Body_rag_app_v2_rag_post":{"properties":{"query":{"type":"string","title":"Query","description":"RAG query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"},"rag_generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"RAG generation configuration"},"task_prompt_override":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Task Prompt Override","description":"Task prompt override"},"include_title_if_available":{"type":"boolean","title":"Include Title If 
Available","description":"Includes document title in chunk response, if available.","default":false}},"type":"object","required":["query"],"title":"Body_rag_app_v2_rag_post"},"Body_register_app_v2_register_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"password":{"type":"string","title":"Password","description":"User's password"}},"type":"object","required":["email","password"],"title":"Body_register_app_v2_register_post"},"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post":{"properties":{"document_id":{"type":"string","title":"Document Id","description":"Document ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["document_id","collection_id"],"title":"Body_remove_document_from_collection_app_v2_remove_document_from_collection_post"},"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post":{"properties":{"user_id":{"type":"string","title":"User Id","description":"User ID"},"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"}},"type":"object","required":["user_id","collection_id"],"title":"Body_remove_user_from_collection_app_v2_remove_user_from_collection_post"},"Body_reset_password_app_v2_reset_password_post":{"properties":{"reset_token":{"type":"string","title":"Reset Token","description":"Password reset token"},"new_password":{"type":"string","title":"New Password","description":"New password"}},"type":"object","required":["reset_token","new_password"],"title":"Body_reset_password_app_v2_reset_password_post"},"Body_retry_ingest_files_v2_retry_ingest_files_post":{"properties":{"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids","description":"An optional list of document ids for each file. 
If not provided, the system will generate a unique document id via the `generate_user_document_id` method."}},"type":"object","required":["document_ids"],"title":"Body_retry_ingest_files_v2_retry_ingest_files_post"},"Body_score_completion_v2_score_completion_post":{"properties":{"message_id":{"type":"string","title":"Message Id","description":"Message ID"},"score":{"type":"number","title":"Score","description":"Completion score"}},"type":"object","required":["message_id","score"],"title":"Body_score_completion_v2_score_completion_post"},"Body_search_app_v2_search_post":{"properties":{"query":{"type":"string","title":"Query","description":"Search query"},"vector_search_settings":{"$ref":"#/components/schemas/VectorSearchSettings","description":"Vector search settings"},"kg_search_settings":{"$ref":"#/components/schemas/KGSearchSettings","description":"Knowledge graph search settings"}},"type":"object","required":["query"],"title":"Body_search_app_v2_search_post"},"Body_update_collection_app_v2_update_collection_put":{"properties":{"collection_id":{"type":"string","title":"Collection Id","description":"Collection ID"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name","description":"Updated collection name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description","description":"Updated collection description"}},"type":"object","required":["collection_id"],"title":"Body_update_collection_app_v2_update_collection_put"},"Body_update_files_app_v2_update_files_post":{"properties":{"files":{"items":{"type":"string","format":"binary"},"type":"array","title":"Files","description":"The list of paths of input files to update in the system."},"document_ids":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"string","format":"uuid"},"type":"array"}},{"type":"null"}],"title":"Document Ids","description":"An optional list of document ids for each file. 
If not provided, the system will generate a unique document id via the `generate_user_document_id` method."},"metadatas":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"items":{"type":"object"},"type":"array"}},{"type":"null"}],"title":"Metadatas","description":"An optional list of JSON metadata to affix to each file"},"chunking_config":{"anyOf":[{"type":"string","contentMediaType":"application/json","contentSchema":{"$ref":"#/components/schemas/ChunkingConfig"}},{"type":"null"}],"title":"Chunking Config","description":"An optional dictionary to override the default chunking configuration for the ingestion process. If not provided, the system will use the default server-side chunking configuration."}},"type":"object","required":["files"],"title":"Body_update_files_app_v2_update_files_post"},"Body_update_prompt_app_v2_update_prompt_post":{"properties":{"name":{"type":"string","title":"Name","description":"Prompt name"},"template":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Template","description":"Prompt template"},"input_types":{"anyOf":[{"additionalProperties":{"type":"string"},"type":"object"},{"type":"null"}],"title":"Input Types","description":"Input types","default":{}}},"type":"object","required":["name"],"title":"Body_update_prompt_app_v2_update_prompt_post"},"Body_verify_email_app_v2_verify_email_post":{"properties":{"email":{"type":"string","format":"email","title":"Email","description":"User's email address"},"verification_code":{"type":"string","title":"Verification Code","description":"Email verification 
code"}},"type":"object","required":["email","verification_code"],"title":"Body_verify_email_app_v2_verify_email_post"},"ChunkingConfig":{"properties":{"provider":{"type":"string"}},"type":"object","title":"ChunkingConfig","example":{"chunking_strategy":"by_title","combine_under_n_chars":128,"max_characters":1024,"new_after_n_chars":512,"overlap":20,"provider":"unstructured_local","strategy":"auto"}},"DocumentChunkResponse":{"properties":{"fragment_id":{"type":"string","format":"uuid","title":"Fragment Id"},"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["fragment_id","extraction_id","document_id","user_id","collection_ids","text","metadata"],"title":"DocumentChunkResponse"},"DocumentOverviewResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"title":{"type":"string","title":"Title"},"user_id":{"type":"string","format":"uuid","title":"User Id"},"type":{"type":"string","title":"Type"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"ingestion_status":{"type":"string","title":"Ingestion Status"},"restructuring_status":{"type":"string","title":"Restructuring Status"},"version":{"type":"string","title":"Version"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection 
Ids"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["id","title","user_id","type","created_at","updated_at","ingestion_status","restructuring_status","version","collection_ids","metadata"],"title":"DocumentOverviewResponse"},"GenerationConfig":{"properties":{"model":{"type":"string","title":"Model"},"temperature":{"type":"number","title":"Temperature"},"top_p":{"type":"number","title":"Top P"},"max_tokens_to_sample":{"type":"integer","title":"Max Tokens To Sample"},"stream":{"type":"boolean","title":"Stream"},"functions":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Functions"},"tools":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tools"},"add_generation_kwargs":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Add Generation Kwargs"},"api_base":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Api Base"}},"type":"object","title":"GenerationConfig","max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"GenericMessageResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"GenericMessageResponse"},"GroupOverviewResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"user_count":{"type":"integer","title":"User Count"},"document_count":{"type":"integer","title":"Document Count"}},"type":"object","required":["collection_id","name","description","created_at","updated_at","user_count","document_count"],"title":"GroupOverviewResponse"},"GroupResponse":{"properties":{"collection_id":{"type":"string","format":"uuid","title":"Collection 
Id"},"name":{"type":"string","title":"Name"},"description":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Description"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"}},"type":"object","required":["collection_id","name","description","created_at","updated_at"],"title":"GroupResponse"},"HTTPValidationError":{"properties":{"detail":{"items":{"$ref":"#/components/schemas/ValidationError"},"type":"array","title":"Detail"}},"type":"object","title":"HTTPValidationError"},"HybridSearchSettings":{"properties":{"full_text_weight":{"type":"number","title":"Full Text Weight","description":"Weight to apply to full text search","default":1.0},"semantic_weight":{"type":"number","title":"Semantic Weight","description":"Weight to apply to semantic search","default":5.0},"full_text_limit":{"type":"integer","title":"Full Text Limit","description":"Maximum number of results to return from full text search","default":200},"rrf_k":{"type":"integer","title":"Rrf K","description":"K-value for RRF (Rank Reciprocal Fusion)","default":50}},"type":"object","title":"HybridSearchSettings"},"IndexMeasure":{"type":"string","enum":["cosine_distance","l2_distance","max_inner_product"],"title":"IndexMeasure","description":"An enum representing the types of distance measures available for indexing.\n\nAttributes:\n cosine_distance (str): The cosine distance measure for indexing.\n l2_distance (str): The Euclidean (L2) distance measure for indexing.\n max_inner_product (str): The maximum inner product measure for indexing."},"IngestionResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the ingestion request."},"document_id":{"type":"string","format":"uuid","title":"Document Id","description":"The ID of the document 
that was ingested."}},"type":"object","required":["message","task_id","document_id"],"title":"IngestionResponse","example":{"document_id":"9fbe403b-c11c-5aae-8ade-ef22980c3ad1","message":"Ingestion task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGCreationResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the restructure request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the restructure request."}},"type":"object","required":["message","task_id"],"title":"KGCreationResponse","example":{"message":"Graph creation queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGCreationSettings":{"properties":{"entity_types":{"items":{"type":"string"},"type":"array","title":"Entity Types","description":"The types of entities to extract.","default":[]},"relation_types":{"items":{"type":"string"},"type":"array","title":"Relation Types","description":"The types of relations to extract.","default":[]},"fragment_merge_count":{"type":"integer","title":"Fragment Merge Count","description":"The number of fragments to merge into a single KG extraction.","default":4},"max_knowledge_triples":{"type":"integer","title":"Max Knowledge Triples","description":"The maximum number of knowledge triples to extract from each chunk.","default":100},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"Configuration for text generation during graph enrichment."}},"type":"object","title":"KGCreationSettings","description":"Settings for knowledge graph creation."},"KGEnrichmentResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the restructure request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the restructure 
request."}},"type":"object","required":["message","task_id"],"title":"KGEnrichmentResponse","example":{"message":"Graph enrichment queued successfuly.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"KGEnrichmentSettings":{"properties":{"max_description_input_length":{"type":"integer","title":"Max Description Input Length","description":"The maximum length of the description for a node in the graph.","default":65536},"max_summary_input_length":{"type":"integer","title":"Max Summary Input Length","description":"The maximum length of the summary for a community.","default":65536},"generation_config":{"$ref":"#/components/schemas/GenerationConfig","description":"Configuration for text generation during graph enrichment."},"leiden_params":{"type":"object","title":"Leiden Params","description":"Parameters for the Leiden algorithm."}},"type":"object","title":"KGEnrichmentSettings","description":"Settings for knowledge graph enrichment."},"KGGlobalSearchResult":{"properties":{"query":{"type":"string","title":"Query"},"search_result":{"items":{"type":"string"},"type":"array","title":"Search Result"}},"type":"object","required":["query","search_result"],"title":"KGGlobalSearchResult","description":"Result of a global knowledge graph search operation.","query":"What were Aristotles key contributions to philosophy?","search_result":["### Aristotle's Key Contributions to Philosophy\n\nAristotle's extensive body of work laid the foundation for numerous fields within philosophy and beyond, significantly shaping the trajectory of Western thought. His systematic approach to data collection and analysis has had a lasting impact on modern scientific methods. Below, we explore some of his most influential contributions.\n\n#### Foundational Works and Systematic Approach\n\nAristotle's writings cover a broad spectrum of topics, including logic, biology, ethics, and political science. 
His key works such as 'Physics,' 'On the Soul,' and 'Nicomachean Ethics' delve into fundamental concepts like substance, memory, and the nature of the city [Data: Reports (1, 2, 3, 4, 5, +more)]. These texts not only provided a comprehensive framework for understanding various aspects of the natural and human world but also established methodologies that continue to influence contemporary scientific inquiry.\n\n#### Ethical and Political Philosophy\n\nIn 'Nicomachean Ethics,' Aristotle explores the concept of a virtuous character, emphasizing the importance of moral virtues and the development of good habits. His work 'Politics' further examines the structure and function of the city (polis), addressing issues related to property, trade, and governance. Aristotle's classification of political constitutions and his definition of the city as the natural political community have had a profound and enduring impact on political thought [Data: Reports (11, 12); Triples (21, 22, 23, 24, 25)].\n\n#### Theories on Memory and Perception\n\nAristotle's theories on memory and perception are articulated in his works 'On the Soul' and 'De Anima iii 3.' He defines memory as the retention of experiences shaped by sensation and discusses the faculty of imagination (phantasia). These theories have significantly influenced subsequent philosophical and psychological studies on cognition and perception [Data: Reports (13, 14); Triples (26, 27, 28, 29, 30)].\n\n#### Epistemology and Scientific Method\n\nAristotle's epistemology, known as immanent realism, is based on the study of things that exist or happen in the world. This approach emphasizes empirical observation and has been instrumental in shaping the development of scientific methods. 
His insistence on grounding knowledge in observable phenomena laid the groundwork for future empirical research [Data: Reports (3)].\n\n#### Engagement with Predecessors and Contemporaries\n\nAristotle was also known for his critical engagement with the ideas of his predecessors and contemporaries. For instance, he refuted Democritus's claim about the Milky Way and criticized Empedocles's materialist theory of 'survival of the fittest.' These critiques highlight Aristotle's active participation in the broader philosophical discourse of his time and his contributions to refining and advancing philosophical thought [Data: Reports (15, 16); Triples (31, 32, 33, 34, 35)].\n\n### Conclusion\n\nAristotle's contributions to philosophy are vast and multifaceted, encompassing ethics, politics, epistemology, and more. His works continue to be studied and revered for their depth, rigor, and enduring relevance. Through his systematic approach and critical engagement with existing ideas, Aristotle has left an indelible mark on the landscape of Western philosophy."]},"KGLocalSearchResult":{"properties":{"query":{"type":"string","title":"Query"},"entities":{"type":"object","title":"Entities"},"relationships":{"type":"object","title":"Relationships"},"communities":{"type":"object","title":"Communities"}},"type":"object","required":["query","entities","relationships","communities"],"title":"KGLocalSearchResult","description":"Result of a local knowledge graph search operation.","communities":{"0":{"summary":{"findings":[{"explanation":"Aristotle is credited with the earliest study of formal logic, and his conception of it was the dominant form of Western logic until the 19th-century advances in mathematical logic. 
His works compiled into a set of six books ...","summary":"Aristotle's Foundational Role in Logic"}],"rating":9.5,"rating_explanation":"The impact severity rating is high due to Aristotle's foundational influence on multiple disciplines and his enduring legacy in Western philosophy and science.","summary":"The community revolves around Aristotle, an ancient Greek philosopher and polymath, who made significant contributions to various fields including logic, biology, political science, and economics. His works, such as 'Politics' and 'Nicomachean Ethics', have influenced numerous disciplines and thinkers from antiquity through the Middle Ages and beyond. The relationships between his various works and the fields he contributed to highlight his profound impact on Western thought.","title":"Aristotle and His Contributions"}}},"entities":{"0":{"description":"Aristotle was an ancient Greek philosopher and polymath, recognized as the father of various fields including logic, biology, and political science. He authored significant works such as the *Nicomachean Ethics* and *Politics*, where he explored concepts of virtue, governance, and the nature of reality, while also critiquing Platos ideas. 
His teachings and observations laid the groundwork for numerous disciplines, influencing thinkers ...","name":"Aristotle"}},"query":"Who is Aristotle?","relationships":{}},"KGSearchResult":{"properties":{"local_result":{"anyOf":[{"$ref":"#/components/schemas/KGLocalSearchResult"},{"type":"null"}]},"global_result":{"anyOf":[{"$ref":"#/components/schemas/KGGlobalSearchResult"},{"type":"null"}]}},"type":"object","title":"KGSearchResult","description":"Result of a knowledge graph search operation.","global_result":{"query":"What were Aristotles key contributions to philosophy?","search_result":["### Aristotle's Key Contributions to Philosophy\n\nAristotle's extensive body of work laid the foundation for numerous fields within philosophy and beyond, significantly shaping the trajectory of Western thought. His systematic approach to data collection and analysis has had a lasting impact on modern scientific methods. Below, we explore some of his most influential contributions.\n\n#### Foundational Works and Systematic Approach\n\nAristotle's writings cover a broad spectrum of topics, including logic, biology, ethics, and political science. His key works such as 'Physics,' 'On the Soul,' and 'Nicomachean Ethics' delve into fundamental concepts like substance, memory, and the nature of the city [Data: Reports (1, 2, 3, 4, 5, +more)]. These texts not only provided a comprehensive framework for understanding various aspects of the natural and human world but also established methodologies that continue to influence contemporary scientific inquiry.\n\n#### Ethical and Political Philosophy\n\nIn 'Nicomachean Ethics,' Aristotle explores the concept of a virtuous character, emphasizing the importance of moral virtues and the development of good habits. His work 'Politics' further examines the structure and function of the city (polis), addressing issues related to property, trade, and governance. 
Aristotle's classification of political constitutions and his definition of the city as the natural political community have had a profound and enduring impact on political thought [Data: Reports (11, 12); Triples (21, 22, 23, 24, 25)].\n\n#### Theories on Memory and Perception\n\nAristotle's theories on memory and perception are articulated in his works 'On the Soul' and 'De Anima iii 3.' He defines memory as the retention of experiences shaped by sensation and discusses the faculty of imagination (phantasia). These theories have significantly influenced subsequent philosophical and psychological studies on cognition and perception [Data: Reports (13, 14); Triples (26, 27, 28, 29, 30)].\n\n#### Epistemology and Scientific Method\n\nAristotle's epistemology, known as immanent realism, is based on the study of things that exist or happen in the world. This approach emphasizes empirical observation and has been instrumental in shaping the development of scientific methods. His insistence on grounding knowledge in observable phenomena laid the groundwork for future empirical research [Data: Reports (3)].\n\n#### Engagement with Predecessors and Contemporaries\n\nAristotle was also known for his critical engagement with the ideas of his predecessors and contemporaries. For instance, he refuted Democritus's claim about the Milky Way and criticized Empedocles's materialist theory of 'survival of the fittest.' These critiques highlight Aristotle's active participation in the broader philosophical discourse of his time and his contributions to refining and advancing philosophical thought [Data: Reports (15, 16); Triples (31, 32, 33, 34, 35)].\n\n### Conclusion\n\nAristotle's contributions to philosophy are vast and multifaceted, encompassing ethics, politics, epistemology, and more. His works continue to be studied and revered for their depth, rigor, and enduring relevance. 
Through his systematic approach and critical engagement with existing ideas, Aristotle has left an indelible mark on the landscape of Western philosophy."]},"local_result":{"communities":{"0":{"summary":{"findings":[{"explanation":"Aristotle is credited with the earliest study of formal logic, and his conception of it was the dominant form of Western logic until the 19th-century advances in mathematical logic. His works compiled into a set of six books ...","summary":"Aristotle's Foundational Role in Logic"}],"rating":9.5,"rating_explanation":"The impact severity rating is high due to Aristotle's foundational influence on multiple disciplines and his enduring legacy in Western philosophy and science.","summary":"The community revolves around Aristotle, an ancient Greek philosopher and polymath, who made significant contributions to various fields including logic, biology, political science, and economics. His works, such as 'Politics' and 'Nicomachean Ethics', have influenced numerous disciplines and thinkers from antiquity through the Middle Ages and beyond. The relationships between his various works and the fields he contributed to highlight his profound impact on Western thought.","title":"Aristotle and His Contributions"}}},"entities":{"0":{"description":"Aristotle was an ancient Greek philosopher and polymath, recognized as the father of various fields including logic, biology, and political science. He authored significant works such as the *Nicomachean Ethics* and *Politics*, where he explored concepts of virtue, governance, and the nature of reality, while also critiquing Platos ideas. 
His teachings and observations laid the groundwork for numerous disciplines, influencing thinkers ...","name":"Aristotle"}},"query":"Who is Aristotle?","relationships":{}}},"KGSearchSettings":{"properties":{"use_kg_search":{"type":"boolean","title":"Use Kg Search","default":false},"kg_search_type":{"type":"string","title":"Kg Search Type","default":"global"},"kg_search_level":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Kg Search Level"},"generation_config":{"anyOf":[{"$ref":"#/components/schemas/GenerationConfig"},{"type":"null"}]},"max_community_description_length":{"type":"integer","title":"Max Community Description Length","default":65536},"max_llm_queries_for_global_search":{"type":"integer","title":"Max Llm Queries For Global Search","default":250},"local_search_limits":{"additionalProperties":{"type":"integer"},"type":"object","title":"Local Search Limits","default":{"__Entity__":20,"__Relationship__":20,"__Community__":20}}},"type":"object","title":"KGSearchSettings","generation_config":{"max_tokens_to_sample":1024,"model":"openai/gpt-4o","stream":false,"temperature":0.1,"top_p":1.0},"kg_search_level":"0","kg_search_type":"global","local_search_limits":{"__Community__":20,"__Entity__":20,"__Relationship__":20},"max_community_description_length":65536,"max_llm_queries_for_global_search":250,"use_kg_search":true},"LogEntry":{"properties":{"key":{"type":"string","title":"Key"},"value":{"title":"Value"},"timestamp":{"type":"string","format":"date-time","title":"Timestamp"}},"type":"object","required":["key","value","timestamp"],"title":"LogEntry"},"LogResponse":{"properties":{"run_id":{"type":"string","format":"uuid","title":"Run Id"},"run_type":{"type":"string","title":"Run 
Type"},"entries":{"items":{"$ref":"#/components/schemas/LogEntry"},"type":"array","title":"Entries"},"timestamp":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Timestamp"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"}},"type":"object","required":["run_id","run_type","entries","timestamp","user_id"],"title":"LogResponse"},"Message-Input":{"properties":{"role":{"anyOf":[{"$ref":"#/components/schemas/MessageType"},{"type":"string"}],"title":"Role"},"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"function_call":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Function Call"},"tool_calls":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"type":"object","required":["role"],"title":"Message"},"Message-Output":{"properties":{"role":{"anyOf":[{"$ref":"#/components/schemas/MessageType"},{"type":"string"}],"title":"Role"},"content":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Content"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"function_call":{"anyOf":[{"type":"object"},{"type":"null"}],"title":"Function Call"},"tool_calls":{"anyOf":[{"items":{"type":"object"},"type":"array"},{"type":"null"}],"title":"Tool Calls"}},"type":"object","required":["role"],"title":"Message","content":"This is a test message.","role":"user"},"MessageType":{"type":"string","enum":["system","user","assistant","function","tool"],"title":"MessageType"},"PromptResponse":{"properties":{"name":{"type":"string","title":"Name"},"template":{"type":"string","title":"Template"},"created_at":{"type":"string","format":"date-time","title":"Created At"},"updated_at":{"type":"string","format":"date-time","title":"Updated At"},"input_types":{"additionalProperties":{"type":"string"},"type":"object","title":"Input 
Types"}},"type":"object","required":["name","template","created_at","updated_at","input_types"],"title":"PromptResponse"},"RAGAgentResponse":{"properties":{"messages":{"items":{"$ref":"#/components/schemas/Message-Output"},"type":"array","title":"Messages","description":"List of messages in the RAG agent response"}},"type":"object","required":["messages"],"title":"RAGAgentResponse","example":[{"content":"## You are a helpful assistant that can search for information.\n\nWhen asked a question, perform a search to find relevant information and provide a response.\n\nThe response should contain line-item attributions to relevent search results, and be as informative if possible.\nIf no relevant results are found, then state that no results were found.\nIf no obvious question is present, then do not carry out a search, and instead ask for clarification.","role":"system"},{"content":"You are a helpful assistant.","role":"system"},{"content":"Who is the greatest philospher of all time?","role":"user"},{"content":"Aristotle is widely considered the greatest philospher of all time.","role":"assistant"},{"content":"Can you tell me more about him?","role":"user"},{"function_call":{"arguments":"{\"query\":\"Aristotle biography\"}","name":"search"},"role":"assistant"},{"content":"1. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n2. Aristotle[A] (Greek: Ἀριστοτέλης Aristotélēs, pronounced [aristotélɛːs]; 384–322 BC) was an Ancient Greek philosopher and polymath. 
His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\n3. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n4. Aristotle was born in 384 BC[C] in Stagira, Chalcidice,[2] about 55 km (34 miles) east of modern-day Thessaloniki.[3][4] His father, Nicomachus, was the personal physician to King Amyntas of Macedon. While he was young, Aristotle learned about biology and medical information, which was taught by his father.[5] Both of Aristotle's parents died when he was about thirteen, and Proxenus of Atarneus became his guardian.[6] Although little information about Aristotle's childhood has survived, he probably spent\n5. Life\nIn general, the details of Aristotle's life are not well-established. The biographies written in ancient times are often speculative and historians only agree on a few salient points.[B]\n","name":"search","role":"function"},{"content":"Aristotle (384–322 BC) was an Ancient Greek philosopher and polymath whose contributions have had a profound impact on various fields of knowledge. Here are some key points about his life and work:\n\n1. **Early Life**: Aristotle was born in 384 BC in Stagira, Chalcidice, which is near modern-day Thessaloniki, Greece. 
His father, Nicomachus, was the personal physician to King Amyntas of Macedon, which exposed Aristotle to medical and biological knowledge from a young age [C].\n\n2. **Education and Career**: After the death of his parents, Aristotle was sent to Athens to study at Plato's Academy, where he remained for about 20 years. After Plato's death, Aristotle left Athens and eventually became the tutor of Alexander the Great [C].\n\n3. **Philosophical Contributions**: Aristotle founded the Lyceum in Athens, where he established the Peripatetic school of philosophy. His works cover a wide range of subjects, including metaphysics, ethics, politics, logic, biology, and aesthetics. His writings laid the groundwork for many modern scientific and philosophical inquiries [A].\n\n4. **Legacy**: Aristotle's influence extends beyond philosophy to the natural sciences, linguistics, economics, and psychology. His method of systematic observation and analysis has been foundational to the development of modern science [A].\n\nAristotle's comprehensive approach to knowledge and his systematic methodology have earned him a lasting legacy as one of the greatest philosophers of all time.\n\nSources:\n- [A] Aristotle's broad range of writings and influence on modern science.\n- [C] Details about Aristotle's early life and education.","role":"assistant"}]},"RAGResponse":{"properties":{"completion":{"title":"Completion","description":"The generated completion from the RAG process"},"search_results":{"$ref":"#/components/schemas/SearchResponse","description":"The search results used for the RAG process"}},"type":"object","required":["completion","search_results"],"title":"RAGResponse","example":{"completion":{"choices":[{"finish_reason":"stop","index":0,"message":{"content":"Paris is the capital of France.","role":"assistant"}}],"id":"chatcmpl-example123"},"search_results":{"kg_search_results":[{"global_result":{"query":"What were Aristotles key contributions to philosophy?","search_result":["### 
Aristotle's Key Contributions to Philosophy\n\nAristotle's extensive body of work laid the foundation for numerous fields within philosophy and beyond, significantly shaping the trajectory of Western thought. His systematic approach to data collection and analysis has had a lasting impact on modern scientific methods. Below, we explore some of his most influential contributions.\n\n#### Foundational Works and Systematic Approach\n\nAristotle's writings cover a broad spectrum of topics, including logic, biology, ethics, and political science. His key works such as 'Physics,' 'On the Soul,' and 'Nicomachean Ethics' delve into fundamental concepts like substance, memory, and the nature of the city [Data: Reports (1, 2, 3, 4, 5, +more)]. These texts not only provided a comprehensive framework for understanding various aspects of the natural and human world but also established methodologies that continue to influence contemporary scientific inquiry.\n\n#### Ethical and Political Philosophy\n\nIn 'Nicomachean Ethics,' Aristotle explores the concept of a virtuous character, emphasizing the importance of moral virtues and the development of good habits. His work 'Politics' further examines the structure and function of the city (polis), addressing issues related to property, trade, and governance. Aristotle's classification of political constitutions and his definition of the city as the natural political community have had a profound and enduring impact on political thought [Data: Reports (11, 12); Triples (21, 22, 23, 24, 25)].\n\n#### Theories on Memory and Perception\n\nAristotle's theories on memory and perception are articulated in his works 'On the Soul' and 'De Anima iii 3.' He defines memory as the retention of experiences shaped by sensation and discusses the faculty of imagination (phantasia). 
These theories have significantly influenced subsequent philosophical and psychological studies on cognition and perception [Data: Reports (13, 14); Triples (26, 27, 28, 29, 30)].\n\n#### Epistemology and Scientific Method\n\nAristotle's epistemology, known as immanent realism, is based on the study of things that exist or happen in the world. This approach emphasizes empirical observation and has been instrumental in shaping the development of scientific methods. His insistence on grounding knowledge in observable phenomena laid the groundwork for future empirical research [Data: Reports (3)].\n\n#### Engagement with Predecessors and Contemporaries\n\nAristotle was also known for his critical engagement with the ideas of his predecessors and contemporaries. For instance, he refuted Democritus's claim about the Milky Way and criticized Empedocles's materialist theory of 'survival of the fittest.' These critiques highlight Aristotle's active participation in the broader philosophical discourse of his time and his contributions to refining and advancing philosophical thought [Data: Reports (15, 16); Triples (31, 32, 33, 34, 35)].\n\n### Conclusion\n\nAristotle's contributions to philosophy are vast and multifaceted, encompassing ethics, politics, epistemology, and more. His works continue to be studied and revered for their depth, rigor, and enduring relevance. Through his systematic approach and critical engagement with existing ideas, Aristotle has left an indelible mark on the landscape of Western philosophy."]},"local_result":{"communities":{"0":{"summary":{"findings":[{"explanation":"Aristotle is credited with the earliest study of formal logic, and his conception of it was the dominant form of Western logic until the 19th-century advances in mathematical logic. 
His works compiled into a set of six books ...","summary":"Aristotle's Foundational Role in Logic"}],"rating":9.5,"rating_explanation":"The impact severity rating is high due to Aristotle's foundational influence on multiple disciplines and his enduring legacy in Western philosophy and science.","summary":"The community revolves around Aristotle, an ancient Greek philosopher and polymath, who made significant contributions to various fields including logic, biology, political science, and economics. His works, such as 'Politics' and 'Nicomachean Ethics', have influenced numerous disciplines and thinkers from antiquity through the Middle Ages and beyond. The relationships between his various works and the fields he contributed to highlight his profound impact on Western thought.","title":"Aristotle and His Contributions"}}},"entities":{"0":{"description":"Aristotle was an ancient Greek philosopher and polymath, recognized as the father of various fields including logic, biology, and political science. He authored significant works such as the *Nicomachean Ethics* and *Politics*, where he explored concepts of virtue, governance, and the nature of reality, while also critiquing Platos ideas. 
His teachings and observations laid the groundwork for numerous disciplines, influencing thinkers ...","name":"Aristotle"}},"query":"Who is Aristotle?","relationships":{}}}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","fragment_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}}},"ResultsWrapper_AddUserResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AddUserResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AddUserResponse]"},"ResultsWrapper_AllPromptsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AllPromptsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AllPromptsResponse]"},"ResultsWrapper_AnalyticsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AnalyticsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AnalyticsResponse]"},"ResultsWrapper_AppSettingsResponse_":{"properties":{"results":{"$ref":"#/components/schemas/AppSettingsResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[AppSettingsResponse]"},"ResultsWrapper_GenericMessageResponse_":{"properties":{"results":{"$ref":"#/components/schemas/GenericMessageResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[GenericMessageResponse]"},"ResultsWrapper_GroupResponse_":{"properties":{"results":{"$ref":"#/components/schemas/GroupResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[GroupResponse]"},"ResultsWrapper_KGCreationResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGCreationResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGCreationResponse]"},"ResultsWrapper_KGEn
richmentResponse_":{"properties":{"results":{"$ref":"#/components/schemas/KGEnrichmentResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[KGEnrichmentResponse]"},"ResultsWrapper_List_DocumentChunkResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentChunkResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[List[DocumentChunkResponse]]"},"ResultsWrapper_List_DocumentOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/DocumentOverviewResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[List[DocumentOverviewResponse]]"},"ResultsWrapper_List_GroupOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/GroupOverviewResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[List[GroupOverviewResponse]]"},"ResultsWrapper_List_GroupResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/GroupResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[List[GroupResponse]]"},"ResultsWrapper_List_LogResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/LogResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[List[LogResponse]]"},"ResultsWrapper_List_UserOverviewResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/UserOverviewResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[List[UserOverviewResponse]]"},"ResultsWrapper_RAGAgentResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGAgentResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGAgentResponse]"},"ResultsWrapper_RAGResponse_":{"properties":{"results":{"$ref":"#/components/schemas/RAGResp
onse"}},"type":"object","required":["results"],"title":"ResultsWrapper[RAGResponse]"},"ResultsWrapper_ScoreCompletionResponse_":{"properties":{"results":{"$ref":"#/components/schemas/ScoreCompletionResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[ScoreCompletionResponse]"},"ResultsWrapper_SearchResponse_":{"properties":{"results":{"$ref":"#/components/schemas/SearchResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[SearchResponse]"},"ResultsWrapper_ServerStats_":{"properties":{"results":{"$ref":"#/components/schemas/ServerStats"}},"type":"object","required":["results"],"title":"ResultsWrapper[ServerStats]"},"ResultsWrapper_TokenResponse_":{"properties":{"results":{"$ref":"#/components/schemas/TokenResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[TokenResponse]"},"ResultsWrapper_UpdatePromptResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdatePromptResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdatePromptResponse]"},"ResultsWrapper_UpdateResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UpdateResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UpdateResponse]"},"ResultsWrapper_UserResponse_":{"properties":{"results":{"$ref":"#/components/schemas/UserResponse"}},"type":"object","required":["results"],"title":"ResultsWrapper[UserResponse]"},"ResultsWrapper_list_IngestionResponse__":{"properties":{"results":{"items":{"$ref":"#/components/schemas/IngestionResponse"},"type":"array","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[list[IngestionResponse]]"},"ResultsWrapper_str_":{"properties":{"results":{"type":"string","title":"Results"}},"type":"object","required":["results"],"title":"ResultsWrapper[str]"},"ScoreCompletionResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"ScoreCompletionResponse"},"SearchResp
onse":{"properties":{"vector_search_results":{"items":{"$ref":"#/components/schemas/VectorSearchResult"},"type":"array","title":"Vector Search Results","description":"List of vector search results"},"kg_search_results":{"anyOf":[{"items":{"$ref":"#/components/schemas/KGSearchResult"},"type":"array"},{"type":"null"}],"title":"Kg Search Results","description":"Knowledge graph search results, if applicable"}},"type":"object","required":["vector_search_results"],"title":"SearchResponse","example":{"kg_search_results":[{"global_result":{"query":"What were Aristotles key contributions to philosophy?","search_result":["### Aristotle's Key Contributions to Philosophy\n\nAristotle's extensive body of work laid the foundation for numerous fields within philosophy and beyond, significantly shaping the trajectory of Western thought. His systematic approach to data collection and analysis has had a lasting impact on modern scientific methods. Below, we explore some of his most influential contributions.\n\n#### Foundational Works and Systematic Approach\n\nAristotle's writings cover a broad spectrum of topics, including logic, biology, ethics, and political science. His key works such as 'Physics,' 'On the Soul,' and 'Nicomachean Ethics' delve into fundamental concepts like substance, memory, and the nature of the city [Data: Reports (1, 2, 3, 4, 5, +more)]. These texts not only provided a comprehensive framework for understanding various aspects of the natural and human world but also established methodologies that continue to influence contemporary scientific inquiry.\n\n#### Ethical and Political Philosophy\n\nIn 'Nicomachean Ethics,' Aristotle explores the concept of a virtuous character, emphasizing the importance of moral virtues and the development of good habits. His work 'Politics' further examines the structure and function of the city (polis), addressing issues related to property, trade, and governance. 
Aristotle's classification of political constitutions and his definition of the city as the natural political community have had a profound and enduring impact on political thought [Data: Reports (11, 12); Triples (21, 22, 23, 24, 25)].\n\n#### Theories on Memory and Perception\n\nAristotle's theories on memory and perception are articulated in his works 'On the Soul' and 'De Anima iii 3.' He defines memory as the retention of experiences shaped by sensation and discusses the faculty of imagination (phantasia). These theories have significantly influenced subsequent philosophical and psychological studies on cognition and perception [Data: Reports (13, 14); Triples (26, 27, 28, 29, 30)].\n\n#### Epistemology and Scientific Method\n\nAristotle's epistemology, known as immanent realism, is based on the study of things that exist or happen in the world. This approach emphasizes empirical observation and has been instrumental in shaping the development of scientific methods. His insistence on grounding knowledge in observable phenomena laid the groundwork for future empirical research [Data: Reports (3)].\n\n#### Engagement with Predecessors and Contemporaries\n\nAristotle was also known for his critical engagement with the ideas of his predecessors and contemporaries. For instance, he refuted Democritus's claim about the Milky Way and criticized Empedocles's materialist theory of 'survival of the fittest.' These critiques highlight Aristotle's active participation in the broader philosophical discourse of his time and his contributions to refining and advancing philosophical thought [Data: Reports (15, 16); Triples (31, 32, 33, 34, 35)].\n\n### Conclusion\n\nAristotle's contributions to philosophy are vast and multifaceted, encompassing ethics, politics, epistemology, and more. His works continue to be studied and revered for their depth, rigor, and enduring relevance. 
Through his systematic approach and critical engagement with existing ideas, Aristotle has left an indelible mark on the landscape of Western philosophy."]},"local_result":{"communities":{"0":{"summary":{"findings":[{"explanation":"Aristotle is credited with the earliest study of formal logic, and his conception of it was the dominant form of Western logic until the 19th-century advances in mathematical logic. His works compiled into a set of six books ...","summary":"Aristotle's Foundational Role in Logic"}],"rating":9.5,"rating_explanation":"The impact severity rating is high due to Aristotle's foundational influence on multiple disciplines and his enduring legacy in Western philosophy and science.","summary":"The community revolves around Aristotle, an ancient Greek philosopher and polymath, who made significant contributions to various fields including logic, biology, political science, and economics. His works, such as 'Politics' and 'Nicomachean Ethics', have influenced numerous disciplines and thinkers from antiquity through the Middle Ages and beyond. The relationships between his various works and the fields he contributed to highlight his profound impact on Western thought.","title":"Aristotle and His Contributions"}}},"entities":{"0":{"description":"Aristotle was an ancient Greek philosopher and polymath, recognized as the father of various fields including logic, biology, and political science. He authored significant works such as the *Nicomachean Ethics* and *Politics*, where he explored concepts of virtue, governance, and the nature of reality, while also critiquing Platos ideas. 
His teachings and observations laid the groundwork for numerous disciplines, influencing thinkers ...","name":"Aristotle"}},"query":"Who is Aristotle?","relationships":{}}}],"vector_search_results":[{"collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","fragment_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"}]}},"ServerStats":{"properties":{"start_time":{"type":"string","format":"date-time","title":"Start Time"},"uptime_seconds":{"type":"number","title":"Uptime Seconds"},"cpu_usage":{"type":"number","title":"Cpu Usage"},"memory_usage":{"type":"number","title":"Memory Usage"}},"type":"object","required":["start_time","uptime_seconds","cpu_usage","memory_usage"],"title":"ServerStats"},"Token":{"properties":{"token":{"type":"string","title":"Token"},"token_type":{"type":"string","title":"Token Type"}},"type":"object","required":["token","token_type"],"title":"Token"},"TokenResponse":{"properties":{"access_token":{"$ref":"#/components/schemas/Token"},"refresh_token":{"$ref":"#/components/schemas/Token"}},"type":"object","required":["access_token","refresh_token"],"title":"TokenResponse"},"UpdatePromptResponse":{"properties":{"message":{"type":"string","title":"Message"}},"type":"object","required":["message"],"title":"UpdatePromptResponse"},"UpdateResponse":{"properties":{"message":{"type":"string","title":"Message","description":"A message describing the result of the ingestion request."},"task_id":{"type":"string","format":"uuid","title":"Task Id","description":"The task ID of the ingestion request."},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids","description":"The ID of the document that was 
ingested."}},"type":"object","required":["message","task_id","document_ids"],"title":"UpdateResponse","example":{"document_ids":["9fbe403b-c11c-5aae-8ade-ef22980c3ad1"],"message":"Update task queued successfully.","task_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96"}},"UserOverviewResponse":{"properties":{"user_id":{"type":"string","format":"uuid","title":"User Id"},"num_files":{"type":"integer","title":"Num Files"},"total_size_in_bytes":{"type":"integer","title":"Total Size In Bytes"},"document_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Document Ids"}},"type":"object","required":["user_id","num_files","total_size_in_bytes","document_ids"],"title":"UserOverviewResponse"},"UserResponse":{"properties":{"id":{"type":"string","format":"uuid","title":"Id"},"email":{"type":"string","title":"Email"},"is_active":{"type":"boolean","title":"Is Active","default":true},"is_superuser":{"type":"boolean","title":"Is Superuser","default":false},"created_at":{"type":"string","format":"date-time","title":"Created At","default":"2024-09-18T18:14:34.700273"},"updated_at":{"type":"string","format":"date-time","title":"Updated At","default":"2024-09-18T18:14:34.700279"},"is_verified":{"type":"boolean","title":"Is Verified","default":false},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids","default":[]},"hashed_password":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Hashed Password"},"verification_code_expiry":{"anyOf":[{"type":"string","format":"date-time"},{"type":"null"}],"title":"Verification Code Expiry"},"name":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Name"},"bio":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Bio"},"profile_picture":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Profile 
Picture"}},"type":"object","required":["id","email"],"title":"UserResponse"},"ValidationError":{"properties":{"loc":{"items":{"anyOf":[{"type":"string"},{"type":"integer"}]},"type":"array","title":"Location"},"msg":{"type":"string","title":"Message"},"type":{"type":"string","title":"Error Type"}},"type":"object","required":["loc","msg","type"],"title":"ValidationError"},"VectorSearchResult":{"properties":{"fragment_id":{"type":"string","format":"uuid","title":"Fragment Id"},"extraction_id":{"type":"string","format":"uuid","title":"Extraction Id"},"document_id":{"type":"string","format":"uuid","title":"Document Id"},"user_id":{"anyOf":[{"type":"string","format":"uuid"},{"type":"null"}],"title":"User Id"},"collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Collection Ids"},"score":{"type":"number","title":"Score"},"text":{"type":"string","title":"Text"},"metadata":{"type":"object","title":"Metadata"}},"type":"object","required":["fragment_id","extraction_id","document_id","user_id","collection_ids","score","text","metadata"],"title":"VectorSearchResult","description":"Result of a search operation.","collection_ids":[],"document_id":"3e157b3a-8469-51db-90d9-52e7d896b49b","extraction_id":"3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09","fragment_id":"c68dc72e-fc23-5452-8f49-d7bd46088a96","metadata":{"associated_query":"What is the capital of France?","title":"example_document.pdf"},"score":0.23943702876567796,"text":"Example text from the document","user_id":"2acb499e-8428-543b-bd85-0d9098718220"},"VectorSearchSettings":{"properties":{"use_vector_search":{"type":"boolean","title":"Use Vector Search","description":"Whether to use vector search","default":true},"use_hybrid_search":{"type":"boolean","title":"Use Hybrid Search","description":"Whether to perform a hybrid search (combining vector and keyword search)","default":false},"filters":{"type":"object","title":"Filters","description":"Filters to apply to the vector 
search"},"search_limit":{"type":"integer","maximum":1000.0,"minimum":1.0,"title":"Search Limit","description":"Maximum number of results to return","default":10},"selected_collection_ids":{"items":{"type":"string","format":"uuid"},"type":"array","title":"Selected Collection Ids","description":"Collection IDs to search for"},"index_measure":{"$ref":"#/components/schemas/IndexMeasure","description":"The distance measure to use for indexing","default":"cosine_distance"},"include_values":{"type":"boolean","title":"Include Values","description":"Whether to include search score values in the search results","default":true},"include_metadatas":{"type":"boolean","title":"Include Metadatas","description":"Whether to include element metadata in the search results","default":true},"probes":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Probes","description":"Number of ivfflat index lists to query. Higher increases accuracy but decreases speed.","default":10},"ef_search":{"anyOf":[{"type":"integer"},{"type":"null"}],"title":"Ef Search","description":"Size of the dynamic candidate list for HNSW index search. 
Higher increases accuracy but decreases speed.","default":40},"hybrid_search_settings":{"anyOf":[{"$ref":"#/components/schemas/HybridSearchSettings"},{"type":"null"}],"description":"Settings for hybrid search","default":{"full_text_weight":1.0,"semantic_weight":5.0,"full_text_limit":200,"rrf_k":50}},"search_strategy":{"anyOf":[{"type":"string"},{"type":"null"}],"title":"Search Strategy","description":"Search strategy to use (e.g., 'default', 'query_fusion', 'hyde')","default":"vanilla"}},"type":"object","title":"VectorSearchSettings","ef_search":40,"filters":{"category":"technology"},"hybrid_search_settings":{"full_text_limit":200,"full_text_weight":1.0,"rrf_k":50,"semantic_weight":5.0},"include_metadata":true,"index_measure":"cosine_distance","probes":10,"search_limit":20,"selected_collection_ids":["2acb499e-8428-543b-bd85-0d9098718220","3e157b3a-8469-51db-90d9-52e7d896b49b"],"use_hybrid_search":true,"use_vector_search":true}},"securitySchemes":{"HTTPBearer":{"type":"http","scheme":"bearer"},"OAuth2PasswordBearer":{"type":"oauth2","flows":{"password":{"scopes":{},"tokenUrl":"token"}}}}}} diff --git a/docs/cookbooks/graphrag.mdx b/docs/cookbooks/graphrag.mdx index 881be26be..a9d08dc6e 100644 --- a/docs/cookbooks/graphrag.mdx +++ b/docs/cookbooks/graphrag.mdx @@ -32,9 +32,9 @@ r2r serve [kg] provider = "neo4j" batch_size = 256 -kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" [kg.kg_creation_settings] + kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" entity_types = [] # if empty, all entities are extracted relation_types = [] # if empty, all relations are extracted max_knowledge_triples = 100 @@ -47,15 +47,15 @@ kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" generation_config = { model = "gpt-4o-mini" } # and other params, model used for node description and graph clustering leiden_params = { max_levels = 10 } # more params here: https://neo4j.com/docs/graph-data-science/current/algorithms/leiden/ - [kg.kg_search_config] 
- model = "gpt-4o-mini" + [kg.kg_search_settings] + generation_config = { model = "gpt-4o-mini" } ``` ```bash -r2r serve --config-name=local_llm_neo4j_kg +r2r serve --config-name=local_llm ``` ### Local LLM Setup (Optional) @@ -72,7 +72,7 @@ When running with local RAG, you must have the Triplex model available locally. ``` - + ``` toml [completion] provider = "litellm" @@ -96,9 +96,9 @@ excluded_parsers = [ "gif", "jpeg", "jpg", "png", "svg", "mp3", "mp4" ] [kg] provider = "neo4j" -kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" [kg.kg_creation_settings] + kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" entity_types = [] # if empty, all entities are extracted relation_types = [] # if empty, all relations are extracted max_knowledge_triples = 100 @@ -111,8 +111,8 @@ kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" generation_config = { model = "ollama/llama3.1" } # and other params, model used for node description and graph clustering leiden_params = { max_levels = 10 } # more params here: https://neo4j.com/docs/graph-data-science/current/algorithms/leiden/ - [kg.kg_search_config] - model = "ollama/llama3.1" + [kg.kg_search_settings] + generation_config = { model = "ollama/llama3.1" } [database] provider = "postgres" diff --git a/docs/documentation/configuration/knowledge-graph/enrichment.mdx b/docs/documentation/configuration/knowledge-graph/enrichment.mdx index a500adf0a..1f15f5d9a 100644 --- a/docs/documentation/configuration/knowledge-graph/enrichment.mdx +++ b/docs/documentation/configuration/knowledge-graph/enrichment.mdx @@ -11,9 +11,9 @@ You can configure knowledge graph enrichment in the R2R configuration file. 
To d [kg] provider = "neo4j" batch_size = 256 -kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" [kg.kg_creation_settings] + kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" entity_types = [] # if empty, all entities are extracted relation_types = [] # if empty, all relations are extracted fragment_merge_count = 4 # number of fragments to merge into a single extraction @@ -26,8 +26,8 @@ kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" generation_config = { model = "gpt-4o-mini" } # and other generation params leiden_params = { max_levels = 10 } # more params in graspologic/partition/leiden.py - [kg.kg_search_config] - model = "gpt-4o-mini" + [kg.kg_search_settings] + generation_config = { model = "gpt-4o-mini" } ``` Next you can do GraphRAG with the knowledge graph. Find out more about GraphRAG in the [GraphRAG Guide](/cookbooks/graphrag). diff --git a/docs/documentation/configuration/knowledge-graph/overview.mdx b/docs/documentation/configuration/knowledge-graph/overview.mdx index d17fe2082..14f278abb 100644 --- a/docs/documentation/configuration/knowledge-graph/overview.mdx +++ b/docs/documentation/configuration/knowledge-graph/overview.mdx @@ -30,8 +30,8 @@ kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" generation_config = { model = "gpt-4o-mini" } # and other generation params below leiden_params = { max_levels = 10 } # more params in https://neo4j.com/docs/graph-data-science/current/algorithms/leiden/ - [kg.kg_search_config] - model = "gpt-4o-mini" + [kg.kg_search_settings] + generation_config = { model = "gpt-4o-mini" } ``` @@ -47,7 +47,7 @@ Let's break down the knowledge graph configuration options: - `kg_enrichment_settings`: Similar configuration for the model used in knowledge graph enrichment. - `generation_config`: Configuration for the model used in knowledge graph enrichment. - `leiden_params`: Parameters for the Leiden algorithm. 
-- `kg_search_config`: Similar configuration for the model used in knowledge graph search operations. +- `kg_search_settings`: Similar configuration for the model used in knowledge graph search operations. ### Neo4j Configuration @@ -88,7 +88,7 @@ The Neo4jKGProvider supports various operations: ### Customization -You can customize the knowledge graph extraction and search processes by modifying the `kg_extraction_prompt` and adjusting the model configurations in `kg_extraction_config` and `kg_search_config`. Moreover, you can customize the LLM models used in various parts of the knowledge graph creation process. All of these options can be selected at runtime, with the only exception being the specified database provider. For more details, refer to the knowledge graph settings in the [search API](/api-reference/endpoint/search). +You can customize the knowledge graph extraction and search processes by modifying the `kg_extraction_prompt` and adjusting the model configurations in `kg_creation_settings` and `kg_search_settings`. Moreover, you can customize the LLM models used in various parts of the knowledge graph creation process. All of these options can be selected at runtime, with the only exception being the specified database provider. For more details, refer to the knowledge graph settings in the [search API](/api-reference/endpoint/search). By leveraging the knowledge graph capabilities, you can enhance R2R's understanding of document relationships and improve the quality of search and retrieval operations. 
diff --git a/docs/documentation/configuration/postgres.mdx b/docs/documentation/configuration/postgres.mdx index 6041d3762..9a5c21d2d 100644 --- a/docs/documentation/configuration/postgres.mdx +++ b/docs/documentation/configuration/postgres.mdx @@ -35,7 +35,7 @@ password = "your_postgres_password" host = "your_postgres_host" port = "your_postgres_port" db_name = "your_database_name" -your_project_name = "your_project_collection_name" +your_project_name = "your_project_name" ``` 2. Alternatively, you can set the following environment variables: @@ -46,7 +46,7 @@ export POSTGRES_PASSWORD=your_postgres_password export POSTGRES_HOST=your_postgres_host export POSTGRES_PORT=your_postgres_port export POSTGRES_DBNAME=your_database_name -export POSTGRES_PROJECT_NAME=your_vector_collection_name +export POSTGRES_PROJECT_NAME=your_project_name ``` ## Advanced Postgres Features in R2R diff --git a/docs/documentation/configuration/rag.mdx b/docs/documentation/configuration/rag.mdx index 17eb8b5ad..4f08989b9 100644 --- a/docs/documentation/configuration/rag.mdx +++ b/docs/documentation/configuration/rag.mdx @@ -44,7 +44,7 @@ kg_search_settings = { "use_kg_search": True, "kg_search_type": "global", "kg_search_level": None, - "kg_search_generation_config": { + "generation_config": { "model": "gpt-4", "temperature": 0.1 }, diff --git a/docs/documentation/configuration/retrieval/knowledge-graph.mdx b/docs/documentation/configuration/retrieval/knowledge-graph.mdx index 2d2c2d5d6..1d7be1984 100644 --- a/docs/documentation/configuration/retrieval/knowledge-graph.mdx +++ b/docs/documentation/configuration/retrieval/knowledge-graph.mdx @@ -9,7 +9,7 @@ kg_search_settings = { "use_kg_search": True, "kg_search_type": "global", "kg_search_level": None, - "kg_search_generation_config": { + "generation_config": { "model": "gpt-4", "temperature": 0.1 }, @@ -28,7 +28,7 @@ response = client.search("query", kg_search_settings=kg_search_settings) 1. 
`use_kg_search` (bool): Whether to use knowledge graph search 2. `kg_search_type` (str): Type of knowledge graph search ('global' or 'local') 3. `kg_search_level` (Optional[str]): Level of knowledge graph search -4. `kg_search_generation_config` (Optional[GenerationConfig]): Configuration for knowledge graph search generation +4. `generation_config` (Optional[GenerationConfig]): Configuration for knowledge graph search generation 5. `entity_types` (list): Types of entities to search for 6. `relationships` (list): Types of relationships to search for 7. `max_community_description_length` (int): Maximum length of community descriptions (default: 65536) diff --git a/docs/documentation/deep-dive/providers/database.mdx b/docs/documentation/deep-dive/providers/database.mdx index f6c500cd3..2e09b1587 100644 --- a/docs/documentation/deep-dive/providers/database.mdx +++ b/docs/documentation/deep-dive/providers/database.mdx @@ -40,7 +40,7 @@ export POSTGRES_PASSWORD=your_postgres_password export POSTGRES_HOST=your_postgres_host export POSTGRES_PORT=your_postgres_port export POSTGRES_DBNAME=your_database_name -export POSTGRES_PROJECT_NAME=your_collection_name +export POSTGRES_PROJECT_NAME=your_project_name ``` Environment variables take precedence over the config settings in case of conflicts. The R2R Docker includes configuration options that facilitate integration with a combined Postgres+pgvector database setup. diff --git a/docs/documentation/deep-dive/providers/knowledge-graph.mdx b/docs/documentation/deep-dive/providers/knowledge-graph.mdx index 04e733c17..e516fb0af 100644 --- a/docs/documentation/deep-dive/providers/knowledge-graph.mdx +++ b/docs/documentation/deep-dive/providers/knowledge-graph.mdx @@ -29,13 +29,13 @@ These are located in the `r2r.toml` file, under the `[kg]` section. 
[kg] provider = "neo4j" batch_size = 256 -kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" user = "your_neo4j_user" password = "your_neo4j_password" url = "your_neo4j_url" database = "your_neo4j_database" [kg.kg_creation_settings] + kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" entity_types = ["Person", "Organization", "Location"] # if empty, all entities are extracted relation_types = ["works at", "founded by", "invested in"] # if empty, all relations are extracted max_knowledge_triples = 100 diff --git a/docs/documentation/js-sdk/retrieval.mdx b/docs/documentation/js-sdk/retrieval.mdx index cdaadf2ca..f10ddb521 100644 --- a/docs/documentation/js-sdk/retrieval.mdx +++ b/docs/documentation/js-sdk/retrieval.mdx @@ -109,7 +109,7 @@ const searchResponse = await client.search("What was Uber's profit in 2020?"); Level of knowledge graph search. - + Configuration for knowledge graph search generation. @@ -331,7 +331,7 @@ const ragResponse = await client.rag("What was Uber's profit in 2020?"); Level of knowledge graph search. - + Configuration for knowledge graph search generation. @@ -620,7 +620,7 @@ Note that any of the customization seen in AI powered search and RAG documentati Whether to use knowledge graph search. - + Optional configuration for knowledge graph search generation. diff --git a/docs/documentation/python-sdk/retrieval.mdx b/docs/documentation/python-sdk/retrieval.mdx index b551eb9b1..b38c5a316 100644 --- a/docs/documentation/python-sdk/retrieval.mdx +++ b/docs/documentation/python-sdk/retrieval.mdx @@ -129,7 +129,7 @@ search_response = client.search("What was Uber's profit in 2020?") Level of knowledge graph search. - + Configuration for knowledge graph search generation. 
@@ -206,7 +206,7 @@ kg_search_response = client.search( "use_kg_search": True, "kg_search_type": "local", "kg_search_level": "0", - "kg_search_generation_config": { + "generation_config": { "model": "gpt-4o-mini", "temperature": 0.7, }, @@ -393,7 +393,7 @@ rag_response = client.rag("What was Uber's profit in 2020?") Level of knowledge graph search. - + Configuration for knowledge graph search generation. @@ -703,7 +703,7 @@ Note that any of the customization seen in AI powered search and RAG documentati Level of knowledge graph search. - + Configuration for knowledge graph search generation. diff --git a/js/sdk/package-lock.json b/js/sdk/package-lock.json index 4f16d468a..dd9159003 100644 --- a/js/sdk/package-lock.json +++ b/js/sdk/package-lock.json @@ -1,12 +1,12 @@ { "name": "r2r-js", - "version": "0.3.1", + "version": "0.3.3", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "r2r-js", - "version": "0.3.0", + "version": "0.3.3", "license": "ISC", "dependencies": { "axios": "^1.7.4", diff --git a/js/sdk/package.json b/js/sdk/package.json index e39fd8e8b..d94515387 100644 --- a/js/sdk/package.json +++ b/js/sdk/package.json @@ -1,6 +1,6 @@ { "name": "r2r-js", - "version": "0.3.2", + "version": "0.3.3", "description": "", "main": "dist/index.js", "browser": "dist/index.browser.js", diff --git a/js/sdk/src/models.tsx b/js/sdk/src/models.tsx index 09145c46d..77f79d4f2 100644 --- a/js/sdk/src/models.tsx +++ b/js/sdk/src/models.tsx @@ -40,6 +40,7 @@ export interface VectorSearchSettings { use_hybrid_search?: boolean; filters?: Record; search_limit?: number; + offset?: number; selected_collection_ids?: string[]; index_measure: IndexMeasure; include_values?: boolean; @@ -54,7 +55,7 @@ export interface KGSearchSettings { use_kg_search?: boolean; kg_search_type?: "global" | "local"; kg_search_level?: number | null; - kg_search_generation_config?: GenerationConfig; + generation_config?: GenerationConfig; entity_types?: any[]; relationships?: any[]; 
max_community_description_length?: number; diff --git a/js/sdk/src/r2rClient.ts b/js/sdk/src/r2rClient.ts index 8ac8e6cb1..9e7209760 100644 --- a/js/sdk/src/r2rClient.ts +++ b/js/sdk/src/r2rClient.ts @@ -738,14 +738,30 @@ export class r2rClient { /** * An overview of the users in the R2R deployment. - * @param user_ids + * @param user_ids List of user IDs to get an overview for. + * * @param offset The offset to start listing users from. + * @param limit The maximum number of users to return. * @returns */ @feature("usersOverview") - async usersOverview(user_ids?: string[]): Promise> { + async usersOverview( + user_ids?: string[], + offset?: number, + limit?: number, + ): Promise> { this._ensureAuthenticated(); - const params: { user_ids?: string[] } = {}; + let params: Record = {}; + if (user_ids && user_ids.length > 0) { + params.user_ids = user_ids; + } + if (offset !== undefined) { + params.offset = offset; + } + if (limit !== undefined) { + params.limit = limit; + } + if (user_ids && user_ids.length > 0) { params.user_ids = user_ids; } @@ -786,16 +802,28 @@ export class r2rClient { /** * Get an overview of documents in the R2R deployment. * @param document_ids List of document IDs to get an overview for. + * @param offset The offset to start listing documents from. + * @param limit The maximum number of documents to return. * @returns A promise that resolves to the response from the server. 
*/ @feature("documentsOverview") - async documentsOverview(document_ids?: string[]): Promise { + async documentsOverview( + document_ids?: string[], + offset?: number, + limit?: number, + ): Promise { this._ensureAuthenticated(); - let params: Record = {}; + let params: Record = {}; if (document_ids && document_ids.length > 0) { params.document_ids = document_ids; } + if (offset !== undefined) { + params.offset = offset; + } + if (limit !== undefined) { + params.limit = limit; + } return this._makeRequest("GET", "documents_overview", { params }); } @@ -806,13 +834,26 @@ export class r2rClient { * @returns A promise that resolves to the response from the server. */ @feature("documentChunks") - async documentChunks(document_id: string): Promise { + async documentChunks( + document_id: string, + offset?: number, + limit?: number, + ): Promise { this._ensureAuthenticated(); + const params: Record = {}; + if (offset !== undefined) { + params.offset = offset; + } + if (limit !== undefined) { + params.limit = limit; + } + return this._makeRequest("GET", `document_chunks/${document_id}`, { headers: { "Content-Type": "application/json", }, + params, }); } @@ -822,10 +863,16 @@ export class r2rClient { * @returns A promise that resolves to the response from the server. 
*/ @feature("inspectKnowledgeGraph") - async inspectKnowledgeGraph(limit?: number): Promise> { + async inspectKnowledgeGraph( + offset?: number, + limit?: number, + ): Promise> { this._ensureAuthenticated(); - const params: { limit?: number } = {}; + const params: Record = {}; + if (offset !== undefined) { + params.offset = offset; + } if (limit !== undefined) { params.limit = limit; } @@ -843,8 +890,8 @@ export class r2rClient { @feature("collectionsOverview") async collectionsOverview( collectionIds?: string[], - limit?: number, offset?: number, + limit?: number, ): Promise> { this._ensureAuthenticated(); @@ -891,7 +938,10 @@ export class r2rClient { @feature("getCollection") async getCollection(collectionId: string): Promise> { this._ensureAuthenticated(); - return this._makeRequest("GET", `get_collection/${encodeURIComponent(collectionId)}`); + return this._makeRequest( + "GET", + `get_collection/${encodeURIComponent(collectionId)}`, + ); } /** @@ -909,9 +959,10 @@ export class r2rClient { ): Promise> { this._ensureAuthenticated(); - const data: { collection_id: string; name?: string; description?: string } = { - collection_id: collectionId, - }; + const data: { collection_id: string; name?: string; description?: string } = + { + collection_id: collectionId, + }; if (name !== undefined) { data.name = name; } @@ -1030,11 +1081,25 @@ export class r2rClient { * @returns A promise that resolves to the response from the server. 
*/ @feature("getCollectionsForUser") - async getCollectionsForUser(userId: string): Promise> { + async getCollectionsForUser( + userId: string, + offset?: number, + limit?: number, + ): Promise> { this._ensureAuthenticated(); + + const params: Record = {}; + if (offset !== undefined) { + params.offset = offset; + } + if (limit !== undefined) { + params.limit = limit; + } + return this._makeRequest( "GET", - `get_collections_for_user/${encodeURIComponent(userId)}`, + `user_collections/${encodeURIComponent(userId)}`, + { params }, ); } @@ -1080,12 +1145,25 @@ export class r2rClient { * @returns */ @feature("getDocumentCollections") - async getDocumentCollections(documentId: string): Promise> { + async getDocumentCollections( + documentId: string, + offset?: number, + limit?: number, + ): Promise> { this._ensureAuthenticated(); + const params: Record = {}; + if (offset !== undefined) { + params.offset = offset; + } + if (limit !== undefined) { + params.limit = limit; + } + return this._makeRequest( "GET", `get_document_collections/${encodeURIComponent(documentId)}`, + { params }, ); } diff --git a/py/Dockerfile b/py/Dockerfile index 0031e09ee..ba9470bab 100644 --- a/py/Dockerfile +++ b/py/Dockerfile @@ -38,6 +38,7 @@ COPY . 
/app # Copy the application and config COPY core /app/core COPY r2r /app/r2r +COPY shared /app/shared COPY r2r.toml /app/r2r.toml COPY pyproject.toml /app/pyproject.toml diff --git a/py/Dockerfile.dev b/py/Dockerfile.dev index ee307bbc3..6f997ca20 100644 --- a/py/Dockerfile.dev +++ b/py/Dockerfile.dev @@ -26,6 +26,7 @@ RUN poetry config virtualenvs.create false \ # Copy only the necessary application files COPY core /app/core COPY r2r /app/r2r +COPY shared /app/shared COPY r2r.toml /app/r2r.toml # Stage 2: Final Image @@ -45,8 +46,11 @@ COPY --from=builder /usr/local/bin /usr/local/bin # Copy only the necessary application files from the builder COPY --from=builder /app/core /app/core COPY --from=builder /app/r2r /app/r2r +COPY --from=builder /app/shared /app/shared COPY --from=builder /app/r2r.toml /app/r2r.toml +COPY pyproject.toml /app/pyproject.toml + # Set environment variables for port and host ARG PORT=7272 ARG HOST=0.0.0.0 diff --git a/py/cli/__init__.py b/py/cli/__init__.py index 8f4bb2e43..d9abf15cc 100644 --- a/py/cli/__init__.py +++ b/py/cli/__init__.py @@ -6,7 +6,7 @@ # From cli.py "cli", "main", - # From Command Group + # From Command Collection "command_group_cli", # From Commands "auth", diff --git a/py/cli/cli.py b/py/cli/cli.py index 9e656cb2f..11030c136 100644 --- a/py/cli/cli.py +++ b/py/cli/cli.py @@ -59,8 +59,8 @@ def main(): pass except Exception as e: # Handle other exceptions if needed - print(f"An error occurred: {e}") - sys.exit(1) + print(f"CLI error: An error occurred") + raise e if __name__ == "__main__": diff --git a/py/cli/commands/ingestion.py b/py/cli/commands/ingestion.py index 2414a6679..d58362f0d 100644 --- a/py/cli/commands/ingestion.py +++ b/py/cli/commands/ingestion.py @@ -55,20 +55,6 @@ def ingest_files_from_urls(client, urls): os.unlink(temp_file.name) -@cli.command() -@pass_context -def ingest_sample_file(ctx): - """Ingest the first sample file into R2R.""" - client = ctx.obj - sample_file_url = 
"https://raw.githubusercontent.com/SciPhi-AI/R2R/main/py/core/examples/data/aristotle.txt" - - with timer(): - response = ingest_files_from_urls(client, [sample_file_url]) - click.echo( - f"Sample file ingestion completed. Ingest files response:\n\n{response}" - ) - - @cli.command() @click.argument( "file_paths", nargs=-1, required=True, type=click.Path(exists=True) @@ -136,48 +122,6 @@ def update_files(ctx, file_paths, document_ids, metadatas): click.echo(json.dumps(response, indent=2)) -def ingest_files_from_urls(client, urls): - """Download and ingest files from given URLs.""" - files_to_ingest = [] - metadatas = [] - document_ids = [] - temp_files = [] - - try: - for url in urls: - filename = os.path.basename(urlparse(url).path) - is_pdf = filename.lower().endswith(".pdf") - - temp_file = tempfile.NamedTemporaryFile( - mode="wb" if is_pdf else "w+", - delete=False, - suffix=f"_{filename}", - ) - temp_files.append(temp_file) - - response = requests.get(url) - response.raise_for_status() - if is_pdf: - temp_file.write(response.content) - else: - temp_file.write(response.text) - temp_file.close() - - files_to_ingest.append(temp_file.name) - metadatas.append({"title": filename}) - document_ids.append(uuid.uuid5(uuid.NAMESPACE_DNS, url)) - - response = client.ingest_files( - files_to_ingest, metadatas=metadatas, document_ids=document_ids - ) - - return response["results"] - finally: - # Clean up temporary files - for temp_file in temp_files: - os.unlink(temp_file.name) - - @cli.command() @pass_context def ingest_sample_file(ctx): diff --git a/py/cli/commands/restructure.py b/py/cli/commands/restructure.py index cc60ac453..4c7e54d93 100644 --- a/py/cli/commands/restructure.py +++ b/py/cli/commands/restructure.py @@ -11,7 +11,7 @@ @click.option( "--document-ids", required=False, - default="", + default=None, help="Document IDs to create graph for (comma-separated)", ) @pass_context @@ -21,7 +21,7 @@ def create_graph(ctx, document_ids): """ client = ctx.obj with 
timer(): - if document_ids == "": + if document_ids is None: document_ids = [] else: document_ids = document_ids.split(",") diff --git a/py/cli/commands/retrieval.py b/py/cli/commands/retrieval.py index b72ab895e..33afaa41c 100644 --- a/py/cli/commands/retrieval.py +++ b/py/cli/commands/retrieval.py @@ -91,7 +91,7 @@ def search(ctx, query, **kwargs): "use_kg_search", "kg_search_type", "kg_search_level", - "kg_search_generation_config", + "generation_config", "entity_types", "relationships", "max_community_description_length", @@ -147,7 +147,7 @@ def search(ctx, query, **kwargs): @click.option( "--use-kg-search", is_flag=True, help="Use knowledge graph search" ) -@click.option("--kg-search-type", default="global", help="Local or Global") +@click.option("--kg-search-type", default="local", help="Local or Global") @click.option( "--kg-search-level", default=None, @@ -219,11 +219,12 @@ def rag(ctx, query, **kwargs): } if kg_search_settings.get("kg_search_model"): - kg_search_settings["kg_search_generation_config"] = { + kg_search_settings["generation_config"] = { "model": kg_search_settings.pop("kg_search_model") } with timer(): + print("passing kg_search_settings = ", kg_search_settings) response = client.rag( query, rag_generation_config, diff --git a/py/cli/commands/server.py b/py/cli/commands/server.py index 69fffdef9..90cba2a78 100644 --- a/py/cli/commands/server.py +++ b/py/cli/commands/server.py @@ -297,19 +297,6 @@ def image_exists(img): check=True, ) - subprocess.run( - [ - "docker", - "build", - "-t", - "unstructured-docker", - "-f", - "Dockerfile.unstructured", - ".", - ], - check=True, - ) - if config_path: config_path = os.path.abspath(config_path) diff --git a/py/cli/utils/param_types.py b/py/cli/utils/param_types.py index 41765cc74..4916b1c28 100644 --- a/py/cli/utils/param_types.py +++ b/py/cli/utils/param_types.py @@ -1,5 +1,5 @@ import json -from typing import Any, Dict +from typing import Any, Dict, Optional import asyncclick as click @@ -7,7 +7,7 @@ 
class JsonParamType(click.ParamType): name = "json" - def convert(self, value, param, ctx) -> Dict[str, Any]: + def convert(self, value, param, ctx) -> Optional[Dict[str, Any]]: if value is None: return None if isinstance(value, dict): diff --git a/py/compose.yaml b/py/compose.yaml index 62f393e00..b0b716b2d 100644 --- a/py/compose.yaml +++ b/py/compose.yaml @@ -194,7 +194,12 @@ services: retries: 5 restart: on-failure volumes: - - ${CONFIG_PATH:-/}:${CONFIG_PATH:-/app/config} + - type: bind + source: ${CONFIG_PATH:-/} + target: ${CONFIG_PATH:-/app/config} + read_only: true + bind: + propagation: rslave - hatchet_api_key:/hatchet_api_key:ro labels: - "traefik.enable=true" diff --git a/py/core/__init__.py b/py/core/__init__.py index ca8315779..dc7f6de24 100644 --- a/py/core/__init__.py +++ b/py/core/__init__.py @@ -112,8 +112,6 @@ "LoggingConfig", "PostgresLoggingConfig", "PostgresRunLoggingProvider", - "RedisLoggingConfig", - "RedisRunLoggingProvider", "RunLoggingSingleton", # Run Manager "RunManager", @@ -246,7 +244,6 @@ "StreamingSearchRAGPipe", "VectorSearchPipe", "VectorStoragePipe", - "WebSearchPipe", "KGStoragePipe", "MultiSearchPipe", ## PROVIDERS diff --git a/py/core/agent/base.py b/py/core/agent/base.py index 1b9375418..602ba6302 100644 --- a/py/core/agent/base.py +++ b/py/core/agent/base.py @@ -6,9 +6,10 @@ AsyncSyncMeta, LLMChatCompletion, LLMChatCompletionChunk, + Message, syncable, ) -from core.base.agent import Agent, Message +from core.base.agent import Agent class CombinedMeta(AsyncSyncMeta, ABCMeta): @@ -48,7 +49,7 @@ async def arun( messages: Optional[list[Message]] = None, *args, **kwargs, - ) -> list[LLMChatCompletion]: + ) -> list[Message]: self._reset() if system_instruction or not self.conversation: @@ -74,7 +75,7 @@ async def arun( async def process_llm_response( self, response: LLMChatCompletion, *args, **kwargs - ) -> str: + ) -> None: if not self._completed: message = response.choices[0].message if message.function_call: @@ -100,7 +101,7 
@@ async def process_llm_response( class R2RStreamingAgent(Agent): - async def arun( + async def arun( # type: ignore self, system_instruction: Optional[str] = None, messages: Optional[list[Message]] = None, @@ -140,8 +141,11 @@ def run( self.arun(system_instruction, messages, *args, **kwargs) ) - async def process_llm_response( - self, stream: LLMChatCompletionChunk, *args, **kwargs + async def process_llm_response( # type: ignore + self, + stream: Generator[LLMChatCompletionChunk, None, None], + *args, + **kwargs, ) -> AsyncGenerator[str, None]: function_name = None function_arguments = "" @@ -151,17 +155,30 @@ async def process_llm_response( delta = chunk.choices[0].delta if delta.tool_calls: for tool_call in delta.tool_calls: + if not tool_call.function: + raise ValueError( + "Tool function not found in tool call." + ) + name = tool_call.function.name + if not name: + raise ValueError("Tool name not found in tool call.") + arguments = tool_call.function.arguments + if not arguments: + raise ValueError( + "Tool arguments not found in tool call." + ) + results = await self.handle_function_or_tool_call( - tool_call.function.name, - tool_call.function.arguments, + name, + arguments, # FIXME: tool_call.id, *args, **kwargs, ) yield "" - yield f"{tool_call.function.name}" - yield f"{tool_call.function.arguments}" + yield f"{name}" + yield f"{arguments}" yield f"{results}" yield "" @@ -177,6 +194,11 @@ async def process_llm_response( yield delta.content if chunk.choices[0].finish_reason == "function_call": + if not function_name: + raise ValueError( + "Function name not found in function call." 
+ ) + yield "" yield f"{function_name}" yield f"{function_arguments}" diff --git a/py/core/agent/rag.py b/py/core/agent/rag.py index e017b5035..35c9c5f0a 100644 --- a/py/core/agent/rag.py +++ b/py/core/agent/rag.py @@ -1,5 +1,3 @@ -from typing import AsyncGenerator - from core.agent import R2RAgent, R2RStreamingAgent from core.base import ( format_search_results_for_llm, @@ -59,6 +57,7 @@ async def search( ) -> list[AggregateSearchResult]: response = await self.search_pipeline.run( to_async_generator([query]), + state=None, vector_search_settings=vector_search_settings, kg_search_settings=kg_search_settings, ) diff --git a/py/core/base/__init__.py b/py/core/base/__init__.py index 7a504b877..b3f34af79 100644 --- a/py/core/base/__init__.py +++ b/py/core/base/__init__.py @@ -86,8 +86,6 @@ "LoggingConfig", "PostgresLoggingConfig", "PostgresRunLoggingProvider", - "RedisLoggingConfig", - "RedisRunLoggingProvider", "RunLoggingSingleton", # Run Manager "RunManager", diff --git a/py/core/base/abstractions/__init__.py b/py/core/base/abstractions/__init__.py index b90959e28..f9947e8fb 100644 --- a/py/core/base/abstractions/__init__.py +++ b/py/core/base/abstractions/__init__.py @@ -1,6 +1,6 @@ -from .base import AsyncSyncMeta, syncable -from .completion import CompletionRecord, MessageType -from .document import ( +from shared.abstractions.base import AsyncSyncMeta, R2RSerializable, syncable +from shared.abstractions.completion import CompletionRecord, MessageType +from shared.abstractions.document import ( DataType, Document, DocumentExtraction, @@ -10,9 +10,15 @@ IngestionStatus, RestructureStatus, ) -from .embedding import EmbeddingPurpose, default_embedding_prefixes -from .exception import R2RDocumentProcessingError, R2RException -from .graph import ( +from shared.abstractions.embedding import ( + EmbeddingPurpose, + default_embedding_prefixes, +) +from shared.abstractions.exception import ( + R2RDocumentProcessingError, + R2RException, +) +from shared.abstractions.graph 
import ( Community, CommunityReport, Entity, @@ -21,27 +27,43 @@ RelationshipType, Triple, ) -from .llm import ( +from shared.abstractions.llm import ( GenerationConfig, LLMChatCompletion, LLMChatCompletionChunk, Message, RAGCompletion, ) -from .prompt import Prompt -from .restructure import KGCreationSettings, KGEnrichmentSettings -from .search import ( +from shared.abstractions.prompt import Prompt +from shared.abstractions.restructure import ( + KGCreationSettings, + KGEnrichmentSettings, +) +from shared.abstractions.search import ( AggregateSearchResult, + HybridSearchSettings, + KGCommunityResult, + KGEntityResult, + KGGlobalResult, + KGRelationshipResult, + KGSearchMethod, KGSearchResult, + KGSearchResultType, KGSearchSettings, VectorSearchResult, VectorSearchSettings, ) -from .user import Token, TokenData, UserStats -from .vector import StorageResult, Vector, VectorEntry, VectorType +from shared.abstractions.user import Token, TokenData, UserStats +from shared.abstractions.vector import ( + StorageResult, + Vector, + VectorEntry, + VectorType, +) __all__ = [ # Base abstractions + "R2RSerializable", "AsyncSyncMeta", "syncable", # Completion abstractions @@ -81,9 +103,16 @@ # Search abstractions "AggregateSearchResult", "KGSearchResult", + "KGSearchMethod", + "KGSearchResultType", + "KGEntityResult", + "KGRelationshipResult", + "KGCommunityResult", + "KGGlobalResult", "KGSearchSettings", "VectorSearchResult", "VectorSearchSettings", + "HybridSearchSettings", # Restructure abstractions "KGCreationSettings", "KGEnrichmentSettings", diff --git a/py/core/base/agent/__init__.py b/py/core/base/agent/__init__.py index 0f31d707c..53414f437 100644 --- a/py/core/base/agent/__init__.py +++ b/py/core/base/agent/__init__.py @@ -1,11 +1,10 @@ -from .agent import Agent, AgentConfig, Conversation, Message, Tool, ToolResult +from .agent import Agent, AgentConfig, Conversation, Tool, ToolResult __all__ = [ # Agent abstractions "Agent", "AgentConfig", "Conversation", - 
"Message", "Tool", "ToolResult", ] diff --git a/py/core/base/agent/agent.py b/py/core/base/agent/agent.py index f477bb45b..705b1b23e 100644 --- a/py/core/base/agent/agent.py +++ b/py/core/base/agent/agent.py @@ -7,11 +7,12 @@ from core.base.abstractions import ( GenerationConfig, LLMChatCompletion, + Message, MessageType, ) from core.base.providers import CompletionProvider, PromptProvider -from .base import Message, Tool, ToolResult +from .base import Tool, ToolResult class Conversation: @@ -73,9 +74,9 @@ def __init__( self.llm_provider = llm_provider self.prompt_provider = prompt_provider self.config = config - self.conversation = [] + self.conversation: list[Message] = [] self._completed = False - self._tools = [] + self._tools: list[Tool] = [] self._register_tools() @abstractmethod @@ -116,15 +117,15 @@ async def arun( @abstractmethod async def process_llm_response( self, - response: Union[Dict[str, Any], AsyncGenerator[Dict[str, Any], None]], + response: Any, *args, **kwargs, - ) -> Union[str, AsyncGenerator[str, None]]: + ) -> Union[None, AsyncGenerator[str, None]]: pass async def execute_tool(self, tool_name: str, *args, **kwargs) -> str: if tool := next((t for t in self.tools if t.name == tool_name), None): - return await tool.function(*args, **kwargs) + return await tool.results_function(*args, **kwargs) else: return f"Error: Tool {tool_name} not found." 
@@ -176,7 +177,7 @@ async def handle_function_or_tool_call( tool_id: Optional[str] = None, *args, **kwargs, - ) -> Union[str, AsyncGenerator[str, None]]: + ) -> ToolResult: ( self.conversation.append( Message( @@ -225,7 +226,6 @@ async def handle_function_or_tool_call( ( self.conversation.append( Message( - tool_call_id=tool_id, role="tool", content=str(tool_result.llm_formatted_result), name=function_name, diff --git a/py/core/base/agent/base.py b/py/core/base/agent/base.py index 201728b6f..8b347f31e 100644 --- a/py/core/base/agent/base.py +++ b/py/core/base/agent/base.py @@ -1,11 +1,9 @@ -from typing import Any, Callable, Dict, Optional, Union +from typing import Any, Callable, Dict, Optional -from pydantic import BaseModel +from ..abstractions import R2RSerializable -from core.base.abstractions import MessageType - -class Tool(BaseModel): +class Tool(R2RSerializable): name: str description: str results_function: Callable @@ -17,15 +15,7 @@ class Config: arbitrary_types_allowed = True -class ToolResult(BaseModel): +class ToolResult(R2RSerializable): raw_result: Any llm_formatted_result: str stream_result: Optional[str] = None - - -class Message(BaseModel): - role: Union[MessageType, str] - content: Optional[str] = None - name: Optional[str] = None - function_call: Optional[Dict[str, Any]] = None - tool_calls: Optional[list[Dict[str, Any]]] = None diff --git a/py/core/base/api/models/__init__.py b/py/core/base/api/models/__init__.py index 961c4234c..777ad2ee5 100644 --- a/py/core/base/api/models/__init__.py +++ b/py/core/base/api/models/__init__.py @@ -1,16 +1,23 @@ -from .auth.responses import GenericMessageResponse, TokenResponse, UserResponse -from .ingestion.responses import ( +from shared.api.models.auth.responses import ( + GenericMessageResponse, + TokenResponse, + UserResponse, + WrappedGenericMessageResponse, + WrappedTokenResponse, + WrappedUserResponse, +) +from shared.api.models.ingestion.responses import ( IngestionResponse, WrappedIngestionResponse, 
WrappedUpdateResponse, ) -from .management.responses import ( +from shared.api.models.management.responses import ( AnalyticsResponse, AppSettingsResponse, + CollectionOverviewResponse, + CollectionResponse, DocumentChunkResponse, DocumentOverviewResponse, - GroupOverviewResponse, - GroupResponse, KnowledgeGraphResponse, LogResponse, PromptResponse, @@ -25,20 +32,23 @@ WrappedCollectionResponse, WrappedDocumentChunkResponse, WrappedDocumentOverviewResponse, + WrappedGetPromptsResponse, WrappedKnowledgeGraphResponse, WrappedLogResponse, WrappedPromptMessageResponse, WrappedScoreCompletionResponse, WrappedServerStatsResponse, + WrappedUserCollectionResponse, WrappedUserOverviewResponse, + WrappedUsersInCollectionResponse, ) -from .restructure.responses import ( +from shared.api.models.restructure.responses import ( KGCreationResponse, KGEnrichmentResponse, WrappedKGCreationResponse, WrappedKGEnrichmentResponse, ) -from .retrieval.responses import ( +from shared.api.models.retrieval.responses import ( RAGAgentResponse, RAGResponse, SearchResponse, @@ -52,6 +62,9 @@ "GenericMessageResponse", "TokenResponse", "UserResponse", + "WrappedTokenResponse", + "WrappedUserResponse", + "WrappedGenericMessageResponse", # Ingestion Responses "IngestionResponse", "WrappedIngestionResponse", @@ -72,8 +85,8 @@ "DocumentOverviewResponse", "DocumentChunkResponse", "KnowledgeGraphResponse", - "GroupResponse", - "GroupOverviewResponse", + "CollectionResponse", + "CollectionOverviewResponse", "WrappedPromptMessageResponse", "WrappedServerStatsResponse", "WrappedLogResponse", @@ -81,12 +94,18 @@ "WrappedAppSettingsResponse", "WrappedScoreCompletionResponse", "WrappedUserOverviewResponse", + "WrappedDocumentChunkResponse", "WrappedDocumentOverviewResponse", "WrappedDocumentChunkResponse", "WrappedKnowledgeGraphResponse", "WrappedCollectionResponse", + "WrappedDocumentChunkResponse", "WrappedCollectionListResponse", "WrappedAddUserResponse", + "WrappedUsersInCollectionResponse", + 
"WrappedGetPromptsResponse", + "WrappedUserCollectionResponse", + "WrappedDocumentChunkResponse", "WrappedCollectionOverviewResponse", # Retrieval Responses "SearchResponse", diff --git a/py/core/base/logging/__init__.py b/py/core/base/logging/__init__.py index e72199005..333a03c2c 100644 --- a/py/core/base/logging/__init__.py +++ b/py/core/base/logging/__init__.py @@ -11,8 +11,6 @@ LoggingConfig, PostgresLoggingConfig, PostgresRunLoggingProvider, - RedisLoggingConfig, - RedisRunLoggingProvider, RunLoggingSingleton, ) from .run_manager import RunManager, manage_run @@ -30,8 +28,6 @@ "LoggingConfig", "PostgresLoggingConfig", "PostgresRunLoggingProvider", - "RedisLoggingConfig", - "RedisRunLoggingProvider", "RunLoggingSingleton", # Run Manager "RunManager", diff --git a/py/core/base/logging/log_processor.py b/py/core/base/logging/log_processor.py index 4953016f7..2eaf4dced 100644 --- a/py/core/base/logging/log_processor.py +++ b/py/core/base/logging/log_processor.py @@ -19,7 +19,7 @@ class LogProcessor: def __init__(self, filters: Dict[str, Callable[[Dict[str, Any]], bool]]): self.filters = filters - self.populations = {name: [] for name in filters} + self.populations: dict = {name: [] for name in filters} def process_log(self, log: Dict[str, Any]): for name, filter_func in self.filters.items(): diff --git a/py/core/base/logging/run_logger.py b/py/core/base/logging/run_logger.py index 99fbff46d..0c84a48fa 100644 --- a/py/core/base/logging/run_logger.py +++ b/py/core/base/logging/run_logger.py @@ -28,7 +28,7 @@ class LoggingConfig(ProviderConfig): log_info_table: str = "log_info" logging_path: Optional[str] = None - def validate(self) -> None: + def validate_config(self) -> None: pass @property @@ -148,6 +148,11 @@ async def log( key: str, value: str, ): + if not self.conn: + raise ValueError( + "Initialize the connection pool before attempting to log." 
+ ) + await self.conn.execute( f""" INSERT INTO {self.log_table} (timestamp, run_id, key, value) @@ -163,6 +168,11 @@ async def info_log( run_type: RunType, user_id: UUID, ): + if not self.conn: + raise ValueError( + "Initialize the connection pool before attempting to log." + ) + await self.conn.execute( f""" INSERT INTO {self.log_info_table} (timestamp, run_id, run_type, user_id) @@ -183,6 +193,11 @@ async def get_info_logs( run_type_filter: Optional[RunType] = None, user_ids: Optional[list[UUID]] = None, ) -> list[RunInfoLog]: + if not self.conn: + raise ValueError( + "Initialize the connection pool before attempting to log." + ) + cursor = await self.conn.cursor() query = "SELECT run_id, run_type, timestamp, user_id" query += f" FROM {self.log_info_table}" @@ -217,6 +232,11 @@ async def get_logs( ) -> list: if not run_ids: raise ValueError("No run ids provided.") + if not self.conn: + raise ValueError( + "Initialize the connection pool before attempting to log." + ) + cursor = await self.conn.cursor() placeholders = ",".join(["?" for _ in run_ids]) query = f""" @@ -249,6 +269,10 @@ async def get_logs( async def score_completion( self, run_id: UUID, message_id: UUID, score: float ): + if not self.conn: + raise ValueError( + "Initialize the connection pool before attempting to log." + ) cursor = await self.conn.cursor() await cursor.execute( @@ -281,6 +305,7 @@ async def score_completion( f"UPDATE {self.log_table} SET value = ? WHERE run_id = ? 
AND key = 'completion_record'", (json.dumps(completion_record), str(run_id)), ) + await self.conn.commit() return {"message": "Score updated successfully."} @@ -292,7 +317,7 @@ class PostgresLoggingConfig(LoggingConfig): log_table: str = "logs" log_info_table: str = "log_info" - def validate(self) -> None: + def validate_config(self) -> None: required_env_vars = [ "POSTGRES_DBNAME", "POSTGRES_USER", @@ -386,6 +411,11 @@ async def log( key: str, value: str, ): + if not self.pool: + raise ValueError( + "Initialize the connection pool before attempting to log." + ) + async with self.pool.acquire() as conn: await conn.execute( f"INSERT INTO {self.log_table} (timestamp, run_id, key, value) VALUES (NOW(), $1, $2, $3)", @@ -400,6 +430,11 @@ async def info_log( run_type: RunType, user_id: UUID, ): + if not self.pool: + raise ValueError( + "Initialize the connection pool before attempting to log." + ) + async with self.pool.acquire() as conn: await conn.execute( f"INSERT INTO {self.log_info_table} (timestamp, run_id, run_type, user_id) VALUES (NOW(), $1, $2, $3)", @@ -415,6 +450,11 @@ async def get_info_logs( run_type_filter: Optional[RunType] = None, user_ids: Optional[list[UUID]] = None, ) -> list[RunInfoLog]: + if not self.pool: + raise ValueError( + "Initialize the connection pool before attempting to log." + ) + query = f"SELECT run_id, run_type, timestamp, user_id FROM {self.log_info_table}" conditions = [] params = [] @@ -453,6 +493,10 @@ async def get_logs( ) -> list: if not run_ids: raise ValueError("No run ids provided.") + if not self.pool: + raise ValueError( + "Initialize the connection pool before attempting to log." + ) placeholders = ",".join([f"${i + 1}" for i in range(len(run_ids))]) query = f""" @@ -472,6 +516,11 @@ async def get_logs( async def score_completion( self, run_id: UUID, message_id: UUID, score: float ): + if not self.pool: + raise ValueError( + "Initialize the connection pool before attempting to log." 
+ ) + async with self.pool.acquire() as conn: row = await conn.fetchrow( f"SELECT value FROM {self.log_table} WHERE run_id = $1 AND key = 'completion_record'", @@ -510,205 +559,14 @@ async def score_completion( return {"message": "Score not updated."} -class RedisLoggingConfig(LoggingConfig): - provider: str = "redis" - log_table: str = "logs" - log_info_table: str = "log_info" - - def validate(self) -> None: - required_env_vars = ["REDIS_CLUSTER_IP", "REDIS_CLUSTER_PORT"] - for var in required_env_vars: - if not os.getenv(var): - raise ValueError(f"Environment variable {var} is not set.") - - @property - def supported_providers(self) -> list[str]: - return ["redis"] - - -class RedisRunLoggingProvider(RunLoggingProvider): - def __init__(self, config: RedisLoggingConfig): - logger.info( - f"Initializing RedisRunLoggingProvider with config: {config}" - ) - - if not all( - [ - os.getenv("REDIS_CLUSTER_IP"), - os.getenv("REDIS_CLUSTER_PORT"), - ] - ): - raise ValueError( - "Please set the environment variables REDIS_CLUSTER_IP and REDIS_CLUSTER_PORT to run `LoggingDatabaseConnection` with `redis`." - ) - try: - from redis.asyncio import Redis - except ImportError: - raise ValueError( - "Error, `redis` is not installed. Please install it using `pip install redis`." 
- ) - - cluster_ip = os.getenv("REDIS_CLUSTER_IP") - port = os.getenv("REDIS_CLUSTER_PORT") - self.redis = Redis(host=cluster_ip, port=port, decode_responses=True) - self.log_key = config.log_table - self.log_info_key = config.log_info_table - - async def __aenter__(self): - return self - - async def __aexit__(self, exc_type, exc, tb): - await self.close() - - async def close(self): - await self.redis.close() - - async def log( - self, - run_id: UUID, - key: str, - value: str, - ): - timestamp = datetime.now().timestamp() - log_entry = { - "timestamp": timestamp, - "run_id": str(run_id), - "key": key, - "value": value, - } - await self.redis.lpush( - f"{self.log_key}:{str(run_id)}", json.dumps(log_entry) - ) - - async def info_log( - self, - run_id: UUID, - run_type: RunType, - user_id: UUID, - ): - timestamp = datetime.now().timestamp() - log_entry = { - "timestamp": timestamp, - "run_id": str(run_id), - "run_type": run_type, - "user_id": str(user_id), - } - await self.redis.hset( - self.log_info_key, str(run_id), json.dumps(log_entry) - ) - await self.redis.zadd( - f"{self.log_info_key}_sorted", {str(run_id): timestamp} - ) - - async def get_info_logs( - self, - offset: int = 0, - limit: int = 100, - run_type_filter: Optional[RunType] = None, - user_ids: Optional[list[UUID]] = None, - ) -> list[RunInfoLog]: - run_info_list = [] - start = offset - count_per_batch = 100 # Adjust batch size as needed - - while len(run_info_list) < limit: - run_ids = await self.redis.zrevrange( - f"{self.log_info_key}_sorted", - start, - start + count_per_batch - 1, - ) - if not run_ids: - break # No more log IDs to process - - start += count_per_batch - - for run_id in run_ids: - log_entry = json.loads( - await self.redis.hget(self.log_info_key, run_id) - ) - - # Check if the log entry matches the filters - if ( - run_type_filter is None - or log_entry["run_type"] == run_type_filter - ) and ( - user_ids is None or UUID(log_entry["user_id"]) in user_ids - ): - run_info_list.append( - 
RunInfoLog( - run_id=UUID(log_entry["run_id"]), - run_type=log_entry["run_type"], - timestamp=datetime.fromtimestamp( - log_entry["timestamp"] - ), - user_id=UUID(log_entry["user_id"]), - ) - ) - - if len(run_info_list) >= limit: - break - - return run_info_list[:limit] - - async def get_logs( - self, run_ids: list[UUID], limit_per_run: int = 10 - ) -> list: - logs = [] - for run_id in run_ids: - raw_logs = await self.redis.lrange( - f"{self.log_key}:{str(run_id)}", 0, limit_per_run - 1 - ) - for raw_log in raw_logs: - json_log = json.loads(raw_log) - json_log["run_id"] = UUID(json_log["run_id"]) - logs.append(json_log) - return logs - - async def score_completion( - self, run_id: UUID, message_id: UUID, score: float - ): - log_key = f"{self.log_key}:{str(run_id)}" - logs = await self.redis.lrange(log_key, 0, -1) - - for i, log_entry in enumerate(logs): - log_data = json.loads(log_entry) - if log_data.get("key") == "completion_record": - completion_record = json.loads(log_data["value"]) - - if completion_record.get("message_id") == str(message_id): - if ( - "score" not in completion_record - or completion_record["score"] is None - ): - completion_record["score"] = [score] - elif isinstance(completion_record["score"], list): - completion_record["score"] = [ - x - for x in completion_record["score"] - if x is not None - ] - completion_record["score"].append(score) - else: - completion_record["score"] = [ - completion_record["score"], - score, - ] - - log_data["value"] = json.dumps(completion_record) - await self.redis.lset(log_key, i, json.dumps(log_data)) - return {"message": "Score updated successfully."} - - return {"message": "Score not updated."} - - class RunLoggingSingleton: _instance = None _is_configured = False + _config: Optional[LoggingConfig] = None SUPPORTED_PROVIDERS = { "local": LocalRunLoggingProvider, "postgres": PostgresRunLoggingProvider, - "redis": RedisRunLoggingProvider, } @classmethod @@ -716,9 +574,7 @@ def get_instance(cls): return 
cls.SUPPORTED_PROVIDERS[cls._config.provider](cls._config) @classmethod - def configure( - cls, logging_config: Optional[LoggingConfig] = LoggingConfig() - ): + def configure(cls, logging_config: LoggingConfig = LoggingConfig()): if not cls._is_configured: cls._config = logging_config cls._is_configured = True diff --git a/py/core/base/logging/run_manager.py b/py/core/base/logging/run_manager.py index d5b4adc3e..daa1de925 100644 --- a/py/core/base/logging/run_manager.py +++ b/py/core/base/logging/run_manager.py @@ -4,19 +4,19 @@ from typing import Optional from uuid import UUID -from core.base.api.models.auth.responses import UserResponse +from core.base.api.models import UserResponse from core.base.logging.base import RunType from core.base.utils import generate_run_id from .run_logger import RunLoggingSingleton -run_id_var = contextvars.ContextVar("run_id", default=None) +run_id_var = contextvars.ContextVar("run_id", default=generate_run_id()) class RunManager: def __init__(self, logger: RunLoggingSingleton): self.logger = logger - self.run_info = {} + self.run_info: dict[UUID, dict] = {} async def set_run_info(self, run_type: str, run_id: Optional[UUID] = None): run_id = run_id or run_id_var.get() @@ -71,4 +71,4 @@ async def manage_run( run_id_var.reset(token) else: # We're in a test environment, just reset the run_id_var - run_id_var.set(None) + run_id_var.set(None) # type: ignore diff --git a/py/core/base/parsers/base_parser.py b/py/core/base/parsers/base_parser.py index f1bb49d7f..f2d7cd02c 100644 --- a/py/core/base/parsers/base_parser.py +++ b/py/core/base/parsers/base_parser.py @@ -3,7 +3,7 @@ from abc import ABC, abstractmethod from typing import AsyncGenerator, Generic, TypeVar -from ..abstractions.document import DataType +from ..abstractions import DataType T = TypeVar("T") diff --git a/py/core/base/pipeline/base_pipeline.py b/py/core/base/pipeline/base_pipeline.py index 3bae02015..0414ef90f 100644 --- a/py/core/base/pipeline/base_pipeline.py +++ 
b/py/core/base/pipeline/base_pipeline.py @@ -24,7 +24,7 @@ def __init__( self.upstream_outputs: list[list[dict[str, str]]] = [] self.pipe_logger = pipe_logger or RunLoggingSingleton() self.run_manager = run_manager or RunManager(self.pipe_logger) - self.futures = {} + self.futures: dict[str, asyncio.Future] = {} self.level = 0 def add_pipe( @@ -51,7 +51,6 @@ async def run( ): """Run the pipeline.""" run_manager = run_manager or self.run_manager - self.state = state or AsyncState() current_input = input async with manage_run(run_manager): @@ -108,7 +107,7 @@ async def _run_pipe( input_dict = {"message": input} # Collection upstream outputs by prev_pipe_name - grouped_upstream_outputs = {} + grouped_upstream_outputs: dict[str, list] = {} for upstream_input in add_upstream_outputs: upstream_pipe_name = upstream_input["prev_pipe_name"] if upstream_pipe_name not in grouped_upstream_outputs: @@ -147,7 +146,6 @@ async def replay_items_as_async_gen(items): input_dict[upstream_input["input_field"]] = outputs[ prev_output_field ] - async for ele in await pipe.run( pipe.Input(**input_dict), self.state, diff --git a/py/core/base/pipes/base_pipe.py b/py/core/base/pipes/base_pipe.py index 40cd58e20..f59f8a9b2 100644 --- a/py/core/base/pipes/base_pipe.py +++ b/py/core/base/pipes/base_pipe.py @@ -2,11 +2,12 @@ import logging from abc import abstractmethod from enum import Enum -from typing import Any, AsyncGenerator, Optional +from typing import Any, AsyncGenerator, Generic, Optional, TypeVar from uuid import UUID from pydantic import BaseModel +from core.base.logging import RunType from core.base.logging.run_logger import RunLoggingSingleton from core.base.logging.run_manager import RunManager, manage_run @@ -62,7 +63,10 @@ async def delete(self, outer_key: str, inner_key: Optional[str] = None): del self.data[outer_key][inner_key] -class AsyncPipe: +T = TypeVar("T") + + +class AsyncPipe(Generic[T]): """An asynchronous pipe for processing data with logging capabilities.""" class 
PipeConfig(BaseModel): @@ -86,15 +90,15 @@ class Config: def __init__( self, + config: PipeConfig, type: PipeType = PipeType.OTHER, - config: Optional[PipeConfig] = None, pipe_logger: Optional[RunLoggingSingleton] = None, run_manager: Optional[RunManager] = None, ): self._config = config or self.PipeConfig() self._type = type self.pipe_logger = pipe_logger or RunLoggingSingleton() - self.log_queue = asyncio.Queue() + self.log_queue: asyncio.Queue = asyncio.Queue() self.log_worker_task = None self._run_manager = run_manager or RunManager(self.pipe_logger) @@ -124,23 +128,24 @@ async def enqueue_log(self, run_id: UUID, key: str, value: str): async def run( self, input: Input, - state: Optional[AsyncState] = None, + state: Optional[AsyncState], run_manager: Optional[RunManager] = None, *args: Any, **kwargs: Any, - ) -> AsyncGenerator[Any, None]: + ) -> AsyncGenerator[T, None]: """Run the pipe with logging capabilities.""" run_manager = run_manager or self._run_manager + state = state or AsyncState() async def wrapped_run() -> AsyncGenerator[Any, None]: - async with manage_run(run_manager, self.config.name) as run_id: - self.log_worker_task = asyncio.create_task( + async with manage_run(run_manager, RunType.UNSPECIFIED) as run_id: + self.log_worker_task = asyncio.create_task( # type: ignore self.log_worker(), name=f"log-worker-{self.config.name}" ) try: - async for result in self._run_logic( - input, state=state, run_id=run_id, *args, **kwargs + async for result in self._run_logic( # type: ignore + input, state, run_id, *args, **kwargs ): yield result finally: @@ -166,8 +171,9 @@ async def wrapped_run() -> AsyncGenerator[Any, None]: async def _run_logic( self, input: Input, + state: AsyncState, run_id: UUID, *args: Any, **kwargs: Any, - ) -> AsyncGenerator[Any, None]: + ) -> AsyncGenerator[T, None]: pass diff --git a/py/core/base/providers/auth.py b/py/core/base/providers/auth.py index 4e00c7ff8..df43ac0b3 100644 --- a/py/core/base/providers/auth.py +++ 
b/py/core/base/providers/auth.py @@ -5,35 +5,36 @@ from fastapi import Security from fastapi.security import HTTPAuthorizationCredentials, HTTPBearer +from ...utils import generate_id_from_label from ..abstractions import R2RException, Token, TokenData from ..api.models import UserResponse -from ..utils import generate_id_from_label from .base import Provider, ProviderConfig +from .crypto import CryptoProvider logger = logging.getLogger(__name__) class AuthConfig(ProviderConfig): secret_key: Optional[str] = None - require_authentication: Optional[bool] = False - require_email_verification: Optional[bool] = False + require_authentication: bool = False + require_email_verification: bool = False + default_admin_email: str = "admin@example.com" + default_admin_password: str = "change_me_immediately" access_token_lifetime_in_minutes: Optional[int] = None refresh_token_lifetime_in_days: Optional[int] = None - default_admin_email: Optional[str] = "admin@example.com" - default_admin_password: Optional[str] = "change_me_immediately" @property def supported_providers(self) -> list[str]: return ["r2r"] - def validate(self) -> None: - super().validate() + def validate_config(self) -> None: + pass class AuthProvider(Provider, ABC): security = HTTPBearer(auto_error=False) - def __init__(self, config: AuthConfig): + def __init__(self, config: AuthConfig, crypto_provider: CryptoProvider): if not isinstance(config, AuthConfig): raise ValueError( "AuthProvider must be initialized with an AuthConfig" @@ -41,7 +42,9 @@ def __init__(self, config: AuthConfig): self.config = config self.admin_email = config.default_admin_email self.admin_password = config.default_admin_password + self.crypto_provider = crypto_provider super().__init__(config) + self.config: AuthConfig = config # for type hinting def _get_default_admin_user(self) -> UserResponse: return UserResponse( @@ -64,11 +67,11 @@ def create_refresh_token(self, data: dict) -> str: pass @abstractmethod - def decode_token(self, token: 
str) -> TokenData: + async def decode_token(self, token: str) -> TokenData: pass @abstractmethod - def user(self, token: str) -> UserResponse: + async def user(self, token: str) -> UserResponse: pass @abstractmethod @@ -78,21 +81,23 @@ def get_current_active_user( pass @abstractmethod - def register(self, email: str, password: str) -> dict[str, str]: + async def register(self, email: str, password: str) -> dict[str, str]: pass @abstractmethod - def verify_email( + async def verify_email( self, email: str, verification_code: str ) -> dict[str, str]: pass @abstractmethod - def login(self, email: str, password: str) -> dict[str, Token]: + async def login(self, email: str, password: str) -> dict[str, Token]: pass @abstractmethod - def refresh_access_token(self, refresh_token: str) -> dict[str, str]: + async def refresh_access_token( + self, refresh_token: str + ) -> dict[str, Token]: pass async def auth_wrapper( @@ -114,3 +119,23 @@ async def auth_wrapper( message=f"Error '{e}' occurred during authentication.", status_code=401, ) + + @abstractmethod + async def change_password( + self, user: UserResponse, current_password: str, new_password: str + ) -> dict[str, str]: + pass + + @abstractmethod + async def request_password_reset(self, email: str) -> dict[str, str]: + pass + + @abstractmethod + async def confirm_password_reset( + self, reset_token: str, new_password: str + ) -> dict[str, str]: + pass + + @abstractmethod + async def logout(self, token: str) -> dict[str, str]: + pass diff --git a/py/core/base/providers/base.py b/py/core/base/providers/base.py index d907b01e2..30f6211cd 100644 --- a/py/core/base/providers/base.py +++ b/py/core/base/providers/base.py @@ -15,7 +15,7 @@ class Config: ignore_extra = True @abstractmethod - def validate(self) -> None: + def validate_config(self) -> None: pass @classmethod @@ -38,13 +38,18 @@ def supported_providers(self) -> list[str]: """Define a list of supported providers.""" pass + @classmethod + def from_dict( + cls: 
Type["ProviderConfig"], data: dict[str, Any] + ) -> "ProviderConfig": + """Create a new instance of the config from a dictionary.""" + return cls.create(**data) + class Provider(ABC): """A base provider class to provide a common interface for all providers.""" - def __init__( - self, config: Optional[ProviderConfig] = None, *args, **kwargs - ): + def __init__(self, config: ProviderConfig, *args, **kwargs): if config: - config.validate() + config.validate_config() self.config = config diff --git a/py/core/base/providers/chunking.py b/py/core/base/providers/chunking.py index cf28f70a4..231b14e4f 100644 --- a/py/core/base/providers/chunking.py +++ b/py/core/base/providers/chunking.py @@ -3,7 +3,7 @@ from enum import Enum from typing import AsyncGenerator, Optional, Union -from ..abstractions.document import DocumentExtraction +from ..abstractions import DocumentExtraction from .base import Provider, ProviderConfig logger = logging.getLogger(__name__) @@ -21,13 +21,13 @@ class Strategy(str, Enum): class ChunkingConfig(ProviderConfig): provider: str = "unstructured_local" - def validate(self) -> None: + def validate_config(self) -> None: if self.provider not in self.supported_providers: raise ValueError(f"Provider {self.provider} is not supported.") @property def supported_providers(self) -> list[str]: - return ["r2r", "unstructured_local", "unstructured_api", None] + return ["r2r", "unstructured_local", "unstructured_api"] class Config: json_schema_extra = { @@ -47,14 +47,14 @@ class Config: } -class R2RChunkingConfig(ProviderConfig): +class R2RChunkingConfig(ChunkingConfig): provider: str = "r2r" method: Strategy = Strategy.RECURSIVE chunk_size: int = 512 chunk_overlap: int = 20 max_chunk_size: Optional[int] = None - def validate(self) -> None: + def validate_config(self) -> None: if self.provider not in self.supported_providers: raise ValueError(f"Provider {self.provider} is not supported.") if self.chunk_size <= 0: @@ -64,7 +64,7 @@ def validate(self) -> None: 
@property def supported_providers(self) -> list[str]: - return ["r2r", "unstructured_local", "unstructured_api", None] + return ["r2r", "unstructured_local", "unstructured_api"] class Config: json_schema_extra = { @@ -119,8 +119,8 @@ class UnstructuredChunkingConfig(ChunkingConfig): unique_element_ids: bool = False xml_keep_tags: bool = False - def validate(self) -> None: - super().validate() + def validate_config(self) -> None: + super().validate_config() if self.strategy not in ["auto", "fast", "hi_res"]: raise ValueError("strategy must be 'auto', 'fast', or 'hi_res'") @@ -136,3 +136,53 @@ async def chunk( ) -> AsyncGenerator[str, None]: """Chunk the parsed document using the configured chunking strategy.""" pass + + multipage_sections: bool = True + new_after_n_chars: Optional[int] = 1500 + ocr_languages: Optional[list[str]] = None + output_format: str = "application/json" + overlap: int = 0 + overlap_all: bool = False + pdf_infer_table_structure: bool = True + + similarity_threshold: Optional[float] = None + skip_infer_table_types: Optional[list[str]] = None + split_pdf_concurrency_level: int = 5 + split_pdf_page: bool = True + starting_page_number: Optional[int] = None + strategy: str = "auto" + chunking_strategy: Strategy = Strategy.BY_TITLE + unique_element_ids: bool = False + xml_keep_tags: bool = False + + def validate_config(self) -> None: + if self.strategy not in ["auto", "fast", "hi_res"]: + raise ValueError("strategy must be 'auto', 'fast', or 'hi_res'") + + +__all__ = [ + "GenerationConfig", + "KGSearchSettings", + "MessageType", + "Message", + "ChunkingConfig", + "KGSearchResultType", + "KGSearchMethod", + "KGEntityResult", + "KGRelationshipResult", + "KGCommunityResult", + "KGGlobalResult", + "KGSearchResult", + "R2RException", + "Token", + "HybridSearchSettings", + "VectorSearchSettings", + "KGCreationSettings", + "KGEnrichmentSettings", + "KGCreationResponse", + "KGEnrichmentResponse", + "UserResponse", + "VectorSearchResult", + "SearchResponse", 
+ "RAGResponse", +] diff --git a/py/core/base/providers/crypto.py b/py/core/base/providers/crypto.py index debe7b082..b1e8d3118 100644 --- a/py/core/base/providers/crypto.py +++ b/py/core/base/providers/crypto.py @@ -9,10 +9,9 @@ class CryptoConfig(ProviderConfig): @property def supported_providers(self) -> list[str]: - return [None, "bcrypt"] # Add other crypto providers as needed + return ["bcrypt"] # Add other crypto providers as needed - def validate(self) -> None: - super().validate() + def validate_config(self) -> None: if self.provider not in self.supported_providers: raise ValueError(f"Unsupported crypto provider: {self.provider}") diff --git a/py/core/base/providers/database.py b/py/core/base/providers/database.py index 2ec9348da..8f167a11b 100644 --- a/py/core/base/providers/database.py +++ b/py/core/base/providers/database.py @@ -1,6 +1,6 @@ import logging from abc import ABC, abstractmethod -from typing import Optional +from typing import Any, Optional from .base import Provider, ProviderConfig @@ -17,20 +17,21 @@ class DatabaseConfig(ProviderConfig): port: Optional[int] = None db_name: Optional[str] = None vecs_collection: Optional[str] = None + project_name: Optional[str] = None def __post_init__(self): - self.validate() + self.validate_config() # Capture additional fields for key, value in self.extra_fields.items(): setattr(self, key, value) - def validate(self) -> None: + def validate_config(self) -> None: if self.provider not in self.supported_providers: raise ValueError(f"Provider '{self.provider}' is not supported.") @property def supported_providers(self) -> list[str]: - return ["postgres", None] + return ["postgres"] class VectorDBProvider(Provider, ABC): @@ -41,12 +42,11 @@ def _initialize_vector_db(self, dimension: int) -> None: class RelationalDBProvider(Provider, ABC): @abstractmethod - def _initialize_relational_db(self) -> None: + async def _initialize_relational_db(self) -> None: pass class DatabaseProvider(Provider): - def 
__init__(self, config: DatabaseConfig): if not isinstance(config, DatabaseConfig): raise ValueError( @@ -54,15 +54,15 @@ def __init__(self, config: DatabaseConfig): ) logger.info(f"Initializing DatabaseProvider with config {config}.") super().__init__(config) - self.vector: VectorDBProvider = self._initialize_vector_db() - self.relational: RelationalDBProvider = ( - self._initialize_relational_db() - ) + + # remove later to re-introduce typing... + self.vector: Any = None + self.relational: Any = None @abstractmethod def _initialize_vector_db(self) -> VectorDBProvider: pass @abstractmethod - def _initialize_relational_db(self) -> RelationalDBProvider: + async def _initialize_relational_db(self) -> RelationalDBProvider: pass diff --git a/py/core/base/providers/embedding.py b/py/core/base/providers/embedding.py index 25725914d..c08bd4720 100644 --- a/py/core/base/providers/embedding.py +++ b/py/core/base/providers/embedding.py @@ -5,17 +5,20 @@ from enum import Enum from typing import Any, Optional -from ..abstractions import EmbeddingPurpose, default_embedding_prefixes -from ..abstractions.search import VectorSearchResult +from ..abstractions import ( + EmbeddingPurpose, + VectorSearchResult, + default_embedding_prefixes, +) from .base import Provider, ProviderConfig logger = logging.getLogger(__name__) class EmbeddingConfig(ProviderConfig): - provider: Optional[str] = None - base_model: Optional[str] = None - base_dimension: Optional[int] = None + provider: str + base_model: str + base_dimension: int rerank_model: Optional[str] = None rerank_dimension: Optional[int] = None rerank_transformer_type: Optional[str] = None @@ -27,13 +30,13 @@ class EmbeddingConfig(ProviderConfig): initial_backoff: float = 1.0 max_backoff: float = 60.0 - def validate(self) -> None: + def validate_config(self) -> None: if self.provider not in self.supported_providers: raise ValueError(f"Provider '{self.provider}' is not supported.") @property def supported_providers(self) -> list[str]: - 
return [None, "litellm", "openai", "ollama"] + return ["litellm", "openai", "ollama"] class EmbeddingProvider(Provider): @@ -156,12 +159,6 @@ def rerank( ): pass - @abstractmethod - def tokenize_string( - self, text: str, model: str, stage: PipeStage - ) -> list[int]: - pass - def set_prefixes(self, config_prefixes: dict[str, str], base_model: str): self.prefixes = {} diff --git a/py/core/base/providers/file.py b/py/core/base/providers/file.py index 288fd981e..1bb6915e8 100644 --- a/py/core/base/providers/file.py +++ b/py/core/base/providers/file.py @@ -7,18 +7,18 @@ class FileConfig(ProviderConfig): - def validate(self) -> None: + def validate_config(self) -> None: if self.provider not in self.supported_providers: raise ValueError(f"Provider '{self.provider}' is not supported.") @property def supported_providers(self) -> list[str]: - return ["postgres", None] + return ["postgres"] class FileProvider(Provider, ABC): @abstractmethod - def store_file( + async def store_file( self, document_id: UUID, file_name: str, @@ -28,17 +28,17 @@ def store_file( pass @abstractmethod - def retrieve_file( + async def retrieve_file( self, document_id: UUID ) -> Optional[tuple[str, BinaryIO, int]]: pass @abstractmethod - def delete_file(self, document_id: UUID) -> bool: + async def delete_file(self, document_id: UUID) -> bool: pass @abstractmethod - def get_files_overview( + async def get_files_overview( self, filter_document_ids: Optional[list[UUID]] = None, filter_file_names: Optional[list[str]] = None, diff --git a/py/core/base/providers/kg.py b/py/core/base/providers/kg.py index ef7f5bbab..a52a367c8 100644 --- a/py/core/base/providers/kg.py +++ b/py/core/base/providers/kg.py @@ -4,10 +4,16 @@ from abc import ABC, abstractmethod from typing import Any, Optional, Tuple -from ...base.utils.base_utils import RelationshipType -from ..abstractions.graph import Entity, KGExtraction, Triple -from ..abstractions.llm import GenerationConfig -from ..abstractions.restructure import 
KGCreationSettings, KGEnrichmentSettings +from ..abstractions import ( + Entity, + GenerationConfig, + KGCreationSettings, + KGEnrichmentSettings, + KGExtraction, + KGSearchSettings, + RelationshipType, + Triple, +) from .base import ProviderConfig logger = logging.getLogger(__name__) @@ -23,22 +29,18 @@ class KGConfig(ProviderConfig): database: Optional[str] = None batch_size: Optional[int] = 1 - kg_extraction_prompt: Optional[str] = "few_shot_ner_kg_extraction" - kg_search_prompt: Optional[str] = "kg_search" - kg_search_config: Optional[GenerationConfig] = None kg_store_path: Optional[str] = None - kg_enrichment_settings: Optional[KGEnrichmentSettings] = ( - KGEnrichmentSettings() - ) - kg_creation_settings: Optional[KGCreationSettings] = KGCreationSettings() + kg_enrichment_settings: KGEnrichmentSettings = KGEnrichmentSettings() + kg_creation_settings: KGCreationSettings = KGCreationSettings() + kg_search_settings: KGSearchSettings = KGSearchSettings() - def validate(self) -> None: + def validate_config(self) -> None: if self.provider not in self.supported_providers: raise ValueError(f"Provider '{self.provider}' is not supported.") @property def supported_providers(self) -> list[str]: - return [None, "neo4j", "local"] + return ["neo4j", "local"] class KGProvider(ABC): @@ -54,7 +56,7 @@ def __init__(self, config: KGConfig) -> None: self.validate_config() def validate_config(self) -> None: - self.config.validate() + self.config.validate_config() @property @abstractmethod diff --git a/py/core/base/providers/llm.py b/py/core/base/providers/llm.py index bcec66cd7..65445bc53 100644 --- a/py/core/base/providers/llm.py +++ b/py/core/base/providers/llm.py @@ -5,7 +5,7 @@ from concurrent.futures import ThreadPoolExecutor from typing import Any, AsyncGenerator, Generator, Optional -from core.base.abstractions.llm import ( +from core.base.abstractions import ( GenerationConfig, LLMChatCompletion, LLMChatCompletionChunk, @@ -24,7 +24,7 @@ class 
CompletionConfig(ProviderConfig): initial_backoff: float = 1.0 max_backoff: float = 60.0 - def validate(self) -> None: + def validate_config(self) -> None: if not self.provider: raise ValueError("Provider must be set.") if self.provider not in self.supported_providers: diff --git a/py/core/base/providers/orchestration.py b/py/core/base/providers/orchestration.py index 48a22eabc..a676bf650 100644 --- a/py/core/base/providers/orchestration.py +++ b/py/core/base/providers/orchestration.py @@ -8,7 +8,7 @@ class OrchestrationConfig(ProviderConfig): provider: str max_threads: int = 256 - def validate(self) -> None: + def validate_config(self) -> None: if self.provider not in self.supported_providers: raise ValueError(f"Provider {self.provider} is not supported.") diff --git a/py/core/base/providers/parsing.py b/py/core/base/providers/parsing.py index 61d58201d..4b18bc619 100644 --- a/py/core/base/providers/parsing.py +++ b/py/core/base/providers/parsing.py @@ -3,7 +3,7 @@ from pydantic import BaseModel, Field -from ..abstractions.document import Document, DocumentType +from ..abstractions import Document, DocumentType from .base import Provider, ProviderConfig from .chunking import ChunkingConfig @@ -21,9 +21,9 @@ class ParsingConfig(ProviderConfig): @property def supported_providers(self) -> list[str]: - return ["r2r", "unstructured_local", "unstructured_api", None] + return ["r2r", "unstructured_local", "unstructured_api"] - def validate(self) -> None: + def validate_config(self) -> None: if self.provider not in self.supported_providers: raise ValueError(f"Provider {self.provider} is not supported.") diff --git a/py/core/base/providers/prompt.py b/py/core/base/providers/prompt.py index 06be0efd0..f2104a99f 100644 --- a/py/core/base/providers/prompt.py +++ b/py/core/base/providers/prompt.py @@ -3,19 +3,21 @@ from pathlib import Path from typing import Any, Optional +from core.base.abstractions import Prompt + from .base import Provider, ProviderConfig logger = 
logging.getLogger(__name__) class PromptConfig(ProviderConfig): - default_system_name: Optional[str] = "default_system" - default_task_name: Optional[str] = "default_rag" + default_system_name: str = "default_system" + default_task_name: str = "default_rag" # TODO - Replace this with a database file_path: Optional[Path] = None - def validate(self) -> None: + def validate_config(self) -> None: pass @property @@ -25,34 +27,32 @@ def supported_providers(self) -> list[str]: class PromptProvider(Provider): - def __init__(self, config: Optional[PromptConfig] = None): - if config is None: - config = PromptConfig() - elif not isinstance(config, PromptConfig): - raise ValueError( - "PromptProvider must be initialized with a `PromptConfig`." - ) + def __init__(self, config: PromptConfig): logger.info(f"Initializing PromptProvider with config {config}.") super().__init__(config) + self.config: PromptConfig = config @abstractmethod - def add_prompt( + async def add_prompt( self, name: str, template: str, input_types: dict[str, str] ) -> None: pass @abstractmethod def get_prompt( - self, prompt_name: str, inputs: Optional[dict[str, Any]] = None + self, + prompt_name: str, + inputs: Optional[dict[str, Any]] = None, + prompt_override: Optional[str] = None, ) -> str: pass @abstractmethod - def get_all_prompts(self) -> dict[str, str]: + def get_all_prompts(self) -> dict[str, Prompt]: pass @abstractmethod - def update_prompt( + async def update_prompt( self, name: str, template: Optional[str] = None, @@ -60,6 +60,10 @@ def update_prompt( ) -> None: pass + @abstractmethod + async def delete_prompt(self, name: str) -> None: + pass + def _get_message_payload( self, system_prompt_name: Optional[str] = None, @@ -70,7 +74,7 @@ def _get_message_payload( task_role: str = "user", task_inputs: dict = {}, task_prompt_override: Optional[str] = None, - ) -> dict: + ) -> list[dict]: system_prompt = system_prompt_override or self.get_prompt( system_prompt_name or self.config.default_system_name, 
system_inputs, diff --git a/py/core/base/utils/__init__.py b/py/core/base/utils/__init__.py index bae809ee0..5ebe8efd2 100644 --- a/py/core/base/utils/__init__.py +++ b/py/core/base/utils/__init__.py @@ -1,4 +1,6 @@ -from .base_utils import ( +from shared.utils import ( + RecursiveCharacterTextSplitter, + TextSplitter, decrement_version, format_entity_types, format_relations, @@ -11,7 +13,6 @@ run_pipeline, to_async_generator, ) -from .splitter.text import RecursiveCharacterTextSplitter, TextSplitter __all__ = [ "format_entity_types", diff --git a/py/core/configs/local_llm.toml b/py/core/configs/local_llm.toml index 532e3f1bd..c268ff468 100644 --- a/py/core/configs/local_llm.toml +++ b/py/core/configs/local_llm.toml @@ -1,3 +1,10 @@ +[agent] +system_instruction_name = "rag_agent" +tool_names = ["search"] + + [agent.generation_config] + model = "ollama/llama3.1" + [completion] provider = "litellm" concurrent_request_limit = 1 @@ -21,12 +28,31 @@ batch_size = 128 add_title_as_prefix = true concurrent_request_limit = 2 -[parsing] -excluded_parsers = [ "gif", "jpeg", "jpg", "png", "svg", "mp3", "mp4" ] -[agent] -system_instruction_name = "rag_agent" -tool_names = ["search"] +[kg] +provider = "neo4j" # set this to "None" if you don't want to use a knowledge graph +batch_size = 256 - [agent.generation_config] - model = "ollama/llama3.1" + [kg.kg_creation_settings] + kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" + entity_types = [] # if empty, all entities are extracted + relation_types = [] # if empty, all relations are extracted + fragment_merge_count = 4 # number of fragments to merge into a single extraction + max_knowledge_triples = 100 + generation_config = { model = "ollama/llama3.1" } # and other params, model used for triplet extraction + + [kg.kg_enrichment_settings] + community_reports_prompt = "graphrag_community_reports_prompt" + max_description_input_length = 8192 + max_summary_input_length = 65536 + generation_config = { model = 
"ollama/llama3.1" } # and other params, model used for node description and graph clustering + leiden_params = { max_levels = 10 } # more params in https://neo4j.com/docs/graph-data-science/current/algorithms/leiden/ + + [kg.kg_search_settings] + map_system_prompt = "graphrag_map_system_prompt" + reduce_system_prompt = "graphrag_reduce_system_prompt" + generation_config = { model = "ollama/llama3.1" } + +[parsing] +provider = "unstructured_local" +excluded_parsers = [ "gif", "jpeg", "jpg", "png", "svg", "mp3", "mp4" ] diff --git a/py/core/configs/local_llm_neo4j_kg.toml b/py/core/configs/local_llm_neo4j_kg.toml deleted file mode 100644 index 8305d1a0d..000000000 --- a/py/core/configs/local_llm_neo4j_kg.toml +++ /dev/null @@ -1,62 +0,0 @@ -[chunking] -provider = "unstructured_local" -strategy = "auto" -chunking_strategy = "basic" -new_after_n_chars = 2_048 -max_characters = 4_096 # use larger max_characters for KG construction -combine_under_n_chars = 512 -overlap = 20 - -[completion] -provider = "litellm" -concurrent_request_limit = 1 - - [completion.generation_config] - model = "ollama/llama3.1" - temperature = 0.1 - top_p = 1 - max_tokens_to_sample = 1_024 - stream = false - add_generation_kwargs = {} - -[embedding] -provider = "ollama" -base_model = "mxbai-embed-large" -base_dimension = 1_024 -batch_size = 32 -add_title_as_prefix = true -concurrent_request_limit = 2 - -[parsing] -excluded_parsers = [ "gif", "jpeg", "jpg", "png", "svg", "mp3", "mp4" ] - -[kg] -provider = "neo4j" -batch_size = 256 -kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" - - [kg.kg_creation_settings] - entity_types = [] # if empty, all entities are extracted - relation_types = [] # if empty, all relations are extracted - fragment_merge_count = 4 # number of fragments to merge into a single extraction - max_knowledge_triples = 100 - generation_config = { model = "ollama/llama3.1" } # and other params, model used for triplet extraction - - [kg.kg_enrichment_settings] - 
max_description_input_length = 8192 - max_summary_input_length = 65536 - generation_config = { model = "ollama/llama3.1" } # and other params, model used for node description and graph clustering - leiden_params = { max_levels = 10 } # more params in https://neo4j.com/docs/graph-data-science/current/algorithms/leiden/ - - [kg.kg_search_config] - model = "ollama/llama3.1" - -[database] -provider = "postgres" - -[agent] -system_instruction_name = "rag_agent" -tool_names = ["search"] - - [agent.generation_config] - model = "ollama/llama3.1" diff --git a/py/core/configs/neo4j_kg_no_vector_postgres.toml b/py/core/configs/neo4j_kg_no_vector_postgres.toml index 4c75fa8e2..4a7386d4f 100644 --- a/py/core/configs/neo4j_kg_no_vector_postgres.toml +++ b/py/core/configs/neo4j_kg_no_vector_postgres.toml @@ -7,9 +7,9 @@ provider = "None" [kg] provider = "neo4j" batch_size = 256 -kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" [kg.kg_creation_settings] + kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" entity_types = [] # if empty, all entities are extracted relation_types = [] # if empty, all relations are extracted fragment_merge_count = 4 # number of fragments to merge into a single extraction @@ -20,5 +20,5 @@ kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" generation_config = { model = "gpt-4o-mini" } # and other params, model used for node description and graph clustering leiden_params = { max_levels = 10 } # more params here: https://neo4j.com/docs/graph-data-science/current/algorithms/leiden/ - [kg.kg_search_config] - model = "gpt-4o-mini" + [kg.kg_search_settings] + generation_config = { model = "gpt-4o-mini" } diff --git a/py/core/configs/unstructured.toml b/py/core/configs/unstructured_api.toml similarity index 100% rename from py/core/configs/unstructured.toml rename to py/core/configs/unstructured_api.toml diff --git a/py/core/examples/scripts/advanced_kg_cookbook.py b/py/core/examples/scripts/advanced_kg_cookbook.py index 
db3a2ee1a..e13e75232 100644 --- a/py/core/examples/scripts/advanced_kg_cookbook.py +++ b/py/core/examples/scripts/advanced_kg_cookbook.py @@ -159,7 +159,6 @@ def main( except: continue - print("Inspecting Knowledge Graph") print( client.inspect_knowledge_graph(0, 1000, print_descriptions=True)[ "results" diff --git a/py/core/examples/scripts/run_group_workflow.py b/py/core/examples/scripts/run_collection_workflow.py similarity index 88% rename from py/core/examples/scripts/run_group_workflow.py rename to py/core/examples/scripts/run_collection_workflow.py index f5d808c71..2263c7818 100644 --- a/py/core/examples/scripts/run_group_workflow.py +++ b/py/core/examples/scripts/run_collection_workflow.py @@ -16,36 +16,36 @@ # Create two collections print("\nCreating two collections...") collection1_result = client.create_collection( - "TestGroup1", "A test collection for document access" + "TestCollection1", "A test collection for document access" ) collection2_result = client.create_collection( - "TestGroup2", "Another test collection" + "TestCollection2", "Another test collection" ) - print("Group1 creation result:", collection1_result) - print("Group2 creation result:", collection2_result) + print("Collection1 creation result:", collection1_result) + print("Collection2 creation result:", collection2_result) collection1_id = collection1_result["results"]["collection_id"] collection2_id = collection2_result["results"]["collection_id"] # Get collections overview print("\nGetting collections overview...") collections_overview = client.collections_overview() - print("Groups overview:", collections_overview) + print("Collections overview:", collections_overview) # Get specific collection print("\nGetting specific collection...") collection1_details = client.get_collection(collection1_id) - print("Group1 details:", collection1_details) + print("Collection1 details:", collection1_details) # List all collections print("\nListing all collections...") collections_list = 
client.list_collections() - print("Groups list:", collections_list) + print("Collections list:", collections_list) # Update a collection - print("\nUpdating Group1...") + print("\nUpdating Collection1...") update_result = client.update_collection( collection1_id, - name="UpdatedTestGroup1", + name="UpdatedTestCollection1", description="Updated description", ) print("Collection update result:", update_result) @@ -112,9 +112,9 @@ print("Add user3 to collection2 result:", add_user3_result2) # Get users in a collection - print("\nGetting users in Group1...") + print("\nGetting users in Collection1...") users_in_collection1 = client.user_collections(collection1_id) - print("Users in Group1:", users_in_collection1) + print("Users in Collection1:", users_in_collection1) # Get collections for a user print("\nGetting collections for User3...") @@ -122,19 +122,19 @@ print("User3 collections:", user3_collections) # Get documents in a collection - print("\nGetting documents in Group1...") + print("\nGetting documents in Collection1...") docs_in_collection1 = client.documents_in_collection(collection1_id) - print("Documents in Group1:", docs_in_collection1) + print("Documents in Collection1:", docs_in_collection1) # Remove user from collection - print("\nRemoving User3 from Group1...") + print("\nRemoving User3 from Collection1...") remove_user_result = client.remove_user_from_collection( user3_result["results"]["id"], collection1_id ) print("Remove user result:", remove_user_result) # Remove document from collection - print("\nRemoving Document1 from Group1...") + print("\nRemoving Document1 from Collection1...") remove_doc_result = client.remove_document_from_collection( document1_id, collection1_id ) @@ -163,7 +163,7 @@ print("\nLogging in as user3...") client.login("user3@test.com", "password123") - # Search for documents (should see only document2 after removal from Group1) + # Search for documents (should see only document2 after removal from Collection1) print("\nUser3 
searching for documents...") try: search_result_user3 = client.search( diff --git a/py/core/integrations/unstructured/main.py b/py/core/integrations/unstructured/main.py index 431f40dd1..3c73ccfe6 100644 --- a/py/core/integrations/unstructured/main.py +++ b/py/core/integrations/unstructured/main.py @@ -16,7 +16,7 @@ class PartitionRequestModel(BaseModel): - file_content: bytes + file_content: str chunking_config: Dict diff --git a/py/core/main/abstractions.py b/py/core/main/abstractions.py index 146aae25f..7647a70f9 100644 --- a/py/core/main/abstractions.py +++ b/py/core/main/abstractions.py @@ -1,5 +1,3 @@ -from typing import Optional - from pydantic import BaseModel from core.agent import R2RRAGAgent, R2RStreamingRAGAgent @@ -20,37 +18,36 @@ class R2RProviders(BaseModel): - auth: Optional[AuthProvider] - chunking: Optional[ChunkingProvider] - database: Optional[DatabaseProvider] - kg: Optional[KGProvider] - llm: Optional[CompletionProvider] - embedding: Optional[EmbeddingProvider] - orchestration: Optional[OrchestrationProvider] - prompt: Optional[PromptProvider] - parsing: Optional[ParsingProvider] - file: Optional[FileProvider] + auth: AuthProvider + chunking: ChunkingProvider + database: DatabaseProvider + kg: KGProvider + llm: CompletionProvider + embedding: EmbeddingProvider + orchestration: OrchestrationProvider + prompt: PromptProvider + parsing: ParsingProvider + file: FileProvider class Config: arbitrary_types_allowed = True class R2RPipes(BaseModel): - parsing_pipe: Optional[AsyncPipe] - chunking_pipe: Optional[AsyncPipe] - embedding_pipe: Optional[AsyncPipe] - vector_storage_pipe: Optional[AsyncPipe] - vector_search_pipe: Optional[AsyncPipe] - rag_pipe: Optional[AsyncPipe] - streaming_rag_pipe: Optional[AsyncPipe] - kg_search_pipe: Optional[AsyncPipe] - kg_extraction_pipe: Optional[AsyncPipe] - kg_storage_pipe: Optional[AsyncPipe] - kg_node_extraction_pipe: Optional[AsyncPipe] - kg_node_description_pipe: Optional[AsyncPipe] - kg_clustering_pipe: 
Optional[AsyncPipe] - kg_community_summary_pipe: Optional[AsyncPipe] - chunking_pipe: Optional[AsyncPipe] + parsing_pipe: AsyncPipe + chunking_pipe: AsyncPipe + embedding_pipe: AsyncPipe + vector_storage_pipe: AsyncPipe + vector_search_pipe: AsyncPipe + rag_pipe: AsyncPipe + streaming_rag_pipe: AsyncPipe + kg_search_pipe: AsyncPipe + kg_extraction_pipe: AsyncPipe + kg_storage_pipe: AsyncPipe + kg_node_extraction_pipe: AsyncPipe + kg_node_description_pipe: AsyncPipe + kg_clustering_pipe: AsyncPipe + kg_community_summary_pipe: AsyncPipe class Config: arbitrary_types_allowed = True diff --git a/py/core/main/api/auth_router.py b/py/core/main/api/auth_router.py index d09d7292d..5b9d5aeb5 100644 --- a/py/core/main/api/auth_router.py +++ b/py/core/main/api/auth_router.py @@ -5,7 +5,7 @@ from fastapi.security import OAuth2PasswordBearer, OAuth2PasswordRequestForm from pydantic import EmailStr -from core.base.api.models.auth.responses import ( +from core.base.api.models import ( GenericMessageResponse, WrappedGenericMessageResponse, WrappedTokenResponse, @@ -27,6 +27,13 @@ def __init__( orchestration_provider: Optional[OrchestrationProvider] = None, ): super().__init__(auth_service, run_type, orchestration_provider) + self.service: AuthService = auth_service # for type hinting + + def _register_workflows(self): + pass + + def _load_openapi_extras(self): + return {} def _setup_routes(self): @self.router.post("/register", response_model=WrappedUserResponse) diff --git a/py/core/main/api/base_router.py b/py/core/main/api/base_router.py index d6163421e..58265e529 100644 --- a/py/core/main/api/base_router.py +++ b/py/core/main/api/base_router.py @@ -1,20 +1,19 @@ import functools import logging from abc import abstractmethod -from typing import Optional +from typing import Callable, Optional from fastapi import APIRouter, HTTPException from fastapi.responses import StreamingResponse from core.base import R2RException, manage_run from core.base.logging.base import RunType - 
-logger = logging.getLogger(__name__) - from core.base.providers import OrchestrationProvider from ..services.base import Service +logger = logging.getLogger(__name__) + class BaseRouter: def __init__( @@ -34,7 +33,7 @@ def __init__( def get_router(self): return self.router - def base_endpoint(self, func: callable): + def base_endpoint(self, func: Callable): @functools.wraps(func) async def wrapper(*args, **kwargs): async with manage_run( @@ -48,10 +47,18 @@ async def wrapper(*args, **kwargs): ) try: - results = await func(*args, **kwargs) + func_result = await func(*args, **kwargs) + if ( + isinstance(func_result, tuple) + and len(func_result) == 2 + ): + results, outer_kwargs = func_result + else: + results, outer_kwargs = func_result, {} + if isinstance(results, StreamingResponse): return results - return {"results": results} + return {"results": results, **outer_kwargs} except R2RException as re: raise HTTPException( status_code=re.status_code, diff --git a/py/core/main/api/data/retrieval_router_openapi.yml b/py/core/main/api/data/retrieval_router_openapi.yml index 5a549445c..d76e3a7cc 100644 --- a/py/core/main/api/data/retrieval_router_openapi.yml +++ b/py/core/main/api/data/retrieval_router_openapi.yml @@ -20,7 +20,7 @@ search: "use_kg_search": True, # graph needs to be constructed first "kg_search_type": "local", "kg_search_level": "0", - "kg_search_generation_config": { + "generation_config": { "model": "gpt-4o-mini", "temperature": 0.7, }, @@ -50,7 +50,7 @@ search: "use_kg_search": true, # graph needs to be constructed first "kg_search_type": "local", "kg_search_level": "0", - "kg_search_generation_config": { + "generation_config": { "model": "gpt-4o-mini", "temperature": 0.7 }, @@ -91,7 +91,7 @@ rag: "use_kg_search": True, "kg_search_type": "local", "kg_search_level": "0", - "kg_search_generation_config": { + "generation_config": { "model": "gpt-4o-mini", "temperature": 0.7, }, @@ -126,7 +126,7 @@ rag: "use_kg_search": true, # graph needs to be 
constructed first "kg_search_type": "local", "kg_search_level": "0", - "kg_search_generation_config": { + "generation_config": { "model": "gpt-4o-mini", "temperature": 0.7 }, diff --git a/py/core/main/api/ingestion_router.py b/py/core/main/api/ingestion_router.py index 950da9218..975b449b1 100644 --- a/py/core/main/api/ingestion_router.py +++ b/py/core/main/api/ingestion_router.py @@ -10,7 +10,7 @@ from pydantic import Json from core.base import ChunkingConfig, R2RException, generate_user_document_id -from core.base.api.models.ingestion.responses import ( +from core.base.api.models import ( WrappedIngestionResponse, WrappedUpdateResponse, ) @@ -83,7 +83,8 @@ async def ingest_files_app( description=ingest_files_descriptions.get("chunking_config"), ), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedIngestionResponse: + response_model=WrappedIngestionResponse, + ): """ Ingest files into the system. @@ -174,7 +175,8 @@ async def retry_ingest_files( description=ingest_files_descriptions.get("document_ids"), ), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedIngestionResponse: + response_model=WrappedIngestionResponse, + ): """ Retry the ingestion of files into the system. @@ -186,7 +188,9 @@ async def retry_ingest_files( documents_overview = await self.service.providers.database.relational.get_documents_overview( filter_document_ids=document_ids, filter_user_ids=[auth_user.id], - ) + )[ + "results" + ] if len(documents_overview) != len(document_ids): raise R2RException( status_code=404, @@ -229,7 +233,8 @@ async def update_files_app( description=ingest_files_descriptions.get("chunking_config"), ), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedUpdateResponse: + response_model=WrappedUpdateResponse, + ): """ Update existing files in the system. 
@@ -310,7 +315,7 @@ def _validate_chunking_config(chunking_config): from ..assembly.factory import R2RProviderFactory if chunking_config: - chunking_config.validate() + chunking_config.validate_config() R2RProviderFactory.create_chunking_provider(chunking_config) else: logger.info("No chunking config override provided. Using default.") diff --git a/py/core/main/api/management_router.py b/py/core/main/api/management_router.py index 22c6bfc34..cb37b0315 100644 --- a/py/core/main/api/management_router.py +++ b/py/core/main/api/management_router.py @@ -11,7 +11,7 @@ from pydantic import Json from core.base import R2RException -from core.base.api.models.management.responses import ( +from core.base.api.models import ( WrappedAddUserResponse, WrappedAnalyticsResponse, WrappedAppSettingsResponse, @@ -26,7 +26,9 @@ WrappedPromptMessageResponse, WrappedScoreCompletionResponse, WrappedServerStatsResponse, + WrappedUserCollectionResponse, WrappedUserOverviewResponse, + WrappedUsersInCollectionResponse, ) from core.base.logging import AnalysisTypes, LogFilterCriteria from core.base.providers import OrchestrationProvider @@ -46,6 +48,12 @@ def __init__( self.service: ManagementService = service # for type hinting self.start_time = datetime.now(timezone.utc) + def _register_workflows(self): + pass + + def _load_openapi_extras(self): + return {} + # TODO: remove this from the management route, it should be at the base of the server def _setup_routes(self): @self.router.get("/health") @@ -57,7 +65,8 @@ async def health_check(): @self.base_endpoint async def server_stats( auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedServerStatsResponse: + response_model=WrappedServerStatsResponse, + ): if not auth_user.is_superuser: raise R2RException( "Only an authorized user can call the `server_stats` endpoint.", @@ -83,7 +92,8 @@ async def update_prompt_app( {}, description="Input types" ), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> 
WrappedPromptMessageResponse: + response_model=WrappedPromptMessageResponse, + ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can call the `update_prompt` endpoint.", @@ -102,7 +112,8 @@ async def add_prompt_app( template: str = Body(..., description="Prompt template"), input_types: dict[str, str] = Body({}, description="Input types"), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedPromptMessageResponse: + response_model=WrappedPromptMessageResponse, + ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can call the `add_prompt` endpoint.", @@ -122,7 +133,8 @@ async def get_prompt_app( None, description="Prompt override" ), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedPromptMessageResponse: + response_model=WrappedPromptMessageResponse, + ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can call the `get_prompt` endpoint.", @@ -137,7 +149,8 @@ async def get_prompt_app( @self.base_endpoint async def get_all_prompts_app( auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedGetPromptsResponse: + response_model=WrappedGetPromptsResponse, + ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can call the `get_all_prompts` endpoint.", @@ -151,7 +164,8 @@ async def get_all_prompts_app( async def delete_prompt_app( prompt_name: str = Path(..., description="Prompt name"), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> None: + response_model=None, + ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can call the `delete_prompt` endpoint.", @@ -166,7 +180,8 @@ async def get_analytics_app( filter_criteria: Optional[Json[dict]] = Query({}), analysis_types: Optional[Json[dict]] = Query({}), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedAnalyticsResponse: + response_model=WrappedAnalyticsResponse, + ): if not auth_user.is_superuser: raise 
R2RException( "Only a superuser can call the `analytics` endpoint.", 403 @@ -197,7 +212,8 @@ async def logs_app( offset: int = Query(0, ge=0), limit: int = Query(100, ge=1, le=1000), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedLogResponse: + response_model=WrappedLogResponse, + ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can call the `logs` endpoint.", 403 @@ -213,7 +229,8 @@ async def logs_app( @self.base_endpoint async def app_settings( auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedAppSettingsResponse: + response_model=WrappedAppSettingsResponse, + ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can call the `app_settings` endpoint.", @@ -227,7 +244,8 @@ async def score_completion( message_id: str = Body(..., description="Message ID"), score: float = Body(..., description="Completion score"), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedScoreCompletionResponse: + response_model=WrappedScoreCompletionResponse, + ): message_uuid = UUID(message_id) return await self.service.score_completion( message_id=message_uuid, score=score @@ -240,7 +258,8 @@ async def users_overview_app( offset: int = Query(0, ge=0), limit: int = Query(100, ge=1, le=1000), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedUserOverviewResponse: + response_model=WrappedUserOverviewResponse, + ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can call the `users_overview` endpoint.", @@ -251,16 +270,21 @@ async def users_overview_app( [UUID(user_id) for user_id in user_ids] if user_ids else None ) - return await self.service.users_overview( + users_overview_response = await self.service.users_overview( user_ids=user_uuids, offset=offset, limit=limit ) + return users_overview_response["results"], { + "total_entries": users_overview_response["total_entries"] + } + @self.router.delete("/delete", status_code=204) 
@self.base_endpoint async def delete_app( filters: str = Query(..., description="JSON-encoded filters"), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> None: + response_model=None, + ): filters_dict = json.loads(filters) if filters else None return await self.service.delete(filters=filters_dict) @@ -281,7 +305,7 @@ async def download_file_app( document_uuid = UUID(document_id) except ValueError: raise R2RException( - status_code=400, detail="Invalid document ID format." + status_code=400, message="Invalid document ID format." ) file_tuple = await self.service.download_file(document_uuid) @@ -318,21 +342,26 @@ async def documents_overview_app( offset: int = Query(0, ge=0), limit: int = Query(100, ge=1, le=1000), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedDocumentOverviewResponse: + response_model=WrappedDocumentOverviewResponse, + ): request_user_ids = ( None if auth_user.is_superuser else [auth_user.id] ) document_uuids = [ UUID(document_id) for document_id in document_ids ] - result = await self.service.documents_overview( - user_ids=request_user_ids, - collection_ids=auth_user.collection_ids, - document_ids=document_uuids, - offset=offset, - limit=limit, + documents_overview_response = ( + await self.service.documents_overview( + user_ids=request_user_ids, + collection_ids=auth_user.collection_ids, + document_ids=document_uuids, + offset=offset, + limit=limit, + ) ) - return result + return documents_overview_response["results"], { + "total_entries": documents_overview_response["total_entries"] + } @self.router.get("/document_chunks/{document_id}") @self.base_endpoint @@ -341,19 +370,23 @@ async def document_chunks_app( offset: Optional[int] = Query(0, ge=0), limit: Optional[int] = Query(100, ge=0), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedDocumentChunkResponse: + response_model=WrappedDocumentChunkResponse, + ): document_uuid = UUID(document_id) - chunks = await 
self.service.document_chunks( + + document_chunks_result = await self.service.document_chunks( document_uuid, offset, limit ) - if not chunks: + if not document_chunks_result: raise R2RException( "No chunks found for the given document ID.", 404, ) - is_owner = str(chunks[0].get("user_id")) == str(auth_user.id) + is_owner = str( + document_chunks_result["results"][0].get("user_id") + ) == str(auth_user.id) if not is_owner and not auth_user.is_superuser: raise R2RException( @@ -361,7 +394,9 @@ async def document_chunks_app( 403, ) - return chunks + return document_chunks_result["results"], { + "total_entries": document_chunks_result["total_entries"] + } @self.router.get("/inspect_knowledge_graph") @self.base_endpoint @@ -370,7 +405,8 @@ async def inspect_knowledge_graph( limit: int = 100, print_descriptions: bool = False, auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedKnowledgeGraphResponse: + response_model=WrappedKnowledgeGraphResponse, + ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can call the `inspect_knowledge_graph` endpoint.", @@ -389,7 +425,8 @@ async def collections_overview_app( offset: Optional[int] = Query(0, ge=0), limit: Optional[int] = Query(100, ge=1, le=1000), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedCollectionOverviewResponse: + response_model=WrappedCollectionOverviewResponse, + ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can call the `collections_overview` endpoint.", @@ -401,10 +438,16 @@ async def collections_overview_app( if collection_ids else None ) - return await self.service.collections_overview( - collection_ids=collection_uuids, offset=offset, limit=limit + collections_overview_response = ( + await self.service.collections_overview( + collection_ids=collection_uuids, offset=offset, limit=limit + ) ) + return collections_overview_response["results"], { + "total_entries": collections_overview_response["total_entries"] + } + 
@self.router.post("/create_collection") @self.base_endpoint async def create_collection_app( @@ -413,7 +456,8 @@ async def create_collection_app( "", description="Collection description" ), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedCollectionResponse: + response_model=WrappedCollectionResponse, + ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can create collections.", 403 @@ -425,7 +469,8 @@ async def create_collection_app( async def get_collection_app( collection_id: str = Path(..., description="Collection ID"), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedCollectionResponse: + response_model=WrappedCollectionResponse, + ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can get Collection details.", 403 @@ -445,7 +490,8 @@ async def update_collection_app( None, description="Updated collection description" ), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedCollectionResponse: + response_model=WrappedCollectionResponse, + ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can update collections.", 403 @@ -474,22 +520,28 @@ async def list_collections_app( offset: int = Query(0, ge=0), limit: int = Query(100, ge=1, le=1000), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedCollectionListResponse: + response_model=WrappedCollectionListResponse, + ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can list all collections.", 403 ) - return await self.service.list_collections( + list_collections_response = await self.service.list_collections( offset=offset, limit=min(max(limit, 1), 1000) ) + return list_collections_response["results"], { + "total_entries": list_collections_response["total_entries"] + } + @self.router.post("/add_user_to_collection") @self.base_endpoint async def add_user_to_collection_app( user_id: str = Body(..., description="User ID"), collection_id: 
str = Body(..., description="Collection ID"), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedAddUserResponse: + response_model=WrappedAddUserResponse, + ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can add users to collections.", 403 @@ -529,18 +581,25 @@ async def get_users_in_collection_app( 100, ge=1, le=1000, description="Pagination limit" ), auth_user=Depends(self.service.providers.auth.auth_wrapper), + response_model=WrappedUsersInCollectionResponse, ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can get users in a collection.", 403 ) collection_uuid = UUID(collection_id) - return await self.service.get_users_in_collection( - collection_id=collection_uuid, - offset=offset, - limit=min(max(limit, 1), 1000), + users_in_collection_response = ( + await self.service.get_users_in_collection( + collection_id=collection_uuid, + offset=offset, + limit=min(max(limit, 1), 1000), + ) ) + return users_in_collection_response["results"], { + "total_entries": users_in_collection_response["total_entries"] + } + @self.router.get("/user_collections/{user_id}") @self.base_endpoint async def get_collections_for_user_app( @@ -550,16 +609,23 @@ async def get_collections_for_user_app( 100, ge=1, le=1000, description="Pagination limit" ), auth_user=Depends(self.service.providers.auth.auth_wrapper), + response_model=WrappedUserCollectionResponse, ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can get collections for a user.", 403 ) user_uuid = UUID(user_id) - return await self.service.get_collections_for_user( - user_uuid, offset, limit + user_collection_response = ( + await self.service.get_collections_for_user( + user_uuid, offset, limit + ) ) + return user_collection_response["results"], { + "total_entries": user_collection_response["total_entries"] + } + @self.router.post("/assign_document_to_collection") @self.base_endpoint async def assign_document_to_collection_app( @@ 
-584,7 +650,8 @@ async def remove_document_from_collection_app( document_id: str = Body(..., description="Document ID"), collection_id: str = Body(..., description="Collection ID"), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> None: + response_model=None, + ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can remove documents from collections.", @@ -604,16 +671,23 @@ async def document_collections_app( offset: int = Query(0, ge=0), limit: int = Query(100, ge=1, le=1000), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedCollectionListResponse: + response_model=WrappedCollectionListResponse, + ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can get the collections belonging to a document.", 403, ) - return await self.service.document_collections( - document_id, offset, limit + document_collections_response = ( + await self.service.document_collections( + document_id, offset, limit + ) ) + return document_collections_response["results"], { + "total_entries": document_collections_response["total_entries"] + } + @self.router.get("/collection/{collection_id}/documents") @self.base_endpoint async def documents_in_collection_app( @@ -621,12 +695,21 @@ async def documents_in_collection_app( offset: int = Query(0, ge=0), limit: int = Query(100, ge=1, le=1000), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedDocumentOverviewResponse: + response_model=WrappedDocumentOverviewResponse, + ): if not auth_user.is_superuser: raise R2RException( "Only a superuser can get documents in a collection.", 403 ) collection_uuid = UUID(collection_id) - return await self.service.documents_in_collection( - collection_uuid, offset, limit + documents_in_collection_response = ( + await self.service.documents_in_collection( + collection_uuid, offset, limit + ) ) + + return documents_in_collection_response["results"], { + "total_entries": documents_in_collection_response[ + 
"total_entries" + ] + } diff --git a/py/core/main/api/restructure_router.py b/py/core/main/api/restructure_router.py index d43ce89da..d65ee80fb 100644 --- a/py/core/main/api/restructure_router.py +++ b/py/core/main/api/restructure_router.py @@ -1,14 +1,13 @@ import logging from pathlib import Path -from typing import Optional, Union +from typing import Optional import yaml from fastapi import Body, Depends from pydantic import Json from core.base import KGCreationSettings, KGEnrichmentSettings -from core.base.abstractions.document import RestructureStatus -from core.base.api.models.restructure.responses import ( +from core.base.api.models import ( WrappedKGCreationResponse, WrappedKGEnrichmentResponse, ) @@ -41,15 +40,13 @@ def __init__( super().__init__(service, run_type, orchestration_provider) self.service: RestructureService = service - def _load_openapi_extras(self): - yaml_path = ( - Path(__file__).parent - / "data" - / "restructure_router_openapi.yml" - ) - with open(yaml_path, "r") as yaml_file: - yaml_content = yaml.safe_load(yaml_file) - return yaml_content + def _load_openapi_extras(self): + yaml_path = ( + Path(__file__).parent / "data" / "restructure_router_openapi.yml" + ) + with open(yaml_path, "r") as yaml_file: + yaml_content = yaml.safe_load(yaml_file) + return yaml_content def _register_workflows(self): self.orchestration_provider.register_workflow( @@ -80,7 +77,8 @@ async def create_graph( description="Settings for the graph creation process.", ), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedKGCreationResponse: + response_model=WrappedKGCreationResponse, + ): """ Creating a graph on your documents. This endpoint takes input a list of document ids and KGCreationSettings. If document IDs are not provided, the graph will be created on all documents in the system. 
@@ -133,7 +131,8 @@ async def enrich_graph( description="Settings for the graph enrichment process.", ), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedKGEnrichmentResponse: + response_model=WrappedKGEnrichmentResponse, + ): """ This endpoint enriches the graph with additional information. It creates communities of nodes based on their similarity and adds embeddings to the graph. This step is necessary for GraphRAG to work. """ diff --git a/py/core/main/api/retrieval_router.py b/py/core/main/api/retrieval_router.py index 84124b2d2..e53e4b78a 100644 --- a/py/core/main/api/retrieval_router.py +++ b/py/core/main/api/retrieval_router.py @@ -42,6 +42,9 @@ def _load_openapi_extras(self): yaml_content = yaml.safe_load(yaml_file) return yaml_content + def _register_workflows(self): + pass + def _setup_routes(self): search_extras = self.openapi_extras.get("search", {}) search_descriptions = search_extras.get("input_descriptions", {}) @@ -64,7 +67,9 @@ async def search_app( description=search_descriptions.get("kg_search_settings"), ), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedSearchResponse: + response_model=WrappedSearchResponse, + ): + print("kg_search_settings = ", kg_search_settings) """ Perform a search query on the vector database and knowledge graph. @@ -139,7 +144,8 @@ async def rag_app( description=rag_descriptions.get("include_title_if_available"), ), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedRAGResponse: + response_model=WrappedRAGResponse, + ): """ Execute a RAG (Retrieval-Augmented Generation) query. @@ -149,6 +155,8 @@ async def rag_app( The generation process can be customized using the rag_generation_config parameter. 
""" + print("kg_search_settings = ", kg_search_settings) + allowed_collections = set(auth_user.collection_ids) filters = { "$or": [ @@ -221,7 +229,8 @@ async def agent_app( ), ), auth_user=Depends(self.service.providers.auth.auth_wrapper), - ) -> WrappedRAGAgentResponse: + response_model=WrappedRAGAgentResponse, + ): """ Implement an agent-based interaction for complex query processing. diff --git a/py/core/main/app.py b/py/core/main/app.py index 492c9b33b..be4eefbde 100644 --- a/py/core/main/app.py +++ b/py/core/main/app.py @@ -10,8 +10,6 @@ from .api.restructure_router import RestructureRouter from .api.retrieval_router import RetrievalRouter from .config import R2RConfig -from .services.ingestion_service import IngestionService -from .services.restructure_service import RestructureService class R2RApp: diff --git a/py/core/main/app_entry.py b/py/core/main/app_entry.py index 98d1041b4..300f5b8f9 100644 --- a/py/core/main/app_entry.py +++ b/py/core/main/app_entry.py @@ -27,7 +27,7 @@ async def lifespan(app: FastAPI): app.router.routes = r2r_app.app.routes # Copy middleware and exception handlers - app.middleware = r2r_app.app.middleware + app.middleware = r2r_app.app.middleware # type: ignore app.exception_handlers = r2r_app.app.exception_handlers # Start the scheduler diff --git a/py/core/main/assembly/builder.py b/py/core/main/assembly/builder.py index 30f28cea5..be2e0e700 100644 --- a/py/core/main/assembly/builder.py +++ b/py/core/main/assembly/builder.py @@ -1,5 +1,5 @@ import logging -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Any, Dict, Optional, Type from core.agent import R2RRAGAgent @@ -67,7 +67,6 @@ class PipeOverrides: kg_node_description: Optional[AsyncPipe] = None kg_clustering: Optional[AsyncPipe] = None kg_community_summary: Optional[AsyncPipe] = None - kg_search: Optional[AsyncPipe] = None @dataclass @@ -93,8 +92,10 @@ def __init__(self, config: R2RConfig): self.provider_factory_override: 
Optional[Type[R2RProviderFactory]] = ( None ) - self.pipe_factory_override: Optional[R2RPipeFactory] = None - self.pipeline_factory_override: Optional[R2RPipelineFactory] = None + self.pipe_factory_override: Optional[Type[R2RPipeFactory]] = None + self.pipeline_factory_override: Optional[Type[R2RPipelineFactory]] = ( + None + ) self.provider_overrides = ProviderOverrides() self.pipe_overrides = PipeOverrides() self.pipeline_overrides = PipelineOverrides() @@ -106,11 +107,11 @@ def with_provider_factory(self, factory: Type[R2RProviderFactory]): self.provider_factory_override = factory return self - def with_pipe_factory(self, factory: R2RPipeFactory): + def with_pipe_factory(self, factory: type[R2RPipeFactory]): self.pipe_factory_override = factory return self - def with_pipeline_factory(self, factory: R2RPipelineFactory): + def with_pipeline_factory(self, factory: type[R2RPipelineFactory]): self.pipeline_factory_override = factory return self @@ -142,11 +143,16 @@ async def _create_providers( for k, v in vars(self.provider_overrides).items() if v is not None } + kwargs = {**kwargs, **overrides} factory = provider_factory(self.config) - return await factory.create_providers(*args, **kwargs, **overrides) + return await factory.create_providers(*args, **kwargs) def _create_pipes( - self, pipe_factory: R2RPipeFactory, providers: Any, *args, **kwargs + self, + pipe_factory: type[R2RPipeFactory], + providers: Any, + *args, + **kwargs, ) -> Any: overrides = { k: v for k, v in vars(self.pipe_overrides).items() if v is not None @@ -156,19 +162,11 @@ def _create_pipes( ) def _create_pipelines( - self, pipeline_factory: R2RPipelineFactory, pipes: Any, *args, **kwargs - ) -> Any: - overrides = { - k: v - for k, v in vars(self.pipeline_overrides).items() - if v is not None - } - return pipeline_factory(self.config, pipes).create_pipelines( - overrides=overrides, *args, **kwargs - ) - - def _create_pipelines( - self, pipeline_factory: R2RPipelineFactory, pipes: Any, *args, 
**kwargs + self, + pipeline_factory: type[R2RPipelineFactory], + pipes: Any, + *args, + **kwargs, ) -> Any: override_dict = { f"{k}_pipeline": v diff --git a/py/core/main/assembly/factory.py b/py/core/main/assembly/factory.py index 45e6215df..34bb378df 100644 --- a/py/core/main/assembly/factory.py +++ b/py/core/main/assembly/factory.py @@ -27,7 +27,7 @@ RunLoggingSingleton, ) from core.pipelines import RAGPipeline, SearchPipeline -from core.pipes import MultiSearchPipe, SearchPipe +from core.pipes import GeneratorPipe, MultiSearchPipe, SearchPipe from ..abstractions import R2RAgents, R2RPipelines, R2RPipes, R2RProviders from ..config import R2RConfig @@ -43,11 +43,10 @@ def __init__(self, config: R2RConfig): async def create_auth_provider( auth_config: AuthConfig, db_provider: DatabaseProvider, - crypto_provider: Optional[CryptoProvider] = None, + crypto_provider: CryptoProvider, *args, **kwargs, ) -> AuthProvider: - auth_provider: Optional[AuthProvider] = None if auth_config.provider == "r2r": from core.providers import R2RAuthProvider @@ -55,32 +54,24 @@ async def create_auth_provider( auth_config, crypto_provider, db_provider ) await auth_provider.initialize() - elif auth_config.provider is None: - auth_provider = None + return auth_provider else: raise ValueError( f"Auth provider {auth_config.provider} not supported." ) - return auth_provider @staticmethod def create_crypto_provider( crypto_config: CryptoConfig, *args, **kwargs ) -> CryptoProvider: - crypto_provider: Optional[CryptoProvider] = None if crypto_config.provider == "bcrypt": from core.providers.crypto import BCryptConfig, BCryptProvider - crypto_provider = BCryptProvider( - BCryptConfig(**crypto_config.dict()) - ) - elif crypto_config.provider is None: - crypto_provider = None + return BCryptProvider(BCryptConfig(**crypto_config.dict())) else: raise ValueError( f"Crypto provider {crypto_config.provider} not supported." 
) - return crypto_provider @staticmethod def create_parsing_provider( @@ -108,18 +99,26 @@ def create_parsing_provider( def create_chunking_provider( chunking_config: ChunkingConfig, *args, **kwargs ) -> ChunkingProvider: - chunking_config.validate() + chunking_config.validate_config() if chunking_config.provider == "r2r": + from core.base import R2RChunkingConfig from core.providers import R2RChunkingProvider - return R2RChunkingProvider(chunking_config) + chunking_config_r2r = R2RChunkingConfig( + **chunking_config.extra_fields + ) + return R2RChunkingProvider(chunking_config_r2r) elif chunking_config.provider in [ "unstructured_local", "unstructured_api", ]: + from core.base import UnstructuredChunkingConfig from core.providers import UnstructuredChunkingProvider - return UnstructuredChunkingProvider(chunking_config) + chunking_config_unst = UnstructuredChunkingConfig( + **chunking_config.extra_fields + ) + return UnstructuredChunkingProvider(chunking_config_unst) else: raise ValueError( f"Chunking provider {chunking_config.provider} not supported" @@ -139,7 +138,7 @@ def create_orchestration_provider(*args, **kwargs): async def create_database_provider( self, db_config: DatabaseConfig, - crypto_provider: Optional[CryptoProvider] = None, + crypto_provider: CryptoProvider, *args, **kwargs, ) -> DatabaseProvider: @@ -157,15 +156,12 @@ async def create_database_provider( db_config, vector_db_dimension, crypto_provider=crypto_provider ) await database_provider.initialize() - elif db_config.provider is None: - database_provider = None + return database_provider else: raise ValueError( f"Database provider {db_config.provider} not supported" ) - return database_provider - @staticmethod def create_embedding_provider( embedding: EmbeddingConfig, *args, **kwargs @@ -214,8 +210,6 @@ async def create_file_provider( file_provider = PostgresFileProvider(file_config, db_provider) await file_provider.initialize() - elif file_config.provider is None: - file_provider = None else: 
raise ValueError( f"File provider {file_config.provider} not supported." @@ -304,13 +298,16 @@ async def create_providers( llm_provider = llm_provider_override or self.create_llm_provider( self.config.completion, *args, **kwargs ) + kg_provider = kg_provider_override or self.create_kg_provider( self.config.kg, *args, **kwargs ) + crypto_provider = ( crypto_provider_override or self.create_crypto_provider(self.config.crypto, *args, **kwargs) ) + database_provider = ( database_provider_override or await self.create_database_provider( @@ -342,20 +339,20 @@ async def create_providers( self.config.parsing, *args, **kwargs ) ) + chunking_provider = chunking_config or self.create_chunking_provider( self.config.chunking, *args, **kwargs ) - file_provider = ( - file_provider_override - or await self.create_file_provider( - self.config.file, database_provider, *args, **kwargs - ) + + file_provider = file_provider_override or await self.create_file_provider( + self.config.file, database_provider, *args, **kwargs # type: ignore ) orchestration_provider = ( orchestration_provider_override or self.create_orchestration_provider() ) + return R2RProviders( auth=auth_provider, chunking=chunking_provider, @@ -417,7 +414,7 @@ def create_pipes( rag_pipe=rag_pipe_override or self.create_rag_pipe(*args, **kwargs), streaming_rag_pipe=streaming_rag_pipe_override - or self.create_rag_pipe(stream=True, *args, **kwargs), + or self.create_rag_pipe(True, *args, **kwargs), kg_node_extraction_pipe=kg_node_extraction_pipe or self.create_kg_node_extraction_pipe(*args, **kwargs), kg_node_description_pipe=kg_node_description_pipe @@ -436,12 +433,16 @@ def create_parsing_pipe(self, *args, **kwargs) -> Any: return ParsingPipe( parsing_provider=self.providers.parsing, file_provider=self.providers.file, + config=AsyncPipe.PipeConfig(name="parsing_pipe"), ) def create_chunking_pipe(self, *args, **kwargs) -> Any: from core.pipes import ChunkingPipe - return 
ChunkingPipe(chunking_provider=self.providers.chunking) + return ChunkingPipe( + chunking_provider=self.providers.chunking, + config=AsyncPipe.PipeConfig(name="chunking_pipe"), + ) def create_embedding_pipe(self, *args, **kwargs) -> Any: if self.config.embedding.provider is None: @@ -453,6 +454,7 @@ def create_embedding_pipe(self, *args, **kwargs) -> Any: embedding_provider=self.providers.embedding, database_provider=self.providers.database, embedding_batch_size=self.config.embedding.batch_size, + config=AsyncPipe.PipeConfig(name="embedding_pipe"), ) def create_vector_storage_pipe(self, *args, **kwargs) -> Any: @@ -461,7 +463,10 @@ def create_vector_storage_pipe(self, *args, **kwargs) -> Any: from core.pipes import VectorStoragePipe - return VectorStoragePipe(database_provider=self.providers.database) + return VectorStoragePipe( + database_provider=self.providers.database, + config=AsyncPipe.PipeConfig(name="vector_storage_pipe"), + ) def create_default_vector_search_pipe(self, *args, **kwargs) -> Any: if self.config.embedding.provider is None: @@ -472,6 +477,7 @@ def create_default_vector_search_pipe(self, *args, **kwargs) -> Any: return VectorSearchPipe( database_provider=self.providers.database, embedding_provider=self.providers.embedding, + config=SearchPipe.SearchConfig(name="vector_search_pipe"), ) def create_multi_search_pipe( @@ -513,15 +519,15 @@ def create_vector_search_pipe(self, *args, **kwargs) -> Any: ) hyde_search_pipe = self.create_multi_search_pipe( vanilla_vector_search_pipe, - use_rrf=False, - expansion_technique="hyde", + False, + "hyde", *args, **kwargs, ) rag_fusion_pipe = self.create_multi_search_pipe( vanilla_vector_search_pipe, - use_rrf=True, - expansion_technique="rag_fusion", + True, + "rag_fusion", *args, **kwargs, ) @@ -535,6 +541,7 @@ def create_vector_search_pipe(self, *args, **kwargs) -> Any: "rag_fusion": rag_fusion_pipe, }, default_strategy="hyde", + config=AsyncPipe.PipeConfig(name="routing_search_pipe"), ) def 
create_kg_extraction_pipe(self, *args, **kwargs) -> Any: @@ -549,7 +556,7 @@ def create_kg_extraction_pipe(self, *args, **kwargs) -> Any: database_provider=self.providers.database, prompt_provider=self.providers.prompt, chunking_provider=self.providers.chunking, - kg_batch_size=self.config.kg.batch_size, + config=AsyncPipe.PipeConfig(name="kg_extraction_pipe"), ) def create_kg_storage_pipe(self, *args, **kwargs) -> Any: @@ -561,6 +568,7 @@ def create_kg_storage_pipe(self, *args, **kwargs) -> Any: return KGStoragePipe( kg_provider=self.providers.kg, embedding_provider=self.providers.embedding, + config=AsyncPipe.PipeConfig(name="kg_storage_pipe"), ) def create_kg_search_pipe(self, *args, **kwargs) -> Any: @@ -574,6 +582,9 @@ def create_kg_search_pipe(self, *args, **kwargs) -> Any: llm_provider=self.providers.llm, prompt_provider=self.providers.prompt, embedding_provider=self.providers.embedding, + config=GeneratorPipe.PipeConfig( + name="kg_rag_pipe", task_prompt="kg_search" + ), ) def create_rag_pipe(self, stream: bool = False, *args, **kwargs) -> Any: @@ -583,6 +594,9 @@ def create_rag_pipe(self, stream: bool = False, *args, **kwargs) -> Any: return StreamingSearchRAGPipe( llm_provider=self.providers.llm, prompt_provider=self.providers.prompt, + config=GeneratorPipe.PipeConfig( + name="streaming_rag_pipe", task_prompt="default_rag" + ), ) else: from core.pipes import SearchRAGPipe @@ -590,6 +604,9 @@ def create_rag_pipe(self, stream: bool = False, *args, **kwargs) -> Any: return SearchRAGPipe( llm_provider=self.providers.llm, prompt_provider=self.providers.prompt, + config=GeneratorPipe.PipeConfig( + name="search_rag_pipe", task_prompt="default_rag" + ), ) def create_kg_node_extraction_pipe(self, *args, **kwargs) -> Any: @@ -599,6 +616,7 @@ def create_kg_node_extraction_pipe(self, *args, **kwargs) -> Any: kg_provider=self.providers.kg, llm_provider=self.providers.llm, prompt_provider=self.providers.prompt, + 
config=AsyncPipe.PipeConfig(name="kg_node_extraction_pipe"), ) def create_kg_node_description_pipe(self, *args, **kwargs) -> Any: @@ -609,6 +627,7 @@ def create_kg_node_description_pipe(self, *args, **kwargs) -> Any: llm_provider=self.providers.llm, prompt_provider=self.providers.prompt, embedding_provider=self.providers.embedding, + config=AsyncPipe.PipeConfig(name="kg_node_description_pipe"), ) def create_kg_clustering_pipe(self, *args, **kwargs) -> Any: @@ -619,6 +638,7 @@ def create_kg_clustering_pipe(self, *args, **kwargs) -> Any: llm_provider=self.providers.llm, prompt_provider=self.providers.prompt, embedding_provider=self.providers.embedding, + config=AsyncPipe.PipeConfig(name="kg_clustering_pipe"), ) def create_kg_community_summary_pipe(self, *args, **kwargs) -> Any: @@ -629,6 +649,7 @@ def create_kg_community_summary_pipe(self, *args, **kwargs) -> Any: llm_provider=self.providers.llm, prompt_provider=self.providers.prompt, embedding_provider=self.providers.embedding, + config=AsyncPipe.PipeConfig(name="kg_community_summary_pipe"), ) @@ -693,15 +714,15 @@ def create_pipelines( search_pipeline=search_pipeline, rag_pipeline=rag_pipeline or self.create_rag_pipeline( - search_pipeline=search_pipeline, - stream=False, + search_pipeline, + False, *args, **kwargs, ), streaming_rag_pipeline=streaming_rag_pipeline or self.create_rag_pipeline( - search_pipeline=search_pipeline, - stream=True, + search_pipeline, + True, *args, **kwargs, ), @@ -733,30 +754,32 @@ def create_agents( rag_agent=rag_agent_override or self.create_rag_agent(*args, **kwargs), streaming_rag_agent=stream_rag_agent_override - or self.create_rag_agent(*args, **kwargs, stream=True), + or self.create_streaming_rag_agent(*args, **kwargs), ) - def create_rag_agent( - self, stream: bool = False, *args, **kwargs - ) -> R2RRAGAgent: + def create_streaming_rag_agent( + self, *args, **kwargs + ) -> R2RStreamingRAGAgent: if not self.providers.llm or not self.providers.prompt: raise ValueError( "LLM and 
Prompt providers are required for RAG Agent" ) - if stream: - rag_agent = R2RStreamingRAGAgent( - llm_provider=self.providers.llm, - prompt_provider=self.providers.prompt, - config=self.config.agent, - search_pipeline=self.pipelines.search_pipeline, - ) - else: - rag_agent = R2RRAGAgent( - llm_provider=self.providers.llm, - prompt_provider=self.providers.prompt, - config=self.config.agent, - search_pipeline=self.pipelines.search_pipeline, - ) + return R2RStreamingRAGAgent( + llm_provider=self.providers.llm, + prompt_provider=self.providers.prompt, + config=self.config.agent, + search_pipeline=self.pipelines.search_pipeline, + ) - return rag_agent + def create_rag_agent(self, *args, **kwargs) -> R2RRAGAgent: + if not self.providers.llm or not self.providers.prompt: + raise ValueError( + "LLM and Prompt providers are required for RAG Agent" + ) + return R2RRAGAgent( + llm_provider=self.providers.llm, + prompt_provider=self.providers.prompt, + config=self.config.agent, + search_pipeline=self.pipelines.search_pipeline, + ) diff --git a/py/core/main/config.py b/py/core/main/config.py index 0dbd347ad..8069b660f 100644 --- a/py/core/main/config.py +++ b/py/core/main/config.py @@ -7,7 +7,7 @@ import toml from pydantic import BaseModel -from ..base.abstractions.llm import GenerationConfig +from ..base.abstractions import GenerationConfig from ..base.agent.agent import AgentConfig from ..base.logging.run_logger import LoggingConfig from ..base.providers.auth import AuthConfig @@ -31,11 +31,11 @@ class R2RConfig: current_file_path, "..", "..", "r2r.toml" ) - CONFIG_OPTIONS = {} - for file in os.listdir(config_dir_root): - if file.endswith(".toml"): - CONFIG_OPTIONS[file.removesuffix(".toml")] = os.path.join( - config_dir_root, file + CONFIG_OPTIONS: dict[str, Optional[str]] = {} + for file_ in os.listdir(config_dir_root): + if file_.endswith(".toml"): + CONFIG_OPTIONS[file_.removesuffix(".toml")] = os.path.join( + config_dir_root, file_ ) CONFIG_OPTIONS["default"] = None @@ 
-116,25 +116,26 @@ def __init__( base_path / default_config[section]["file_path"] ) setattr(self, section, default_config[section]) - self.completion = CompletionConfig.create(**self.completion) + + self.completion = CompletionConfig.create(**self.completion) # type: ignore # override GenerationConfig defaults GenerationConfig.set_default( **self.completion.generation_config.dict() ) - self.auth = AuthConfig.create(**self.auth) - self.chunking = ChunkingConfig.create(**self.chunking) - self.crypto = CryptoConfig.create(**self.crypto) - self.database = DatabaseConfig.create(**self.database) - self.embedding = EmbeddingConfig.create(**self.embedding) - self.kg = KGConfig.create(**self.kg) - self.logging = LoggingConfig.create(**self.logging) - if "chunking_config" not in self.parsing: - self.parsing["chunking_config"] = self.chunking - self.parsing = ParsingConfig.create(**self.parsing) - self.prompt = PromptConfig.create(**self.prompt) - self.agent = AgentConfig.create(**self.agent) - self.file = FileConfig.create(**self.file) + self.auth = AuthConfig.create(**self.auth) # type: ignore + self.chunking = ChunkingConfig.create(**self.chunking) # type: ignore + self.crypto = CryptoConfig.create(**self.crypto) # type: ignore + self.database = DatabaseConfig.create(**self.database) # type: ignore + self.embedding = EmbeddingConfig.create(**self.embedding) # type: ignore + self.kg = KGConfig.create(**self.kg) # type: ignore + self.logging = LoggingConfig.create(**self.logging) # type: ignore + if "chunking_config" not in self.parsing: # type: ignore + self.parsing["chunking_config"] = self.chunking # type: ignore + self.parsing = ParsingConfig.create(**self.parsing) # type: ignore + self.prompt = PromptConfig.create(**self.prompt) # type: ignore + self.agent = AgentConfig.create(**self.agent) # type: ignore + self.file = FileConfig.create(**self.file) # type: ignore def _validate_config_section( self, config_data: dict[str, Any], section: str, keys: list @@ -149,7 +150,7 
@@ def _validate_config_section( ) @classmethod - def from_toml(cls, config_path: str = None) -> "R2RConfig": + def from_toml(cls, config_path: Optional[str] = None) -> "R2RConfig": if config_path is None: config_path = R2RConfig.default_config_path @@ -192,7 +193,7 @@ def _serialize_config(config_section: Any) -> dict: for k, v in config_section.items() } elif isinstance(config_section, (list, tuple)): - return [ + return [ # type: ignore R2RConfig._serialize_config(item) for item in config_section ] elif isinstance(config_section, Enum): diff --git a/py/core/main/hatchet/base.py b/py/core/main/hatchet/base.py index 6718100e0..d38a51d52 100644 --- a/py/core/main/hatchet/base.py +++ b/py/core/main/hatchet/base.py @@ -1,8 +1,6 @@ from hatchet_sdk import Hatchet try: - import os - r2r_hatchet = Hatchet() except ImportError: r2r_hatchet = None diff --git a/py/core/main/hatchet/ingestion_workflow.py b/py/core/main/hatchet/ingestion_workflow.py index 231e50ba2..3e4c11227 100644 --- a/py/core/main/hatchet/ingestion_workflow.py +++ b/py/core/main/hatchet/ingestion_workflow.py @@ -38,21 +38,6 @@ async def parse(self, context: Context) -> dict: status=IngestionStatus.PARSING, ) - return { - "status": "Successfully parsed file", - "document_info": document_info.to_dict(), - } - - @r2r_hatchet.step(parents=["parse"], timeout="60m") - async def extract(self, context: Context) -> dict: - document_info_dict = context.step_output("parse")["document_info"] - document_info = DocumentInfo(**document_info_dict) - - await self.ingestion_service.update_document_status( - document_info, - status=IngestionStatus.EXTRACTING, - ) - extractions_generator = await self.ingestion_service.parse_file( document_info ) @@ -71,9 +56,9 @@ async def extract(self, context: Context) -> dict: "document_info": document_info.to_dict(), } - @r2r_hatchet.step(parents=["extract"], timeout="60m") + @r2r_hatchet.step(parents=["parse"], timeout="60m") async def chunk(self, context: Context) -> dict: - 
document_info_dict = context.step_output("extract")["document_info"] + document_info_dict = context.step_output("parse")["document_info"] document_info = DocumentInfo(**document_info_dict) await self.ingestion_service.update_document_status( @@ -81,7 +66,7 @@ async def chunk(self, context: Context) -> dict: status=IngestionStatus.CHUNKING, ) - extractions = context.step_output("extract")["extractions"] + extractions = context.step_output("parse")["extractions"] chunking_config = context.workflow_input()["request"].get( "chunking_config" ) @@ -128,7 +113,7 @@ async def embed(self, context: Context) -> dict: status=IngestionStatus.STORING, ) - storage_generator = await self.ingestion_service.store_embeddings( + storage_generator = await self.ingestion_service.store_embeddings( # type: ignore embeddings ) @@ -224,11 +209,12 @@ async def update_files(self, context: Context) -> None: message="Number of ids does not match number of files.", ) - documents_overview = await self.ingestion_service.providers.database.relational.get_documents_overview( - filter_document_ids=document_ids, - filter_user_ids=None if user.is_superuser else [user.id], - ) - + documents_overview = ( + await self.ingestion_service.providers.database.relational.get_documents_overview( + filter_document_ids=document_ids, + filter_user_ids=None if user.is_superuser else [user.id], + ) + )["results"] if len(documents_overview) != len(document_ids): raise R2RException( status_code=404, diff --git a/py/core/main/hatchet/restructure_workflow.py b/py/core/main/hatchet/restructure_workflow.py index ce10ebf70..539624e19 100644 --- a/py/core/main/hatchet/restructure_workflow.py +++ b/py/core/main/hatchet/restructure_workflow.py @@ -3,11 +3,11 @@ import logging import uuid -from hatchet_sdk import Context +from hatchet_sdk import ConcurrencyLimitStrategy, Context from core import GenerationConfig, IngestionStatus, KGCreationSettings from core.base import R2RDocumentProcessingError -from 
core.base.abstractions.document import RestructureStatus +from core.base.abstractions import RestructureStatus from ..services import RestructureService from .base import r2r_hatchet @@ -21,7 +21,7 @@ def __init__(self, restructure_service: RestructureService): self.restructure_service = restructure_service @r2r_hatchet.step(retries=3, timeout="60m") - async def kg_extract_and_store(self, context: Context) -> None: + async def kg_extract_and_store(self, context: Context) -> dict: input_data = context.workflow_input()["request"] document_id = uuid.UUID(input_data["document_id"]) fragment_merge_count = input_data["fragment_merge_count"] @@ -29,11 +29,10 @@ async def kg_extract_and_store(self, context: Context) -> None: entity_types = input_data["entity_types"] relation_types = input_data["relation_types"] - document_overview = ( - await self.restructure_service.providers.database.relational.get_documents_overview( - filter_document_ids=[document_id] - ) - )[0] + document_overview = await self.restructure_service.providers.database.relational.get_documents_overview( + filter_document_ids=[document_id] + ) + document_overview = document_overview["results"][0] try: @@ -88,7 +87,6 @@ async def kg_extract_and_store(self, context: Context) -> None: document_id=document_id, ) - return {"result": None} @@ -98,15 +96,21 @@ def __init__(self, restructure_service: RestructureService): self.restructure_service = restructure_service @r2r_hatchet.step(retries=1) - async def kg_extraction_ingress(self, context: Context) -> None: + async def kg_extraction_ingress(self, context: Context) -> dict: input_data = context.workflow_input()["request"] + kg_creation_settings = KGCreationSettings( **json.loads(input_data["kg_creation_settings"]) ) + documents_overview = ( + await self.restructure_service.providers.database.relational.get_documents_overview() + ) + documents_overview = documents_overview["results"] + document_ids = [ doc.id - for doc in await 
self.restructure_service.providers.database.relational.get_documents_overview() + for doc in documents_overview if doc.restructuring_status != IngestionStatus.SUCCESS ] @@ -115,6 +119,7 @@ async def kg_extraction_ingress(self, context: Context) -> None: documents_overviews = await self.restructure_service.providers.database.relational.get_documents_overview( filter_document_ids=document_ids ) + documents_overviews = documents_overviews["results"] # Only run if restructuring_status is pending or failure filtered_document_ids = [] @@ -123,6 +128,7 @@ async def kg_extraction_ingress(self, context: Context) -> None: if restructuring_status in [ RestructureStatus.PENDING, RestructureStatus.FAILURE, + RestructureStatus.ENRICHMENT_FAILURE, ]: filtered_document_ids.append(document_overview.id) elif restructuring_status == RestructureStatus.SUCCESS: @@ -183,7 +189,7 @@ def __init__(self, restructure_service: RestructureService): self.restructure_service = restructure_service @r2r_hatchet.step(retries=3, timeout="60m") - async def kg_node_creation(self, context: Context) -> None: + async def kg_node_creation(self, context: Context) -> dict: input_data = context.workflow_input()["request"] max_description_input_length = input_data[ "max_description_input_length" @@ -194,7 +200,7 @@ async def kg_node_creation(self, context: Context) -> None: return {"result": None} @r2r_hatchet.step(retries=3, parents=["kg_node_creation"], timeout="60m") - async def kg_clustering(self, context: Context) -> None: + async def kg_clustering(self, context: Context) -> dict: input_data = context.workflow_input()["request"] skip_clustering = input_data["skip_clustering"] force_enrichment = input_data["force_enrichment"] @@ -208,6 +214,7 @@ async def kg_clustering(self, context: Context) -> None: documents_overview = ( await self.restructure_service.providers.database.relational.get_documents_overview() ) + documents_overview = documents_overview["results"] if not force_enrichment: if any( @@ -251,6 
+258,7 @@ async def kg_clustering(self, context: Context) -> None: result = results[0] + # Run community summary workflows workflows = [] for level, community_id in result["intermediate_communities"]: logger.info( @@ -272,10 +280,6 @@ async def kg_clustering(self, context: Context) -> None: ) results = await asyncio.gather(*workflows) - logger.info( - f"KG Community Summary Workflows completed: {len(results)}" - ) - else: logger.info( "Skipping Leiden clustering as skip_clustering is True, also skipping community summary workflows" @@ -287,6 +291,7 @@ async def kg_clustering(self, context: Context) -> None: documents_overview = ( await self.restructure_service.providers.database.relational.get_documents_overview() ) + documents_overview = documents_overview["results"] for document_overview in documents_overview: if ( document_overview.restructuring_status @@ -301,14 +306,14 @@ async def kg_clustering(self, context: Context) -> None: logger.error( f"Error in kg_clustering for document {document_overview.id}: {str(e)}" ) - - return {"result": None} + raise e finally: documents_overview = ( await self.restructure_service.providers.database.relational.get_documents_overview() ) + documents_overview = documents_overview["results"] for document_overview in documents_overview: if ( document_overview.restructuring_status @@ -330,7 +335,7 @@ def __init__(self, restructure_service: RestructureService): self.restructure_service = restructure_service @r2r_hatchet.step(retries=1, timeout="60m") - async def kg_community_summary(self, context: Context) -> None: + async def kg_community_summary(self, context: Context) -> dict: input_data = context.workflow_input()["request"] community_id = input_data["community_id"] level = input_data["level"] diff --git a/py/core/main/services/auth_service.py b/py/core/main/services/auth_service.py index 078835264..57029bcbc 100644 --- a/py/core/main/services/auth_service.py +++ b/py/core/main/services/auth_service.py @@ -3,7 +3,7 @@ from uuid 
import UUID from core.base import R2RException, RunLoggingSingleton, RunManager, Token -from core.base.api.models.auth.responses import UserResponse +from core.base.api.models import UserResponse from core.telemetry.telemetry_decorator import telemetry_event from ..abstractions import R2RAgents, R2RPipelines, R2RPipes, R2RProviders @@ -33,11 +33,13 @@ def __init__( ) @telemetry_event("RegisterUser") - async def register(self, email: str, password: str) -> UserResponse: + async def register(self, email: str, password: str) -> dict[str, str]: return await self.providers.auth.register(email, password) @telemetry_event("VerifyEmail") - async def verify_email(self, email: str, verification_code: str) -> bool: + async def verify_email( + self, email: str, verification_code: str + ) -> dict[str, str]: if not self.config.auth.require_email_verification: raise R2RException( @@ -70,7 +72,7 @@ async def login(self, email: str, password: str) -> dict[str, Token]: @telemetry_event("GetCurrentUser") async def user(self, token: str) -> UserResponse: - token_data = self.providers.auth.decode_token(token) + token_data = await self.providers.auth.decode_token(token) user = await self.providers.database.relational.get_user_by_email( token_data.email ) @@ -138,7 +140,7 @@ async def update_user( async def delete_user( self, user_id: UUID, - password: Optional[str] = None, + password: str, delete_vector_data: bool = False, is_superuser: bool = False, ) -> dict[str, str]: @@ -147,7 +149,7 @@ async def delete_user( raise R2RException(status_code=404, message="User not found") if not ( is_superuser - or self.providers.auth.crypto_provider.verify_password( + or self.providers.auth.crypto_provider.verify_password( # type: ignore password, user.hashed_password ) ): @@ -160,7 +162,9 @@ async def delete_user( @telemetry_event("CleanExpiredBlacklistedTokens") async def clean_expired_blacklisted_tokens( - self, max_age_hours: int = 7 * 24, current_time: datetime = None + self, + max_age_hours: 
int = 7 * 24, + current_time: Optional[datetime] = None, ): await self.providers.database.relational.clean_expired_blacklisted_tokens( max_age_hours, current_time diff --git a/py/core/main/services/ingestion_service.py b/py/core/main/services/ingestion_service.py index 448f55e31..a0a3ac276 100644 --- a/py/core/main/services/ingestion_service.py +++ b/py/core/main/services/ingestion_service.py @@ -1,7 +1,7 @@ import json import logging from datetime import datetime -from typing import Any, Optional, Union +from typing import Any, AsyncGenerator, Optional, Sequence, Union from uuid import UUID from core.base import ( @@ -17,10 +17,10 @@ VectorEntry, decrement_version, ) +from core.base.api.models import UserResponse from core.base.providers import ChunkingConfig from core.telemetry.telemetry_decorator import telemetry_event -from ...base.api.models.auth.responses import UserResponse from ..abstractions import R2RAgents, R2RPipelines, R2RPipes, R2RProviders from ..config import R2RConfig from .base import Service @@ -58,11 +58,11 @@ async def ingest_file_ingress( self, file_data: dict, user: UserResponse, + document_id: UUID, + size_in_bytes, metadata: Optional[dict] = None, - document_id: Optional[UUID] = None, version: Optional[str] = None, is_update: bool = False, - size_in_bytes: Optional[int] = None, *args: Any, **kwargs: Any, ) -> dict: @@ -88,21 +88,25 @@ async def ingest_file_ingress( size_in_bytes, ) - if existing_document_info := await self.providers.database.relational.get_documents_overview( - filter_user_ids=[user.id], - filter_document_ids=[document_id], - ): - existing_doc = existing_document_info[0] + existing_document_info = ( + await self.providers.database.relational.get_documents_overview( + filter_user_ids=[user.id], + filter_document_ids=[document_id], + ) + ) + if documents := existing_document_info.get("documents", []): + existing_doc = documents[0] if is_update: if ( existing_doc.version >= version - and existing_doc.ingestion_status == 
"success" + and existing_doc.ingestion_status + == IngestionStatus.SUCCESS ): raise R2RException( status_code=409, message=f"Must increment version number before attempting to overwrite document {document_id}.", ) - elif existing_doc.ingestion_status != "failure": + elif existing_doc.ingestion_status != IngestionStatus.FAILURE: raise R2RException( status_code=409, message=f"Document {document_id} was already ingested and is not in a failed state.", @@ -144,7 +148,7 @@ def _create_document_info( metadata=metadata, version=version, size_in_bytes=size_in_bytes, - ingestion_status="pending", + ingestion_status=IngestionStatus.PENDING, created_at=datetime.now(), updated_at=datetime.now(), ) @@ -152,33 +156,29 @@ def _create_document_info( async def parse_file( self, document_info: DocumentInfo, - ) -> list[DocumentFragment]: - file_name, file_wrapper, size_in_bytes = ( - await self.providers.file.retrieve_file(document_info.id) + ) -> AsyncGenerator[DocumentFragment, None]: + return await self.pipes.parsing_pipe.run( + input=self.pipes.parsing_pipe.Input( + message=Document( + id=document_info.id, + collection_ids=document_info.collection_ids, + user_id=document_info.user_id, + type=document_info.type, + metadata={ + "document_type": document_info.type.value, + **document_info.metadata, + }, + ) + ), + state=None, + run_manager=self.run_manager, ) - with file_wrapper as file_content_stream: - return await self.pipes.parsing_pipe.run( - input=self.pipes.parsing_pipe.Input( - message=Document( - id=document_info.id, - collection_ids=document_info.collection_ids, - user_id=document_info.user_id, - type=document_info.type, - metadata={ - "document_type": document_info.type.value, - **document_info.metadata, - }, - ) - ), - run_manager=self.run_manager, - ) - async def chunk_document( self, parsed_documents: list[dict], - chunking_config: Optional[ChunkingConfig] = None, - ) -> list[DocumentFragment]: + chunking_config: ChunkingConfig, + ) -> AsyncGenerator[DocumentFragment, 
None]: return await self.pipes.chunking_pipe.run( input=self.pipes.chunking_pipe.Input( @@ -187,6 +187,7 @@ async def chunk_document( for chunk in parsed_documents ] ), + state=None, run_manager=self.run_manager, chunking_config=chunking_config, ) @@ -194,7 +195,7 @@ async def chunk_document( async def embed_document( self, chunked_documents: list[dict], - ) -> list[str]: + ) -> AsyncGenerator[VectorEntry, None]: return await self.pipes.embedding_pipe.run( input=self.pipes.embedding_pipe.Input( message=[ @@ -202,13 +203,14 @@ async def embed_document( for chunk in chunked_documents ] ), + state=None, run_manager=self.run_manager, ) async def store_embeddings( self, - embeddings: list[Union[dict, VectorEntry]], - ) -> list[str]: + embeddings: Sequence[Union[dict, VectorEntry]], + ) -> AsyncGenerator[str, None]: vector_entries = [ ( embedding @@ -220,6 +222,7 @@ async def store_embeddings( return await self.pipes.vector_storage_pipe.run( input=self.pipes.vector_storage_pipe.Input(message=vector_entries), + state=None, run_manager=self.run_manager, ) diff --git a/py/core/main/services/management_service.py b/py/core/main/services/management_service.py index 7cddd5111..f8aa19255 100644 --- a/py/core/main/services/management_service.py +++ b/py/core/main/services/management_service.py @@ -14,6 +14,7 @@ R2RException, RunLoggingSingleton, RunManager, + RunType, ) from core.telemetry.telemetry_decorator import telemetry_event @@ -50,7 +51,7 @@ async def logs( self, offset: int = 0, limit: int = 100, - run_type_filter: Optional[str] = None, + run_type_filter: Optional[RunType] = None, ): if self.logging_connection is None: raise R2RException( @@ -128,7 +129,7 @@ async def analytics( else log.get("key") == value ) - log_processor = LogProcessor(filters) + log_processor = LogProcessor(filters) # type: ignore for log in logs: if "entries" in log and isinstance(log["entries"], list): log_processor.process_log(log) @@ -198,7 +199,7 @@ async def score_completion( self, 
message_id: UUID, score: float = 0.0, - run_type_filter: str = None, + run_type_filter: Optional[RunType] = None, max_runs: int = 100, *args: Any, **kwargs: Any, @@ -260,7 +261,7 @@ async def users_overview( @telemetry_event("Delete") async def delete( self, - filters: dict[str, str], + filters: dict[str, Any], *args, **kwargs, ): @@ -282,7 +283,7 @@ async def delete( logger.error(f"Error deleting from vector database: {e}") vector_delete_results = {} - document_ids_to_purge = set() + document_ids_to_purge: set[UUID] = set() if vector_delete_results: document_ids_to_purge.update( doc_id @@ -296,14 +297,16 @@ async def delete( relational_filters = {} if "document_id" in filters: relational_filters["filter_document_ids"] = [ - UUID(filters["document_id"]) + UUID(filters["document_id"]["$eq"]) ] if "user_id" in filters: - relational_filters["filter_user_ids"] = [UUID(filters["user_id"])] + relational_filters["filter_user_ids"] = [ + UUID(filters["user_id"]["$eq"]) + ] if "collection_ids" in filters: relational_filters["filter_collection_ids"] = [ UUID(collection_id) - for collection_id in filters["collection_ids"] + for collection_id in filters["collection_ids"]["$in"] ] try: @@ -317,7 +320,9 @@ async def delete( documents_overview = [] if documents_overview: - document_ids_to_purge.update(doc.id for doc in documents_overview) + document_ids_to_purge.update( + doc.id for doc in documents_overview["results"] + ) if not document_ids_to_purge: raise R2RException( @@ -354,7 +359,7 @@ async def documents_overview( collection_ids: Optional[list[UUID]] = None, document_ids: Optional[list[UUID]] = None, offset: Optional[int] = 0, - limit: Optional[int] = 100, + limit: Optional[int] = 1000, *args: Any, **kwargs: Any, ): @@ -379,18 +384,6 @@ async def document_chunks( document_id, offset=offset, limit=limit ) - @telemetry_event("UpdatePrompt") - async def update_prompt( - self, - name: str, - template: Optional[str] = None, - input_types: Optional[dict[str, str]] = None, - ): - 
if input_types is None: - input_types = {} - self.providers.prompt.update_prompt(name, template, input_types) - return {"message": f"Prompt '{name}' updated successfully."} - @telemetry_event("InspectKnowledgeGraph") async def inspect_knowledge_graph( self, @@ -477,7 +470,7 @@ async def inspect_knowledge_graph( message=f"An error occurred while fetching relationships: {str(e)}", ) - @telemetry_event("AssignDocumentToGroup") + @telemetry_event("AssignDocumentToCollection") async def assign_document_to_collection( self, document_id: str, collection_id: UUID ): @@ -490,7 +483,7 @@ async def assign_document_to_collection( ) return {"message": "Document assigned to collection successfully"} - @telemetry_event("RemoveDocumentFromGroup") + @telemetry_event("RemoveDocumentFromCollection") async def remove_document_from_collection( self, document_id: str, collection_id: UUID ): @@ -502,26 +495,21 @@ async def remove_document_from_collection( ) return {"message": "Document removed from collection successfully"} - @telemetry_event("DocumentGroups") + @telemetry_event("DocumentCollections") async def document_collections( self, document_id: str, offset: int = 0, limit: int = 100 ): - collection_ids = ( - await self.providers.database.relational.document_collections( - document_id, offset=offset, limit=limit - ) + return await self.providers.database.relational.document_collections( + document_id, offset=offset, limit=limit ) - return { - "collection_ids": [ - str(collection_id) for collection_id in collection_ids - ] - } def _process_relationships( self, relationships: list[Tuple[str, str, str]] ) -> Tuple[Dict[str, list[str]], Dict[str, Dict[str, list[str]]]]: graph = defaultdict(list) - grouped = defaultdict(lambda: defaultdict(list)) + grouped: Dict[str, Dict[str, list[str]]] = defaultdict( + lambda: defaultdict(list) + ) for subject, relation, obj in relationships: graph[subject].append(obj) grouped[subject][relation].append(obj) @@ -602,7 +590,7 @@ def _get_central_nodes( 
} return sorted(centrality.items(), key=lambda x: x[1], reverse=True)[:5] - @telemetry_event("CreateGroup") + @telemetry_event("CreateCollection") async def create_collection( self, name: str, description: str = "" ) -> UUID: @@ -610,29 +598,32 @@ async def create_collection( name, description ) - @telemetry_event("GetGroup") + @telemetry_event("GetCollection") async def get_collection(self, collection_id: UUID) -> Optional[dict]: return await self.providers.database.relational.get_collection( collection_id ) - @telemetry_event("UpdateGroup") + @telemetry_event("UpdateCollection") async def update_collection( - self, collection_id: UUID, name: str = None, description: str = None + self, + collection_id: UUID, + name: Optional[str] = None, + description: Optional[str] = None, ) -> bool: return await self.providers.database.relational.update_collection( collection_id, name, description ) - @telemetry_event("DeleteGroup") + @telemetry_event("DeleteCollection") async def delete_collection(self, collection_id: UUID) -> bool: await self.providers.database.relational.delete_collection( collection_id ) - await self.providers.database.vector.delete_collection(collection_id) + self.providers.database.vector.delete_collection(collection_id) return True - @telemetry_event("ListGroups") + @telemetry_event("ListCollections") async def list_collections( self, offset: int = 0, limit: int = 100 ) -> list[dict]: @@ -640,7 +631,7 @@ async def list_collections( offset=offset, limit=limit ) - @telemetry_event("AddUserToGroup") + @telemetry_event("AddUserToCollection") async def add_user_to_collection( self, user_id: UUID, collection_id: UUID ) -> bool: @@ -648,7 +639,7 @@ async def add_user_to_collection( user_id, collection_id ) - @telemetry_event("RemoveUserFromGroup") + @telemetry_event("RemoveUserFromCollection") async def remove_user_from_collection( self, user_id: UUID, collection_id: UUID ) -> bool: @@ -656,7 +647,7 @@ async def remove_user_from_collection( user_id, 
collection_id ) - @telemetry_event("GetUsersInGroup") + @telemetry_event("GetUsersInCollection") async def get_users_in_collection( self, collection_id: UUID, offset: int = 0, limit: int = 100 ) -> list[dict]: @@ -666,7 +657,7 @@ async def get_users_in_collection( ) ) - @telemetry_event("GetGroupsForUser") + @telemetry_event("GetCollectionsForUser") async def get_collections_for_user( self, user_id: UUID, offset: int = 0, limit: int = 100 ) -> list[dict]: @@ -676,7 +667,7 @@ async def get_collections_for_user( ) ) - @telemetry_event("GroupsOverview") + @telemetry_event("CollectionsOverview") async def collections_overview( self, collection_ids: Optional[list[UUID]] = None, @@ -697,7 +688,7 @@ async def collections_overview( ) ) - @telemetry_event("GetDocumentsInGroup") + @telemetry_event("GetDocumentsInCollection") async def documents_in_collection( self, collection_id: UUID, offset: int = 0, limit: int = 100 ) -> list[dict]: @@ -707,14 +698,6 @@ async def documents_in_collection( ) ) - @telemetry_event("DocumentGroups") - async def document_collections( - self, document_id: str, offset: int = 0, limit: int = 100 - ) -> list[str]: - return await self.providers.database.relational.document_collections( - document_id, offset, limit - ) - @telemetry_event("AddPrompt") async def add_prompt( self, name: str, template: str, input_types: dict[str, str] @@ -731,10 +714,10 @@ async def get_prompt( prompt_name: str, inputs: Optional[dict[str, Any]] = None, prompt_override: Optional[str] = None, - ) -> str: + ) -> dict: try: return { - "message": await self.providers.prompt.get_prompt( + "message": self.providers.prompt.get_prompt( prompt_name, inputs, prompt_override ) } @@ -743,7 +726,7 @@ async def get_prompt( @telemetry_event("GetAllPrompts") async def get_all_prompts(self) -> dict[str, Prompt]: - return await self.providers.prompt.get_all_prompts() + return self.providers.prompt.get_all_prompts() @telemetry_event("UpdatePrompt") async def update_prompt( diff --git 
a/py/core/main/services/restructure_service.py b/py/core/main/services/restructure_service.py index c0731d75f..e38f912f2 100644 --- a/py/core/main/services/restructure_service.py +++ b/py/core/main/services/restructure_service.py @@ -62,11 +62,13 @@ async def kg_extract_and_store( "relation_types": relation_types, } ), + state=None, run_manager=self.run_manager, ) result_gen = await self.pipes.kg_storage_pipe.run( input=self.pipes.kg_storage_pipe.Input(message=triples), + state=None, run_manager=self.run_manager, ) @@ -76,6 +78,7 @@ async def kg_extract_and_store( async def kg_node_creation(self, max_description_input_length: int): node_extractions = await self.pipes.kg_node_extraction_pipe.run( input=self.pipes.kg_node_extraction_pipe.Input(message=None), + state=None, run_manager=self.run_manager, ) result_gen = await self.pipes.kg_node_description_pipe.run( @@ -85,6 +88,7 @@ async def kg_node_creation(self, max_description_input_length: int): "max_description_input_length": max_description_input_length, } ), + state=None, run_manager=self.run_manager, ) return await _collect_results(result_gen) @@ -98,6 +102,7 @@ async def kg_clustering(self, leiden_params, generation_config): "generation_config": generation_config, } ), + state=None, run_manager=self.run_manager, ) @@ -120,6 +125,7 @@ async def kg_community_summary( "max_summary_input_length": max_summary_input_length, } ), + state=None, run_manager=self.run_manager, ) return await _collect_results(summary_results) diff --git a/py/core/main/services/retrieval_service.py b/py/core/main/services/retrieval_service.py index da55c3806..add854b54 100644 --- a/py/core/main/services/retrieval_service.py +++ b/py/core/main/services/retrieval_service.py @@ -14,13 +14,13 @@ R2RException, RunLoggingSingleton, RunManager, + RunType, VectorSearchSettings, generate_id_from_label, manage_run, to_async_generator, ) -from core.base.api.models import RAGResponse, SearchResponse -from core.base.api.models.auth.responses import 
UserResponse +from core.base.api.models import RAGResponse, SearchResponse, UserResponse from core.telemetry.telemetry_decorator import telemetry_event from ..abstractions import R2RAgents, R2RPipelines, R2RPipes, R2RProviders @@ -60,7 +60,7 @@ async def search( *args, **kwargs, ) -> SearchResponse: - async with manage_run(self.run_manager, "search_app") as run_id: + async with manage_run(self.run_manager, RunType.RETRIEVAL) as run_id: t0 = time.time() if ( @@ -94,14 +94,17 @@ async def search( for filter, value in vector_search_settings.filters.items(): if isinstance(value, UUID): vector_search_settings.filters[filter] = str(value) - + merged_kwargs = { + "input": to_async_generator([query]), + "state": None, + "vector_search_settings": vector_search_settings, + "kg_search_settings": kg_search_settings, + "run_manager": self.run_manager, + **kwargs, + } results = await self.pipelines.search_pipeline.run( - input=to_async_generator([query]), - vector_search_settings=vector_search_settings, - kg_search_settings=kg_search_settings, - run_manager=self.run_manager, *args, - **kwargs, + **merged_kwargs, ) t1 = time.time() @@ -113,7 +116,7 @@ async def search( value=latency, ) - return results.dict() + return results.as_dict() @telemetry_event("RAG") async def rag( @@ -125,7 +128,7 @@ async def rag( *args, **kwargs, ) -> RAGResponse: - async with manage_run(self.run_manager, "rag_app") as run_id: + async with manage_run(self.run_manager, RunType.RETRIEVAL) as run_id: try: # TODO - Remove these transforms once we have a better way to handle this for ( @@ -158,14 +161,19 @@ async def rag( **kwargs, ) + merged_kwargs = { + "input": to_async_generator([query]), + "state": None, + "vector_search_settings": vector_search_settings, + "kg_search_settings": kg_search_settings, + "run_manager": self.run_manager, + "rag_generation_config": rag_generation_config, + **kwargs, + } + results = await self.pipelines.rag_pipeline.run( - input=to_async_generator([query]), - 
run_manager=self.run_manager, - vector_search_settings=vector_search_settings, - kg_search_settings=kg_search_settings, - rag_generation_config=rag_generation_config, *args, - **kwargs, + **merged_kwargs, ) if len(results) == 0: @@ -221,17 +229,22 @@ async def stream_rag_response( ): async def stream_response(): async with manage_run(self.run_manager, "rag"): + merged_kwargs = { + "input": to_async_generator([query]), + "state": None, + "run_manager": self.run_manager, + "vector_search_settings": vector_search_settings, + "kg_search_settings": kg_search_settings, + "rag_generation_config": rag_generation_config, + "completion_record": completion_record, + **kwargs, + } + async for ( chunk ) in await self.pipelines.streaming_rag_pipeline.run( - input=to_async_generator([query]), - run_manager=self.run_manager, - vector_search_settings=vector_search_settings, - kg_search_settings=kg_search_settings, - rag_generation_config=rag_generation_config, - completion_record=completion_record, *args, - **kwargs, + **merged_kwargs, ): yield chunk @@ -249,7 +262,7 @@ async def agent( *args, **kwargs, ): - async with manage_run(self.run_manager, "agent_app") as run_id: + async with manage_run(self.run_manager, RunType.RETRIEVAL) as run_id: try: t0 = time.time() @@ -403,7 +416,7 @@ def prepare_agent_input( user: UserResponse, ) -> dict: return { - "messages": [message.to_dict() for message in messages], + "messages": [message.to_dict() for message in messages], # type: ignore "vector_search_settings": vector_search_settings.to_dict(), "kg_search_settings": kg_search_settings.to_dict(), "rag_generation_config": rag_generation_config.to_dict(), @@ -416,7 +429,7 @@ def prepare_agent_input( def parse_agent_input(data: dict): return { "messages": [ - Message.from_dict(message) for message in data["messages"] + Message.from_dict(message) for message in data["messages"] # type: ignore ], "vector_search_settings": VectorSearchSettings.from_dict( data["vector_search_settings"] diff --git 
a/py/core/parsers/media/__init__.py b/py/core/parsers/media/__init__.py index 81a227459..a56ec64a8 100644 --- a/py/core/parsers/media/__init__.py +++ b/py/core/parsers/media/__init__.py @@ -2,7 +2,11 @@ from .docx_parser import DOCXParser from .img_parser import ImageParser from .movie_parser import MovieParser -from .pdf_parser import PDFParser, PDFParserMarker, PDFParserUnstructured +from .pdf_parser import ( # type: ignore + PDFParser, + PDFParserMarker, + PDFParserUnstructured, +) from .ppt_parser import PPTParser __all__ = [ diff --git a/py/core/parsers/media/audio_parser.py b/py/core/parsers/media/audio_parser.py index 2b1017cf8..149b6352c 100644 --- a/py/core/parsers/media/audio_parser.py +++ b/py/core/parsers/media/audio_parser.py @@ -3,7 +3,6 @@ from core.base.parsers.base_parser import AsyncParser from core.parsers.media.openai_helpers import process_audio_with_openai -from core.telemetry.telemetry_decorator import telemetry_event class AudioParser(AsyncParser[bytes]): @@ -15,7 +14,7 @@ def __init__( self.api_base = api_base self.openai_api_key = os.environ.get("OPENAI_API_KEY") - async def ingest( + async def ingest( # type: ignore self, data: bytes, chunk_size: int = 1024 ) -> AsyncGenerator[str, None]: """Ingest audio data and yield a transcription.""" @@ -24,7 +23,7 @@ async def ingest( f.write(data) try: transcription_text = process_audio_with_openai( - open(temp_audio_path, "rb"), self.openai_api_key + open(temp_audio_path, "rb"), self.openai_api_key # type: ignore ) # split text into small chunks and yield them diff --git a/py/core/parsers/media/docx_parser.py b/py/core/parsers/media/docx_parser.py index 5993328a0..acf471d9a 100644 --- a/py/core/parsers/media/docx_parser.py +++ b/py/core/parsers/media/docx_parser.py @@ -1,7 +1,7 @@ from io import BytesIO from typing import AsyncGenerator -from core.base.abstractions.document import DataType +from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser @@ -18,7 
+18,7 @@ def __init__(self): "Error, `python-docx` is required to run `DOCXParser`. Please install it using `pip install python-docx`." ) - async def ingest(self, data: DataType) -> AsyncGenerator[str, None]: + async def ingest(self, data: DataType) -> AsyncGenerator[str, None]: # type: ignore """Ingest DOCX data and yield text from each paragraph.""" if isinstance(data, str): raise ValueError("DOCX data must be in bytes format.") diff --git a/py/core/parsers/media/img_parser.py b/py/core/parsers/media/img_parser.py index 89facbf39..29aeda54b 100644 --- a/py/core/parsers/media/img_parser.py +++ b/py/core/parsers/media/img_parser.py @@ -1,15 +1,11 @@ import base64 import logging import os -from io import BytesIO from typing import AsyncGenerator -from PIL import Image - -from core.base.abstractions.document import DataType +from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser from core.parsers.media.openai_helpers import process_frame_with_openai -from core.telemetry.telemetry_decorator import telemetry_event logger = logging.getLogger(__name__) @@ -30,7 +26,7 @@ def __init__( self.api_base = api_base self.max_image_size = max_image_size - async def ingest( + async def ingest( # type: ignore self, data: DataType, chunk_size: int = 1024 ) -> AsyncGenerator[str, None]: """Ingest image data and yield a description.""" @@ -40,8 +36,8 @@ async def ingest( data = base64.b64encode(data).decode("utf-8") openai_text = process_frame_with_openai( - data, - self.openai_api_key, + data, # type: ignore + self.openai_api_key, # type: ignore self.model, self.max_tokens, self.api_base, diff --git a/py/core/parsers/media/movie_parser.py b/py/core/parsers/media/movie_parser.py index 4899b3cf8..b34df4cc5 100644 --- a/py/core/parsers/media/movie_parser.py +++ b/py/core/parsers/media/movie_parser.py @@ -46,7 +46,7 @@ def __init__( "Error, environment variable `OPENAI_API_KEY` is required to run `MovieParser`." 
) - async def ingest( + async def ingest( # type: ignore self, data: bytes, chunk_size: int = 1024 ) -> AsyncGenerator[str, None]: """Ingest movie data and yield a description.""" @@ -57,13 +57,13 @@ async def ingest( raw_frames, audio_file = self.process_video(temp_video_path) for frame in raw_frames: frame_text = process_frame_with_openai( - frame, self.openai_api_key + frame, self.openai_api_key # type: ignore ) yield frame_text if audio_file: transcription_text = process_audio_with_openai( - audio_file, self.openai_api_key + audio_file, self.openai_api_key # type: ignore ) # split text into small chunks and yield them for i in range(0, len(transcription_text), chunk_size): diff --git a/py/core/parsers/media/openai_helpers.py b/py/core/parsers/media/openai_helpers.py index 02d7dcda8..7b3498df9 100644 --- a/py/core/parsers/media/openai_helpers.py +++ b/py/core/parsers/media/openai_helpers.py @@ -31,7 +31,7 @@ def process_frame_with_openai( }, { "type": "image_url", - "image_url": {"url": f"data:image/jpeg;base64,{data}"}, + "image_url": {"url": f"data:image/jpeg;base64,{data}"}, # type: ignore }, ], } diff --git a/py/core/parsers/media/pdf_parser.py b/py/core/parsers/media/pdf_parser.py index bbf0dd53d..e79eca1a1 100644 --- a/py/core/parsers/media/pdf_parser.py +++ b/py/core/parsers/media/pdf_parser.py @@ -1,3 +1,4 @@ +# type: ignore import asyncio import logging import string @@ -5,7 +6,7 @@ from io import BytesIO from typing import AsyncGenerator -from core.base.abstractions.document import DataType +from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser logger = logging.getLogger(__name__) diff --git a/py/core/parsers/media/ppt_parser.py b/py/core/parsers/media/ppt_parser.py index 8ebf130f3..b623752ce 100644 --- a/py/core/parsers/media/ppt_parser.py +++ b/py/core/parsers/media/ppt_parser.py @@ -1,7 +1,7 @@ from io import BytesIO from typing import AsyncGenerator -from core.base.abstractions.document import DataType 
+from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser @@ -18,7 +18,7 @@ def __init__(self): "Error, `python-pptx` is required to run `PPTParser`. Please install it using `pip install python-pptx`." ) - async def ingest(self, data: DataType) -> AsyncGenerator[str, None]: + async def ingest(self, data: DataType) -> AsyncGenerator[str, None]: # type: ignore """Ingest PPT data and yield text from each slide.""" if isinstance(data, str): raise ValueError("PPT data must be in bytes format.") diff --git a/py/core/parsers/structured/__init__.py b/py/core/parsers/structured/__init__.py index a44b30662..7f2ce90f1 100644 --- a/py/core/parsers/structured/__init__.py +++ b/py/core/parsers/structured/__init__.py @@ -1,3 +1,4 @@ +# type: ignore from .csv_parser import CSVParser, CSVParserAdvanced from .json_parser import JSONParser from .xlsx_parser import XLSXParser, XLSXParserAdvanced diff --git a/py/core/parsers/structured/csv_parser.py b/py/core/parsers/structured/csv_parser.py index 2d7b40641..2a075eda8 100644 --- a/py/core/parsers/structured/csv_parser.py +++ b/py/core/parsers/structured/csv_parser.py @@ -1,6 +1,7 @@ +# type: ignore from typing import IO, AsyncGenerator, Optional, Union -from core.base.abstractions.document import DataType +from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser diff --git a/py/core/parsers/structured/json_parser.py b/py/core/parsers/structured/json_parser.py index 9ade198dd..f2e18e81d 100644 --- a/py/core/parsers/structured/json_parser.py +++ b/py/core/parsers/structured/json_parser.py @@ -1,7 +1,8 @@ +# type: ignore import json from typing import AsyncGenerator -from core.base.abstractions.document import DataType +from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser diff --git a/py/core/parsers/structured/xlsx_parser.py b/py/core/parsers/structured/xlsx_parser.py index a95d9d66d..502370708 100644 --- 
a/py/core/parsers/structured/xlsx_parser.py +++ b/py/core/parsers/structured/xlsx_parser.py @@ -1,7 +1,8 @@ +# type: ignore from io import BytesIO from typing import AsyncGenerator -from core.base.abstractions.document import DataType +from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser diff --git a/py/core/parsers/text/__init__.py b/py/core/parsers/text/__init__.py index 0d88dcb31..8f85d0461 100644 --- a/py/core/parsers/text/__init__.py +++ b/py/core/parsers/text/__init__.py @@ -1,3 +1,4 @@ +# type: ignore from .html_parser import HTMLParser from .md_parser import MDParser from .text_parser import TextParser diff --git a/py/core/parsers/text/html_parser.py b/py/core/parsers/text/html_parser.py index a0f209439..61b59cf20 100644 --- a/py/core/parsers/text/html_parser.py +++ b/py/core/parsers/text/html_parser.py @@ -1,8 +1,9 @@ +# type: ignore from typing import AsyncGenerator from bs4 import BeautifulSoup -from core.base.abstractions.document import DataType +from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser diff --git a/py/core/parsers/text/md_parser.py b/py/core/parsers/text/md_parser.py index efe129ac9..47c003057 100644 --- a/py/core/parsers/text/md_parser.py +++ b/py/core/parsers/text/md_parser.py @@ -1,8 +1,9 @@ +# type: ignore from typing import AsyncGenerator from bs4 import BeautifulSoup -from core.base.abstractions.document import DataType +from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser diff --git a/py/core/parsers/text/text_parser.py b/py/core/parsers/text/text_parser.py index 7dbe9ac10..999eaace4 100644 --- a/py/core/parsers/text/text_parser.py +++ b/py/core/parsers/text/text_parser.py @@ -1,6 +1,7 @@ +# type: ignore from typing import AsyncGenerator -from core.base.abstractions.document import DataType +from core.base.abstractions import DataType from core.base.parsers.base_parser import AsyncParser diff --git 
a/py/core/pipelines/rag_pipeline.py b/py/core/pipelines/rag_pipeline.py index dc6604fbf..809acdb12 100644 --- a/py/core/pipelines/rag_pipeline.py +++ b/py/core/pipelines/rag_pipeline.py @@ -2,9 +2,12 @@ import logging from typing import Any, Optional -from ..base.abstractions.llm import GenerationConfig -from ..base.abstractions.search import KGSearchSettings, VectorSearchSettings -from ..base.api.models.auth.responses import UserResponse +from ..base.abstractions import ( + GenerationConfig, + KGSearchSettings, + VectorSearchSettings, +) +from ..base.logging import RunType from ..base.logging.run_logger import RunLoggingSingleton from ..base.logging.run_manager import RunManager, manage_run from ..base.pipeline.base_pipeline import AsyncPipeline @@ -23,13 +26,13 @@ def __init__( run_manager: Optional[RunManager] = None, ): super().__init__(pipe_logger, run_manager) - self._search_pipeline = None - self._rag_pipeline = None + self._search_pipeline: Optional[AsyncPipeline] = None + self._rag_pipeline: Optional[AsyncPipeline] = None - async def run( + async def run( # type: ignore self, input: Any, - state: Optional[AsyncState] = None, + state: Optional[AsyncState], run_manager: Optional[RunManager] = None, vector_search_settings: VectorSearchSettings = VectorSearchSettings(), kg_search_settings: KGSearchSettings = KGSearchSettings(), @@ -37,8 +40,16 @@ async def run( *args: Any, **kwargs: Any, ): + if not self._rag_pipeline: + raise ValueError( + "`_rag_pipeline` must be set before running the RAG pipeline" + ) self.state = state or AsyncState() - async with manage_run(run_manager): + # TODO - This feels anti-pattern. 
+ run_manager = ( + run_manager or self.run_manager or RunManager(self.pipe_logger) + ) + async with manage_run(run_manager, RunType.RETRIEVAL): if not self._search_pipeline: raise ValueError( "`_search_pipeline` must be set before running the RAG pipeline" @@ -47,16 +58,19 @@ async def run( async def multi_query_generator(input): tasks = [] async for query in input: + input_kwargs = { + **kwargs, + "vector_search_settings": vector_search_settings, + "kg_search_settings": kg_search_settings, + } task = asyncio.create_task( self._search_pipeline.run( to_async_generator([query]), - state=state, - stream=False, - run_manager=run_manager, - vector_search_settings=vector_search_settings, - kg_search_settings=kg_search_settings, + state, + False, + run_manager, *args, - **kwargs, + **input_kwargs, ) ) tasks.append((query, task)) @@ -64,14 +78,18 @@ async def multi_query_generator(input): for query, task in tasks: yield (query, await task) + input_kwargs = { + **kwargs, + "rag_generation_config": rag_generation_config, + } + rag_results = await self._rag_pipeline.run( - input=multi_query_generator(input), - state=state, - stream=rag_generation_config.stream, - run_manager=run_manager, - rag_generation_config=rag_generation_config, + multi_query_generator(input), + state, + rag_generation_config.stream, + run_manager, *args, - **kwargs, + **input_kwargs, ) return rag_results diff --git a/py/core/pipelines/search_pipeline.py b/py/core/pipelines/search_pipeline.py index a9b3b3819..e763646d6 100644 --- a/py/core/pipelines/search_pipeline.py +++ b/py/core/pipelines/search_pipeline.py @@ -3,7 +3,7 @@ from asyncio import Queue from typing import Any, Optional -from ..base.abstractions.search import ( +from ..base.abstractions import ( AggregateSearchResult, KGSearchSettings, VectorSearchSettings, @@ -25,14 +25,14 @@ def __init__( run_manager: Optional[RunManager] = None, ): super().__init__(pipe_logger, run_manager) - self._parsing_pipe = None - self._vector_search_pipeline = 
None - self._kg_search_pipeline = None + self._parsing_pipe: Optional[AsyncPipe] = None + self._vector_search_pipeline: Optional[AsyncPipeline] = None + self._kg_search_pipeline: Optional[AsyncPipeline] = None - async def run( + async def run( # type: ignore self, input: Any, - state: Optional[AsyncState] = None, + state: Optional[AsyncState], stream: bool = False, run_manager: Optional[RunManager] = None, vector_search_settings: VectorSearchSettings = VectorSearchSettings(), @@ -51,8 +51,8 @@ async def run( ) run_manager = run_manager or self.run_manager async with manage_run(run_manager): - vector_search_queue = Queue() - kg_queue = Queue() + vector_search_queue: Queue[str] = Queue() + kg_queue: Queue[str] = Queue() async def enqueue_requests(): async for message in input: @@ -69,6 +69,9 @@ async def enqueue_requests(): # Start the embedding and KG pipelines in parallel if use_vector_search: + if not self._vector_search_pipeline: + raise ValueError("Vector search pipeline not found") + vector_search_task = asyncio.create_task( self._vector_search_pipeline.run( dequeue_requests(vector_search_queue), @@ -82,6 +85,8 @@ async def enqueue_requests(): ) if do_kg: + if not self._kg_search_pipeline: + raise ValueError("KG search pipeline not found") kg_task = asyncio.create_task( self._kg_search_pipeline.run( dequeue_requests(kg_queue), @@ -120,12 +125,22 @@ def add_pipe( if kg_extraction_pipe: if not self._kg_search_pipeline: self._kg_search_pipeline = AsyncPipeline() + if not self._kg_search_pipeline: + raise ValueError( + "KG search pipeline not found" + ) # for type hinting + self._kg_search_pipeline.add_pipe( pipe, add_upstream_outputs, *args, **kwargs ) elif vector_search_pipe: if not self._vector_search_pipeline: self._vector_search_pipeline = AsyncPipeline() + if not self._vector_search_pipeline: + raise ValueError( + "Vector search pipeline not found" + ) # for type hinting + self._vector_search_pipeline.add_pipe( pipe, add_upstream_outputs, *args, **kwargs ) 
diff --git a/py/core/pipes/__init__.py b/py/core/pipes/__init__.py index e02574bd2..88bd1149e 100644 --- a/py/core/pipes/__init__.py +++ b/py/core/pipes/__init__.py @@ -1,3 +1,4 @@ +from .abstractions.generator_pipe import GeneratorPipe from .abstractions.search_pipe import SearchPipe from .ingestion.chunking_pipe import ChunkingPipe from .ingestion.embedding_pipe import EmbeddingPipe @@ -8,7 +9,6 @@ from .kg.extraction import KGTriplesExtractionPipe from .kg.node_extraction import KGNodeDescriptionPipe, KGNodeExtractionPipe from .kg.storage import KGStoragePipe -from .other.web_search_pipe import WebSearchPipe from .retrieval.kg_search_pipe import KGSearchSearchPipe from .retrieval.multi_search import MultiSearchPipe from .retrieval.query_transform_pipe import QueryTransformPipe @@ -19,6 +19,7 @@ __all__ = [ "SearchPipe", + "GeneratorPipe", "EmbeddingPipe", "KGTriplesExtractionPipe", "KGNodeExtractionPipe", @@ -31,7 +32,6 @@ "StreamingSearchRAGPipe", "VectorSearchPipe", "VectorStoragePipe", - "WebSearchPipe", "KGStoragePipe", "KGClusteringPipe", "MultiSearchPipe", diff --git a/py/core/pipes/abstractions/generator_pipe.py b/py/core/pipes/abstractions/generator_pipe.py index edc230dc0..d45eb1146 100644 --- a/py/core/pipes/abstractions/generator_pipe.py +++ b/py/core/pipes/abstractions/generator_pipe.py @@ -5,16 +5,17 @@ from core.base import ( AsyncState, CompletionProvider, + CompletionRecord, PipeType, PromptProvider, RunLoggingSingleton, ) -from core.base.abstractions.llm import GenerationConfig +from core.base.abstractions import GenerationConfig from core.base.pipes.base_pipe import AsyncPipe class GeneratorPipe(AsyncPipe): - class Config(AsyncPipe.PipeConfig): + class PipeConfig(AsyncPipe.PipeConfig): name: str task_prompt: str system_prompt: str = "default_system" @@ -23,16 +24,16 @@ def __init__( self, llm_provider: CompletionProvider, prompt_provider: PromptProvider, + config: AsyncPipe.PipeConfig, type: PipeType = PipeType.GENERATOR, - config: 
Optional[Config] = None, pipe_logger: Optional[RunLoggingSingleton] = None, *args, **kwargs, ): super().__init__( - type=type, - config=config or self.Config(), - pipe_logger=pipe_logger, + config, + type, + pipe_logger, *args, **kwargs, ) @@ -46,6 +47,7 @@ async def _run_logic( state: AsyncState, run_id: UUID, rag_generation_config: GenerationConfig, + completion_record: Optional[CompletionRecord] = None, *args: Any, **kwargs: Any, ) -> AsyncGenerator[Any, None]: diff --git a/py/core/pipes/abstractions/search_pipe.py b/py/core/pipes/abstractions/search_pipe.py index 905a737b3..567e7811a 100644 --- a/py/core/pipes/abstractions/search_pipe.py +++ b/py/core/pipes/abstractions/search_pipe.py @@ -14,7 +14,7 @@ logger = logging.getLogger(__name__) -class SearchPipe(AsyncPipe): +class SearchPipe(AsyncPipe[VectorSearchResult]): class SearchConfig(AsyncPipe.PipeConfig): name: str = "default_vector_search" filters: dict = {} @@ -25,16 +25,16 @@ class Input(AsyncPipe.Input): def __init__( self, - pipe_logger: Optional[RunLoggingSingleton] = None, + config: AsyncPipe.PipeConfig, type: PipeType = PipeType.SEARCH, - config: Optional[AsyncPipe.PipeConfig] = None, + pipe_logger: Optional[RunLoggingSingleton] = None, *args, **kwargs, ): super().__init__( - pipe_logger=pipe_logger, - type=type, - config=config, + config, + type, + pipe_logger, *args, **kwargs, ) @@ -43,8 +43,7 @@ def __init__( async def search( self, query: str, - filters: dict[str, Any] = {}, - limit: int = 10, + search_settings: Any, *args: Any, **kwargs: Any, ) -> AsyncGenerator[VectorSearchResult, None]: @@ -53,7 +52,7 @@ async def search( @abstractmethod async def _run_logic( self, - input: Input, + input: AsyncPipe.Input, state: AsyncState, run_id: UUID, *args: Any, diff --git a/py/core/pipes/ingestion/chunking_pipe.py b/py/core/pipes/ingestion/chunking_pipe.py index 41da6ef44..f5777d988 100644 --- a/py/core/pipes/ingestion/chunking_pipe.py +++ b/py/core/pipes/ingestion/chunking_pipe.py @@ -1,14 +1,14 @@ 
import logging -from typing import Any, AsyncGenerator, Optional, Union +from typing import Any, AsyncGenerator, Optional +from uuid import UUID from core.base import ( AsyncState, - ChunkingConfig, ChunkingProvider, DocumentExtraction, DocumentFragment, PipeType, - R2RDocumentProcessingError, + R2RChunkingConfig, RunLoggingSingleton, generate_id_from_label, ) @@ -18,36 +18,35 @@ logger = logging.getLogger(__name__) -class ChunkingPipe(AsyncPipe): +class ChunkingPipe(AsyncPipe[DocumentFragment]): class Input(AsyncPipe.Input): message: list[DocumentExtraction] def __init__( self, chunking_provider: ChunkingProvider, + config: AsyncPipe.PipeConfig, pipe_logger: Optional[RunLoggingSingleton] = None, type: PipeType = PipeType.INGESTOR, - config: Optional[AsyncPipe.PipeConfig] = None, *args, **kwargs, ): super().__init__( - pipe_logger=pipe_logger, - type=type, - config=config - or AsyncPipe.PipeConfig(name="default_chunking_pipe"), + config, + type, + pipe_logger, *args, **kwargs, ) self.default_chunking_provider = ( - chunking_provider or R2RChunkingProvider(ChunkingConfig()) + chunking_provider or R2RChunkingProvider(R2RChunkingConfig()) ) - async def _run_logic( + async def _run_logic( # type: ignore self, - input: Input, - state: Optional[AsyncState], - run_id: Any, + input: AsyncPipe.Input, + state: AsyncState, + run_id: UUID, *args: Any, **kwargs: Any, ) -> AsyncGenerator[DocumentFragment, None]: @@ -60,8 +59,7 @@ async def _run_logic( unstr_iteration = 0 # unstructured already chunks for item in input.message: iteration = 0 - async for chunk in chunking_provider.chunk(item): - + async for chunk in chunking_provider.chunk(item): # type: ignore if item.metadata.get("partitioned_by_unstructured", False): item.metadata["chunk_order"] = unstr_iteration unstr_iteration += 1 diff --git a/py/core/pipes/ingestion/embedding_pipe.py b/py/core/pipes/ingestion/embedding_pipe.py index 8dc4df30e..03a543608 100644 --- a/py/core/pipes/ingestion/embedding_pipe.py +++ 
b/py/core/pipes/ingestion/embedding_pipe.py @@ -17,7 +17,7 @@ logger = logging.getLogger(__name__) -class EmbeddingPipe(AsyncPipe): +class EmbeddingPipe(AsyncPipe[VectorEntry]): """ Embeds fragments using a specified embedding model. """ @@ -28,25 +28,24 @@ class Input(AsyncPipe.Input): def __init__( self, embedding_provider: EmbeddingProvider, + config: AsyncPipe.PipeConfig, embedding_batch_size: int = 1, pipe_logger: Optional[RunLoggingSingleton] = None, type: PipeType = PipeType.INGESTOR, - config: Optional[AsyncPipe.PipeConfig] = None, *args, **kwargs, ): super().__init__( - pipe_logger=pipe_logger, - type=type, - config=config - or AsyncPipe.PipeConfig(name="default_embedding_pipe"), + config, + type, + pipe_logger, ) self.embedding_provider = embedding_provider self.embedding_batch_size = embedding_batch_size async def embed(self, fragments: list[DocumentFragment]) -> list[float]: return await self.embedding_provider.async_get_embeddings( - [fragment.data for fragment in fragments], + [fragment.data for fragment in fragments], # type: ignore EmbeddingProvider.PipeStage.BASE, ) @@ -62,7 +61,7 @@ async def _process_batch( user_id=fragment.user_id, collection_ids=fragment.collection_ids, vector=Vector(data=raw_vector), - text=fragment.data, + text=fragment.data, # type: ignore metadata={ **fragment.metadata, }, @@ -70,14 +69,19 @@ async def _process_batch( for raw_vector, fragment in zip(vectors, fragment_batch) ] - async def _run_logic( + async def _run_logic( # type: ignore self, - input: Input, - state: Optional[AsyncState], + input: AsyncPipe.Input, + state: AsyncState, run_id: Any, *args: Any, **kwargs: Any, ) -> AsyncGenerator[VectorEntry, None]: + + if not isinstance(input, EmbeddingPipe.Input): + raise ValueError( + f"Invalid input type for embedding pipe: {type(input)}" + ) fragment_batch = [] batch_size = self.embedding_batch_size concurrent_limit = ( @@ -109,8 +113,8 @@ async def process_batch(batch): if fragment_batch: 
tasks.add(asyncio.create_task(process_batch(fragment_batch))) - for task in asyncio.as_completed(tasks): - for vector_entry in await task: + for future_task in asyncio.as_completed(tasks): + for vector_entry in await future_task: yield vector_entry finally: # Ensure all tasks are completed @@ -132,6 +136,11 @@ async def _process_fragment( self, fragment: DocumentFragment ) -> Union[VectorEntry, R2RDocumentProcessingError]: try: + if isinstance(fragment.data, bytes): + raise ValueError( + "Fragment data is in bytes format, which is not supported by the embedding provider." + ) + vectors = await self.embedding_provider.async_get_embeddings( [fragment.data], EmbeddingProvider.PipeStage.BASE, diff --git a/py/core/pipes/ingestion/parsing_pipe.py b/py/core/pipes/ingestion/parsing_pipe.py index 22ebef44b..539186c11 100644 --- a/py/core/pipes/ingestion/parsing_pipe.py +++ b/py/core/pipes/ingestion/parsing_pipe.py @@ -12,7 +12,7 @@ RunLoggingSingleton, generate_id_from_label, ) -from core.base.abstractions.exception import R2RDocumentProcessingError +from core.base.abstractions import R2RDocumentProcessingError from core.base.pipes.base_pipe import AsyncPipe logger = logging.getLogger(__name__) @@ -26,17 +26,16 @@ def __init__( self, parsing_provider: ParsingProvider, file_provider: FileProvider, - pipe_logger: Optional[RunLoggingSingleton] = None, + config: AsyncPipe.PipeConfig, type: PipeType = PipeType.INGESTOR, - config: Optional[AsyncPipe.PipeConfig] = None, + pipe_logger: Optional[RunLoggingSingleton] = None, *args, **kwargs, ): super().__init__( - pipe_logger=pipe_logger, - type=type, - config=config - or AsyncPipe.PipeConfig(name="default_document_parsing_pipe"), + config, + type, + pipe_logger, *args, **kwargs, ) @@ -50,14 +49,13 @@ async def _parse( version: str, ) -> AsyncGenerator[DocumentExtraction, None]: try: - file_name, file_wrapper, file_size = ( - await self.file_provider.retrieve_file(document.id) - ) + if result := await 
self.file_provider.retrieve_file(document.id): + file_name, file_wrapper, file_size = result with file_wrapper as file_content_stream: file_content = file_content_stream.read() - async for extraction in self.parsing_provider.parse( + async for extraction in self.parsing_provider.parse( # type: ignore file_content, document ): extraction_id = generate_id_from_label( @@ -72,10 +70,10 @@ async def _parse( error_message=f"Error parsing document: {str(e)}", ) - async def _run_logic( + async def _run_logic( # type: ignore self, - input: Input, - state: Optional[AsyncState], + input: AsyncPipe.Input, + state: AsyncState, run_id: UUID, *args, **kwargs, diff --git a/py/core/pipes/ingestion/vector_storage_pipe.py b/py/core/pipes/ingestion/vector_storage_pipe.py index 8667b3d1e..b7c8ead03 100644 --- a/py/core/pipes/ingestion/vector_storage_pipe.py +++ b/py/core/pipes/ingestion/vector_storage_pipe.py @@ -1,6 +1,5 @@ -import asyncio import logging -from typing import Any, AsyncGenerator, Optional, Tuple, Union +from typing import Any, AsyncGenerator, Optional from uuid import UUID from core.base import ( @@ -11,23 +10,22 @@ StorageResult, VectorEntry, ) -from core.base.abstractions.exception import R2RDocumentProcessingError from core.base.pipes.base_pipe import AsyncPipe logger = logging.getLogger(__name__) -class VectorStoragePipe(AsyncPipe): +class VectorStoragePipe(AsyncPipe[StorageResult]): class Input(AsyncPipe.Input): message: list[VectorEntry] def __init__( self, database_provider: DatabaseProvider, + config: AsyncPipe.PipeConfig, storage_batch_size: int = 128, pipe_logger: Optional[RunLoggingSingleton] = None, type: PipeType = PipeType.INGESTOR, - config: Optional[AsyncPipe.PipeConfig] = None, *args, **kwargs, ): @@ -35,9 +33,9 @@ def __init__( Initializes the async vector storage pipe with necessary components and configurations. 
""" super().__init__( - pipe_logger=pipe_logger, - type=type, - config=config, + config, + type, + pipe_logger, *args, **kwargs, ) @@ -61,18 +59,16 @@ async def store( logger.error(error_message) raise ValueError(error_message) - async def _run_logic( + async def _run_logic( # type: ignore self, - input: Input, - state: Optional[AsyncState], + input: AsyncPipe.Input, + state: AsyncState, run_id: UUID, *args: Any, **kwargs: Any, - ) -> AsyncGenerator[ - Tuple[UUID, Union[str, R2RDocumentProcessingError]], None - ]: + ) -> AsyncGenerator[StorageResult, None]: vector_batch = [] - document_counts = {} + document_counts: dict[UUID, int] = {} for msg in input.message: vector_batch.append(msg) diff --git a/py/core/pipes/kg/clustering.py b/py/core/pipes/kg/clustering.py index e39570a03..9aa8ad312 100644 --- a/py/core/pipes/kg/clustering.py +++ b/py/core/pipes/kg/clustering.py @@ -1,4 +1,3 @@ -import json import logging from typing import Any, AsyncGenerator, Optional from uuid import UUID @@ -6,16 +5,13 @@ from core.base import ( AsyncPipe, AsyncState, - Community, CompletionProvider, EmbeddingProvider, - Entity, GenerationConfig, KGProvider, PipeType, PromptProvider, RunLoggingSingleton, - Triple, ) logger = logging.getLogger(__name__) @@ -32,9 +28,9 @@ def __init__( llm_provider: CompletionProvider, prompt_provider: PromptProvider, embedding_provider: EmbeddingProvider, + config: AsyncPipe.PipeConfig, pipe_logger: Optional[RunLoggingSingleton] = None, type: PipeType = PipeType.OTHER, - config: Optional[AsyncPipe.PipeConfig] = None, *args, **kwargs, ): @@ -61,7 +57,7 @@ async def cluster_kg( """ num_communities, num_hierarchies, intermediate_communities = ( - self.kg_provider.perform_graph_clustering(leiden_params) + self.kg_provider.perform_graph_clustering(leiden_params) # type: ignore ) logger.info( @@ -74,7 +70,7 @@ async def cluster_kg( "intermediate_communities": intermediate_communities, } - async def _run_logic( + async def _run_logic( # type: ignore self, input: 
AsyncPipe.Input, state: AsyncState, @@ -87,7 +83,11 @@ async def _run_logic( """ leiden_params = input.message["leiden_params"] + if not leiden_params: + raise ValueError("Leiden parameters not provided.") generation_config = input.message["generation_config"] + if not generation_config: + raise ValueError("Generation config not provided.") base_dimension = self.embedding_provider.config.base_dimension vector_index_fn = self.kg_provider.create_vector_index diff --git a/py/core/pipes/kg/community_summary.py b/py/core/pipes/kg/community_summary.py index f90510036..d93adfaef 100644 --- a/py/core/pipes/kg/community_summary.py +++ b/py/core/pipes/kg/community_summary.py @@ -32,9 +32,9 @@ def __init__( llm_provider: CompletionProvider, prompt_provider: PromptProvider, embedding_provider: EmbeddingProvider, + config: AsyncPipe.PipeConfig, pipe_logger: Optional[RunLoggingSingleton] = None, type: PipeType = PipeType.OTHER, - config: Optional[AsyncPipe.PipeConfig] = None, *args, **kwargs, ): @@ -54,7 +54,6 @@ def __init__( def community_summary_prompt( self, - prompt: str, entities: list[Entity], triples: list[Triple], max_summary_input_length: int, @@ -63,17 +62,26 @@ def community_summary_prompt( Preparing the list of entities and triples to be summarized and created into a community summary. 
""" entities_info = "\n".join( - [f"{entity.name}, {entity.description}" for entity in entities] + [ + f"{entity.id}, {entity.name}, {entity.description}" + for entity in entities + ] ) triples_info = "\n".join( [ - f"{triple.subject}, {triple.object}, {triple.predicate}, {triple.description}" + f"{triple.id}, {triple.subject}, {triple.object}, {triple.predicate}, {triple.description}" for triple in triples ] ) - prompt = prompt.format(entities=entities_info, triples=triples_info) + prompt = f""" + Entities: + {entities_info} + + Relationships: + {triples_info} + """ if len(prompt) > max_summary_input_length: logger.info( @@ -92,24 +100,6 @@ async def process_community( ) -> dict: """ Process a community by summarizing it and creating a summary embedding and storing it to a neo4j database. - - Input: - - level: The level of the hierarchy. - - community_id: The ID of the community to process. - - Output: - - A dictionary with the community id and the title of the community. - - Output format: {"id": community_id, "title": title} - """ - - input_text = """ - - Entities: - {entities} - - Triples: - {triples} - """ logger.info( @@ -117,22 +107,24 @@ async def process_community( ) entities, triples = ( - self.kg_provider.get_community_entities_and_triples( + self.kg_provider.get_community_entities_and_triples( # type: ignore level=level, community_id=community_id ) ) if entities == [] or triples == []: - return None + # TODO - Does this logic work well with the full workflow? + raise ValueError( + f"Community {community_id} at level {level} has no entities or triples." 
+ ) description = ( ( await self.llm_provider.aget_completion( messages=self.prompt_provider._get_message_payload( - task_prompt_name="graphrag_community_reports", + task_prompt_name=self.kg_provider.config.kg_enrichment_settings.community_reports_prompt, task_inputs={ "input_text": self.community_summary_prompt( - input_text, entities, triples, max_summary_input_length, @@ -146,6 +138,11 @@ async def process_community( .message.content ) + if not description: + raise ValueError( + f"Failed to generate a summary for community {community_id} at level {level}." + ) + community = Community( id=str(community_id), level=str(level), @@ -155,7 +152,7 @@ async def process_community( ), ) - self.kg_provider.upsert_communities([community]) + self.kg_provider.upsert_communities([community]) # type: ignore try: summary = json.loads(community.summary) @@ -164,7 +161,7 @@ async def process_community( return {"id": community.id, "title": summary["title"]} - async def _run_logic( + async def _run_logic( # type: ignore self, input: AsyncPipe.Input, state: AsyncState, @@ -181,11 +178,16 @@ async def _run_logic( generation_config = input.message["generation_config"] max_summary_input_length = input.message["max_summary_input_length"] - community_summary = await self.process_community( - level=level, - community_id=community_id, - max_summary_input_length=max_summary_input_length, - generation_config=generation_config, - ) + try: + community_summary = await self.process_community( + level=level, + community_id=community_id, + max_summary_input_length=max_summary_input_length, + generation_config=generation_config, + ) - yield community_summary + yield community_summary + except Exception as e: + error_message = f"Failed to process community {community_id} at level {level}: {e}" + logger.error(error_message) + raise ValueError(error_message) diff --git a/py/core/pipes/kg/extraction.py b/py/core/pipes/kg/extraction.py index a30f5b731..80b47bd46 100644 --- 
a/py/core/pipes/kg/extraction.py +++ b/py/core/pipes/kg/extraction.py @@ -36,7 +36,9 @@ class ClientError(Exception): pass -class KGTriplesExtractionPipe(AsyncPipe): +class KGTriplesExtractionPipe( + AsyncPipe[Union[KGExtraction, R2RDocumentProcessingError]] +): """ Extracts knowledge graph information from document extractions. """ @@ -51,12 +53,12 @@ def __init__( llm_provider: CompletionProvider, prompt_provider: PromptProvider, chunking_provider: ChunkingProvider, + config: AsyncPipe.PipeConfig, kg_batch_size: int = 1, graph_rag: bool = True, id_prefix: str = "demo", pipe_logger: Optional[RunLoggingSingleton] = None, type: PipeType = PipeType.INGESTOR, - config: Optional[AsyncPipe.PipeConfig] = None, *args, **kwargs, ): @@ -76,11 +78,6 @@ def __init__( self.pipe_run_info = None self.graph_rag = graph_rag - def map_to_str(self, fragments: list[DocumentFragment]) -> str: - # convert fragment to dict object - fragment = json.loads(json.dumps(fragment)) - return fragment - async def extract_kg( self, fragments: list[DocumentFragment], @@ -88,17 +85,18 @@ async def extract_kg( max_knowledge_triples: int, entity_types: list[str], relation_types: list[str], - retries: int = 3, + retries: int = 5, delay: int = 2, ) -> KGExtraction: """ Extracts NER triples from a fragment with retries. 
""" + # combine all fragments into a single string - combined_fragment = " ".join([fragment.data for fragment in fragments]) + combined_fragment: str = " ".join([fragment.data for fragment in fragments]) # type: ignore messages = self.prompt_provider._get_message_payload( - task_prompt_name=self.kg_provider.config.kg_extraction_prompt, + task_prompt_name=self.kg_provider.config.kg_creation_settings.kg_extraction_prompt, task_inputs={ "input": combined_fragment, "max_knowledge_triples": max_knowledge_triples, @@ -116,6 +114,12 @@ async def extract_kg( kg_extraction = response.choices[0].message.content + if not kg_extraction: + raise R2RException( + "No knowledge graph extraction found in the response string, the selected LLM likely failed to format it's response correctly.", + 400, + ) + entity_pattern = ( r'\("entity"\${4}([^$]+)\${4}([^$]+)\${4}([^$]+)\)' ) @@ -130,9 +134,13 @@ def parse_fn(response_str: str) -> Any: and len(entities) == 0 ): raise R2RException( - "No entities found in the response string, the selected LLM likely failed to format it's response correctly.", + f"No entities found in the response string, the selected LLM likely failed to format it's response correctly. {response_str}", 400, ) + # logger.warning( + # f"No entities found in the response string, the selected LLM likely failed to format it's response correctly. {response_str}", + # ) + relationships = re.findall( relationship_pattern, response_str ) @@ -164,7 +172,6 @@ def parse_fn(response_str: str) -> Any: # check if subject and object are in entities_dict relations_arr.append( Triple( - id=str(uuid.uuid4()), subject=subject, predicate=predicate, object=object, @@ -203,8 +210,7 @@ def parse_fn(response_str: str) -> Any: logger.error( f"Failed after retries with for fragment {fragments[0].id} of document {fragments[0].document_id}: {e}" ) - raise e - + # raise e # you should raise an error. 
# add metadata to entities and triples return KGExtraction( @@ -214,7 +220,7 @@ def parse_fn(response_str: str) -> Any: triples=[], ) - async def _run_logic( + async def _run_logic( # type: ignore self, input: Input, state: AsyncState, @@ -244,7 +250,9 @@ async def _run_logic( ) for extraction in self.database_provider.vector.get_document_chunks( document_id=document_id - ) + )[ + "results" + ] ] # sort the fragments accroding to chunk_order field in metadata in ascending order diff --git a/py/core/pipes/kg/node_extraction.py b/py/core/pipes/kg/node_extraction.py index c39958784..1e98963fa 100644 --- a/py/core/pipes/kg/node_extraction.py +++ b/py/core/pipes/kg/node_extraction.py @@ -15,7 +15,6 @@ PromptProvider, RunLoggingSingleton, ) -from core.base.abstractions.graph import Entity, Triple from core.base.pipes.base_pipe import AsyncPipe logger = logging.getLogger(__name__) @@ -34,23 +33,22 @@ def __init__( kg_provider: KGProvider, llm_provider: CompletionProvider, prompt_provider: PromptProvider, + config: AsyncPipe.PipeConfig, pipe_logger: Optional[RunLoggingSingleton] = None, type: PipeType = PipeType.OTHER, - config: Optional[AsyncPipe.PipeConfig] = None, *args, **kwargs, ): super().__init__( pipe_logger=pipe_logger, type=type, - config=config - or AsyncPipe.PipeConfig(name="kg_node_extraction_pipe"), + config=config, ) self.kg_provider = kg_provider self.llm_provider = llm_provider self.prompt_provider = prompt_provider - async def _run_logic( + async def _run_logic( # type: ignore self, input: Input, state: AsyncState, @@ -59,7 +57,7 @@ async def _run_logic( **kwargs, ) -> AsyncGenerator[Any, None]: - nodes = self.kg_provider.get_entity_map() + nodes = self.kg_provider.get_entity_map() # type: ignore for _, node_info in nodes.items(): for entity in node_info["entities"]: @@ -81,25 +79,24 @@ def __init__( kg_provider: KGProvider, llm_provider: CompletionProvider, embedding_provider: EmbeddingProvider, + config: AsyncPipe.PipeConfig, pipe_logger: 
Optional[RunLoggingSingleton] = None, type: PipeType = PipeType.OTHER, - config: Optional[AsyncPipe.PipeConfig] = None, *args, **kwargs, ): super().__init__( pipe_logger=pipe_logger, type=type, - config=config - or AsyncPipe.PipeConfig(name="kg_node_description_pipe"), + config=config, ) self.kg_provider = kg_provider self.llm_provider = llm_provider self.embedding_provider = embedding_provider - async def _run_logic( + async def _run_logic( # type: ignore self, - input: Input, + input: AsyncPipe.Input, state: AsyncState, run_id: UUID, *args: Any, diff --git a/py/core/pipes/kg/storage.py b/py/core/pipes/kg/storage.py index 160a66890..1f51a68dc 100644 --- a/py/core/pipes/kg/storage.py +++ b/py/core/pipes/kg/storage.py @@ -1,6 +1,6 @@ import asyncio import logging -from typing import Any, AsyncGenerator, List, Optional, Tuple +from typing import Any, AsyncGenerator, List, Optional from uuid import UUID from core.base import ( @@ -24,11 +24,11 @@ class Input(AsyncPipe.Input): def __init__( self, kg_provider: KGProvider, + config: AsyncPipe.PipeConfig, embedding_provider: Optional[EmbeddingProvider] = None, storage_batch_size: int = 1, pipe_logger: Optional[RunLoggingSingleton] = None, type: PipeType = PipeType.INGESTOR, - config: Optional[AsyncPipe.PipeConfig] = None, *args, **kwargs, ): @@ -40,9 +40,9 @@ def __init__( ) super().__init__( - pipe_logger=pipe_logger, - type=type, - config=config, + config, + type, + pipe_logger, *args, **kwargs, ) @@ -53,7 +53,7 @@ def __init__( async def store( self, kg_extractions: list[KGExtraction], - ) -> Tuple[int, int]: + ) -> None: """ Stores a batch of knowledge graph extractions in the graph database. 
""" @@ -66,7 +66,7 @@ async def store( logger.error(error_message) raise ValueError(error_message) - async def _run_logic( + async def _run_logic( # type: ignore self, input: Input, state: AsyncState, @@ -79,7 +79,7 @@ async def _run_logic( """ batch_tasks = [] - kg_batch = [] + kg_batch: list[KGExtraction] = [] errors = [] async for kg_extraction in input.message: @@ -87,7 +87,7 @@ async def _run_logic( errors.append(kg_extraction) continue - kg_batch.append(kg_extraction) + kg_batch.append(kg_extraction) # type: ignore if len(kg_batch) >= self.storage_batch_size: # Schedule the storage task batch_tasks.append( diff --git a/py/core/pipes/other/web_search_pipe.py b/py/core/pipes/other/web_search_pipe.py deleted file mode 100644 index ee3e6ab2d..000000000 --- a/py/core/pipes/other/web_search_pipe.py +++ /dev/null @@ -1,114 +0,0 @@ -import json -import logging -import uuid -from typing import Any, AsyncGenerator, Optional - -from core.base import ( - AsyncPipe, - AsyncState, - PipeType, - VectorSearchResult, - generate_id_from_label, -) -from core.integrations import SerperClient - -from ..abstractions.search_pipe import SearchPipe - -logger = logging.getLogger(__name__) - - -class WebSearchPipe(SearchPipe): - def __init__( - self, - serper_client: SerperClient, - type: PipeType = PipeType.SEARCH, - config: Optional[SearchPipe.SearchConfig] = None, - *args, - **kwargs, - ): - super().__init__( - type=type, - config=config or SearchPipe.SearchConfig(), - *args, - **kwargs, - ) - self.serper_client = serper_client - - async def search( - self, - message: str, - run_id: uuid.UUID, - *args: Any, - **kwargs: Any, - ) -> AsyncGenerator[VectorSearchResult, None]: - search_limit_override = kwargs.get("search_limit", None) - await self.enqueue_log( - run_id=run_id, key="search_query", value=message - ) - # TODO - Make more general in the future by creating a SearchProvider interface - results = self.serper_client.get_raw( - query=message, - limit=search_limit_override or 
self.config.search_limit, - ) - - search_results = [] - for result in results: - if result.get("snippet") is None: - continue - text = result.pop("snippet") - search_result = VectorSearchResult( - fragment_id=generate_id_from_label(str(result)), - extraction_id=uuid.uuid5( - uuid.NAMESPACE_DNS, str(uuid.uuid4()) - ), # Generate a new UUID for extraction_id - document_id=uuid.uuid5( - uuid.NAMESPACE_DNS, str(uuid.uuid4()) - ), # Generate a new UUID for document_id - user_id=None, # Web search results don't have a user_id - collection_ids=[], # Web search results don't belong to any collection - score=result.get( - "score", 0 - ), # TODO - Consider dynamically generating scores based on similarity - text=text, - metadata=result, - ) - search_results.append(search_result) - yield search_result - - await self.enqueue_log( - run_id=run_id, - key="search_results", - value=json.dumps([ele.json() for ele in search_results]), - ) - - async def _run_logic( - self, - input: AsyncPipe.Input, - state: AsyncState, - run_id: uuid.UUID, - *args: Any, - **kwargs, - ) -> AsyncGenerator[VectorSearchResult, None]: - search_queries = [] - search_results = [] - async for search_request in input.message: - search_queries.append(search_request) - async for result in self.search( - message=search_request, run_id=run_id, *args, **kwargs - ): - search_results.append(result) - yield result - - await state.update( - self.config.name, {"output": {"search_results": search_results}} - ) - - await state.update( - self.config.name, - { - "output": { - "search_queries": search_queries, - "search_results": search_results, - } - }, - ) diff --git a/py/core/pipes/retrieval/kg_search_pipe.py b/py/core/pipes/retrieval/kg_search_pipe.py index ca890751d..461edca76 100644 --- a/py/core/pipes/retrieval/kg_search_pipe.py +++ b/py/core/pipes/retrieval/kg_search_pipe.py @@ -1,7 +1,7 @@ import asyncio import json import logging -from typing import Any, Optional, AsyncGenerator +from typing import Any, 
AsyncGenerator, Optional from uuid import UUID from core.base import ( @@ -9,20 +9,20 @@ CompletionProvider, EmbeddingProvider, KGProvider, - KGSearchSettings, PipeType, PromptProvider, - R2RException, RunLoggingSingleton, ) -from core.base.abstractions.search import ( - KGSearchResult, +from core.base.abstractions import ( + KGCommunityResult, + KGEntityResult, KGGlobalResult, + KGRelationshipResult, KGSearchMethod, + KGSearchResult, KGSearchResultType, - KGEntityResult, - KGRelationshipResult, - KGCommunityResult, + KGSearchSettings, + R2RException, ) from ..abstractions.generator_pipe import GeneratorPipe @@ -41,9 +41,9 @@ def __init__( llm_provider: CompletionProvider, prompt_provider: PromptProvider, embedding_provider: EmbeddingProvider, + config: GeneratorPipe.PipeConfig, pipe_logger: Optional[RunLoggingSingleton] = None, type: PipeType = PipeType.INGESTOR, - config: Optional[GeneratorPipe.PipeConfig] = None, *args, **kwargs, ): @@ -51,14 +51,11 @@ def __init__( Initializes the embedding pipe with necessary components and configurations. 
""" super().__init__( - llm_provider=llm_provider, - prompt_provider=prompt_provider, - type=type, - config=config - or GeneratorPipe.Config( - name="kg_rag_pipe", task_prompt="kg_search" - ), - pipe_logger=pipe_logger, + llm_provider, + prompt_provider, + config, + type, + pipe_logger, *args, **kwargs, ) @@ -123,7 +120,7 @@ async def local_search( # entity search search_type = "__Entity__" - async for search_result in self.kg_provider.vector_query( + async for search_result in self.kg_provider.vector_query( # type: ignore input, search_type=search_type, search_type_limits=kg_search_settings.local_search_limits[ @@ -137,7 +134,6 @@ async def local_search( "document_ids", ], ): - print(search_result) yield KGSearchResult( content=KGEntityResult( name=search_result["name"], @@ -152,7 +148,7 @@ async def local_search( # relationship search search_type = "__Relationship__" - async for search_result in self.kg_provider.vector_query( + async for search_result in self.kg_provider.vector_query( # type: ignore input, search_type=search_type, search_type_limits=kg_search_settings.local_search_limits[ @@ -180,7 +176,7 @@ async def local_search( # community search search_type = "__Community__" - async for search_result in self.kg_provider.vector_query( + async for search_result in self.kg_provider.vector_query( # type: ignore input, search_type=search_type, search_type_limits=kg_search_settings.local_search_limits[ @@ -195,16 +191,24 @@ async def local_search( # try loading it as a json try: + + if "```json" in summary: + summary = ( + summary.strip() + .removeprefix("```json") + .removesuffix("```") + .strip() + ) + summary_json = json.loads(summary) description = summary_json.get("summary", "") name = summary_json.get("title", "") - description += "\n\n" + "\n".join( - [ - finding["summary"] - for finding in summary_json.get("findings", []) - ] - ) + def get_str(finding): + if isinstance(finding, dict): + return f"{finding['summary']} => {finding['explanation']}" + else: + 
return str(finding) except json.JSONDecodeError: logger.warning(f"Summary is not valid JSON") @@ -216,7 +220,10 @@ async def local_search( ), method=KGSearchMethod.LOCAL, result_type=KGSearchResultType.COMMUNITY, - metadata={"associated_query": message}, + metadata={ + "associated_query": message, + "findings": summary_json.get("findings", ""), + }, ) async def global_search( @@ -231,7 +238,7 @@ async def global_search( # map reduce async for message in input.message: map_responses = [] - communities = self.kg_provider.get_communities( + communities = self.kg_provider.get_communities( # type: ignore level=kg_search_settings.kg_search_level ) @@ -258,13 +265,13 @@ async def preprocess_communities(communities): async def process_community(merged_report): output = await self.llm_provider.aget_completion( messages=self.prompt_provider._get_message_payload( - task_prompt_name="graphrag_map_system_prompt", + task_prompt_name=self.kg_provider.config.kg_search_settings.graphrag_map_system_prompt, task_inputs={ "context_data": merged_report, "input": message, }, ), - generation_config=kg_search_settings.kg_search_generation_config, + generation_config=kg_search_settings.generation_config, ) return output.choices[0].message.content @@ -293,27 +300,34 @@ async def process_community(merged_report): # reducing the outputs output = await self.llm_provider.aget_completion( messages=self.prompt_provider._get_message_payload( - task_prompt_name="graphrag_reduce_system_prompt", + task_prompt_name=self.kg_provider.config.kg_search_settings.graphrag_reduce_system_prompt, task_inputs={ "response_type": "multiple paragraphs", "report_data": filtered_responses, "input": message, }, ), - generation_config=kg_search_settings.kg_search_generation_config, + generation_config=kg_search_settings.generation_config, ) - output = output.choices[0].message.content + output_text = output.choices[0].message.content + + if not output_text: + logger.warning(f"No output generated for query: 
{message}.") + raise R2RException( + "No output generated for query.", + 400, + ) yield KGSearchResult( content=KGGlobalResult( - name="Global Result", description=output + name="Global Result", description=output_text ), method=KGSearchMethod.GLOBAL, - metadata={'associated_query': message}, + metadata={"associated_query": message}, ) - async def _run_logic( + async def _run_logic( # type: ignore self, input: GeneratorPipe.Input, state: AsyncState, @@ -325,14 +339,15 @@ async def _run_logic( kg_search_type = kg_search_settings.kg_search_type - if kg_search_type == "local": + # runs local and/or global search + if kg_search_type == "local" or kg_search_type == "local_and_global": logger.info("Performing KG local search") async for result in self.local_search( input, state, run_id, kg_search_settings ): yield result - else: + if kg_search_type == "global" or kg_search_type == "local_and_global": logger.info("Performing KG global search") async for result in self.global_search( input, state, run_id, kg_search_settings diff --git a/py/core/pipes/retrieval/multi_search.py b/py/core/pipes/retrieval/multi_search.py index 658588c61..ae93ea542 100644 --- a/py/core/pipes/retrieval/multi_search.py +++ b/py/core/pipes/retrieval/multi_search.py @@ -7,7 +7,7 @@ VectorSearchResult, VectorSearchSettings, ) -from core.base.pipes.base_pipe import AsyncPipe +from core.base.pipes.base_pipe import AsyncPipe, PipeType from ..abstractions.search_pipe import SearchPipe from .query_transform_pipe import QueryTransformPipe @@ -25,23 +25,29 @@ def __init__( self, query_transform_pipe: QueryTransformPipe, inner_search_pipe: SearchPipe, - config: Optional[PipeConfig] = None, + config: PipeConfig, *args, **kwargs, ): self.query_transform_pipe = query_transform_pipe self.vector_search_pipe = inner_search_pipe + config = config or MultiSearchPipe.PipeConfig( + name=query_transform_pipe.config.name + ) super().__init__( - config=config - or MultiSearchPipe.PipeConfig( - 
name=query_transform_pipe.config.name - ), + config, + PipeType.SEARCH, *args, **kwargs, ) + self._config: MultiSearchPipe.PipeConfig = config # for type hinting + + @property + def config(self) -> PipeConfig: + return self._config - async def _run_logic( + async def _run_logic( # type: ignore self, input: Any, state: Any, @@ -83,7 +89,7 @@ async def _run_logic( results.append(search_result) # Collection results by their associated queries - grouped_results = {} + grouped_results: dict[str, list[VectorSearchResult]] = {} for result in results: query = result.metadata["associated_query"] if query not in grouped_results: @@ -106,18 +112,19 @@ async def _run_logic( def reciprocal_rank_fusion( self, all_results: Dict[str, List[VectorSearchResult]] ) -> List[VectorSearchResult]: - document_scores = {} - document_results = {} - document_queries = {} + document_scores: dict[UUID, float] = {} + document_results: dict[UUID, VectorSearchResult] = {} + document_queries: dict[UUID, set[str]] = {} for query, results in all_results.items(): for rank, result in enumerate(results, 1): doc_id = result.fragment_id if doc_id not in document_scores: document_scores[doc_id] = 0 document_results[doc_id] = result - document_queries[doc_id] = set() + set_: set[str] = set() + document_queries[doc_id] = set_ document_scores[doc_id] += 1 / (rank + self.config.rrf_k) - document_queries[doc_id].add(query) + document_queries[doc_id].add(query) # type: ignore # Sort documents by their RRF score sorted_docs = sorted( @@ -132,7 +139,7 @@ def reciprocal_rank_fusion( rrf_score # Replace the original score with the RRF score ) result.metadata["associated_queries"] = list( - document_queries[doc_id] + document_queries[doc_id] # type: ignore ) # Add list of associated queries result.metadata["is_rrf_score"] = True if "associated_query" in result.metadata: diff --git a/py/core/pipes/retrieval/query_transform_pipe.py b/py/core/pipes/retrieval/query_transform_pipe.py index f689aa76b..138a3bf5a 100644 --- 
a/py/core/pipes/retrieval/query_transform_pipe.py +++ b/py/core/pipes/retrieval/query_transform_pipe.py @@ -1,5 +1,5 @@ import logging -from typing import Any, AsyncGenerator, Optional +from typing import Any, AsyncGenerator from uuid import UUID from core.base import ( @@ -9,7 +9,7 @@ PipeType, PromptProvider, ) -from core.base.abstractions.llm import GenerationConfig +from core.base.abstractions import GenerationConfig from ..abstractions.generator_pipe import GeneratorPipe @@ -29,22 +29,27 @@ def __init__( self, llm_provider: CompletionProvider, prompt_provider: PromptProvider, + config: QueryTransformConfig, type: PipeType = PipeType.TRANSFORM, - config: Optional[QueryTransformConfig] = None, *args, **kwargs, ): logger.info(f"Initalizing an `QueryTransformPipe` pipe.") super().__init__( - llm_provider=llm_provider, - prompt_provider=prompt_provider, - type=type, - config=config or QueryTransformPipe.QueryTransformConfig(), + llm_provider, + prompt_provider, + config, + type, *args, **kwargs, ) + self._config: QueryTransformPipe.QueryTransformConfig = config - async def _run_logic( + @property + def config(self) -> QueryTransformConfig: # type: ignore + return self._config + + async def _run_logic( # type: ignore self, input: AsyncPipe.Input, state: AsyncState, @@ -75,6 +80,9 @@ async def _run_logic( generation_config=query_transform_generation_config, ) content = response.choices[0].message.content + if not content: + logger.error(f"Failed to transform query: {query}. 
Skipping.") + raise ValueError(f"Failed to transform query: {query}.") outputs = content.split("\n") outputs = [ output.strip() for output in outputs if output.strip() != "" diff --git a/py/core/pipes/retrieval/routing_search_pipe.py b/py/core/pipes/retrieval/routing_search_pipe.py index 926270659..72d7b7e2d 100644 --- a/py/core/pipes/retrieval/routing_search_pipe.py +++ b/py/core/pipes/retrieval/routing_search_pipe.py @@ -1,4 +1,4 @@ -from typing import Any, AsyncGenerator, Dict, Optional, Type +from typing import Any, AsyncGenerator, Dict from uuid import UUID from core.base import ( @@ -8,25 +8,21 @@ VectorSearchSettings, ) -from ..abstractions.search_pipe import SearchPipe - -class RoutingSearchPipe(SearchPipe): +class RoutingSearchPipe(AsyncPipe): def __init__( self, - search_pipes: Dict[str, SearchPipe], + search_pipes: Dict[str, AsyncPipe], default_strategy: str, - config: Optional[SearchPipe.SearchConfig] = None, + config: AsyncPipe.PipeConfig, *args, **kwargs, ): - super().__init__( - config=config or SearchPipe.SearchConfig(), *args, **kwargs - ) + super().__init__(config, *args, **kwargs) self.search_pipes = search_pipes self.default_strategy = default_strategy - async def _run_logic( + async def _run_logic( # type: ignore self, input: AsyncPipe.Input, state: AsyncState, @@ -38,8 +34,12 @@ async def _run_logic( search_pipe = self.search_pipes.get( vector_search_settings.search_strategy ) + if not search_pipe: + raise ValueError( + f"Search strategy {vector_search_settings.search_strategy} not found" + ) - async for result in search_pipe._run_logic( + async for result in search_pipe._run_logic( # type: ignore input, state, run_id, diff --git a/py/core/pipes/retrieval/search_rag_pipe.py b/py/core/pipes/retrieval/search_rag_pipe.py index 869114411..11bf10add 100644 --- a/py/core/pipes/retrieval/search_rag_pipe.py +++ b/py/core/pipes/retrieval/search_rag_pipe.py @@ -1,4 +1,5 @@ from typing import Any, AsyncGenerator, Optional, Tuple +from uuid import UUID 
from core.base import ( AggregateSearchResult, @@ -9,7 +10,7 @@ PipeType, PromptProvider, ) -from core.base.abstractions.llm import GenerationConfig, RAGCompletion +from core.base.abstractions import GenerationConfig, RAGCompletion from ..abstractions.generator_pipe import GeneratorPipe @@ -22,33 +23,35 @@ def __init__( self, llm_provider: CompletionProvider, prompt_provider: PromptProvider, + config: GeneratorPipe.PipeConfig, type: PipeType = PipeType.GENERATOR, - config: Optional[GeneratorPipe] = None, *args, **kwargs, ): super().__init__( - llm_provider=llm_provider, - prompt_provider=prompt_provider, - type=type, - config=config - or GeneratorPipe.Config( - name="default_rag_pipe", task_prompt="default_rag" - ), + llm_provider, + prompt_provider, + config, + type, *args, **kwargs, ) + self._config: GeneratorPipe.PipeConfig = config - async def _run_logic( + @property + def config(self) -> GeneratorPipe.PipeConfig: # for type hiting + return self._config + + async def _run_logic( # type: ignore self, input: Input, state: AsyncState, + run_id: UUID, rag_generation_config: GenerationConfig, completion_record: Optional[CompletionRecord] = None, *args: Any, **kwargs: Any, ) -> AsyncGenerator[RAGCompletion, None]: - run_id = kwargs.get("run_id") context = "" search_iteration = 1 total_results = 0 @@ -74,10 +77,13 @@ async def _run_logic( yield RAGCompletion(completion=response, search_results=search_results) if run_id: + content = response.choices[0].message.content + if not content: + raise ValueError("Response content is empty") await self.enqueue_log( run_id=run_id, key="llm_response", - value=response.choices[0].message.content, + value=content, ) async def _collect_context( @@ -101,9 +107,10 @@ async def _collect_context( context += f"Knowledge Graph ({iteration}):\n" it = total_results + 1 for search_results in results.kg_search_results: # [1]: - context += ( - f"Query: {search_results.metadata['associated_query']}\n\n" - ) + if associated_query := 
search_results.metadata.get( + "associated_query" + ): + context += f"Query: {associated_query}\n\n" context += f"Results:\n" for search_result in search_results: context += f"[{it}]: {search_result}\n\n" diff --git a/py/core/pipes/retrieval/streaming_rag_pipe.py b/py/core/pipes/retrieval/streaming_rag_pipe.py index 64160dbae..9f9d3cd47 100644 --- a/py/core/pipes/retrieval/streaming_rag_pipe.py +++ b/py/core/pipes/retrieval/streaming_rag_pipe.py @@ -1,4 +1,3 @@ -import json import logging from datetime import datetime from typing import Any, AsyncGenerator, Generator, Optional @@ -11,17 +10,17 @@ LLMChatCompletionChunk, PipeType, PromptProvider, + format_search_results_for_llm, format_search_results_for_stream, ) -from core.base.abstractions.llm import GenerationConfig +from core.base.abstractions import GenerationConfig from ..abstractions.generator_pipe import GeneratorPipe -from .search_rag_pipe import SearchRAGPipe logger = logging.getLogger(__name__) -class StreamingSearchRAGPipe(SearchRAGPipe): +class StreamingSearchRAGPipe(GeneratorPipe): VECTOR_SEARCH_STREAM_MARKER = ( "search" # TODO - change this to vector_search in next major release ) @@ -33,38 +32,42 @@ def __init__( self, llm_provider: CompletionProvider, prompt_provider: PromptProvider, + config: GeneratorPipe.PipeConfig, type: PipeType = PipeType.GENERATOR, - config: Optional[GeneratorPipe] = None, *args, **kwargs, ): super().__init__( - llm_provider=llm_provider, - prompt_provider=prompt_provider, - type=type, - config=config - or GeneratorPipe.Config( - name="default_streaming_rag_pipe", task_prompt="default_rag" - ), + llm_provider, + prompt_provider, + config, + type, *args, **kwargs, ) + self._config: GeneratorPipe.PipeConfig - async def _run_logic( + @property + def config(self) -> GeneratorPipe.PipeConfig: + return self._config + + async def _run_logic( # type: ignore self, - input: SearchRAGPipe.Input, + input: GeneratorPipe.Input, state: AsyncState, + run_id: UUID, rag_generation_config: 
GenerationConfig, completion_record: Optional[CompletionRecord] = None, *args: Any, **kwargs: Any, ) -> AsyncGenerator[str, None]: - run_id = kwargs.get("run_id") context = "" async for query, search_results in input.message: result = format_search_results_for_stream(search_results) yield result - context += result + gen_context = format_search_results_for_llm(search_results) + print("gen_context = ", gen_context) + context += gen_context messages = self.prompt_provider._get_message_payload( system_prompt_name=self.config.system_prompt, @@ -77,12 +80,15 @@ async def _run_logic( for chunk in self.llm_provider.get_completion_stream( messages=messages, generation_config=rag_generation_config ): - chunk = StreamingSearchRAGPipe._process_chunk(chunk) - response += chunk - yield chunk + chunk_txt = StreamingSearchRAGPipe._process_chunk(chunk) + response += chunk_txt + yield chunk_txt yield f"" - + if not completion_record: + raise ValueError( + "Completion record is expected in the streaming RAG pipe and is used for logging." 
+ ) completion_record.search_results = search_results completion_record.llm_response = response completion_record.completion_end_time = datetime.now() @@ -93,7 +99,7 @@ async def _yield_chunks( start_marker: str, chunks: Generator[str, None, None], end_marker: str, - ) -> str: + ) -> AsyncGenerator[str, None]: yield start_marker for chunk in chunks: yield chunk diff --git a/py/core/pipes/retrieval/vector_search_pipe.py b/py/core/pipes/retrieval/vector_search_pipe.py index 9c11e2e07..4a374ed68 100644 --- a/py/core/pipes/retrieval/vector_search_pipe.py +++ b/py/core/pipes/retrieval/vector_search_pipe.py @@ -1,6 +1,6 @@ import json import logging -from typing import Any, AsyncGenerator, Optional +from typing import Any, AsyncGenerator from uuid import UUID from core.base import ( @@ -24,36 +24,38 @@ def __init__( self, database_provider: DatabaseProvider, embedding_provider: EmbeddingProvider, + config: SearchPipe.SearchConfig, type: PipeType = PipeType.SEARCH, - config: Optional[SearchPipe.SearchConfig] = None, *args, **kwargs, ): super().__init__( - type=type, - config=config or SearchPipe.SearchConfig(), + config, + type, *args, **kwargs, ) self.embedding_provider = embedding_provider self.database_provider = database_provider - async def search( + self._config: SearchPipe.SearchConfig = config + + @property + def config(self) -> SearchPipe.SearchConfig: + return self._config + + async def search( # type: ignore self, message: str, - run_id: UUID, - vector_search_settings: VectorSearchSettings, + search_settings: VectorSearchSettings, *args: Any, **kwargs: Any, ) -> AsyncGenerator[VectorSearchResult, None]: - await self.enqueue_log( - run_id=run_id, key="search_query", value=message - ) - vector_search_settings.filters = ( - vector_search_settings.filters or self.config.filters + search_settings.filters = ( + search_settings.filters or self.config.filters ) - vector_search_settings.search_limit = ( - vector_search_settings.search_limit or self.config.search_limit + 
search_settings.search_limit = ( + search_settings.search_limit or self.config.search_limit ) results = [] query_vector = self.embedding_provider.get_embedding( @@ -64,26 +66,22 @@ async def search( self.database_provider.vector.hybrid_search( query_vector=query_vector, query_text=message, - search_settings=vector_search_settings, + search_settings=search_settings, ) - if vector_search_settings.use_hybrid_search + if search_settings.use_hybrid_search else self.database_provider.vector.semantic_search( query_vector=query_vector, - search_settings=vector_search_settings, + search_settings=search_settings, ) ) reranked_results = self.embedding_provider.rerank( query=message, results=search_results, - limit=vector_search_settings.search_limit, + limit=search_settings.search_limit, ) - include_title_if_available = kwargs.get( - "include_title_if_available", False - ) - if include_title_if_available: + if kwargs.get("include_title_if_available", False): for result in reranked_results: - title = result.metadata.get("title", None) - if title: + if title := result.metadata.get("title", None): text = result.text result.text = f"Document Title:{title}\n\nText:{text}" @@ -92,13 +90,7 @@ async def search( results.append(result) yield result - await self.enqueue_log( - run_id=run_id, - key="search_results", - value=json.dumps([ele.json() for ele in results]), - ) - - async def _run_logic( + async def _run_logic( # type: ignore self, input: AsyncPipe.Input, state: AsyncState, @@ -111,16 +103,25 @@ async def _run_logic( search_results = [] async for search_request in input.message: search_queries.append(search_request) + await self.enqueue_log( + run_id=run_id, key="search_query", value=search_request + ) + async for result in self.search( - message=search_request, - run_id=run_id, - vector_search_settings=vector_search_settings, + search_request, + vector_search_settings, *args, **kwargs, ): search_results.append(result) yield result + await self.enqueue_log( + run_id=run_id, + 
key="search_results", + value=json.dumps([ele.json() for ele in search_results]), + ) + await state.update( self.config.name, {"output": {"search_results": search_results}} ) diff --git a/py/core/providers/__init__.py b/py/core/providers/__init__.py index f4b137b6c..3fd2831fe 100644 --- a/py/core/providers/__init__.py +++ b/py/core/providers/__init__.py @@ -1,5 +1,8 @@ from .auth import R2RAuthProvider -from .chunking import R2RChunkingProvider, UnstructuredChunkingProvider +from .chunking import ( # type: ignore + R2RChunkingProvider, + UnstructuredChunkingProvider, +) from .crypto import BCryptConfig, BCryptProvider from .database import PostgresDBProvider from .embeddings import ( @@ -18,8 +21,8 @@ # Auth "R2RAuthProvider", # Chunking - "R2RChunkingProvider", - "UnstructuredChunkingProvider", + "R2RChunkingProvider", # type: ignore + "UnstructuredChunkingProvider", # type: ignore # Crypto "BCryptProvider", "BCryptConfig", diff --git a/py/core/providers/auth/r2r_auth.py b/py/core/providers/auth/r2r_auth.py index 220add96c..b665fabd5 100644 --- a/py/core/providers/auth/r2r_auth.py +++ b/py/core/providers/auth/r2r_auth.py @@ -16,7 +16,10 @@ Token, TokenData, ) -from core.base.api.models.auth.responses import UserResponse +from core.base.api.models import UserResponse + +DEFAULT_ACCESS_LIFETIME_IN_MINUTES = 3600 +DEFAULT_REFRESH_LIFETIME_IN_DAYS = 7 logger = logging.getLogger(__name__) oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token") @@ -31,7 +34,7 @@ def __init__( crypto_provider: CryptoProvider, db_provider: DatabaseProvider, ): - super().__init__(config) + super().__init__(config, crypto_provider) logger.debug(f"Initializing R2RAuthProvider with config: {config}") self.crypto_provider = crypto_provider self.db_provider = db_provider @@ -46,6 +49,7 @@ def __init__( config.refresh_token_lifetime_in_days or os.getenv("R2R_REFRESH_LIFE_IN_MINUTES") ) + self.config: AuthConfig = config async def initialize(self): try: @@ -59,7 +63,10 @@ async def 
initialize(self): def create_access_token(self, data: dict) -> str: to_encode = data.copy() expire = datetime.now(timezone.utc) + timedelta( - minutes=self.access_token_lifetime_in_minutes + minutes=float( + self.access_token_lifetime_in_minutes + or DEFAULT_ACCESS_LIFETIME_IN_MINUTES + ) ) to_encode |= {"exp": expire.timestamp(), "token_type": "access"} return jwt.encode(to_encode, self.secret_key, algorithm="HS256") @@ -67,7 +74,10 @@ def create_access_token(self, data: dict) -> str: def create_refresh_token(self, data: dict) -> str: to_encode = data.copy() expire = datetime.now(timezone.utc) + timedelta( - days=self.refresh_token_lifetime_in_days + days=float( + self.refresh_token_lifetime_in_days + or DEFAULT_REFRESH_LIFETIME_IN_DAYS + ) ) to_encode |= {"exp": expire, "token_type": "refresh"} return jwt.encode(to_encode, self.secret_key, algorithm="HS256") @@ -149,7 +159,9 @@ async def register(self, email: str, password: str) -> Dict[str, str]: return new_user - async def verify_email(self, verification_code: str) -> Dict[str, str]: + async def verify_email( + self, email: str, verification_code: str + ) -> dict[str, str]: user_id = ( await self.db_provider.relational.get_user_id_by_verification_code( verification_code @@ -241,6 +253,14 @@ async def refresh_access_token( async def change_password( self, user: UserResponse, current_password: str, new_password: str ) -> Dict[str, str]: + if not isinstance(user.hashed_password, str): + logger.error( + f"Invalid hashed_password type: {type(user.hashed_password)}" + ) + raise R2RException( + status_code=500, message="Invalid password hash in database" + ) + if not self.crypto_provider.verify_password( current_password, user.hashed_password ): diff --git a/py/core/providers/chunking/__init__.py b/py/core/providers/chunking/__init__.py index 3d74e0c1c..f954e5c21 100644 --- a/py/core/providers/chunking/__init__.py +++ b/py/core/providers/chunking/__init__.py @@ -1,3 +1,4 @@ +# type: ignore from .r2r_chunking import 
R2RChunkingProvider from .unstructured_chunking import UnstructuredChunkingProvider diff --git a/py/core/providers/chunking/r2r_chunking.py b/py/core/providers/chunking/r2r_chunking.py index 18a80bc27..0c8f35a0d 100644 --- a/py/core/providers/chunking/r2r_chunking.py +++ b/py/core/providers/chunking/r2r_chunking.py @@ -1,3 +1,4 @@ +# type: ignore import logging from typing import Any, AsyncGenerator, Union @@ -8,7 +9,7 @@ Strategy, TextSplitter, ) -from core.base.abstractions.document import DocumentExtraction +from core.base.abstractions import DocumentExtraction logger = logging.getLogger(__name__) @@ -54,7 +55,7 @@ def _initialize_text_splitter(self) -> TextSplitter: else: raise ValueError(f"Unsupported method type: {self.config.method}") - def validate(self) -> bool: + def validate_config(self) -> bool: return self.config.chunk_size > 0 and self.config.chunk_overlap >= 0 def update_config(self, config_override: R2RChunkingConfig): diff --git a/py/core/providers/chunking/unstructured_chunking.py b/py/core/providers/chunking/unstructured_chunking.py index 2d95ad059..3147b32b0 100644 --- a/py/core/providers/chunking/unstructured_chunking.py +++ b/py/core/providers/chunking/unstructured_chunking.py @@ -1,8 +1,9 @@ +# type: ignore import logging from typing import AsyncGenerator, Union from core.base import ChunkingProvider, Strategy, UnstructuredChunkingConfig -from core.base.abstractions.document import DocumentExtraction +from core.base.abstractions import DocumentExtraction logger = logging.getLogger(__name__) diff --git a/py/core/providers/crypto/bcrypt.py b/py/core/providers/crypto/bcrypt.py index d69559de3..1ecb123f6 100644 --- a/py/core/providers/crypto/bcrypt.py +++ b/py/core/providers/crypto/bcrypt.py @@ -5,10 +5,11 @@ class BCryptConfig(CryptoConfig): + provider: str = "bcrypt" salt_rounds: int = 12 - def validate(self) -> None: - super().validate() + def validate_config(self) -> None: # type: ignore + super().validate_config() if self.salt_rounds < 4 or 
self.salt_rounds > 31: raise ValueError("salt_rounds must be between 4 and 31") @@ -27,6 +28,7 @@ def __init__(self, config: BCryptConfig): "BCryptProvider must be initialized with a BCryptConfig" ) super().__init__(config) + self.config: BCryptConfig = config def get_password_hash(self, password: str) -> str: return self.bcrypt.hashpw( diff --git a/py/core/providers/database/base.py b/py/core/providers/database/base.py index f00007a45..d52231f91 100644 --- a/py/core/providers/database/base.py +++ b/py/core/providers/database/base.py @@ -1,6 +1,6 @@ -from typing import Any, Optional, Union +from typing import Any, Optional, Sequence, Union -from sqlalchemy import text +from sqlalchemy import TextClause, text from .vecs import Client @@ -8,7 +8,7 @@ # TODO: This should be defined at the mixin, not here def execute_query( vx: Client, - query: Union[str, text], + query: Union[str, TextClause], params: Optional[dict[str, Any]] = None, ): with vx.Session() as sess: @@ -22,12 +22,12 @@ def execute_query( class QueryBuilder: def __init__(self, table_name: str): self.table_name = table_name - self.conditions = [] - self.params = {} + self.conditions: list[str] = [] + self.params: dict = {} self.select_fields = "*" self.operation = "SELECT" - self.insert_data = None - self.limit_value = None + self.limit_value: Optional[int] = None + self.insert_data: Optional[dict] = None def select(self, fields: list[str]): self.select_fields = ", ".join(fields) @@ -80,17 +80,23 @@ def _get_table_name(self, base_name: str) -> str: raise NotImplementedError("Subclasses must implement this method") def execute_query( - self, query: Union[str, text], params: Optional[dict[str, Any]] = None + self, + query: Union[str, TextClause], + params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, ): raise NotImplementedError("Subclasses must implement this method") def fetch_query( - self, query: Union[str, text], params: Optional[dict[str, Any]] = None + self, + query: Union[str, TextClause], 
+ params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, ): raise NotImplementedError("Subclasses must implement this method") def fetchrow_query( - self, query: Union[str, text], params: Optional[dict[str, Any]] = None + self, + query: Union[str, TextClause], + params: Optional[Union[dict[str, Any], Sequence[Any]]] = None, ): raise NotImplementedError("Subclasses must implement this method") diff --git a/py/core/providers/database/collection.py b/py/core/providers/database/collection.py index 78d3f77c0..5366812a3 100644 --- a/py/core/providers/database/collection.py +++ b/py/core/providers/database/collection.py @@ -1,18 +1,17 @@ import json +import logging from datetime import datetime -from typing import Optional +from typing import Optional, Union from uuid import UUID from core.base import R2RException from core.base.abstractions import DocumentInfo, DocumentType, IngestionStatus -from core.base.api.models.auth.responses import UserResponse -from core.base.api.models.management.responses import ( - GroupOverviewResponse, - GroupResponse, -) +from core.base.api.models import CollectionOverviewResponse, CollectionResponse from .base import DatabaseMixin +logger = logging.getLogger(__name__) + class CollectionMixin(DatabaseMixin): async def create_table(self) -> None: @@ -38,7 +37,7 @@ async def collection_exists(self, collection_id: UUID) -> bool: async def create_collection( self, name: str, description: str = "" - ) -> GroupResponse: + ) -> CollectionResponse: current_time = datetime.utcnow() query = f""" INSERT INTO {self._get_table_name('collections')} (name, description, created_at, updated_at) @@ -48,7 +47,7 @@ async def create_collection( params = [name, description, current_time, current_time] try: - async with self.pool.acquire() as conn: + async with self.pool.acquire() as conn: # type: ignore row = await conn.fetchrow(query, *params) if not row: @@ -56,7 +55,7 @@ async def create_collection( status_code=500, message="Failed to create 
collection" ) - return GroupResponse( + return CollectionResponse( collection_id=row["collection_id"], name=row["name"], description=row["description"], @@ -69,7 +68,7 @@ async def create_collection( message=f"An error occurred while creating the collection: {str(e)}", ) - async def get_collection(self, collection_id: UUID) -> GroupResponse: + async def get_collection(self, collection_id: UUID) -> CollectionResponse: """Get a collection by its ID.""" if not await self.collection_exists(collection_id): raise R2RException(status_code=404, message="Collection not found") @@ -83,7 +82,7 @@ async def get_collection(self, collection_id: UUID) -> GroupResponse: if not result: raise R2RException(status_code=404, message="Collection not found") - return GroupResponse( + return CollectionResponse( collection_id=result["collection_id"], name=result["name"], description=result["description"], @@ -92,25 +91,44 @@ async def get_collection(self, collection_id: UUID) -> GroupResponse: ) async def update_collection( - self, collection_id: UUID, name: str, description: str - ) -> GroupResponse: + self, + collection_id: UUID, + name: Optional[str] = None, + description: Optional[str] = None, + ) -> CollectionResponse: """Update an existing collection.""" if not await self.collection_exists(collection_id): raise R2RException(status_code=404, message="Collection not found") + update_fields = [] + params: list = [] + + if name is not None: + update_fields.append("name = $1") + params.append(name) + + if description is not None: + update_fields.append("description = ${}".format(len(params) + 1)) + params.append(description) + + if not update_fields: + raise R2RException(status_code=400, message="No fields to update") + + update_fields.append("updated_at = NOW()") + params.append(collection_id) + query = f""" UPDATE {self._get_table_name('collections')} - SET name = $1, description = $2, updated_at = NOW() - WHERE collection_id = $3 + SET {', '.join(update_fields)} + WHERE collection_id = 
${len(params)} RETURNING collection_id, name, description, created_at, updated_at """ - result = await self.fetchrow_query( - query, [name, description, collection_id] - ) + + result = await self.fetchrow_query(query, params) if not result: raise R2RException(status_code=404, message="Collection not found") - return GroupResponse( + return CollectionResponse( collection_id=result["collection_id"], name=result["name"], description=result["description"], @@ -138,21 +156,28 @@ async def delete_collection(self, collection_id: UUID) -> None: raise R2RException(status_code=404, message="Collection not found") async def list_collections( - self, offset: int = 0, limit: int = 100 - ) -> list[GroupResponse]: + self, offset: int = 0, limit: int = -1 + ) -> dict[str, Union[list[CollectionResponse], int]]: """List collections with pagination.""" query = f""" - SELECT collection_id, name, description, created_at, updated_at + SELECT collection_id, name, description, created_at, updated_at, COUNT(*) OVER() AS total_entries FROM {self._get_table_name('collections')} ORDER BY name OFFSET $1 - LIMIT $2 """ - results = await self.fetch_query(query, [offset, limit]) + + conditions = [offset] + if limit != -1: + query += " LIMIT $2" + conditions.append(limit) + + results = await self.fetch_query(query, conditions) if not results: - return [] - return [ - GroupResponse( + logger.info("No collections found.") + return {"results": [], "total_entries": 0} + + collections = [ + CollectionResponse( collection_id=row["collection_id"], name=row["name"], description=row["description"], @@ -161,10 +186,13 @@ async def list_collections( ) for row in results ] + total_entries = results[0]["total_entries"] if results else 0 + + return {"results": collections, "total_entries": total_entries} async def get_collections_by_ids( self, collection_ids: list[UUID] - ) -> list[GroupResponse]: + ) -> list[CollectionResponse]: query = f""" SELECT collection_id, name, description, created_at, updated_at FROM 
{self._get_table_name("collections")} @@ -177,7 +205,7 @@ async def get_collections_by_ids( message=f"These collections were not found: {set(collection_ids) - {row['collection_id'] for row in results}}", ) return [ - GroupResponse( + CollectionResponse( collection_id=row["collection_id"], name=row["name"], description=row["description"], @@ -187,95 +215,9 @@ async def get_collections_by_ids( for row in results ] - async def add_user_to_collection( - self, user_id: UUID, collection_id: UUID - ) -> bool: - """Add a user to a collection.""" - if not await self.collection_exists(collection_id): - raise R2RException(status_code=404, message="Collection not found") - - query = f""" - UPDATE {self._get_table_name('users')} - SET collection_ids = array_append(collection_ids, $1) - WHERE user_id = $2 AND NOT ($1 = ANY(collection_ids)) - RETURNING user_id - """ - result = await self.fetchrow_query(query, [collection_id, user_id]) - return bool(result) - - async def remove_user_from_collection( - self, user_id: UUID, collection_id: UUID - ) -> None: - """Remove a user from a collection.""" - if not await self.collection_exists(collection_id): - raise R2RException(status_code=404, message="Collection not found") - - query = f""" - UPDATE {self._get_table_name('users')} - SET collection_ids = array_remove(collection_ids, $1) - WHERE user_id = $2 AND $1 = ANY(collection_ids) - RETURNING user_id - """ - result = await self.fetchrow_query(query, [collection_id, user_id]) - if not result: - raise R2RException( - status_code=404, - message="User is not a member of the specified collection", - ) - - async def get_users_in_collection( - self, collection_id: UUID, offset: int = 0, limit: int = 100 - ) -> list[UserResponse]: - """ - Get all users in a specific collection with pagination. - - Args: - collection_id (UUID): The ID of the collection to get users from. - offset (int): The number of users to skip. - limit (int): The maximum number of users to return. 
- - Returns: - List[UserResponse]: A list of UserResponse objects representing the users in the collection. - - Raises: - R2RException: If the collection doesn't exist. - """ - if not await self.collection_exists(collection_id): - raise R2RException(status_code=404, message="Collection not found") - - query = f""" - SELECT u.user_id, u.email, u.is_active, u.is_superuser, u.created_at, u.updated_at, - u.is_verified, u.collection_ids, u.name, u.bio, u.profile_picture - FROM {self._get_table_name('users')} u - WHERE $1 = ANY(u.collection_ids) - ORDER BY u.name - OFFSET $2 - LIMIT $3 - """ - results = await self.fetch_query(query, [collection_id, offset, limit]) - - return [ - UserResponse( - id=row["user_id"], - email=row["email"], - is_active=row["is_active"], - is_superuser=row["is_superuser"], - created_at=row["created_at"], - updated_at=row["updated_at"], - is_verified=row["is_verified"], - collection_ids=row["collection_ids"], - name=row["name"], - bio=row["bio"], - profile_picture=row["profile_picture"], - hashed_password=None, - verification_code_expiry=None, - ) - for row in results - ] - async def documents_in_collection( - self, collection_id: UUID, offset: int = 0, limit: int = 100 - ) -> list[DocumentInfo]: + self, collection_id: UUID, offset: int = 0, limit: int = -1 + ) -> dict[str, Union[list[DocumentInfo], int]]: """ Get all documents in a specific collection with pagination. 
Args: @@ -290,15 +232,20 @@ async def documents_in_collection( if not await self.collection_exists(collection_id): raise R2RException(status_code=404, message="Collection not found") query = f""" - SELECT d.document_id, d.user_id, d.type, d.metadata, d.title, d.version, d.size_in_bytes, d.ingestion_status, d.created_at, d.updated_at + SELECT d.document_id, d.user_id, d.type, d.metadata, d.title, d.version, d.size_in_bytes, d.ingestion_status, d.created_at, d.updated_at, COUNT(*) OVER() AS total_entries FROM {self._get_table_name('document_info')} d WHERE $1 = ANY(d.collection_ids) ORDER BY d.created_at DESC OFFSET $2 - LIMIT $3 """ - results = await self.fetch_query(query, [collection_id, offset, limit]) - return [ + + conditions = [collection_id, offset] + if limit != -1: + query += " LIMIT $3" + conditions.append(limit) + + results = await self.fetch_query(query, conditions) + documents = [ DocumentInfo( id=row["document_id"], user_id=row["user_id"], @@ -314,13 +261,16 @@ async def documents_in_collection( ) for row in results ] + total_entries = results[0]["total_entries"] if results else 0 + + return {"results": documents, "total_entries": total_entries} async def get_collections_overview( self, collection_ids: Optional[list[UUID]] = None, offset: int = 0, - limit: int = 100, - ) -> list[GroupOverviewResponse]: + limit: int = -1, + ) -> dict[str, Union[list[CollectionOverviewResponse], int]]: """Get an overview of collections, optionally filtered by collection IDs, with pagination.""" query = f""" WITH collection_overview AS ( @@ -330,27 +280,34 @@ async def get_collections_overview( FROM {self._get_table_name('collections')} g LEFT JOIN {self._get_table_name('users')} u ON g.collection_id = ANY(u.collection_ids) LEFT JOIN {self._get_table_name('document_info')} d ON g.collection_id = ANY(d.collection_ids) - """ - params = [] - if collection_ids: - query += " WHERE g.collection_id = ANY($1)" - params.append(collection_ids) - - query += """ + {' WHERE 
g.collection_id = ANY($1)' if collection_ids else ''} GROUP BY g.collection_id, g.name, g.description, g.created_at, g.updated_at + ), + counted_overview AS ( + SELECT *, COUNT(*) OVER() AS total_entries + FROM collection_overview ) - SELECT * FROM collection_overview + SELECT * FROM counted_overview ORDER BY name - OFFSET ${} LIMIT ${} - """.format( - len(params) + 1, len(params) + 2 - ) + OFFSET ${2 if collection_ids else 1} + {f'LIMIT ${3 if collection_ids else 2}' if limit != -1 else ''} + """ - params.extend([offset, limit]) + params: list = [] + if collection_ids: + params.append(collection_ids) + params.append(offset) + if limit != -1: + params.append(limit) results = await self.fetch_query(query, params) - return [ - GroupOverviewResponse( + + if not results: + logger.info("No collections found.") + return {"results": [], "total_entries": 0} + + collections = [ + CollectionOverviewResponse( collection_id=row["collection_id"], name=row["name"], description=row["description"], @@ -362,22 +319,31 @@ async def get_collections_overview( for row in results ] + total_entries = results[0]["total_entries"] if results else 0 + + return {"results": collections, "total_entries": total_entries} + async def get_collections_for_user( - self, user_id: UUID, offset: int = 0, limit: int = 100 - ) -> list[GroupResponse]: + self, user_id: UUID, offset: int = 0, limit: int = -1 + ) -> dict[str, Union[list[CollectionResponse], int]]: query = f""" - SELECT g.collection_id, g.name, g.description, g.created_at, g.updated_at + SELECT g.collection_id, g.name, g.description, g.created_at, g.updated_at, COUNT(*) OVER() AS total_entries FROM {self._get_table_name('collections')} g JOIN {self._get_table_name('users')} u ON g.collection_id = ANY(u.collection_ids) WHERE u.user_id = $1 ORDER BY g.name OFFSET $2 - LIMIT $3 """ - results = await self.fetch_query(query, [user_id, offset, limit]) - return [ - GroupResponse( + params = [user_id, offset] + if limit != -1: + query += " LIMIT $3" + 
params.append(limit) + + results = await self.fetch_query(query, params) + + collections = [ + CollectionResponse( collection_id=row["collection_id"], name=row["name"], description=row["description"], @@ -386,6 +352,9 @@ async def get_collections_for_user( ) for row in results ] + total_entries = results[0]["total_entries"] if results else 0 + + return {"results": collections, "total_entries": total_entries} async def assign_document_to_collection( self, document_id: UUID, collection_id: UUID @@ -449,21 +418,26 @@ async def assign_document_to_collection( ) async def document_collections( - self, document_id: UUID, offset: int = 0, limit: int = 100 - ) -> list[GroupResponse]: + self, document_id: UUID, offset: int = 0, limit: int = -1 + ) -> dict[str, Union[list[CollectionResponse], int]]: query = f""" - SELECT g.collection_id, g.name, g.description, g.created_at, g.updated_at + SELECT g.collection_id, g.name, g.description, g.created_at, g.updated_at, COUNT(*) OVER() AS total_entries FROM {self._get_table_name('collections')} g JOIN {self._get_table_name('document_info')} d ON g.collection_id = ANY(d.collection_ids) WHERE d.document_id = $1 ORDER BY g.name OFFSET $2 - LIMIT $3 """ - results = await self.fetch_query(query, [document_id, offset, limit]) - return [ - GroupResponse( + conditions: list = [document_id, offset] + if limit != -1: + query += " LIMIT $3" + conditions.append(limit) + + results = await self.fetch_query(query, conditions) + + collections = [ + CollectionResponse( collection_id=row["collection_id"], name=row["name"], description=row["description"], @@ -473,6 +447,10 @@ async def document_collections( for row in results ] + total_entries = results[0]["total_entries"] if results else 0 + + return {"results": collections, "total_entries": total_entries} + async def remove_document_from_collection( self, document_id: UUID, collection_id: UUID ) -> None: diff --git a/py/core/providers/database/document.py b/py/core/providers/database/document.py 
index 93c937f5d..e3409b097 100644 --- a/py/core/providers/database/document.py +++ b/py/core/providers/database/document.py @@ -1,7 +1,7 @@ import asyncio import json import logging -from typing import Optional, Union +from typing import Any, Optional, Union from uuid import UUID import asyncpg @@ -15,7 +15,7 @@ String, Table, ) -from sqlalchemy.dialects.postgresql import UUID +from sqlalchemy.dialects.postgresql import UUID as SqlUUID from core.base import ( DocumentInfo, @@ -37,9 +37,9 @@ def __init__(self, *args, **kwargs): self.document_info_table = Table( self._get_table_name("document_info"), self.metadata, - Column("document_id", UUID, primary_key=True), - Column("collection_ids", ARRAY(UUID)), - Column("user_id", UUID), + Column("document_id", SqlUUID, primary_key=True), + Column("collection_ids", ARRAY(SqlUUID)), + Column("user_id", SqlUUID), Column("type", String), Column("metadata", JSON), Column("title", String), @@ -69,7 +69,7 @@ async def create_table(self): updated_at TIMESTAMPTZ DEFAULT NOW(), ingestion_attempt_number INT DEFAULT 0 ); - CREATE INDEX IF NOT EXISTS idx_collection_ids_{self.collection_name} + CREATE INDEX IF NOT EXISTS idx_collection_ids_{self.project_name} ON {self._get_table_name('document_info')} USING GIN (collection_ids); """ await self.execute_query(query) @@ -104,7 +104,7 @@ async def upsert_documents_overview( retries = 0 while retries < max_retries: try: - async with self.pool.acquire() as conn: + async with self.pool.acquire() as conn: # type: ignore async with conn.transaction(): # Lock the row for update check_query = f""" @@ -224,10 +224,10 @@ async def get_documents_overview( filter_document_ids: Optional[list[UUID]] = None, filter_collection_ids: Optional[list[UUID]] = None, offset: int = 0, - limit: int = 100, - ) -> list[DocumentInfo]: + limit: int = -1, + ) -> dict[str, Any]: conditions = [] - params = [] + params: list[Any] = [] param_index = 1 if filter_document_ids: @@ -254,7 +254,8 @@ async def 
get_documents_overview( query = f""" SELECT document_id, collection_ids, user_id, type, metadata, title, version, - size_in_bytes, ingestion_status, created_at, updated_at, restructuring_status + size_in_bytes, ingestion_status, created_at, updated_at, restructuring_status, + COUNT(*) OVER() AS total_entries {base_query} ORDER BY created_at DESC OFFSET ${param_index} @@ -265,11 +266,13 @@ async def get_documents_overview( if limit != -1: query += f" LIMIT ${param_index}" params.append(limit) + param_index += 1 try: results = await self.fetch_query(query, params) + total_entries = results[0]["total_entries"] if results else 0 - return [ + documents = [ DocumentInfo( id=row["document_id"], collection_ids=row["collection_ids"], @@ -288,6 +291,8 @@ async def get_documents_overview( ) for row in results ] + + return {"results": documents, "total_entries": total_entries} except Exception as e: logger.error(f"Error in get_documents_overview: {str(e)}") raise R2RException( diff --git a/py/core/providers/database/postgres.py b/py/core/providers/database/postgres.py index 040a0f456..b23aee747 100644 --- a/py/core/providers/database/postgres.py +++ b/py/core/providers/database/postgres.py @@ -23,16 +23,18 @@ def __init__( self, config: DatabaseConfig, dimension: int, - crypto_provider: Optional[CryptoProvider] = None, + crypto_provider: CryptoProvider, user: Optional[str] = None, password: Optional[str] = None, host: Optional[str] = None, port: Optional[int] = None, db_name: Optional[str] = None, - collection_name: Optional[str] = None, + project_name: Optional[str] = None, *args, **kwargs, ): + super().__init__(config) + user = config.user or os.getenv("POSTGRES_USER") if not user: raise ValueError( @@ -54,7 +56,7 @@ def __init__( ) self.host = host - port = config.port or os.getenv("POSTGRES_PORT") + port = config.port or os.getenv("POSTGRES_PORT") # type: ignore if not port: raise ValueError( "Error, please set a valid POSTGRES_PORT environment variable or set a 'port' in 
the 'database' settings of your `r2r.toml`." @@ -68,18 +70,21 @@ def __init__( ) self.db_name = db_name - collection_name = ( - config.vecs_collection + project_name = ( + config.project_name + or config.vecs_collection # remove after deprecation or os.getenv("POSTGRES_PROJECT_NAME") - or os.getenv("POSTGRES_VECS_COLLECTION") + or os.getenv( + "POSTGRES_VECS_COLLECTION" + ) # remove after deprecation ) - if not collection_name: + if not project_name: raise ValueError( - "Error, please set a valid POSTGRES_PROJECT_NAME environment variable or set a 'vecs_collection' in the 'database' settings of your `r2r.toml`." + "Error, please set a valid POSTGRES_PROJECT_NAME environment variable or set a 'project_name' in the 'database' settings of your `r2r.toml`." ) - self.collection_name = collection_name + self.project_name = project_name - if not all([user, password, host, port, db_name, collection_name]): + if not all([user, password, host, port, db_name, project_name]): raise ValueError( "Error, please set the POSTGRES_USER, POSTGRES_PASSWORD, POSTGRES_HOST, POSTGRES_PORT, POSTGRES_DBNAME, and POSTGRES_PROJECT_NAME environment variables to use pgvector database." 
) @@ -97,12 +102,10 @@ def __init__( logger.info("Connecting to Postgres via TCP/IP") self.vector_db_dimension = dimension - self.collection_name = collection_name + self.project_name = project_name self.conn = None self.config: DatabaseConfig = config self.crypto_provider = crypto_provider - self.vector = (None,) - self.relational = (None,) async def initialize(self): self.vector = self._initialize_vector_db() @@ -112,7 +115,7 @@ def _initialize_vector_db(self) -> VectorDBProvider: return PostgresVectorDBProvider( self.config, connection_string=self.connection_string, - collection_name=self.collection_name, + project_name=self.project_name, dimension=self.vector_db_dimension, ) @@ -121,7 +124,7 @@ async def _initialize_relational_db(self) -> RelationalDBProvider: self.config, connection_string=self.connection_string, crypto_provider=self.crypto_provider, - collection_name=self.collection_name, + project_name=self.project_name, ) await relational_db.initialize() return relational_db diff --git a/py/core/providers/database/relational.py b/py/core/providers/database/relational.py index 5bfeaced8..9a8291544 100644 --- a/py/core/providers/database/relational.py +++ b/py/core/providers/database/relational.py @@ -3,6 +3,7 @@ import asyncpg +from core.base import RelationalDBProvider from core.providers.database.base import DatabaseMixin from core.providers.database.collection import CollectionMixin from core.providers.database.document import DocumentMixin @@ -13,20 +14,21 @@ class PostgresRelationalDBProvider( + RelationalDBProvider, DocumentMixin, CollectionMixin, BlacklistedTokensMixin, UserMixin, ): def __init__( - self, config, connection_string, crypto_provider, collection_name + self, config, connection_string, crypto_provider, project_name ): + super().__init__(config) self.config = config self.connection_string = connection_string self.crypto_provider = crypto_provider - self.collection_name = collection_name + self.project_name = project_name self.pool = None - 
super().__init__() async def initialize(self): try: @@ -42,7 +44,7 @@ async def initialize(self): await self._initialize_relational_db() def _get_table_name(self, base_name: str) -> str: - return f"{base_name}_{self.collection_name}" + return f"{base_name}_{self.project_name}" @asynccontextmanager async def get_connection(self): diff --git a/py/core/providers/database/tokens.py b/py/core/providers/database/tokens.py index 920316b56..009c6aaf9 100644 --- a/py/core/providers/database/tokens.py +++ b/py/core/providers/database/tokens.py @@ -12,14 +12,16 @@ async def create_table(self): token TEXT NOT NULL, blacklisted_at TIMESTAMPTZ DEFAULT NOW() ); - CREATE INDEX IF NOT EXISTS idx_blacklisted_tokens_{self.collection_name}_token + CREATE INDEX IF NOT EXISTS idx_blacklisted_tokens_{self.project_name}_token ON {self._get_table_name('blacklisted_tokens')} (token); - CREATE INDEX IF NOT EXISTS idx_blacklisted_tokens_{self.collection_name}_blacklisted_at + CREATE INDEX IF NOT EXISTS idx_blacklisted_tokens_{self.project_name}_blacklisted_at ON {self._get_table_name('blacklisted_tokens')} (blacklisted_at); """ await self.execute_query(query) - async def blacklist_token(self, token: str, current_time: datetime = None): + async def blacklist_token( + self, token: str, current_time: Optional[datetime] = None + ): if current_time is None: current_time = datetime.utcnow() diff --git a/py/core/providers/database/user.py b/py/core/providers/database/user.py index 58dd3d890..d4bde81ed 100644 --- a/py/core/providers/database/user.py +++ b/py/core/providers/database/user.py @@ -1,9 +1,9 @@ from datetime import datetime -from typing import Optional +from typing import Optional, Union from uuid import UUID from core.base.abstractions import R2RException, UserStats -from core.base.api.models.auth.responses import UserResponse +from core.base.api.models import UserResponse from core.base.utils import generate_id_from_label from .base import DatabaseMixin, QueryBuilder @@ -101,23 +101,19 
@@ async def get_user_by_email(self, email: str) -> UserResponse: if not result: raise R2RException(status_code=404, message="User not found") - return ( - UserResponse( - id=result["user_id"], - email=result["email"], - hashed_password=result["hashed_password"], - is_superuser=result["is_superuser"], - is_active=result["is_active"], - is_verified=result["is_verified"], - created_at=result["created_at"], - updated_at=result["updated_at"], - name=result["name"], - profile_picture=result["profile_picture"], - bio=result["bio"], - collection_ids=result["collection_ids"], - ) - if result - else None + return UserResponse( + id=result["user_id"], + email=result["email"], + hashed_password=result["hashed_password"], + is_superuser=result["is_superuser"], + is_active=result["is_active"], + is_verified=result["is_verified"], + created_at=result["created_at"], + updated_at=result["updated_at"], + name=result["name"], + profile_picture=result["profile_picture"], + bio=result["bio"], + collection_ids=result["collection_ids"], ) async def create_user(self, email: str, password: str) -> UserResponse: @@ -131,7 +127,7 @@ async def create_user(self, email: str, password: str) -> UserResponse: if e.status_code != 404: raise e - hashed_password = self.crypto_provider.get_password_hash(password) + hashed_password = self.crypto_provider.get_password_hash(password) # type: ignore query = f""" INSERT INTO {self._get_table_name('users')} (email, user_id, hashed_password, collection_ids) @@ -382,43 +378,74 @@ async def remove_user_from_collection( ) return None - async def mark_user_as_superuser(self, user_id: UUID): - query = f""" - UPDATE {self._get_table_name('users')} - SET is_superuser = TRUE, is_verified = TRUE, verification_code = NULL, verification_code_expiry = NULL - WHERE user_id = $1 + async def get_users_in_collection( + self, collection_id: UUID, offset: int = 0, limit: int = -1 + ) -> dict[str, Union[list[UserResponse], int]]: """ - await self.execute_query(query, 
[user_id]) + Get all users in a specific collection with pagination. + + Args: + collection_id (UUID): The ID of the collection to get users from. + offset (int): The number of users to skip. + limit (int): The maximum number of users to return. + + Returns: + List[UserResponse]: A list of UserResponse objects representing the users in the collection. + + Raises: + R2RException: If the collection doesn't exist. + """ + if not await self.collection_exists(collection_id): # type: ignore + raise R2RException(status_code=404, message="Collection not found") - async def get_users_in_collection( - self, collection_id: UUID, offset: int = 0, limit: int = 100 - ) -> list[UserResponse]: query = f""" - SELECT user_id, email, is_superuser, is_active, is_verified, created_at, updated_at, name, profile_picture, bio, collection_ids - FROM {self._get_table_name('users')} - WHERE $1 = ANY(collection_ids) - ORDER BY email - OFFSET $2 LIMIT $3 + SELECT u.user_id, u.email, u.is_active, u.is_superuser, u.created_at, u.updated_at, + u.is_verified, u.collection_ids, u.name, u.bio, u.profile_picture, + COUNT(*) OVER() AS total_entries + FROM {self._get_table_name('users')} u + WHERE $1 = ANY(u.collection_ids) + ORDER BY u.name + OFFSET $2 """ - results = await self.fetch_query(query, [collection_id, offset, limit]) - return [ + conditions = [collection_id, offset] + if limit != -1: + query += " LIMIT $3" + conditions.append(limit) + + results = await self.fetch_query(query, conditions) + + users = [ UserResponse( id=row["user_id"], email=row["email"], - is_superuser=row["is_superuser"], is_active=row["is_active"], - is_verified=row["is_verified"], + is_superuser=row["is_superuser"], created_at=row["created_at"], updated_at=row["updated_at"], + is_verified=row["is_verified"], + collection_ids=row["collection_ids"], name=row["name"], - profile_picture=row["profile_picture"], bio=row["bio"], - collection_ids=row["collection_ids"], + profile_picture=row["profile_picture"], + 
hashed_password=None, + verification_code_expiry=None, ) for row in results ] + total_entries = results[0]["total_entries"] if results else 0 + + return {"results": users, "total_entries": total_entries} + + async def mark_user_as_superuser(self, user_id: UUID): + query = f""" + UPDATE {self._get_table_name('users')} + SET is_superuser = TRUE, is_verified = TRUE, verification_code = NULL, verification_code_expiry = NULL + WHERE user_id = $1 + """ + await self.execute_query(query, [user_id]) + async def get_user_id_by_verification_code( self, verification_code: str ) -> Optional[UUID]: @@ -447,8 +474,8 @@ async def get_users_overview( self, user_ids: Optional[list[UUID]] = None, offset: int = 0, - limit: int = 100, - ) -> list[UserStats]: + limit: int = -1, + ) -> dict[str, Union[list[UserStats], int]]: query = f""" WITH user_docs AS ( SELECT @@ -462,7 +489,8 @@ async def get_users_overview( u.collection_ids, COUNT(d.document_id) AS num_files, COALESCE(SUM(d.size_in_bytes), 0) AS total_size_in_bytes, - ARRAY_AGG(d.document_id) FILTER (WHERE d.document_id IS NOT NULL) AS document_ids + ARRAY_AGG(d.document_id) FILTER (WHERE d.document_id IS NOT NULL) AS document_ids, + COUNT(*) OVER() AS total_entries FROM {self._get_table_name('users')} u LEFT JOIN {self._get_table_name('document_info')} d ON u.user_id = d.user_id {' WHERE u.user_id = ANY($3::uuid[])' if user_ids else ''} @@ -472,16 +500,20 @@ async def get_users_overview( FROM user_docs ORDER BY email OFFSET $1 - LIMIT $2 """ - params = [offset, limit] + params: list = [offset] + + if limit != -1: + query += " LIMIT $2" + params.append(limit) + if user_ids: params.append(user_ids) results = await self.fetch_query(query, params) - return [ + users = [ UserStats( user_id=row[0], email=row[1], @@ -497,3 +529,7 @@ async def get_users_overview( ) for row in results ] + + total_entries = results[0]["total_entries"] + + return {"results": users, "total_entries": total_entries} diff --git 
a/py/core/providers/database/vecs/adapter/markdown.py b/py/core/providers/database/vecs/adapter/markdown.py index 149573f43..9a70e39bc 100644 --- a/py/core/providers/database/vecs/adapter/markdown.py +++ b/py/core/providers/database/vecs/adapter/markdown.py @@ -38,10 +38,15 @@ def split_by_heading( for section in sections: chunks = flu(section.split(" ")).chunk(max_tokens) - is_not_useless_chunk = lambda i: not i in ["", "\n", []] + is_not_useless_chunk = lambda i: not i in [ + "", + "\n", + [], + ] # noqa: E731, E713 joined_chunks = filter( - is_not_useless_chunk, [" ".join(chunk) for chunk in chunks] + is_not_useless_chunk, + [" ".join(chunk) for chunk in chunks], # noqa: E731, E713 ) for joined_chunk in joined_chunks: diff --git a/py/core/providers/database/vecs/client.py b/py/core/providers/database/vecs/client.py index 50938f576..c0d04af87 100644 --- a/py/core/providers/database/vecs/client.py +++ b/py/core/providers/database/vecs/client.py @@ -144,7 +144,7 @@ def _supports_hnsw(self): and not self.vector_version.startswith("0.0") ) - def get_or_create_collection( + def get_or_create_vector_table( self, name: str, *, @@ -181,7 +181,7 @@ def get_or_create_collection( return collection._create_if_not_exists() - @deprecated("use Client.get_or_create_collection") + @deprecated("use Client.get_or_create_vector_table") def create_collection(self, name: str, dimension: int) -> Collection: """ Create a new vector collection. @@ -200,7 +200,7 @@ def create_collection(self, name: str, dimension: int) -> Collection: return Collection(name, dimension, self)._create() - @deprecated("use Client.get_or_create_collection") + @deprecated("use Client.get_or_create_vector_table") def get_collection(self, name: str) -> Collection: """ Retrieve an existing vector collection. 
diff --git a/py/core/providers/database/vecs/collection.py b/py/core/providers/database/vecs/collection.py index d659daf90..6211d101a 100644 --- a/py/core/providers/database/vecs/collection.py +++ b/py/core/providers/database/vecs/collection.py @@ -633,8 +633,8 @@ def semantic_search( # if filters: stmt = stmt.filter(self.build_filters(search_settings.filters)) # type: ignore - stmt = stmt.order_by(distance_clause) + stmt = stmt.offset(search_settings.offset) stmt = stmt.limit(search_settings.search_limit) with self.client.Session() as sess: @@ -649,7 +649,12 @@ def semantic_search( sess.execute( text( "set local hnsw.ef_search = :ef_search" - ).bindparams(ef_search=search_settings.ef_search) + ).bindparams( + ef_search=max( + search_settings.ef_search, + search_settings.search_limit, + ) + ) ) if len(cols) == 1: return [str(x) for x in sess.scalars(stmt).fetchall()] @@ -681,6 +686,7 @@ def full_text_search( .where(self.table.c.fts.op("@@")(ts_query)) .where(self.build_filters(search_settings.filters)) .order_by(rank_function.desc()) + .offset(search_settings.offset) .limit(search_settings.hybrid_search_settings.full_text_limit) ) @@ -948,6 +954,16 @@ def is_indexed_for_measure(self, measure: IndexMeasure): return False + def close(self): + """ + Closes the database connection associated with this collection. + + This method should be called when you are done using the collection to release + the database resources. 
+ """ + if self.client: + self.client.close() + def create_index( self, measure: IndexMeasure = IndexMeasure.cosine_distance, diff --git a/py/core/providers/database/vector.py b/py/core/providers/database/vector.py index bd8338e39..ae63aa6a0 100644 --- a/py/core/providers/database/vector.py +++ b/py/core/providers/database/vector.py @@ -1,6 +1,6 @@ import concurrent.futures +import copy import logging -import os from concurrent.futures import ThreadPoolExecutor from typing import Any, Optional @@ -40,10 +40,10 @@ def __init__(self, config: DatabaseConfig, *args, **kwargs): raise ValueError( "Error occurred while attempting to connect to the pgvector provider." ) - self.collection_name = kwargs.get("collection_name", None) - if not self.collection_name: + self.project_name = kwargs.get("project_name", None) + if not self.project_name: raise ValueError( - "Please provide a valid `collection_name` to the `PostgresVectorDBProvider`." + "Please provide a valid `project_name` to the `PostgresVectorDBProvider`." ) dimension = kwargs.get("dimension", None) if not dimension: @@ -51,10 +51,9 @@ def __init__(self, config: DatabaseConfig, *args, **kwargs): "Please provide a valid `dimension` to the `PostgresVectorDBProvider`." 
) - self.collection: Optional[Collection] = None self._initialize_vector_db(dimension) logger.info( - f"Successfully initialized PGVectorDB with collection: {self.collection_name}" + f"Successfully initialized PGVectorDB for project: {self.project_name}" ) def _initialize_vector_db(self, dimension: int) -> None: @@ -64,8 +63,8 @@ def _initialize_vector_db(self, dimension: int) -> None: sess.execute(text("CREATE EXTENSION IF NOT EXISTS btree_gin;")) sess.commit() - self.collection = self.vx.get_or_create_collection( - name=self.collection_name, dimension=dimension + self.collection = self.vx.get_or_create_vector_table( + name=self.project_name, dimension=dimension ) self.create_index() @@ -124,14 +123,14 @@ def semantic_search( ) return [ VectorSearchResult( - fragment_id=result[0], - extraction_id=result[1], - document_id=result[2], - user_id=result[3], - collection_ids=result[4], - text=result[5], - score=1 - float(result[6]), - metadata=result[7], + fragment_id=result[0], # type: ignore + extraction_id=result[1], # type: ignore + document_id=result[2], # type: ignore + user_id=result[3], # type: ignore + collection_ids=result[4], # type: ignore + text=result[5], # type: ignore + score=1 - float(result[6]), # type: ignore + metadata=result[7], # type: ignore ) for result in results ] @@ -143,10 +142,9 @@ def full_text_search( raise ValueError( "Please call `initialize_collection` before attempting to run `full_text_search`." ) - results = self.collection.full_text_search( + return self.collection.full_text_search( query_text=query_text, search_settings=search_settings ) - return results def hybrid_search( self, @@ -168,20 +166,28 @@ def hybrid_search( "The `full_text_limit` must be greater than or equal to the `search_limit`." 
) + semantic_settings = copy.deepcopy(search_settings) + semantic_settings.search_limit += search_settings.offset + + full_text_settings = copy.deepcopy(search_settings) + full_text_settings.hybrid_search_settings.full_text_limit += ( # type: ignore + search_settings.offset + ) + # Use ThreadPoolExecutor to run searches in parallel with ThreadPoolExecutor(max_workers=2) as executor: semantic_future = executor.submit( - self.semantic_search, query_vector, search_settings + self.semantic_search, query_vector, semantic_settings ) full_text_future = executor.submit( - self.full_text_search, query_text, search_settings + self.full_text_search, query_text, full_text_settings ) # Wait for both searches to complete concurrent.futures.wait([semantic_future, full_text_future]) - semantic_results = semantic_future.result() - full_text_results = full_text_future.result() + semantic_results: list[VectorSearchResult] = semantic_future.result() + full_text_results: list[VectorSearchResult] = full_text_future.result() semantic_limit = search_settings.search_limit full_text_limit = ( @@ -240,43 +246,46 @@ def hybrid_search( combined_results = { k: v for k, v in combined_results.items() - if v["semantic_rank"] <= semantic_limit * 2 - and v["full_text_rank"] <= full_text_limit * 2 + if v["semantic_rank"] <= semantic_limit * 2 # type: ignore + and v["full_text_rank"] <= full_text_limit * 2 # type: ignore } # Calculate RRF scores - for result in combined_results.values(): - semantic_score = 1 / (rrf_k + result["semantic_rank"]) - full_text_score = 1 / (rrf_k + result["full_text_rank"]) - result["rrf_score"] = ( + for result in combined_results.values(): # type: ignore + semantic_score = 1 / (rrf_k + result["semantic_rank"]) # type: ignore + full_text_score = 1 / (rrf_k + result["full_text_rank"]) # type: ignore + result["rrf_score"] = ( # type: ignore semantic_score * semantic_weight + full_text_score * full_text_weight ) / (semantic_weight + full_text_weight) - # Sort by RRF score and 
convert to VectorSearchResult - limit = min(semantic_limit, full_text_limit) + # Sort by RRF score and apply offset and limit sorted_results = sorted( combined_results.values(), - key=lambda x: x["rrf_score"], + key=lambda x: x["rrf_score"], # type: ignore reverse=True, - )[:limit] + ) + offset_results = sorted_results[ + search_settings.offset : search_settings.offset + + search_settings.search_limit + ] return [ VectorSearchResult( - fragment_id=result["data"].fragment_id, - extraction_id=result["data"].extraction_id, - document_id=result["data"].document_id, - user_id=result["data"].user_id, - collection_ids=result["data"].collection_ids, - text=result["data"].text, - score=result["rrf_score"], + fragment_id=result["data"].fragment_id, # type: ignore + extraction_id=result["data"].extraction_id, # type: ignore + document_id=result["data"].document_id, # type: ignore + user_id=result["data"].user_id, # type: ignore + collection_ids=result["data"].collection_ids, # type: ignore + text=result["data"].text, # type: ignore + score=result["rrf_score"], # type: ignore metadata={ - **result["data"].metadata, + **result["data"].metadata, # type: ignore "semantic_rank": result["semantic_rank"], "full_text_rank": result["full_text_rank"], }, ) - for result in sorted_results + for result in offset_results ] def create_index( @@ -303,7 +312,7 @@ def create_index( def delete( self, filters: dict[str, Any], - ) -> list[str]: + ) -> dict[str, dict[str, str]]: if self.collection is None: raise ValueError( "Please call `initialize_collection` before attempting to run `delete`." 
@@ -343,10 +352,10 @@ def assign_document_to_collection( result = sess.execute( query, {"document_id": document_id, "collection_id": collection_id}, - ) + ).fetchone() sess.commit() - if result.rowcount == 0: + if not result: logger.warning( f"Document {document_id} not found or already assigned to collection {collection_id}" ) @@ -383,10 +392,10 @@ def remove_document_from_collection( result = sess.execute( query, {"document_id": document_id, "collection_id": collection_id}, - ) + ).fetchone() sess.commit() - if result.rowcount == 0: + if not result: logger.warning( f"Document {document_id} not found in collection {collection_id} or already removed" ) @@ -448,9 +457,16 @@ def delete_collection(self, collection_id: str) -> None: ) with self.vx.Session() as sess: - result = sess.execute(query, {"collection_id": collection_id}) + result = sess.execute( + query, {"collection_id": collection_id} + ).fetchone() sess.commit() + if not result: + raise ValueError( + f"Collection {collection_id} not found in any documents." + ) + affected_rows = result.rowcount logger.info( f"Removed collection {collection_id} from {affected_rows} documents." 
@@ -458,7 +474,7 @@ def delete_collection(self, collection_id: str) -> None: def get_document_chunks( self, document_id: str, offset: int = 0, limit: int = -1 - ) -> dict: + ) -> dict[str, Any]: if not self.collection: raise ValueError("Collection is not initialized.") @@ -466,7 +482,7 @@ def get_document_chunks( table_name = self.collection.table.name query = text( f""" - SELECT fragment_id, extraction_id, document_id, user_id, collection_ids, text, metadata + SELECT fragment_id, extraction_id, document_id, user_id, collection_ids, text, metadata, COUNT(*) OVER() AS total FROM vecs."{table_name}" WHERE document_id = :document_id ORDER BY CAST(metadata->>'chunk_order' AS INTEGER) @@ -481,15 +497,31 @@ def get_document_chunks( with self.vx.Session() as sess: results = sess.execute(query, params).fetchall() - return [ - { - "fragment_id": result[0], - "extraction_id": result[1], - "document_id": result[2], - "user_id": result[3], - "collection_ids": result[4], - "text": result[5], - "metadata": result[6], - } - for result in results - ] + chunks = [] + total = 0 + + if results: + total = results[0][7] + chunks = [ + { + "fragment_id": result[0], + "extraction_id": result[1], + "document_id": result[2], + "user_id": result[3], + "collection_ids": result[4], + "text": result[5], + "metadata": result[6], + } + for result in results + ] + + return {"results": chunks, "total_entries": total} + + def close(self) -> None: + if self.vx: + with self.vx.Session() as sess: + sess.close() + if sess.bind: + sess.bind.dispose() # type: ignore + + logger.info("Closed PGVectorDB connection.") diff --git a/py/core/providers/embeddings/litellm.py b/py/core/providers/embeddings/litellm.py index a0b6398d8..4d131c43d 100644 --- a/py/core/providers/embeddings/litellm.py +++ b/py/core/providers/embeddings/litellm.py @@ -173,10 +173,3 @@ def rerank( limit: int = 10, ): return results[:limit] - - def tokenize_string( - self, text: str, model: str, stage: EmbeddingProvider.PipeStage - ) -> 
list[int]: - raise NotImplementedError( - "Tokenization is not supported by LiteLLMEmbeddingProvider." - ) diff --git a/py/core/providers/embeddings/ollama.py b/py/core/providers/embeddings/ollama.py index c18880b96..7a61d8c57 100644 --- a/py/core/providers/embeddings/ollama.py +++ b/py/core/providers/embeddings/ollama.py @@ -183,10 +183,3 @@ def rerank( limit: int = 10, ) -> list[VectorSearchResult]: return results[:limit] - - def tokenize_string( - self, text: str, model: str, stage: EmbeddingProvider.PipeStage - ) -> list[int]: - raise NotImplementedError( - "Tokenization is not supported by OllamaEmbeddingProvider." - ) diff --git a/py/core/providers/file/postgres.py b/py/core/providers/file/postgres.py index dd2051f8d..dcc110428 100644 --- a/py/core/providers/file/postgres.py +++ b/py/core/providers/file/postgres.py @@ -14,8 +14,8 @@ class PostgresFileProvider(FileProvider): def __init__(self, config: FileConfig, db_provider: PostgresDBProvider): - super().__init__() - self.config = config + super().__init__(config) + self.config: FileConfig = config self.db_provider = db_provider self.pool = None @@ -71,6 +71,12 @@ async def upsert_file( file_size: int, file_type: Optional[str] = None, ) -> None: + if not self.pool: + raise R2RException( + status_code=500, + message="Connection to the database is not initialized", + ) + query = f""" INSERT INTO {self._get_table_name('file_storage')} (document_id, file_name, file_oid, file_size, file_type) @@ -96,6 +102,12 @@ async def upsert_file( async def store_file( self, document_id, file_name, file_content: io.BytesIO, file_type=None ): + if not self.pool: + raise R2RException( + status_code=500, + message="Connection to the database is not initialized", + ) + file_size = file_content.getbuffer().nbytes async with self.pool.acquire() as conn: async with conn.transaction(): @@ -136,6 +148,12 @@ async def _write_lobject(self, conn, oid, file_content): async def retrieve_file( self, document_id: UUID ) -> 
Optional[tuple[str, BinaryIO, int]]: + if not self.pool: + raise R2RException( + status_code=500, + message="Connection to the database is not initialized", + ) + query = f""" SELECT file_name, file_oid, file_size FROM {self._get_table_name('file_storage')} @@ -203,6 +221,12 @@ async def _read_lobject(self, conn, oid: int) -> bytes: return file_data.getvalue() async def delete_file(self, document_id: UUID) -> bool: + if not self.pool: + raise R2RException( + status_code=500, + message="Connection to the database is not initialized", + ) + query = f""" SELECT file_oid FROM {self._get_table_name('file_storage')} WHERE document_id = $1 @@ -236,26 +260,31 @@ async def get_files_overview( offset: int = 0, limit: int = 100, ) -> list[dict]: + if not self.pool: + raise R2RException( + status_code=500, + message="Connection to the database is not initialized", + ) + conditions = [] params = [] + query = f""" + SELECT document_id, file_name, file_oid, file_size, file_type, created_at, updated_at + FROM {self._get_table_name('file_storage')} + """ if filter_document_ids: - conditions.append("document_id = ANY($1)") - params.append(filter_document_ids) + conditions.append(f"document_id = ANY(${len(params) + 1})") + params.append([str(doc_id) for doc_id in filter_document_ids]) if filter_file_names: - conditions.append("file_name = ANY($2)") + conditions.append(f"file_name = ANY(${len(params) + 1})") params.append(filter_file_names) - query = f""" - SELECT document_id, file_name, file_oid, file_size, file_type, created_at, updated_at - FROM {self._get_table_name('file_storage')} - """ - if conditions: query += " WHERE " + " AND ".join(conditions) - query += " ORDER BY created_at DESC OFFSET $3 LIMIT $4" + query += f" ORDER BY created_at DESC OFFSET ${len(params) + 1} LIMIT ${len(params) + 2}" params.extend([offset, limit]) async with self.pool.acquire() as conn: diff --git a/py/core/providers/kg/neo4j/__init__.py b/py/core/providers/kg/neo4j/__init__.py index 
63be447ee..bb1d985f3 100644 --- a/py/core/providers/kg/neo4j/__init__.py +++ b/py/core/providers/kg/neo4j/__init__.py @@ -1,3 +1,3 @@ -from .provider import Neo4jKGProvider +from .provider import Neo4jKGProvider # type: ignore __all__ = ["Neo4jKGProvider"] diff --git a/py/core/providers/kg/neo4j/graph_queries.py b/py/core/providers/kg/neo4j/graph_queries.py index f6f7f2b08..84daa5aaf 100644 --- a/py/core/providers/kg/neo4j/graph_queries.py +++ b/py/core/providers/kg/neo4j/graph_queries.py @@ -1,3 +1,4 @@ +# type: ignore UNIQUE_CONSTRAINTS = [] GET_CHUNKS_QUERY = """ diff --git a/py/core/providers/kg/neo4j/provider.py b/py/core/providers/kg/neo4j/provider.py index 6ba24dbf9..90bf8518f 100644 --- a/py/core/providers/kg/neo4j/provider.py +++ b/py/core/providers/kg/neo4j/provider.py @@ -1,28 +1,22 @@ +# type: ignore import json import logging import os from datetime import date, datetime from decimal import Decimal -from typing import Any, Dict, List, Optional, Tuple, AsyncGenerator +from typing import Any, AsyncGenerator, Dict, List, Optional, Tuple from uuid import UUID -from core.base import ( - KGConfig, - KGProvider, - R2RException, -) - -from core.base.abstractions.document import DocumentFragment -from core.base.abstractions.graph import ( +from core.base import KGConfig, KGProvider, R2RException +from core.base.abstractions import ( Community, + DocumentFragment, Entity, KGExtraction, RelationshipType, Triple, ) -logger = logging.getLogger(__name__) - from .graph_queries import ( GET_CHUNKS_QUERY, GET_COMMUNITIES_QUERY, @@ -37,6 +31,8 @@ UNIQUE_CONSTRAINTS, ) +logger = logging.getLogger(__name__) + class Neo4jKGProvider(KGProvider): @@ -274,9 +270,9 @@ def get_community_entities_and_triples( query = """MATCH (a:__Entity__) - [r] -> (b:__Entity__) WHERE a.communityIds[$level] = $community_id OR b.communityIds[$level] = $community_id - RETURN a.name AS source, b.name AS target, a.description AS source_description, + RETURN ID(a) AS source_id, a.name AS source, 
id(b) AS target_id, b.name AS target, a.description AS source_description, b.description AS target_description, labels(a) AS source_labels, labels(b) AS target_labels, - r.description AS relationship_description, r.name AS relationship_name, r.weight AS relationship_weight + r.description AS relationship_description, r.name AS relationship_name, r.weight AS relationship_weight, ID(r) AS relationship_id """ neo4j_records = self.structured_query( @@ -289,6 +285,7 @@ def get_community_entities_and_triples( entities = [ Entity( + id=record["source_id"], name=record["source"], description=record["source_description"], category=", ".join(record["source_labels"]), @@ -298,6 +295,7 @@ def get_community_entities_and_triples( triples = [ Triple( + id=record["relationship_id"], subject=record["source"], predicate=record["relationship_name"], object=record["target"], @@ -450,7 +448,9 @@ async def vector_query( for property_name in property_names } - def perform_graph_clustering(self, leiden_params: dict) -> Tuple[int, int]: + def perform_graph_clustering( + self, leiden_params: dict + ) -> Tuple[int, int, set[tuple[int, Any]]]: """ Perform graph clustering on the graph. @@ -518,7 +518,7 @@ def perform_graph_clustering(self, leiden_params: dict) -> Tuple[int, int]: result = self.structured_query(GRAPH_PROJECTION_QUERY) # step 2: run the hierarchical leiden algorithm on the graph. 
- seed_property = leiden_params.get("seed_property", "communityIds") + # seed_property = leiden_params.get("seed_property", "communityIds") write_property = leiden_params.get("write_property", "communityIds") random_seed = leiden_params.get("random_seed", 42) include_intermediate_communities = leiden_params.get( @@ -551,7 +551,7 @@ def perform_graph_clustering(self, leiden_params: dict) -> Tuple[int, int]: result = self.structured_query(GRAPH_CLUSTERING_QUERY).records[0] - community_count = result["communityCount"] + community_count: int = result["communityCount"] modularities = result["modularities"] logger.info( diff --git a/py/core/providers/llm/litellm.py b/py/core/providers/llm/litellm.py index 24cfd2a32..8131d65f7 100644 --- a/py/core/providers/llm/litellm.py +++ b/py/core/providers/llm/litellm.py @@ -1,7 +1,7 @@ import logging from typing import Any -from core.base.abstractions.llm import GenerationConfig +from core.base.abstractions import GenerationConfig from core.base.providers.llm import CompletionConfig, CompletionProvider logger = logging.getLogger(__name__) diff --git a/py/core/providers/llm/openai.py b/py/core/providers/llm/openai.py index d4dddd46b..5f1370f52 100644 --- a/py/core/providers/llm/openai.py +++ b/py/core/providers/llm/openai.py @@ -4,7 +4,7 @@ from openai import AsyncOpenAI, OpenAI -from core.base.abstractions.llm import GenerationConfig +from core.base.abstractions import GenerationConfig from core.base.providers.llm import CompletionConfig, CompletionProvider logger = logging.getLogger(__name__) diff --git a/py/core/providers/orchestration/hatchet.py b/py/core/providers/orchestration/hatchet.py index 9a1b97eba..13c89b349 100644 --- a/py/core/providers/orchestration/hatchet.py +++ b/py/core/providers/orchestration/hatchet.py @@ -3,13 +3,14 @@ from hatchet_sdk import Hatchet -from core.base import OrchestrationProvider +from core.base import OrchestrationConfig, OrchestrationProvider class 
HatchetOrchestrationProvider(OrchestrationProvider): - def __init__(self, config: Any): + def __init__(self, config: OrchestrationConfig): super().__init__(config) self.orchestrator = Hatchet() + self.config: OrchestrationConfig = config # for type hinting self.worker def register_workflow(self, workflow: Any) -> None: diff --git a/py/core/providers/parsing/__init__.py b/py/core/providers/parsing/__init__.py index 58ee4aa04..1cc3cb0f4 100644 --- a/py/core/providers/parsing/__init__.py +++ b/py/core/providers/parsing/__init__.py @@ -1,4 +1,4 @@ -from .r2r_parsing import R2RParsingProvider +from .r2r_parsing import R2RParsingProvider # type: ignore from .unstructured_parsing import UnstructuredParsingProvider __all__ = ["R2RParsingProvider", "UnstructuredParsingProvider"] diff --git a/py/core/providers/parsing/r2r_parsing.py b/py/core/providers/parsing/r2r_parsing.py index 720f061ce..9994a6274 100644 --- a/py/core/providers/parsing/r2r_parsing.py +++ b/py/core/providers/parsing/r2r_parsing.py @@ -1,9 +1,11 @@ +# type: ignore import logging import time -from typing import Any, AsyncGenerator +from typing import Any, AsyncGenerator, Union from core import parsers from core.base import ( + AsyncParser, Document, DocumentExtraction, DocumentType, @@ -47,7 +49,7 @@ class R2RParsingProvider(ParsingProvider): def __init__(self, config: ParsingConfig): super().__init__(config) - self.parsers = {} + self.parsers: dict[DocumentType, AsyncParser] = {} self._initialize_parsers() def _initialize_parsers(self): @@ -65,37 +67,38 @@ def _initialize_parsers(self): if parser_name := getattr(parsers, parser_override.parser): self.parsers[parser_override.document_type] = parser_name() - async def parse( + async def parse( # type: ignore self, file_content: bytes, document: Document - ) -> AsyncGenerator[DocumentExtraction, None]: + ) -> AsyncGenerator[ + Union[DocumentExtraction, R2RDocumentProcessingError], None + ]: if document.type not in self.parsers: yield 
R2RDocumentProcessingError( document_id=document.id, error_message=f"Parser for {document.type} not found in `R2RParsingProvider`.", ) - return + else: + parser = self.parsers[document.type] + texts = await parser.ingest(file_content) + t0 = time.time() - parser = self.parsers[document.type] - texts = parser.ingest(file_content) - t0 = time.time() + iteration = 0 + async for text in texts: + yield DocumentExtraction( + id=generate_id_from_label(f"{document.id}-{iteration}"), + document_id=document.id, + user_id=document.user_id, + collection_ids=document.collection_ids, + data=text, + metadata=document.metadata, + ) + iteration += 1 - iteration = 0 - async for text in texts: - yield DocumentExtraction( - id=generate_id_from_label(f"{document.id}-{iteration}"), - document_id=document.id, - user_id=document.user_id, - collection_ids=document.collection_ids, - data=text, - metadata=document.metadata, + logger.debug( + f"Parsed document with id={document.id}, title={document.metadata.get('title', None)}, " + f"user_id={document.metadata.get('user_id', None)}, metadata={document.metadata} " + f"into {iteration} extractions in t={time.time() - t0:.2f} seconds." ) - iteration += 1 - - logger.debug( - f"Parsed document with id={document.id}, title={document.metadata.get('title', None)}, " - f"user_id={document.metadata.get('user_id', None)}, metadata={document.metadata} " - f"into {iteration} extractions in t={time.time() - t0:.2f} seconds." 
- ) def get_parser_for_document_type(self, doc_type: DocumentType) -> Any: return self.parsers.get(doc_type) diff --git a/py/core/providers/parsing/unstructured_parsing.py b/py/core/providers/parsing/unstructured_parsing.py index 91715b716..3b5393e30 100644 --- a/py/core/providers/parsing/unstructured_parsing.py +++ b/py/core/providers/parsing/unstructured_parsing.py @@ -1,6 +1,5 @@ -import asyncio +# TODO - cleanup type issues in this file that relate to `bytes` import base64 -import json import logging import os import time @@ -9,12 +8,12 @@ from typing import Any, AsyncGenerator import httpx -from pydantic import BaseModel from unstructured_client import UnstructuredClient from unstructured_client.models import operations, shared from core import parsers from core.base import ( + AsyncParser, Document, DocumentExtraction, DocumentType, @@ -22,7 +21,7 @@ ParsingProvider, generate_id_from_label, ) -from core.base.abstractions.base import R2RSerializable +from core.base.abstractions import R2RSerializable logger = logging.getLogger(__name__) @@ -52,7 +51,7 @@ class UnstructuredParsingProvider(ParsingProvider): DocumentType.PNG: [parsers.ImageParser], DocumentType.SVG: [parsers.ImageParser], DocumentType.MP3: [parsers.AudioParser], - DocumentType.MP4: [parsers.MovieParser], + # DocumentType.MP4: [parsers.MovieParser], } IMAGE_TYPES = { @@ -64,6 +63,8 @@ class UnstructuredParsingProvider(ParsingProvider): } def __init__(self, use_api: bool, config: ParsingConfig): + super().__init__(config) + self.config: ParsingConfig = config if config.excluded_parsers: logger.warning( "Excluded parsers are not supported by the unstructured parsing provider." 
@@ -104,7 +105,7 @@ def __init__(self, use_api: bool, config: ParsingConfig): self.client = httpx.AsyncClient() super().__init__(config) - self.parsers = {} + self.parsers: dict[DocumentType, AsyncParser] = {} self._initialize_parsers() def _initialize_parsers(self): @@ -126,19 +127,19 @@ async def parse_fallback( self, file_content: bytes, document: Document, chunk_size: int ) -> AsyncGenerator[FallbackElement, None]: - texts = self.parsers[document.type].ingest( + texts = self.parsers[document.type].ingest( # type: ignore file_content, chunk_size=chunk_size ) chunk_id = 0 - async for text in texts: + async for text in texts: # type: ignore if text and text != "": yield FallbackElement( text=text, metadata={"chunk_id": chunk_id} ) chunk_id += 1 - async def parse( + async def parse( # type: ignore self, file_content: bytes, document: Document ) -> AsyncGenerator[DocumentExtraction, None]: @@ -160,15 +161,14 @@ async def parse( logger.info( f"Parsing {document.type}: {document.id} with unstructured" ) - if isinstance(file_content, bytes): - file_content = BytesIO(file_content) + file_content = BytesIO(file_content) # type: ignore # TODO - Include check on excluded parsers here. 
if self.use_api: logger.info(f"Using API to parse document {document.id}") files = self.shared.Files( - content=file_content.read(), + content=file_content.read(), # type: ignore file_name=document.metadata.get("title", "unknown_file"), ) @@ -178,15 +178,15 @@ async def parse( **self.config.chunking_config.extra_fields, ) ) - elements = self.client.general.partition(req) - elements = list(elements.elements) + elements = self.client.general.partition(req) # type: ignore + elements = list(elements.elements) # type: ignore else: logger.info( f"Using local unstructured fastapi server to parse document {document.id}" ) # Base64 encode the file content - encoded_content = base64.b64encode(file_content.read()).decode( + encoded_content = base64.b64encode(file_content.read()).decode( # type: ignore "utf-8" ) @@ -194,7 +194,7 @@ async def parse( f"Sending a request to {self.local_unstructured_url}/partition" ) - elements = await self.client.post( + response = await self.client.post( f"{self.local_unstructured_url}/partition", json={ "file_content": encoded_content, # Use encoded string @@ -203,26 +203,31 @@ async def parse( timeout=300, # Adjust timeout as needed ) - elements = elements.json() - elements = elements["elements"] + elements = response.json().get("elements", []) iteration = 0 # if there are no chunks for iteration, element in enumerate(elements): - if not isinstance(element, dict): - element = element.to_dict() - - if element.get("text", "") == "": - continue - - metadata = copy(document.metadata) - for key, value in element.items(): - if key == "text": - text = value - elif key == "metadata": - for k, v in value.items(): - if k not in metadata: - if k != "orig_elements": - metadata[f"unstructured_{k}"] = v + if isinstance(element, FallbackElement): + text = element.text + metadata = copy(document.metadata) + metadata.update(element.metadata) + else: + element_dict = ( + element.to_dict() + if not isinstance(element, dict) + else element + ) + text = 
element_dict.get("text", "") + if text == "": + continue + + metadata = copy(document.metadata) + for key, value in element_dict.items(): + if key == "metadata": + for k, v in value.items(): + if k not in metadata: + if k != "orig_elements": + metadata[f"unstructured_{k}"] = v # indicate that the document was chunked using unstructured # nullifies the need for chunking in the pipeline diff --git a/py/core/providers/prompts/defaults/graphrag_community_reports.yaml b/py/core/providers/prompts/defaults/graphrag_community_reports.yaml deleted file mode 100644 index 9d57b92be..000000000 --- a/py/core/providers/prompts/defaults/graphrag_community_reports.yaml +++ /dev/null @@ -1,150 +0,0 @@ -graphrag_community_reports: - template: | - You are an AI assistant that helps a human analyst to perform general information discovery. Information discovery is the process of identifying and assessing relevant information associated with certain entities (e.g., organizations and individuals) within a network. - - # Goal - Write a comprehensive report of a community, given a list of entities that belong to the community as well as their relationships and optional associated claims. The report will be used to inform decision-makers about information associated with the community and their potential impact. The content of this report includes an overview of the community's key entities, their legal compliance, technical capabilities, reputation, and noteworthy claims. - - # Report Structure - - The report should include the following sections: - - - TITLE: community's name that represents its key entities - title should be short but specific. When possible, include representative named entities in the title. - - SUMMARY: An executive summary of the community's overall structure, how its entities are related to each other, and significant information associated with its entities. 
- - IMPACT SEVERITY RATING: a float score between 0-10 that represents the severity of IMPACT posed by entities within the community. IMPACT is the scored importance of a community. - - RATING EXPLANATION: Give a single sentence explanation of the IMPACT severity rating. - - DETAILED FINDINGS: A list of 5-10 key insights about the community. Each insight should have a short summary followed by multiple paragraphs of explanatory text grounded according to the grounding rules below. Be comprehensive. - - Return output as a well-formed JSON-formatted string with the following format: - {{ - "title": , - "summary": , - "rating": , - "rating_explanation": , - "findings": [ - {{ - "summary":, - "explanation": - }}, - {{ - "summary":, - "explanation": - }} - ] - }} - - # Grounding Rules - - Points supported by data should list their data references as follows: - - "This is an example sentence supported by multiple data references [Data: (record ids); (record ids)]." - - Do not list more than 5 record ids in a single reference. Instead, list the top 5 most relevant record ids and add "+more" to indicate that there are more. - - For example: - "Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (1), Entities (5, 7); Relationships (23); Claims (7, 2, 34, 64, 46, +more)]." - - where 1, 5, 7, 23, 2, 34, 46, and 64 represent the id (not the index) of the relevant data record. - - Do not include information where the supporting evidence for it is not provided. 
- - - # Example Input - ----------- - Text: - - Entities - - id,entity,description - 5,VERDANT OASIS PLAZA,Verdant Oasis Plaza is the location of the Unity March - 6,HARMONY ASSEMBLY,Harmony Assembly is an organization that is holding a march at Verdant Oasis Plaza - - Relationships - - id,source,target,description - 37,VERDANT OASIS PLAZA,UNITY MARCH,Verdant Oasis Plaza is the location of the Unity March - 38,VERDANT OASIS PLAZA,HARMONY ASSEMBLY,Harmony Assembly is holding a march at Verdant Oasis Plaza - 39,VERDANT OASIS PLAZA,UNITY MARCH,The Unity March is taking place at Verdant Oasis Plaza - 40,VERDANT OASIS PLAZA,TRIBUNE SPOTLIGHT,Tribune Spotlight is reporting on the Unity march taking place at Verdant Oasis Plaza - 41,VERDANT OASIS PLAZA,BAILEY ASADI,Bailey Asadi is speaking at Verdant Oasis Plaza about the march - 43,HARMONY ASSEMBLY,UNITY MARCH,Harmony Assembly is organizing the Unity March - - Output: - {{ - "title": "Verdant Oasis Plaza and Unity March", - "summary": "The community revolves around the Verdant Oasis Plaza, which is the location of the Unity March. The plaza has relationships with the Harmony Assembly, Unity March, and Tribune Spotlight, all of which are associated with the march event.", - "rating": 5.0, - "rating_explanation": "The impact severity rating is moderate due to the potential for unrest or conflict during the Unity March.", - "findings": [ - {{ - "summary": "Verdant Oasis Plaza as the central location", - "explanation": "Verdant Oasis Plaza is the central entity in this community, serving as the location for the Unity March. This plaza is the common link between all other entities, suggesting its significance in the community. The plaza's association with the march could potentially lead to issues such as public disorder or conflict, depending on the nature of the march and the reactions it provokes. 
[Data: Entities (5), Relationships (37, 38, 39, 40, 41,+more)]" - }}, - {{ - "summary": "Harmony Assembly's role in the community", - "explanation": "Harmony Assembly is another key entity in this community, being the organizer of the march at Verdant Oasis Plaza. The nature of Harmony Assembly and its march could be a potential source of threat, depending on their objectives and the reactions they provoke. The relationship between Harmony Assembly and the plaza is crucial in understanding the dynamics of this community. [Data: Entities(6), Relationships (38, 43)]" - }}, - {{ - "summary": "Unity March as a significant event", - "explanation": "The Unity March is a significant event taking place at Verdant Oasis Plaza. This event is a key factor in the community's dynamics and could be a potential source of threat, depending on the nature of the march and the reactions it provokes. The relationship between the march and the plaza is crucial in understanding the dynamics of this community. [Data: Relationships (39)]" - }}, - {{ - "summary": "Role of Tribune Spotlight", - "explanation": "Tribune Spotlight is reporting on the Unity March taking place in Verdant Oasis Plaza. This suggests that the event has attracted media attention, which could amplify its impact on the community. The role of Tribune Spotlight could be significant in shaping public perception of the event and the entities involved. [Data: Relationships (40)]" - }} - ] - }} - - - # Real Data - - Use the following text for your answer. Do not make anything up in your answer. - - Text: - {input_text} - - The report should include the following sections: - - - TITLE: community's name that represents its key entities - title should be short but specific. When possible, include representative named entities in the title. - - SUMMARY: An executive summary of the community's overall structure, how its entities are related to each other, and significant information associated with its entities. 
- - IMPACT SEVERITY RATING: a float score between 0-10 that represents the severity of IMPACT posed by entities within the community. IMPACT is the scored importance of a community. - - RATING EXPLANATION: Give a single sentence explanation of the IMPACT severity rating. - - DETAILED FINDINGS: A list of 5-10 key insights about the community. Each insight should have a short summary followed by multiple paragraphs of explanatory text grounded according to the grounding rules below. Be comprehensive. - - Return output as a well-formed JSON-formatted string with the following format: - {{ - "title": , - "summary": , - "rating": , - "rating_explanation": , - "findings": [ - {{ - "summary":, - "explanation": - }}, - {{ - "summary":, - "explanation": - }} - ] - }} - - # Grounding Rules - - Points supported by data should list their data references as follows: - - "This is an example sentence supported by multiple data references [Data: (record ids); (record ids)]." - - Do not list more than 5 record ids in a single reference. Instead, list the top 5 most relevant record ids and add "+more" to indicate that there are more. - - For example: - "Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (1), Entities (5, 7); Relationships (23); Claims (7, 2, 34, 64, 46, +more)]." - - where 1, 5, 7, 23, 2, 34, 46, and 64 represent the id (not the index) of the relevant data record. - - Do not include information where the supporting evidence for it is not provided. 
- - Output:""" - - input_types: - input_text: str diff --git a/py/core/providers/prompts/defaults/graphrag_community_reports_prompt.yaml b/py/core/providers/prompts/defaults/graphrag_community_reports_prompt.yaml new file mode 100644 index 000000000..87e8dbc69 --- /dev/null +++ b/py/core/providers/prompts/defaults/graphrag_community_reports_prompt.yaml @@ -0,0 +1,95 @@ +graphrag_community_reports_prompt: + template: | + You are an AI assistant that helps a human analyst perform information discovery within the ecosystem of Y Combinator startups. Information discovery is the process of identifying and assessing relevant information associated with certain startups and their interconnections within the Y Combinator network. + + # Goal + Write a comprehensive report of a Y Combinator community, given a list of startups that belong to the community, their relationships, and optional associated claims. The report will be used to inform decision-makers about information related to the startups and their potential impact. The content of this report includes an overview of the community's key startups, their funding status, technical capabilities, reputation, and noteworthy claims. + + # Report Structure + + The report should include the following sections: + + - TITLE: The community's name that represents its key startups - the title should be short but specific. When possible, include representative startup names in the title. + - SUMMARY: An executive summary of the community's overall structure, how its startups are related to each other, and significant information associated with its startups. + - IMPACT SEVERITY RATING: A float score between 0-10 that represents the severity of IMPACT posed by startups within the community. IMPACT is the scored importance of the community. + - RATING EXPLANATION: Provide a single sentence explanation of the IMPACT severity rating. + - DETAILED FINDINGS: A list of 5-10 key insights about the community. 
Each finding should be a comprehensive statement grounded according to the grounding rules below. + + Return output as a well-formed JSON-formatted string with the following format: + ```json + {{ + "title": , + "summary": , + "rating": , + "rating_explanation": , + "findings": [ + "", + "", + "", + "", + "" + // Additional findings... + ] + }} + ``` + + # Grounding Rules + + Points supported by data should list their data references as follows: + + "This is an example sentence supported by multiple data references [Data: (record ids); (record ids)]." + + Do not list more than 5 record ids in a single reference. Instead, list the top 5 most relevant record ids and add "+more" to indicate that there are more. + + For example: + "Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (1), Entities (5, 7); Relationships (23); Claims (7, 2, 34, 64, 46, +more)]." + + where 1, 5, 7, 23, 2, 34, 46, and 64 represent the id (not the index) of the relevant data record. + + Do not include information where the supporting evidence for it is not provided. + + # Example Input + ----------- + Text: + + Entities + + id,startup,description + 101,OpenAI,OpenAI is an AI research and deployment company. + 102,Stripe,Stripe is a technology company that builds economic infrastructure for the internet. + 103,Airbnb,Airbnb is an online marketplace for lodging and tourism experiences. + + Relationships + + id,source,target,description + 201,OpenAI,Stripe,OpenAI partnered with Stripe to integrate payment solutions. + 202,Stripe,Airbnb,Stripe provides payment processing services to Airbnb. + 203,Airbnb,OpenAI,Airbnb utilizes OpenAI's AI tools for customer service. + 204,Stripe,OpenAI,Stripe invested in OpenAI's latest funding round. + 205,Airbnb,Stripe,Airbnb and Stripe collaborate on expanding global payment options. 
+ + Output: + {{ + "title": "OpenAI, Stripe, and Airbnb Community", + "summary": "The community comprises key startups like OpenAI, Stripe, and Airbnb, which are interconnected through strategic partnerships and investments. These relationships highlight a robust network focused on advancing AI technologies, payment infrastructure, and online marketplaces.", + "rating": 7.5, + "rating_explanation": "The impact severity rating is high due to the significant influence these startups have on technology, finance, and the global economy.", + "findings": [ + "OpenAI stands out as a leader in artificial intelligence research and deployment within the Y Combinator community. Its partnerships with companies like Stripe and Airbnb demonstrate its integral role in integrating AI solutions across various industries. OpenAI's influence is further amplified by its involvement in key projects that drive innovation and efficiency. [Data: Entities (101), Relationships (201, 203, 204)]", + "Stripe serves as a critical financial infrastructure provider, facilitating payment processing for startups like Airbnb and partnering with OpenAI to enhance payment solutions. Its strategic investments and collaborations underscore its importance in the Y Combinator ecosystem, enabling seamless financial transactions and supporting startup growth. [Data: Entities (102), Relationships (201, 202, 204, 205)]", + "Airbnb leverages OpenAI's artificial intelligence tools to enhance its customer service capabilities, showcasing the practical application of AI in improving user experience. This integration highlights Airbnb's commitment to innovation and efficiency, positioning it as a forward-thinking leader within the community. [Data: Entities (103), Relationships (202, 203, 205)]", + "Stripe's investment in OpenAI's latest funding round illustrates the strategic financial moves that drive growth and innovation within the Y Combinator community. 
Such investments not only strengthen partnerships but also foster an environment of collaboration and shared success among startups. [Data: Relationships (204)]", + "The collaboration between Airbnb and Stripe to expand global payment options demonstrates a commitment to scalability and accessibility in the Y Combinator ecosystem. This initiative is pivotal in enabling startups to reach a broader international market, thereby increasing their impact and revenue potential. [Data: Relationships (205)]" + ] + }} + + # Real Data + + Use the following text for your answer. Do not make anything up in your answer. + + Text: + {input_text} + + Output: + input_types: + input_text: str diff --git a/py/core/providers/prompts/defaults/graphrag_triplet_extraction_zero_shot.yaml b/py/core/providers/prompts/defaults/graphrag_triplet_extraction_zero_shot.yaml index 1b1f2bd8b..e4675268a 100644 --- a/py/core/providers/prompts/defaults/graphrag_triplet_extraction_zero_shot.yaml +++ b/py/core/providers/prompts/defaults/graphrag_triplet_extraction_zero_shot.yaml @@ -22,17 +22,99 @@ graphrag_triplet_extraction_zero_shot: Format each relationship as ("relationship"$$$$$$$$$$$$$$$$$$$$) 3. Make sure that each entity has at least one relationship. If you are not able to find a relationship from the pair of entities above, first create a new entity based on the source entity description and then create a relationship connecting them. Again, make sure that each entity has at least one relationship. If you don't do this, you will be fired. - 3. When finished, output. - -Real Data- + 4. When finished, output in the format given in the examples below. Do not repeat the same entity or relationship multiple times. + + Example 1: + If the list is empty, extract all entities and relations. 
+ Entity_types: + Relation_types: + Text: + The Verdantis's Central Institution is scheduled to meet on Monday and Thursday, with the institution planning to release its latest policy decision on Thursday at 1:30 p.m. PDT, followed by a press conference where Central Institution Chair Martin Smith will take questions. Investors expect the Market Strategy Committee to hold its benchmark interest rate steady in a range of 3.5%-3.75%. ###################### - text: {input} + Output: + ("entity"$$$$Central Institution$$$$Organization$$$$The central bank of Verdantis, responsible for monetary policy and setting interest rates) + ("entity"$$$$Martin Smith$$$$Person$$$$Chair of the Central Institution of Verdantis) + ("entity"$$$$Market Strategy Committee$$$$Organization$$$$Committee within the Central Institution that makes key decisions on monetary policy) + ("entity"$$$$Monday$$$$Time$$$$First meeting day of the Central Institution) + ("entity"$$$$Thursday$$$$Time$$$$Second meeting day of the Central Institution, when policy decisions are announced) + ("entity"$$$$1:30 PM PDT$$$$Time$$$$Scheduled time for the Central Institution's policy decision release on Thursday) + ("entity"$$$$Press Conference$$$$Event$$$$Media briefing held by the Central Institution following the policy decision release) + ("entity"$$$$Interest Rate$$$$Economic Concept$$$$Key monetary policy tool used by the Central Institution to influence the economy) + ("entity"$$$$3.5%-3.75%$$$$Economic Value$$$$Expected range for the benchmark interest rate) + ("relationship"$$$$Martin Smith$$$$Central Institution$$$$Chairs$$$$Martin Smith is the Chair of the Central Institution and will lead the press conference$$$$9) + ("relationship"$$$$Central Institution$$$$Press Conference$$$$Conducts$$$$The Central Institution conducts a press conference following its policy decision release$$$$9) + ("relationship"$$$$Market Strategy Committee$$$$Central Institution$$$$Part Of$$$$The Market Strategy Committee is a 
key decision-making body within the Central Institution$$$$9) + ("relationship"$$$$Market Strategy Committee$$$$Interest Rate$$$$Sets$$$$The Market Strategy Committee determines the benchmark interest rate$$$$9) + ("relationship"$$$$Central Institution$$$$Interest Rate$$$$Controls$$$$The Central Institution controls interest rates as part of its monetary policy$$$$9) + ("relationship"$$$$3.5%-3.75%$$$$Interest Rate$$$$Expected Range$$$$Investors anticipate the benchmark interest rate to remain within this range$$$$8) + ("relationship"$$$$Monday$$$$Central Institution$$$$Meeting Day$$$$The Central Institution holds its first meeting of the week on Monday$$$$7) + ("relationship"$$$$Thursday$$$$Central Institution$$$$Decision Day$$$$The Central Institution announces its policy decision on Thursday$$$$9) + ("relationship"$$$$1:30 PM PDT$$$$Central Institution$$$$Press Conference$$$$The policy decision release at 1:30 PM PDT is followed by the press conference$$$$8) + ###################### + Example 2: + If the list is empty, extract all entities and relations. + Entity_types: Organization + Relation_types: Formerly Owned By - Only extract entities of the given types. If this list is empty, extract all entities. - {entity_types} + Text: + TechGlobal's (TG) stock skyrocketed in its opening day on the Global Exchange Thursday. But IPO experts warn that the semiconductor corporation's debut on the public markets isn't indicative of how other newly listed companies may perform. - Only extract relations of the given types. If this list is empty, extract all relations. - {relation_types} + TechGlobal, a formerly public company, was taken private by Vision Holdings in 2014. The well-established chip designer says it powers 85% of premium smartphones. 
+ ###################### + Output: + ("entity"$$$$TECHGLOBAL$$$$Organization$$$$TechGlobal is a stock now listed on the Global Exchange which powers 85% of premium smartphones) + ("entity"$$$$VISION HOLDINGS$$$$Organization$$$$Vision Holdings is a firm that previously owned TechGlobal) + ("relationship"$$$$TECHGLOBAL$$$$VISION HOLDINGS$$$$Formerly Owned By$$$$Vision Holdings formerly owned TechGlobal from 2014 until present$$$$5) + + ###################### + Example 3: + If the list is empty, extract all entities and relations. + Entity_types: Organization,Geo,Person + Relation_types: "" + Text: + Five Aurelians jailed for 8 years in Firuzabad and widely regarded as hostages are on their way home to Aurelia. + + The swap orchestrated by Quintara was finalized when $8bn of Firuzi funds were transferred to financial institutions in Krohaara, the capital of Quintara. + + The exchange initiated in Firuzabad's capital, Tiruzia, led to the four men and one woman, who are also Firuzi nationals, boarding a chartered flight to Krohaara. + + They were welcomed by senior Aurelian officials and are now on their way to Aurelia's capital, Cashion. + + The Aurelians include 39-year-old businessman Samuel Namara, who has been held in Tiruzia's Alhamia Prison, as well as journalist Durke Bataglani, 59, and environmentalist Meggie Tazbah, 53, who also holds Bratinas nationality. 
+ ###################### + Output: + ("entity"$$$$FIRUZABAD$$$$Geo$$$$Firuzabad held Aurelians as hostages) + ("entity"$$$$AURELIA$$$$Geo$$$$Country seeking to release hostages) + ("entity"$$$$QUINTARA$$$$Geo$$$$Country that negotiated a swap of money in exchange for hostages) + ("entity"$$$$TIRUZIA$$$$Geo$$$$Capital of Firuzabad where the Aurelians were being held) + ("entity"$$$$KROHAARA$$$$Geo$$$$Capital city in Quintara) + ("entity"$$$$CASHION$$$$Geo$$$$Capital city in Aurelia) + ("entity"$$$$SAMUEL NAMARA$$$$Person$$$$Aurelian who spent time in Tiruzia's Alhamia Prison) + ("entity"$$$$ALHAMIA PRISON$$$$Geo$$$$Prison in Tiruzia) + ("entity"$$$$DURKE BATAGLANI$$$$Person$$$$Aurelian journalist who was held hostage) + ("entity"$$$$MEGGIE TAZBAH$$$$Person$$$$Bratinas national and environmentalist who was held hostage) + ("relationship"$$$$FIRUZABAD$$$$AURELIA$$$$Negotiated Hostage Exchange$$$$Firuzabad negotiated a hostage exchange with Aurelia$$$$2) + ("relationship"$$$$QUINTARA$$$$AURELIA$$$$Negotiated Hostage Exchange$$$$Quintara brokered the hostage exchange between Firuzabad and Aurelia$$$$2) + ("relationship"$$$$QUINTARA$$$$FIRUZABAD$$$$Negotiated Hostage Exchange$$$$Quintara brokered the hostage exchange between Firuzabad and Aurelia$$$$2) + ("relationship"$$$$SAMUEL NAMARA$$$$ALHAMIA PRISON$$$$Held At Alhamia Prison$$$$Samuel Namara was a prisoner at Alhamia prison$$$$8) + ("relationship"$$$$SAMUEL NAMARA$$$$MEGGIE TAZBAH$$$$Exchanged Hostages$$$$Samuel Namara and Meggie Tazbah were exchanged in the same hostage release$$$$2) + ("relationship"$$$$SAMUEL NAMARA$$$$DURKE BATAGLANI$$$$Exchanged Hostages$$$$Samuel Namara and Durke Bataglani were exchanged in the same hostage release$$$$2) + ("relationship"$$$$MEGGIE TAZBAH$$$$DURKE BATAGLANI$$$$Exchanged Hostages$$$$Meggie Tazbah and Durke Bataglani were exchanged in the same hostage release$$$$2) + ("relationship"$$$$SAMUEL NAMARA$$$$FIRUZABAD$$$$Held As Hostage$$$$Samuel Namara was a hostage in 
Firuzabad$$$$2) + ("relationship"$$$$MEGGIE TAZBAH$$$$FIRUZABAD$$$$Held As Hostage$$$$Meggie Tazbah was a hostage in Firuzabad$$$$2) + ("relationship"$$$$DURKE BATAGLANI$$$$FIRUZABAD$$$$Held As Hostage$$$$Durke Bataglani was a hostage in Firuzabad$$$$2) + + -Real Data- + ###################### + If the list is empty, extract all entities and relations. + Entity_types: {entity_types} + Relation_types: {relation_types} + + Text: + {input} + ###################### + Output: input_types: max_knowledge_triples: int diff --git a/py/core/providers/prompts/r2r_prompts.py b/py/core/providers/prompts/r2r_prompts.py index 51e4e6d1c..0df76cfb8 100644 --- a/py/core/providers/prompts/r2r_prompts.py +++ b/py/core/providers/prompts/r2r_prompts.py @@ -7,18 +7,17 @@ import asyncpg import yaml -from core.base import Prompt, PromptConfig, PromptProvider +from core.base import DatabaseProvider, Prompt, PromptConfig, PromptProvider from core.base.utils import generate_id_from_label -from core.providers.database.postgres import PostgresDBProvider logger = logging.getLogger(__name__) class R2RPromptProvider(PromptProvider): - def __init__(self, config: PromptConfig, db_provider: PostgresDBProvider): + def __init__(self, config: PromptConfig, db_provider: DatabaseProvider): super().__init__(config) self.prompts: dict[str, Prompt] = {} - self.config = config + self.config: PromptConfig = config self.db_provider = db_provider self.pool: Optional[asyncpg.pool.Pool] = None # Initialize pool @@ -109,7 +108,7 @@ async def _load_prompts_from_database(self): results = await self.fetch_query(query) for row in results: ( - prompt_id, + _, name, template, input_types, diff --git a/py/core/telemetry/events.py b/py/core/telemetry/events.py index 70f192e29..0151a5fcc 100644 --- a/py/core/telemetry/events.py +++ b/py/core/telemetry/events.py @@ -1,5 +1,5 @@ import uuid -from typing import Any, Dict +from typing import Any, Dict, Optional class BaseTelemetryEvent: @@ -16,7 +16,10 @@ def __init__(self, 
user_id: str): class FeatureUsageEvent(BaseTelemetryEvent): def __init__( - self, user_id: str, feature: str, properties: Dict[str, Any] = None + self, + user_id: str, + feature: str, + properties: Optional[Dict[str, Any]] = None, ): super().__init__( "FeatureUsage", @@ -34,7 +37,7 @@ def __init__( user_id: str, endpoint: str, error_message: str, - properties: Dict[str, Any] = None, + properties: Optional[Dict[str, Any]] = None, ): super().__init__( "Error", @@ -49,7 +52,10 @@ def __init__( class RequestLatencyEvent(BaseTelemetryEvent): def __init__( - self, endpoint: str, latency: float, properties: Dict[str, Any] = None + self, + endpoint: str, + latency: float, + properties: Optional[Dict[str, Any]] = None, ): super().__init__( "RequestLatency", @@ -63,7 +69,10 @@ def __init__( class GeographicDistributionEvent(BaseTelemetryEvent): def __init__( - self, user_id: str, country: str, properties: Dict[str, Any] = None + self, + user_id: str, + country: str, + properties: Optional[Dict[str, Any]] = None, ): super().__init__( "GeographicDistribution", @@ -77,7 +86,10 @@ def __init__( class SessionDurationEvent(BaseTelemetryEvent): def __init__( - self, user_id: str, duration: float, properties: Dict[str, Any] = None + self, + user_id: str, + duration: float, + properties: Optional[Dict[str, Any]] = None, ): super().__init__( "SessionDuration", @@ -91,7 +103,10 @@ def __init__( class UserPathEvent(BaseTelemetryEvent): def __init__( - self, user_id: str, path: str, properties: Dict[str, Any] = None + self, + user_id: str, + path: str, + properties: Optional[Dict[str, Any]] = None, ): super().__init__( "UserPath", diff --git a/py/core/telemetry/posthog.py b/py/core/telemetry/posthog.py index 05bc5c3fb..200244ee8 100644 --- a/py/core/telemetry/posthog.py +++ b/py/core/telemetry/posthog.py @@ -3,7 +3,7 @@ import posthog -from core.telemetry.events import BaseTelemetryEvent +from core.telemetry.events import BaseTelemetryEvent # type: ignore logger = 
logging.getLogger(__name__) diff --git a/py/core/utils/__init__.py b/py/core/utils/__init__.py new file mode 100644 index 000000000..e43dc06c9 --- /dev/null +++ b/py/core/utils/__init__.py @@ -0,0 +1,34 @@ +from shared.utils.base_utils import ( + decrement_version, + format_entity_types, + format_relations, + format_search_results_for_llm, + format_search_results_for_stream, + generate_id_from_label, + generate_run_id, + generate_user_document_id, + increment_version, + run_pipeline, + to_async_generator, +) +from shared.utils.splitter.text import ( + RecursiveCharacterTextSplitter, + TextSplitter, +) + +__all__ = [ + "format_entity_types", + "format_relations", + "generate_id_from_label", + "format_search_results_for_stream", + "format_search_results_for_llm", + "generate_run_id", + "generate_user_document_id", + "increment_version", + "decrement_version", + "run_pipeline", + "to_async_generator", + # Text splitter + "RecursiveCharacterTextSplitter", + "TextSplitter", +] diff --git a/py/poetry.lock b/py/poetry.lock index f7c653424..bdf282345 100644 --- a/py/poetry.lock +++ b/py/poetry.lock @@ -1,4 +1,4 @@ -# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand. +# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand. 
[[package]] name = "aiohappyeyeballs" @@ -206,13 +206,13 @@ files = [ [[package]] name = "anyio" -version = "4.4.0" +version = "4.5.0" description = "High level compatibility layer for multiple asynchronous event loop implementations" optional = false python-versions = ">=3.8" files = [ - {file = "anyio-4.4.0-py3-none-any.whl", hash = "sha256:c1b2d8f46a8a812513012e1107cb0e68c17159a7a594208005a57dc776e1bdc7"}, - {file = "anyio-4.4.0.tar.gz", hash = "sha256:5aadc6a1bbb7cdb0bede386cac5e2940f5e2ff3aa20277e991cf028e0585ce94"}, + {file = "anyio-4.5.0-py3-none-any.whl", hash = "sha256:fdeb095b7cc5a5563175eedd926ec4ae55413bb4be5770c424af0ba46ccb4a78"}, + {file = "anyio-4.5.0.tar.gz", hash = "sha256:c5a275fe5ca0afd788001f58fca1e69e29ce706d746e317d660e21f70c530ef9"}, ] [package.dependencies] @@ -222,9 +222,9 @@ sniffio = ">=1.1" typing-extensions = {version = ">=4.1", markers = "python_version < \"3.11\""} [package.extras] -doc = ["Sphinx (>=7)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] -test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.17)"] -trio = ["trio (>=0.23)"] +doc = ["Sphinx (>=7.4,<8.0)", "packaging", "sphinx-autodoc-typehints (>=1.2.0)", "sphinx-rtd-theme"] +test = ["anyio[trio]", "coverage[toml] (>=7)", "exceptiongroup (>=1.2.0)", "hypothesis (>=4.0)", "psutil (>=5.9)", "pytest (>=7.0)", "pytest-mock (>=3.6.1)", "trustme", "uvloop (>=0.21.0b1)"] +trio = ["trio (>=0.26.1)"] [[package]] name = "apscheduler" @@ -476,17 +476,17 @@ uvloop = ["uvloop (>=0.15.2)"] [[package]] name = "boto3" -version = "1.35.19" +version = "1.35.24" description = "The AWS SDK for Python" optional = false python-versions = ">=3.8" files = [ - {file = "boto3-1.35.19-py3-none-any.whl", hash = "sha256:84b3fe1727945bc3cada832d969ddb3dc0d08fce1677064ca8bdc13a89c1a143"}, - {file = "boto3-1.35.19.tar.gz", hash = 
"sha256:9979fe674780a0b7100eae9156d74ee374cd1638a9f61c77277e3ce712f3e496"}, + {file = "boto3-1.35.24-py3-none-any.whl", hash = "sha256:97fcc1a14cbc759e4ba9535ced703a99fcf652c9c4b8dfcd06f292c80551684b"}, + {file = "boto3-1.35.24.tar.gz", hash = "sha256:be7807f30f26d6c0057e45cfd09dad5968e664488bf4f9138d0bb7a0f6d8ed40"}, ] [package.dependencies] -botocore = ">=1.35.19,<1.36.0" +botocore = ">=1.35.24,<1.36.0" jmespath = ">=0.7.1,<2.0.0" s3transfer = ">=0.10.0,<0.11.0" @@ -495,13 +495,13 @@ crt = ["botocore[crt] (>=1.21.0,<2.0a0)"] [[package]] name = "botocore" -version = "1.35.19" +version = "1.35.24" description = "Low-level, data-driven core of boto 3." optional = false python-versions = ">=3.8" files = [ - {file = "botocore-1.35.19-py3-none-any.whl", hash = "sha256:c83f7f0cacfe7c19b109b363ebfa8736e570d24922f16ed371681f58ebab44a9"}, - {file = "botocore-1.35.19.tar.gz", hash = "sha256:42d6d8db7250cbd7899f786f9861e02cab17dc238f64d6acb976098ed9809625"}, + {file = "botocore-1.35.24-py3-none-any.whl", hash = "sha256:eb9ccc068255cc3d24c36693fda6aec7786db05ae6c2b13bcba66dce6a13e2e3"}, + {file = "botocore-1.35.24.tar.gz", hash = "sha256:1e59b0f14f4890c4f70bd6a58a634b9464bed1c4c6171f87c8795d974ade614b"}, ] [package.dependencies] @@ -538,7 +538,7 @@ files = [ name = "cffi" version = "1.17.1" description = "Foreign Function Interface for Python calling C code." -optional = true +optional = false python-versions = ">=3.8" files = [ {file = "cffi-1.17.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:df8b1c11f177bc2313ec4b2d46baec87a5f3e71fc8b45dab2ee7cae86d9aba14"}, @@ -966,7 +966,7 @@ toml = ["tomli"] name = "cryptography" version = "43.0.1" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
-optional = true +optional = false python-versions = ">=3.7" files = [ {file = "cryptography-43.0.1-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:8385d98f6a3bf8bb2d65a73e17ed87a3ba84f6991c155691c51112075f9ffc5d"}, @@ -1102,7 +1102,7 @@ files = [ name = "distro" version = "1.9.0" description = "Distro - an OS platform information API" -optional = true +optional = false python-versions = ">=3.6" files = [ {file = "distro-1.9.0-py3-none-any.whl", hash = "sha256:7bffd925d65168f85027d8da9af6bddab658135b840670a223589bc0c8ef02b2"}, @@ -1164,13 +1164,13 @@ idna = ">=2.0.0" [[package]] name = "emoji" -version = "2.12.1" +version = "2.13.0" description = "Emoji for Python" optional = true python-versions = ">=3.7" files = [ - {file = "emoji-2.12.1-py3-none-any.whl", hash = "sha256:a00d62173bdadc2510967a381810101624a2f0986145b8da0cffa42e29430235"}, - {file = "emoji-2.12.1.tar.gz", hash = "sha256:4aa0488817691aa58d83764b6c209f8a27c0b3ab3f89d1b8dceca1a62e4973eb"}, + {file = "emoji-2.13.0-py3-none-any.whl", hash = "sha256:6c8027f02c448731fd5f13e38521e14594e48e9c5d2c3862f95d086d8aa84a7c"}, + {file = "emoji-2.13.0.tar.gz", hash = "sha256:e32e46a1b4445dffbc37cc82ea2d0dacb9323b857f14297eba7decaadfb4890e"}, ] [package.dependencies] @@ -1206,37 +1206,38 @@ test = ["pytest (>=6)"] [[package]] name = "fastapi" -version = "0.109.2" +version = "0.114.2" description = "FastAPI framework, high performance, easy to learn, fast to code, ready for production" optional = false python-versions = ">=3.8" files = [ - {file = "fastapi-0.109.2-py3-none-any.whl", hash = "sha256:2c9bab24667293b501cad8dd388c05240c850b58ec5876ee3283c47d6e1e3a4d"}, - {file = "fastapi-0.109.2.tar.gz", hash = "sha256:f3817eac96fe4f65a2ebb4baa000f394e55f5fccdaf7f75250804bc58f354f73"}, + {file = "fastapi-0.114.2-py3-none-any.whl", hash = "sha256:44474a22913057b1acb973ab90f4b671ba5200482e7622816d79105dcece1ac5"}, + {file = "fastapi-0.114.2.tar.gz", hash = 
"sha256:0adb148b62edb09e8c6eeefa3ea934e8f276dabc038c5a82989ea6346050c3da"}, ] [package.dependencies] pydantic = ">=1.7.4,<1.8 || >1.8,<1.8.1 || >1.8.1,<2.0.0 || >2.0.0,<2.0.1 || >2.0.1,<2.1.0 || >2.1.0,<3.0.0" -starlette = ">=0.36.3,<0.37.0" +starlette = ">=0.37.2,<0.39.0" typing-extensions = ">=4.8.0" [package.extras] -all = ["email-validator (>=2.0.0)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.7)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] +all = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "itsdangerous (>=1.1.0)", "jinja2 (>=2.11.2)", "orjson (>=3.2.1)", "pydantic-extra-types (>=2.0.0)", "pydantic-settings (>=2.0.0)", "python-multipart (>=0.0.7)", "pyyaml (>=5.3.1)", "ujson (>=4.0.1,!=4.0.2,!=4.1.0,!=4.2.0,!=4.3.0,!=5.0.0,!=5.1.0)", "uvicorn[standard] (>=0.12.0)"] +standard = ["email-validator (>=2.0.0)", "fastapi-cli[standard] (>=0.0.5)", "httpx (>=0.23.0)", "jinja2 (>=2.11.2)", "python-multipart (>=0.0.7)", "uvicorn[standard] (>=0.12.0)"] [[package]] name = "filelock" -version = "3.16.0" +version = "3.16.1" description = "A platform independent file lock." 
optional = false python-versions = ">=3.8" files = [ - {file = "filelock-3.16.0-py3-none-any.whl", hash = "sha256:f6ed4c963184f4c84dd5557ce8fece759a3724b37b80c6c4f20a2f63a4dc6609"}, - {file = "filelock-3.16.0.tar.gz", hash = "sha256:81de9eb8453c769b63369f87f11131a7ab04e367f8d97ad39dc230daa07e3bec"}, + {file = "filelock-3.16.1-py3-none-any.whl", hash = "sha256:2082e5703d51fbf98ea75855d9d5527e33d8ff23099bec374a134febee6946b0"}, + {file = "filelock-3.16.1.tar.gz", hash = "sha256:c249fbfcd5db47e5e2d6d62198e565475ee65e4831e2561c8e313fa7eb961435"}, ] [package.extras] -docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4)"] -testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.1.1)", "pytest (>=8.3.2)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.3)"] +docs = ["furo (>=2024.8.6)", "sphinx (>=8.0.2)", "sphinx-autodoc-typehints (>=2.4.1)"] +testing = ["covdefaults (>=2.3)", "coverage (>=7.6.1)", "diff-cover (>=9.2)", "pytest (>=8.3.3)", "pytest-asyncio (>=0.24)", "pytest-cov (>=5)", "pytest-mock (>=3.14)", "pytest-timeout (>=2.3.1)", "virtualenv (>=20.26.4)"] typing = ["typing-extensions (>=4.12.2)"] [[package]] @@ -1510,13 +1511,13 @@ files = [ [[package]] name = "google-api-core" -version = "2.19.2" +version = "2.20.0" description = "Google API client core library" optional = true python-versions = ">=3.7" files = [ - {file = "google_api_core-2.19.2-py3-none-any.whl", hash = "sha256:53ec0258f2837dd53bbd3d3df50f5359281b3cc13f800c941dd15a9b5a415af4"}, - {file = "google_api_core-2.19.2.tar.gz", hash = "sha256:ca07de7e8aa1c98a8bfca9321890ad2340ef7f2eb136e558cee68f24b94b0a8f"}, + {file = "google_api_core-2.20.0-py3-none-any.whl", hash = "sha256:ef0591ef03c30bb83f79b3d0575c3f31219001fc9c5cf37024d08310aeffed8a"}, + {file = "google_api_core-2.20.0.tar.gz", hash = "sha256:f74dff1889ba291a4b76c5079df0711810e2d9da81abfdc99957bc961c1eb28f"}, ] 
[package.dependencies] @@ -1541,13 +1542,13 @@ grpcio-gcp = ["grpcio-gcp (>=0.2.2,<1.0.dev0)"] [[package]] name = "google-auth" -version = "2.34.0" +version = "2.35.0" description = "Google Authentication Library" optional = true python-versions = ">=3.7" files = [ - {file = "google_auth-2.34.0-py2.py3-none-any.whl", hash = "sha256:72fd4733b80b6d777dcde515628a9eb4a577339437012874ea286bca7261ee65"}, - {file = "google_auth-2.34.0.tar.gz", hash = "sha256:8eb87396435c19b20d32abd2f984e31c191a15284af72eb922f10e5bde9c04cc"}, + {file = "google_auth-2.35.0-py2.py3-none-any.whl", hash = "sha256:25df55f327ef021de8be50bad0dfd4a916ad0de96da86cd05661c9297723ad3f"}, + {file = "google_auth-2.35.0.tar.gz", hash = "sha256:f4c64ed4e01e8e8b646ef34c018f8bf3338df0c8e37d8b3bba40e7f574a3278a"}, ] [package.dependencies] @@ -1598,77 +1599,84 @@ grpc = ["grpcio (>=1.44.0,<2.0.0.dev0)"] [[package]] name = "greenlet" -version = "3.1.0" +version = "3.1.1" description = "Lightweight in-process concurrent programming" optional = true python-versions = ">=3.7" files = [ - {file = "greenlet-3.1.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:a814dc3100e8a046ff48faeaa909e80cdb358411a3d6dd5293158425c684eda8"}, - {file = "greenlet-3.1.0-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a771dc64fa44ebe58d65768d869fcfb9060169d203446c1d446e844b62bdfdca"}, - {file = "greenlet-3.1.0-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:0e49a65d25d7350cca2da15aac31b6f67a43d867448babf997fe83c7505f57bc"}, - {file = "greenlet-3.1.0-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2cd8518eade968bc52262d8c46727cfc0826ff4d552cf0430b8d65aaf50bb91d"}, - {file = "greenlet-3.1.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76dc19e660baea5c38e949455c1181bc018893f25372d10ffe24b3ed7341fb25"}, - {file = "greenlet-3.1.0-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = 
"sha256:c0a5b1c22c82831f56f2f7ad9bbe4948879762fe0d59833a4a71f16e5fa0f682"}, - {file = "greenlet-3.1.0-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:2651dfb006f391bcb240635079a68a261b227a10a08af6349cba834a2141efa1"}, - {file = "greenlet-3.1.0-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:3e7e6ef1737a819819b1163116ad4b48d06cfdd40352d813bb14436024fcda99"}, - {file = "greenlet-3.1.0-cp310-cp310-win_amd64.whl", hash = "sha256:ffb08f2a1e59d38c7b8b9ac8083c9c8b9875f0955b1e9b9b9a965607a51f8e54"}, - {file = "greenlet-3.1.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:9730929375021ec90f6447bff4f7f5508faef1c02f399a1953870cdb78e0c345"}, - {file = "greenlet-3.1.0-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:713d450cf8e61854de9420fb7eea8ad228df4e27e7d4ed465de98c955d2b3fa6"}, - {file = "greenlet-3.1.0-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4c3446937be153718250fe421da548f973124189f18fe4575a0510b5c928f0cc"}, - {file = "greenlet-3.1.0-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1ddc7bcedeb47187be74208bc652d63d6b20cb24f4e596bd356092d8000da6d6"}, - {file = "greenlet-3.1.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:44151d7b81b9391ed759a2f2865bbe623ef00d648fed59363be2bbbd5154656f"}, - {file = "greenlet-3.1.0-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6cea1cca3be76c9483282dc7760ea1cc08a6ecec1f0b6ca0a94ea0d17432da19"}, - {file = "greenlet-3.1.0-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:619935a44f414274a2c08c9e74611965650b730eb4efe4b2270f91df5e4adf9a"}, - {file = "greenlet-3.1.0-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:221169d31cada333a0c7fd087b957c8f431c1dba202c3a58cf5a3583ed973e9b"}, - {file = "greenlet-3.1.0-cp311-cp311-win_amd64.whl", hash = "sha256:01059afb9b178606b4b6e92c3e710ea1635597c3537e44da69f4531e111dd5e9"}, - {file = 
"greenlet-3.1.0-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:24fc216ec7c8be9becba8b64a98a78f9cd057fd2dc75ae952ca94ed8a893bf27"}, - {file = "greenlet-3.1.0-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:3d07c28b85b350564bdff9f51c1c5007dfb2f389385d1bc23288de51134ca303"}, - {file = "greenlet-3.1.0-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:243a223c96a4246f8a30ea470c440fe9db1f5e444941ee3c3cd79df119b8eebf"}, - {file = "greenlet-3.1.0-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:26811df4dc81271033a7836bc20d12cd30938e6bd2e9437f56fa03da81b0f8fc"}, - {file = "greenlet-3.1.0-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c9d86401550b09a55410f32ceb5fe7efcd998bd2dad9e82521713cb148a4a15f"}, - {file = "greenlet-3.1.0-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:26d9c1c4f1748ccac0bae1dbb465fb1a795a75aba8af8ca871503019f4285e2a"}, - {file = "greenlet-3.1.0-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:cd468ec62257bb4544989402b19d795d2305eccb06cde5da0eb739b63dc04665"}, - {file = "greenlet-3.1.0-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:a53dfe8f82b715319e9953330fa5c8708b610d48b5c59f1316337302af5c0811"}, - {file = "greenlet-3.1.0-cp312-cp312-win_amd64.whl", hash = "sha256:28fe80a3eb673b2d5cc3b12eea468a5e5f4603c26aa34d88bf61bba82ceb2f9b"}, - {file = "greenlet-3.1.0-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:76b3e3976d2a452cba7aa9e453498ac72240d43030fdc6d538a72b87eaff52fd"}, - {file = "greenlet-3.1.0-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:655b21ffd37a96b1e78cc48bf254f5ea4b5b85efaf9e9e2a526b3c9309d660ca"}, - {file = "greenlet-3.1.0-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c6f4c2027689093775fd58ca2388d58789009116844432d920e9147f91acbe64"}, - {file = 
"greenlet-3.1.0-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:76e5064fd8e94c3f74d9fd69b02d99e3cdb8fc286ed49a1f10b256e59d0d3a0b"}, - {file = "greenlet-3.1.0-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6a4bf607f690f7987ab3291406e012cd8591a4f77aa54f29b890f9c331e84989"}, - {file = "greenlet-3.1.0-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:037d9ac99540ace9424cb9ea89f0accfaff4316f149520b4ae293eebc5bded17"}, - {file = "greenlet-3.1.0-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:90b5bbf05fe3d3ef697103850c2ce3374558f6fe40fd57c9fac1bf14903f50a5"}, - {file = "greenlet-3.1.0-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:726377bd60081172685c0ff46afbc600d064f01053190e4450857483c4d44484"}, - {file = "greenlet-3.1.0-cp313-cp313-win_amd64.whl", hash = "sha256:d46d5069e2eeda111d6f71970e341f4bd9aeeee92074e649ae263b834286ecc0"}, - {file = "greenlet-3.1.0-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:81eeec4403a7d7684b5812a8aaa626fa23b7d0848edb3a28d2eb3220daddcbd0"}, - {file = "greenlet-3.1.0-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:4a3dae7492d16e85ea6045fd11cb8e782b63eac8c8d520c3a92c02ac4573b0a6"}, - {file = "greenlet-3.1.0-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:4b5ea3664eed571779403858d7cd0a9b0ebf50d57d2cdeafc7748e09ef8cd81a"}, - {file = "greenlet-3.1.0-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a22f4e26400f7f48faef2d69c20dc055a1f3043d330923f9abe08ea0aecc44df"}, - {file = "greenlet-3.1.0-cp37-cp37m-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:13ff8c8e54a10472ce3b2a2da007f915175192f18e6495bad50486e87c7f6637"}, - {file = "greenlet-3.1.0-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:f9671e7282d8c6fcabc32c0fb8d7c0ea8894ae85cee89c9aadc2d7129e1a9954"}, - {file = "greenlet-3.1.0-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = 
"sha256:184258372ae9e1e9bddce6f187967f2e08ecd16906557c4320e3ba88a93438c3"}, - {file = "greenlet-3.1.0-cp37-cp37m-win32.whl", hash = "sha256:a0409bc18a9f85321399c29baf93545152d74a49d92f2f55302f122007cfda00"}, - {file = "greenlet-3.1.0-cp37-cp37m-win_amd64.whl", hash = "sha256:9eb4a1d7399b9f3c7ac68ae6baa6be5f9195d1d08c9ddc45ad559aa6b556bce6"}, - {file = "greenlet-3.1.0-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:a8870983af660798dc1b529e1fd6f1cefd94e45135a32e58bd70edd694540f33"}, - {file = "greenlet-3.1.0-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cfcfb73aed40f550a57ea904629bdaf2e562c68fa1164fa4588e752af6efdc3f"}, - {file = "greenlet-3.1.0-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f9482c2ed414781c0af0b35d9d575226da6b728bd1a720668fa05837184965b7"}, - {file = "greenlet-3.1.0-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d58ec349e0c2c0bc6669bf2cd4982d2f93bf067860d23a0ea1fe677b0f0b1e09"}, - {file = "greenlet-3.1.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:dd65695a8df1233309b701dec2539cc4b11e97d4fcc0f4185b4a12ce54db0491"}, - {file = "greenlet-3.1.0-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:665b21e95bc0fce5cab03b2e1d90ba9c66c510f1bb5fdc864f3a377d0f553f6b"}, - {file = "greenlet-3.1.0-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:d3c59a06c2c28a81a026ff11fbf012081ea34fb9b7052f2ed0366e14896f0a1d"}, - {file = "greenlet-3.1.0-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5415b9494ff6240b09af06b91a375731febe0090218e2898d2b85f9b92abcda0"}, - {file = "greenlet-3.1.0-cp38-cp38-win32.whl", hash = "sha256:1544b8dd090b494c55e60c4ff46e238be44fdc472d2589e943c241e0169bcea2"}, - {file = "greenlet-3.1.0-cp38-cp38-win_amd64.whl", hash = "sha256:7f346d24d74c00b6730440f5eb8ec3fe5774ca8d1c9574e8e57c8671bb51b910"}, - {file = "greenlet-3.1.0-cp39-cp39-macosx_10_9_universal2.whl", hash = 
"sha256:db1b3ccb93488328c74e97ff888604a8b95ae4f35f4f56677ca57a4fc3a4220b"}, - {file = "greenlet-3.1.0-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:44cd313629ded43bb3b98737bba2f3e2c2c8679b55ea29ed73daea6b755fe8e7"}, - {file = "greenlet-3.1.0-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fad7a051e07f64e297e6e8399b4d6a3bdcad3d7297409e9a06ef8cbccff4f501"}, - {file = "greenlet-3.1.0-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:c3967dcc1cd2ea61b08b0b276659242cbce5caca39e7cbc02408222fb9e6ff39"}, - {file = "greenlet-3.1.0-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:d45b75b0f3fd8d99f62eb7908cfa6d727b7ed190737dec7fe46d993da550b81a"}, - {file = "greenlet-3.1.0-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:2d004db911ed7b6218ec5c5bfe4cf70ae8aa2223dffbb5b3c69e342bb253cb28"}, - {file = "greenlet-3.1.0-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:b9505a0c8579899057cbefd4ec34d865ab99852baf1ff33a9481eb3924e2da0b"}, - {file = "greenlet-3.1.0-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:5fd6e94593f6f9714dbad1aaba734b5ec04593374fa6638df61592055868f8b8"}, - {file = "greenlet-3.1.0-cp39-cp39-win32.whl", hash = "sha256:d0dd943282231480aad5f50f89bdf26690c995e8ff555f26d8a5b9887b559bcc"}, - {file = "greenlet-3.1.0-cp39-cp39-win_amd64.whl", hash = "sha256:ac0adfdb3a21dc2a24ed728b61e72440d297d0fd3a577389df566651fcd08f97"}, - {file = "greenlet-3.1.0.tar.gz", hash = "sha256:b395121e9bbe8d02a750886f108d540abe66075e61e22f7353d9acb0b81be0f0"}, + {file = "greenlet-3.1.1-cp310-cp310-macosx_11_0_universal2.whl", hash = "sha256:0bbae94a29c9e5c7e4a2b7f0aae5c17e8e90acbfd3bf6270eeba60c39fce3563"}, + {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0fde093fb93f35ca72a556cf72c92ea3ebfda3d79fc35bb19fbe685853869a83"}, + {file = 
"greenlet-3.1.1-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:36b89d13c49216cadb828db8dfa6ce86bbbc476a82d3a6c397f0efae0525bdd0"}, + {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94b6150a85e1b33b40b1464a3f9988dcc5251d6ed06842abff82e42632fac120"}, + {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93147c513fac16385d1036b7e5b102c7fbbdb163d556b791f0f11eada7ba65dc"}, + {file = "greenlet-3.1.1-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:da7a9bff22ce038e19bf62c4dd1ec8391062878710ded0a845bcf47cc0200617"}, + {file = "greenlet-3.1.1-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:b2795058c23988728eec1f36a4e5e4ebad22f8320c85f3587b539b9ac84128d7"}, + {file = "greenlet-3.1.1-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:ed10eac5830befbdd0c32f83e8aa6288361597550ba669b04c48f0f9a2c843c6"}, + {file = "greenlet-3.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:77c386de38a60d1dfb8e55b8c1101d68c79dfdd25c7095d51fec2dd800892b80"}, + {file = "greenlet-3.1.1-cp311-cp311-macosx_11_0_universal2.whl", hash = "sha256:e4d333e558953648ca09d64f13e6d8f0523fa705f51cae3f03b5983489958c70"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09fc016b73c94e98e29af67ab7b9a879c307c6731a2c9da0db5a7d9b7edd1159"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d5e975ca70269d66d17dd995dafc06f1b06e8cb1ec1e9ed54c1d1e4a7c4cf26e"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:3b2813dc3de8c1ee3f924e4d4227999285fd335d1bcc0d2be6dc3f1f6a318ec1"}, + {file = "greenlet-3.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e347b3bfcf985a05e8c0b7d462ba6f15b1ee1c909e2dcad795e49e91b152c383"}, + {file = 
"greenlet-3.1.1-cp311-cp311-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:9e8f8c9cb53cdac7ba9793c276acd90168f416b9ce36799b9b885790f8ad6c0a"}, + {file = "greenlet-3.1.1-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:62ee94988d6b4722ce0028644418d93a52429e977d742ca2ccbe1c4f4a792511"}, + {file = "greenlet-3.1.1-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:1776fd7f989fc6b8d8c8cb8da1f6b82c5814957264d1f6cf818d475ec2bf6395"}, + {file = "greenlet-3.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:48ca08c771c268a768087b408658e216133aecd835c0ded47ce955381105ba39"}, + {file = "greenlet-3.1.1-cp312-cp312-macosx_11_0_universal2.whl", hash = "sha256:4afe7ea89de619adc868e087b4d2359282058479d7cfb94970adf4b55284574d"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f406b22b7c9a9b4f8aa9d2ab13d6ae0ac3e85c9a809bd590ad53fed2bf70dc79"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c3a701fe5a9695b238503ce5bbe8218e03c3bcccf7e204e455e7462d770268aa"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2846930c65b47d70b9d178e89c7e1a69c95c1f68ea5aa0a58646b7a96df12441"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99cfaa2110534e2cf3ba31a7abcac9d328d1d9f1b95beede58294a60348fba36"}, + {file = "greenlet-3.1.1-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:1443279c19fca463fc33e65ef2a935a5b09bb90f978beab37729e1c3c6c25fe9"}, + {file = "greenlet-3.1.1-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:b7cede291382a78f7bb5f04a529cb18e068dd29e0fb27376074b6d0317bf4dd0"}, + {file = "greenlet-3.1.1-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:23f20bb60ae298d7d8656c6ec6db134bca379ecefadb0b19ce6f19d1f232a942"}, + {file = "greenlet-3.1.1-cp312-cp312-win_amd64.whl", hash = 
"sha256:7124e16b4c55d417577c2077be379514321916d5790fa287c9ed6f23bd2ffd01"}, + {file = "greenlet-3.1.1-cp313-cp313-macosx_11_0_universal2.whl", hash = "sha256:05175c27cb459dcfc05d026c4232f9de8913ed006d42713cb8a5137bd49375f1"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:935e943ec47c4afab8965954bf49bfa639c05d4ccf9ef6e924188f762145c0ff"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:667a9706c970cb552ede35aee17339a18e8f2a87a51fba2ed39ceeeb1004798a"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:b8a678974d1f3aa55f6cc34dc480169d58f2e6d8958895d68845fa4ab566509e"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:efc0f674aa41b92da8c49e0346318c6075d734994c3c4e4430b1c3f853e498e4"}, + {file = "greenlet-3.1.1-cp313-cp313-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:0153404a4bb921f0ff1abeb5ce8a5131da56b953eda6e14b88dc6bbc04d2049e"}, + {file = "greenlet-3.1.1-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:275f72decf9932639c1c6dd1013a1bc266438eb32710016a1c742df5da6e60a1"}, + {file = "greenlet-3.1.1-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:c4aab7f6381f38a4b42f269057aee279ab0fc7bf2e929e3d4abfae97b682a12c"}, + {file = "greenlet-3.1.1-cp313-cp313-win_amd64.whl", hash = "sha256:b42703b1cf69f2aa1df7d1030b9d77d3e584a70755674d60e710f0af570f3761"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:f1695e76146579f8c06c1509c7ce4dfe0706f49c6831a817ac04eebb2fd02011"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:7876452af029456b3f3549b696bb36a06db7c90747740c5302f74a9e9fa14b13"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = 
"sha256:4ead44c85f8ab905852d3de8d86f6f8baf77109f9da589cb4fa142bd3b57b475"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8320f64b777d00dd7ccdade271eaf0cad6636343293a25074cc5566160e4de7b"}, + {file = "greenlet-3.1.1-cp313-cp313t-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:6510bf84a6b643dabba74d3049ead221257603a253d0a9873f55f6a59a65f822"}, + {file = "greenlet-3.1.1-cp313-cp313t-musllinux_1_1_aarch64.whl", hash = "sha256:04b013dc07c96f83134b1e99888e7a79979f1a247e2a9f59697fa14b5862ed01"}, + {file = "greenlet-3.1.1-cp313-cp313t-musllinux_1_1_x86_64.whl", hash = "sha256:411f015496fec93c1c8cd4e5238da364e1da7a124bcb293f085bf2860c32c6f6"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:47da355d8687fd65240c364c90a31569a133b7b60de111c255ef5b606f2ae291"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:98884ecf2ffb7d7fe6bd517e8eb99d31ff7855a840fa6d0d63cd07c037f6a981"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f1d4aeb8891338e60d1ab6127af1fe45def5259def8094b9c7e34690c8858803"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:db32b5348615a04b82240cc67983cb315309e88d444a288934ee6ceaebcad6cc"}, + {file = "greenlet-3.1.1-cp37-cp37m-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:dcc62f31eae24de7f8dce72134c8651c58000d3b1868e01392baea7c32c247de"}, + {file = "greenlet-3.1.1-cp37-cp37m-musllinux_1_1_aarch64.whl", hash = "sha256:1d3755bcb2e02de341c55b4fca7a745a24a9e7212ac953f6b3a48d117d7257aa"}, + {file = "greenlet-3.1.1-cp37-cp37m-musllinux_1_1_x86_64.whl", hash = "sha256:b8da394b34370874b4572676f36acabac172602abf054cbc4ac910219f3340af"}, + {file = "greenlet-3.1.1-cp37-cp37m-win32.whl", hash = "sha256:a0dfc6c143b519113354e780a50381508139b07d2177cb6ad6a08278ec655798"}, + 
{file = "greenlet-3.1.1-cp37-cp37m-win_amd64.whl", hash = "sha256:54558ea205654b50c438029505def3834e80f0869a70fb15b871c29b4575ddef"}, + {file = "greenlet-3.1.1-cp38-cp38-macosx_11_0_universal2.whl", hash = "sha256:346bed03fe47414091be4ad44786d1bd8bef0c3fcad6ed3dee074a032ab408a9"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dfc59d69fc48664bc693842bd57acfdd490acafda1ab52c7836e3fc75c90a111"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d21e10da6ec19b457b82636209cbe2331ff4306b54d06fa04b7c138ba18c8a81"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:37b9de5a96111fc15418819ab4c4432e4f3c2ede61e660b1e33971eba26ef9ba"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ef9ea3f137e5711f0dbe5f9263e8c009b7069d8a1acea822bd5e9dae0ae49c8"}, + {file = "greenlet-3.1.1-cp38-cp38-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:85f3ff71e2e60bd4b4932a043fbbe0f499e263c628390b285cb599154a3b03b1"}, + {file = "greenlet-3.1.1-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:95ffcf719966dd7c453f908e208e14cde192e09fde6c7186c8f1896ef778d8cd"}, + {file = "greenlet-3.1.1-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:03a088b9de532cbfe2ba2034b2b85e82df37874681e8c470d6fb2f8c04d7e4b7"}, + {file = "greenlet-3.1.1-cp38-cp38-win32.whl", hash = "sha256:8b8b36671f10ba80e159378df9c4f15c14098c4fd73a36b9ad715f057272fbef"}, + {file = "greenlet-3.1.1-cp38-cp38-win_amd64.whl", hash = "sha256:7017b2be767b9d43cc31416aba48aab0d2309ee31b4dbf10a1d38fb7972bdf9d"}, + {file = "greenlet-3.1.1-cp39-cp39-macosx_11_0_universal2.whl", hash = "sha256:396979749bd95f018296af156201d6211240e7a23090f50a8d5d18c370084dc3"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:ca9d0ff5ad43e785350894d97e13633a66e2b50000e8a183a50a88d834752d42"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:f6ff3b14f2df4c41660a7dec01045a045653998784bf8cfcb5a525bdffffbc8f"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:94ebba31df2aa506d7b14866fed00ac141a867e63143fe5bca82a8e503b36437"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:73aaad12ac0ff500f62cebed98d8789198ea0e6f233421059fa68a5aa7220145"}, + {file = "greenlet-3.1.1-cp39-cp39-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:63e4844797b975b9af3a3fb8f7866ff08775f5426925e1e0bbcfe7932059a12c"}, + {file = "greenlet-3.1.1-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:7939aa3ca7d2a1593596e7ac6d59391ff30281ef280d8632fa03d81f7c5f955e"}, + {file = "greenlet-3.1.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:d0028e725ee18175c6e422797c407874da24381ce0690d6b9396c204c7f7276e"}, + {file = "greenlet-3.1.1-cp39-cp39-win32.whl", hash = "sha256:5e06afd14cbaf9e00899fae69b24a32f2196c19de08fcb9f4779dd4f004e5e7c"}, + {file = "greenlet-3.1.1-cp39-cp39-win_amd64.whl", hash = "sha256:3319aa75e0e0639bc15ff54ca327e8dc7a6fe404003496e3c6925cd3142e0e22"}, + {file = "greenlet-3.1.1.tar.gz", hash = "sha256:4ce3ac6cdb6adf7946475d7ef31777c26d94bccc377e070a7986bd2d5c515467"}, ] [package.extras] @@ -1727,6 +1735,7 @@ files = [ {file = "grpcio-1.66.1-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:9c9bebc6627873ec27a70fc800f6083a13c70b23a5564788754b9ee52c5aef6c"}, {file = "grpcio-1.66.1-cp39-cp39-win32.whl", hash = "sha256:30a1c2cf9390c894c90bbc70147f2372130ad189cffef161f0432d0157973f45"}, {file = "grpcio-1.66.1-cp39-cp39-win_amd64.whl", hash = "sha256:17663598aadbedc3cacd7bbde432f541c8e07d2496564e22b214b22c7523dac8"}, + {file = "grpcio-1.66.1.tar.gz", hash = "sha256:35334f9c9745add3e357e3372756fd32d925bd52c41da97f4dfdafbde0bf0ee2"}, ] 
[package.extras] @@ -1843,13 +1852,13 @@ files = [ [[package]] name = "hatchet-sdk" -version = "0.36.22" +version = "0.36.24" description = "" optional = false python-versions = "<4.0,>=3.10" files = [ - {file = "hatchet_sdk-0.36.22-py3-none-any.whl", hash = "sha256:ec69de6a6308ffbf730e1b6361b00224e3f2932b8070bd9c59987ea2d79cfa8d"}, - {file = "hatchet_sdk-0.36.22.tar.gz", hash = "sha256:e8ead3b30c21b8bf3f13ae799125eb8a84e612caf8f50a2eec395946939bf650"}, + {file = "hatchet_sdk-0.36.24-py3-none-any.whl", hash = "sha256:0d633e7210d541ae3278717f451ad6af1a579b57b7cc0fcb2786536ac65162ed"}, + {file = "hatchet_sdk-0.36.24.tar.gz", hash = "sha256:13ad42dec15a1c7da3dbba616e5f8ddc54b922e3a8420203f22533650ed1fe0d"}, ] [package.dependencies] @@ -1916,13 +1925,13 @@ zstd = ["zstandard (>=0.18.0)"] [[package]] name = "huggingface-hub" -version = "0.24.7" +version = "0.25.0" description = "Client library to download and publish models, datasets and other repos on the huggingface.co hub" optional = true python-versions = ">=3.8.0" files = [ - {file = "huggingface_hub-0.24.7-py3-none-any.whl", hash = "sha256:a212c555324c8a7b1ffdd07266bb7e7d69ca71aa238d27b7842d65e9a26ac3e5"}, - {file = "huggingface_hub-0.24.7.tar.gz", hash = "sha256:0ad8fb756e2831da0ac0491175b960f341fe06ebcf80ed6f8728313f95fc0207"}, + {file = "huggingface_hub-0.25.0-py3-none-any.whl", hash = "sha256:e2f357b35d72d5012cfd127108c4e14abcd61ba4ebc90a5a374dc2456cb34e12"}, + {file = "huggingface_hub-0.25.0.tar.gz", hash = "sha256:fb5fbe6c12fcd99d187ec7db95db9110fb1a20505f23040a5449a717c1a0db4d"}, ] [package.dependencies] @@ -1964,13 +1973,13 @@ pyreadline3 = {version = "*", markers = "sys_platform == \"win32\" and python_ve [[package]] name = "identify" -version = "2.6.0" +version = "2.6.1" description = "File identification library for Python" optional = false python-versions = ">=3.8" files = [ - {file = "identify-2.6.0-py2.py3-none-any.whl", hash = 
"sha256:e79ae4406387a9d300332b5fd366d8994f1525e8414984e1a59e058b2eda2dd0"}, - {file = "identify-2.6.0.tar.gz", hash = "sha256:cb171c685bdc31bcc4c1734698736a7d5b6c8bf2e0c15117f4d469c8640ae5cf"}, + {file = "identify-2.6.1-py2.py3-none-any.whl", hash = "sha256:53863bcac7caf8d2ed85bd20312ea5dcfc22226800f6d6881f232d861db5a8f0"}, + {file = "identify-2.6.1.tar.gz", hash = "sha256:91478c5fb7c3aac5ff7bf9b4344f803843dc586832d5f110d672b19aa1984c98"}, ] [package.extras] @@ -1978,15 +1987,18 @@ license = ["ukkonen"] [[package]] name = "idna" -version = "3.8" +version = "3.10" description = "Internationalized Domain Names in Applications (IDNA)" optional = false python-versions = ">=3.6" files = [ - {file = "idna-3.8-py3-none-any.whl", hash = "sha256:050b4e5baadcd44d760cedbd2b8e639f2ff89bbc7a5730fcc662954303377aac"}, - {file = "idna-3.8.tar.gz", hash = "sha256:d838c2c0ed6fced7693d5e8ab8e734d5f8fda53a039c0164afb0b82e771e3603"}, + {file = "idna-3.10-py3-none-any.whl", hash = "sha256:946d195a0d259cbba61165e88e65941f16e9b36ea6ddb97f00452bae8b1287d3"}, + {file = "idna-3.10.tar.gz", hash = "sha256:12f65c9b470abda6dc35cf8e63cc574b1c52b11df2c86030af0ac09b01b13ea9"}, ] +[package.extras] +all = ["flake8 (>=7.1.1)", "mypy (>=1.11.2)", "pytest (>=8.3.2)", "ruff (>=0.6.2)"] + [[package]] name = "imageio" version = "2.35.1" @@ -2128,7 +2140,7 @@ i18n = ["Babel (>=2.7)"] name = "jiter" version = "0.5.0" description = "Fast iterable JSON parser." 
-optional = true +optional = false python-versions = ">=3.8" files = [ {file = "jiter-0.5.0-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:b599f4e89b3def9a94091e6ee52e1d7ad7bc33e238ebb9c4c63f211d74822c3f"}, @@ -2431,13 +2443,13 @@ tesseract = ["pytesseract"] [[package]] name = "litellm" -version = "1.45.0" +version = "1.46.8" description = "Library to easily interface with LLM API providers" optional = true python-versions = "!=2.7.*,!=3.0.*,!=3.1.*,!=3.2.*,!=3.3.*,!=3.4.*,!=3.5.*,!=3.6.*,!=3.7.*,>=3.8" files = [ - {file = "litellm-1.45.0-py3-none-any.whl", hash = "sha256:87d712beabc6cbf24437557186c5b266729f3139bf7dd2e54573e1e9845b905a"}, - {file = "litellm-1.45.0.tar.gz", hash = "sha256:21285496e72b9b16ff9d3d1332965c3d1a9b0ae8dbb26f27a7080af28a117174"}, + {file = "litellm-1.46.8-py3-none-any.whl", hash = "sha256:112acc854d67ced573dc5d60bbf8b493dea1e61244013685dace8c2d912aa1b3"}, + {file = "litellm-1.46.8.tar.gz", hash = "sha256:443c67d33e1a264641b80bf170cad1ba42d6fa9816f86df5eaaaf10c1e21b551"}, ] [package.dependencies] @@ -3422,13 +3434,13 @@ sympy = "*" [[package]] name = "openai" -version = "1.45.0" +version = "1.47.0" description = "The official Python library for the openai API" -optional = true +optional = false python-versions = ">=3.7.1" files = [ - {file = "openai-1.45.0-py3-none-any.whl", hash = "sha256:2f1f7b7cf90f038a9f1c24f0d26c0f1790c102ec5acd07ffd70a9b7feac1ff4e"}, - {file = "openai-1.45.0.tar.gz", hash = "sha256:731207d10637335413aa3c0955f8f8df30d7636a4a0f9c381f2209d32cf8de97"}, + {file = "openai-1.47.0-py3-none-any.whl", hash = "sha256:9ccc8737dfa791f7bd903db4758c176b8544a8cd89d3a3d2add3cea02a34c3a0"}, + {file = "openai-1.47.0.tar.gz", hash = "sha256:6e14d6f77c8cf546646afcd87a2ef752505b3710d2564a2e433e17307dfa86a0"}, ] [package.dependencies] @@ -3509,40 +3521,53 @@ files = [ [[package]] name = "pandas" -version = "2.2.2" +version = "2.2.3" description = "Powerful data structures for data analysis, time series, and statistics" optional = true 
python-versions = ">=3.9" files = [ - {file = "pandas-2.2.2-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:90c6fca2acf139569e74e8781709dccb6fe25940488755716d1d354d6bc58bce"}, - {file = "pandas-2.2.2-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c7adfc142dac335d8c1e0dcbd37eb8617eac386596eb9e1a1b77791cf2498238"}, - {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4abfe0be0d7221be4f12552995e58723c7422c80a659da13ca382697de830c08"}, - {file = "pandas-2.2.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8635c16bf3d99040fdf3ca3db669a7250ddf49c55dc4aa8fe0ae0fa8d6dcc1f0"}, - {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:40ae1dffb3967a52203105a077415a86044a2bea011b5f321c6aa64b379a3f51"}, - {file = "pandas-2.2.2-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:8e5a0b00e1e56a842f922e7fae8ae4077aee4af0acb5ae3622bd4b4c30aedf99"}, - {file = "pandas-2.2.2-cp310-cp310-win_amd64.whl", hash = "sha256:ddf818e4e6c7c6f4f7c8a12709696d193976b591cc7dc50588d3d1a6b5dc8772"}, - {file = "pandas-2.2.2-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:696039430f7a562b74fa45f540aca068ea85fa34c244d0deee539cb6d70aa288"}, - {file = "pandas-2.2.2-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8e90497254aacacbc4ea6ae5e7a8cd75629d6ad2b30025a4a8b09aa4faf55151"}, - {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:58b84b91b0b9f4bafac2a0ac55002280c094dfc6402402332c0913a59654ab2b"}, - {file = "pandas-2.2.2-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6d2123dc9ad6a814bcdea0f099885276b31b24f7edf40f6cdbc0912672e22eee"}, - {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:2925720037f06e89af896c70bca73459d7e6a4be96f9de79e2d440bd499fe0db"}, - {file = "pandas-2.2.2-cp311-cp311-musllinux_1_1_x86_64.whl", hash = 
"sha256:0cace394b6ea70c01ca1595f839cf193df35d1575986e484ad35c4aeae7266c1"}, - {file = "pandas-2.2.2-cp311-cp311-win_amd64.whl", hash = "sha256:873d13d177501a28b2756375d59816c365e42ed8417b41665f346289adc68d24"}, - {file = "pandas-2.2.2-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:9dfde2a0ddef507a631dc9dc4af6a9489d5e2e740e226ad426a05cabfbd7c8ef"}, - {file = "pandas-2.2.2-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:e9b79011ff7a0f4b1d6da6a61aa1aa604fb312d6647de5bad20013682d1429ce"}, - {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1cb51fe389360f3b5a4d57dbd2848a5f033350336ca3b340d1c53a1fad33bcad"}, - {file = "pandas-2.2.2-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:eee3a87076c0756de40b05c5e9a6069c035ba43e8dd71c379e68cab2c20f16ad"}, - {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:3e374f59e440d4ab45ca2fffde54b81ac3834cf5ae2cdfa69c90bc03bde04d76"}, - {file = "pandas-2.2.2-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:43498c0bdb43d55cb162cdc8c06fac328ccb5d2eabe3cadeb3529ae6f0517c32"}, - {file = "pandas-2.2.2-cp312-cp312-win_amd64.whl", hash = "sha256:d187d355ecec3629624fccb01d104da7d7f391db0311145817525281e2804d23"}, - {file = "pandas-2.2.2-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:0ca6377b8fca51815f382bd0b697a0814c8bda55115678cbc94c30aacbb6eff2"}, - {file = "pandas-2.2.2-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:9057e6aa78a584bc93a13f0a9bf7e753a5e9770a30b4d758b8d5f2a62a9433cd"}, - {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:001910ad31abc7bf06f49dcc903755d2f7f3a9186c0c040b827e522e9cef0863"}, - {file = "pandas-2.2.2-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:66b479b0bd07204e37583c191535505410daa8df638fd8e75ae1b383851fe921"}, - {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_aarch64.whl", hash = 
"sha256:a77e9d1c386196879aa5eb712e77461aaee433e54c68cf253053a73b7e49c33a"}, - {file = "pandas-2.2.2-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:92fd6b027924a7e178ac202cfbe25e53368db90d56872d20ffae94b96c7acc57"}, - {file = "pandas-2.2.2-cp39-cp39-win_amd64.whl", hash = "sha256:640cef9aa381b60e296db324337a554aeeb883ead99dc8f6c18e81a93942f5f4"}, - {file = "pandas-2.2.2.tar.gz", hash = "sha256:9e79019aba43cb4fda9e4d983f8e88ca0373adbb697ae9c6c43093218de28b54"}, + {file = "pandas-2.2.3-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:1948ddde24197a0f7add2bdc4ca83bf2b1ef84a1bc8ccffd95eda17fd836ecb5"}, + {file = "pandas-2.2.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:381175499d3802cde0eabbaf6324cce0c4f5d52ca6f8c377c29ad442f50f6348"}, + {file = "pandas-2.2.3-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:d9c45366def9a3dd85a6454c0e7908f2b3b8e9c138f5dc38fed7ce720d8453ed"}, + {file = "pandas-2.2.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:86976a1c5b25ae3f8ccae3a5306e443569ee3c3faf444dfd0f41cda24667ad57"}, + {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:b8661b0238a69d7aafe156b7fa86c44b881387509653fdf857bebc5e4008ad42"}, + {file = "pandas-2.2.3-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:37e0aced3e8f539eccf2e099f65cdb9c8aa85109b0be6e93e2baff94264bdc6f"}, + {file = "pandas-2.2.3-cp310-cp310-win_amd64.whl", hash = "sha256:56534ce0746a58afaf7942ba4863e0ef81c9c50d3f0ae93e9497d6a41a057645"}, + {file = "pandas-2.2.3-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:66108071e1b935240e74525006034333f98bcdb87ea116de573a6a0dccb6c039"}, + {file = "pandas-2.2.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:7c2875855b0ff77b2a64a0365e24455d9990730d6431b9e0ee18ad8acee13dbd"}, + {file = "pandas-2.2.3-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:cd8d0c3be0515c12fed0bdbae072551c8b54b7192c7b1fda0ba56059a0179698"}, + {file = 
"pandas-2.2.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c124333816c3a9b03fbeef3a9f230ba9a737e9e5bb4060aa2107a86cc0a497fc"}, + {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:63cc132e40a2e084cf01adf0775b15ac515ba905d7dcca47e9a251819c575ef3"}, + {file = "pandas-2.2.3-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:29401dbfa9ad77319367d36940cd8a0b3a11aba16063e39632d98b0e931ddf32"}, + {file = "pandas-2.2.3-cp311-cp311-win_amd64.whl", hash = "sha256:3fc6873a41186404dad67245896a6e440baacc92f5b716ccd1bc9ed2995ab2c5"}, + {file = "pandas-2.2.3-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:b1d432e8d08679a40e2a6d8b2f9770a5c21793a6f9f47fdd52c5ce1948a5a8a9"}, + {file = "pandas-2.2.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:a5a1595fe639f5988ba6a8e5bc9649af3baf26df3998a0abe56c02609392e0a4"}, + {file = "pandas-2.2.3-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:5de54125a92bb4d1c051c0659e6fcb75256bf799a732a87184e5ea503965bce3"}, + {file = "pandas-2.2.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fffb8ae78d8af97f849404f21411c95062db1496aeb3e56f146f0355c9989319"}, + {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:6dfcb5ee8d4d50c06a51c2fffa6cff6272098ad6540aed1a76d15fb9318194d8"}, + {file = "pandas-2.2.3-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:062309c1b9ea12a50e8ce661145c6aab431b1e99530d3cd60640e255778bd43a"}, + {file = "pandas-2.2.3-cp312-cp312-win_amd64.whl", hash = "sha256:59ef3764d0fe818125a5097d2ae867ca3fa64df032331b7e0917cf5d7bf66b13"}, + {file = "pandas-2.2.3-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:f00d1345d84d8c86a63e476bb4955e46458b304b9575dcf71102b5c705320015"}, + {file = "pandas-2.2.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:3508d914817e153ad359d7e069d752cdd736a247c322d932eb89e6bc84217f28"}, + {file = 
"pandas-2.2.3-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:22a9d949bfc9a502d320aa04e5d02feab689d61da4e7764b62c30b991c42c5f0"}, + {file = "pandas-2.2.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f3a255b2c19987fbbe62a9dfd6cff7ff2aa9ccab3fc75218fd4b7530f01efa24"}, + {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:800250ecdadb6d9c78eae4990da62743b857b470883fa27f652db8bdde7f6659"}, + {file = "pandas-2.2.3-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:6374c452ff3ec675a8f46fd9ab25c4ad0ba590b71cf0656f8b6daa5202bca3fb"}, + {file = "pandas-2.2.3-cp313-cp313-win_amd64.whl", hash = "sha256:61c5ad4043f791b61dd4752191d9f07f0ae412515d59ba8f005832a532f8736d"}, + {file = "pandas-2.2.3-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:3b71f27954685ee685317063bf13c7709a7ba74fc996b84fc6821c59b0f06468"}, + {file = "pandas-2.2.3-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:38cf8125c40dae9d5acc10fa66af8ea6fdf760b2714ee482ca691fc66e6fcb18"}, + {file = "pandas-2.2.3-cp313-cp313t-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:ba96630bc17c875161df3818780af30e43be9b166ce51c9a18c1feae342906c2"}, + {file = "pandas-2.2.3-cp313-cp313t-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1db71525a1538b30142094edb9adc10be3f3e176748cd7acc2240c2f2e5aa3a4"}, + {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:15c0e1e02e93116177d29ff83e8b1619c93ddc9c49083f237d4312337a61165d"}, + {file = "pandas-2.2.3-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:ad5b65698ab28ed8d7f18790a0dc58005c7629f227be9ecc1072aa74c0c1d43a"}, + {file = "pandas-2.2.3-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:bc6b93f9b966093cb0fd62ff1a7e4c09e6d546ad7c1de191767baffc57628f39"}, + {file = "pandas-2.2.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:5dbca4c1acd72e8eeef4753eeca07de9b1db4f398669d5994086f788a5d7cc30"}, + {file = 
"pandas-2.2.3-cp39-cp39-manylinux2014_aarch64.manylinux_2_17_aarch64.whl", hash = "sha256:8cd6d7cc958a3910f934ea8dbdf17b2364827bb4dafc38ce6eef6bb3d65ff09c"}, + {file = "pandas-2.2.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:99df71520d25fade9db7c1076ac94eb994f4d2673ef2aa2e86ee039b6746d20c"}, + {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_aarch64.whl", hash = "sha256:31d0ced62d4ea3e231a9f228366919a5ea0b07440d9d4dac345376fd8e1477ea"}, + {file = "pandas-2.2.3-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:7eee9e7cea6adf3e3d24e304ac6b8300646e2a5d1cd3a3c2abed9101b0846761"}, + {file = "pandas-2.2.3-cp39-cp39-win_amd64.whl", hash = "sha256:4850ba03528b6dd51d6c5d273c46f183f39a9baf3f0143e566b89450965b105e"}, + {file = "pandas-2.2.3.tar.gz", hash = "sha256:4f18ba62b61d7e192368b84517265a99b4d7ee8912f8708660fb4a366cc82667"}, ] [package.dependencies] @@ -3904,13 +3929,13 @@ xmp = ["defusedxml"] [[package]] name = "platformdirs" -version = "4.3.2" +version = "4.3.6" description = "A small Python package for determining appropriate platform-specific dirs, e.g. a `user data dir`." optional = false python-versions = ">=3.8" files = [ - {file = "platformdirs-4.3.2-py3-none-any.whl", hash = "sha256:eb1c8582560b34ed4ba105009a4badf7f6f85768b30126f351328507b2beb617"}, - {file = "platformdirs-4.3.2.tar.gz", hash = "sha256:9e5e27a08aa095dd127b9f2e764d74254f482fef22b0970773bfba79d091ab8c"}, + {file = "platformdirs-4.3.6-py3-none-any.whl", hash = "sha256:73e575e1408ab8103900836b97580d5307456908a03e92031bab39e4554cc3fb"}, + {file = "platformdirs-4.3.6.tar.gz", hash = "sha256:357fb2acbc885b0419afd3ce3ed34564c13c9b95c89360cd9563f73aa5e2b907"}, ] [package.extras] @@ -3971,13 +3996,13 @@ tests = ["pytest (>=5.4.1)", "pytest-cov (>=2.8.1)", "pytest-mypy (>=0.8.0)", "p [[package]] name = "posthog" -version = "3.6.5" +version = "3.6.6" description = "Integrate PostHog into any python application." 
optional = true python-versions = "*" files = [ - {file = "posthog-3.6.5-py2.py3-none-any.whl", hash = "sha256:f8b7c573826b061a1d22c9495169c38ebe83a1df2729f49c7129a9c23a02acf6"}, - {file = "posthog-3.6.5.tar.gz", hash = "sha256:7fd3ca809e15476c35f75d18cd6bba31395daf0a17b75242965c469fb6292510"}, + {file = "posthog-3.6.6-py2.py3-none-any.whl", hash = "sha256:38834fd7f0732582a20d4eb4674c8d5c088e464d14d1b3f8c176e389aecaa4ef"}, + {file = "posthog-3.6.6.tar.gz", hash = "sha256:1e04783293117109189ad7048f3eedbe21caff0e39bee5e2d47a93dd790fefac"}, ] [package.dependencies] @@ -4043,22 +4068,22 @@ testing = ["google-api-core (>=1.31.5)"] [[package]] name = "protobuf" -version = "4.25.4" +version = "4.25.5" description = "" optional = false python-versions = ">=3.8" files = [ - {file = "protobuf-4.25.4-cp310-abi3-win32.whl", hash = "sha256:db9fd45183e1a67722cafa5c1da3e85c6492a5383f127c86c4c4aa4845867dc4"}, - {file = "protobuf-4.25.4-cp310-abi3-win_amd64.whl", hash = "sha256:ba3d8504116a921af46499471c63a85260c1a5fc23333154a427a310e015d26d"}, - {file = "protobuf-4.25.4-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:eecd41bfc0e4b1bd3fa7909ed93dd14dd5567b98c941d6c1ad08fdcab3d6884b"}, - {file = "protobuf-4.25.4-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:4c8a70fdcb995dcf6c8966cfa3a29101916f7225e9afe3ced4395359955d3835"}, - {file = "protobuf-4.25.4-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:3319e073562e2515c6ddc643eb92ce20809f5d8f10fead3332f71c63be6a7040"}, - {file = "protobuf-4.25.4-cp38-cp38-win32.whl", hash = "sha256:7e372cbbda66a63ebca18f8ffaa6948455dfecc4e9c1029312f6c2edcd86c4e1"}, - {file = "protobuf-4.25.4-cp38-cp38-win_amd64.whl", hash = "sha256:051e97ce9fa6067a4546e75cb14f90cf0232dcb3e3d508c448b8d0e4265b61c1"}, - {file = "protobuf-4.25.4-cp39-cp39-win32.whl", hash = "sha256:90bf6fd378494eb698805bbbe7afe6c5d12c8e17fca817a646cd6a1818c696ca"}, - {file = "protobuf-4.25.4-cp39-cp39-win_amd64.whl", hash = 
"sha256:ac79a48d6b99dfed2729ccccee547b34a1d3d63289c71cef056653a846a2240f"}, - {file = "protobuf-4.25.4-py3-none-any.whl", hash = "sha256:bfbebc1c8e4793cfd58589acfb8a1026be0003e852b9da7db5a4285bde996978"}, - {file = "protobuf-4.25.4.tar.gz", hash = "sha256:0dc4a62cc4052a036ee2204d26fe4d835c62827c855c8a03f29fe6da146b380d"}, + {file = "protobuf-4.25.5-cp310-abi3-win32.whl", hash = "sha256:5e61fd921603f58d2f5acb2806a929b4675f8874ff5f330b7d6f7e2e784bbcd8"}, + {file = "protobuf-4.25.5-cp310-abi3-win_amd64.whl", hash = "sha256:4be0571adcbe712b282a330c6e89eae24281344429ae95c6d85e79e84780f5ea"}, + {file = "protobuf-4.25.5-cp37-abi3-macosx_10_9_universal2.whl", hash = "sha256:b2fde3d805354df675ea4c7c6338c1aecd254dfc9925e88c6d31a2bcb97eb173"}, + {file = "protobuf-4.25.5-cp37-abi3-manylinux2014_aarch64.whl", hash = "sha256:919ad92d9b0310070f8356c24b855c98df2b8bd207ebc1c0c6fcc9ab1e007f3d"}, + {file = "protobuf-4.25.5-cp37-abi3-manylinux2014_x86_64.whl", hash = "sha256:fe14e16c22be926d3abfcb500e60cab068baf10b542b8c858fa27e098123e331"}, + {file = "protobuf-4.25.5-cp38-cp38-win32.whl", hash = "sha256:98d8d8aa50de6a2747efd9cceba361c9034050ecce3e09136f90de37ddba66e1"}, + {file = "protobuf-4.25.5-cp38-cp38-win_amd64.whl", hash = "sha256:b0234dd5a03049e4ddd94b93400b67803c823cfc405689688f59b34e0742381a"}, + {file = "protobuf-4.25.5-cp39-cp39-win32.whl", hash = "sha256:abe32aad8561aa7cc94fc7ba4fdef646e576983edb94a73381b03c53728a626f"}, + {file = "protobuf-4.25.5-cp39-cp39-win_amd64.whl", hash = "sha256:7a183f592dc80aa7c8da7ad9e55091c4ffc9497b3054452d629bb85fa27c2a45"}, + {file = "protobuf-4.25.5-py3-none-any.whl", hash = "sha256:0aebecb809cae990f8129ada5ca273d9d670b76d9bfc9b1809f0a9c02b7dbf41"}, + {file = "protobuf-4.25.5.tar.gz", hash = "sha256:7f8249476b4a9473645db7f8ab42b02fe1488cbe5fb72fddd445e0665afd8584"}, ] [[package]] @@ -4253,7 +4278,7 @@ files = [ name = "pycparser" version = "2.22" description = "C parser in Python" -optional = true +optional = false python-versions = ">=3.8" 
files = [ {file = "pycparser-2.22-py3-none-any.whl", hash = "sha256:c3702b6d3dd8c7abc1afa565d7e63d53a1d0bd86cdc24edd75470f4de499cfcc"}, @@ -4262,19 +4287,19 @@ files = [ [[package]] name = "pydantic" -version = "2.9.1" +version = "2.9.2" description = "Data validation using Python type hints" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic-2.9.1-py3-none-any.whl", hash = "sha256:7aff4db5fdf3cf573d4b3c30926a510a10e19a0774d38fc4967f78beb6deb612"}, - {file = "pydantic-2.9.1.tar.gz", hash = "sha256:1363c7d975c7036df0db2b4a61f2e062fbc0aa5ab5f2772e0ffc7191a4f4bce2"}, + {file = "pydantic-2.9.2-py3-none-any.whl", hash = "sha256:f048cec7b26778210e28a0459867920654d48e5e62db0958433636cde4254f12"}, + {file = "pydantic-2.9.2.tar.gz", hash = "sha256:d155cef71265d1e9807ed1c32b4c8deec042a44a50a4188b25ac67ecd81a9c0f"}, ] [package.dependencies] annotated-types = ">=0.6.0" email-validator = {version = ">=2.0.0", optional = true, markers = "extra == \"email\""} -pydantic-core = "2.23.3" +pydantic-core = "2.23.4" typing-extensions = {version = ">=4.6.1", markers = "python_version < \"3.13\""} [package.extras] @@ -4283,100 +4308,100 @@ timezone = ["tzdata"] [[package]] name = "pydantic-core" -version = "2.23.3" +version = "2.23.4" description = "Core functionality for Pydantic validation and serialization" optional = false python-versions = ">=3.8" files = [ - {file = "pydantic_core-2.23.3-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:7f10a5d1b9281392f1bf507d16ac720e78285dfd635b05737c3911637601bae6"}, - {file = "pydantic_core-2.23.3-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:3c09a7885dd33ee8c65266e5aa7fb7e2f23d49d8043f089989726391dd7350c5"}, - {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6470b5a1ec4d1c2e9afe928c6cb37eb33381cab99292a708b8cb9aa89e62429b"}, - {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:9172d2088e27d9a185ea0a6c8cebe227a9139fd90295221d7d495944d2367700"}, - {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86fc6c762ca7ac8fbbdff80d61b2c59fb6b7d144aa46e2d54d9e1b7b0e780e01"}, - {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f0cb80fd5c2df4898693aa841425ea1727b1b6d2167448253077d2a49003e0ed"}, - {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:03667cec5daf43ac4995cefa8aaf58f99de036204a37b889c24a80927b629cec"}, - {file = "pydantic_core-2.23.3-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:047531242f8e9c2db733599f1c612925de095e93c9cc0e599e96cf536aaf56ba"}, - {file = "pydantic_core-2.23.3-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:5499798317fff7f25dbef9347f4451b91ac2a4330c6669821c8202fd354c7bee"}, - {file = "pydantic_core-2.23.3-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:bbb5e45eab7624440516ee3722a3044b83fff4c0372efe183fd6ba678ff681fe"}, - {file = "pydantic_core-2.23.3-cp310-none-win32.whl", hash = "sha256:8b5b3ed73abb147704a6e9f556d8c5cb078f8c095be4588e669d315e0d11893b"}, - {file = "pydantic_core-2.23.3-cp310-none-win_amd64.whl", hash = "sha256:2b603cde285322758a0279995b5796d64b63060bfbe214b50a3ca23b5cee3e83"}, - {file = "pydantic_core-2.23.3-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:c889fd87e1f1bbeb877c2ee56b63bb297de4636661cc9bbfcf4b34e5e925bc27"}, - {file = "pydantic_core-2.23.3-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:ea85bda3189fb27503af4c45273735bcde3dd31c1ab17d11f37b04877859ef45"}, - {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a7f7f72f721223f33d3dc98a791666ebc6a91fa023ce63733709f4894a7dc611"}, - {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:2b2b55b0448e9da68f56b696f313949cda1039e8ec7b5d294285335b53104b61"}, - {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:c24574c7e92e2c56379706b9a3f07c1e0c7f2f87a41b6ee86653100c4ce343e5"}, - {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:f2b05e6ccbee333a8f4b8f4d7c244fdb7a979e90977ad9c51ea31261e2085ce0"}, - {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e2c409ce1c219c091e47cb03feb3c4ed8c2b8e004efc940da0166aaee8f9d6c8"}, - {file = "pydantic_core-2.23.3-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:d965e8b325f443ed3196db890d85dfebbb09f7384486a77461347f4adb1fa7f8"}, - {file = "pydantic_core-2.23.3-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:f56af3a420fb1ffaf43ece3ea09c2d27c444e7c40dcb7c6e7cf57aae764f2b48"}, - {file = "pydantic_core-2.23.3-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:5b01a078dd4f9a52494370af21aa52964e0a96d4862ac64ff7cea06e0f12d2c5"}, - {file = "pydantic_core-2.23.3-cp311-none-win32.whl", hash = "sha256:560e32f0df04ac69b3dd818f71339983f6d1f70eb99d4d1f8e9705fb6c34a5c1"}, - {file = "pydantic_core-2.23.3-cp311-none-win_amd64.whl", hash = "sha256:c744fa100fdea0d000d8bcddee95213d2de2e95b9c12be083370b2072333a0fa"}, - {file = "pydantic_core-2.23.3-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:e0ec50663feedf64d21bad0809f5857bac1ce91deded203efc4a84b31b2e4305"}, - {file = "pydantic_core-2.23.3-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:db6e6afcb95edbe6b357786684b71008499836e91f2a4a1e55b840955b341dbb"}, - {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:98ccd69edcf49f0875d86942f4418a4e83eb3047f20eb897bffa62a5d419c8fa"}, - {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:a678c1ac5c5ec5685af0133262103defb427114e62eafeda12f1357a12140162"}, - {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:01491d8b4d8db9f3391d93b0df60701e644ff0894352947f31fff3e52bd5c801"}, - {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fcf31facf2796a2d3b7fe338fe8640aa0166e4e55b4cb108dbfd1058049bf4cb"}, - {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:7200fd561fb3be06827340da066df4311d0b6b8eb0c2116a110be5245dceb326"}, - {file = "pydantic_core-2.23.3-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:dc1636770a809dee2bd44dd74b89cc80eb41172bcad8af75dd0bc182c2666d4c"}, - {file = "pydantic_core-2.23.3-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:67a5def279309f2e23014b608c4150b0c2d323bd7bccd27ff07b001c12c2415c"}, - {file = "pydantic_core-2.23.3-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:748bdf985014c6dd3e1e4cc3db90f1c3ecc7246ff5a3cd4ddab20c768b2f1dab"}, - {file = "pydantic_core-2.23.3-cp312-none-win32.whl", hash = "sha256:255ec6dcb899c115f1e2a64bc9ebc24cc0e3ab097775755244f77360d1f3c06c"}, - {file = "pydantic_core-2.23.3-cp312-none-win_amd64.whl", hash = "sha256:40b8441be16c1e940abebed83cd006ddb9e3737a279e339dbd6d31578b802f7b"}, - {file = "pydantic_core-2.23.3-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:6daaf5b1ba1369a22c8b050b643250e3e5efc6a78366d323294aee54953a4d5f"}, - {file = "pydantic_core-2.23.3-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:d015e63b985a78a3d4ccffd3bdf22b7c20b3bbd4b8227809b3e8e75bc37f9cb2"}, - {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a3fc572d9b5b5cfe13f8e8a6e26271d5d13f80173724b738557a8c7f3a8a3791"}, - {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:f6bd91345b5163ee7448bee201ed7dd601ca24f43f439109b0212e296eb5b423"}, - {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fc379c73fd66606628b866f661e8785088afe2adaba78e6bbe80796baf708a63"}, - {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:fbdce4b47592f9e296e19ac31667daed8753c8367ebb34b9a9bd89dacaa299c9"}, - {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fc3cf31edf405a161a0adad83246568647c54404739b614b1ff43dad2b02e6d5"}, - {file = "pydantic_core-2.23.3-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:8e22b477bf90db71c156f89a55bfe4d25177b81fce4aa09294d9e805eec13855"}, - {file = "pydantic_core-2.23.3-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:0a0137ddf462575d9bce863c4c95bac3493ba8e22f8c28ca94634b4a1d3e2bb4"}, - {file = "pydantic_core-2.23.3-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:203171e48946c3164fe7691fc349c79241ff8f28306abd4cad5f4f75ed80bc8d"}, - {file = "pydantic_core-2.23.3-cp313-none-win32.whl", hash = "sha256:76bdab0de4acb3f119c2a4bff740e0c7dc2e6de7692774620f7452ce11ca76c8"}, - {file = "pydantic_core-2.23.3-cp313-none-win_amd64.whl", hash = "sha256:37ba321ac2a46100c578a92e9a6aa33afe9ec99ffa084424291d84e456f490c1"}, - {file = "pydantic_core-2.23.3-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d063c6b9fed7d992bcbebfc9133f4c24b7a7f215d6b102f3e082b1117cddb72c"}, - {file = "pydantic_core-2.23.3-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:6cb968da9a0746a0cf521b2b5ef25fc5a0bee9b9a1a8214e0a1cfaea5be7e8a4"}, - {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:edbefe079a520c5984e30e1f1f29325054b59534729c25b874a16a5048028d16"}, - {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = 
"sha256:cbaaf2ef20d282659093913da9d402108203f7cb5955020bd8d1ae5a2325d1c4"}, - {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:fb539d7e5dc4aac345846f290cf504d2fd3c1be26ac4e8b5e4c2b688069ff4cf"}, - {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:7e6f33503c5495059148cc486867e1d24ca35df5fc064686e631e314d959ad5b"}, - {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:04b07490bc2f6f2717b10c3969e1b830f5720b632f8ae2f3b8b1542394c47a8e"}, - {file = "pydantic_core-2.23.3-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:03795b9e8a5d7fda05f3873efc3f59105e2dcff14231680296b87b80bb327295"}, - {file = "pydantic_core-2.23.3-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:c483dab0f14b8d3f0df0c6c18d70b21b086f74c87ab03c59250dbf6d3c89baba"}, - {file = "pydantic_core-2.23.3-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:8b2682038e255e94baf2c473dca914a7460069171ff5cdd4080be18ab8a7fd6e"}, - {file = "pydantic_core-2.23.3-cp38-none-win32.whl", hash = "sha256:f4a57db8966b3a1d1a350012839c6a0099f0898c56512dfade8a1fe5fb278710"}, - {file = "pydantic_core-2.23.3-cp38-none-win_amd64.whl", hash = "sha256:13dd45ba2561603681a2676ca56006d6dee94493f03d5cadc055d2055615c3ea"}, - {file = "pydantic_core-2.23.3-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:82da2f4703894134a9f000e24965df73cc103e31e8c31906cc1ee89fde72cbd8"}, - {file = "pydantic_core-2.23.3-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:dd9be0a42de08f4b58a3cc73a123f124f65c24698b95a54c1543065baca8cf0e"}, - {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:89b731f25c80830c76fdb13705c68fef6a2b6dc494402987c7ea9584fe189f5d"}, - {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:c6de1ec30c4bb94f3a69c9f5f2182baeda5b809f806676675e9ef6b8dc936f28"}, - 
{file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:bb68b41c3fa64587412b104294b9cbb027509dc2f6958446c502638d481525ef"}, - {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:1c3980f2843de5184656aab58698011b42763ccba11c4a8c35936c8dd6c7068c"}, - {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:94f85614f2cba13f62c3c6481716e4adeae48e1eaa7e8bac379b9d177d93947a"}, - {file = "pydantic_core-2.23.3-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:510b7fb0a86dc8f10a8bb43bd2f97beb63cffad1203071dc434dac26453955cd"}, - {file = "pydantic_core-2.23.3-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:1eba2f7ce3e30ee2170410e2171867ea73dbd692433b81a93758ab2de6c64835"}, - {file = "pydantic_core-2.23.3-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:4b259fd8409ab84b4041b7b3f24dcc41e4696f180b775961ca8142b5b21d0e70"}, - {file = "pydantic_core-2.23.3-cp39-none-win32.whl", hash = "sha256:40d9bd259538dba2f40963286009bf7caf18b5112b19d2b55b09c14dde6db6a7"}, - {file = "pydantic_core-2.23.3-cp39-none-win_amd64.whl", hash = "sha256:5a8cd3074a98ee70173a8633ad3c10e00dcb991ecec57263aacb4095c5efb958"}, - {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f399e8657c67313476a121a6944311fab377085ca7f490648c9af97fc732732d"}, - {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:6b5547d098c76e1694ba85f05b595720d7c60d342f24d5aad32c3049131fa5c4"}, - {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0dda0290a6f608504882d9f7650975b4651ff91c85673341789a476b1159f211"}, - {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:65b6e5da855e9c55a0c67f4db8a492bf13d8d3316a59999cfbaf98cc6e401961"}, - {file = 
"pydantic_core-2.23.3-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:09e926397f392059ce0afdcac920df29d9c833256354d0c55f1584b0b70cf07e"}, - {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:87cfa0ed6b8c5bd6ae8b66de941cece179281239d482f363814d2b986b79cedc"}, - {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:e61328920154b6a44d98cabcb709f10e8b74276bc709c9a513a8c37a18786cc4"}, - {file = "pydantic_core-2.23.3-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:ce3317d155628301d649fe5e16a99528d5680af4ec7aa70b90b8dacd2d725c9b"}, - {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:e89513f014c6be0d17b00a9a7c81b1c426f4eb9224b15433f3d98c1a071f8433"}, - {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:4f62c1c953d7ee375df5eb2e44ad50ce2f5aff931723b398b8bc6f0ac159791a"}, - {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2718443bc671c7ac331de4eef9b673063b10af32a0bb385019ad61dcf2cc8f6c"}, - {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a0d90e08b2727c5d01af1b5ef4121d2f0c99fbee692c762f4d9d0409c9da6541"}, - {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:2b676583fc459c64146debea14ba3af54e540b61762dfc0613dc4e98c3f66eeb"}, - {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:50e4661f3337977740fdbfbae084ae5693e505ca2b3130a6d4eb0f2281dc43b8"}, - {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:68f4cf373f0de6abfe599a38307f4417c1c867ca381c03df27c873a9069cda25"}, - {file = "pydantic_core-2.23.3-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:59d52cf01854cb26c46958552a21acb10dd78a52aa34c86f284e66b209db8cab"}, - {file = "pydantic_core-2.23.3.tar.gz", 
hash = "sha256:3cb0f65d8b4121c1b015c60104a685feb929a29d7cf204387c7f2688c7974690"}, + {file = "pydantic_core-2.23.4-cp310-cp310-macosx_10_12_x86_64.whl", hash = "sha256:b10bd51f823d891193d4717448fab065733958bdb6a6b351967bd349d48d5c9b"}, + {file = "pydantic_core-2.23.4-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4fc714bdbfb534f94034efaa6eadd74e5b93c8fa6315565a222f7b6f42ca1166"}, + {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:63e46b3169866bd62849936de036f901a9356e36376079b05efa83caeaa02ceb"}, + {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ed1a53de42fbe34853ba90513cea21673481cd81ed1be739f7f2efb931b24916"}, + {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:cfdd16ab5e59fc31b5e906d1a3f666571abc367598e3e02c83403acabc092e07"}, + {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:255a8ef062cbf6674450e668482456abac99a5583bbafb73f9ad469540a3a232"}, + {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:4a7cd62e831afe623fbb7aabbb4fe583212115b3ef38a9f6b71869ba644624a2"}, + {file = "pydantic_core-2.23.4-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f09e2ff1f17c2b51f2bc76d1cc33da96298f0a036a137f5440ab3ec5360b624f"}, + {file = "pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_aarch64.whl", hash = "sha256:e38e63e6f3d1cec5a27e0afe90a085af8b6806ee208b33030e65b6516353f1a3"}, + {file = "pydantic_core-2.23.4-cp310-cp310-musllinux_1_1_x86_64.whl", hash = "sha256:0dbd8dbed2085ed23b5c04afa29d8fd2771674223135dc9bc937f3c09284d071"}, + {file = "pydantic_core-2.23.4-cp310-none-win32.whl", hash = "sha256:6531b7ca5f951d663c339002e91aaebda765ec7d61b7d1e3991051906ddde119"}, + {file = "pydantic_core-2.23.4-cp310-none-win_amd64.whl", hash = 
"sha256:7c9129eb40958b3d4500fa2467e6a83356b3b61bfff1b414c7361d9220f9ae8f"}, + {file = "pydantic_core-2.23.4-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:77733e3892bb0a7fa797826361ce8a9184d25c8dffaec60b7ffe928153680ba8"}, + {file = "pydantic_core-2.23.4-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:1b84d168f6c48fabd1f2027a3d1bdfe62f92cade1fb273a5d68e621da0e44e6d"}, + {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:df49e7a0861a8c36d089c1ed57d308623d60416dab2647a4a17fe050ba85de0e"}, + {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:ff02b6d461a6de369f07ec15e465a88895f3223eb75073ffea56b84d9331f607"}, + {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:996a38a83508c54c78a5f41456b0103c30508fed9abcad0a59b876d7398f25fd"}, + {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:d97683ddee4723ae8c95d1eddac7c192e8c552da0c73a925a89fa8649bf13eea"}, + {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:216f9b2d7713eb98cb83c80b9c794de1f6b7e3145eef40400c62e86cee5f4e1e"}, + {file = "pydantic_core-2.23.4-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:6f783e0ec4803c787bcea93e13e9932edab72068f68ecffdf86a99fd5918878b"}, + {file = "pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_aarch64.whl", hash = "sha256:d0776dea117cf5272382634bd2a5c1b6eb16767c223c6a5317cd3e2a757c61a0"}, + {file = "pydantic_core-2.23.4-cp311-cp311-musllinux_1_1_x86_64.whl", hash = "sha256:d5f7a395a8cf1621939692dba2a6b6a830efa6b3cee787d82c7de1ad2930de64"}, + {file = "pydantic_core-2.23.4-cp311-none-win32.whl", hash = "sha256:74b9127ffea03643e998e0c5ad9bd3811d3dac8c676e47db17b0ee7c3c3bf35f"}, + {file = "pydantic_core-2.23.4-cp311-none-win_amd64.whl", hash = 
"sha256:98d134c954828488b153d88ba1f34e14259284f256180ce659e8d83e9c05eaa3"}, + {file = "pydantic_core-2.23.4-cp312-cp312-macosx_10_12_x86_64.whl", hash = "sha256:f3e0da4ebaef65158d4dfd7d3678aad692f7666877df0002b8a522cdf088f231"}, + {file = "pydantic_core-2.23.4-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:f69a8e0b033b747bb3e36a44e7732f0c99f7edd5cea723d45bc0d6e95377ffee"}, + {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:723314c1d51722ab28bfcd5240d858512ffd3116449c557a1336cbe3919beb87"}, + {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:bb2802e667b7051a1bebbfe93684841cc9351004e2badbd6411bf357ab8d5ac8"}, + {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:d18ca8148bebe1b0a382a27a8ee60350091a6ddaf475fa05ef50dc35b5df6327"}, + {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:33e3d65a85a2a4a0dc3b092b938a4062b1a05f3a9abde65ea93b233bca0e03f2"}, + {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:128585782e5bfa515c590ccee4b727fb76925dd04a98864182b22e89a4e6ed36"}, + {file = "pydantic_core-2.23.4-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:68665f4c17edcceecc112dfed5dbe6f92261fb9d6054b47d01bf6371a6196126"}, + {file = "pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_aarch64.whl", hash = "sha256:20152074317d9bed6b7a95ade3b7d6054845d70584216160860425f4fbd5ee9e"}, + {file = "pydantic_core-2.23.4-cp312-cp312-musllinux_1_1_x86_64.whl", hash = "sha256:9261d3ce84fa1d38ed649c3638feefeae23d32ba9182963e465d58d62203bd24"}, + {file = "pydantic_core-2.23.4-cp312-none-win32.whl", hash = "sha256:4ba762ed58e8d68657fc1281e9bb72e1c3e79cc5d464be146e260c541ec12d84"}, + {file = "pydantic_core-2.23.4-cp312-none-win_amd64.whl", hash = 
"sha256:97df63000f4fea395b2824da80e169731088656d1818a11b95f3b173747b6cd9"}, + {file = "pydantic_core-2.23.4-cp313-cp313-macosx_10_12_x86_64.whl", hash = "sha256:7530e201d10d7d14abce4fb54cfe5b94a0aefc87da539d0346a484ead376c3cc"}, + {file = "pydantic_core-2.23.4-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:df933278128ea1cd77772673c73954e53a1c95a4fdf41eef97c2b779271bd0bd"}, + {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0cb3da3fd1b6a5d0279a01877713dbda118a2a4fc6f0d821a57da2e464793f05"}, + {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:42c6dcb030aefb668a2b7009c85b27f90e51e6a3b4d5c9bc4c57631292015b0d"}, + {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:696dd8d674d6ce621ab9d45b205df149399e4bb9aa34102c970b721554828510"}, + {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:2971bb5ffe72cc0f555c13e19b23c85b654dd2a8f7ab493c262071377bfce9f6"}, + {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8394d940e5d400d04cad4f75c0598665cbb81aecefaca82ca85bd28264af7f9b"}, + {file = "pydantic_core-2.23.4-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:0dff76e0602ca7d4cdaacc1ac4c005e0ce0dcfe095d5b5259163a80d3a10d327"}, + {file = "pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_aarch64.whl", hash = "sha256:7d32706badfe136888bdea71c0def994644e09fff0bfe47441deaed8e96fdbc6"}, + {file = "pydantic_core-2.23.4-cp313-cp313-musllinux_1_1_x86_64.whl", hash = "sha256:ed541d70698978a20eb63d8c5d72f2cc6d7079d9d90f6b50bad07826f1320f5f"}, + {file = "pydantic_core-2.23.4-cp313-none-win32.whl", hash = "sha256:3d5639516376dce1940ea36edf408c554475369f5da2abd45d44621cb616f769"}, + {file = "pydantic_core-2.23.4-cp313-none-win_amd64.whl", hash = 
"sha256:5a1504ad17ba4210df3a045132a7baeeba5a200e930f57512ee02909fc5c4cb5"}, + {file = "pydantic_core-2.23.4-cp38-cp38-macosx_10_12_x86_64.whl", hash = "sha256:d4488a93b071c04dc20f5cecc3631fc78b9789dd72483ba15d423b5b3689b555"}, + {file = "pydantic_core-2.23.4-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:81965a16b675b35e1d09dd14df53f190f9129c0202356ed44ab2728b1c905658"}, + {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:4ffa2ebd4c8530079140dd2d7f794a9d9a73cbb8e9d59ffe24c63436efa8f271"}, + {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:61817945f2fe7d166e75fbfb28004034b48e44878177fc54d81688e7b85a3665"}, + {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:29d2c342c4bc01b88402d60189f3df065fb0dda3654744d5a165a5288a657368"}, + {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:5e11661ce0fd30a6790e8bcdf263b9ec5988e95e63cf901972107efc49218b13"}, + {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d18368b137c6295db49ce7218b1a9ba15c5bc254c96d7c9f9e924a9bc7825ad"}, + {file = "pydantic_core-2.23.4-cp38-cp38-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:ec4e55f79b1c4ffb2eecd8a0cfba9955a2588497d96851f4c8f99aa4a1d39b12"}, + {file = "pydantic_core-2.23.4-cp38-cp38-musllinux_1_1_aarch64.whl", hash = "sha256:374a5e5049eda9e0a44c696c7ade3ff355f06b1fe0bb945ea3cac2bc336478a2"}, + {file = "pydantic_core-2.23.4-cp38-cp38-musllinux_1_1_x86_64.whl", hash = "sha256:5c364564d17da23db1106787675fc7af45f2f7b58b4173bfdd105564e132e6fb"}, + {file = "pydantic_core-2.23.4-cp38-none-win32.whl", hash = "sha256:d7a80d21d613eec45e3d41eb22f8f94ddc758a6c4720842dc74c0581f54993d6"}, + {file = "pydantic_core-2.23.4-cp38-none-win_amd64.whl", hash = "sha256:5f5ff8d839f4566a474a969508fe1c5e59c31c80d9e140566f9a37bba7b8d556"}, + 
{file = "pydantic_core-2.23.4-cp39-cp39-macosx_10_12_x86_64.whl", hash = "sha256:a4fa4fc04dff799089689f4fd502ce7d59de529fc2f40a2c8836886c03e0175a"}, + {file = "pydantic_core-2.23.4-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:0a7df63886be5e270da67e0966cf4afbae86069501d35c8c1b3b6c168f42cb36"}, + {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dcedcd19a557e182628afa1d553c3895a9f825b936415d0dbd3cd0bbcfd29b4b"}, + {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_armv7l.manylinux2014_armv7l.whl", hash = "sha256:5f54b118ce5de9ac21c363d9b3caa6c800341e8c47a508787e5868c6b79c9323"}, + {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_ppc64le.manylinux2014_ppc64le.whl", hash = "sha256:86d2f57d3e1379a9525c5ab067b27dbb8a0642fb5d454e17a9ac434f9ce523e3"}, + {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_s390x.manylinux2014_s390x.whl", hash = "sha256:de6d1d1b9e5101508cb37ab0d972357cac5235f5c6533d1071964c47139257df"}, + {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:1278e0d324f6908e872730c9102b0112477a7f7cf88b308e4fc36ce1bdb6d58c"}, + {file = "pydantic_core-2.23.4-cp39-cp39-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:9a6b5099eeec78827553827f4c6b8615978bb4b6a88e5d9b93eddf8bb6790f55"}, + {file = "pydantic_core-2.23.4-cp39-cp39-musllinux_1_1_aarch64.whl", hash = "sha256:e55541f756f9b3ee346b840103f32779c695a19826a4c442b7954550a0972040"}, + {file = "pydantic_core-2.23.4-cp39-cp39-musllinux_1_1_x86_64.whl", hash = "sha256:a5c7ba8ffb6d6f8f2ab08743be203654bb1aaa8c9dcb09f82ddd34eadb695605"}, + {file = "pydantic_core-2.23.4-cp39-none-win32.whl", hash = "sha256:37b0fe330e4a58d3c58b24d91d1eb102aeec675a3db4c292ec3928ecd892a9a6"}, + {file = "pydantic_core-2.23.4-cp39-none-win_amd64.whl", hash = "sha256:1498bec4c05c9c787bde9125cfdcc63a41004ff167f495063191b863399b1a29"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", 
hash = "sha256:f455ee30a9d61d3e1a15abd5068827773d6e4dc513e795f380cdd59932c782d5"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:1e90d2e3bd2c3863d48525d297cd143fe541be8bbf6f579504b9712cb6b643ec"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:2e203fdf807ac7e12ab59ca2bfcabb38c7cf0b33c41efeb00f8e5da1d86af480"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e08277a400de01bc72436a0ccd02bdf596631411f592ad985dcee21445bd0068"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:f220b0eea5965dec25480b6333c788fb72ce5f9129e8759ef876a1d805d00801"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:d06b0c8da4f16d1d1e352134427cb194a0a6e19ad5db9161bf32b2113409e728"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:ba1a0996f6c2773bd83e63f18914c1de3c9dd26d55f4ac302a7efe93fb8e7433"}, + {file = "pydantic_core-2.23.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:9a5bce9d23aac8f0cf0836ecfc033896aa8443b501c58d0602dbfd5bd5b37753"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:78ddaaa81421a29574a682b3179d4cf9e6d405a09b99d93ddcf7e5239c742e21"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-macosx_11_0_arm64.whl", hash = "sha256:883a91b5dd7d26492ff2f04f40fbb652de40fcc0afe07e8129e8ae779c2110eb"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:88ad334a15b32a791ea935af224b9de1bf99bcd62fabf745d5f3442199d86d59"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:233710f069d251feb12a56da21e14cca67994eab08362207785cf8c598e74577"}, + {file = 
"pydantic_core-2.23.4-pp39-pypy39_pp73-manylinux_2_5_i686.manylinux1_i686.whl", hash = "sha256:19442362866a753485ba5e4be408964644dd6a09123d9416c54cd49171f50744"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-musllinux_1_1_aarch64.whl", hash = "sha256:624e278a7d29b6445e4e813af92af37820fafb6dcc55c012c834f9e26f9aaaef"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-musllinux_1_1_x86_64.whl", hash = "sha256:f5ef8f42bec47f21d07668a043f077d507e5bf4e668d5c6dfe6aaba89de1a5b8"}, + {file = "pydantic_core-2.23.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:aea443fffa9fbe3af1a9ba721a87f926fe548d32cab71d188a6ede77d0ff244e"}, + {file = "pydantic_core-2.23.4.tar.gz", hash = "sha256:2584f7cf844ac4d970fba483a717dbe10c1c1c96a969bf65d61ffe94df1b2863"}, ] [package.dependencies] @@ -4480,15 +4505,18 @@ files = [ [[package]] name = "pyreadline3" -version = "3.4.3" +version = "3.5.4" description = "A python implementation of GNU readline." optional = true -python-versions = "*" +python-versions = ">=3.8" files = [ - {file = "pyreadline3-3.4.3-py3-none-any.whl", hash = "sha256:f832c5898f4f9a0f81d48a8c499b39d0179de1a465ea3def1a7e7231840b4ed6"}, - {file = "pyreadline3-3.4.3.tar.gz", hash = "sha256:ebab0baca37f50e2faa1dd99a6da1c75de60e0d68a3b229c134bbd12786250e2"}, + {file = "pyreadline3-3.5.4-py3-none-any.whl", hash = "sha256:eaf8e6cc3c49bcccf145fc6067ba8643d1df34d604a1ec0eccbf7a18e6d3fae6"}, + {file = "pyreadline3-3.5.4.tar.gz", hash = "sha256:8d57d53039a1c75adba8e50dd3d992b28143480816187ea5efbd5c78e6c885b7"}, ] +[package.extras] +dev = ["build", "flake8", "mypy", "pytest", "twine"] + [[package]] name = "pytest" version = "8.3.3" @@ -5407,18 +5435,18 @@ test = ["Cython", "array-api-strict (>=2.0)", "asv", "gmpy2", "hypothesis (>=6.3 [[package]] name = "setuptools" -version = "74.1.2" +version = "75.1.0" description = "Easily download, build, install, upgrade, and uninstall Python packages" optional = false python-versions = ">=3.8" files = [ - {file = 
"setuptools-74.1.2-py3-none-any.whl", hash = "sha256:5f4c08aa4d3ebcb57a50c33b1b07e94315d7fc7230f7115e47fc99776c8ce308"}, - {file = "setuptools-74.1.2.tar.gz", hash = "sha256:95b40ed940a1c67eb70fc099094bd6e99c6ee7c23aa2306f4d2697ba7916f9c6"}, + {file = "setuptools-75.1.0-py3-none-any.whl", hash = "sha256:35ab7fd3bcd95e6b7fd704e4a1539513edad446c097797f2985e0e4b960772f2"}, + {file = "setuptools-75.1.0.tar.gz", hash = "sha256:d59a21b17a275fb872a9c3dae73963160ae079f1049ed956880cd7c09b120538"}, ] [package.extras] check = ["pytest-checkdocs (>=2.4)", "pytest-ruff (>=0.2.1)", "ruff (>=0.5.2)"] -core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.text (>=3.7)", "more-itertools (>=8.8)", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] +core = ["importlib-metadata (>=6)", "importlib-resources (>=5.10.2)", "jaraco.collections", "jaraco.functools", "jaraco.text (>=3.7)", "more-itertools", "more-itertools (>=8.8)", "packaging", "packaging (>=24)", "platformdirs (>=2.6.2)", "tomli (>=2.0.1)", "wheel (>=0.43.0)"] cover = ["pytest-cov"] doc = ["furo", "jaraco.packaging (>=9.3)", "jaraco.tidelift (>=1.4)", "pygments-github-lexers (==0.0.5)", "pyproject-hooks (!=1.1)", "rst.linker (>=1.9)", "sphinx (>=3.5)", "sphinx-favicon", "sphinx-inline-tabs", "sphinx-lint", "sphinx-notfound-page (>=1,<2)", "sphinx-reredirects", "sphinxcontrib-towncrier", "towncrier (<24.7)"] enabler = ["pytest-enabler (>=2.2)"] @@ -5460,48 +5488,60 @@ files = [ [[package]] name = "sqlalchemy" -version = "2.0.34" +version = "2.0.35" description = "Database Abstraction Library" optional = true python-versions = ">=3.7" files = [ - {file = "SQLAlchemy-2.0.34-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:95d0b2cf8791ab5fb9e3aa3d9a79a0d5d51f55b6357eecf532a120ba3b5524db"}, - {file = "SQLAlchemy-2.0.34-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:243f92596f4fd4c8bd30ab8e8dd5965afe226363d75cab2468f2c707f64cd83b"}, - {file = 
"SQLAlchemy-2.0.34-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:173f5f122d2e1bff8fbd9f7811b7942bead1f5e9f371cdf9e670b327e6703ebd"}, - {file = "SQLAlchemy-2.0.34-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:bd90c221ed4e60ac9d476db967f436cfcecbd4ef744537c0f2d5291439848768"}, - {file = "SQLAlchemy-2.0.34-cp310-cp310-win32.whl", hash = "sha256:3166dfff2d16fe9be3241ee60ece6fcb01cf8e74dd7c5e0b64f8e19fab44911b"}, - {file = "SQLAlchemy-2.0.34-cp310-cp310-win_amd64.whl", hash = "sha256:6831a78bbd3c40f909b3e5233f87341f12d0b34a58f14115c9e94b4cdaf726d3"}, - {file = "SQLAlchemy-2.0.34-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:c7db3db284a0edaebe87f8f6642c2b2c27ed85c3e70064b84d1c9e4ec06d5d84"}, - {file = "SQLAlchemy-2.0.34-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:430093fce0efc7941d911d34f75a70084f12f6ca5c15d19595c18753edb7c33b"}, - {file = "SQLAlchemy-2.0.34-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:fb1b30f31a36c7f3fee848391ff77eebdd3af5750bf95fbf9b8b5323edfdb4ec"}, - {file = "SQLAlchemy-2.0.34-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:80bd73ea335203b125cf1d8e50fef06be709619eb6ab9e7b891ea34b5baa2287"}, - {file = "SQLAlchemy-2.0.34-cp311-cp311-win32.whl", hash = "sha256:6daeb8382d0df526372abd9cb795c992e18eed25ef2c43afe518c73f8cccb721"}, - {file = "SQLAlchemy-2.0.34-cp311-cp311-win_amd64.whl", hash = "sha256:5bc08e75ed11693ecb648b7a0a4ed80da6d10845e44be0c98c03f2f880b68ff4"}, - {file = "SQLAlchemy-2.0.34-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:53e68b091492c8ed2bd0141e00ad3089bcc6bf0e6ec4142ad6505b4afe64163e"}, - {file = "SQLAlchemy-2.0.34-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:bcd18441a49499bf5528deaa9dee1f5c01ca491fc2791b13604e8f972877f812"}, - {file = "SQLAlchemy-2.0.34-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:c3330415cd387d2b88600e8e26b510d0370db9b7eaf984354a43e19c40df2e2b"}, - {file = 
"SQLAlchemy-2.0.34-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:7cee4c6917857fd6121ed84f56d1dc78eb1d0e87f845ab5a568aba73e78adf83"}, - {file = "SQLAlchemy-2.0.34-cp312-cp312-win32.whl", hash = "sha256:fbb034f565ecbe6c530dff948239377ba859420d146d5f62f0271407ffb8c580"}, - {file = "SQLAlchemy-2.0.34-cp312-cp312-win_amd64.whl", hash = "sha256:707c8f44931a4facd4149b52b75b80544a8d824162602b8cd2fe788207307f9a"}, - {file = "SQLAlchemy-2.0.34-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:24af3dc43568f3780b7e1e57c49b41d98b2d940c1fd2e62d65d3928b6f95f021"}, - {file = "SQLAlchemy-2.0.34-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:413c85cd0177c23e32dee6898c67a5f49296640041d98fddb2c40888fe4daa2e"}, - {file = "SQLAlchemy-2.0.34-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:526ce723265643dbc4c7efb54f56648cc30e7abe20f387d763364b3ce7506c82"}, - {file = "SQLAlchemy-2.0.34-cp37-cp37m-win32.whl", hash = "sha256:13be2cc683b76977a700948411a94c67ad8faf542fa7da2a4b167f2244781cf3"}, - {file = "SQLAlchemy-2.0.34-cp37-cp37m-win_amd64.whl", hash = "sha256:e54ef33ea80d464c3dcfe881eb00ad5921b60f8115ea1a30d781653edc2fd6a2"}, - {file = "SQLAlchemy-2.0.34-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:43f28005141165edd11fbbf1541c920bd29e167b8bbc1fb410d4fe2269c1667a"}, - {file = "SQLAlchemy-2.0.34-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:b68094b165a9e930aedef90725a8fcfafe9ef95370cbb54abc0464062dbf808f"}, - {file = "SQLAlchemy-2.0.34-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:203d46bddeaa7982f9c3cc693e5bc93db476ab5de9d4b4640d5c99ff219bee8c"}, - {file = "SQLAlchemy-2.0.34-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:9661268415f450c95f72f0ac1217cc6f10256f860eed85c2ae32e75b60278ad8"}, - {file = "SQLAlchemy-2.0.34-cp38-cp38-win32.whl", hash = "sha256:895184dfef8708e15f7516bd930bda7e50ead069280d2ce09ba11781b630a434"}, - {file = "SQLAlchemy-2.0.34-cp38-cp38-win_amd64.whl", hash = 
"sha256:6e7cde3a2221aa89247944cafb1b26616380e30c63e37ed19ff0bba5e968688d"}, - {file = "SQLAlchemy-2.0.34-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:dbcdf987f3aceef9763b6d7b1fd3e4ee210ddd26cac421d78b3c206d07b2700b"}, - {file = "SQLAlchemy-2.0.34-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:ce119fc4ce0d64124d37f66a6f2a584fddc3c5001755f8a49f1ca0a177ef9796"}, - {file = "SQLAlchemy-2.0.34-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ebc11c54c6ecdd07bb4efbfa1554538982f5432dfb8456958b6d46b9f834bb7"}, - {file = "SQLAlchemy-2.0.34-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:220574e78ad986aea8e81ac68821e47ea9202b7e44f251b7ed8c66d9ae3f4278"}, - {file = "SQLAlchemy-2.0.34-cp39-cp39-win32.whl", hash = "sha256:b75b00083e7fe6621ce13cfce9d4469c4774e55e8e9d38c305b37f13cf1e874c"}, - {file = "SQLAlchemy-2.0.34-cp39-cp39-win_amd64.whl", hash = "sha256:c29d03e0adf3cc1a8c3ec62d176824972ae29b67a66cbb18daff3062acc6faa8"}, - {file = "SQLAlchemy-2.0.34-py3-none-any.whl", hash = "sha256:7286c353ee6475613d8beff83167374006c6b3e3f0e6491bfe8ca610eb1dec0f"}, - {file = "sqlalchemy-2.0.34.tar.gz", hash = "sha256:10d8f36990dd929690666679b0f42235c159a7051534adb135728ee52828dd22"}, + {file = "SQLAlchemy-2.0.35-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:67219632be22f14750f0d1c70e62f204ba69d28f62fd6432ba05ab295853de9b"}, + {file = "SQLAlchemy-2.0.35-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:4668bd8faf7e5b71c0319407b608f278f279668f358857dbfd10ef1954ac9f90"}, + {file = "SQLAlchemy-2.0.35-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:cb8bea573863762bbf45d1e13f87c2d2fd32cee2dbd50d050f83f87429c9e1ea"}, + {file = "SQLAlchemy-2.0.35-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f552023710d4b93d8fb29a91fadf97de89c5926c6bd758897875435f2a939f33"}, + {file = "SQLAlchemy-2.0.35-cp310-cp310-musllinux_1_2_aarch64.whl", hash = 
"sha256:016b2e665f778f13d3c438651dd4de244214b527a275e0acf1d44c05bc6026a9"}, + {file = "SQLAlchemy-2.0.35-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:7befc148de64b6060937231cbff8d01ccf0bfd75aa26383ffdf8d82b12ec04ff"}, + {file = "SQLAlchemy-2.0.35-cp310-cp310-win32.whl", hash = "sha256:22b83aed390e3099584b839b93f80a0f4a95ee7f48270c97c90acd40ee646f0b"}, + {file = "SQLAlchemy-2.0.35-cp310-cp310-win_amd64.whl", hash = "sha256:a29762cd3d116585278ffb2e5b8cc311fb095ea278b96feef28d0b423154858e"}, + {file = "SQLAlchemy-2.0.35-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:e21f66748ab725ade40fa7af8ec8b5019c68ab00b929f6643e1b1af461eddb60"}, + {file = "SQLAlchemy-2.0.35-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:8a6219108a15fc6d24de499d0d515c7235c617b2540d97116b663dade1a54d62"}, + {file = "SQLAlchemy-2.0.35-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:042622a5306c23b972192283f4e22372da3b8ddf5f7aac1cc5d9c9b222ab3ff6"}, + {file = "SQLAlchemy-2.0.35-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:627dee0c280eea91aed87b20a1f849e9ae2fe719d52cbf847c0e0ea34464b3f7"}, + {file = "SQLAlchemy-2.0.35-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:4fdcd72a789c1c31ed242fd8c1bcd9ea186a98ee8e5408a50e610edfef980d71"}, + {file = "SQLAlchemy-2.0.35-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:89b64cd8898a3a6f642db4eb7b26d1b28a497d4022eccd7717ca066823e9fb01"}, + {file = "SQLAlchemy-2.0.35-cp311-cp311-win32.whl", hash = "sha256:6a93c5a0dfe8d34951e8a6f499a9479ffb9258123551fa007fc708ae2ac2bc5e"}, + {file = "SQLAlchemy-2.0.35-cp311-cp311-win_amd64.whl", hash = "sha256:c68fe3fcde03920c46697585620135b4ecfdfc1ed23e75cc2c2ae9f8502c10b8"}, + {file = "SQLAlchemy-2.0.35-cp312-cp312-macosx_10_9_x86_64.whl", hash = "sha256:eb60b026d8ad0c97917cb81d3662d0b39b8ff1335e3fabb24984c6acd0c900a2"}, + {file = "SQLAlchemy-2.0.35-cp312-cp312-macosx_11_0_arm64.whl", hash = 
"sha256:6921ee01caf375363be5e9ae70d08ce7ca9d7e0e8983183080211a062d299468"}, + {file = "SQLAlchemy-2.0.35-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:8cdf1a0dbe5ced887a9b127da4ffd7354e9c1a3b9bb330dce84df6b70ccb3a8d"}, + {file = "SQLAlchemy-2.0.35-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:93a71c8601e823236ac0e5d087e4f397874a421017b3318fd92c0b14acf2b6db"}, + {file = "SQLAlchemy-2.0.35-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:e04b622bb8a88f10e439084486f2f6349bf4d50605ac3e445869c7ea5cf0fa8c"}, + {file = "SQLAlchemy-2.0.35-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:1b56961e2d31389aaadf4906d453859f35302b4eb818d34a26fab72596076bb8"}, + {file = "SQLAlchemy-2.0.35-cp312-cp312-win32.whl", hash = "sha256:0f9f3f9a3763b9c4deb8c5d09c4cc52ffe49f9876af41cc1b2ad0138878453cf"}, + {file = "SQLAlchemy-2.0.35-cp312-cp312-win_amd64.whl", hash = "sha256:25b0f63e7fcc2a6290cb5f7f5b4fc4047843504983a28856ce9b35d8f7de03cc"}, + {file = "SQLAlchemy-2.0.35-cp37-cp37m-macosx_10_9_x86_64.whl", hash = "sha256:f021d334f2ca692523aaf7bbf7592ceff70c8594fad853416a81d66b35e3abf9"}, + {file = "SQLAlchemy-2.0.35-cp37-cp37m-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:05c3f58cf91683102f2f0265c0db3bd3892e9eedabe059720492dbaa4f922da1"}, + {file = "SQLAlchemy-2.0.35-cp37-cp37m-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:032d979ce77a6c2432653322ba4cbeabf5a6837f704d16fa38b5a05d8e21fa00"}, + {file = "SQLAlchemy-2.0.35-cp37-cp37m-musllinux_1_2_aarch64.whl", hash = "sha256:2e795c2f7d7249b75bb5f479b432a51b59041580d20599d4e112b5f2046437a3"}, + {file = "SQLAlchemy-2.0.35-cp37-cp37m-musllinux_1_2_x86_64.whl", hash = "sha256:cc32b2990fc34380ec2f6195f33a76b6cdaa9eecf09f0c9404b74fc120aef36f"}, + {file = "SQLAlchemy-2.0.35-cp37-cp37m-win32.whl", hash = "sha256:9509c4123491d0e63fb5e16199e09f8e262066e58903e84615c301dde8fa2e87"}, + {file = "SQLAlchemy-2.0.35-cp37-cp37m-win_amd64.whl", hash = 
"sha256:3655af10ebcc0f1e4e06c5900bb33e080d6a1fa4228f502121f28a3b1753cde5"}, + {file = "SQLAlchemy-2.0.35-cp38-cp38-macosx_10_9_x86_64.whl", hash = "sha256:4c31943b61ed8fdd63dfd12ccc919f2bf95eefca133767db6fbbd15da62078ec"}, + {file = "SQLAlchemy-2.0.35-cp38-cp38-macosx_11_0_arm64.whl", hash = "sha256:a62dd5d7cc8626a3634208df458c5fe4f21200d96a74d122c83bc2015b333bc1"}, + {file = "SQLAlchemy-2.0.35-cp38-cp38-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:0630774b0977804fba4b6bbea6852ab56c14965a2b0c7fc7282c5f7d90a1ae72"}, + {file = "SQLAlchemy-2.0.35-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8d625eddf7efeba2abfd9c014a22c0f6b3796e0ffb48f5d5ab106568ef01ff5a"}, + {file = "SQLAlchemy-2.0.35-cp38-cp38-musllinux_1_2_aarch64.whl", hash = "sha256:ada603db10bb865bbe591939de854faf2c60f43c9b763e90f653224138f910d9"}, + {file = "SQLAlchemy-2.0.35-cp38-cp38-musllinux_1_2_x86_64.whl", hash = "sha256:c41411e192f8d3ea39ea70e0fae48762cd11a2244e03751a98bd3c0ca9a4e936"}, + {file = "SQLAlchemy-2.0.35-cp38-cp38-win32.whl", hash = "sha256:d299797d75cd747e7797b1b41817111406b8b10a4f88b6e8fe5b5e59598b43b0"}, + {file = "SQLAlchemy-2.0.35-cp38-cp38-win_amd64.whl", hash = "sha256:0375a141e1c0878103eb3d719eb6d5aa444b490c96f3fedab8471c7f6ffe70ee"}, + {file = "SQLAlchemy-2.0.35-cp39-cp39-macosx_10_9_x86_64.whl", hash = "sha256:ccae5de2a0140d8be6838c331604f91d6fafd0735dbdcee1ac78fc8fbaba76b4"}, + {file = "SQLAlchemy-2.0.35-cp39-cp39-macosx_11_0_arm64.whl", hash = "sha256:2a275a806f73e849e1c309ac11108ea1a14cd7058577aba962cd7190e27c9e3c"}, + {file = "SQLAlchemy-2.0.35-cp39-cp39-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:732e026240cdd1c1b2e3ac515c7a23820430ed94292ce33806a95869c46bd139"}, + {file = "SQLAlchemy-2.0.35-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:890da8cd1941fa3dab28c5bac3b9da8502e7e366f895b3b8e500896f12f94d11"}, + {file = "SQLAlchemy-2.0.35-cp39-cp39-musllinux_1_2_aarch64.whl", hash = 
"sha256:c0d8326269dbf944b9201911b0d9f3dc524d64779a07518199a58384c3d37a44"}, + {file = "SQLAlchemy-2.0.35-cp39-cp39-musllinux_1_2_x86_64.whl", hash = "sha256:b76d63495b0508ab9fc23f8152bac63205d2a704cd009a2b0722f4c8e0cba8e0"}, + {file = "SQLAlchemy-2.0.35-cp39-cp39-win32.whl", hash = "sha256:69683e02e8a9de37f17985905a5eca18ad651bf592314b4d3d799029797d0eb3"}, + {file = "SQLAlchemy-2.0.35-cp39-cp39-win_amd64.whl", hash = "sha256:aee110e4ef3c528f3abbc3c2018c121e708938adeeff9006428dd7c8555e9b3f"}, + {file = "SQLAlchemy-2.0.35-py3-none-any.whl", hash = "sha256:2ab3f0336c0387662ce6221ad30ab3a5e6499aab01b9790879b6578fd9b8faa1"}, + {file = "sqlalchemy-2.0.35.tar.gz", hash = "sha256:e11d7ea4d24f0a262bccf9a7cd6284c976c5369dac21db237cff59586045ab9f"}, ] [package.dependencies] @@ -5535,13 +5575,13 @@ sqlcipher = ["sqlcipher3_binary"] [[package]] name = "starlette" -version = "0.36.3" +version = "0.38.5" description = "The little ASGI library that shines." optional = false python-versions = ">=3.8" files = [ - {file = "starlette-0.36.3-py3-none-any.whl", hash = "sha256:13d429aa93a61dc40bf503e8c801db1f1bca3dc706b10ef2434a36123568f044"}, - {file = "starlette-0.36.3.tar.gz", hash = "sha256:90a671733cfb35771d8cc605e0b679d23b992f8dcfad48cc60b38cb29aeb7080"}, + {file = "starlette-0.38.5-py3-none-any.whl", hash = "sha256:632f420a9d13e3ee2a6f18f437b0a9f1faecb0bc42e1942aa2ea0e379a4c4206"}, + {file = "starlette-0.38.5.tar.gz", hash = "sha256:04a92830a9b6eb1442c766199d62260c3d4dc9c4f9188360626b1e0273cb7077"}, ] [package.dependencies] @@ -5552,13 +5592,13 @@ full = ["httpx (>=0.22.0)", "itsdangerous", "jinja2", "python-multipart (>=0.0.7 [[package]] name = "sympy" -version = "1.13.2" +version = "1.13.3" description = "Computer algebra system (CAS) in Python" optional = true python-versions = ">=3.8" files = [ - {file = "sympy-1.13.2-py3-none-any.whl", hash = "sha256:c51d75517712f1aed280d4ce58506a4a88d635d6b5dd48b39102a7ae1f3fcfe9"}, - {file = "sympy-1.13.2.tar.gz", hash = 
"sha256:401449d84d07be9d0c7a46a64bd54fe097667d5e7181bfe67ec777be9e01cb13"}, + {file = "sympy-1.13.3-py3-none-any.whl", hash = "sha256:54612cf55a62755ee71824ce692986f23c88ffa77207b30c1368eda4a7060f73"}, + {file = "sympy-1.13.3.tar.gz", hash = "sha256:b27fd2c6530e0ab39e275fc9b683895367e51d5da91baa8d3d64db2565fec4d9"}, ] [package.dependencies] @@ -5915,7 +5955,7 @@ scipy = ["scipy"] name = "tqdm" version = "4.66.5" description = "Fast, Extensible Progress Meter" -optional = true +optional = false python-versions = ">=3.7" files = [ {file = "tqdm-4.66.5-py3-none-any.whl", hash = "sha256:90279a3770753eafc9194a0364852159802111925aa30eb3f9d85b0e805ac7cd"}, @@ -6028,18 +6068,29 @@ tutorials = ["matplotlib", "pandas", "tabulate"] [[package]] name = "types-requests" -version = "2.32.0.20240907" +version = "2.32.0.20240914" description = "Typing stubs for requests" optional = false python-versions = ">=3.8" files = [ - {file = "types-requests-2.32.0.20240907.tar.gz", hash = "sha256:ff33935f061b5e81ec87997e91050f7b4af4f82027a7a7a9d9aaea04a963fdf8"}, - {file = "types_requests-2.32.0.20240907-py3-none-any.whl", hash = "sha256:1d1e79faeaf9d42def77f3c304893dea17a97cae98168ac69f3cb465516ee8da"}, + {file = "types-requests-2.32.0.20240914.tar.gz", hash = "sha256:2850e178db3919d9bf809e434eef65ba49d0e7e33ac92d588f4a5e295fffd405"}, + {file = "types_requests-2.32.0.20240914-py3-none-any.whl", hash = "sha256:59c2f673eb55f32a99b2894faf6020e1a9f4a402ad0f192bfee0b64469054310"}, ] [package.dependencies] urllib3 = ">=2" +[[package]] +name = "types-toml" +version = "0.10.8.20240310" +description = "Typing stubs for toml" +optional = false +python-versions = ">=3.8" +files = [ + {file = "types-toml-0.10.8.20240310.tar.gz", hash = "sha256:3d41501302972436a6b8b239c850b26689657e25281b48ff0ec06345b8830331"}, + {file = "types_toml-0.10.8.20240310-py3-none-any.whl", hash = "sha256:627b47775d25fa29977d9c70dc0cbab3f314f32c8d8d0c012f2ef5de7aaec05d"}, +] + [[package]] name = "typing-extensions" version = 
"4.12.2" @@ -6096,13 +6147,13 @@ devenv = ["check-manifest", "pytest (>=4.3)", "pytest-cov", "pytest-mock (>=3.3) [[package]] name = "unstructured" -version = "0.15.12" +version = "0.15.13" description = "A library that prepares raw documents for downstream ML tasks." optional = true python-versions = "<3.13,>=3.9.0" files = [ - {file = "unstructured-0.15.12-py3-none-any.whl", hash = "sha256:a789c8bfde6da99bb9985301b19a8ed06e1c226f447921683a939d3412f72403"}, - {file = "unstructured-0.15.12.tar.gz", hash = "sha256:22af44a9c949f9239d2eab2826e002fbbbbdb534a1698d3319a107f982feac2b"}, + {file = "unstructured-0.15.13-py3-none-any.whl", hash = "sha256:6885add1bb1e4e428cf76f160f4a6f5aed6e707b3b22e69c716efb19a4957dbf"}, + {file = "unstructured-0.15.13.tar.gz", hash = "sha256:3d62573d8f0caca9211ce5e7f2705d8c7ab67e4427bd18259e218a19bbb21c25"}, ] [package.dependencies] @@ -6155,7 +6206,7 @@ azure-cognitive-search = ["azure-search-documents"] bedrock = ["boto3", "langchain-community"] biomed = ["bs4"] box = ["boxfs", "fsspec"] -chroma = ["chromadb", "importlib-metadata (>=8.2.0)", "tenacity (==8.5.0)", "typer (<=0.9.0)"] +chroma = ["chromadb (>0.4.14)", "importlib-metadata (>=8.2.0)", "tenacity (==8.5.0)", "typer (<=0.9.0)"] clarifai = ["clarifai"] confluence = ["atlassian-python-api"] csv = ["pandas"] @@ -6214,18 +6265,19 @@ xlsx = ["networkx", "openpyxl", "pandas", "xlrd"] [[package]] name = "unstructured-client" -version = "0.25.8" +version = "0.25.9" description = "Python Client SDK for Unstructured API" optional = false python-versions = ">=3.8" files = [ - {file = "unstructured-client-0.25.8.tar.gz", hash = "sha256:f2023c6110c1fcf84d0086fb0ba2b5a60e93a4d8d6518dd5213bcfe60449493c"}, - {file = "unstructured_client-0.25.8-py3-none-any.whl", hash = "sha256:79b23bce01842ea22d4efd832b015877bac39597f90f1cc8d393d4dc0f811279"}, + {file = "unstructured-client-0.25.9.tar.gz", hash = "sha256:fcc461623f58fefb0e22508e28bf653a8f6934b9779cb4a90dd68d77a39fb5b2"}, + {file = 
"unstructured_client-0.25.9-py3-none-any.whl", hash = "sha256:c984c01878c8fc243be7c842467d1113a194d885ab6396ae74258ee42717c5b5"}, ] [package.dependencies] certifi = ">=2023.7.22" charset-normalizer = ">=3.2.0" +cryptography = ">=3.1" dataclasses-json = ">=0.6.4" deepdiff = ">=6.0" httpx = ">=0.27.0" @@ -6346,13 +6398,13 @@ text-embedding = ["sentence-transformers (==2.*)"] [[package]] name = "virtualenv" -version = "20.26.4" +version = "20.26.5" description = "Virtual Python Environment builder" optional = false python-versions = ">=3.7" files = [ - {file = "virtualenv-20.26.4-py3-none-any.whl", hash = "sha256:48f2695d9809277003f30776d155615ffc11328e6a0a8c1f0ec80188d7874a55"}, - {file = "virtualenv-20.26.4.tar.gz", hash = "sha256:c17f4e0f3e6036e9f26700446f85c76ab11df65ff6d8a9cbfad9f71aabfcf23c"}, + {file = "virtualenv-20.26.5-py3-none-any.whl", hash = "sha256:4f3ac17b81fba3ce3bd6f4ead2749a72da5929c01774948e243db9ba41df4ff6"}, + {file = "virtualenv-20.26.5.tar.gz", hash = "sha256:ce489cac131aa58f4b25e321d6d186171f78e6cb13fafbf32a840cee67733ff4"}, ] [package.dependencies] @@ -6615,4 +6667,4 @@ core-ingest-movies = ["moviepy", "opencv-python"] [metadata] lock-version = "2.0" python-versions = ">=3.10,<3.13" -content-hash = "c883eaea03a71ab7036c7fac91483483a3cb12d78c626522b2a4f2dd2b370fd6" +content-hash = "cca9a590635991763e49a7f960abf73ba9893c2f24dbe77fdb8e5ae03bd00499" diff --git a/py/pyproject.toml b/py/pyproject.toml index e0dae7448..aeda0ac5e 100644 --- a/py/pyproject.toml +++ b/py/pyproject.toml @@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "r2r" readme = "README.md" -version = "3.1.38" +version = "3.1.40" description = "SciPhi R2R" authors = ["Owen Colegrove "] license = "MIT" @@ -15,6 +15,7 @@ packages = [ { include = "core", from = "." }, { include = "cli", from = "." }, { include = "sdk", from = "." }, + { include = "shared", from = "." 
}, ] [tool.poetry.dependencies] @@ -23,10 +24,11 @@ python = ">=3.10,<3.13" boto3 = "^1.35.17" # for AWS bedrock support click = "^8.0.0" -fastapi = "^0.109.2" +fastapi = "^0.114.0" httpx = "^0.27.0" nest-asyncio = "^1.6.0" numpy = ">=1.22.4,<1.29.0" +openai = "^1.11.1" python-dotenv = "^1.0.1" hatchet-sdk = "^0.36.17" requests = "^2.31.0" @@ -37,7 +39,6 @@ apscheduler = "^3.10.4" # Core dependencies (optional) future = { version = "^1.0.0", optional = true } -openai = { version = "^1.11.1", optional = true } pydantic = { extras = ["email"], version = "^2.8.2", optional = true } python-multipart = { version = "^0.0.9", optional = true } fire = { version = "^0.5.0", optional = true } @@ -85,6 +86,7 @@ core-ingest-movies = ["opencv-python", "moviepy"] [tool.poetry.group.dev.dependencies] black = "^24.3.0" codecov = "^2.1.13" +colorama = "^0.4.6" flake8 = "6.1.0" isort = "5.12.0" mypy = "^1.5.1" @@ -94,7 +96,7 @@ pytest-asyncio = "^0.23.6" pytest-dependency = "^0.6.0" pytest-mock = "^3.14.0" pytest-cov = "^5.0.0" -colorama = "^0.4.6" +types-toml = "^0.10.8" [tool.black] line-length = 79 diff --git a/py/r2r.toml b/py/r2r.toml index 4a855f5dd..6b2d857fb 100644 --- a/py/r2r.toml +++ b/py/r2r.toml @@ -1,3 +1,7 @@ +[agent] +system_instruction_name = "rag_agent" +tool_names = ["search"] + [auth] provider = "r2r" access_token_lifetime_in_minutes = 60 @@ -43,12 +47,15 @@ add_title_as_prefix = false rerank_model = "None" concurrent_request_limit = 256 +[file] +provider = "postgres" + [kg] provider = "neo4j" batch_size = 256 -kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" [kg.kg_creation_settings] + kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" entity_types = [] # if empty, all entities are extracted relation_types = [] # if empty, all relations are extracted fragment_merge_count = 4 # number of fragments to merge into a single extraction @@ -56,31 +63,27 @@ kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot" generation_config = { 
model = "gpt-4o-mini" } # and other params, model used for triplet extraction [kg.kg_enrichment_settings] + community_reports_prompt = "graphrag_community_reports_prompt" max_description_input_length = 65536 generation_config = { model = "gpt-4o-mini" } # and other params, model used for node description and graph clustering leiden_params = { max_levels = 10 } # more params here: https://neo4j.com/docs/graph-data-science/current/algorithms/leiden/ - [kg.kg_search_config] - model = "gpt-4o-mini" + [kg.kg_search_settings] + map_system_prompt = "graphrag_map_system_prompt" + reduce_system_prompt = "graphrag_reduce_system_prompt" + generation_config = { model = "gpt-4o-mini" } [logging] provider = "local" log_table = "logs" log_info_table = "log_info" +[orchestration] +provider = "hatchet" + [parsing] provider = "unstructured_local" excluded_parsers = ["mp4"] [prompt] provider = "r2r" - -[agent] -system_instruction_name = "rag_agent" -tool_names = ["search"] - -[file] -provider = "postgres" - -[orchestration] -provider = "hatchet" diff --git a/py/r2r/__init__.py b/py/r2r/__init__.py index 5b07351d6..ab98a96ed 100644 --- a/py/r2r/__init__.py +++ b/py/r2r/__init__.py @@ -3,6 +3,7 @@ import toml from sdk import * +from shared import * pyproject_path = Path(__file__).parent.parent / "pyproject.toml" pyproject_data = toml.load(pyproject_path) diff --git a/py/sdk/ingestion.py b/py/sdk/ingestion.py index 818dacf1b..4e663a986 100644 --- a/py/sdk/ingestion.py +++ b/py/sdk/ingestion.py @@ -37,14 +37,13 @@ async def ingest_files( raise ValueError( "Number of metadatas must match number of document IDs." 
) - if ( - chunking_config is not None - and chunking_config is not ChunkingConfig + if chunking_config is not None and not isinstance( + chunking_config, ChunkingConfig ): # check if the provided dict maps to a ChunkingConfig ChunkingConfig(**chunking_config) - all_file_paths = [] + all_file_paths: list[str] = [] for path in file_paths: if os.path.isdir(path): for root, _, files in os.walk(path): @@ -55,7 +54,7 @@ async def ingest_files( all_file_paths.append(path) with ExitStack() as stack: - files = [ + files_tuples = [ ( "files", ( @@ -84,8 +83,9 @@ async def ingest_files( else None ), } + return await client._make_request( - "POST", "ingest_files", data=data, files=files + "POST", "ingest_files", data=data, files=files_tuples ) @staticmethod @@ -159,7 +159,7 @@ async def update_files( data["metadatas"] = json.dumps(metadatas) if chunking_config: data["chunking_config"] = ( - chunking_config.model_dump() + chunking_config.model_dump() # type: ignore if isinstance(chunking_config, ChunkingConfig) else chunking_config ) diff --git a/py/sdk/management.py b/py/sdk/management.py index 1feb9ccce..f1ea1d999 100644 --- a/py/sdk/management.py +++ b/py/sdk/management.py @@ -22,7 +22,7 @@ async def update_prompt( Returns: dict: The response from the server. """ - data = {name: name} + data: dict = {name: name} if template is not None: data["template"] = template if input_types is not None: @@ -82,31 +82,6 @@ async def get_prompt( "GET", f"get_prompt/{prompt_name}", params=params ) - @staticmethod - async def update_prompt( - client, - name: str, - template: Optional[str] = None, - input_types: Optional[dict[str, str]] = None, - ) -> dict: - """ - Update an existing prompt in the system. - - Args: - name (str): The name of the prompt to update. - template (Optional[str]): The new template for the prompt. - input_types (Optional[dict[str, str]]): The new input types for the prompt. - - Returns: - dict: The response from the server. 
- """ - data = {"name": name} - if template is not None: - data["template"] = template - if input_types is not None: - data["input_types"] = input_types - return await client._make_request("POST", "update_prompt", json=data) - @staticmethod async def get_all_prompts(client) -> dict: """ @@ -209,7 +184,7 @@ async def users_overview( Returns: dict: The overview of users in the system. """ - params = {} + params: dict = {} if user_ids is not None: params["user_ids"] = [str(uid) for uid in user_ids] if offset is not None: @@ -274,7 +249,7 @@ async def documents_overview( Returns: dict: The overview of documents in the system. """ - params = {} + params: dict = {} document_ids = ( [str(doc_id) for doc_id in document_ids] if document_ids else None ) @@ -304,7 +279,7 @@ async def document_chunks( Returns: dict: The chunks for the document. """ - params = {} + params: dict = {} if offset is not None: params["offset"] = offset if limit is not None: @@ -360,7 +335,7 @@ async def collections_overview( Returns: dict: The overview of collections in the system. """ - params = {} + params: dict = {} if collection_ids: params["collection_ids"] = collection_ids if offset: @@ -475,7 +450,7 @@ async def delete_user( Returns: dict: The response from the server. 
""" - params = {} + params: dict = {} if password is not None: params["password"] = password if delete_vector_data: diff --git a/py/sdk/models.py b/py/sdk/models.py index 1dd6cfca7..ce39eaf67 100644 --- a/py/sdk/models.py +++ b/py/sdk/models.py @@ -1,615 +1,56 @@ -from abc import ABC, abstractmethod -from datetime import datetime from enum import Enum -from typing import Any, ClassVar, Dict, Optional, Type, Union -from uuid import UUID - -from pydantic import BaseModel, Field - - -class GenerationConfig(BaseModel): - _defaults: ClassVar[dict] = { - "model": "openai/gpt-4o", - "temperature": 0.1, - "top_p": 1.0, - "max_tokens_to_sample": 1024, - "stream": False, - "functions": None, - "tools": None, - "add_generation_kwargs": None, - "api_base": None, - } - - model: str = Field( - default_factory=lambda: GenerationConfig._defaults["model"] - ) - temperature: float = Field( - default_factory=lambda: GenerationConfig._defaults["temperature"] - ) - top_p: float = Field( - default_factory=lambda: GenerationConfig._defaults["top_p"] - ) - max_tokens_to_sample: int = Field( - default_factory=lambda: GenerationConfig._defaults[ - "max_tokens_to_sample" - ] - ) - stream: bool = Field( - default_factory=lambda: GenerationConfig._defaults["stream"] - ) - functions: Optional[list[dict]] = Field( - default_factory=lambda: GenerationConfig._defaults["functions"] - ) - tools: Optional[list[dict]] = Field( - default_factory=lambda: GenerationConfig._defaults["tools"] - ) - add_generation_kwargs: Optional[dict] = Field( - default_factory=lambda: GenerationConfig._defaults[ - "add_generation_kwargs" - ] - ) - api_base: Optional[str] = Field( - default_factory=lambda: GenerationConfig._defaults["api_base"] - ) - - @classmethod - def set_default(cls, **kwargs): - for key, value in kwargs.items(): - if key in cls._defaults: - cls._defaults[key] = value - else: - raise AttributeError( - f"No default attribute '{key}' in GenerationConfig" - ) - - def __init__(self, **data): - model = 
data.pop("model", None) - if model is not None: - super().__init__(model=model, **data) - else: - super().__init__(**data) - - -class KGSearchSettings(BaseModel): - use_kg_search: bool = False - kg_search_type: str = "global" # 'global' or 'local' - kg_search_level: Optional[str] = None - kg_search_generation_config: Optional[GenerationConfig] = Field( - default_factory=GenerationConfig - ) - entity_types: list = [] - relationships: list = [] - max_community_description_length: int = 65536 - max_llm_queries_for_global_search: int = 250 - local_search_limits: dict[str, int] = { - "__Entity__": 20, - "__Relationship__": 20, - "__Community__": 20, - } - - -class ProviderConfig(BaseModel, ABC): - """A base provider configuration class""" - - extra_fields: dict[str, Any] = {} - provider: Optional[str] = None - - class Config: - arbitrary_types_allowed = True - ignore_extra = True - - @abstractmethod - def validate(self) -> None: - pass - - @classmethod - def create(cls: Type["ProviderConfig"], **kwargs: Any) -> "ProviderConfig": - base_args = cls.model_fields.keys() - filtered_kwargs = { - k: v if v != "None" else None - for k, v in kwargs.items() - if k in base_args - } - instance = cls(**filtered_kwargs) - for k, v in kwargs.items(): - if k not in base_args: - instance.extra_fields[k] = v - return instance - - @property - @abstractmethod - def supported_providers(self) -> list[str]: - """Define a list of supported providers.""" - pass - - -class MessageType(Enum): - SYSTEM = "system" - USER = "user" - ASSISTANT = "assistant" - FUNCTION = "function" - TOOL = "tool" - - def __str__(self): - return self.value - - -class Message(BaseModel): - role: Union[MessageType, str] - content: Optional[str] = None - name: Optional[str] = None - function_call: Optional[Dict[str, Any]] = None - tool_calls: Optional[list[Dict[str, Any]]] = None - - -class Method(str, Enum): +from typing import Optional + +from shared.abstractions import ( # ChunkingConfig, + GenerationConfig, + 
HybridSearchSettings, + KGCommunityResult, + KGCreationSettings, + KGEnrichmentSettings, + KGEntityResult, + KGGlobalResult, + KGRelationshipResult, + KGSearchMethod, + KGSearchResult, + KGSearchResultType, + KGSearchSettings, + Message, + MessageType, + R2RException, + R2RSerializable, + Token, + VectorSearchResult, + VectorSearchSettings, +) +from shared.api.models import ( + KGCreationResponse, + KGEnrichmentResponse, + RAGResponse, + SearchResponse, + UserResponse, +) + + +class Strategy(str, Enum): + # Unstructured methods BY_TITLE = "by_title" BASIC = "basic" + # R2R methods RECURSIVE = "recursive" CHARACTER = "character" -class ChunkingConfig(ProviderConfig): - provider: str = "r2r" - method: Method = Method.RECURSIVE - chunk_size: int = 512 - chunk_overlap: int = 20 - max_chunk_size: Optional[int] = None - - def validate(self) -> None: - if self.provider not in self.supported_providers: - raise ValueError(f"Provider {self.provider} is not supported.") - if self.chunk_size <= 0: - raise ValueError("chunk_size must be greater than 0") - if self.chunk_overlap < 0: - raise ValueError("chunk_overlap must be non-negative") - - @property - def supported_providers(self) -> list[str]: - return ["r2r", "unstructured_local", "unstructured_api", None] - - class Config: - json_schema_extra = { - "type": "object", - "properties": { - "provider": {"type": "string"}, - "method": {"type": "string"}, - "chunk_size": {"type": "integer"}, - "chunk_overlap": {"type": "integer"}, - "max_chunk_size": {"type": "integer"}, - }, - "required": ["provider", "method", "chunk_size", "chunk_overlap"], - "example": { - "provider": "r2r", - "method": "recursive", - "chunk_size": 512, - "chunk_overlap": 20, - "max_chunk_size": 1024, - }, - } - - -class KGSearchResultType(str, Enum): - ENTITY = "entity" - RELATIONSHIP = "relationship" - COMMUNITY = "community" - - -class KGSearchMethod(str, Enum): - LOCAL = "local" - GLOBAL = "global" - - -class KGEntityResult(BaseModel): - name: str - 
description: str - metadata: Optional[dict[str, Any]] = None - - class Config: - json_schema_extra = { - "name": "Entity Name", - "description": "Entity Description", - "metadata": {}, - } - - -class KGRelationshipResult(BaseModel): - name: str - description: str - metadata: Optional[dict[str, Any]] = None - - class Config: - json_schema_extra = { - "name": "Relationship Name", - "description": "Relationship Description", - "metadata": {}, - } - - -class KGCommunityResult(BaseModel): - name: str - description: str - metadata: Optional[dict[str, Any]] = None - - class Config: - json_schema_extra = { - "name": "Community Name", - "description": "Community Description", - "metadata": {}, - } - - -class KGGlobalResult(BaseModel): - name: str - description: str - metadata: Optional[dict[str, Any]] = None - - class Config: - json_schema_extra = { - "name": "Global Result Name", - "description": "Global Result Description", - "metadata": {}, - } - - -class KGSearchResult(BaseModel): - method: KGSearchMethod - content: Union[ - KGEntityResult, KGRelationshipResult, KGCommunityResult, KGGlobalResult - ] - result_type: Optional[KGSearchResultType] = None - fragment_ids: Optional[list[UUID]] = None - document_ids: Optional[list[UUID]] = None - metadata: Optional[dict[str, Any]] = None - - class Config: - json_schema_extra = { - "method": "local", - "content": KGEntityResult.Config.json_schema_extra, - "result_type": "entity", - "fragment_ids": ["c68dc72e-fc23-5452-8f49-d7bd46088a96"], - "document_ids": ["3e157b3a-8469-51db-90d9-52e7d896b49b"], - "metadata": {"associated_query": "What is the capital of France?"}, - } - - -class R2RException(Exception): - def __init__( - self, message: str, status_code: int, detail: Optional[Any] = None - ): - self.message = message - self.status_code = status_code - super().__init__(self.message) - - -class Token(BaseModel): - token: str - token_type: str - - -class IndexMeasure(str, Enum): - """ - An enum representing the types of distance 
measures available for indexing. - - Attributes: - cosine_distance (str): The cosine distance measure for indexing. - l2_distance (str): The Euclidean (L2) distance measure for indexing. - max_inner_product (str): The maximum inner product measure for indexing. - """ - - cosine_distance = "cosine_distance" - l2_distance = "l2_distance" - max_inner_product = "max_inner_product" - - -class HybridSearchSettings(BaseModel): - full_text_weight: float = Field( - default=1.0, description="Weight to apply to full text search" - ) - semantic_weight: float = Field( - default=5.0, description="Weight to apply to semantic search" - ) - full_text_limit: int = Field( - default=200, - description="Maximum number of results to return from full text search", - ) - rrf_k: int = Field( - default=50, description="K-value for RRF (Rank Reciprocal Fusion)" - ) - - -class VectorSearchSettings(BaseModel): - use_vector_search: bool = Field( - default=True, description="Whether to use vector search" - ) - use_hybrid_search: bool = Field( - default=False, - description="Whether to perform a hybrid search (combining vector and keyword search)", - ) - filters: dict[str, Any] = Field( - default_factory=dict, - description="Complex logic filters to apply to the vector search, such as `{'document_id': {'$eq': '9fbe403b-c11c-5aae-8ade-ef22980c3ad1'}}`.", - ) - search_limit: int = Field( - default=10, - description="Maximum number of results to return", - ge=1, - le=1_000, - ) - selected_collection_ids: list[UUID] = Field( - default_factory=list, - description="Collection IDs to search for", - ) - index_measure: IndexMeasure = Field( - default=IndexMeasure.cosine_distance, - description="The distance measure to use for indexing", - ) - include_values: bool = Field( - default=True, - description="Whether to include search score values in the search results", - ) - include_metadatas: bool = Field( - default=True, - description="Whether to include element metadata in the search results", - ) - probes: 
Optional[int] = Field( - default=10, - description="Number of ivfflat index lists to query. Higher increases accuracy but decreases speed.", - ) - ef_search: Optional[int] = Field( - default=40, - description="Size of the dynamic candidate list for HNSW index search. Higher increases accuracy but decreases speed.", - ) - hybrid_search_settings: Optional[HybridSearchSettings] = Field( - default=HybridSearchSettings(), - description="Settings for hybrid search", - ) - - class Config: - json_encoders = {UUID: str} - json_schema_extra = { - "use_vector_search": True, - "use_hybrid_search": True, - "filters": {"category": "technology"}, - "search_limit": 20, - "selected_collection_ids": [ - "2acb499e-8428-543b-bd85-0d9098718220", - "3e157b3a-8469-51db-90d9-52e7d896b49b", - ], - "index_measure": "cosine_distance", - "include_metadata": True, - "probes": 10, - "ef_search": 40, - "hybrid_search_settings": { - "full_text_weight": 1.0, - "semantic_weight": 5.0, - "full_text_limit": 200, - "rrf_k": 50, - }, - } - - def model_dump(self, *args, **kwargs): - dump = super().model_dump(*args, **kwargs) - dump["selected_collection_ids"] = [ - str(uuid) for uuid in dump["selected_collection_ids"] - ] - return dump - - -class KGCreationSettings(BaseModel): - - entity_types: list[str] = Field( - default_factory=list, - description="The types of entities to extract.", - ) - - relation_types: list[str] = Field( - default_factory=list, - description="The types of relations to extract.", - ) - - fragment_merge_count: int = Field( - default=4, - description="The number of fragments to merge into a single extraction.", - ) - - max_knowledge_triples: int = Field( - default=100, - description="The maximum number of knowledge triples to extract from each chunk.", - ) - - generation_config: GenerationConfig = Field( - default_factory=GenerationConfig, - description="The generation configuration for the KG creation.", - ) - - def to_dict(self): - return self.model_dump() - - def 
model_dump_json(self, **kwargs): - return super().model_dump_json(**kwargs) - - -class KGEnrichmentSettings(BaseModel): - - max_summary_input_length: int = Field( - default=65536, - description="The maximum input size that goes inside a .", - ) - - leiden_params: dict = Field( - default_factory=dict, - description="The parameters for the Leiden algorithm.", - ) - - generation_config: GenerationConfig = Field( - default_factory=GenerationConfig, - description="The generation configuration for the KG enrichment.", - ) - - -class KGCreationResponse(BaseModel): - - message: str - task_id: UUID - - def __str__(self) -> str: - return f"KGCreationResponse(message={self.message}, task_id={self.task_id})" - - class Config: - json_schema_extra = { - "message": "Knowledge graph creation task queued successfully.", - "task_id": "c68dc72e-fc23-5452-8f49-d7bd46088a96", - } - - -class KGEnrichmentResponse(BaseModel): - - message: str - task_id: UUID - - def __str__(self) -> str: - return f"KGEnrichmentResponse(message={self.message}, task_id={self.task_id})" - - class Config: - json_schema_extra = { - "message": "Knowledge graph enrichment task queued successfully.", - "task_id": "c68dc72e-fc23-5452-8f49-d7bd46088a96", - } - - -class UserResponse(BaseModel): - id: UUID - email: str - is_active: bool = True - is_superuser: bool = False - created_at: datetime = datetime.now() - updated_at: datetime = datetime.now() - is_verified: bool = False - collection_ids: list[UUID] = [] - - # Optional fields (to update or set at creation) - hashed_password: Optional[str] = None - verification_code_expiry: Optional[datetime] = None - name: Optional[str] = None - bio: Optional[str] = None - profile_picture: Optional[str] = None - - -class VectorSearchResult(BaseModel): - """Result of a search operation.""" - - fragment_id: UUID - extraction_id: UUID - document_id: UUID - user_id: UUID - collection_ids: list[UUID] - score: float - text: str - metadata: dict[str, Any] - - def __str__(self) -> 
str: - return f"VectorSearchResult(fragment_id={self.fragment_id}, extraction_id={self.extraction_id}, document_id={self.document_id}, score={self.score})" - - def __repr__(self) -> str: - return self.__str__() - - def dict(self) -> dict: - return { - "fragment_id": self.fragment_id, - "extraction_id": self.extraction_id, - "document_id": self.document_id, - "user_id": self.user_id, - "collection_ids": self.collection_ids, - "score": self.score, - "text": self.text, - "metadata": self.metadata, - } - - class Config: - json_schema_extra = { - "fragment_id": "c68dc72e-fc23-5452-8f49-d7bd46088a96", - "extraction_id": "3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09", - "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "collection_ids": [], - "score": 0.23943702876567796, - "text": "Example text from the document", - "metadata": { - "title": "example_document.pdf", - "associated_query": "What is the capital of France?", - }, - } - - -class SearchResponse(BaseModel): - vector_search_results: list[VectorSearchResult] = Field( - ..., - description="List of vector search results", - ) - kg_search_results: Optional[list[KGSearchResult]] = Field( - None, - description="Knowledge graph search results, if applicable", - ) - - class Config: - json_schema_extra = { - "example": { - "vector_search_results": [ - VectorSearchResult.Config.json_schema_extra, - ], - "kg_search_results": [ - KGSearchResult.Config.json_schema_extra, - ], - } - } - - -class RAGResponse(BaseModel): - completion: Any = Field( - ..., - description="The generated completion from the RAG process", - ) - search_results: SearchResponse = Field( - ..., - description="The search results used for the RAG process", - ) - - class Config: - json_schema_extra = { - "example": { - "completion": { - "id": "chatcmpl-example123", - "choices": [ - { - "finish_reason": "stop", - "index": 0, - "logprobs": None, - "message": { - "content": "Paris is the capital of France.", - 
"role": "assistant", - }, - } - ], - }, - "search_results": { - "vector_search_results": [ - { - "fragment_id": "c68dc72e-fc23-5452-8f49-d7bd46088a96", - "extraction_id": "3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09", - "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "collection_ids": [], - "score": 0.23943702876567796, - "text": "Paris is the capital and most populous city of France.", - "metadata": { - "text": "Paris is the capital and most populous city of France.", - "title": "france_info.pdf", - "associated_query": "What is the capital of France?", - }, - } - ], - "kg_search_results": None, - }, - } - } +# TODO - Remove this class +class ChunkingConfig(R2RSerializable): + provider: str = "unstructured_local" # or unstructured_api + combine_under_n_chars: Optional[int] = 128 + max_characters: Optional[int] = 500 + coordinates: bool = False + encoding: Optional[str] = "utf-8" + extract_image_block_types: Optional[list[str]] = None + gz_uncompressed_content_type: Optional[str] = None + hi_res_model_name: Optional[str] = None + include_orig_elements: Optional[bool] = True + include_page_breaks: bool = False + languages: Optional[list[str]] = None + multipage_sections: bool = True diff --git a/py/sdk/restructure.py b/py/sdk/restructure.py index b66606191..058c3c381 100644 --- a/py/sdk/restructure.py +++ b/py/sdk/restructure.py @@ -1,4 +1,5 @@ -from typing import Union +import json +from typing import Optional, Union from .models import ( KGCreationResponse, @@ -13,17 +14,18 @@ class RestructureMethods: @staticmethod async def create_graph( client, - document_ids: list[str] = None, - kg_creation_settings: Union[dict, KGCreationSettings] = None, + document_ids: Optional[list[str]] = None, + kg_creation_settings: Optional[Union[dict, KGCreationSettings]] = None, ) -> KGCreationResponse: """ Create a graph from the given settings. 
""" data = { - "document_ids": document_ids, - "kg_creation_settings": kg_creation_settings, + "document_ids": document_ids or [], + "kg_creation_settings": kg_creation_settings or {}, } + response = await client._make_request( "POST", "create_graph", json=data ) @@ -34,7 +36,9 @@ async def enrich_graph( client, skip_clustering: bool = False, force_enrichment: bool = False, - kg_enrichment_settings: Union[dict, KGEnrichmentSettings] = None, + kg_enrichment_settings: Optional[ + Union[dict, KGEnrichmentSettings] + ] = None, ) -> KGEnrichmentResponse: """ Perform graph enrichment over the entire graph. @@ -50,7 +54,7 @@ async def enrich_graph( data = { "skip_clustering": skip_clustering, "force_enrichment": force_enrichment, - "kg_enrichment_settings": kg_enrichment_settings, + "kg_enrichment_settings": kg_enrichment_settings or {}, } response = await client._make_request( "POST", "enrich_graph", json=data diff --git a/py/sdk/retrieval.py b/py/sdk/retrieval.py index 67ca4b6e3..137175d86 100644 --- a/py/sdk/retrieval.py +++ b/py/sdk/retrieval.py @@ -91,7 +91,7 @@ async def rag( "include_title_if_available": include_title_if_available, } - if rag_generation_config and rag_generation_config.get( + if rag_generation_config and rag_generation_config.get( # type: ignore "stream", False ): return client._make_streaming_request("POST", "rag", json=data) @@ -135,13 +135,13 @@ async def agent( if kg_search_settings and not isinstance(kg_search_settings, dict): kg_search_settings = kg_search_settings.model_dump() - messages = [ + cast_messages: list[Message] = [ Message(**msg) if isinstance(msg, dict) else msg for msg in messages ] data = { - "messages": [msg.model_dump() for msg in messages], + "messages": [msg.model_dump() for msg in cast_messages], "rag_generation_config": rag_generation_config, "vector_search_settings": vector_search_settings, "kg_search_settings": kg_search_settings, @@ -149,7 +149,7 @@ async def agent( "include_title_if_available": 
include_title_if_available, } - if rag_generation_config and rag_generation_config.get( + if rag_generation_config and rag_generation_config.get( # type: ignore "stream", False ): return client._make_streaming_request("POST", "agent", json=data) diff --git a/py/sdk/server.py b/py/sdk/server.py index d703ae6d4..0554ae6cd 100644 --- a/py/sdk/server.py +++ b/py/sdk/server.py @@ -19,8 +19,8 @@ async def server_stats(client) -> dict: @staticmethod async def logs( client, - offset: int = None, - limit: int = None, + offset: Optional[int] = None, + limit: Optional[int] = None, run_type_filter: Optional[str] = None, ) -> dict: """ diff --git a/py/core/base/api/models/auth/__init__.py b/py/shared/__init__.py similarity index 100% rename from py/core/base/api/models/auth/__init__.py rename to py/shared/__init__.py diff --git a/py/shared/abstractions/__init__.py b/py/shared/abstractions/__init__.py new file mode 100644 index 000000000..b9cb2683e --- /dev/null +++ b/py/shared/abstractions/__init__.py @@ -0,0 +1,114 @@ +from .base import AsyncSyncMeta, R2RSerializable, syncable +from .completion import CompletionRecord, MessageType +from .document import ( + DataType, + Document, + DocumentExtraction, + DocumentFragment, + DocumentInfo, + DocumentType, + IngestionStatus, + RestructureStatus, +) +from .embedding import EmbeddingPurpose, default_embedding_prefixes +from .exception import R2RDocumentProcessingError, R2RException +from .graph import ( + Community, + CommunityReport, + Entity, + EntityType, + KGExtraction, + RelationshipType, + Triple, +) +from .llm import ( + GenerationConfig, + LLMChatCompletion, + LLMChatCompletionChunk, + Message, + RAGCompletion, +) +from .prompt import Prompt +from .restructure import KGCreationSettings, KGEnrichmentSettings +from .search import ( + AggregateSearchResult, + HybridSearchSettings, + KGCommunityResult, + KGEntityResult, + KGGlobalResult, + KGRelationshipResult, + KGSearchMethod, + KGSearchResult, + KGSearchResultType, + 
KGSearchSettings, + VectorSearchResult, + VectorSearchSettings, +) +from .user import Token, TokenData, UserStats +from .vector import StorageResult, Vector, VectorEntry, VectorType + +__all__ = [ + # Base abstractions + "R2RSerializable", + "AsyncSyncMeta", + "syncable", + # Completion abstractions + "CompletionRecord", + "MessageType", + # Document abstractions + "DataType", + "Document", + "DocumentExtraction", + "DocumentFragment", + "DocumentInfo", + "IngestionStatus", + "RestructureStatus", + "DocumentType", + # Embedding abstractions + "EmbeddingPurpose", + "default_embedding_prefixes", + # Exception abstractions + "R2RDocumentProcessingError", + "R2RException", + # Graph abstractions + "Entity", + "EntityType", + "RelationshipType", + "Community", + "CommunityReport", + "KGExtraction", + "Triple", + # LLM abstractions + "GenerationConfig", + "LLMChatCompletion", + "LLMChatCompletionChunk", + "Message", + "RAGCompletion", + # Prompt abstractions + "Prompt", + # Search abstractions + "AggregateSearchResult", + "KGSearchResult", + "KGSearchMethod", + "KGSearchResultType", + "KGEntityResult", + "KGRelationshipResult", + "KGCommunityResult", + "KGGlobalResult", + "KGSearchSettings", + "VectorSearchResult", + "VectorSearchSettings", + "HybridSearchSettings", + # Restructure abstractions + "KGCreationSettings", + "KGEnrichmentSettings", + # User abstractions + "Token", + "TokenData", + "UserStats", + # Vector abstractions + "Vector", + "VectorEntry", + "VectorType", + "StorageResult", +] diff --git a/py/core/base/abstractions/base.py b/py/shared/abstractions/base.py similarity index 100% rename from py/core/base/abstractions/base.py rename to py/shared/abstractions/base.py diff --git a/py/core/base/abstractions/completion.py b/py/shared/abstractions/completion.py similarity index 91% rename from py/core/base/abstractions/completion.py rename to py/shared/abstractions/completion.py index 9c22d5508..3222fd1f8 100644 --- a/py/core/base/abstractions/completion.py +++ 
b/py/shared/abstractions/completion.py @@ -10,20 +10,10 @@ from pydantic import BaseModel +from .llm import MessageType from .search import AggregateSearchResult -class MessageType(Enum): - SYSTEM = "system" - USER = "user" - ASSISTANT = "assistant" - FUNCTION = "function" - TOOL = "tool" - - def __str__(self): - return self.value - - class CompletionRecord(BaseModel): message_id: UUID message_type: MessageType diff --git a/py/core/base/abstractions/document.py b/py/shared/abstractions/document.py similarity index 95% rename from py/core/base/abstractions/document.py rename to py/shared/abstractions/document.py index 836e8df07..d7e0cd68f 100644 --- a/py/core/base/abstractions/document.py +++ b/py/shared/abstractions/document.py @@ -7,7 +7,7 @@ from typing import Optional, Union from uuid import UUID, uuid4 -from pydantic import Field, validator +from pydantic import Field from .base import R2RSerializable @@ -100,6 +100,7 @@ class Document(R2RSerializable): class Config: arbitrary_types_allowed = True + ignore_extra = False json_encoders = { UUID: str, } @@ -161,8 +162,8 @@ def convert_to_db_entry(self): "title": self.title or "N/A", "version": self.version, "size_in_bytes": self.size_in_bytes, - "ingestion_status": self.ingestion_status, - "restructuring_status": self.restructuring_status, + "ingestion_status": self.ingestion_status.value, + "restructuring_status": self.restructuring_status.value, "created_at": self.created_at or now, "updated_at": self.updated_at or now, "ingestion_attempt_number": self.ingestion_attempt_number or 0, diff --git a/py/core/base/abstractions/embedding.py b/py/shared/abstractions/embedding.py similarity index 100% rename from py/core/base/abstractions/embedding.py rename to py/shared/abstractions/embedding.py diff --git a/py/core/base/abstractions/exception.py b/py/shared/abstractions/exception.py similarity index 100% rename from py/core/base/abstractions/exception.py rename to py/shared/abstractions/exception.py diff --git 
a/py/core/base/abstractions/graph.py b/py/shared/abstractions/graph.py similarity index 98% rename from py/core/base/abstractions/graph.py rename to py/shared/abstractions/graph.py index ab612dc9f..dbf795e27 100644 --- a/py/core/base/abstractions/graph.py +++ b/py/shared/abstractions/graph.py @@ -46,6 +46,7 @@ class RelationshipType(R2RSerializable): class Entity(R2RSerializable): """An entity extracted from a document.""" + id: Optional[int] = None # given by neo4j category: str name: str description: Optional[str] = None @@ -78,6 +79,8 @@ def __init__(self, **kwargs): class Triple(BaseModel): """A relationship between two entities. This is a generic relationship, and can be used to represent any type of relationship between any two entities.""" + id: Optional[int] = None # given by neo4j + subject: str | None = None """The source entity name.""" @@ -102,7 +105,7 @@ class Triple(BaseModel): document_ids: list[str] = [] """List of document IDs in which the relationship appears (optional).""" - attributes: dict[str, Any] | str = None + attributes: dict[str, Any] | str = {} """Additional attributes associated with the relationship (optional). 
To be included in the search prompt""" def __init__(self, **kwargs): diff --git a/py/core/base/abstractions/llm.py b/py/shared/abstractions/llm.py similarity index 98% rename from py/core/base/abstractions/llm.py rename to py/shared/abstractions/llm.py index ee6b4932b..05d39ad03 100644 --- a/py/core/base/abstractions/llm.py +++ b/py/shared/abstractions/llm.py @@ -7,7 +7,7 @@ from openai.types.chat import ChatCompletion, ChatCompletionChunk from pydantic import BaseModel, Field -from core.base.abstractions.base import R2RSerializable +from .base import R2RSerializable if TYPE_CHECKING: from .search import AggregateSearchResult diff --git a/py/core/base/abstractions/prompt.py b/py/shared/abstractions/prompt.py similarity index 89% rename from py/core/base/abstractions/prompt.py rename to py/shared/abstractions/prompt.py index 7d9d723b7..d24fe00e8 100644 --- a/py/core/base/abstractions/prompt.py +++ b/py/shared/abstractions/prompt.py @@ -1,11 +1,14 @@ """Abstraction for a prompt that can be formatted with inputs.""" +import logging from datetime import datetime from typing import Any from uuid import UUID, uuid4 from pydantic import BaseModel, Field +logger = logging.getLogger(__name__) + class Prompt(BaseModel): """A prompt that can be formatted with inputs.""" @@ -19,7 +22,8 @@ class Prompt(BaseModel): def format_prompt(self, inputs: dict[str, Any]) -> str: self._validate_inputs(inputs) - return self.template.format(**inputs) + formatted_prompt = self.template.format(**inputs) + return formatted_prompt def _validate_inputs(self, inputs: dict[str, Any]) -> None: for var, expected_type_name in self.input_types.items(): diff --git a/py/core/base/abstractions/restructure.py b/py/shared/abstractions/restructure.py similarity index 82% rename from py/core/base/abstractions/restructure.py rename to py/shared/abstractions/restructure.py index c4f9d4dcf..5d420b965 100644 --- a/py/core/base/abstractions/restructure.py +++ b/py/shared/abstractions/restructure.py @@ -1,5 +1,3 
@@ -from uuid import UUID - from pydantic import Field from .base import R2RSerializable @@ -9,6 +7,11 @@ class KGCreationSettings(R2RSerializable): """Settings for knowledge graph creation.""" + kg_extraction_prompt: str = Field( + default="graphrag_triplet_extraction_zero_shot", + description="The prompt to use for knowledge graph extraction.", + ) + entity_types: list[str] = Field( default=[], description="The types of entities to extract.", @@ -38,6 +41,11 @@ class KGCreationSettings(R2RSerializable): class KGEnrichmentSettings(R2RSerializable): """Settings for knowledge graph enrichment.""" + community_reports_prompt: str = Field( + default="graphrag_community_reports_prompt", + description="The prompt to use for knowledge graph enrichment.", + ) + max_description_input_length: int = Field( default=65536, description="The maximum length of the description for a node in the graph.", diff --git a/py/core/base/abstractions/search.py b/py/shared/abstractions/search.py similarity index 83% rename from py/core/base/abstractions/search.py rename to py/shared/abstractions/search.py index 5649e8a15..8199d8394 100644 --- a/py/core/base/abstractions/search.py +++ b/py/shared/abstractions/search.py @@ -4,12 +4,13 @@ from typing import Any, Optional, Union from uuid import UUID -from pydantic import BaseModel, Field +from pydantic import Field +from .base import R2RSerializable from .llm import GenerationConfig -class VectorSearchResult(BaseModel): +class VectorSearchResult(R2RSerializable): """Result of a search operation.""" fragment_id: UUID @@ -27,7 +28,7 @@ def __str__(self) -> str: def __repr__(self) -> str: return self.__str__() - def dict(self) -> dict: + def as_dict(self) -> dict: return { "fragment_id": self.fragment_id, "extraction_id": self.extraction_id, @@ -66,7 +67,7 @@ class KGSearchMethod(str, Enum): GLOBAL = "global" -class KGEntityResult(BaseModel): +class KGEntityResult(R2RSerializable): name: str description: str metadata: Optional[dict[str, Any]] = 
None @@ -79,7 +80,7 @@ class Config: } -class KGRelationshipResult(BaseModel): +class KGRelationshipResult(R2RSerializable): name: str description: str metadata: Optional[dict[str, Any]] = None @@ -92,7 +93,7 @@ class Config: } -class KGCommunityResult(BaseModel): +class KGCommunityResult(R2RSerializable): name: str description: str metadata: Optional[dict[str, Any]] = None @@ -105,7 +106,7 @@ class Config: } -class KGGlobalResult(BaseModel): +class KGGlobalResult(R2RSerializable): name: str description: str metadata: Optional[dict[str, Any]] = None @@ -118,7 +119,7 @@ class Config: } -class KGSearchResult(BaseModel): +class KGSearchResult(R2RSerializable): method: KGSearchMethod content: Union[ KGEntityResult, KGRelationshipResult, KGCommunityResult, KGGlobalResult @@ -126,7 +127,7 @@ class KGSearchResult(BaseModel): result_type: Optional[KGSearchResultType] = None fragment_ids: Optional[list[UUID]] = None document_ids: Optional[list[UUID]] = None - metadata: Optional[dict[str, Any]] = None + metadata: dict[str, Any] = {} class Config: json_schema_extra = { @@ -139,7 +140,7 @@ class Config: } -class AggregateSearchResult(BaseModel): +class AggregateSearchResult(R2RSerializable): """Result of an aggregate search operation.""" vector_search_results: Optional[list[VectorSearchResult]] @@ -151,10 +152,10 @@ def __str__(self) -> str: def __repr__(self) -> str: return f"AggregateSearchResult(vector_search_results={self.vector_search_results}, kg_search_results={self.kg_search_results})" - def dict(self) -> dict: + def as_dict(self) -> dict: return { "vector_search_results": ( - [result.dict() for result in self.vector_search_results] + [result.as_dict() for result in self.vector_search_results] if self.vector_search_results else [] ), @@ -178,7 +179,7 @@ class IndexMeasure(str, Enum): max_inner_product = "max_inner_product" -class HybridSearchSettings(BaseModel): +class HybridSearchSettings(R2RSerializable): full_text_weight: float = Field( default=1.0, 
description="Weight to apply to full text search" ) @@ -194,7 +195,7 @@ class HybridSearchSettings(BaseModel): ) -class VectorSearchSettings(BaseModel): +class VectorSearchSettings(R2RSerializable): use_vector_search: bool = Field( default=True, description="Whether to use vector search" ) @@ -212,6 +213,11 @@ class VectorSearchSettings(BaseModel): ge=1, le=1_000, ) + offset: int = Field( + default=0, + ge=0, + description="Offset to paginate search results", + ) selected_collection_ids: list[UUID] = Field( default_factory=list, description="Collection IDs to search for", @@ -240,7 +246,7 @@ class VectorSearchSettings(BaseModel): default=HybridSearchSettings(), description="Settings for hybrid search", ) - search_strategy: Optional[str] = Field( + search_strategy: str = Field( default="vanilla", description="Search strategy to use (e.g., 'default', 'query_fusion', 'hyde')", ) @@ -251,7 +257,8 @@ class Config: "use_vector_search": True, "use_hybrid_search": True, "filters": {"category": "technology"}, - "search_limit": 20, + "limit": 20, + "offset": 0, "selected_collection_ids": [ "2acb499e-8428-543b-bd85-0d9098718220", "3e157b3a-8469-51db-90d9-52e7d896b49b", @@ -276,12 +283,30 @@ def model_dump(self, *args, **kwargs): return dump -class KGSearchSettings(BaseModel): - use_kg_search: bool = False - kg_search_type: str = "global" # 'global' or 'local' - kg_search_level: Optional[str] = None - kg_search_generation_config: Optional[GenerationConfig] = Field( - default_factory=GenerationConfig +class KGSearchSettings(R2RSerializable): + + graphrag_map_system_prompt: str = Field( + default="graphrag_map_system_prompt", + description="The system prompt for the graphrag map prompt.", + ) + + graphrag_reduce_system_prompt: str = Field( + default="graphrag_reduce_system_prompt", + description="The system prompt for the graphrag reduce prompt.", + ) + + use_kg_search: bool = Field( + default=False, description="Whether to use KG search" + ) + kg_search_type: str = Field( + 
default="local", description="KG search type" + ) # 'global' or 'local' + kg_search_level: Optional[str] = Field( + default=None, description="KG search level" + ) + generation_config: GenerationConfig = Field( + default_factory=GenerationConfig, + description="Configuration for text generation during graph search.", ) # TODO: add these back in # entity_types: list = [] @@ -300,7 +325,7 @@ class Config: "use_kg_search": True, "kg_search_type": "global", "kg_search_level": "0", - "kg_search_generation_config": GenerationConfig.Config.json_schema_extra, + "generation_config": GenerationConfig.Config.json_schema_extra, "max_community_description_length": 65536, "max_llm_queries_for_global_search": 250, "local_search_limits": { diff --git a/py/core/base/abstractions/user.py b/py/shared/abstractions/user.py similarity index 100% rename from py/core/base/abstractions/user.py rename to py/shared/abstractions/user.py diff --git a/py/core/base/abstractions/vector.py b/py/shared/abstractions/vector.py similarity index 98% rename from py/core/base/abstractions/vector.py rename to py/shared/abstractions/vector.py index d2c9fca06..adb61361a 100644 --- a/py/core/base/abstractions/vector.py +++ b/py/shared/abstractions/vector.py @@ -71,7 +71,7 @@ class StorageResult(R2RSerializable): """A result of a storage operation.""" success: bool - document_id: UUID = None + document_id: UUID num_chunks: int = 0 error_message: Optional[str] = None diff --git a/py/shared/api/models/__init__.py b/py/shared/api/models/__init__.py new file mode 100644 index 000000000..777ad2ee5 --- /dev/null +++ b/py/shared/api/models/__init__.py @@ -0,0 +1,117 @@ +from shared.api.models.auth.responses import ( + GenericMessageResponse, + TokenResponse, + UserResponse, + WrappedGenericMessageResponse, + WrappedTokenResponse, + WrappedUserResponse, +) +from shared.api.models.ingestion.responses import ( + IngestionResponse, + WrappedIngestionResponse, + WrappedUpdateResponse, +) +from 
shared.api.models.management.responses import ( + AnalyticsResponse, + AppSettingsResponse, + CollectionOverviewResponse, + CollectionResponse, + DocumentChunkResponse, + DocumentOverviewResponse, + KnowledgeGraphResponse, + LogResponse, + PromptResponse, + ScoreCompletionResponse, + ServerStats, + UserOverviewResponse, + WrappedAddUserResponse, + WrappedAnalyticsResponse, + WrappedAppSettingsResponse, + WrappedCollectionListResponse, + WrappedCollectionOverviewResponse, + WrappedCollectionResponse, + WrappedDocumentChunkResponse, + WrappedDocumentOverviewResponse, + WrappedGetPromptsResponse, + WrappedKnowledgeGraphResponse, + WrappedLogResponse, + WrappedPromptMessageResponse, + WrappedScoreCompletionResponse, + WrappedServerStatsResponse, + WrappedUserCollectionResponse, + WrappedUserOverviewResponse, + WrappedUsersInCollectionResponse, +) +from shared.api.models.restructure.responses import ( + KGCreationResponse, + KGEnrichmentResponse, + WrappedKGCreationResponse, + WrappedKGEnrichmentResponse, +) +from shared.api.models.retrieval.responses import ( + RAGAgentResponse, + RAGResponse, + SearchResponse, + WrappedRAGAgentResponse, + WrappedRAGResponse, + WrappedSearchResponse, +) + +__all__ = [ + # Auth Responses + "GenericMessageResponse", + "TokenResponse", + "UserResponse", + "WrappedTokenResponse", + "WrappedUserResponse", + "WrappedGenericMessageResponse", + # Ingestion Responses + "IngestionResponse", + "WrappedIngestionResponse", + "WrappedUpdateResponse", + # Restructure Responses + "KGCreationResponse", + "WrappedKGCreationResponse", + "KGEnrichmentResponse", + "WrappedKGEnrichmentResponse", + # Management Responses + "PromptResponse", + "ServerStats", + "LogResponse", + "AnalyticsResponse", + "AppSettingsResponse", + "ScoreCompletionResponse", + "UserOverviewResponse", + "DocumentOverviewResponse", + "DocumentChunkResponse", + "KnowledgeGraphResponse", + "CollectionResponse", + "CollectionOverviewResponse", + "WrappedPromptMessageResponse", + 
"WrappedServerStatsResponse", + "WrappedLogResponse", + "WrappedAnalyticsResponse", + "WrappedAppSettingsResponse", + "WrappedScoreCompletionResponse", + "WrappedUserOverviewResponse", + "WrappedDocumentChunkResponse", + "WrappedDocumentOverviewResponse", + "WrappedDocumentChunkResponse", + "WrappedKnowledgeGraphResponse", + "WrappedCollectionResponse", + "WrappedDocumentChunkResponse", + "WrappedCollectionListResponse", + "WrappedAddUserResponse", + "WrappedUsersInCollectionResponse", + "WrappedGetPromptsResponse", + "WrappedUserCollectionResponse", + "WrappedDocumentChunkResponse", + "WrappedCollectionOverviewResponse", + # Retrieval Responses + "SearchResponse", + "RAGResponse", + "RAGAgentResponse", + "WrappedSearchResponse", + "WrappedRAGResponse", + "WrappedRAGAgentResponse", +] diff --git a/py/core/base/api/models/ingestion/__init__.py b/py/shared/api/models/auth/__init__.py similarity index 100% rename from py/core/base/api/models/ingestion/__init__.py rename to py/shared/api/models/auth/__init__.py diff --git a/py/core/base/api/models/auth/responses.py b/py/shared/api/models/auth/responses.py similarity index 87% rename from py/core/base/api/models/auth/responses.py rename to py/shared/api/models/auth/responses.py index 6601fa088..9e868272d 100644 --- a/py/core/base/api/models/auth/responses.py +++ b/py/shared/api/models/auth/responses.py @@ -4,9 +4,8 @@ from pydantic import BaseModel -from core.base.abstractions import Token -from core.base.abstractions.base import R2RSerializable -from core.base.api.models.base import ResultsWrapper +from shared.abstractions import R2RSerializable, Token +from shared.api.models.base import ResultsWrapper class TokenResponse(BaseModel): diff --git a/py/core/base/api/models/base.py b/py/shared/api/models/base.py similarity index 61% rename from py/core/base/api/models/base.py rename to py/shared/api/models/base.py index 5c5af2d63..092728bc3 100644 --- a/py/core/base/api/models/base.py +++ b/py/shared/api/models/base.py @@ 
-7,3 +7,8 @@ class ResultsWrapper(BaseModel, Generic[T]): results: T + + +class PaginatedResultsWrapper(BaseModel, Generic[T]): + results: T + total_entries: int diff --git a/py/core/base/api/models/management/__init__.py b/py/shared/api/models/ingestion/__init__.py similarity index 100% rename from py/core/base/api/models/management/__init__.py rename to py/shared/api/models/ingestion/__init__.py diff --git a/py/core/base/api/models/ingestion/responses.py b/py/shared/api/models/ingestion/responses.py similarity index 94% rename from py/core/base/api/models/ingestion/responses.py rename to py/shared/api/models/ingestion/responses.py index 4612156b3..7771d3c4c 100644 --- a/py/core/base/api/models/ingestion/responses.py +++ b/py/shared/api/models/ingestion/responses.py @@ -1,9 +1,9 @@ -from typing import Any, TypeVar +from typing import TypeVar from uuid import UUID from pydantic import BaseModel, Field -from core.base.api.models.base import ResultsWrapper +from shared.api.models.base import ResultsWrapper T = TypeVar("T") diff --git a/py/core/base/api/models/restructure/__init__.py b/py/shared/api/models/management/__init__.py similarity index 100% rename from py/core/base/api/models/restructure/__init__.py rename to py/shared/api/models/management/__init__.py diff --git a/py/core/base/api/models/management/responses.py b/py/shared/api/models/management/responses.py similarity index 57% rename from py/core/base/api/models/management/responses.py rename to py/shared/api/models/management/responses.py index d542d7989..303e4dcad 100644 --- a/py/core/base/api/models/management/responses.py +++ b/py/shared/api/models/management/responses.py @@ -1,10 +1,10 @@ from datetime import datetime -from typing import Any, Dict, List, Optional +from typing import Any, Optional from uuid import UUID from pydantic import BaseModel -from core.base.api.models.base import ResultsWrapper +from shared.api.models.base import PaginatedResultsWrapper, ResultsWrapper class 
UpdatePromptResponse(BaseModel): @@ -16,11 +16,11 @@ class PromptResponse(BaseModel): template: str created_at: datetime updated_at: datetime - input_types: Dict[str, str] + input_types: dict[str, str] class AllPromptsResponse(BaseModel): - prompts: Dict[str, PromptResponse] + prompts: dict[str, PromptResponse] class LogEntry(BaseModel): @@ -32,7 +32,7 @@ class LogEntry(BaseModel): class LogResponse(BaseModel): run_id: UUID run_type: str - entries: List[LogEntry] + entries: list[LogEntry] timestamp: Optional[datetime] user_id: Optional[UUID] @@ -46,12 +46,12 @@ class ServerStats(BaseModel): class AnalyticsResponse(BaseModel): analytics_data: Optional[dict] = None - filtered_logs: Dict[str, Any] + filtered_logs: dict[str, Any] class AppSettingsResponse(BaseModel): - config: Dict[str, Any] - prompts: Dict[str, Any] + config: dict[str, Any] + prompts: dict[str, Any] class ScoreCompletionResponse(BaseModel): @@ -62,7 +62,25 @@ class UserOverviewResponse(BaseModel): user_id: UUID num_files: int total_size_in_bytes: int - document_ids: List[UUID] + document_ids: list[UUID] + + +class UserResponse(BaseModel): + id: UUID + email: str + is_active: bool = True + is_superuser: bool = False + created_at: datetime = datetime.now() + updated_at: datetime = datetime.now() + is_verified: bool = False + collection_ids: list[UUID] = [] + + # Optional fields (to update or set at creation) + hashed_password: Optional[str] = None + verification_code_expiry: Optional[datetime] = None + name: Optional[str] = None + bio: Optional[str] = None + profile_picture: Optional[str] = None class DocumentOverviewResponse(BaseModel): @@ -76,7 +94,7 @@ class DocumentOverviewResponse(BaseModel): restructuring_status: str version: str collection_ids: list[UUID] - metadata: Dict[str, Any] + metadata: dict[str, Any] class DocumentChunkResponse(BaseModel): @@ -86,13 +104,13 @@ class DocumentChunkResponse(BaseModel): user_id: UUID collection_ids: list[UUID] text: str - metadata: Dict[str, Any] + metadata: 
dict[str, Any] KnowledgeGraphResponse = str -class GroupResponse(BaseModel): +class CollectionResponse(BaseModel): collection_id: UUID name: str description: Optional[str] @@ -100,7 +118,7 @@ class GroupResponse(BaseModel): updated_at: datetime -class GroupOverviewResponse(BaseModel): +class CollectionOverviewResponse(BaseModel): collection_id: UUID name: str description: Optional[str] @@ -118,17 +136,27 @@ class AddUserResponse(BaseModel): WrappedPromptMessageResponse = ResultsWrapper[UpdatePromptResponse] WrappedGetPromptsResponse = ResultsWrapper[AllPromptsResponse] WrappedServerStatsResponse = ResultsWrapper[ServerStats] -WrappedLogResponse = ResultsWrapper[List[LogResponse]] +WrappedLogResponse = ResultsWrapper[list[LogResponse]] WrappedAnalyticsResponse = ResultsWrapper[AnalyticsResponse] WrappedAppSettingsResponse = ResultsWrapper[AppSettingsResponse] WrappedScoreCompletionResponse = ResultsWrapper[ScoreCompletionResponse] -WrappedUserOverviewResponse = ResultsWrapper[List[UserOverviewResponse]] -WrappedDocumentOverviewResponse = ResultsWrapper[ - List[DocumentOverviewResponse] +WrappedUserOverviewResponse = PaginatedResultsWrapper[ + list[UserOverviewResponse] +] +WrappedDocumentOverviewResponse = PaginatedResultsWrapper[ + list[DocumentOverviewResponse] ] -WrappedDocumentChunkResponse = ResultsWrapper[List[DocumentChunkResponse]] WrappedKnowledgeGraphResponse = ResultsWrapper[KnowledgeGraphResponse] -WrappedCollectionResponse = ResultsWrapper[GroupResponse] -WrappedCollectionListResponse = ResultsWrapper[List[GroupResponse]] -WrappedCollectionOverviewResponse = ResultsWrapper[List[GroupOverviewResponse]] +WrappedCollectionResponse = ResultsWrapper[CollectionResponse] +WrappedCollectionListResponse = ResultsWrapper[list[CollectionResponse]] +WrappedCollectionOverviewResponse = ResultsWrapper[ + list[CollectionOverviewResponse] +] WrappedAddUserResponse = ResultsWrapper[AddUserResponse] +WrappedUsersInCollectionResponse = 
PaginatedResultsWrapper[list[UserResponse]] +WrappedUserCollectionResponse = PaginatedResultsWrapper[ + list[CollectionOverviewResponse] +] +WrappedDocumentChunkResponse = PaginatedResultsWrapper[ + list[DocumentChunkResponse] +] diff --git a/py/core/base/api/models/retrieval/__init__.py b/py/shared/api/models/restructure/__init__.py similarity index 100% rename from py/core/base/api/models/retrieval/__init__.py rename to py/shared/api/models/restructure/__init__.py diff --git a/py/core/base/api/models/restructure/responses.py b/py/shared/api/models/restructure/responses.py similarity index 93% rename from py/core/base/api/models/restructure/responses.py rename to py/shared/api/models/restructure/responses.py index 9dc3bf931..0150f4773 100644 --- a/py/core/base/api/models/restructure/responses.py +++ b/py/shared/api/models/restructure/responses.py @@ -1,9 +1,8 @@ -from typing import Any, Dict from uuid import UUID from pydantic import BaseModel, Field -from core.base.api.models.base import ResultsWrapper +from shared.api.models.base import ResultsWrapper class KGCreationResponse(BaseModel): diff --git a/py/tests/__init__.py b/py/shared/api/models/retrieval/__init__.py similarity index 100% rename from py/tests/__init__.py rename to py/shared/api/models/retrieval/__init__.py diff --git a/py/core/base/api/models/retrieval/responses.py b/py/shared/api/models/retrieval/responses.py similarity index 98% rename from py/core/base/api/models/retrieval/responses.py rename to py/shared/api/models/retrieval/responses.py index dbdeb1b62..9030ad3fd 100644 --- a/py/core/base/api/models/retrieval/responses.py +++ b/py/shared/api/models/retrieval/responses.py @@ -2,8 +2,8 @@ from pydantic import BaseModel, Field -from core.base.abstractions import KGSearchResult, Message, VectorSearchResult -from core.base.api.models.base import ResultsWrapper +from shared.abstractions import KGSearchResult, Message, VectorSearchResult +from shared.api.models.base import ResultsWrapper class 
SearchResponse(BaseModel): diff --git a/py/shared/utils/__init__.py b/py/shared/utils/__init__.py new file mode 100644 index 000000000..bae809ee0 --- /dev/null +++ b/py/shared/utils/__init__.py @@ -0,0 +1,31 @@ +from .base_utils import ( + decrement_version, + format_entity_types, + format_relations, + format_search_results_for_llm, + format_search_results_for_stream, + generate_id_from_label, + generate_run_id, + generate_user_document_id, + increment_version, + run_pipeline, + to_async_generator, +) +from .splitter.text import RecursiveCharacterTextSplitter, TextSplitter + +__all__ = [ + "format_entity_types", + "format_relations", + "generate_id_from_label", + "format_search_results_for_stream", + "format_search_results_for_llm", + "generate_run_id", + "generate_user_document_id", + "increment_version", + "decrement_version", + "run_pipeline", + "to_async_generator", + # Text splitter + "RecursiveCharacterTextSplitter", + "TextSplitter", +] diff --git a/py/core/base/utils/base_utils.py b/py/shared/utils/base_utils.py similarity index 81% rename from py/core/base/utils/base_utils.py rename to py/shared/utils/base_utils.py index 54631c3ec..46412d9be 100644 --- a/py/core/base/utils/base_utils.py +++ b/py/shared/utils/base_utils.py @@ -1,15 +1,15 @@ import asyncio import json -from typing import TYPE_CHECKING, Any, AsyncGenerator, Generator, Iterable +import logging +from typing import TYPE_CHECKING, Any, AsyncGenerator, Iterable from uuid import NAMESPACE_DNS, UUID, uuid4, uuid5 from ..abstractions.graph import EntityType, RelationshipType from ..abstractions.search import AggregateSearchResult -import logging - logger = logging.getLogger(__name__) + def format_search_results_for_llm( results: AggregateSearchResult, ) -> str: @@ -18,16 +18,20 @@ def format_search_results_for_llm( formatted_results += "Vector Search Results:\n" for i, result in enumerate(results.vector_search_results): text = result.text - formatted_results += f"{i+1}. 
{text}\n" + formatted_results += f"Source [{i+1}]:\n{text}\n" if results.kg_search_results: - for result in results.kg_search_results: - if result.method == "local": - formatted_results += "KG Local Search Results:\n" - formatted_results += str(result.content) - elif result.method == "global": - formatted_results += "KG Global Search Results:\n" - formatted_results += str(result.content) + formatted_results += "KG Local Results:\n" + for j, kg_result in enumerate(results.kg_search_results): + formatted_results += ( + f"Source [{j+1}]: Name - {kg_result.content.name}\n" + ) + formatted_results += ( + f"Description - {kg_result.content.description}\n" + ) + findings = kg_result.metadata.get("findings", None) + if findings: + formatted_results += f"Supporting Findings: {findings}\n" return formatted_results @@ -44,7 +48,7 @@ def format_search_results_for_stream( if result.vector_search_results: context += f"<{VECTOR_SEARCH_STREAM_MARKER}>" vector_results_list = [ - result.dict() for result in result.vector_search_results + result.as_dict() for result in result.vector_search_results ] context += json.dumps(vector_results_list, default=str) context += f"" diff --git a/py/core/base/utils/splitter/__init__.py b/py/shared/utils/splitter/__init__.py similarity index 100% rename from py/core/base/utils/splitter/__init__.py rename to py/shared/utils/splitter/__init__.py diff --git a/py/core/base/utils/splitter/text.py b/py/shared/utils/splitter/text.py similarity index 99% rename from py/core/base/utils/splitter/text.py rename to py/shared/utils/splitter/text.py index ee13b9d24..15a3f475f 100644 --- a/py/core/base/utils/splitter/text.py +++ b/py/shared/utils/splitter/text.py @@ -212,7 +212,7 @@ def to_json( for key in list(secrets): value = secrets[key] if key in this.__fields__: - secrets[this.__fields__[key].alias] = value + secrets[this.__fields__[key].alias] = value # type: ignore lc_kwargs.update(this.lc_attributes) # include all secrets, even if not specified in kwargs 
diff --git a/py/tests/cli/commands/test_auth.py b/py/tests/cli/commands/test_auth.py deleted file mode 100644 index a2c5631e3..000000000 --- a/py/tests/cli/commands/test_auth.py +++ /dev/null @@ -1,33 +0,0 @@ -import pytest -from click.testing import CliRunner - -from cli.commands.auth import generate_private_key - - -@pytest.fixture -def runner(): - return CliRunner() - - -def test_generate_private_key(runner): - result = runner.invoke(generate_private_key) - assert result.exit_code == 0 - assert "Generated Private Key:" in result.output - assert ( - "Keep this key secure and use it as your R2R_SECRET_KEY." - in result.output - ) - - -def test_generate_private_key_output_format(runner): - result = runner.invoke(generate_private_key) - key_line = [ - line - for line in result.output.split("\n") - if "Generated Private Key:" in line - ][0] - key = key_line.split(":")[1].strip() - assert len(key) > 32 # The key should be reasonably long - assert ( - key.isalnum() or "-" in key or "_" in key - ) # The key should be URL-safe diff --git a/py/tests/cli/commands/test_ingestion.py b/py/tests/cli/commands/test_ingestion.py deleted file mode 100644 index c7bcbe1fe..000000000 --- a/py/tests/cli/commands/test_ingestion.py +++ /dev/null @@ -1,154 +0,0 @@ -import tempfile -from unittest.mock import MagicMock, patch - -import asyncclick as click -import pytest -from click.testing import CliRunner - -from cli.cli import cli - - -@pytest.fixture -def runner(): - return CliRunner() - - -@pytest.fixture -def mock_client(): - return MagicMock() - - -@pytest.fixture -def temp_file(): - with tempfile.NamedTemporaryFile(mode="w", delete=False) as f: - f.write("Test content") - f.flush() - yield f.name - - -@pytest.fixture(autouse=True) -def mock_cli_obj(mock_client): - with patch( - "cli.commands.ingestion.click.get_current_context" - ) as mock_context: - mock_context.return_value.obj = mock_client - yield - - -@pytest.fixture(autouse=True) -def mock_r2r_client(): - with patch( - 
"cli.command_group.R2RClient", new=MagicMock() - ) as MockR2RClient: - mock_client = MockR2RClient.return_value - mock_client.ingest_files.return_value = {"status": "success"} - mock_client.update_files.return_value = {"status": "updated"} - - original_callback = cli.callback - - def new_callback(*args, **kwargs): - ctx = click.get_current_context() - ctx.obj = mock_client - return original_callback(*args, **kwargs) - - cli.callback = new_callback - - yield mock_client - - cli.callback = original_callback - - -def test_ingest_files(runner, mock_r2r_client, temp_file): - result = runner.invoke(cli, ["ingest-files", temp_file]) - assert result.exit_code == 0 - assert '"status": "success"' in result.output - mock_r2r_client.ingest_files.assert_called_once_with( - [temp_file], None, None - ) - - -def test_ingest_files_with_options(runner, mock_r2r_client, temp_file): - result = runner.invoke( - cli, - [ - "ingest-files", - temp_file, - "--document-ids", - "doc1", - "--metadatas", - '{"key": "value"}', - ], - ) - assert result.exit_code == 0 - assert '"status": "success"' in result.output - assert mock_r2r_client.ingest_files.called, "ingest_files was not called" - mock_r2r_client.ingest_files.assert_called_once_with( - [temp_file], {"key": "value"}, ["doc1"] - ) - - -def test_update_files(runner, mock_r2r_client, temp_file): - result = runner.invoke( - cli, - [ - "update-files", - temp_file, - "--document-ids", - "doc1", - "--metadatas", - '{"key": "new_value"}', - ], - ) - assert result.exit_code == 0 - assert '"status": "updated"' in result.output - assert mock_r2r_client.update_files.called, "update_files was not called" - mock_r2r_client.update_files.assert_called_once_with( - [temp_file], ["doc1"], [{"key": "new_value"}] - ) - - -@patch("cli.commands.ingestion.ingest_files_from_urls") -def test_ingest_sample_file(mock_ingest, runner, mock_r2r_client): - mock_ingest.return_value = ["aristotle.txt"] - result = runner.invoke(cli, ["ingest-sample-file"]) - assert 
result.exit_code == 0 - assert "Sample file ingestion completed" in result.output - assert "aristotle.txt" in result.output - mock_ingest.assert_called_once() - - -@patch("cli.commands.ingestion.ingest_files_from_urls") -def test_ingest_sample_files(mock_ingest, runner, mock_r2r_client): - mock_ingest.return_value = ["aristotle.txt", "got.txt"] - result = runner.invoke(cli, ["ingest-sample-files"]) - assert result.exit_code == 0 - assert "Sample files ingestion completed" in result.output - assert "aristotle.txt" in result.output - assert "got.txt" in result.output - mock_ingest.assert_called_once() - - -def test_ingest_files_with_invalid_file(runner, mock_r2r_client): - result = runner.invoke(cli, ["ingest-files", "nonexistent_file.txt"]) - assert result.exit_code != 0 - assert "Error" in result.output - assert not mock_r2r_client.ingest_files.called - - -def test_update_files_with_invalid_metadata( - runner, mock_r2r_client, temp_file -): - result = runner.invoke( - cli, - [ - "update-files", - temp_file, - "--document-ids", - "doc1", - "--metadatas", - "invalid_json", - ], - ) - assert result.exit_code != 0 - assert "Error" in result.output - assert not mock_r2r_client.update_files.called diff --git a/py/tests/cli/commands/test_management.py b/py/tests/cli/commands/test_management.py deleted file mode 100644 index 4b3bce595..000000000 --- a/py/tests/cli/commands/test_management.py +++ /dev/null @@ -1,219 +0,0 @@ -import json -from unittest.mock import MagicMock, patch - -import asyncclick as click -import pytest -from click.testing import CliRunner - -from cli.cli import cli - - -@pytest.fixture -def runner(): - return CliRunner() - - -@pytest.fixture(autouse=True) -def mock_r2r_client(): - with patch( - "cli.command_group.R2RClient", new=MagicMock() - ) as MockR2RClient: - mock_client = MockR2RClient.return_value - mock_client.analytics.return_value = { - "status": "success", - "result": "analytics data", - } - mock_client.app_settings.return_value = { - 
"setting1": "value1", - "setting2": "value2", - } - mock_client.users_overview.return_value = [ - {"id": "user1", "name": "John"}, - {"id": "user2", "name": "Jane"}, - ] - mock_client.delete.return_value = {"deleted": 2} - mock_client.documents_overview.return_value = { - "results": [ - {"id": "doc1", "title": "Document 1"}, - {"id": "doc2", "title": "Document 2"}, - ] - } - mock_client.document_chunks.return_value = { - "results": [ - { - "fragment_id": "chunk1", - "text": "Content 1" * 50, - "metadata": {}, - }, - { - "fragment_id": "chunk2", - "text": "Content 2" * 50, - "metadata": {}, - }, - ] - } - mock_client.inspect_knowledge_graph.return_value = { - "nodes": 100, - "edges": 500, - } - - original_callback = cli.callback - - def new_callback(*args, **kwargs): - ctx = click.get_current_context() - ctx.obj = mock_client - return original_callback(*args, **kwargs) - - cli.callback = new_callback - - yield mock_client - - cli.callback = original_callback - - -def test_analytics(runner, mock_r2r_client): - filters = {"date": "2023-01-01"} - analysis_types = {"type": "user_activity"} - - result = runner.invoke( - cli, - [ - "analytics", - "--filters", - json.dumps(filters), - "--analysis-types", - json.dumps(analysis_types), - ], - ) - - assert result.exit_code == 0 - assert "success" in result.output - mock_r2r_client.analytics.assert_called_once_with(filters, analysis_types) - - -def test_analytics_invalid_json(runner, mock_r2r_client): - result = runner.invoke( - cli, - [ - "analytics", - "--filters", - "invalid_json", - "--analysis-types", - '{"type": "user_activity"}', - ], - ) - - assert result.exit_code == 2 - assert "Invalid value for '--filters'" in result.output - - -def test_app_settings(runner, mock_r2r_client): - result = runner.invoke(cli, ["app-settings"]) - - assert result.exit_code == 0 - assert "setting1" in result.output - assert "value2" in result.output - mock_r2r_client.app_settings.assert_called_once() - - -def test_users_overview(runner, 
mock_r2r_client): - result = runner.invoke(cli, ["users-overview", "--user-ids", "user1"]) - - assert result.exit_code == 0 - assert "Time taken:" in result.output - mock_r2r_client.users_overview.assert_called_once_with( - ["user1"], None, None - ) - - -def test_users_overview_no_ids(runner, mock_r2r_client): - result = runner.invoke(cli, ["users-overview"]) - - assert result.exit_code == 0 - assert "Time taken:" in result.output - mock_r2r_client.users_overview.assert_called_once_with(None, None, None) - - -def test_delete(runner, mock_r2r_client): - result = runner.invoke( - cli, ["delete", "-f", "date:gt:2023-01-01", "-f", "status:eq:inactive"] - ) - - assert result.exit_code == 0 - assert "deleted" in result.output - expected_filters = { - "date": {"$gt": "2023-01-01"}, - "status": {"$eq": "inactive"}, - } - mock_r2r_client.delete.assert_called_once_with(filters=expected_filters) - - -def test_delete_invalid_filter(runner, mock_r2r_client): - result = runner.invoke(cli, ["delete", "-f", "invalid_filter"]) - - assert result.exit_code != 0 - - -def test_documents_overview_without_document_id(runner, mock_r2r_client): - result = runner.invoke( - cli, - [ - "documents-overview", - ], - ) - - assert result.exit_code == 0 - mock_r2r_client.documents_overview.assert_called_once() - - -def test_documents_overview_with_document_id(runner, mock_r2r_client): - result = runner.invoke( - cli, ["documents-overview", "--document-ids", "doc1"] - ) - - assert result.exit_code == 0 - assert "doc1" in result.output - mock_r2r_client.documents_overview.assert_called_once_with( - ["doc1"], None, None - ) - - -def test_document_chunks(runner, mock_r2r_client): - result = runner.invoke(cli, ["document-chunks", "--document-id", "doc1"]) - - assert result.exit_code == 0 - assert "Number of chunks: 2" in result.output - assert "Fragment ID: chunk1" in result.output - assert "Text: Content 1" in result.output - assert "Content 1" * 5 in result.output - assert "..." 
in result.output - assert "Fragment ID: chunk2" in result.output - assert "Text: Content 2" in result.output - mock_r2r_client.document_chunks.assert_called_once_with("doc1", None, None) - - -def test_document_chunks_no_id(runner, mock_r2r_client): - result = runner.invoke(cli, ["document-chunks"]) - - assert result.exit_code == 0 - assert "Error: Document ID is required." in result.output - - -def test_inspect_knowledge_graph_no_kg_provider_specified( - runner, mock_r2r_client -): - result = runner.invoke(cli, ["inspect-knowledge-graph", "--limit", "100"]) - - assert result.exit_code == 1 - mock_r2r_client.inspect_knowledge_graph.assert_called_once_with( - None, "100" - ) - - -def test_inspect_knowledge_graph_no_limit_no_kg_provider_specified( - runner, mock_r2r_client -): - result = runner.invoke(cli, ["inspect-knowledge-graph"]) - - assert result.exit_code == 1 - mock_r2r_client.inspect_knowledge_graph.assert_called_once_with(None, None) diff --git a/py/tests/cli/commands/test_restructure.py b/py/tests/cli/commands/test_restructure.py deleted file mode 100644 index f8a6ccf8e..000000000 --- a/py/tests/cli/commands/test_restructure.py +++ /dev/null @@ -1,55 +0,0 @@ -from unittest.mock import MagicMock, patch - -import asyncclick as click -import pytest -from click.testing import CliRunner - -from cli.cli import cli - - -@pytest.fixture -def runner(): - return CliRunner() - - -@pytest.fixture -def mock_client(): - return MagicMock() - - -@pytest.fixture(autouse=True) -def mock_cli_obj(mock_client): - with patch( - "cli.commands.restructure.click.get_current_context" - ) as mock_context: - mock_context.return_value.obj = mock_client - yield - - -@pytest.fixture(autouse=True) -def mock_r2r_client(): - with patch( - "cli.command_group.R2RClient", new=MagicMock() - ) as MockR2RClient: - mock_client = MockR2RClient.return_value - - original_callback = cli.callback - - def new_callback(*args, **kwargs): - ctx = click.get_current_context() - ctx.obj = mock_client - return 
original_callback(*args, **kwargs) - - cli.callback = new_callback - - yield mock_client - - cli.callback = original_callback - - -def test_enrich_graph(runner, mock_r2r_client): - result = runner.invoke(cli, ["enrich-graph"]) - - assert result.exit_code == 0 - assert "Time taken" in result.output - mock_r2r_client.enrich_graph.assert_called_once() diff --git a/py/tests/cli/commands/test_templates.py b/py/tests/cli/commands/test_templates.py deleted file mode 100644 index 3c54227b2..000000000 --- a/py/tests/cli/commands/test_templates.py +++ /dev/null @@ -1,72 +0,0 @@ -from unittest.mock import MagicMock, patch - -import pytest -from click.testing import CliRunner - -from cli import cli - - -@pytest.fixture -def runner(): - return CliRunner() - - -@pytest.fixture -def mock_get_templates(): - with patch("cli.commands.templates.get_templates") as mock: - mock.return_value = ["template1", "template2"] - yield mock - - -@pytest.fixture -def mock_clone_operation(): - with patch("cli.commands.templates.clone_template") as mock: - yield mock - - -def test_list_templates(runner, mock_get_templates): - result = runner.invoke(cli, ["list-templates"]) - assert result.exit_code == 0 - assert "Available templates:" in result.output - assert "template1" in result.output - assert "template2" in result.output - - -def test_list_templates_error(runner, mock_get_templates): - mock_get_templates.side_effect = Exception("Failed to fetch templates") - result = runner.invoke(cli, ["list-templates"]) - assert result.exit_code != 0 - assert "Error: Failed to fetch templates" in result.output - - -def test_clone_success(runner, mock_clone_operation): - result = runner.invoke(cli, ["clone", "template1"]) - assert result.exit_code == 0 - assert "Successfully cloned template 'template1'" in result.output - mock_clone_operation.assert_called_once_with("template1", None) - - -def test_clone_with_location(runner, mock_clone_operation): - result = runner.invoke(cli, ["clone", "template1", 
"custom_location"]) - assert result.exit_code == 0 - assert ( - "Successfully cloned template 'template1' to custom_location" - in result.output - ) - mock_clone_operation.assert_called_once_with( - "template1", "custom_location" - ) - - -def test_clone_template_not_found(runner, mock_clone_operation): - mock_clone_operation.side_effect = ValueError("Template not found") - result = runner.invoke(cli, ["clone", "non_existent_template"]) - assert result.exit_code != 0 - assert "Error: Template not found" in result.output - - -def test_clone_unexpected_error(runner, mock_clone_operation): - mock_clone_operation.side_effect = Exception("Unexpected error") - result = runner.invoke(cli, ["clone", "template1"]) - assert result.exit_code != 0 - assert "Error: An unexpected error occurred" in result.output diff --git a/py/tests/cli/test_cli.py b/py/tests/cli/test_cli.py deleted file mode 100644 index c4be1cd1c..000000000 --- a/py/tests/cli/test_cli.py +++ /dev/null @@ -1,49 +0,0 @@ -import pytest -from click.testing import CliRunner - -from cli.cli import cli, main - - -@pytest.fixture -def runner(): - return CliRunner() - - -def test_cli_group_exists(): - assert callable(cli) - assert cli.name == "cli" - - -def test_main_function(runner): - result = runner.invoke(cli, ["--help"]) - assert result.exit_code == 0 - assert "Usage: cli [OPTIONS] COMMAND [ARGS]..." 
in result.output - - -def test_commands_added(): - commands = [ - "generate-private-key", - "ingest-files", - "update-files", - "ingest-sample-file", - "ingest-sample-files", - "analytics", - "app-settings", - "users-overview", - "documents-overview", - "document-chunks", - "inspect-knowledge-graph", - "enrich-graph", - "search", - "rag", - "health", - "server-stats", - "logs", - "docker-down", - "generate-report", - "serve", - "update", - "version", - ] - for command in commands: - assert command in cli.commands diff --git a/py/tests/cli/test_command_group.py b/py/tests/cli/test_command_group.py deleted file mode 100644 index 9b9973d0a..000000000 --- a/py/tests/cli/test_command_group.py +++ /dev/null @@ -1,19 +0,0 @@ -import pytest -from click.testing import CliRunner - -from cli.command_group import cli - - -@pytest.fixture -def runner(): - return CliRunner() - - -def test_cli_group_no_options(): - assert len(cli.params) == 1 - - -def test_cli_group_help(runner): - result = runner.invoke(cli, ["--help"]) - assert result.exit_code == 0 - assert "R2R CLI for all core operations." 
in result.output diff --git a/py/tests/cli/utils/test_docker_utils.py b/py/tests/cli/utils/test_docker_utils.py deleted file mode 100644 index cffe18866..000000000 --- a/py/tests/cli/utils/test_docker_utils.py +++ /dev/null @@ -1,196 +0,0 @@ -import os -import subprocess -import sys -from io import StringIO -from unittest.mock import patch - -import pytest -from click.testing import CliRunner - -from cli.utils.docker_utils import ( - bring_down_docker_compose, - build_docker_command, - check_docker_compose_version, - check_llm_reqs, - parse_version, - remove_r2r_network, -) - - -@pytest.fixture -def runner(): - return CliRunner() - - -def test_bring_down_docker_compose(runner): - with patch("os.system") as mock_system: - mock_system.return_value = 0 - result = bring_down_docker_compose("test_project", True, True) - assert result == 0 - mock_system.assert_called_once() - - -@patch("subprocess.check_output") -@patch("os.system") -def test_remove_r2r_network(mock_system, mock_check_output): - mock_check_output.return_value = b"r2r_test_network\nother_network" - mock_system.return_value = 0 - remove_r2r_network() - mock_system.assert_called_once_with("docker network rm r2r_test_network") - - -@pytest.mark.parametrize( - "llm_provider,model_provider,env_vars,expected_exit", - [ - ("openai", "openai", {"OPENAI_API_KEY": "test"}, False), - ("openai", "openai", {}, True), - ], -) -def test_check_llm_reqs(llm_provider, model_provider, env_vars, expected_exit): - with patch.dict(os.environ, env_vars, clear=True): - with patch("click.confirm", return_value=False): - with ( - pytest.raises(SystemExit) - if expected_exit - else patch("sys.exit") - ) as mock_exit: - check_llm_reqs(llm_provider, model_provider) - if expected_exit: - mock_exit.assert_called_once_with(1) - - -def test_build_docker_command(): - compose_files = { - "base": "base.yaml", - "neo4j": "neo4j.yaml", - "ollama": "ollama.yaml", - "postgres": "postgres.yaml", - "hatchet": "hatchet.yaml", - } - command = 
build_docker_command( - compose_files, - "localhost", - 7272, - False, - False, - False, - False, - "test_project", - "test_image", - None, - None, - ) - assert ( - "docker compose -f base.yaml -f neo4j.yaml -f ollama.yaml -f postgres.yaml -f hatchet.yaml" - in command - ) - assert "--project-name test_project" in command - assert "up -d" in command - - -@pytest.mark.parametrize( - "version_output,expected_result,expected_message", - [ - ( - "Docker Compose version v2.29.0", - True, - "Docker Compose version 2.29.0 is compatible.", - ), - ( - "Docker Compose version v2.24.5", - True, - "Warning: Docker Compose version 2.24.5 is outdated. Please upgrade to version 2.25.0 or higher.", - ), - ( - "Docker Compose version v3.0.0", - True, - "Docker Compose version 3.0.0 is compatible.", - ), - ( - "Docker Compose version 2.29.0", - True, - "Docker Compose version 2.29.0 is compatible.", - ), - ( - "Docker Compose version 2.29.0-desktop.1", - True, - "Docker Compose version 2.29.0 is compatible.", - ), - ], -) -def test_check_docker_compose_version_success( - version_output, expected_result, expected_message -): - with patch( - "subprocess.check_output", return_value=version_output.encode() - ): - captured_output = StringIO() - sys.stdout = captured_output - result = check_docker_compose_version() - sys.stdout = sys.__stdout__ - assert result == expected_result - assert expected_message in captured_output.getvalue() - - -@pytest.mark.parametrize( - "version_output,expected_message", - [ - ( - "Docker Compose version unknown", - "Unexpected version format: Docker Compose version unknown", - ), - ( - "Not a valid output", - "Unexpected version format: Not a valid output", - ), - ], -) -def test_check_docker_compose_version_invalid_format( - version_output, expected_message -): - with patch( - "subprocess.check_output", return_value=version_output.encode() - ): - captured_output = StringIO() - sys.stdout = captured_output - result = check_docker_compose_version() - 
sys.stdout = sys.__stdout__ - assert result == False - assert ( - "Error checking Docker Compose version" - in captured_output.getvalue() - ) - assert expected_message in captured_output.getvalue() - - -def test_check_docker_compose_version_not_installed(): - error_message = "docker: command not found" - mock_error = subprocess.CalledProcessError( - 1, "docker compose version", error_message.encode() - ) - with patch("subprocess.check_output", side_effect=mock_error): - captured_output = StringIO() - sys.stdout = captured_output - result = check_docker_compose_version() - sys.stdout = sys.__stdout__ - assert result == False - assert ( - "Error: Docker Compose is not installed or not working properly." - in captured_output.getvalue() - ) - assert error_message in captured_output.getvalue() - - -def test_check_docker_compose_version_unexpected_error(): - with patch( - "subprocess.check_output", side_effect=Exception("Unexpected error") - ): - captured_output = StringIO() - sys.stdout = captured_output - result = check_docker_compose_version() - sys.stdout = sys.__stdout__ - assert result == False - assert ( - "Error checking Docker Compose version: Unexpected error" - in captured_output.getvalue() - ) diff --git a/py/tests/cli/utils/test_param_types.py b/py/tests/cli/utils/test_param_types.py deleted file mode 100644 index cb9f58b71..000000000 --- a/py/tests/cli/utils/test_param_types.py +++ /dev/null @@ -1,69 +0,0 @@ -from unittest.mock import MagicMock - -import asyncclick as click -import pytest - -from cli.utils.param_types import JSON, JsonParamType - - -def test_json_param_type_convert_valid_json(): - json_type = JsonParamType() - result = json_type.convert('{"key": "value"}', None, None) - assert result == {"key": "value"} - - -def test_json_param_type_convert_invalid_json(): - json_type = JsonParamType() - with pytest.raises(click.BadParameter) as excinfo: - json_type.convert("{invalid json}", None, None) - assert "is not a valid JSON string" in 
str(excinfo.value) - - -def test_json_param_type_convert_dict(): - json_type = JsonParamType() - input_dict = {"key": "value"} - result = json_type.convert(input_dict, None, None) - assert result == input_dict - - -def test_json_param_type_name(): - json_type = JsonParamType() - assert json_type.name == "json" - - -def test_json_global_instance(): - assert isinstance(JSON, JsonParamType) - - -def test_json_param_type_convert_with_context(): - json_type = JsonParamType() - mock_ctx = MagicMock() - mock_param = MagicMock() - result = json_type.convert('{"key": "value"}', mock_param, mock_ctx) - assert result == {"key": "value"} - - -def test_json_param_type_convert_empty_string(): - json_type = JsonParamType() - with pytest.raises(click.BadParameter) as excinfo: - json_type.convert("", None, None) - assert "is not a valid JSON string" in str(excinfo.value) - - -def test_json_param_type_convert_none(): - json_type = JsonParamType() - result = json_type.convert(None, None, None) - assert result is None - - -def test_json_param_type_convert_complex_json(): - json_type = JsonParamType() - complex_json = ( - '{"key1": "value1", "key2": [1, 2, 3], "key3": {"nested": true}}' - ) - result = json_type.convert(complex_json, None, None) - assert result == { - "key1": "value1", - "key2": [1, 2, 3], - "key3": {"nested": True}, - } diff --git a/py/tests/cli/utils/test_timer.py b/py/tests/cli/utils/test_timer.py deleted file mode 100644 index 7081d9b5c..000000000 --- a/py/tests/cli/utils/test_timer.py +++ /dev/null @@ -1,78 +0,0 @@ -import time -from unittest.mock import patch - -import pytest - -from cli.utils.timer import timer - - -def test_timer_measures_time(): - with patch("time.time") as mock_time: - mock_time.side_effect = [0, 0.1] - - @timer() - def dummy_function(): - time.sleep(0.1) - - with patch("click.echo") as mock_echo: - dummy_function() - mock_echo.assert_called_once_with("Time taken: 0.10 seconds") - - -def test_timer_handles_exceptions(): - @timer() - def 
error_function(): - raise ValueError("Test exception") - - with patch("time.time") as mock_time: - mock_time.side_effect = [0, 0.1] - - with pytest.raises(ValueError, match="Test exception"): - error_function() - - -def test_timer_nested(): - with patch("time.time") as mock_time: - mock_time.side_effect = [0, 0.1, 0.2, 0.3] - - @timer() - def outer_function(): - time.sleep(0.1) - - @timer() - def inner_function(): - time.sleep(0.1) - - inner_function() - - with patch("click.echo") as mock_echo: - outer_function() - assert mock_echo.call_count == 2 - mock_echo.assert_any_call("Time taken: 0.10 seconds") - mock_echo.assert_any_call("Time taken: 0.30 seconds") - - -def test_timer_zero_duration(): - with patch("time.time") as mock_time: - mock_time.side_effect = [0, 0] - - @timer() - def quick_function(): - pass - - with patch("click.echo") as mock_echo: - quick_function() - mock_echo.assert_called_once_with("Time taken: 0.00 seconds") - - -def test_timer_as_context_manager(): - with patch("time.time") as mock_time: - mock_time.side_effect = [0, 0.1] - - def function_with_context(): - with timer(): - time.sleep(0.1) - - with patch("click.echo") as mock_echo: - function_with_context() - mock_echo.assert_called_once_with("Time taken: 0.10 seconds") diff --git a/py/tests/conftest.py b/py/tests/conftest.py new file mode 100644 index 000000000..a1e29f7dc --- /dev/null +++ b/py/tests/conftest.py @@ -0,0 +1,198 @@ +# tests//conftest.py +import os +import random +import uuid + +import pytest + +from core import ( + AuthConfig, + BCryptConfig, + CompletionConfig, + DatabaseConfig, + EmbeddingConfig, + FileConfig, + LocalRunLoggingProvider, + LoggingConfig, + Vector, + VectorEntry, + generate_id_from_label, +) +from core.providers import ( + BCryptProvider, + LiteCompletionProvider, + LiteLLMEmbeddingProvider, + PostgresDBProvider, + PostgresFileProvider, + R2RAuthProvider, +) + + +# Vectors +@pytest.fixture(scope="session") +def dimension(): + return 128 + + 
+@pytest.fixture(scope="session") +def num_entries(): + return 100 + + +@pytest.fixture(scope="session") +def sample_entries(dimension, num_entries): + def generate_random_vector_entry( + id_value: int, dimension: int + ) -> VectorEntry: + vector_data = [random.random() for _ in range(dimension)] + metadata = {"key": f"value_id_{id_value}", "raw_key": id_value} + return VectorEntry( + fragment_id=generate_id_from_label(f"id_{id_value}"), + extraction_id=uuid.uuid4(), + document_id=uuid.uuid4(), + user_id=uuid.uuid4(), + collection_ids=[uuid.uuid4()], + vector=Vector(data=vector_data), + text=f"Sample text for id_{id_value}", + metadata=metadata, + ) + + return [ + generate_random_vector_entry(i, dimension) for i in range(num_entries) + ] + + +# Crypto +@pytest.fixture(scope="session") +def crypto_config(): + return BCryptConfig() + + +@pytest.fixture(scope="session") +def crypto_provider(crypto_config): + return BCryptProvider(crypto_config) + + +# Postgres +@pytest.fixture(scope="session") +def db_config(): + collection_id = uuid.uuid4() + + random_project_name = f"test_collection_{collection_id.hex}" + return DatabaseConfig.create( + provider="postgres", project_name=random_project_name + ) + + +@pytest.fixture(scope="function") +async def postgres_db_provider( + db_config, dimension, crypto_provider, sample_entries +): + db = PostgresDBProvider( + db_config, dimension=dimension, crypto_provider=crypto_provider + ) + await db.initialize() + db.vector.upsert_entries(sample_entries) + yield db + # Teardown + # TODO - Add teardown methods + # await db.delete_project(db.project_name) + + +@pytest.fixture(scope="function") +def db_config_temporary(): + collection_id = uuid.uuid4() + + random_project_name = f"test_collection_{collection_id.hex}" + return DatabaseConfig.create( + provider="postgres", project_name=random_project_name + ) + + +@pytest.fixture(scope="function") +async def temporary_postgres_db_provider( + db_config_temporary, dimension, crypto_provider, 
sample_entries +): + db = PostgresDBProvider( + db_config_temporary, + dimension=dimension, + crypto_provider=crypto_provider, + ) + await db.initialize() + db.vector.upsert_entries(sample_entries) + try: + yield db + finally: + await db.relational.close() + db.vector.close() + + +# Auth +@pytest.fixture(scope="session") +def auth_config(): + return AuthConfig( + secret_key="test_secret_key", + access_token_lifetime_in_minutes=15, + refresh_token_lifetime_in_days=1, + require_email_verification=False, + ) + + +@pytest.fixture(scope="function") +async def r2r_auth_provider( + auth_config, crypto_provider, temporary_postgres_db_provider +): + auth_provider = R2RAuthProvider( + auth_config, crypto_provider, temporary_postgres_db_provider + ) + await auth_provider.initialize() + yield auth_provider + + +# Embeddings +@pytest.fixture +def litellm_provider(): + config = EmbeddingConfig( + provider="litellm", + base_model="text-embedding-3-small", + base_dimension=1536, + ) + return LiteLLMEmbeddingProvider(config) + + +# File Provider +@pytest.fixture(scope="function") +def file_config(): + return FileConfig(provider="postgres") + + +@pytest.fixture(scope="function") +async def postgres_file_provider(file_config, temporary_postgres_db_provider): + file_provider = PostgresFileProvider( + file_config, temporary_postgres_db_provider + ) + await file_provider.initialize() + yield file_provider + await file_provider._close_connection() + + +# LLM provider +@pytest.fixture +def litellm_completion_provider(): + config = CompletionConfig(provider="litellm") + return LiteCompletionProvider(config) + + +# Logging +@pytest.fixture(scope="function") +async def local_logging_provider(): + unique_id = str(uuid.uuid4()) + logging_path = f"test_{unique_id}.sqlite" + provider = LocalRunLoggingProvider( + LoggingConfig(logging_path=logging_path) + ) + await provider._init() + yield provider + await provider.close() + if os.path.exists(logging_path): + os.remove(logging_path) diff --git 
a/py/tests/cli/__init__.py b/py/tests/core/agent/__init__.py similarity index 100% rename from py/tests/cli/__init__.py rename to py/tests/core/agent/__init__.py diff --git a/py/tests/cli/commands/__init__.py b/py/tests/core/parsers/__init__.py similarity index 100% rename from py/tests/cli/commands/__init__.py rename to py/tests/core/parsers/__init__.py diff --git a/py/tests/core/parsers/conftest.py b/py/tests/core/parsers/conftest.py new file mode 100644 index 000000000..0a654bba1 --- /dev/null +++ b/py/tests/core/parsers/conftest.py @@ -0,0 +1,16 @@ +import pytest + +from core.base import ParsingConfig +from core.providers.parsing.unstructured_parsing import ( + UnstructuredParsingProvider, +) + + +@pytest.fixture +def parsing_config(): + return ParsingConfig() + + +@pytest.fixture +def unstructured_parsing_provider(parsing_config): + return UnstructuredParsingProvider(use_api=False, config=parsing_config) diff --git a/py/tests/test_parser.py b/py/tests/core/parsers/test_parsers.py similarity index 100% rename from py/tests/test_parser.py rename to py/tests/core/parsers/test_parsers.py diff --git a/py/tests/core/pipelines/__init__.py b/py/tests/core/pipelines/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/py/tests/test_pipeline.py b/py/tests/core/pipelines/test_pipeline_logic.py similarity index 100% rename from py/tests/test_pipeline.py rename to py/tests/core/pipelines/test_pipeline_logic.py diff --git a/py/tests/core/pipes/__init__.py b/py/tests/core/pipes/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/py/tests/core/providers/auth/test_auth_provider.py b/py/tests/core/providers/auth/test_auth_provider.py new file mode 100644 index 000000000..fc44165de --- /dev/null +++ b/py/tests/core/providers/auth/test_auth_provider.py @@ -0,0 +1,60 @@ +# tests/providers/auth/test_r2r_auth_provider.py +import pytest + +from core.base import R2RException + + +@pytest.mark.asyncio +async def 
test_register_and_login(r2r_auth_provider): + email = "test@example.com" + password = "password123" + user = await r2r_auth_provider.register(email, password) + assert user.email == email + tokens = await r2r_auth_provider.login(email, password) + assert "access_token" in tokens + assert "refresh_token" in tokens + + +@pytest.mark.asyncio +async def test_invalid_login(r2r_auth_provider): + email = "test@example.com" + password = "password123" + await r2r_auth_provider.register(email, password) + with pytest.raises(R2RException): + await r2r_auth_provider.login(email, "wrong_password") + + +@pytest.mark.asyncio +async def test_refresh_access_token(r2r_auth_provider): + email = "test@example.com" + password = "password123" + await r2r_auth_provider.register(email, password) + tokens = await r2r_auth_provider.login(email, password) + new_tokens = await r2r_auth_provider.refresh_access_token( + tokens["refresh_token"].token + ) + assert "access_token" in new_tokens + assert "refresh_token" in new_tokens + + +@pytest.mark.asyncio +async def test_change_password(r2r_auth_provider): + email = "test@example.com" + password = "password123" + new_password = "new_password456" + user = await r2r_auth_provider.register(email, password) + await r2r_auth_provider.change_password(user, password, new_password) + tokens = await r2r_auth_provider.login(email, new_password) + assert "access_token" in tokens + assert "refresh_token" in tokens + + +@pytest.mark.asyncio +async def test_logout(r2r_auth_provider): + email = "test@example.com" + password = "password123" + await r2r_auth_provider.register(email, password) + tokens = await r2r_auth_provider.login(email, password) + await r2r_auth_provider.logout(tokens["access_token"].token) + with pytest.raises(R2RException): + await r2r_auth_provider.decode_token(tokens["access_token"].token) diff --git a/py/tests/core/providers/database/__init__.py b/py/tests/core/providers/database/__init__.py new file mode 100644 index 
000000000..e69de29bb diff --git a/py/tests/core/providers/database/relational/test_collection_db.py b/py/tests/core/providers/database/relational/test_collection_db.py new file mode 100644 index 000000000..b43cfa065 --- /dev/null +++ b/py/tests/core/providers/database/relational/test_collection_db.py @@ -0,0 +1,178 @@ +from uuid import UUID + +import pytest + +from core.base import DocumentInfo, DocumentType, R2RException +from core.base.api.models import CollectionResponse + + +@pytest.mark.asyncio +async def test_create_collection(temporary_postgres_db_provider): + collection = ( + await temporary_postgres_db_provider.relational.create_collection( + "Test Collection", "Test Description" + ) + ) + assert isinstance(collection, CollectionResponse) + assert collection.name == "Test Collection" + assert collection.description == "Test Description" + + +@pytest.mark.asyncio +async def test_get_collection(temporary_postgres_db_provider): + created_collection = ( + await temporary_postgres_db_provider.relational.create_collection( + "Test Collection", "Test Description" + ) + ) + retrieved_collection = ( + await temporary_postgres_db_provider.relational.get_collection( + created_collection.collection_id + ) + ) + assert retrieved_collection == created_collection + + +@pytest.mark.asyncio +async def test_update_collection(temporary_postgres_db_provider): + created_collection = ( + await temporary_postgres_db_provider.relational.create_collection( + "Test Collection", "Test Description" + ) + ) + updated_collection = ( + await temporary_postgres_db_provider.relational.update_collection( + created_collection.collection_id, + name="Updated Collection", + description="Updated Description", + ) + ) + assert updated_collection.name == "Updated Collection" + assert updated_collection.description == "Updated Description" + + +@pytest.mark.asyncio +async def test_delete_collection(temporary_postgres_db_provider): + created_collection = ( + await 
temporary_postgres_db_provider.relational.create_collection( + "Test Collection", "Test Description" + ) + ) + await temporary_postgres_db_provider.relational.delete_collection( + created_collection.collection_id + ) + with pytest.raises(R2RException): + await temporary_postgres_db_provider.relational.get_collection( + created_collection.collection_id + ) + + +@pytest.mark.asyncio +async def test_list_collections(temporary_postgres_db_provider): + await temporary_postgres_db_provider.relational.create_collection( + "Collection 1", "Description 1" + ) + await temporary_postgres_db_provider.relational.create_collection( + "Collection 2", "Description 2" + ) + collections = ( + await temporary_postgres_db_provider.relational.list_collections() + ) + assert len(collections["results"]) >= 2 + assert collections["total_entries"] >= 2 + + +@pytest.mark.asyncio +async def test_get_collections_by_ids(temporary_postgres_db_provider): + collection1 = ( + await temporary_postgres_db_provider.relational.create_collection( + "Collection 1", "Description 1" + ) + ) + collection2 = ( + await temporary_postgres_db_provider.relational.create_collection( + "Collection 2", "Description 2" + ) + ) + collections = ( + await temporary_postgres_db_provider.relational.get_collections_by_ids( + [collection1.collection_id, collection2.collection_id] + ) + ) + assert len(collections) == 2 + assert collections[0].collection_id == collection1.collection_id + assert collections[1].collection_id == collection2.collection_id + + +@pytest.mark.asyncio +async def test_assign_and_remove_document_from_collection( + temporary_postgres_db_provider, +): + collection = ( + await temporary_postgres_db_provider.relational.create_collection( + "Test Collection", "Test Description" + ) + ) + document_id = UUID("00000000-0000-0000-0000-000000000001") + await temporary_postgres_db_provider.relational.upsert_documents_overview( + DocumentInfo( + id=document_id, + collection_ids=[], + 
user_id=UUID("00000000-0000-0000-0000-000000000002"), + type=DocumentType.PDF, + metadata={}, + version="v1", + size_in_bytes=0, + ) + ) + await temporary_postgres_db_provider.relational.assign_document_to_collection( + document_id, collection.collection_id + ) + document_collections = ( + await temporary_postgres_db_provider.relational.document_collections( + document_id + ) + ) + assert len(document_collections["results"]) == 1 + assert ( + document_collections["results"][0].collection_id + == collection.collection_id + ) + + await temporary_postgres_db_provider.relational.remove_document_from_collection( + document_id, collection.collection_id + ) + document_collections = ( + await temporary_postgres_db_provider.relational.document_collections( + document_id + ) + ) + assert len(document_collections["results"]) == 0 + + +@pytest.mark.asyncio +async def test_get_collections_for_user(temporary_postgres_db_provider): + user = await temporary_postgres_db_provider.relational.create_user( + "test@example.com", "password" + ) + collection1 = ( + await temporary_postgres_db_provider.relational.create_collection( + "Collection 1", "Description 1" + ) + ) + collection2 = ( + await temporary_postgres_db_provider.relational.create_collection( + "Collection 2", "Description 2" + ) + ) + await temporary_postgres_db_provider.relational.add_user_to_collection( + user.id, collection1.collection_id + ) + await temporary_postgres_db_provider.relational.add_user_to_collection( + user.id, collection2.collection_id + ) + user_collections = await temporary_postgres_db_provider.relational.get_collections_for_user( + user.id + ) + assert len(user_collections["results"]) == 2 + assert user_collections["total_entries"] == 2 diff --git a/py/tests/core/providers/database/relational/test_document_db.py b/py/tests/core/providers/database/relational/test_document_db.py new file mode 100644 index 000000000..940cd6370 --- /dev/null +++ 
b/py/tests/core/providers/database/relational/test_document_db.py @@ -0,0 +1,178 @@ +# tests/providers/database/relational/test_document_db.py +from uuid import UUID + +import pytest + +from core.base import ( + DocumentInfo, + DocumentType, + IngestionStatus, + RestructureStatus, +) + + +@pytest.mark.asyncio +async def test_create_table(temporary_postgres_db_provider): + await temporary_postgres_db_provider.relational.create_table() + # Verify that the table is created with the expected columns and constraints + # You can execute a query to check the table structure or use a database inspection tool + + +@pytest.mark.asyncio +async def test_upsert_documents_overview(temporary_postgres_db_provider): + document_info = DocumentInfo( + id=UUID("00000000-0000-0000-0000-000000000001"), + collection_ids=[UUID("00000000-0000-0000-0000-000000000002")], + user_id=UUID("00000000-0000-0000-0000-000000000003"), + type=DocumentType.PDF, + metadata={}, + title="Test Document", + version="1.0", + size_in_bytes=1024, + ingestion_status=IngestionStatus.PENDING, + restructuring_status=RestructureStatus.PENDING, + ) + await temporary_postgres_db_provider.relational.upsert_documents_overview( + document_info + ) + + # Verify that the document is inserted correctly + result = ( + await temporary_postgres_db_provider.relational.get_documents_overview( + filter_document_ids=[document_info.id] + ) + ) + assert len(result["results"]) == 1 + inserted_document = result["results"][0] + assert inserted_document.id == document_info.id + assert inserted_document.collection_ids == document_info.collection_ids + assert inserted_document.user_id == document_info.user_id + assert inserted_document.type == document_info.type + assert inserted_document.metadata == document_info.metadata + assert inserted_document.title == document_info.title + assert inserted_document.version == document_info.version + assert inserted_document.size_in_bytes == document_info.size_in_bytes + assert 
inserted_document.ingestion_status == document_info.ingestion_status + assert ( + inserted_document.restructuring_status + == document_info.restructuring_status + ) + + # Update the document and verify the changes + document_info.title = "Updated Test Document" + document_info.ingestion_status = IngestionStatus.SUCCESS + await temporary_postgres_db_provider.relational.upsert_documents_overview( + document_info + ) + + result = ( + await temporary_postgres_db_provider.relational.get_documents_overview( + filter_document_ids=[document_info.id] + ) + ) + assert len(result["results"]) == 1 + updated_document = result["results"][0] + assert updated_document.title == "Updated Test Document" + assert updated_document.ingestion_status == IngestionStatus.SUCCESS + + +@pytest.mark.asyncio +async def test_delete_from_documents_overview(temporary_postgres_db_provider): + document_info = DocumentInfo( + id=UUID("00000000-0000-0000-0000-000000000001"), + collection_ids=[UUID("00000000-0000-0000-0000-000000000002")], + user_id=UUID("00000000-0000-0000-0000-000000000003"), + type=DocumentType.PDF, + metadata={}, + title="Test Document", + version="1.0", + size_in_bytes=1024, + ingestion_status=IngestionStatus.PENDING, + restructuring_status=RestructureStatus.PENDING, + ) + await temporary_postgres_db_provider.relational.upsert_documents_overview( + document_info + ) + + await temporary_postgres_db_provider.relational.delete_from_documents_overview( + document_info.id + ) + + # Verify that the document is deleted + result = ( + await temporary_postgres_db_provider.relational.get_documents_overview( + filter_document_ids=[document_info.id] + ) + ) + assert len(result["results"]) == 0 + + +@pytest.mark.asyncio +async def test_get_documents_overview(temporary_postgres_db_provider): + document_info1 = DocumentInfo( + id=UUID("00000000-0000-0000-0000-000000000001"), + collection_ids=[UUID("00000000-0000-0000-0000-000000000002")], + user_id=UUID("00000000-0000-0000-0000-000000000003"), + 
type=DocumentType.PDF, + metadata={}, + title="Test Document 1", + version="1.0", + size_in_bytes=1024, + ingestion_status=IngestionStatus.PENDING, + restructuring_status=RestructureStatus.PENDING, + ) + document_info2 = DocumentInfo( + id=UUID("00000000-0000-0000-0000-000000000004"), + collection_ids=[UUID("00000000-0000-0000-0000-000000000002")], + user_id=UUID("00000000-0000-0000-0000-000000000003"), + type=DocumentType.DOCX, + metadata={}, + title="Test Document 2", + version="1.0", + size_in_bytes=2048, + ingestion_status=IngestionStatus.SUCCESS, + restructuring_status=RestructureStatus.PENDING, + ) + await temporary_postgres_db_provider.relational.upsert_documents_overview( + [document_info1, document_info2] + ) + + # Test filtering by user ID + result = ( + await temporary_postgres_db_provider.relational.get_documents_overview( + filter_user_ids=[UUID("00000000-0000-0000-0000-000000000003")] + ) + ) + assert len(result["results"]) == 2 + assert result["total_entries"] == 2 + + # Test filtering by document ID + result = ( + await temporary_postgres_db_provider.relational.get_documents_overview( + filter_document_ids=[UUID("00000000-0000-0000-0000-000000000001")] + ) + ) + assert len(result["results"]) == 1 + assert result["results"][0].id == UUID( + "00000000-0000-0000-0000-000000000001" + ) + + # Test filtering by collection ID + result = ( + await temporary_postgres_db_provider.relational.get_documents_overview( + filter_collection_ids=[ + UUID("00000000-0000-0000-0000-000000000002") + ] + ) + ) + assert len(result["results"]) == 2 + assert result["total_entries"] == 2 + + # Test pagination + result = ( + await temporary_postgres_db_provider.relational.get_documents_overview( + offset=1, limit=1 + ) + ) + assert len(result["results"]) == 1 + assert result["total_entries"] == 2 diff --git a/py/tests/core/providers/database/relational/test_relational_db_provider.py b/py/tests/core/providers/database/relational/test_relational_db_provider.py new file mode 
100644 index 000000000..7b631826e --- /dev/null +++ b/py/tests/core/providers/database/relational/test_relational_db_provider.py @@ -0,0 +1,10 @@ +# tests/providers/database/test_relational_db_provider.py +import pytest + +from core.providers.database import PostgresDBProvider + + +@pytest.mark.asyncio +async def test_relational_db_initialization(postgres_db_provider): + assert isinstance(postgres_db_provider, PostgresDBProvider) + assert postgres_db_provider.relational is not None diff --git a/py/tests/core/providers/database/relational/test_user_db.py b/py/tests/core/providers/database/relational/test_user_db.py new file mode 100644 index 000000000..6856e2003 --- /dev/null +++ b/py/tests/core/providers/database/relational/test_user_db.py @@ -0,0 +1,160 @@ +# tests/providers/database/relational/test_user_db.py +from datetime import datetime, timedelta +from uuid import UUID + +import pytest + +from core.base.api.models import UserResponse + + +@pytest.mark.asyncio +async def test_create_user(temporary_postgres_db_provider): + user = await temporary_postgres_db_provider.relational.create_user( + "test@example.com", "password" + ) + assert isinstance(user, UserResponse) + assert user.email == "test@example.com" + + +@pytest.mark.asyncio +async def test_get_user_by_id(temporary_postgres_db_provider): + user = await temporary_postgres_db_provider.relational.create_user( + "test@example.com", "password" + ) + retrieved_user = ( + await temporary_postgres_db_provider.relational.get_user_by_id(user.id) + ) + assert retrieved_user == user + + +@pytest.mark.asyncio +async def test_get_user_by_email(temporary_postgres_db_provider): + user = await temporary_postgres_db_provider.relational.create_user( + "test@example.com", "password" + ) + retrieved_user = ( + await temporary_postgres_db_provider.relational.get_user_by_email( + "test@example.com" + ) + ) + assert retrieved_user == user + + +@pytest.mark.asyncio +async def test_delete_user(temporary_postgres_db_provider): + 
user = await temporary_postgres_db_provider.relational.create_user( + "test@example.com", "password" + ) + await temporary_postgres_db_provider.relational.delete_user(user.id) + user = await temporary_postgres_db_provider.relational.get_user_by_id( + user.id + ) + assert user is None + + +@pytest.mark.asyncio +async def test_update_user(temporary_postgres_db_provider): + user = await temporary_postgres_db_provider.relational.create_user( + "test@example.com", "password" + ) + updated_user = UserResponse( + id=user.id, + email="updated@example.com", + is_superuser=True, + is_active=False, + is_verified=True, + name="Updated Name", + profile_picture="updated_picture.jpg", + bio="Updated bio", + collection_ids=[], + ) + result = await temporary_postgres_db_provider.relational.update_user( + updated_user + ) + assert result.email == updated_user.email + + +@pytest.mark.asyncio +async def test_update_user_password(temporary_postgres_db_provider): + user = await temporary_postgres_db_provider.relational.create_user( + "test@example.com", "password" + ) + await temporary_postgres_db_provider.relational.update_user_password( + user.id, "new_password" + ) + # Additional assertions can be added based on the expected behavior + + +@pytest.mark.asyncio +async def test_get_all_users(temporary_postgres_db_provider): + await temporary_postgres_db_provider.relational.create_user( + "test1@example.com", "password" + ) + await temporary_postgres_db_provider.relational.create_user( + "test2@example.com", "password" + ) + users = await temporary_postgres_db_provider.relational.get_all_users() + assert len(users) >= 2 + assert any(user.email == "test1@example.com" for user in users) + assert any(user.email == "test2@example.com" for user in users) + + +@pytest.mark.asyncio +async def test_store_and_verify_verification_code( + temporary_postgres_db_provider, +): + user = await temporary_postgres_db_provider.relational.create_user( + "test@example.com", "password" + ) + verification_code 
= "123456" + expiry = datetime.utcnow() + timedelta(hours=1) + await temporary_postgres_db_provider.relational.store_verification_code( + user.id, verification_code, expiry + ) + await temporary_postgres_db_provider.relational.verify_user( + verification_code + ) + updated_user = ( + await temporary_postgres_db_provider.relational.get_user_by_id(user.id) + ) + assert updated_user.is_verified + + +@pytest.mark.asyncio +async def test_store_and_get_reset_token(temporary_postgres_db_provider): + user = await temporary_postgres_db_provider.relational.create_user( + "test@example.com", "password" + ) + reset_token = "reset_token" + expiry = datetime.utcnow() + timedelta(hours=1) + await temporary_postgres_db_provider.relational.store_reset_token( + user.id, reset_token, expiry + ) + user_id = await temporary_postgres_db_provider.relational.get_user_id_by_reset_token( + reset_token + ) + assert user_id == user.id + + +@pytest.mark.asyncio +async def test_add_and_remove_user_from_collection( + temporary_postgres_db_provider, +): + user = await temporary_postgres_db_provider.relational.create_user( + "test@example.com", "password" + ) + collection_id = UUID("00000000-0000-0000-0000-000000000001") + await temporary_postgres_db_provider.relational.add_user_to_collection( + user.id, collection_id + ) + updated_user = ( + await temporary_postgres_db_provider.relational.get_user_by_id(user.id) + ) + assert collection_id in updated_user.collection_ids + await temporary_postgres_db_provider.relational.remove_user_from_collection( + user.id, collection_id + ) + updated_user = ( + await temporary_postgres_db_provider.relational.get_user_by_id(user.id) + ) + assert collection_id not in updated_user.collection_ids diff --git a/py/tests/core/providers/database/test_vector_db_provider.py b/py/tests/core/providers/database/test_vector_db_provider.py new file mode 100644 index 000000000..0fe76d383 --- /dev/null +++ b/py/tests/core/providers/database/test_vector_db_provider.py @@ -0,0 
+1,289 @@ +import pytest + +from core.providers.database import PostgresDBProvider +from r2r import VectorSearchSettings + + +@pytest.mark.asyncio +async def test_vector_db_initialization(postgres_db_provider): + assert isinstance(postgres_db_provider, PostgresDBProvider) + assert postgres_db_provider.vector is not None + + +@pytest.mark.asyncio +async def test_search_equality_filter(postgres_db_provider, sample_entries): + query_vector = sample_entries[0].vector + results = postgres_db_provider.vector.semantic_search( + query_vector.data, + VectorSearchSettings( + search_limit=10, filters={"key": {"$eq": "value_id_0"}} + ), + ) + assert len(results) == 1 + assert results[0].metadata["key"] == "value_id_0" + + +@pytest.mark.asyncio +async def test_search_not_equal_filter(postgres_db_provider, sample_entries): + query_vector = sample_entries[0].vector + results = postgres_db_provider.vector.semantic_search( + query_vector.data, + VectorSearchSettings( + search_limit=100, filters={"key": {"$ne": "value_id_0"}} + ), + ) + assert len(results) == 99 + assert all(r.metadata["key"] != "value_id_0" for r in results) + + +@pytest.mark.asyncio +async def test_search_greater_than_filter( + postgres_db_provider, sample_entries +): + query_vector = sample_entries[0].vector + results = postgres_db_provider.vector.semantic_search( + query_vector.data, + VectorSearchSettings( + search_limit=100, filters={"raw_key": {"$gt": 50}} + ), + ) + assert len(results) == 49 + assert all(int(r.text.split("_")[-1]) > 50 for r in results) + + +@pytest.mark.asyncio +async def test_search_less_than_or_equal_filter( + postgres_db_provider, sample_entries +): + query_vector = sample_entries[0].vector + results = postgres_db_provider.vector.semantic_search( + query_vector.data, + VectorSearchSettings( + search_limit=10, + filters={"raw_key": {"$lte": 20}}, + ef_search=100, # TODO - Better understand why we need to set this to search the entire database. 
+ ), + ) # TODO - Why is this number not always 10? + assert len(results) == 10 + + results = postgres_db_provider.vector.semantic_search( + query_vector.data, + VectorSearchSettings( + search_limit=100, filters={"raw_key": {"$lte": 20}} + ), + ) + assert len(results) == 21 + assert all(int(r.text.split("_")[-1]) <= 20 for r in results) + + +@pytest.mark.asyncio +async def test_search_in_filter(postgres_db_provider, sample_entries): + query_vector = sample_entries[0].vector + results = postgres_db_provider.vector.semantic_search( + query_vector.data, + VectorSearchSettings( + search_limit=10, + filters={"key": {"$in": ["value_id_0", "value_id_1"]}}, + ), + ) + assert len(results) == 2 + assert all( + r.metadata["key"] in ["value_id_0", "value_id_1"] for r in results + ) + + +@pytest.mark.asyncio +async def test_search_complex_and_filter(postgres_db_provider, sample_entries): + query_vector = sample_entries[0].vector + results = postgres_db_provider.vector.semantic_search( + query_vector.data, + VectorSearchSettings( + search_limit=10, + filters={ + "$and": [ + {"key": {"$eq": "value_id_0"}}, + {"raw_key": {"$lt": 50}}, + ] + }, + ), + ) + assert len(results) == 1 + assert results[0].metadata["key"] == "value_id_0" + assert int(results[0].text.split("_")[-1]) < 50 + + +@pytest.mark.asyncio +async def test_search_complex_or_filter(postgres_db_provider, sample_entries): + query_vector = sample_entries[0].vector + results = postgres_db_provider.vector.semantic_search( + query_vector.data, + VectorSearchSettings( + search_limit=11, + ef_search=100, # TODO - Better understand why we need to set this to search the entire database. 
+ filters={ + "$or": [ + {"key": {"$eq": "value_id_0"}}, + {"raw_key": {"$gte": 90}}, + ] + }, + ), + ) + assert len(results) == 11 + assert any(r.metadata["key"] == "value_id_0" for r in results) + assert any(int(r.text.split("_")[-1]) >= 90 for r in results) + + +@pytest.mark.asyncio +async def test_search_nested_and_or_filters( + postgres_db_provider, sample_entries +): + query_vector = sample_entries[0].vector + results = postgres_db_provider.vector.semantic_search( + query_vector.data, + VectorSearchSettings( + search_limit=10, + ef_search=100, # TODO - Better understand why we need to set this to search the entire database. + filters={ + "$and": [ + {"key": {"$eq": "value_id_0"}}, + { + "$or": [ + {"key": {"$in": ["value_id_0", "value_id_1"]}}, + {"raw_key": {"$gt": 98}}, + ] + }, + ] + }, + ), + ) + assert len(results) == 1 + assert results[0].metadata["key"] == "value_id_0" + assert results[0].text == "Sample text for id_0" + + +@pytest.mark.asyncio +async def test_delete_equality(temporary_postgres_db_provider, sample_entries): + deleted_ids = temporary_postgres_db_provider.vector.delete( + {"key": {"$eq": "value_id_0"}} + ) + assert len(deleted_ids) == 1 + remaining = temporary_postgres_db_provider.vector.semantic_search( + sample_entries[0].vector.data, + VectorSearchSettings(search_limit=100), + ) + assert len(remaining) == 99 + assert all(r.metadata["key"] != "value_id_0" for r in remaining) + + +@pytest.mark.asyncio +async def test_delete_greater_than( + temporary_postgres_db_provider, sample_entries +): + deleted_ids = temporary_postgres_db_provider.vector.delete( + {"raw_key": {"$gt": 90}} + ) + assert len(deleted_ids) == 9 + remaining = temporary_postgres_db_provider.vector.semantic_search( + sample_entries[0].vector.data, + VectorSearchSettings(search_limit=100), + ) + assert len(remaining) == 91 + assert all(int(r.text.split("_")[-1]) <= 90 for r in remaining) + + +@pytest.mark.asyncio +async def test_delete_in(temporary_postgres_db_provider, 
sample_entries): + deleted_ids = temporary_postgres_db_provider.vector.delete( + {"key": {"$in": ["value_id_0", "value_id_1"]}} + ) + assert len(deleted_ids) == 2 + remaining = temporary_postgres_db_provider.vector.semantic_search( + sample_entries[0].vector.data, + VectorSearchSettings(search_limit=100), + ) + assert len(remaining) == 98 + assert all( + r.metadata["key"] not in ["value_id_0", "value_id_1"] + for r in remaining + ) + + +@pytest.mark.asyncio +async def test_delete_complex_and( + temporary_postgres_db_provider, sample_entries +): + deleted_ids = temporary_postgres_db_provider.vector.delete( + { + "$and": [ + {"key": {"$eq": "value_id_0"}}, + {"raw_key": {"$lt": 50}}, + ] + } + ) + assert len(deleted_ids) == 1 + remaining = temporary_postgres_db_provider.vector.semantic_search( + sample_entries[0].vector.data, + VectorSearchSettings(search_limit=100), + ) + assert len(remaining) == 99 + assert not any( + r.metadata["key"] == "value_id_0" and int(r.text.split("_")[-1]) < 50 + for r in remaining + ) + + +@pytest.mark.asyncio +async def test_delete_complex_or( + temporary_postgres_db_provider, sample_entries +): + deleted_ids = temporary_postgres_db_provider.vector.delete( + { + "$or": [ + {"key": {"$eq": "value_id_0"}}, + {"raw_key": {"$gte": 90}}, + ] + } + ) + assert len(deleted_ids) == 11 + remaining = temporary_postgres_db_provider.vector.semantic_search( + sample_entries[0].vector.data, + VectorSearchSettings(search_limit=100), + ) + assert len(remaining) == 89 + assert all( + r.metadata["key"] != "value_id_0" and int(r.text.split("_")[-1]) < 90 + for r in remaining + ) + + +@pytest.mark.asyncio +async def test_delete_nested_and_or( + temporary_postgres_db_provider, sample_entries +): + deleted_ids = temporary_postgres_db_provider.vector.delete( + { + "$and": [ + {"key": {"$eq": "value_id_0"}}, + { + "$or": [ + {"key": {"$in": ["value_id_0", "value_id_1"]}}, + {"raw_key": {"$gt": 98}}, + ] + }, + ] + } + ) + assert len(deleted_ids) == 1 + remaining 
= temporary_postgres_db_provider.vector.semantic_search( + sample_entries[0].vector.data, + VectorSearchSettings(search_limit=100), + ) + assert len(remaining) == 99 + assert not any( + r.metadata["key"] == "value_id_0" + and ( + r.metadata["key"] in ["value_id_0", "value_id_1"] + or int(r.text.split("_")[-1]) > 98 + ) + for r in remaining + ) diff --git a/py/tests/core/providers/embedding/conftest.py b/py/tests/core/providers/embedding/conftest.py new file mode 100644 index 000000000..48cf7fb69 --- /dev/null +++ b/py/tests/core/providers/embedding/conftest.py @@ -0,0 +1,39 @@ +import asyncio + +import pytest + +from core import EmbeddingConfig +from core.providers import OllamaEmbeddingProvider, OpenAIEmbeddingProvider + + +@pytest.fixture(scope="session", autouse=True) +def event_loop_policy(): + asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) + + +@pytest.fixture(scope="function", autouse=True) +async def cleanup_tasks(): + yield + tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] + [task.cancel() for task in tasks] + await asyncio.gather(*tasks, return_exceptions=True) + + +@pytest.fixture +def openai_provider(): + config = EmbeddingConfig( + provider="openai", + base_model="text-embedding-ada-002", + base_dimension=1536, + ) + return OpenAIEmbeddingProvider(config) + + +@pytest.fixture +def ollama_provider(): + config = EmbeddingConfig( + provider="ollama", + base_model="mxbai-embed-large", + base_dimension=1024, + ) + return OllamaEmbeddingProvider(config) diff --git a/py/tests/core/providers/embedding/test_litellm_embedding_provider.py b/py/tests/core/providers/embedding/test_litellm_embedding_provider.py new file mode 100644 index 000000000..b3c35a83e --- /dev/null +++ b/py/tests/core/providers/embedding/test_litellm_embedding_provider.py @@ -0,0 +1,136 @@ +import asyncio +import contextlib + +import pytest + +from core import EmbeddingConfig +from core.providers import LiteLLMEmbeddingProvider + + 
+@pytest.fixture(scope="session", autouse=True) +def event_loop_policy(): + asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) + + +@pytest.fixture(scope="function", autouse=True) +async def cleanup_tasks(): + yield + tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] + [task.cancel() for task in tasks] + await asyncio.gather(*tasks, return_exceptions=True) + + +@pytest.fixture +def litellm_provider(): + config = EmbeddingConfig( + provider="litellm", + base_model="openai/text-embedding-3-small", + base_dimension=1536, + ) + + return LiteLLMEmbeddingProvider(config) + + +def test_litellm_initialization(litellm_provider): + assert isinstance(litellm_provider, LiteLLMEmbeddingProvider) + assert litellm_provider.base_model == "openai/text-embedding-3-small" + assert litellm_provider.base_dimension == 1536 + + +def test_litellm_invalid_provider_initialization(): + with pytest.raises(ValueError): + config = EmbeddingConfig(provider="invalid_provider") + LiteLLMEmbeddingProvider(config) + + +def test_litellm_get_embedding(litellm_provider): + embedding = litellm_provider.get_embedding("test text") + assert len(embedding) == 1536 + assert isinstance(embedding, list) + + +@pytest.mark.asyncio +async def test_litellm_async_get_embedding(litellm_provider): + with contextlib.suppress(asyncio.CancelledError): + embedding = await litellm_provider.async_get_embedding("test text") + assert len(embedding) == 1536 + assert isinstance(embedding, list) + + +def test_litellm_get_embeddings(litellm_provider): + embeddings = litellm_provider.get_embeddings(["text1", "text2"]) + assert len(embeddings) == 2 + assert all(len(emb) == 1536 for emb in embeddings) + + +@pytest.mark.asyncio +async def test_litellm_async_get_embeddings(litellm_provider): + with contextlib.suppress(asyncio.CancelledError): + embeddings = await litellm_provider.async_get_embeddings( + ["text1", "text2"] + ) + assert len(embeddings) == 2 + assert all(len(emb) == 1536 for emb in 
embeddings) + + +def test_litellm_rerank_model_not_supported(): + config = EmbeddingConfig( + provider="litellm", + base_model="openai/text-embedding-3-small", + base_dimension=1536, + rerank_model="some-model", + ) + with pytest.raises( + ValueError, match="does not support separate reranking" + ): + LiteLLMEmbeddingProvider(config) + + +def test_litellm_unsupported_stage(): + config = EmbeddingConfig( + provider="litellm", + base_model="openai/text-embedding-3-small", + base_dimension=1536, + ) + provider = LiteLLMEmbeddingProvider(config) + with pytest.raises( + ValueError, match="LiteLLMEmbeddingProvider only supports search stage" + ): + provider.get_embedding( + "test", stage=LiteLLMEmbeddingProvider.PipeStage.RERANK + ) + + +@pytest.mark.asyncio +async def test_litellm_async_unsupported_stage(): + config = EmbeddingConfig( + provider="litellm", + base_model="openai/text-embedding-3-small", + base_dimension=1536, + ) + provider = LiteLLMEmbeddingProvider(config) + with pytest.raises( + ValueError, match="LiteLLMEmbeddingProvider only supports search stage" + ): + await provider.async_get_embedding( + "test", stage=LiteLLMEmbeddingProvider.PipeStage.RERANK + ) + + +def test_litellm_get_embedding_error(mocker, litellm_provider): + mocker.patch.object( + litellm_provider, "get_embedding", side_effect=Exception("Test error") + ) + with pytest.raises(Exception, match="Test error"): + litellm_provider.get_embedding("test") + + +@pytest.mark.asyncio +async def test_litellm_async_get_embedding_error(mocker, litellm_provider): + mocker.patch.object( + litellm_provider, + "async_get_embedding", + side_effect=Exception("Test error"), + ) + with pytest.raises(Exception, match="Test error"): + await litellm_provider.async_get_embedding("test") diff --git a/py/tests/core/providers/file/test_file_provider.py b/py/tests/core/providers/file/test_file_provider.py new file mode 100644 index 000000000..70988ec77 --- /dev/null +++ 
b/py/tests/core/providers/file/test_file_provider.py @@ -0,0 +1,71 @@ +import io +import uuid + +import pytest + + +@pytest.mark.asyncio +async def test_store_and_retrieve_file(postgres_file_provider): + document_id = uuid.uuid4() + file_name = "test_file.txt" + file_content = io.BytesIO(b"Test file content") + file_type = "text/plain" + + await postgres_file_provider.store_file( + document_id, file_name, file_content, file_type + ) + retrieved_file = await postgres_file_provider.retrieve_file(document_id) + + assert retrieved_file is not None + assert retrieved_file[0] == file_name + assert retrieved_file[1].read() == b"Test file content" + assert retrieved_file[2] == len(b"Test file content") + + +@pytest.mark.asyncio +async def test_delete_file(postgres_file_provider): + document_id = uuid.uuid4() + file_name = "test_file.txt" + file_content = io.BytesIO(b"Test file content") + file_type = "text/plain" + + await postgres_file_provider.store_file( + document_id, file_name, file_content, file_type + ) + deleted = await postgres_file_provider.delete_file(document_id) + + assert deleted is True + with pytest.raises(Exception): + await postgres_file_provider.retrieve_file(document_id) + + +@pytest.mark.asyncio +async def test_get_files_overview(postgres_file_provider): + document_ids = [uuid.uuid4() for _ in range(5)] + file_names = [f"test_file_{i}.txt" for i in range(5)] + file_contents = [ + io.BytesIO(f"Test file content {i}".encode()) for i in range(5) + ] + file_type = "text/plain" + + for document_id, file_name, file_content in zip( + document_ids, file_names, file_contents + ): + await postgres_file_provider.store_file( + document_id, file_name, file_content, file_type + ) + + files_overview = await postgres_file_provider.get_files_overview(limit=3) + + assert len(files_overview) == 3 + assert all(file["document_id"] in document_ids for file in files_overview) + assert all(file["file_name"] in file_names for file in files_overview) + + filtered_files_overview 
= await postgres_file_provider.get_files_overview( + filter_document_ids=[document_ids[0], document_ids[1]], + filter_file_names=[file_names[0]], + ) + + assert len(filtered_files_overview) == 1 + assert filtered_files_overview[0]["document_id"] == document_ids[0] + assert filtered_files_overview[0]["file_name"] == file_names[0] diff --git a/py/tests/test_kg.py b/py/tests/core/providers/kg/test_kg.py similarity index 98% rename from py/tests/test_kg.py rename to py/tests/core/providers/kg/test_kg.py index a106612bc..d82fad50f 100644 --- a/py/tests/test_kg.py +++ b/py/tests/core/providers/kg/test_kg.py @@ -18,6 +18,7 @@ @pytest.fixture def kg_extraction_pipe(): return KGTriplesExtractionPipe( + config=MagicMock(), kg_provider=MagicMock(), database_provider=MagicMock(), llm_provider=MagicMock(), @@ -30,10 +31,10 @@ def kg_extraction_pipe(): def document_fragment(): return DocumentFragment( id=uuid4(), + collection_ids=[uuid4()], extraction_id=uuid4(), document_id=uuid4(), user_id=uuid4(), - group_ids=[uuid4()], data="Test data", metadata={}, ) @@ -64,7 +65,7 @@ async def test_extract_kg_success(kg_extraction_pipe, document_fragment): ) ) result = await kg_extraction_pipe.extract_kg( - document_fragment, generation_config=GenerationConfig() + [document_fragment], GenerationConfig(), 10, [], [] ) assert isinstance(result, KGExtraction) diff --git a/py/tests/core/providers/llm/test_litellm_llm_provider.py b/py/tests/core/providers/llm/test_litellm_llm_provider.py new file mode 100644 index 000000000..de651a686 --- /dev/null +++ b/py/tests/core/providers/llm/test_litellm_llm_provider.py @@ -0,0 +1,29 @@ +import asyncio +import contextlib + +import pytest + +from core import CompletionConfig, GenerationConfig +from core.providers import LiteCompletionProvider + + +def test_litellm_initialization(litellm_completion_provider): + assert isinstance(litellm_completion_provider, LiteCompletionProvider) + + +def test_litellm_invalid_provider_initialization(): + with 
pytest.raises(ValueError): + config = CompletionConfig(provider="invalid_provider") + LiteCompletionProvider(config) + + +@pytest.mark.asyncio +async def test_litellm_async_completion(litellm_completion_provider): + generation_config = GenerationConfig(model="gpt-3.5-turbo") + messages = [{"role": "user", "content": "Hello!"}] + + with contextlib.suppress(asyncio.CancelledError): + response = await litellm_completion_provider.aget_completion( + messages, generation_config + ) + assert len(response.choices) > 0 diff --git a/py/tests/core/providers/logging/conftest.py b/py/tests/core/providers/logging/conftest.py new file mode 100644 index 000000000..56cecfb80 --- /dev/null +++ b/py/tests/core/providers/logging/conftest.py @@ -0,0 +1,16 @@ +import asyncio + +import pytest + + +@pytest.fixture(scope="session", autouse=True) +def event_loop_policy(): + asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) + + +@pytest.fixture(scope="function", autouse=True) +async def cleanup_tasks(): + yield + tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] + [task.cancel() for task in tasks] + await asyncio.gather(*tasks, return_exceptions=True) diff --git a/py/tests/core/providers/logging/test_logging_provider.py b/py/tests/core/providers/logging/test_logging_provider.py new file mode 100644 index 000000000..170b6ed89 --- /dev/null +++ b/py/tests/core/providers/logging/test_logging_provider.py @@ -0,0 +1,148 @@ +import logging +import os +import uuid +from uuid import UUID + +import pytest + +from core import LocalRunLoggingProvider, LoggingConfig, generate_run_id + +logger = logging.getLogger(__name__) + + +@pytest.mark.asyncio +async def test_logging(local_logging_provider): + run_id = generate_run_id() + await local_logging_provider.log(run_id, "key", "value") + logs = await local_logging_provider.get_logs([run_id]) + assert len(logs) == 1 + assert logs[0]["key"] == "key" + assert logs[0]["value"] == "value" + + +async def 
test_multiple_log_entries(local_logging_provider): + run_ids = [generate_run_id() for _ in range(3)] + entries = [ + (run_id, f"key_{i}", f"value_{i}") for i, run_id in enumerate(run_ids) + ] + for run_id, key, value in entries: + await local_logging_provider.log(run_id, key, value) + + logs = await local_logging_provider.get_logs(run_ids) + assert len(logs) == 3, f"Expected 3 logs, got {len(logs)}" + + for log in logs: + run_id = log.get("run_id") + assert run_id is not None, f"Log entry is missing 'run_id': {log}" + + if isinstance(run_id, str): + run_id = UUID(run_id) + + matching_entries = [entry for entry in entries if entry[0] == run_id] + assert ( + len(matching_entries) == 1 + ), f"No matching entry found for run_id {run_id}" + + selected_entry = matching_entries[0] + assert log["key"] == selected_entry[1] + assert log["value"] == selected_entry[2] + + # Additional check to ensure all entries were logged + logged_ids = set( + ( + UUID(log["run_id"]) + if isinstance(log["run_id"], str) + else log["run_id"] + ) + for log in logs + ) + entry_ids = set(entry[0] for entry in entries) + assert ( + logged_ids == entry_ids + ), f"Mismatch between logged IDs {logged_ids} and entry IDs {entry_ids}" + + +@pytest.mark.asyncio +async def test_log_retrieval_limit(local_logging_provider): + run_ids = [] + for i in range(10): + run_ids.append(generate_run_id()) + await local_logging_provider.log(run_ids[-1], f"key_{i}", f"value_{i}") + + logs = await local_logging_provider.get_logs(run_ids[:5]) + assert len(logs) == 5 + + +async def test_specific_run_type_retrieval(local_logging_provider): + run_id_0, run_id_1 = generate_run_id(), generate_run_id() + + await local_logging_provider.log(run_id_0, "run_type", "RETRIEVAL") + await local_logging_provider.log(run_id_0, "key_0", "value_0") + await local_logging_provider.log(run_id_1, "run_type", "MANAGEMENT") + await local_logging_provider.log(run_id_1, "key_1", "value_1") + + # Log info for both run IDs + await 
local_logging_provider.info_log(run_id_0, "RETRIEVAL", uuid.uuid4()) + await local_logging_provider.info_log(run_id_1, "MANAGEMENT", uuid.uuid4()) + + run_info = await local_logging_provider.get_info_logs( + run_type_filter="RETRIEVAL" + ) + assert ( + len(run_info) == 1 + ), f"Expected 1 'RETRIEVAL' log, got {len(run_info)}" + assert ( + run_info[0].run_id == run_id_0 + ), f"Expected run_id {run_id_0}, got {run_info[0].run_id}" + + logs = await local_logging_provider.get_logs( + [run.run_id for run in run_info] + ) + assert len(logs) == 2, f"Expected 2 logs for run_id_0, got {len(logs)}" + assert all( + log["run_id"] == run_id_0 for log in logs + ), "All logs should be for run_id_0" + assert any( + log["key"] == "run_type" and log["value"] == "RETRIEVAL" + for log in logs + ), "Should have a 'RETRIEVAL' log" + assert any( + log["key"] == "key_0" and log["value"] == "value_0" for log in logs + ), "Should have a 'key_0' log" + + +@pytest.mark.asyncio +async def test_info_logging(local_logging_provider): + run_id = generate_run_id() + user_id = uuid.uuid4() + run_type = "RETRIEVAL" + await local_logging_provider.info_log(run_id, run_type, user_id) + info_logs = await local_logging_provider.get_info_logs() + assert len(info_logs) == 1 + assert info_logs[0].run_id == run_id + assert info_logs[0].run_type == run_type + assert info_logs[0].user_id == user_id + + +@pytest.mark.asyncio +async def test_get_info_logs_with_user_filter(local_logging_provider): + user_id_1, user_id_2 = uuid.uuid4(), uuid.uuid4() + await local_logging_provider.info_log( + generate_run_id(), "RETRIEVAL", user_id_1 + ) + await local_logging_provider.info_log( + generate_run_id(), "MANAGEMENT", user_id_2 + ) + + info_logs = await local_logging_provider.get_info_logs( + user_ids=[user_id_1] + ) + assert len(info_logs) == 1 + assert info_logs[0].user_id == user_id_1 + + info_logs = await local_logging_provider.get_info_logs( + run_type_filter="MANAGEMENT", user_ids=[user_id_2] + ) + assert 
len(info_logs) == 1 + assert info_logs[0].user_id == user_id_2 + assert info_logs[0].run_type == "MANAGEMENT" diff --git a/py/tests/core/providers/parsing/conftest.py b/py/tests/core/providers/parsing/conftest.py new file mode 100644 index 000000000..0a654bba1 --- /dev/null +++ b/py/tests/core/providers/parsing/conftest.py @@ -0,0 +1,16 @@ +import pytest + +from core.base import ParsingConfig +from core.providers.parsing.unstructured_parsing import ( + UnstructuredParsingProvider, +) + + +@pytest.fixture +def parsing_config(): + return ParsingConfig() + + +@pytest.fixture +def unstructured_parsing_provider(parsing_config): + return UnstructuredParsingProvider(use_api=False, config=parsing_config) diff --git a/py/tests/core/providers/parsing/test_unstructured_parsing.py b/py/tests/core/providers/parsing/test_unstructured_parsing.py new file mode 100644 index 000000000..c6dd36d14 --- /dev/null +++ b/py/tests/core/providers/parsing/test_unstructured_parsing.py @@ -0,0 +1,73 @@ +from io import BytesIO +from unittest.mock import AsyncMock, MagicMock, patch + +import httpx +import pytest + +from core import generate_id_from_label +from core.base import Document, DocumentExtraction, DocumentType +from core.providers.parsing.unstructured_parsing import FallbackElement + + +@pytest.mark.asyncio +async def test_parse_fallback(unstructured_parsing_provider): + document = Document( + id=generate_id_from_label("test_parse_fallback"), + collection_ids=[], + user_id=generate_id_from_label("test_user"), + type=DocumentType.PNG, + metadata={}, + ) + chunk_size = 128 + + async def mock_ingest(file_content, chunk_size): + for text in ["text1", "text2"]: + yield text + + mock_parser = AsyncMock() + mock_parser.ingest = mock_ingest + unstructured_parsing_provider.parsers[DocumentType.PNG] = mock_parser + + elements = [ + element + async for element in unstructured_parsing_provider.parse_fallback( + b"test_data", document, chunk_size + ) + ] + + assert len(elements) == 2 + assert 
isinstance(elements[0], FallbackElement) + assert elements[0].text == "text1" + assert elements[0].metadata == {"chunk_id": 0} + assert isinstance(elements[1], FallbackElement) + assert elements[1].text == "text2" + assert elements[1].metadata == {"chunk_id": 1} + + +@pytest.mark.asyncio +async def test_parse_with_local_unstructured(unstructured_parsing_provider): + document = Document( + id=generate_id_from_label("test_parse_with_local_unstructured"), + collection_ids=[], + user_id=generate_id_from_label("test_user"), + type=DocumentType.DOCX, + metadata={"title": "test_title"}, + ) + + async def mock_post(url, json, **kwargs): + return MagicMock( + json=MagicMock(return_value={"elements": [{"text": "test_text"}]}) + ) + + with patch.object(httpx.AsyncClient, "post", side_effect=mock_post): + extractions = [ + extraction + async for extraction in unstructured_parsing_provider.parse( + BytesIO(b"test_data"), document + ) + ] + + assert len(extractions) == 1 + assert isinstance(extractions[0], DocumentExtraction) + assert extractions[0].data == "test_text" + assert extractions[0].metadata["partitioned_by_unstructured"] is True diff --git a/py/tests/main/api/__init__.py b/py/tests/main/api/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/py/tests/main/app/__init__.py b/py/tests/main/app/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/py/tests/main/assembly/__init__.py b/py/tests/main/assembly/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/py/tests/main/sdk/__init__.py b/py/tests/main/sdk/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/py/tests/main/services/__init__.py b/py/tests/main/services/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/py/tests/regression/__init__.py b/py/tests/regression/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/py/tests/regression/expected_outputs/test_document_management.json 
b/py/tests/regression/expected_outputs/test_document_management.json index 74175a24f..7114b58d2 100644 --- a/py/tests/regression/expected_outputs/test_document_management.json +++ b/py/tests/regression/expected_outputs/test_document_management.json @@ -1,1183 +1,1026 @@ { "ingest_sample_files": { - "results": { - "processed_documents": [ - { - "id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", - "metadata": {}, - "title": "pg_essay_1.html", - "version": "v0", - "size_in_bytes": 131503, - "status": "success", - "created_at": "2024-08-13T17:43:28.344768", - "updated_at": "2024-08-13T17:43:28.344768" - }, - { - "id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", - "metadata": {}, - "title": "got.txt", - "version": "v0", - "size_in_bytes": 9491, - "status": "success", - "created_at": "2024-08-13T17:43:28.344841", - "updated_at": "2024-08-13T17:43:28.344841" - }, - { - "id": "52e12576-090f-59db-91f4-6d4b2e29ae6c", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "mp3", - "metadata": {}, - "title": "sample.mp3", - "version": "v0", - "size_in_bytes": 121671, - "status": "success", - "created_at": "2024-08-13T17:43:28.344851", - "updated_at": "2024-08-13T17:43:28.344851" - }, - { - "id": "3e157b3a-8469-51db-90d9-52e7d896b49b", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "pdf", - "metadata": {}, - "title": "uber_2021.pdf", - "version": "v0", - "size_in_bytes": 1880483, - "status": "success", - "created_at": "2024-08-13T17:43:28.344856", - "updated_at": "2024-08-13T17:43:28.344856" - }, - { - "id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "mp3", - "metadata": {}, - "title": "sample2.mp3", - "version": "v0", - "size_in_bytes": 72456, - "status": "success", - 
"created_at": "2024-08-13T17:43:28.344867", - "updated_at": "2024-08-13T17:43:28.344867" - }, - { - "id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", - "metadata": {}, - "title": "aristotle_v2.txt", - "version": "v0", - "size_in_bytes": 7443, - "status": "success", - "created_at": "2024-08-13T17:43:28.344871", - "updated_at": "2024-08-13T17:43:28.344871" - }, - { - "id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", - "metadata": {}, - "title": "pg_essay_4.html", - "version": "v0", - "size_in_bytes": 118113, - "status": "success", - "created_at": "2024-08-13T17:43:28.344874", - "updated_at": "2024-08-13T17:43:28.344874" - }, - { - "id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", - "metadata": {}, - "title": "pg_essay_5.html", - "version": "v0", - "size_in_bytes": 123779, - "status": "success", - "created_at": "2024-08-13T17:43:28.344879", - "updated_at": "2024-08-13T17:43:28.344879" - }, - { - "id": "2f576170-c4f9-5141-a910-a0924f341de4", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "pdf", - "metadata": {}, - "title": "lyft_2021.pdf", - "version": "v0", - "size_in_bytes": 1440303, - "status": "success", - "created_at": "2024-08-13T17:43:28.344884", - "updated_at": "2024-08-13T17:43:28.344884" - }, - { - "id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", - "metadata": {}, - "title": "test.txt", - "version": "v0", - "size_in_bytes": 20, - "status": "success", - "created_at": "2024-08-13T17:43:28.344889", - "updated_at": "2024-08-13T17:43:28.344889" - }, - { - "id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", - 
"metadata": {}, - "title": "pg_essay_2.html", - "version": "v0", - "size_in_bytes": 125112, - "status": "success", - "created_at": "2024-08-13T17:43:28.344893", - "updated_at": "2024-08-13T17:43:28.344893" - }, - { - "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", - "metadata": {}, - "title": "aristotle.txt", - "version": "v0", - "size_in_bytes": 73353, - "status": "success", - "created_at": "2024-08-13T17:43:28.344898", - "updated_at": "2024-08-13T17:43:28.344898" - }, - { - "id": "d421207a-d799-5806-8d67-46b2005b15d4", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", - "metadata": {}, - "title": "yc_companies.txt", - "version": "v0", - "size_in_bytes": 47209, - "status": "success", - "created_at": "2024-08-13T17:43:28.344901", - "updated_at": "2024-08-13T17:43:28.344901" - }, - { - "id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "png", - "metadata": {}, - "title": "screen_shot.png", - "version": "v0", - "size_in_bytes": 791764, - "status": "success", - "created_at": "2024-08-13T17:43:28.344905", - "updated_at": "2024-08-13T17:43:28.344905" - }, - { - "id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", - "metadata": {}, - "title": "pg_essay_3.html", - "version": "v0", - "size_in_bytes": 124915, - "status": "success", - "created_at": "2024-08-13T17:43:28.344908", - "updated_at": "2024-08-13T17:43:28.344908" - } - ], - "failed_documents": [], - "skipped_documents": [] - } + "results": [ + { + "message": "Ingestion task queued successfully.", + "task_id": "77ba1665-2646-457c-a892-da3c791f5e88", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "ae150ce4-1f91-4fa6-affb-7dd8ab1ad063", + "document_id": 
"716fea3a-826b-5b27-8e59-ffbd1a35455a" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "1d733a99-2c79-4d3e-bb0c-a0d49b26dc37", + "document_id": "52e12576-090f-59db-91f4-6d4b2e29ae6c" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "c7d0b32c-00d9-4f0c-9f0d-240f86056665", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "da16c185-57bf-4f30-bc8a-598e127824a1", + "document_id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "460abc23-60de-4d11-8f10-81079d7d4990", + "document_id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "aa01d8e2-c866-4e60-8190-3c7894afc263", + "document_id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "6da2d33b-7b00-4cd3-9ede-9748ca5fd936", + "document_id": "7b0f40c5-2ace-5781-ae35-ead99ddee8c5" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "b16a158c-85da-4290-92ae-6f0a4881c279", + "document_id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "fcdb9219-9d4f-4de2-aeb0-4fc688ffb7b4", + "document_id": "2f576170-c4f9-5141-a910-a0924f341de4" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "082de59f-0252-4f2e-b038-90253b8066f5", + "document_id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "8864c244-3372-4196-80c2-0d9dd667c3fd", + "document_id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "2fe08ebb-7157-452c-811c-7ede2b05e750", + "document_id": "db02076e-989a-59cd-98d5-e24e15a0bd27" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": 
"78ea4936-8bf6-4539-9db3-68f0e5b5bbe5", + "document_id": "d421207a-d799-5806-8d67-46b2005b15d4" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "ad23dd8d-f0c1-4641-a6a2-e909ede1b3d1", + "document_id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "c100a64b-c975-4a5e-bde4-7f1a0718b851", + "document_id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c" + } + ] }, "reingest_sample_file": { - "results": "All provided documents already exist. Use the `update_files` endpoint instead to update these documents." - }, - "documents_overview": { "results": [ { - "id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", - "metadata": {}, - "title": "pg_essay_1.html", - "version": "v0", - "size_in_bytes": 131503, - "status": "success", - "created_at": "2024-08-13T17:43:28.344768Z", - "updated_at": "2024-08-13T17:43:28.344768Z" + "message": "Ingestion task queued successfully.", + "task_id": "d0c84b68-407c-4329-b7ce-fb32df9f196b", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" }, { - "id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", - "metadata": {}, - "title": "got.txt", - "version": "v0", - "size_in_bytes": 9491, - "status": "success", - "created_at": "2024-08-13T17:43:28.344841Z", - "updated_at": "2024-08-13T17:43:28.344841Z" + "message": "Ingestion task queued successfully.", + "task_id": "8134708a-ac7e-436c-977a-4ba1434aeb94", + "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a" }, { - "id": "52e12576-090f-59db-91f4-6d4b2e29ae6c", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "mp3", - "metadata": {}, - "title": "sample.mp3", - "version": "v0", - "size_in_bytes": 121671, - "status": "success", - "created_at": "2024-08-13T17:43:28.344851Z", - "updated_at": "2024-08-13T17:43:28.344851Z" + 
"message": "Ingestion task queued successfully.", + "task_id": "fcf12fcf-092c-43c6-85fa-3c02b62231a1", + "document_id": "52e12576-090f-59db-91f4-6d4b2e29ae6c" }, { - "id": "3e157b3a-8469-51db-90d9-52e7d896b49b", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "pdf", - "metadata": {}, - "title": "uber_2021.pdf", - "version": "v0", - "size_in_bytes": 1880483, - "status": "success", - "created_at": "2024-08-13T17:43:28.344856Z", - "updated_at": "2024-08-13T17:43:28.344856Z" + "message": "Ingestion task queued successfully.", + "task_id": "ab4f1bc7-7d62-47ee-9e21-6b4fab7ccfb7", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b" }, { - "id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "mp3", - "metadata": {}, - "title": "sample2.mp3", - "version": "v0", - "size_in_bytes": 72456, - "status": "success", - "created_at": "2024-08-13T17:43:28.344867Z", - "updated_at": "2024-08-13T17:43:28.344867Z" + "message": "Ingestion task queued successfully.", + "task_id": "ef38bf7b-5688-4bb4-b861-6e06dd4603dc", + "document_id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9" }, { - "id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", - "metadata": {}, - "title": "aristotle_v2.txt", - "version": "v0", - "size_in_bytes": 7443, - "status": "success", - "created_at": "2024-08-13T17:43:28.344871Z", - "updated_at": "2024-08-13T17:43:28.344871Z" + "message": "Ingestion task queued successfully.", + "task_id": "8bb837ed-d586-4d8b-8f80-e69e6d848dba", + "document_id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526" }, { - "id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", - "metadata": {}, - "title": "pg_essay_4.html", - "version": "v0", - "size_in_bytes": 118113, - "status": "success", - "created_at": "2024-08-13T17:43:28.344874Z", - 
"updated_at": "2024-08-13T17:43:28.344874Z" + "message": "Ingestion task queued successfully.", + "task_id": "873adc8b-c630-49e8-9859-b49e2588e72c", + "document_id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9" }, { - "id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", - "metadata": {}, - "title": "pg_essay_5.html", - "version": "v0", - "size_in_bytes": 123779, - "status": "success", - "created_at": "2024-08-13T17:43:28.344879Z", - "updated_at": "2024-08-13T17:43:28.344879Z" + "message": "Ingestion task queued successfully.", + "task_id": "e2552c6f-884d-42eb-b279-ae881d6a8338", + "document_id": "7b0f40c5-2ace-5781-ae35-ead99ddee8c5" }, { - "id": "2f576170-c4f9-5141-a910-a0924f341de4", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "pdf", - "metadata": {}, - "title": "lyft_2021.pdf", - "version": "v0", - "size_in_bytes": 1440303, - "status": "success", - "created_at": "2024-08-13T17:43:28.344884Z", - "updated_at": "2024-08-13T17:43:28.344884Z" + "message": "Ingestion task queued successfully.", + "task_id": "ff2a206b-f9cb-4369-ad7f-be5f930ab6f0", + "document_id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c" }, { - "id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", - "metadata": {}, - "title": "test.txt", - "version": "v0", - "size_in_bytes": 20, - "status": "success", - "created_at": "2024-08-13T17:43:28.344889Z", - "updated_at": "2024-08-13T17:43:28.344889Z" + "message": "Ingestion task queued successfully.", + "task_id": "2c26fd9d-e35b-4597-821e-c1e4031df1f8", + "document_id": "2f576170-c4f9-5141-a910-a0924f341de4" }, { - "id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", - "metadata": {}, - "title": "pg_essay_2.html", - "version": "v0", - "size_in_bytes": 125112, - "status": "success", - 
"created_at": "2024-08-13T17:43:28.344893Z", - "updated_at": "2024-08-13T17:43:28.344893Z" + "message": "Ingestion task queued successfully.", + "task_id": "1352946d-1a09-4534-ad29-94e6679ee4cf", + "document_id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc" }, { - "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", - "metadata": {}, - "title": "aristotle.txt", - "version": "v0", - "size_in_bytes": 73353, - "status": "success", - "created_at": "2024-08-13T17:43:28.344898Z", - "updated_at": "2024-08-13T17:43:28.344898Z" + "message": "Ingestion task queued successfully.", + "task_id": "36191afd-7628-4faa-aa7c-4da16cf4ee46", + "document_id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1" }, { - "id": "d421207a-d799-5806-8d67-46b2005b15d4", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", - "metadata": {}, - "title": "yc_companies.txt", - "version": "v0", - "size_in_bytes": 47209, - "status": "success", - "created_at": "2024-08-13T17:43:28.344901Z", - "updated_at": "2024-08-13T17:43:28.344901Z" + "message": "Ingestion task queued successfully.", + "task_id": "07acc8f2-48d6-4e1c-8932-3f0c594105bf", + "document_id": "db02076e-989a-59cd-98d5-e24e15a0bd27" }, { - "id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "png", - "metadata": {}, - "title": "screen_shot.png", - "version": "v0", - "size_in_bytes": 791764, - "status": "success", - "created_at": "2024-08-13T17:43:28.344905Z", - "updated_at": "2024-08-13T17:43:28.344905Z" + "message": "Ingestion task queued successfully.", + "task_id": "d0f06ec0-265e-45c8-a768-04411f96a54c", + "document_id": "d421207a-d799-5806-8d67-46b2005b15d4" }, { - "id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", - "metadata": {}, - "title": "pg_essay_3.html", - "version": "v0", - 
"size_in_bytes": 124915, - "status": "success", - "created_at": "2024-08-13T17:43:28.344908Z", - "updated_at": "2024-08-13T17:43:28.344908Z" + "message": "Ingestion task queued successfully.", + "task_id": "56187544-b442-4e5d-8338-bce2881e0d16", + "document_id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "5a1d66ee-976c-4dd5-a19e-8e20c1470339", + "document_id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c" } ] }, - "document_chunks_test": { + "documents_overview": { "results": [ { - "fragment_id": "cd85f762-5739-5f19-86d9-a4a9fe213e44", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "A Project of One's Own", + "type": "html", "metadata": { - "text": "A Project of One's Own", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "pg_essay_3.html", + "version": "v0", + "size_in_bytes": 166556, + "ingestion_status": "pending", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:27.535113Z", + "updated_at": "2024-09-20T22:31:28.791381Z", + "ingestion_attempt_number": null }, { - "fragment_id": "cd38c41d-a14a-52ba-8882-83eaafcbb163", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "June 2021A few days ago, on the way home from school, my nine year old son\ntold me he couldn't wait to get home to write more of the story he\nwas working on. This made me as happy as anything I've heard him\nsay \u2014 not just because he was excited about his story, but because\nhe'd discovered this way of working. 
Working on a project of your\nown is as different from ordinary work as skating is from walking.", + "type": "png", "metadata": { - "text": "June 2021A few days ago, on the way home from school, my nine year old son\ntold me he couldn't wait to get home to write more of the story he\nwas working on. This made me as happy as anything I've heard him\nsay \u2014 not just because he was excited about his story, but because\nhe'd discovered this way of working. Working on a project of your\nown is as different from ordinary work as skating is from walking.", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "screen_shot.png", + "version": "v0", + "size_in_bytes": 1055688, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:27.522881Z", + "updated_at": "2024-09-20T22:26:27.522885Z", + "ingestion_attempt_number": null }, { - "fragment_id": "fb968ad0-15ab-50b1-982c-08d1e1a1cbd9", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "d421207a-d799-5806-8d67-46b2005b15d4", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "It's more fun, but also much more productive.What proportion of great work has been done by people who were\nskating in this sense? If not all of it, certainly a lot.There is something special about working on a project of your own.\nI wouldn't say exactly that you're happier. A better word would be\nexcited, or engaged. You're happy when things are going well, but\noften they aren't. When I'm writing an essay, most of the time I'm\nworried and puzzled: worried that the essay will turn out badly,", + "type": "txt", "metadata": { - "text": "It's more fun, but also much more productive.What proportion of great work has been done by people who were\nskating in this sense? 
If not all of it, certainly a lot.There is something special about working on a project of your own.\nI wouldn't say exactly that you're happier. A better word would be\nexcited, or engaged. You're happy when things are going well, but\noften they aren't. When I'm writing an essay, most of the time I'm\nworried and puzzled: worried that the essay will turn out badly,", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "yc_companies.txt", + "version": "v0", + "size_in_bytes": 62948, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.639855Z", + "updated_at": "2024-09-20T22:31:28.766869Z", + "ingestion_attempt_number": null }, { - "fragment_id": "263476d3-7650-52c9-99ad-ec9d5406c581", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "and puzzled because I'm groping for some idea that I can't see\nclearly enough. Will I be able to pin it down with words? In the\nend I usually can, if I take long enough, but I'm never sure; the\nfirst few attempts often fail.You have moments of happiness when things work out, but they don't\nlast long, because then you're on to the next problem. So why do\nit at all? Because to the kind of people who like working this way,\nnothing else feels as right. You feel as if you're an animal in its", + "type": "txt", "metadata": { - "text": "and puzzled because I'm groping for some idea that I can't see\nclearly enough. Will I be able to pin it down with words? In the\nend I usually can, if I take long enough, but I'm never sure; the\nfirst few attempts often fail.You have moments of happiness when things work out, but they don't\nlast long, because then you're on to the next problem. So why do\nit at all? 
Because to the kind of people who like working this way,\nnothing else feels as right. You feel as if you're an animal in its", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "test.txt", + "version": "v0", + "size_in_bytes": 28, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.525053Z", + "updated_at": "2024-09-20T22:31:28.806823Z", + "ingestion_attempt_number": null }, { - "fragment_id": "28c70d00-ee77-5e5e-9c0e-ff79332ffabb", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "natural habitat, doing what you were meant to do \u2014 not always\nhappy, maybe, but awake and alive.Many kids experience the excitement of working on projects of their\nown. The hard part is making this converge with the work you do as\nan adult. And our customs make it harder. We treat \"playing\" and\n\"hobbies\" as qualitatively different from \"work\". It's not clear\nto a kid building a treehouse that there's a direct (though long)\nroute from that to architecture or engineering. And instead of", + "type": "txt", "metadata": { - "text": "natural habitat, doing what you were meant to do \u2014 not always\nhappy, maybe, but awake and alive.Many kids experience the excitement of working on projects of their\nown. The hard part is making this converge with the work you do as\nan adult. And our customs make it harder. We treat \"playing\" and\n\"hobbies\" as qualitatively different from \"work\". It's not clear\nto a kid building a treehouse that there's a direct (though long)\nroute from that to architecture or engineering. 
And instead of", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "aristotle.txt", + "version": "v0", + "size_in_bytes": 97804, + "ingestion_status": "pending", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.521911Z", + "updated_at": "2024-09-20T22:31:28.835132Z", + "ingestion_attempt_number": null }, { - "fragment_id": "67a4ddfc-d362-5b4b-afaf-5b3f53ec7ac5", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "pointing out the route, we conceal it, by implicitly treating the\nstuff kids do as different from real work.\n[1]Instead of telling kids that their treehouses could be on the path\nto the work they do as adults, we tell them the path goes through\nschool. And unfortunately schoolwork tends to be very different from\nworking on projects of one's own. It's usually neither a project,\nnor one's own. So as school gets more serious, working on projects", + "type": "html", "metadata": { - "text": "pointing out the route, we conceal it, by implicitly treating the\nstuff kids do as different from real work.\n[1]Instead of telling kids that their treehouses could be on the path\nto the work they do as adults, we tell them the path goes through\nschool. And unfortunately schoolwork tends to be very different from\nworking on projects of one's own. It's usually neither a project,\nnor one's own. 
So as school gets more serious, working on projects", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "pg_essay_2.html", + "version": "v0", + "size_in_bytes": 166816, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.517088Z", + "updated_at": "2024-09-20T22:31:28.836602Z", + "ingestion_attempt_number": null }, { - "fragment_id": "001c1335-737c-5585-9ec1-441ee9c49348", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "2f576170-c4f9-5141-a910-a0924f341de4", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "of one's own is something that survives, if at all, as a thin thread\noff to the side.It's a bit sad to think of all the high school kids turning their\nbacks on building treehouses and sitting in class dutifully learning\nabout Darwin or Newton to pass some exam, when the work that made\nDarwin and Newton famous was actually closer in spirit to building\ntreehouses than studying for exams.If I had to choose between my kids getting good grades and\nworking on ambitious projects of their own, I'd pick", + "type": "pdf", "metadata": { - "text": "of one's own is something that survives, if at all, as a thin thread\noff to the side.It's a bit sad to think of all the high school kids turning their\nbacks on building treehouses and sitting in class dutifully learning\nabout Darwin or Newton to pass some exam, when the work that made\nDarwin and Newton famous was actually closer in spirit to building\ntreehouses than studying for exams.If I had to choose between my kids getting good grades and\nworking on ambitious projects of their own, I'd pick", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "lyft_2021.pdf", + "version": "v0", + "size_in_bytes": 1920404, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": 
"2024-09-20T22:26:26.513642Z", + "updated_at": "2024-09-20T22:26:26.513644Z", + "ingestion_attempt_number": null }, { - "fragment_id": "c3759b2a-fbec-55db-b850-511d4730de7e", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "techniques for preserving that excitement.\n[4]The team that made the original Macintosh were a great example of\nthis phenomenon. People like Burrell Smith and Andy Hertzfeld and\nBill Atkinson and Susan Kare were not just following orders. They\nwere not tennis balls hit by Steve Jobs, but rockets let loose by\nSteve Jobs. There was a lot of collaboration between them, but\nthey all seem to have individually felt the excitement of", + "type": "html", "metadata": { - "text": "techniques for preserving that excitement.\n[4]The team that made the original Macintosh were a great example of\nthis phenomenon. People like Burrell Smith and Andy Hertzfeld and\nBill Atkinson and Susan Kare were not just following orders. They\nwere not tennis balls hit by Steve Jobs, but rockets let loose by\nSteve Jobs. 
There was a lot of collaboration between them, but\nthey all seem to have individually felt the excitement of", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "pg_essay_5.html", + "version": "v0", + "size_in_bytes": 165040, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.511219Z", + "updated_at": "2024-09-20T22:31:28.786697Z", + "ingestion_attempt_number": null }, { - "fragment_id": "540d1c52-6dae-5fb4-b4ad-3d5bbd938abe", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "7b0f40c5-2ace-5781-ae35-ead99ddee8c5", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "the projects. And not because I'm an indulgent parent, but because\nI've been on the other end and I know which has more predictive\nvalue. When I was picking startups for Y Combinator, I didn't care\nabout applicants' grades. But if they'd worked on projects of their\nown, I wanted to hear all about those.\n[2]It may be inevitable that school is the way it is. I'm not saying\nwe have to redesign it (though I'm not saying we don't), just that\nwe should understand what it does to our attitudes to work \u2014 that", + "type": "pdf", "metadata": { - "text": "the projects. And not because I'm an indulgent parent, but because\nI've been on the other end and I know which has more predictive\nvalue. When I was picking startups for Y Combinator, I didn't care\nabout applicants' grades. But if they'd worked on projects of their\nown, I wanted to hear all about those.\n[2]It may be inevitable that school is the way it is. 
I'm not saying\nwe have to redesign it (though I'm not saying we don't), just that\nwe should understand what it does to our attitudes to work \u2014 that", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "graphrag.pdf", + "version": "v0", + "size_in_bytes": 2287544, + "ingestion_status": "pending", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.504459Z", + "updated_at": "2024-09-20T22:31:28.767620Z", + "ingestion_attempt_number": null }, { - "fragment_id": "fee3d54e-dd02-5f46-9edc-ee4f5d7a70d4", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "it steers us toward the dutiful plodding kind of work, often using\ncompetition as bait, and away from skating.There are occasionally times when schoolwork becomes a project of\none's own. Whenever I had to write a paper, that would become a\nproject of my own \u2014 except in English classes, ironically, because\nthe things one has to write in English classes are so\nbogus. And\nwhen I got to college and started taking CS classes, the programs\nI had to write became projects of my own. Whenever I was writing", + "type": "html", "metadata": { - "text": "it steers us toward the dutiful plodding kind of work, often using\ncompetition as bait, and away from skating.There are occasionally times when schoolwork becomes a project of\none's own. Whenever I had to write a paper, that would become a\nproject of my own \u2014 except in English classes, ironically, because\nthe things one has to write in English classes are so\nbogus. And\nwhen I got to college and started taking CS classes, the programs\nI had to write became projects of my own. 
Whenever I was writing", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "pg_essay_4.html", + "version": "v0", + "size_in_bytes": 157484, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:22.528586Z", + "updated_at": "2024-09-20T22:31:28.789182Z", + "ingestion_attempt_number": null }, { - "fragment_id": "05f02dc2-53ab-539b-9b4d-899eada47a3c", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "or programming, I was usually skating, and that has been true ever\nsince.So where exactly is the edge of projects of one's own? That's an\ninteresting question, partly because the answer is so complicated,\nand partly because there's so much at stake. There turn out to be\ntwo senses in which work can be one's own: 1) that you're doing it\nvoluntarily, rather than merely because someone told you to, and\n2) that you're doing it by yourself.The edge of the former is quite sharp. People who care a lot about", + "type": "txt", "metadata": { - "text": "or programming, I was usually skating, and that has been true ever\nsince.So where exactly is the edge of projects of one's own? That's an\ninteresting question, partly because the answer is so complicated,\nand partly because there's so much at stake. There turn out to be\ntwo senses in which work can be one's own: 1) that you're doing it\nvoluntarily, rather than merely because someone told you to, and\n2) that you're doing it by yourself.The edge of the former is quite sharp. 
People who care a lot about", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "aristotle_v2.txt", + "version": "v0", + "size_in_bytes": 3380, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:22.524513Z", + "updated_at": "2024-09-20T22:31:28.766240Z", + "ingestion_attempt_number": null }, { - "fragment_id": "42b9d617-6d85-5ba4-a8a6-bb3489faf930", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "their work are usually very sensitive to the difference between\npulling, and being pushed, and work tends to fall into one category\nor the other. But the test isn't simply whether you're told to do\nsomething. You can choose to do something you're told to do. Indeed,\nyou can own it far more thoroughly than the person who told you to\ndo it.For example, math homework is for most people something they're\ntold to do. But for my father, who was a mathematician, it wasn't.", + "type": "txt", "metadata": { - "text": "their work are usually very sensitive to the difference between\npulling, and being pushed, and work tends to fall into one category\nor the other. But the test isn't simply whether you're told to do\nsomething. You can choose to do something you're told to do. Indeed,\nyou can own it far more thoroughly than the person who told you to\ndo it.For example, math homework is for most people something they're\ntold to do. 
But for my father, who was a mathematician, it wasn't.", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "got.txt", + "version": "v0", + "size_in_bytes": 12656, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:22.517526Z", + "updated_at": "2024-09-20T22:31:28.765359Z", + "ingestion_attempt_number": null }, { - "fragment_id": "d9876b6c-42ab-5852-a98f-15032c7687c7", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "Most of us think of the problems in a math book as a way to test\nor develop our knowledge of the material explained in each section.\nBut to my father the problems were the part that mattered, and the\ntext was merely a sort of annotation. Whenever he got a new math\nbook it was to him like being given a puzzle: here was a new set\nof problems to solve, and he'd immediately set about solving all\nof them.The other sense of a project being one's own \u2014 working on it by", + "type": "pdf", "metadata": { - "text": "Most of us think of the problems in a math book as a way to test\nor develop our knowledge of the material explained in each section.\nBut to my father the problems were the part that mattered, and the\ntext was merely a sort of annotation. 
Whenever he got a new math\nbook it was to him like being given a puzzle: here was a new set\nof problems to solve, and he'd immediately set about solving all\nof them.The other sense of a project being one's own \u2014 working on it by", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "uber_2021.pdf", + "version": "v0", + "size_in_bytes": 2507312, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:22.515504Z", + "updated_at": "2024-09-20T22:31:28.782778Z", + "ingestion_attempt_number": null }, { - "fragment_id": "bec313c9-3fe3-5197-8792-078069a484e2", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "oneself \u2014 has a much softer edge. It shades gradually into\ncollaboration. And interestingly, it shades into collaboration in\ntwo different ways. One way to collaborate is to share a single\nproject. For example, when two mathematicians collaborate on a proof\nthat takes shape in the course of a conversation between them. The\nother way is when multiple people work on separate projects of their\nown that fit together like a jigsaw puzzle. For example, when one", + "type": "mp3", "metadata": { - "text": "oneself \u2014 has a much softer edge. It shades gradually into\ncollaboration. And interestingly, it shades into collaboration in\ntwo different ways. One way to collaborate is to share a single\nproject. For example, when two mathematicians collaborate on a proof\nthat takes shape in the course of a conversation between them. The\nother way is when multiple people work on separate projects of their\nown that fit together like a jigsaw puzzle. 
For example, when one", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "sample2.mp3", + "version": "v0", + "size_in_bytes": 96608, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:22.514341Z", + "updated_at": "2024-09-20T22:31:28.764706Z", + "ingestion_attempt_number": null }, { - "fragment_id": "8626ebe9-8132-5222-ba90-eef6a380ea65", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "52e12576-090f-59db-91f4-6d4b2e29ae6c", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "person writes the text of a book and another does the graphic design.\n[3]These two paths into collaboration can of course be combined. But\nunder the right conditions, the excitement of working on a project\nof one's own can be preserved for quite a while before disintegrating\ninto the turbulent flow of work in a large organization. Indeed,\nthe history of successful organizations is partly the history of\ntechniques for preserving that excitement.", + "type": "mp3", "metadata": { - "text": "person writes the text of a book and another does the graphic design.\n[3]These two paths into collaboration can of course be combined. But\nunder the right conditions, the excitement of working on a project\nof one's own can be preserved for quite a while before disintegrating\ninto the turbulent flow of work in a large organization. 
Indeed,\nthe history of successful organizations is partly the history of\ntechniques for preserving that excitement.", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "sample.mp3", + "version": "v0", + "size_in_bytes": 162228, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:22.511994Z", + "updated_at": "2024-09-20T22:31:28.770493Z", + "ingestion_attempt_number": null }, { - "fragment_id": "759018be-fcfc-5e53-bc2e-ce2e6e677d61", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", + "id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_1.html", + "version": "v0", + "size_in_bytes": 175340, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:21.855243Z", + "updated_at": "2024-09-20T22:31:28.769460Z", + "ingestion_attempt_number": null + } + ], + "total_entries": 16 + }, + "document_chunks_test": { + "results": [ + { + "fragment_id": "67e7ab57-eaa0-57d8-9276-da273abcdabd", + "extraction_id": "286b3218-517c-50bf-b8ea-1262e8ec6b42", "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "working on a project of one's own.In Andy Hertzfeld's book on the Macintosh, he describes how they'd\ncome back into the office after dinner and work late into the night.\nPeople who've never experienced the thrill of working on a project\nthey're excited about can't distinguish this kind of working long\nhours from the kind that happens in sweatshops and boiler rooms,\nbut they're at opposite ends of the spectrum. 
That's why it's a\nmistake to insist dogmatically on \"work/life balance.\" Indeed, the", + "collection_ids": [], + "text": "June 2021 A few days ago, on the way home from school, my nine year old son", "metadata": { - "text": "working on a project of one's own.In Andy Hertzfeld's book on the Macintosh, he describes how they'd\ncome back into the office after dinner and work late into the night.\nPeople who've never experienced the thrill of working on a project\nthey're excited about can't distinguish this kind of working long\nhours from the kind that happens in sweatshops and boiler rooms,\nbut they're at opposite ends of the spectrum. That's why it's a\nmistake to insist dogmatically on \"work/life balance.\" Indeed, the", - "title": "pg_essay_1.html" + "version": "v0", + "chunk_order": 0, + "document_type": "html", + "unstructured_filetype": "text/html", + "unstructured_languages": [ + "eng" + ], + "unstructured_text_as_html": "
June 2021 A few days ago, on the way home from school, my nine year old son
", + "partitioned_by_unstructured": true, + "unstructured_is_continuation": true } + } + ], + "total_entries": 1 + }, + "update_document_test": { + "results": { + "message": "Update task queued successfully.", + "task_id": "1828ac12-0804-47cf-9623-8110324a52b1", + "document_ids": [ + "db02076e-989a-59cd-98d5-e24e15a0bd27" + ] + } + }, + "rerun_documents_overview_test_1": { + "results": [ + { + "id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_3.html", + "version": "v0", + "size_in_bytes": 166556, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:27.535113Z", + "updated_at": "2024-09-20T22:31:28.791381Z", + "ingestion_attempt_number": null }, { - "fragment_id": "4f01bc15-500e-586f-97f0-a89b36248ea2", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "mere expression \"work/life\" embodies a mistake: it assumes work and\nlife are distinct. For those to whom the word \"work\" automatically\nimplies the dutiful plodding kind, they are. But for the skaters,\nthe relationship between work and life would be better represented\nby a dash than a slash. I wouldn't want to work on anything that I didn't\nwant to take over my life.Of course, it's easier to achieve this level of motivation when\nyou're making something like the Macintosh. It's easy for something", + "type": "png", "metadata": { - "text": "mere expression \"work/life\" embodies a mistake: it assumes work and\nlife are distinct. For those to whom the word \"work\" automatically\nimplies the dutiful plodding kind, they are. 
But for the skaters,\nthe relationship between work and life would be better represented\nby a dash than a slash. I wouldn't want to work on anything that I didn't\nwant to take over my life.Of course, it's easier to achieve this level of motivation when\nyou're making something like the Macintosh. It's easy for something", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "screen_shot.png", + "version": "v0", + "size_in_bytes": 1055688, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:27.522881Z", + "updated_at": "2024-09-20T22:31:28.779863Z", + "ingestion_attempt_number": null }, { - "fragment_id": "31e60d4e-e223-53ae-aa46-f30821fd7a92", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "d421207a-d799-5806-8d67-46b2005b15d4", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "new to feel like a project of your own. That's one of the reasons\nfor the tendency programmers have to rewrite things that don't need\nrewriting, and to write their own versions of things that already\nexist. This sometimes alarms managers, and measured by total number\nof characters typed, it's rarely the optimal solution. But it's not\nalways driven simply by arrogance or cluelessness.\nWriting code from scratch is also much more rewarding \u2014 so much", + "type": "txt", "metadata": { - "text": "new to feel like a project of your own. That's one of the reasons\nfor the tendency programmers have to rewrite things that don't need\nrewriting, and to write their own versions of things that already\nexist. This sometimes alarms managers, and measured by total number\nof characters typed, it's rarely the optimal solution. 
But it's not\nalways driven simply by arrogance or cluelessness.\nWriting code from scratch is also much more rewarding \u2014 so much", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "yc_companies.txt", + "version": "v0", + "size_in_bytes": 62948, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.639855Z", + "updated_at": "2024-09-20T22:31:28.766869Z", + "ingestion_attempt_number": null }, { - "fragment_id": "194d9ee6-13f4-527a-ba02-595131ace02a", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "more rewarding that a good programmer can end up net ahead, despite\nthe shocking waste of characters. Indeed, it may be one of the\nadvantages of capitalism that it encourages such rewriting. A company\nthat needs software to do something can't use the software already\nwritten to do it at another company, and thus has to write their\nown, which often turns out better.\n[5]The natural alignment between skating and solving new problems is\none of the reasons the payoffs from startups are so high. Not only", + "type": "txt", "metadata": { - "text": "more rewarding that a good programmer can end up net ahead, despite\nthe shocking waste of characters. Indeed, it may be one of the\nadvantages of capitalism that it encourages such rewriting. A company\nthat needs software to do something can't use the software already\nwritten to do it at another company, and thus has to write their\nown, which often turns out better.\n[5]The natural alignment between skating and solving new problems is\none of the reasons the payoffs from startups are so high. 
Not only", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "test.txt", + "version": "v0", + "size_in_bytes": 28, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.525053Z", + "updated_at": "2024-09-20T22:31:28.806823Z", + "ingestion_attempt_number": null }, { - "fragment_id": "d3fee2f7-a7bd-523e-957e-427712013146", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "is the market price of unsolved problems higher, you also get a\ndiscount on productivity when you work on them. In fact, you get a\ndouble increase in productivity: when you're doing a clean-sheet\ndesign, it's easier to recruit skaters, and they get to spend all\ntheir time skating.Steve Jobs knew a thing or two about skaters from having watched\nSteve Wozniak. If you can find the right people, you only have to\ntell them what to do at the highest level. They'll handle the", + "type": "txt", "metadata": { - "text": "is the market price of unsolved problems higher, you also get a\ndiscount on productivity when you work on them. In fact, you get a\ndouble increase in productivity: when you're doing a clean-sheet\ndesign, it's easier to recruit skaters, and they get to spend all\ntheir time skating.Steve Jobs knew a thing or two about skaters from having watched\nSteve Wozniak. If you can find the right people, you only have to\ntell them what to do at the highest level. 
They'll handle the", - "title": "pg_essay_1.html" - } + "title": "aristotle_v2.txt", + "version": "v1" + }, + "title": "aristotle_v2.txt", + "version": "v1", + "size_in_bytes": 2534, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.521911Z", + "updated_at": "2024-09-20T22:31:45.500072Z", + "ingestion_attempt_number": null }, { - "fragment_id": "e78bb470-5e0e-5b05-9f76-b60b9e43e90d", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "details. Indeed, they insist on it. For a project to feel like your\nown, you must have sufficient autonomy. You can't be working to\norder, or slowed down\nby bureaucracy.One way to ensure autonomy is not to have a boss at all. There are\ntwo ways to do that: to be the boss yourself, and to work on projects\noutside of work. Though they're at opposite ends of the scale\nfinancially, startups and open source projects have a lot in common,\nincluding the fact that they're often run by skaters. And indeed,", + "type": "html", "metadata": { - "text": "details. Indeed, they insist on it. For a project to feel like your\nown, you must have sufficient autonomy. You can't be working to\norder, or slowed down\nby bureaucracy.One way to ensure autonomy is not to have a boss at all. There are\ntwo ways to do that: to be the boss yourself, and to work on projects\noutside of work. Though they're at opposite ends of the scale\nfinancially, startups and open source projects have a lot in common,\nincluding the fact that they're often run by skaters. 
And indeed,", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "pg_essay_2.html", + "version": "v0", + "size_in_bytes": 166816, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.517088Z", + "updated_at": "2024-09-20T22:31:28.836602Z", + "ingestion_attempt_number": null }, { - "fragment_id": "abed8147-50aa-5c90-8f66-cdbfe02d068f", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "2f576170-c4f9-5141-a910-a0924f341de4", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "there's a wormhole from one end of the scale to the other: one of\nthe best ways to discover\nstartup ideas is to work on a project\njust for fun.If your projects are the kind that make money, it's easy to work\non them. It's harder when they're not. And the hardest part, usually,\nis morale. That's where adults have it harder than kids. Kids just\nplunge in and build their treehouse without worrying about whether\nthey're wasting their time, or how it compares to other treehouses.", + "type": "pdf", "metadata": { - "text": "there's a wormhole from one end of the scale to the other: one of\nthe best ways to discover\nstartup ideas is to work on a project\njust for fun.If your projects are the kind that make money, it's easy to work\non them. It's harder when they're not. And the hardest part, usually,\nis morale. That's where adults have it harder than kids. 
Kids just\nplunge in and build their treehouse without worrying about whether\nthey're wasting their time, or how it compares to other treehouses.", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "lyft_2021.pdf", + "version": "v0", + "size_in_bytes": 1920404, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.513642Z", + "updated_at": "2024-09-20T22:31:28.773388Z", + "ingestion_attempt_number": null }, { - "fragment_id": "80fbb24a-999e-593f-93a2-f9c42b46c98b", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "And frankly we could learn a lot from kids here. The high standards\nmost grownups have for \"real\" work do not always serve us well.The most important phase in a project of one's own is at the\nbeginning: when you go from thinking it might be cool to do x to\nactually doing x. And at that point high standards are not merely\nuseless but positively harmful. There are a few people who start\ntoo many new projects, but far more, I suspect, who are deterred", + "type": "html", "metadata": { - "text": "And frankly we could learn a lot from kids here. The high standards\nmost grownups have for \"real\" work do not always serve us well.The most important phase in a project of one's own is at the\nbeginning: when you go from thinking it might be cool to do x to\nactually doing x. And at that point high standards are not merely\nuseless but positively harmful. 
There are a few people who start\ntoo many new projects, but far more, I suspect, who are deterred", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "pg_essay_5.html", + "version": "v0", + "size_in_bytes": 165040, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.511219Z", + "updated_at": "2024-09-20T22:31:28.786697Z", + "ingestion_attempt_number": null }, { - "fragment_id": "5be9b626-b309-5502-941e-250efd5d23be", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "7b0f40c5-2ace-5781-ae35-ead99ddee8c5", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "by fear of failure from starting projects that would have succeeded\nif they had.But if we couldn't benefit as kids from the knowledge that our\ntreehouses were on the path to grownup projects, we can at least\nbenefit as grownups from knowing that our projects are on a path\nthat stretches back to treehouses. Remember that careless confidence\nyou had as a kid when starting something new? That would be a\npowerful thing to recapture.If it's harder as adults to retain that kind of confidence, we at", + "type": "pdf", "metadata": { - "text": "by fear of failure from starting projects that would have succeeded\nif they had.But if we couldn't benefit as kids from the knowledge that our\ntreehouses were on the path to grownup projects, we can at least\nbenefit as grownups from knowing that our projects are on a path\nthat stretches back to treehouses. Remember that careless confidence\nyou had as a kid when starting something new? 
That would be a\npowerful thing to recapture.If it's harder as adults to retain that kind of confidence, we at", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "graphrag.pdf", + "version": "v0", + "size_in_bytes": 2287544, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.504459Z", + "updated_at": "2024-09-20T22:31:28.767620Z", + "ingestion_attempt_number": null }, { - "fragment_id": "6139c5ce-049d-5f31-86b5-efd3a2be1e5d", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "least tend to be more aware of what we're doing. Kids bounce, or\nare herded, from one kind of work to the next, barely realizing\nwhat's happening to them. Whereas we know more about different types\nof work and have more control over which we do. Ideally we can have\nthe best of both worlds: to be deliberate in choosing to work on\nprojects of our own, and carelessly confident in starting new ones.\nNotes[1]\n\"Hobby\" is a curious word. Now it means work that isn't real", + "type": "html", "metadata": { - "text": "least tend to be more aware of what we're doing. Kids bounce, or\nare herded, from one kind of work to the next, barely realizing\nwhat's happening to them. Whereas we know more about different types\nof work and have more control over which we do. Ideally we can have\nthe best of both worlds: to be deliberate in choosing to work on\nprojects of our own, and carelessly confident in starting new ones.\nNotes[1]\n\"Hobby\" is a curious word. 
Now it means work that isn't real", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "pg_essay_4.html", + "version": "v0", + "size_in_bytes": 157484, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:22.528586Z", + "updated_at": "2024-09-20T22:31:28.789182Z", + "ingestion_attempt_number": null }, { - "fragment_id": "d3027d33-3720-5a1e-8563-7249b05ac5c5", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "work \u2014 work that one is not to be judged by \u2014 but originally it just\nmeant an obsession in a fairly general sense (even a political\nopinion, for example) that one metaphorically rode as a child rides\na hobby-horse. It's hard to say if its recent, narrower meaning is\na change for the better or the worse. For sure there are lots of\nfalse positives \u2014 lots of projects that end up being important but\nare dismissed initially as mere hobbies. But on the other hand, the", + "type": "txt", "metadata": { - "text": "work \u2014 work that one is not to be judged by \u2014 but originally it just\nmeant an obsession in a fairly general sense (even a political\nopinion, for example) that one metaphorically rode as a child rides\na hobby-horse. It's hard to say if its recent, narrower meaning is\na change for the better or the worse. For sure there are lots of\nfalse positives \u2014 lots of projects that end up being important but\nare dismissed initially as mere hobbies. 
But on the other hand, the", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "aristotle_v2.txt", + "version": "v0", + "size_in_bytes": 3380, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:22.524513Z", + "updated_at": "2024-09-20T22:31:28.766240Z", + "ingestion_attempt_number": null }, { - "fragment_id": "b6587cfc-01dc-5324-af32-08c059b099cd", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "concept provides valuable cover for projects in the early, ugly\nduckling phase.[2]\nTiger parents, as parents so often do, are fighting the last\nwar. Grades mattered more in the old days when the route to success\nwas to acquire\ncredentials\nwhile ascending some predefined ladder.\nBut it's just as well that their tactics are focused on grades. How\nawful it would be if they invaded the territory of projects, and\nthereby gave their kids a distaste for this kind of work by forcing", + "type": "txt", "metadata": { - "text": "concept provides valuable cover for projects in the early, ugly\nduckling phase.[2]\nTiger parents, as parents so often do, are fighting the last\nwar. Grades mattered more in the old days when the route to success\nwas to acquire\ncredentials\nwhile ascending some predefined ladder.\nBut it's just as well that their tactics are focused on grades. 
How\nawful it would be if they invaded the territory of projects, and\nthereby gave their kids a distaste for this kind of work by forcing", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "got.txt", + "version": "v0", + "size_in_bytes": 12656, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:22.517526Z", + "updated_at": "2024-09-20T22:31:28.765359Z", + "ingestion_attempt_number": null }, { - "fragment_id": "57b17831-a919-5d01-b7c1-1077285ffd38", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "them to do it. Grades are already a grim, fake world, and aren't\nharmed much by parental interference, but working on one's own\nprojects is a more delicate, private thing that could be damaged\nvery easily.[3]\nThe complicated, gradual edge between working on one's own\nprojects and collaborating with others is one reason there is so\nmuch disagreement about the idea of the \"lone genius.\" In practice\npeople collaborate (or not) in all kinds of different ways, but the", + "type": "pdf", "metadata": { - "text": "them to do it. 
Grades are already a grim, fake world, and aren't\nharmed much by parental interference, but working on one's own\nprojects is a more delicate, private thing that could be damaged\nvery easily.[3]\nThe complicated, gradual edge between working on one's own\nprojects and collaborating with others is one reason there is so\nmuch disagreement about the idea of the \"lone genius.\" In practice\npeople collaborate (or not) in all kinds of different ways, but the", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "uber_2021.pdf", + "version": "v0", + "size_in_bytes": 2507312, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:22.515504Z", + "updated_at": "2024-09-20T22:31:28.782778Z", + "ingestion_attempt_number": null }, { - "fragment_id": "8e943dae-6cbf-543f-9092-16600569621c", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "idea of the lone genius is definitely not a myth. There's a core\nof truth to it that goes with a certain way of working.[4]\nCollaboration is powerful too. The optimal organization would\ncombine collaboration and ownership in such a way as to do the least\ndamage to each. Interestingly, companies and university departments\napproach this ideal from opposite directions: companies insist on\ncollaboration, and occasionally also manage both to recruit skaters", + "type": "mp3", "metadata": { - "text": "idea of the lone genius is definitely not a myth. There's a core\nof truth to it that goes with a certain way of working.[4]\nCollaboration is powerful too. The optimal organization would\ncombine collaboration and ownership in such a way as to do the least\ndamage to each. 
Interestingly, companies and university departments\napproach this ideal from opposite directions: companies insist on\ncollaboration, and occasionally also manage both to recruit skaters", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "sample2.mp3", + "version": "v0", + "size_in_bytes": 96608, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:22.514341Z", + "updated_at": "2024-09-20T22:31:28.764706Z", + "ingestion_attempt_number": null }, { - "fragment_id": "8ad34f88-fe21-598e-b6d7-1d66224075c3", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "52e12576-090f-59db-91f4-6d4b2e29ae6c", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "and allow them to skate, and university departments insist on the\nability to do independent research (which is by custom treated as\nskating, whether it is or not), and the people they hire collaborate\nas much as they choose.[5]\nIf a company could design its software in such a way that the\nbest newly arrived programmers always got a clean sheet, it could\nhave a kind of eternal youth. That might not be impossible. If you\nhad a software backbone defining a game with sufficiently clear", + "type": "mp3", "metadata": { - "text": "and allow them to skate, and university departments insist on the\nability to do independent research (which is by custom treated as\nskating, whether it is or not), and the people they hire collaborate\nas much as they choose.[5]\nIf a company could design its software in such a way that the\nbest newly arrived programmers always got a clean sheet, it could\nhave a kind of eternal youth. That might not be impossible. 
If you\nhad a software backbone defining a game with sufficiently clear", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "sample.mp3", + "version": "v0", + "size_in_bytes": 162228, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:22.511994Z", + "updated_at": "2024-09-20T22:31:28.770493Z", + "ingestion_attempt_number": null }, { - "fragment_id": "5127d109-8748-5615-92c1-082bd728df43", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "text": "rules, individual programmers could write their own players.\nThanks to Trevor Blackwell, Paul Buchheit, Andy Hertzfeld, Jessica\nLivingston, and Peter Norvig for reading drafts of this.", + "type": "html", "metadata": { - "text": "rules, individual programmers could write their own players.\nThanks to Trevor Blackwell, Paul Buchheit, Andy Hertzfeld, Jessica\nLivingston, and Peter Norvig for reading drafts of this.", - "title": "pg_essay_1.html" - } + "version": "v0" + }, + "title": "pg_essay_1.html", + "version": "v0", + "size_in_bytes": 175340, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:21.855243Z", + "updated_at": "2024-09-20T22:31:28.769460Z", + "ingestion_attempt_number": null } - ] - }, - "update_document_test": { - "results": { - "processed_documents": [ - { - "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", - "group_ids": [], - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", - "metadata": {}, - "title": "aristotle_v2.txt", - "version": "v1", - "size_in_bytes": 7443, - "status": "success", - "created_at": "2024-08-13T17:47:20.150851", - "updated_at": "2024-08-13T17:47:20.150851" - } - ], - "failed_documents": [], - "skipped_documents": [] - } + ], + 
"total_entries": 16 }, "delete_document_test": { - "results": { - "cd85f762-5739-5f19-86d9-a4a9fe213e44": { - "fragment_id": "cd85f762-5739-5f19-86d9-a4a9fe213e44", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "A Project of One's Own" - }, - "cd38c41d-a14a-52ba-8882-83eaafcbb163": { - "fragment_id": "cd38c41d-a14a-52ba-8882-83eaafcbb163", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "June 2021A few days ago, on the way home from school, my nine year old son\ntold me he couldn't wait to get home to write more of the story he\nwas working on. This made me as happy as anything I've heard him\nsay \u2014 not just because he was excited about his story, but because\nhe'd discovered this way of working. Working on a project of your\nown is as different from ordinary work as skating is from walking." - }, - "fb968ad0-15ab-50b1-982c-08d1e1a1cbd9": { - "fragment_id": "fb968ad0-15ab-50b1-982c-08d1e1a1cbd9", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "It's more fun, but also much more productive.What proportion of great work has been done by people who were\nskating in this sense? If not all of it, certainly a lot.There is something special about working on a project of your own.\nI wouldn't say exactly that you're happier. A better word would be\nexcited, or engaged. You're happy when things are going well, but\noften they aren't. 
When I'm writing an essay, most of the time I'm\nworried and puzzled: worried that the essay will turn out badly," - }, - "263476d3-7650-52c9-99ad-ec9d5406c581": { - "fragment_id": "263476d3-7650-52c9-99ad-ec9d5406c581", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "and puzzled because I'm groping for some idea that I can't see\nclearly enough. Will I be able to pin it down with words? In the\nend I usually can, if I take long enough, but I'm never sure; the\nfirst few attempts often fail.You have moments of happiness when things work out, but they don't\nlast long, because then you're on to the next problem. So why do\nit at all? Because to the kind of people who like working this way,\nnothing else feels as right. You feel as if you're an animal in its" - }, - "28c70d00-ee77-5e5e-9c0e-ff79332ffabb": { - "fragment_id": "28c70d00-ee77-5e5e-9c0e-ff79332ffabb", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "natural habitat, doing what you were meant to do \u2014 not always\nhappy, maybe, but awake and alive.Many kids experience the excitement of working on projects of their\nown. The hard part is making this converge with the work you do as\nan adult. And our customs make it harder. We treat \"playing\" and\n\"hobbies\" as qualitatively different from \"work\". It's not clear\nto a kid building a treehouse that there's a direct (though long)\nroute from that to architecture or engineering. 
And instead of" - }, - "67a4ddfc-d362-5b4b-afaf-5b3f53ec7ac5": { - "fragment_id": "67a4ddfc-d362-5b4b-afaf-5b3f53ec7ac5", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "pointing out the route, we conceal it, by implicitly treating the\nstuff kids do as different from real work.\n[1]Instead of telling kids that their treehouses could be on the path\nto the work they do as adults, we tell them the path goes through\nschool. And unfortunately schoolwork tends to be very different from\nworking on projects of one's own. It's usually neither a project,\nnor one's own. So as school gets more serious, working on projects" - }, - "001c1335-737c-5585-9ec1-441ee9c49348": { - "fragment_id": "001c1335-737c-5585-9ec1-441ee9c49348", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "of one's own is something that survives, if at all, as a thin thread\noff to the side.It's a bit sad to think of all the high school kids turning their\nbacks on building treehouses and sitting in class dutifully learning\nabout Darwin or Newton to pass some exam, when the work that made\nDarwin and Newton famous was actually closer in spirit to building\ntreehouses than studying for exams.If I had to choose between my kids getting good grades and\nworking on ambitious projects of their own, I'd pick" - }, - "c3759b2a-fbec-55db-b850-511d4730de7e": { - "fragment_id": "c3759b2a-fbec-55db-b850-511d4730de7e", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "techniques for preserving that excitement.\n[4]The team that made the original Macintosh were a great example of\nthis phenomenon. People like Burrell Smith and Andy Hertzfeld and\nBill Atkinson and Susan Kare were not just following orders. 
They\nwere not tennis balls hit by Steve Jobs, but rockets let loose by\nSteve Jobs. There was a lot of collaboration between them, but\nthey all seem to have individually felt the excitement of" - }, - "540d1c52-6dae-5fb4-b4ad-3d5bbd938abe": { - "fragment_id": "540d1c52-6dae-5fb4-b4ad-3d5bbd938abe", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "the projects. And not because I'm an indulgent parent, but because\nI've been on the other end and I know which has more predictive\nvalue. When I was picking startups for Y Combinator, I didn't care\nabout applicants' grades. But if they'd worked on projects of their\nown, I wanted to hear all about those.\n[2]It may be inevitable that school is the way it is. I'm not saying\nwe have to redesign it (though I'm not saying we don't), just that\nwe should understand what it does to our attitudes to work \u2014 that" - }, - "fee3d54e-dd02-5f46-9edc-ee4f5d7a70d4": { - "fragment_id": "fee3d54e-dd02-5f46-9edc-ee4f5d7a70d4", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "it steers us toward the dutiful plodding kind of work, often using\ncompetition as bait, and away from skating.There are occasionally times when schoolwork becomes a project of\none's own. Whenever I had to write a paper, that would become a\nproject of my own \u2014 except in English classes, ironically, because\nthe things one has to write in English classes are so\nbogus. And\nwhen I got to college and started taking CS classes, the programs\nI had to write became projects of my own. 
Whenever I was writing" - }, - "05f02dc2-53ab-539b-9b4d-899eada47a3c": { - "fragment_id": "05f02dc2-53ab-539b-9b4d-899eada47a3c", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "or programming, I was usually skating, and that has been true ever\nsince.So where exactly is the edge of projects of one's own? That's an\ninteresting question, partly because the answer is so complicated,\nand partly because there's so much at stake. There turn out to be\ntwo senses in which work can be one's own: 1) that you're doing it\nvoluntarily, rather than merely because someone told you to, and\n2) that you're doing it by yourself.The edge of the former is quite sharp. People who care a lot about" - }, - "42b9d617-6d85-5ba4-a8a6-bb3489faf930": { - "fragment_id": "42b9d617-6d85-5ba4-a8a6-bb3489faf930", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "their work are usually very sensitive to the difference between\npulling, and being pushed, and work tends to fall into one category\nor the other. But the test isn't simply whether you're told to do\nsomething. You can choose to do something you're told to do. Indeed,\nyou can own it far more thoroughly than the person who told you to\ndo it.For example, math homework is for most people something they're\ntold to do. But for my father, who was a mathematician, it wasn't." - }, - "d9876b6c-42ab-5852-a98f-15032c7687c7": { - "fragment_id": "d9876b6c-42ab-5852-a98f-15032c7687c7", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "Most of us think of the problems in a math book as a way to test\nor develop our knowledge of the material explained in each section.\nBut to my father the problems were the part that mattered, and the\ntext was merely a sort of annotation. 
Whenever he got a new math\nbook it was to him like being given a puzzle: here was a new set\nof problems to solve, and he'd immediately set about solving all\nof them.The other sense of a project being one's own \u2014 working on it by" - }, - "bec313c9-3fe3-5197-8792-078069a484e2": { - "fragment_id": "bec313c9-3fe3-5197-8792-078069a484e2", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "oneself \u2014 has a much softer edge. It shades gradually into\ncollaboration. And interestingly, it shades into collaboration in\ntwo different ways. One way to collaborate is to share a single\nproject. For example, when two mathematicians collaborate on a proof\nthat takes shape in the course of a conversation between them. The\nother way is when multiple people work on separate projects of their\nown that fit together like a jigsaw puzzle. For example, when one" - }, - "8626ebe9-8132-5222-ba90-eef6a380ea65": { - "fragment_id": "8626ebe9-8132-5222-ba90-eef6a380ea65", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "person writes the text of a book and another does the graphic design.\n[3]These two paths into collaboration can of course be combined. But\nunder the right conditions, the excitement of working on a project\nof one's own can be preserved for quite a while before disintegrating\ninto the turbulent flow of work in a large organization. Indeed,\nthe history of successful organizations is partly the history of\ntechniques for preserving that excitement." 
- }, - "759018be-fcfc-5e53-bc2e-ce2e6e677d61": { - "fragment_id": "759018be-fcfc-5e53-bc2e-ce2e6e677d61", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "working on a project of one's own.In Andy Hertzfeld's book on the Macintosh, he describes how they'd\ncome back into the office after dinner and work late into the night.\nPeople who've never experienced the thrill of working on a project\nthey're excited about can't distinguish this kind of working long\nhours from the kind that happens in sweatshops and boiler rooms,\nbut they're at opposite ends of the spectrum. That's why it's a\nmistake to insist dogmatically on \"work/life balance.\" Indeed, the" - }, - "4f01bc15-500e-586f-97f0-a89b36248ea2": { - "fragment_id": "4f01bc15-500e-586f-97f0-a89b36248ea2", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "mere expression \"work/life\" embodies a mistake: it assumes work and\nlife are distinct. For those to whom the word \"work\" automatically\nimplies the dutiful plodding kind, they are. But for the skaters,\nthe relationship between work and life would be better represented\nby a dash than a slash. I wouldn't want to work on anything that I didn't\nwant to take over my life.Of course, it's easier to achieve this level of motivation when\nyou're making something like the Macintosh. It's easy for something" - }, - "31e60d4e-e223-53ae-aa46-f30821fd7a92": { - "fragment_id": "31e60d4e-e223-53ae-aa46-f30821fd7a92", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "new to feel like a project of your own. That's one of the reasons\nfor the tendency programmers have to rewrite things that don't need\nrewriting, and to write their own versions of things that already\nexist. 
This sometimes alarms managers, and measured by total number\nof characters typed, it's rarely the optimal solution. But it's not\nalways driven simply by arrogance or cluelessness.\nWriting code from scratch is also much more rewarding \u2014 so much" - }, - "194d9ee6-13f4-527a-ba02-595131ace02a": { - "fragment_id": "194d9ee6-13f4-527a-ba02-595131ace02a", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "more rewarding that a good programmer can end up net ahead, despite\nthe shocking waste of characters. Indeed, it may be one of the\nadvantages of capitalism that it encourages such rewriting. A company\nthat needs software to do something can't use the software already\nwritten to do it at another company, and thus has to write their\nown, which often turns out better.\n[5]The natural alignment between skating and solving new problems is\none of the reasons the payoffs from startups are so high. Not only" - }, - "d3fee2f7-a7bd-523e-957e-427712013146": { - "fragment_id": "d3fee2f7-a7bd-523e-957e-427712013146", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "is the market price of unsolved problems higher, you also get a\ndiscount on productivity when you work on them. In fact, you get a\ndouble increase in productivity: when you're doing a clean-sheet\ndesign, it's easier to recruit skaters, and they get to spend all\ntheir time skating.Steve Jobs knew a thing or two about skaters from having watched\nSteve Wozniak. If you can find the right people, you only have to\ntell them what to do at the highest level. They'll handle the" - }, - "e78bb470-5e0e-5b05-9f76-b60b9e43e90d": { - "fragment_id": "e78bb470-5e0e-5b05-9f76-b60b9e43e90d", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "details. Indeed, they insist on it. 
For a project to feel like your\nown, you must have sufficient autonomy. You can't be working to\norder, or slowed down\nby bureaucracy.One way to ensure autonomy is not to have a boss at all. There are\ntwo ways to do that: to be the boss yourself, and to work on projects\noutside of work. Though they're at opposite ends of the scale\nfinancially, startups and open source projects have a lot in common,\nincluding the fact that they're often run by skaters. And indeed," - }, - "abed8147-50aa-5c90-8f66-cdbfe02d068f": { - "fragment_id": "abed8147-50aa-5c90-8f66-cdbfe02d068f", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "there's a wormhole from one end of the scale to the other: one of\nthe best ways to discover\nstartup ideas is to work on a project\njust for fun.If your projects are the kind that make money, it's easy to work\non them. It's harder when they're not. And the hardest part, usually,\nis morale. That's where adults have it harder than kids. Kids just\nplunge in and build their treehouse without worrying about whether\nthey're wasting their time, or how it compares to other treehouses." - }, - "80fbb24a-999e-593f-93a2-f9c42b46c98b": { - "fragment_id": "80fbb24a-999e-593f-93a2-f9c42b46c98b", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "And frankly we could learn a lot from kids here. The high standards\nmost grownups have for \"real\" work do not always serve us well.The most important phase in a project of one's own is at the\nbeginning: when you go from thinking it might be cool to do x to\nactually doing x. And at that point high standards are not merely\nuseless but positively harmful. 
There are a few people who start\ntoo many new projects, but far more, I suspect, who are deterred" - }, - "5be9b626-b309-5502-941e-250efd5d23be": { - "fragment_id": "5be9b626-b309-5502-941e-250efd5d23be", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "by fear of failure from starting projects that would have succeeded\nif they had.But if we couldn't benefit as kids from the knowledge that our\ntreehouses were on the path to grownup projects, we can at least\nbenefit as grownups from knowing that our projects are on a path\nthat stretches back to treehouses. Remember that careless confidence\nyou had as a kid when starting something new? That would be a\npowerful thing to recapture.If it's harder as adults to retain that kind of confidence, we at" - }, - "6139c5ce-049d-5f31-86b5-efd3a2be1e5d": { - "fragment_id": "6139c5ce-049d-5f31-86b5-efd3a2be1e5d", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "least tend to be more aware of what we're doing. Kids bounce, or\nare herded, from one kind of work to the next, barely realizing\nwhat's happening to them. Whereas we know more about different types\nof work and have more control over which we do. Ideally we can have\nthe best of both worlds: to be deliberate in choosing to work on\nprojects of our own, and carelessly confident in starting new ones.\nNotes[1]\n\"Hobby\" is a curious word. 
Now it means work that isn't real" - }, - "d3027d33-3720-5a1e-8563-7249b05ac5c5": { - "fragment_id": "d3027d33-3720-5a1e-8563-7249b05ac5c5", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "work \u2014 work that one is not to be judged by \u2014 but originally it just\nmeant an obsession in a fairly general sense (even a political\nopinion, for example) that one metaphorically rode as a child rides\na hobby-horse. It's hard to say if its recent, narrower meaning is\na change for the better or the worse. For sure there are lots of\nfalse positives \u2014 lots of projects that end up being important but\nare dismissed initially as mere hobbies. But on the other hand, the" - }, - "b6587cfc-01dc-5324-af32-08c059b099cd": { - "fragment_id": "b6587cfc-01dc-5324-af32-08c059b099cd", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "concept provides valuable cover for projects in the early, ugly\nduckling phase.[2]\nTiger parents, as parents so often do, are fighting the last\nwar. Grades mattered more in the old days when the route to success\nwas to acquire\ncredentials\nwhile ascending some predefined ladder.\nBut it's just as well that their tactics are focused on grades. How\nawful it would be if they invaded the territory of projects, and\nthereby gave their kids a distaste for this kind of work by forcing" - }, - "57b17831-a919-5d01-b7c1-1077285ffd38": { - "fragment_id": "57b17831-a919-5d01-b7c1-1077285ffd38", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "them to do it. 
Grades are already a grim, fake world, and aren't\nharmed much by parental interference, but working on one's own\nprojects is a more delicate, private thing that could be damaged\nvery easily.[3]\nThe complicated, gradual edge between working on one's own\nprojects and collaborating with others is one reason there is so\nmuch disagreement about the idea of the \"lone genius.\" In practice\npeople collaborate (or not) in all kinds of different ways, but the" - }, - "8e943dae-6cbf-543f-9092-16600569621c": { - "fragment_id": "8e943dae-6cbf-543f-9092-16600569621c", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "idea of the lone genius is definitely not a myth. There's a core\nof truth to it that goes with a certain way of working.[4]\nCollaboration is powerful too. The optimal organization would\ncombine collaboration and ownership in such a way as to do the least\ndamage to each. Interestingly, companies and university departments\napproach this ideal from opposite directions: companies insist on\ncollaboration, and occasionally also manage both to recruit skaters" - }, - "8ad34f88-fe21-598e-b6d7-1d66224075c3": { - "fragment_id": "8ad34f88-fe21-598e-b6d7-1d66224075c3", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "and allow them to skate, and university departments insist on the\nability to do independent research (which is by custom treated as\nskating, whether it is or not), and the people they hire collaborate\nas much as they choose.[5]\nIf a company could design its software in such a way that the\nbest newly arrived programmers always got a clean sheet, it could\nhave a kind of eternal youth. That might not be impossible. 
If you\nhad a software backbone defining a game with sufficiently clear" - }, - "5127d109-8748-5615-92c1-082bd728df43": { - "fragment_id": "5127d109-8748-5615-92c1-082bd728df43", - "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "extraction_id": "36474213-c9fa-5fb7-b99e-c3720743da53", - "text": "rules, individual programmers could write their own players.\nThanks to Trevor Blackwell, Paul Buchheit, Andy Hertzfeld, Jessica\nLivingston, and Peter Norvig for reading drafts of this." - } - } + "results": {} }, - "rerun_documents_overview_test": { + "rerun_documents_overview_test_2": { "results": [ { - "id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", - "group_ids": [], + "id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", - "metadata": {}, - "title": "got.txt", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_3.html", "version": "v0", - "size_in_bytes": 9491, - "status": "success", - "created_at": "2024-08-13T17:43:28.344841Z", - "updated_at": "2024-08-13T17:43:28.344841Z" + "size_in_bytes": 166556, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:27.535113Z", + "updated_at": "2024-09-20T22:31:28.791381Z", + "ingestion_attempt_number": null }, { - "id": "52e12576-090f-59db-91f4-6d4b2e29ae6c", - "group_ids": [], + "id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "mp3", - "metadata": {}, - "title": "sample.mp3", + "type": "png", + "metadata": { + "version": "v0" + }, + "title": "screen_shot.png", "version": "v0", - "size_in_bytes": 121671, - "status": "success", - "created_at": "2024-08-13T17:43:28.344851Z", - "updated_at": "2024-08-13T17:43:28.344851Z" + "size_in_bytes": 1055688, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:27.522881Z", + "updated_at": 
"2024-09-20T22:31:28.779863Z", + "ingestion_attempt_number": null }, { - "id": "3e157b3a-8469-51db-90d9-52e7d896b49b", - "group_ids": [], + "id": "d421207a-d799-5806-8d67-46b2005b15d4", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "pdf", - "metadata": {}, - "title": "uber_2021.pdf", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "yc_companies.txt", "version": "v0", - "size_in_bytes": 1880483, - "status": "success", - "created_at": "2024-08-13T17:43:28.344856Z", - "updated_at": "2024-08-13T17:43:28.344856Z" + "size_in_bytes": 62948, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.639855Z", + "updated_at": "2024-09-20T22:31:28.766869Z", + "ingestion_attempt_number": null }, { - "id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9", - "group_ids": [], + "id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "mp3", - "metadata": {}, - "title": "sample2.mp3", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "test.txt", "version": "v0", - "size_in_bytes": 72456, - "status": "success", - "created_at": "2024-08-13T17:43:28.344867Z", - "updated_at": "2024-08-13T17:43:28.344867Z" + "size_in_bytes": 28, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.525053Z", + "updated_at": "2024-09-20T22:31:28.806823Z", + "ingestion_attempt_number": null }, { - "id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", - "group_ids": [], + "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "type": "txt", - "metadata": {}, + "metadata": { + "title": "aristotle_v2.txt", + "version": "v1" + }, "title": "aristotle_v2.txt", - "version": "v0", - "size_in_bytes": 7443, - "status": "success", - "created_at": "2024-08-13T17:43:28.344871Z", - "updated_at": "2024-08-13T17:43:28.344871Z" 
+ "version": "v1", + "size_in_bytes": 2534, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.521911Z", + "updated_at": "2024-09-20T22:31:45.500072Z", + "ingestion_attempt_number": null }, { - "id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9", - "group_ids": [], + "id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "type": "html", - "metadata": {}, - "title": "pg_essay_4.html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_2.html", "version": "v0", - "size_in_bytes": 118113, - "status": "success", - "created_at": "2024-08-13T17:43:28.344874Z", - "updated_at": "2024-08-13T17:43:28.344874Z" + "size_in_bytes": 166816, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.517088Z", + "updated_at": "2024-09-20T22:31:28.836602Z", + "ingestion_attempt_number": null + }, + { + "id": "2f576170-c4f9-5141-a910-a0924f341de4", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "pdf", + "metadata": { + "version": "v0" + }, + "title": "lyft_2021.pdf", + "version": "v0", + "size_in_bytes": 1920404, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.513642Z", + "updated_at": "2024-09-20T22:31:28.773388Z", + "ingestion_attempt_number": null }, { "id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c", - "group_ids": [], + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "type": "html", - "metadata": {}, + "metadata": { + "version": "v0" + }, "title": "pg_essay_5.html", "version": "v0", - "size_in_bytes": 123779, - "status": "success", - "created_at": "2024-08-13T17:43:28.344879Z", - "updated_at": "2024-08-13T17:43:28.344879Z" + "size_in_bytes": 165040, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.511219Z", + "updated_at": 
"2024-09-20T22:31:28.786697Z", + "ingestion_attempt_number": null }, { - "id": "2f576170-c4f9-5141-a910-a0924f341de4", - "group_ids": [], + "id": "7b0f40c5-2ace-5781-ae35-ead99ddee8c5", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "type": "pdf", - "metadata": {}, - "title": "lyft_2021.pdf", + "metadata": { + "version": "v0" + }, + "title": "graphrag.pdf", "version": "v0", - "size_in_bytes": 1440303, - "status": "success", - "created_at": "2024-08-13T17:43:28.344884Z", - "updated_at": "2024-08-13T17:43:28.344884Z" + "size_in_bytes": 2287544, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:26.504459Z", + "updated_at": "2024-09-20T22:31:28.767620Z", + "ingestion_attempt_number": null }, { - "id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc", - "group_ids": [], + "id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", - "metadata": {}, - "title": "test.txt", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_4.html", "version": "v0", - "size_in_bytes": 20, - "status": "success", - "created_at": "2024-08-13T17:43:28.344889Z", - "updated_at": "2024-08-13T17:43:28.344889Z" + "size_in_bytes": 157484, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:22.528586Z", + "updated_at": "2024-09-20T22:31:28.789182Z", + "ingestion_attempt_number": null }, { - "id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1", - "group_ids": [], + "id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", - "metadata": {}, - "title": "pg_essay_2.html", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "aristotle_v2.txt", "version": "v0", - "size_in_bytes": 125112, - "status": "success", - "created_at": "2024-08-13T17:43:28.344893Z", - "updated_at": 
"2024-08-13T17:43:28.344893Z" + "size_in_bytes": 3380, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:22.524513Z", + "updated_at": "2024-09-20T22:31:28.766240Z", + "ingestion_attempt_number": null }, { - "id": "d421207a-d799-5806-8d67-46b2005b15d4", - "group_ids": [], + "id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", "type": "txt", - "metadata": {}, - "title": "yc_companies.txt", + "metadata": { + "version": "v0" + }, + "title": "got.txt", "version": "v0", - "size_in_bytes": 47209, - "status": "success", - "created_at": "2024-08-13T17:43:28.344901Z", - "updated_at": "2024-08-13T17:43:28.344901Z" + "size_in_bytes": 12656, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:22.517526Z", + "updated_at": "2024-09-20T22:31:28.765359Z", + "ingestion_attempt_number": null }, { - "id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb", - "group_ids": [], + "id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "png", - "metadata": {}, - "title": "screen_shot.png", + "type": "pdf", + "metadata": { + "version": "v0" + }, + "title": "uber_2021.pdf", "version": "v0", - "size_in_bytes": 791764, - "status": "success", - "created_at": "2024-08-13T17:43:28.344905Z", - "updated_at": "2024-08-13T17:43:28.344905Z" + "size_in_bytes": 2507312, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:22.515504Z", + "updated_at": "2024-09-20T22:31:28.782778Z", + "ingestion_attempt_number": null }, { - "id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c", - "group_ids": [], + "id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "html", - "metadata": {}, - "title": "pg_essay_3.html", + "type": "mp3", + "metadata": { + "version": 
"v0" + }, + "title": "sample2.mp3", "version": "v0", - "size_in_bytes": 124915, - "status": "success", - "created_at": "2024-08-13T17:43:28.344908Z", - "updated_at": "2024-08-13T17:43:28.344908Z" + "size_in_bytes": 96608, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:22.514341Z", + "updated_at": "2024-09-20T22:31:28.764706Z", + "ingestion_attempt_number": null }, { - "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", - "group_ids": [], + "id": "52e12576-090f-59db-91f4-6d4b2e29ae6c", + "collection_ids": [], "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "type": "txt", - "metadata": {}, - "title": "aristotle_v2.txt", - "version": "v1", - "size_in_bytes": 7443, - "status": "success", - "created_at": "2024-08-13T17:43:28.344898Z", - "updated_at": "2024-08-13T17:47:20.150851Z" + "type": "mp3", + "metadata": { + "version": "v0" + }, + "title": "sample.mp3", + "version": "v0", + "size_in_bytes": 162228, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:26:22.511994Z", + "updated_at": "2024-09-20T22:31:28.770493Z", + "ingestion_attempt_number": null } - ] + ], + "total_entries": 15 }, "rerun_document_chunks_test": { - "results": "No chunks found for the given document ID." 
+ "results": "{\"detail\":{\"message\":\"An error 'list index out of range' occurred during document_chunks_app\",\"error\":\"list index out of range\",\"error_type\":\"IndexError\"}}" } } diff --git a/py/tests/regression/expected_outputs/test_retrieval.json b/py/tests/regression/expected_outputs/test_retrieval.json index 4f4a7171c..b84c439ad 100644 --- a/py/tests/regression/expected_outputs/test_retrieval.json +++ b/py/tests/regression/expected_outputs/test_retrieval.json @@ -3,160 +3,230 @@ "results": { "vector_search_results": [ { - "fragment_id": "c68dc72e-fc23-5452-8f49-d7bd46088a96", - "extraction_id": "3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09", + "fragment_id": "392ab9b4-c4bc-5894-8edf-332fcd9245bb", + "extraction_id": "cd49a88d-92e5-59f1-8331-3d3d3ecb7f3a", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.23943702876567796, - "text": "Alternate Base Rate means, for any day, a rate per annum equal to the greatest of (i) the Prime Rate in effect on such day, (ii) the Federal Funds Effective Rate in effect on such day \nplus \u00bd of 1% and (iii) the sum of (a) the Adjusted LIBO Rate that would be payable onsuch day for a Eurodollar Borrowing with a one-month interest period", + "collection_ids": [], + "score": 0.2381485473557875, + "text": "Finance Leases\n\n2022 2023 2024 2025 2026 Thereafter Total undiscounted lease payments Less: imputed interest\n\n$\n\n280 $ 312 264 214 198 2,067 3,335 (1,506) 1,829 $\n\n140 60 34 9 \u2014 1 244 (10) 234\n\nTotal lease liabilities\n\n$\n\nAs of December 31, 2021, we had additional operating leases and finance leases, primarily for corporate offices and servers, that have not yet commenced of $421 million and $19 million, respectively. 
These operating and finance leases will commence between fiscal year 2022 and fiscal year 2023 with lease terms of 2 years to 13 years.\n\nMission Bay 1 & 2\n\nIn 2015, we entered into a joint venture (\u201cJV\u201d) agreement with a real estate developer (\u201cJV Partner\u201d) to develop land (\u201cthe Land\u201d) in San Francisco to construct our new headquarters (the \u201cHeadquarters\u201d). The Headquarters consists of two adjacent office buildings totaling approximately 423,000 rentable square feet. In connection with the JV arrangement, we acquired a 49% interest in the JV, the principal asset of which was the Land.", "metadata": { - "text": "Alternate Base Rate means, for any day, a rate per annum equal to the greatest of (i) the Prime Rate in effect on such day, (ii) the Federal Funds Effective Rate in effect on such day \nplus \u00bd of 1% and (iii) the sum of (a) the Adjusted LIBO Rate that would be payable onsuch day for a Eurodollar Borrowing with a one-month interest period", - "title": "uber_2021.pdf", - "associatedQuery": "What is the capital of France?" + "version": "v0", + "chunk_order": 759, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 109, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" } }, { - "fragment_id": "f0b40c99-e200-507b-a4b9-e931e0b5f321", - "extraction_id": "0348ae71-bccb-58d1-8b5f-36810e46245a", - "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "fragment_id": "fd8508db-c444-5ed9-afce-67340354fb1e", + "extraction_id": "7f16fa20-9bc1-5841-ba74-95cdbb27e9fb", + "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.22033508121967305, - "text": "s, could also restrict our future access to the capital markets.ITEM 1B. UNRESOLVED STAFF\n COMMENTSNot applicable.\nITEM 2. 
PROPERTIES\nAs\n of December 31, 2021, we leased and owned office facilities around the world totaling 10.6 million square feet, including 2.6 million square feet for ourcorporate headquarte\nrs in the San Francisco Bay Area, California.We", + "collection_ids": [], + "score": 0.21556836366653442, + "text": "(c)\n\nCommuting and Corporate Housing Expenses. During your Employment, the Company shall cover the cost of your reasonable and substantiated expenses for travel between your primary residence and the Company\u2019s headquarters in San Francisco and corporate housing in the San Francisco Bay Area, up to a pre-tax maximum of $200,000 per year in the aggregate. All expense reimbursements shall be made in accordance with the Company\u2019s expense reimbursement policy.\n\n(d)", "metadata": { - "text": "s, could also restrict our future access to the capital markets.ITEM 1B. UNRESOLVED STAFF\n COMMENTSNot applicable.\nITEM 2. PROPERTIES\nAs\n of December 31, 2021, we leased and owned office facilities around the world totaling 10.6 million square feet, including 2.6 million square feet for ourcorporate headquarte\nrs in the San Francisco Bay Area, California.We", - "title": "uber_2021.pdf", - "associatedQuery": "What is the capital of France?" + "version": "v0", + "chunk_order": 971, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 170, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" 
} }, { - "fragment_id": "967c4291-0629-55b6-9323-e2291de8730d", - "extraction_id": "7595cdf2-d1b0-5f13-b853-8ce6857ca5f5", + "fragment_id": "e74d31b4-5de3-581c-abaf-8d28f48f924b", + "extraction_id": "f4aa1be1-c0fa-5edd-a536-d5af7f023b31", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.21763332188129403, - "text": "RFR means, for any RFR Loan denominated in (a) British Pounds, SONIA and (b) Swiss Francs, SARON. \nRFR Borrowing means, as to any Borrowing, the RFR Loans comprising such Borrowing. \nRFR Business Day means, for any Loan denominated in (a) British Pounds, any day except for (i) a Saturday, (ii) a Sunday or (iii) a day on which banks are closed for general business in London and (b) Swiss Francs, any day except for (i) a Saturday, (ii) a Sunday or", + "collection_ids": [], + "score": 0.21189823746681213, + "text": "\u201cPlatform\u201d has the meaning set forth in Section 9.01(d).\n\n\u201cPrime Rate\u201d means the rate of interest the rate of interest published by the Wall Street Journal, from time to time, as the prime rate.\n\nThe Prime Rate is a reference rate and does not necessarily represent the lowest or best rate actually charged to any customer. The Administrative Agent or any other Lender may make commercial loans or other loans at rates of interest at, above or below the Prime Rate.\n\n\u201cPrincipal Office\u201d for each of the Administrative Agent and any Issuing Bank, means the office of the Administrative Agent and such Issuing Bank as set forth in Section 9.01(a), or such other office or office of a third party or sub-agent, as appropriate, as such Person may from time to time designate to Borrower and each Lender upon two Business Days\u2019 written notice.", "metadata": { - "text": "RFR means, for any RFR Loan denominated in (a) British Pounds, SONIA and (b) Swiss Francs, SARON. 
\nRFR Borrowing means, as to any Borrowing, the RFR Loans comprising such Borrowing. \nRFR Business Day means, for any Loan denominated in (a) British Pounds, any day except for (i) a Saturday, (ii) a Sunday or (iii) a day on which banks are closed for general business in London and (b) Swiss Francs, any day except for (i) a Saturday, (ii) a Sunday or", - "title": "uber_2021.pdf", - "associatedQuery": "What is the capital of France?" + "version": "v0", + "chunk_order": 1210, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 205, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" } }, { - "fragment_id": "188b7dd5-9554-53cb-9e68-b97e5ee71342", - "extraction_id": "7595cdf2-d1b0-5f13-b853-8ce6857ca5f5", + "fragment_id": "76d010b2-1498-531a-bf89-66aa17331203", + "extraction_id": "f31920df-e1db-5a2c-9b8f-9c7b845a21c1", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.21672844886779796, - "text": "RFR Interest Day has the meaning specified in the definition of Daily Simple RFR. \nRFR Loan means a Loan that bears interest at a rate based on the Adjusted Daily Simple RFR. For the avoidance of doubt, only Loans denominated in British Pounds and Swiss Francs shall bear interest at a rate based on the Adjusted Daily Simple RFR.", + "collection_ids": [], + "score": 0.19467422366142273, + "text": "45-2647441 (I.R.S. 
Employer Identification No.)\n\n1515 3rd Street San Francisco, California 94158 (Address of principal executive offices, including zip code) (415) 612-8582 (Registrant\u2019s telephone number, including area code) ____________________________________________\n\nSecurities registered pursuant to Section 12(b) of the Act:\n\nTitle of each class Common Stock, par value $0.00001 per share", "metadata": { - "text": "RFR Interest Day has the meaning specified in the definition of Daily Simple RFR. \nRFR Loan means a Loan that bears interest at a rate based on the Adjusted Daily Simple RFR. For the avoidance of doubt, only Loans denominated in British Pounds and Swiss Francs shall bear interest at a rate based on the Adjusted Daily Simple RFR.", - "title": "uber_2021.pdf", - "associatedQuery": "What is the capital of France?" + "version": "v0", + "chunk_order": 1, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 1, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" 
} }, { - "fragment_id": "8e88e5b1-9785-5b26-a296-15150737dc10", - "extraction_id": "dd07e341-17f4-5ff7-9985-3e4719a4e145", + "fragment_id": "c96ee688-6e36-5abb-b066-d87779be1cf6", + "extraction_id": "eb08b70d-2e82-5de3-90ee-98537a761ea8", "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.21409947956201303, - "text": "Restricted\n cash and cash equivalents consist primarily of amounts held in separate trust accounts and restricted bank accounts as collateral for insurance purposesand amounts pledged\n to secure certain letters of credit.Investments\nDebt Securities\nThe Companys accounting for its investme\nnts in debt securities is based on the legal form of the security, the Companys intended holding period for the security,and", + "collection_ids": [], + "score": 0.19264961779117584, + "text": "Item 2. Properties.\n\nOur corporate headquarters are located in San Francisco, California, and consist of approximately 420,000 square feet under lease agreements through May 31,\n\n2030. We maintain additional offices in multiple locations in the U.S. and internationally in Montreal, Canada, Munich, Germany and Minsk, Belarus.\n\nWe lease all of our facilities and do not own any real property. We believe our facilities are adequate and suitable for our current needs and that, should it be\n\nneeded, suitable additional or alternative space will be available to accommodate our operations.\n\n53\n\nItem 3. Legal Proceedings.\n\nSee discussion under the heading Legal Proceedings in Note 9 to the consolidated financial statements included in Part II, Item 8 of this report.\n\nItem 4. Mine Safety Disclosures.\n\nNot applicable.\n\n54\n\nPART II\n\nItem 5. 
Market for Registrant\u2019s Common Equity, Related Stockholder Matters and Issuer Purchases of Equity Securities.", "metadata": { - "text": "Restricted\n cash and cash equivalents consist primarily of amounts held in separate trust accounts and restricted bank accounts as collateral for insurance purposesand amounts pledged\n to secure certain letters of credit.Investments\nDebt Securities\nThe Companys accounting for its investme\nnts in debt securities is based on the legal form of the security, the Companys intended holding period for the security,and", - "title": "lyft_2021.pdf", - "associatedQuery": "What is the capital of France?" + "version": "v0", + "chunk_order": 434, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 53, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" } }, { - "fragment_id": "cbc7d77c-ca2f-5e66-89c9-706588902653", - "extraction_id": "1b94a71e-a37d-5270-ba32-11b728014017", + "fragment_id": "2abca1b8-f005-59dd-9716-adf883ec3aca", + "extraction_id": "edd4f1f9-f6c8-5341-a1e7-ce57cac7f2fb", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.20865452782457905, - "text": "Fax: \n212-701-5165 (iv)\nif to MSSF, in its capacity as a Lender, to it at: Morgan Stanley Senior Funding, Inc.\n1 New York Plaza, 41st Floor\nNew York, New York, 10004\nAttention: Agency Team\nFax: (212) 507-6680\n(iv) if to any other Lender or any other Issuing Bank, to it at its address (or telecopy number) set forth in its Administrative\nQuestionnaire.\n112", + "collection_ids": [], + "score": 0.19248970195712467, + "text": "(b)\n\nSolely for purposes of Article II and related definitional provisions to the extent used therein, the applicable amount of any\n\ncurrency (other than dollars) for purposes of the Loan Documents shall be such Dollar 
Equivalent amount as determined by the Administrative Agent and notified to the applicable Issuing Bank and the Borrower in accordance with Section 1.06(a). Amounts denominated in a Permitted Foreign Currency will be converted to dollars for the purposes of calculating the Senior Secured Net Leverage Ratio at the Exchange Rate as of the date of calculation.", "metadata": { - "text": "Fax: \n212-701-5165 (iv)\nif to MSSF, in its capacity as a Lender, to it at: Morgan Stanley Senior Funding, Inc.\n1 New York Plaza, 41st Floor\nNew York, New York, 10004\nAttention: Agency Team\nFax: (212) 507-6680\n(iv) if to any other Lender or any other Issuing Bank, to it at its address (or telecopy number) set forth in its Administrative\nQuestionnaire.\n112", - "title": "uber_2021.pdf", - "associatedQuery": "What is the capital of France?" + "version": "v0", + "chunk_order": 1266, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 216, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" 
} }, { - "fragment_id": "abb86b50-d271-57ef-a5a0-fafa06d4590b", - "extraction_id": "9dcfaa51-83d4-5119-9fab-c6591ca32cb0", - "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", + "fragment_id": "85474903-20cc-58e6-ad3c-a1b64de77557", + "extraction_id": "b69b89e5-48e1-526e-ba04-c9f5c0c56fa6", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.20697565376758598, - "text": "24,757 24,757 Money market deposit accoun\nts162 162 Term deposits\n6,506 6,506 Certificates of deposit\n481,154 213 (3) 481,364 Commercial paper\n469,193 57 (10) 469,240 Corporate bonds\n184,560 67 (26) 184,601 Total restricted cash equivale\nnts and investments1,166,332 337 (39) 1,166,630 Total unrestricted and \nrestricted cash equivalents and investments$ 3,283,497 $ 668 $ (104) $ 3,284,061 _______________\n(1)", + "collection_ids": [], + "score": 0.1894184407418502, + "text": "Interest, net of amount capitalized Income taxes, net of refunds\n\n$\n\nNon-cash investing and financing activities:\n\nConversion of redeemable convertible preferred stock to common stock upon initial public offering Conversion of convertible notes to common stock upon initial public offering Conversion of convertible notes to common stock related to Careem Finance lease obligations Common stock issued in connection with acquisitions Ownership interest received in exchange for divestitures Issuance of Careem Notes including the holdback amount\n\nThe accompanying notes are an integral part of these consolidated financial statements.\n\n81\n\n2019", "metadata": { - "text": "24,757 24,757 Money market deposit accoun\nts162 162 Term deposits\n6,506 6,506 Certificates of deposit\n481,154 213 (3) 481,364 Commercial paper\n469,193 57 (10) 469,240 Corporate bonds\n184,560 67 (26) 184,601 Total restricted cash equivale\nnts and investments1,166,332 337 (39) 1,166,630 Total unrestricted and \nrestricted cash equivalents and investments$ 
3,283,497 $ 668 $ (104) $ 3,284,061 _______________\n(1)", - "title": "lyft_2021.pdf", - "associatedQuery": "What is the capital of France?" + "version": "v0", + "chunk_order": 590, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 83, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" } }, { - "fragment_id": "24dd1910-d09b-5af2-9608-542777d8cbca", - "extraction_id": "7b1e614f-bda0-5c9d-adc5-a8dd25658c4c", - "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", + "fragment_id": "a2f05c5a-0d43-538d-b4d0-ffd29d215437", + "extraction_id": "f1cb0bd8-0721-59ab-9e39-110efccf33dd", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.20446833968162503, - "text": "our future capital requirements;\nour ability to service our current and future debt, and counterparty risk with respect to our capped call transactions;\nour ability to make and successfully integrate acquisitions and investments or complete divestitures, joint ventures, partnerships or other strategic transactions;\nour tax liabilities, ability to use our net operating loss carryforwards and future changes in tax matters;Governance Risks and Risks related to Ownership o\nf our Capital Stock", + "collection_ids": [], + "score": 0.18724104762077332, + "text": "(1) the rate, or methodology for this rate, and conventions for this rate selected or recommendedby the Relevant Governmental Body\n\nfor determining compounded SOFR; provided that:\n\n(2) if, and to the extent that, the Administrative Agent determines that Compounded SOFRcannot\n\nbe determined in accordance with clause (1) above, then the rate, or methodology for this rate, and conventions for this rate that the Administrative Agent determines are substantially consistent with prevailing market convention for determining Compounded 
SOFR for U.S. dollar-denominated syndicated credit facilities at such time (as a result of amendment or as originally executed);", "metadata": { - "text": "our future capital requirements;\nour ability to service our current and future debt, and counterparty risk with respect to our capped call transactions;\nour ability to make and successfully integrate acquisitions and investments or complete divestitures, joint ventures, partnerships or other strategic transactions;\nour tax liabilities, ability to use our net operating loss carryforwards and future changes in tax matters;Governance Risks and Risks related to Ownership o\nf our Capital Stock", - "title": "lyft_2021.pdf", - "associatedQuery": "What is the capital of France?" + "version": "v0", + "chunk_order": 1102, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 186, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" 
} }, { - "fragment_id": "aa169f74-9fdb-5f76-ae71-33230ef0291b", - "extraction_id": "efee52c0-afbf-522e-a82b-1cbe07df4a66", + "fragment_id": "2654f646-222c-50af-bd1c-c7311e6a9dc9", + "extraction_id": "b25d210b-1b58-578a-b038-34f76d77f377", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.20252158030738998, - "text": "UK Financial Institution means any BRRD Undertaking (as such term is defined under the PRA Rulebook (as amended form time to time) promulgated by the United Kingdom Prudential Regulation Authority) or any person falling within IFPRU 11.6 of the FCA\nHandbook (as amended from time to time) promulgated by the United Kingdom Financial Conduct Authority, which includes certain credit\ninstitutions and investment firms, and certain affiliates of such credit institutions or investment firms.", + "collection_ids": [], + "score": 0.18122072927828292, + "text": "\u201cCanadian BA Rate Borrowing\u201d refers to a Borrowing bearing interest at a rate determined by reference to the Canadian BA Rate.\n\n10\n\n\u201cCanadian BA Rate Loan\u201d refers to a Loan bearing interest at a rate determined by reference to the Canadian BA Rate.\n\n\u201cCanadian Dollars\u201d means the lawful currency of Canada.\n\n\u201cCapital Lease Obligations\u201d of any Person means the obligations of such Person to pay rent or other amounts under any lease of (or", "metadata": { - "text": "UK Financial Institution means any BRRD Undertaking (as such term is defined under the PRA Rulebook (as amended form time to time) promulgated by the United Kingdom Prudential Regulation Authority) or any person falling within IFPRU 11.6 of the FCA\nHandbook (as amended from time to time) promulgated by the United Kingdom Financial Conduct Authority, which includes certain credit\ninstitutions and investment firms, and certain affiliates of such credit institutions or investment firms.", - "title": 
"uber_2021.pdf", - "associatedQuery": "What is the capital of France?" + "version": "v0", + "chunk_order": 1085, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 182, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" } }, { - "fragment_id": "129a19b8-9164-52f1-990e-0af926045c59", - "extraction_id": "b4000f00-a84f-51bd-abf8-4b6eff9f6ada", - "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "fragment_id": "309f729d-78eb-569a-837c-50367c20e898", + "extraction_id": "dfc368a6-efaf-5f4d-a20b-0fd6059a5f35", + "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.20240467786788896, - "text": "l executive offices are located at 1515 3rd Street, San Francisco, California 94158, and our telephone number is (415) 612-8582.10", + "collection_ids": [], + "score": 0.1806427240371704, + "text": "principal payments on finance lease obligations for $35.5 million.\n\nCash provided by financing activities was $512.6 million for the year ended December 31, 2020, which primarily consisted of proceeds from issuance of our 2025\n\nNotes of $734.1 million offset by the purchase of the Capped Calls for $132.7 million.\n\nLiquidity and Capital Resources", "metadata": { - "text": "l executive offices are located at 1515 3rd Street, San Francisco, California 94158, and our telephone number is (415) 612-8582.10", - "title": "uber_2021.pdf", - "associatedQuery": "What is the capital of France?" + "version": "v0", + "chunk_order": 531, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 71, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" 
} } ], - "kg_search_results": [] + "kg_search_results": null } }, "basic_rag": { "results": { "completion": { - "id": "chatcmpl-9vwee7AtlnR6nxgbSG60VOpb9bxtE", + "id": "chatcmpl-A9ggQHI4VJvvkrhyrkBwUtQQ26Ab2", "choices": [ { "finish_reason": "stop", "index": 0, "logprobs": null, "message": { - "content": "Uber did not make a profit in 2020. Instead, it reported a net loss attributable to Uber Technologies, Inc. of $6,768 million [4], [5], [9].", + "content": "Uber did not make a profit in 2020. Instead, Uber reported a net loss attributable to Uber Technologies, Inc. of $6.8 billion for the year ended December 31, 2020 [3].", "refusal": null, "role": "assistant", "function_call": null, @@ -164,157 +234,230 @@ } } ], - "created": 1723597192, + "created": 1726872390, "model": "gpt-4o-2024-05-13", "object": "chat.completion", "service_tier": null, - "system_fingerprint": "fp_3aa7262c27", + "system_fingerprint": "fp_3537616b13", "usage": { - "completion_tokens": 40, - "prompt_tokens": 1483, - "total_tokens": 1523 + "completion_tokens": 45, + "prompt_tokens": 2320, + "total_tokens": 2365, + "completion_tokens_details": { + "reasoning_tokens": 0 + } } }, "search_results": { "vector_search_results": [ { - "fragment_id": "13a12fc0-cbce-5e35-b179-d413c15179cb", - "extraction_id": "2b8ff2e9-c135-573d-bf8a-7a2db60a0a11", + "fragment_id": "07aa09c5-81a8-5a48-953a-532064a446f8", + "extraction_id": "d3060c36-85dc-5e8d-b8ff-cfe4c1753ccc", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.744923337991438, - "text": "Net\n loss attributable to Uber Technologies, Inc. was $496 million, a 93% improvement year-over-year, driven by a $1.6 billion pre-tax gain on the sale of ourATG\n Business to Aurora, a $1.6 billion pre-tax net benefit relating to Ubers equity investments, as well as reductions in our fixed cost structure and increasedvariable cost effi\nciencies. 
Net loss attributable to Uber Technologies, Inc. also included $1.2 billion of stock-based compensation expense.Adjusted", + "collection_ids": [], + "score": 0.7445549521115464, + "text": "Revenue was $17.5 billion, or up 57% year-over-year, reflecting the overall growth in our Delivery business and an increase in Freight revenue attributable to the acquisition of Transplace in the fourth quarter of 2021 as well as growth in the number of shippers and carriers on the network combined with an increase in volumes with our top shippers.\n\nNet loss attributable to Uber Technologies, Inc. was $496 million, a 93% improvement year-over-year, driven by a $1.6 billion pre-tax gain on the sale of our ATG Business to Aurora, a $1.6 billion pre-tax net benefit relating to Uber\u2019s equity investments, as well as reductions in our fixed cost structure and increased variable cost efficiencies. Net loss attributable to Uber Technologies, Inc. also included $1.2 billion of stock-based compensation expense.", "metadata": { - "text": "Net\n loss attributable to Uber Technologies, Inc. was $496 million, a 93% improvement year-over-year, driven by a $1.6 billion pre-tax gain on the sale of ourATG\n Business to Aurora, a $1.6 billion pre-tax net benefit relating to Ubers equity investments, as well as reductions in our fixed cost structure and increasedvariable cost effi\nciencies. Net loss attributable to Uber Technologies, Inc. also included $1.2 billion of stock-based compensation expense.Adjusted", - "title": "uber_2021.pdf", - "associatedQuery": "What was Uber's profit in 2020?" + "version": "v0", + "chunk_order": 445, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 53, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" 
} }, { - "fragment_id": "26fc1632-7d88-5107-91a6-db2ffda6d5eb", - "extraction_id": "bb7ea09e-04a8-5d9d-8133-9408ba51dc0b", + "fragment_id": "2044e305-c042-5f0d-b05d-a2b97181f7a8", + "extraction_id": "4329441a-5faf-5e9d-801f-ebd753ee1bd3", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.671538352966309, - "text": "See the section titled Reconciliations of Non-GAAP Financial Measures for our definition and a reconciliation of net loss attributable toUber Technologies, Inc. to Adjusted EBITDA.\nYear Ended December 31,\n(In millions, except percentages)\n2020 2021 2020 to 2021 %Change\nAdjusted EBITDA\n$ (2,528) $ (774) 69 %2021 Compared to 2020\nAdjusted\n EBITDA loss improved $1.8 billion, or 69%, primarily attributable to a $525 million improvement in Delivery Adjusted EBITDA loss, a $427million", + "collection_ids": [], + "score": 0.7071289420127869, + "text": "Total costs and expenses Loss from operations\n\nInterest expense Other income (expense), net Loss before income taxes and loss from equity method investments Provision for (benefit from) income taxes Loss from equity method investments Net loss including non-controlling interests\n\nLess: net loss attributable to non-controlling interests, net of tax\n\n100 %\n\n46 % 16 % 32 % 20 % 24 % 5 % 144 % (44)% (4)% (15)% (62)% (2)% \u2014 % (61)% \u2014 % (61)%\n\n100 %\n\n54 % 11 % 27 % 12 % 13 % 5 % 122 % (22)% (3)% 19 % (6)% (3)% \u2014 % (3)% \u2014 % (3)%\n\nNet loss attributable to Uber Technologies, Inc.\n\n(1)\n\nTotals of percentage of revenues may not foot due to rounding.\n\nComparison of the Years Ended December 31, 2020 and 2021\n\nRevenue\n\nYear Ended December 31,\n\n(In millions, except percentages)\n\n2020\n\n2021\n\n2020 to 2021 % Change\n\nRevenue\n\n$\n\n11,139 $\n\n17,455\n\n57 %", "metadata": { - "text": "See the section titled Reconciliations of Non-GAAP Financial Measures for our definition and a 
reconciliation of net loss attributable toUber Technologies, Inc. to Adjusted EBITDA.\nYear Ended December 31,\n(In millions, except percentages)\n2020 2021 2020 to 2021 %Change\nAdjusted EBITDA\n$ (2,528) $ (774) 69 %2021 Compared to 2020\nAdjusted\n EBITDA loss improved $1.8 billion, or 69%, primarily attributable to a $525 million improvement in Delivery Adjusted EBITDA loss, a $427million", - "title": "uber_2021.pdf", - "associatedQuery": "What was Uber's profit in 2020?" + "version": "v0", + "chunk_order": 463, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 57, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" } }, { - "fragment_id": "fc79cf57-a508-5781-874e-afca763f2efd", - "extraction_id": "cdf49309-5eeb-58df-b62f-15a2fe7444db", + "fragment_id": "3840834b-7c74-5417-9252-9080e609fb2f", + "extraction_id": "cf934fe1-926d-5525-a230-30946961cf28", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.667392492294312, - "text": "24 59 83 Other income (expense), net\n$ 722 $ (1,625) $ 3,292 During\n the year ended December 31, 2020, gain on business divestitures, net represented a $154 million gain on the sale of our Uber Eats India operationsto Zomato\n recognized in the first quarter of 2020 and a $77 million gain on the sale of our European Freight Business to sennder GmbH (Sennder) recognized inthe fourth quarter of 2020, pa", + "collection_ids": [], + "score": 0.6809690201571295, + "text": "Year Ended December 31, 2020\n\nRevenue Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately below Operations and support Sales and marketing Research and development General and administrative Depreciation and amortization\n\nTotal costs and expenses Loss from operations\n\nInterest expense Other income 
(expense), net Loss before income taxes and loss from equity method investments Provision for (benefit from) income taxes Loss from equity method investments Net loss including non-controlling interests\n\nLess: net loss attributable to non-controlling interests, net of tax\n\n$\n\n13,000 $\n\n6,061 2,302 4,626 4,836 3,299 472 21,596 (8,596) (559) 722 (8,433) 45 (34) (8,512) (6) (8,506) $\n\n11,139 $\n\n5,154 1,819 3,583 2,205 2,666 575 16,002 (4,863) (458) (1,625) (6,946) (192) (34) (6,788) (20) (6,768) $\n\nNet loss attributable to Uber Technologies, Inc. Net loss per share attributable to Uber Technologies, Inc. common stockholders:\n\n$\n\nBasic\n\n$\n\n(6.81) $\n\n(3.86) $", "metadata": { - "text": "24 59 83 Other income (expense), net\n$ 722 $ (1,625) $ 3,292 During\n the year ended December 31, 2020, gain on business divestitures, net represented a $154 million gain on the sale of our Uber Eats India operationsto Zomato\n recognized in the first quarter of 2020 and a $77 million gain on the sale of our European Freight Business to sennder GmbH (Sennder) recognized inthe fourth quarter of 2020, pa", - "title": "uber_2021.pdf", - "associatedQuery": "What was Uber's profit in 2020?" + "version": "v0", + "chunk_order": 574, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 77, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" 
} }, { - "fragment_id": "8d585c42-a618-50e0-be1d-709f94978ace", - "extraction_id": "b314fcfb-f0d2-5344-8397-8904f96282c7", + "fragment_id": "e2a37b27-0644-59e4-9746-37d48592a299", + "extraction_id": "6b86ac2f-ce33-5126-83e6-a8731ea677c8", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.657597821045186, - "text": "The following table presents a reconciliation of net loss attributable to Uber Technologies, Inc., the most directly comparable GAAP financial measure, toAdjusted EBITDA for each of the periods ind\nicated:Year Ended December 31,\n(In millions)\n2020 2021 Adjusted EBITDA reconciliation:\nNet loss attributable to Ube\nr Technologies, Inc.$ (6,768) $ (496) Add (deduct):\nNet loss attributable to non\n-controlling interests, net of tax(20) (74) Provision for (benefit fro", + "collection_ids": [], + "score": 0.6805637085605776, + "text": "Less: net loss attributable to non-controlling interests, net of tax\n\n$\n\n11,139 $\n\n5,154 1,819 3,583 2,205 2,666 575 16,002 (4,863) (458) (1,625) (6,946) (192) (34) (6,788) (20) (6,768) $\n\n17,455\n\n9,351 1,877 4,789 2,054 2,316 902 21,289 (3,834) (483) 3,292 (1,025) (492) (37) (570) (74) (496)\n\nNet loss attributable to Uber Technologies, Inc.\n\n$\n\n54\n\nThe following table sets forth the components of our consolidated statements of operations for each of the periods presented as a percentage of revenue\n\n(1)\n\n:\n\nYear Ended December 31, 2021 2020\n\nRevenue Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately below Operations and support Sales and marketing Research and development General and administrative Depreciation and amortization", "metadata": { - "text": "The following table presents a reconciliation of net loss attributable to Uber Technologies, Inc., the most directly comparable GAAP financial measure, toAdjusted EBITDA for each of the periods ind\nicated:Year Ended 
December 31,\n(In millions)\n2020 2021 Adjusted EBITDA reconciliation:\nNet loss attributable to Ube\nr Technologies, Inc.$ (6,768) $ (496) Add (deduct):\nNet loss attributable to non\n-controlling interests, net of tax(20) (74) Provision for (benefit fro", - "title": "uber_2021.pdf", - "associatedQuery": "What was Uber's profit in 2020?" + "version": "v0", + "chunk_order": 462, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 56, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" } }, { - "fragment_id": "e11e0d27-9bb6-5db1-97ba-cbb7f082f4f7", - "extraction_id": "2b8ff2e9-c135-573d-bf8a-7a2db60a0a11", + "fragment_id": "de30c3c9-cdfd-5872-bdaf-4859bef5c3a8", + "extraction_id": "33bc6d8b-9fdc-5df7-be1d-fa7de176a0b5", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.627140164375305, - "text": "Financial and Operational HighlightsYear Ended December 31,\nConstant Currency (In millions, except percentages)\n2020 2021 2020 to 2021 %Change\n2020 to 2021 % Change Monthly Active Platform Consumers \n(MAPCs) 93 118 27 %Trips \n5,025 6,368 27 %Gross Bookings \n$ 57,897 $ 90,415 56 %53 % Revenue\n$ 11,139 $ 17,455 57 %54 % Net loss attributable to Ube\nr Technologies, Inc. $ (6,768) $ (496) 93 %Mobility Adjusted EBITDA\n$ 1,169 $ 1,596 37 %Delivery Adjusted EBITDA\n$ (873) $ (348) 60 %Adjusted EBITDA", + "collection_ids": [], + "score": 0.6537506580352783, + "text": "The Uber Service activities are performed to satisfy our sole performance obligation in the transaction, which is to connect Drivers and Merchants with end-\n\nusers to facilitate the completion of a successful transaction.\n\nIn 2020, we began charging Mobility end-users a fee to use the platform in certain markets. 
In these transactions, in addition to a performance obligation to Drivers, we also have a performance obligation to end-users, which is to connect end-users to Drivers in the marketplace. We recognize revenue when a trip is complete. We present revenue on a net basis for these transactions, as we do not control the service provided by Drivers to end-users. For the years ended December 31, 2020 and 2021, we recognized total revenue of $323 million and $336 million, respectively, associated with these fees charged to end-users.", "metadata": { - "text": "Financial and Operational HighlightsYear Ended December 31,\nConstant Currency (In millions, except percentages)\n2020 2021 2020 to 2021 %Change\n2020 to 2021 % Change Monthly Active Platform Consumers \n(MAPCs) 93 118 27 %Trips \n5,025 6,368 27 %Gross Bookings \n$ 57,897 $ 90,415 56 %53 % Revenue\n$ 11,139 $ 17,455 57 %54 % Net loss attributable to Ube\nr Technologies, Inc. $ (6,768) $ (496) 93 %Mobility Adjusted EBITDA\n$ 1,169 $ 1,596 37 %Delivery Adjusted EBITDA\n$ (873) $ (348) 60 %Adjusted EBITDA", - "title": "uber_2021.pdf", - "associatedQuery": "What was Uber's profit in 2020?" + "version": "v0", + "chunk_order": 642, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 90, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" 
} }, { - "fragment_id": "4cc24e78-c6c5-56a6-9a2c-c361a1172116", - "extraction_id": "2b6f0786-5dc4-5581-bb83-b63f1175111a", + "fragment_id": "4b6ee3eb-daca-5930-bafe-946cad56cdcc", + "extraction_id": "eb2bc121-0b00-5f70-8eb6-549e1fb1ed72", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.6232549323591561, - "text": "ments(34) (34) (37) Net loss including non-controlling interests\n(8,512) (6,788) (570) Less: net loss attributab\nle to non-controlling interests, net of tax(6) (20) (74) Net loss attributable to Uber Technologies, Inc.\n$ (8,506) $ (6,768) $ (496) Net loss per share attributable to Uber Technologies, Inc. co\nmmon stockholders:Basic\n$ (6.81) $ (3.86) $ (0.26) Diluted\n$ (6.81) $ (3.86) $ (0.29) Weighted-average shares used to compute net loss per shar\ne attributable to common stockholders:Basic", + "collection_ids": [], + "score": 0.6387766599655151, + "text": "Other income (expense), net\n\n$\n\nDuring the year ended December 31, 2020, gain on business divestitures, net represented a $154 million gain on the sale of our Uber Eats India operations to Zomato recognized in the first quarter of 2020 and a $77 million gain on the sale of our European Freight Business to sennder GmbH (\u201cSennder\u201d) recognized in the fourth quarter of 2020, partially offset by a $27 million loss on the sale of our JUMP operations to Lime recognized in the second quarter of 2020.\n\n(1)\n\nDuring the year ended December 31, 2021, gain on business divestitures, net represented a $1.6 billion gain on the sale of our ATG Business to Aurora", "metadata": { - "text": "ments(34) (34) (37) Net loss including non-controlling interests\n(8,512) (6,788) (570) Less: net loss attributab\nle to non-controlling interests, net of tax(6) (20) (74) Net loss attributable to Uber Technologies, Inc.\n$ (8,506) $ (6,768) $ (496) Net loss per share attributable to Uber Technologies, Inc. 
co\nmmon stockholders:Basic\n$ (6.81) $ (3.86) $ (0.26) Diluted\n$ (6.81) $ (3.86) $ (0.29) Weighted-average shares used to compute net loss per shar\ne attributable to common stockholders:Basic", - "title": "uber_2021.pdf", - "associatedQuery": "What was Uber's profit in 2020?" + "version": "v0", + "chunk_order": 799, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 118, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" } }, { - "fragment_id": "3b9425c9-5147-5559-ac48-2d32a5f2fa50", - "extraction_id": "e0668732-8b42-53c5-a719-39f72634e380", - "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "fragment_id": "af5f93d3-8b2f-5e71-a358-0dd56c2f68ac", + "extraction_id": "acf12622-2e6e-5234-9768-ba448294a81d", + "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.62315821647644, - "text": "issued to new investors in close proximity to the time of our transaction with Zomato. The transaction resulted in a gain on disposal of $154 millionrecognized\n in other income (expense), net in the consolidated statements of operations during the first quarter of 2020. The income tax effect of the sale was notmaterial.\n The divestiture of Uber Eats India did not represent a strategic shift that would have had a major effect on our operations and financial results, andtherefore does not quali", + "collection_ids": [], + "score": 0.6321083903312683, + "text": "2019\n\n100.0 %\n\n60.2 17.6 41.6 22.5 32.8 174.7 (74.7) \u2014 2.8 (71.9) 0.1 (72.0)%\n\n2019 to 2020 % Change\n\n(35) %\n\nsecond quarter of 2021. 
These increases were offset by investments in driver supply by increasing driver incentives recorded as a reduction to revenue by $942.9 million in 2021 as compared to the prior year as rider demand outpaced driver supply during certain periods of the pandemic recovery in 2021. Revenue in 2020 was also higher in the first quarter of 2020 prior to the implementation of shelter-in-place orders and other travel restrictions across North America beginning March 2020.\n\nWe expect to see continued recovery in demand for our platform and the resulting positive impacts on revenue as there are more widespread immunity levels, more communities reopen and other restrictive travel and social distancing measures in response to COVID-19 are eased. However, we cannot predict the impact of COVID variants and the longer term impact of the pandemic on consumer behavior.\n\nCost of Revenue\n\n2021", "metadata": { - "text": "issued to new investors in close proximity to the time of our transaction with Zomato. The transaction resulted in a gain on disposal of $154 millionrecognized\n in other income (expense), net in the consolidated statements of operations during the first quarter of 2020. The income tax effect of the sale was notmaterial.\n The divestiture of Uber Eats India did not represent a strategic shift that would have had a major effect on our operations and financial results, andtherefore does not quali", - "title": "uber_2021.pdf", - "associatedQuery": "What was Uber's profit in 2020?" + "version": "v0", + "chunk_order": 493, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 63, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" 
} }, { - "fragment_id": "7d297996-f186-5f08-aa4f-2ff30ad8cc6f", - "extraction_id": "eeadc634-c141-5748-98d0-4c89aa5404b1", + "fragment_id": "d5379124-e7ff-509f-b47f-a79152eec2d4", + "extraction_id": "2562b865-e4df-5376-9e70-927be9afbb7e", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.6223304271698, - "text": "is presented in the following tables for the years ended December 31, 2019, 2020 and 2021, respectively (in millions):Year Ended December 31,\n2019\n2020 2021 Mobility revenue \n$ 10,707 $ 6,089 $ 6,953 Delivery revenue \n1,401 3,904 8,362 Freight revenue\n731 1,011 2,132 All Other revenue\n161 135 8 Total revenue\n$ 13,000 $ 11,139 $ 17,455 We\n offer subscription memberships to end-users including Uber One, Uber Pass, Rides Pass, and Eats Pass (Subscription). We recognize Subscriptionfees", + "collection_ids": [], + "score": 0.63012705682194, + "text": "For additional discussion, see the risk factor titled \u201c\u2014If we are unable to attract or maintain a critical mass of Drivers, consumers, merchants, shippers, and carriers, whether as a result of competition or other factors, our platform will become less appealing to platform users, and our financial results would be adversely impacted.\u201d included in Part I, Item 1A of this Annual Report on Form 10-K as well our 2021 ESG Report and our 2021 People and Culture Report. The information in these reports is not a part of this Form 10-K.\n\nAdditional Information\n\nWe were founded in 2009 and incorporated as Ubercab, Inc., a Delaware corporation, in July 2010. In February 2011, we changed our name to Uber\n\nTechnologies, Inc. 
Our principal executive offices are located at 1515 3rd Street, San Francisco, California 94158, and our telephone number is (415) 612-8582.\n\n10", "metadata": { - "text": "is presented in the following tables for the years ended December 31, 2019, 2020 and 2021, respectively (in millions):Year Ended December 31,\n2019\n2020 2021 Mobility revenue \n$ 10,707 $ 6,089 $ 6,953 Delivery revenue \n1,401 3,904 8,362 Freight revenue\n731 1,011 2,132 All Other revenue\n161 135 8 Total revenue\n$ 13,000 $ 11,139 $ 17,455 We\n offer subscription memberships to end-users including Uber One, Uber Pass, Rides Pass, and Eats Pass (Subscription). We recognize Subscriptionfees", - "title": "uber_2021.pdf", - "associatedQuery": "What was Uber's profit in 2020?" + "version": "v0", + "chunk_order": 77, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 12, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" } }, { - "fragment_id": "5d2bb46e-d9ad-5cdc-9658-09e8e6f9bd4d", - "extraction_id": "2a3a3cef-bc9d-5c62-9691-14d892c521c9", + "fragment_id": "98a93be5-13ba-5bd6-9a18-e7ceef0fae88", + "extraction_id": "8ab931e3-8f47-5598-90b8-928f387ec256", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.6163432230744841, - "text": "(458) (483) Other income (expense), net\n(1,625) 3,292 Loss before income taxes and loss from equity me\nthod investments(6,946) (1,025) Provision for (benefit fro\nm) income taxes(192) (492) Loss from equity method invest\nments(34) (37) Net loss including non-controlling interests\n(6,788) (570) Less: net loss attributab\nle to non-controlling interests, net of tax(20) (74) Net loss attributable to Uber Technologies, Inc.\n$ (6,768) $ (496) 54", + "collection_ids": [], + "score": 0.6285917009601995, + "text": "Uber Technologies, Inc. 
(\u201cUber,\u201d \u201cwe,\u201d \u201cour,\u201d or \u201cus\u201d) was incorporated in Delaware in July 2010, and is headquartered in San Francisco, California. Uber is a technology platform that uses a massive network, leading technology, operational excellence and product expertise to power movement from point A to point B. Uber develops and operates proprietary technology applications supporting a variety of offerings on its platform (\u201cplatform(s)\u201d or \u201cPlatform(s)\u201d). Uber connects consumers (\u201cRider(s)\u201d) with independent providers of ride services (\u201cMobility Driver(s)\u201d) for ridesharing services, and connects Riders and other consumers (\u201cEaters\u201d) with restaurants, grocers and other stores (collectively, \u201cMerchants\u201d) with delivery service providers (\u201cCouriers\u201d) for meal preparation, grocery and other delivery services. Riders and Eaters are collectively referred to as \u201cend-user(s)\u201d or \u201cconsumer(s).\u201d Mobility Drivers and Couriers are collectively referred to as \u201cDriver(s).\u201d Uber also connects consumers with public", "metadata": { - "text": "(458) (483) Other income (expense), net\n(1,625) 3,292 Loss before income taxes and loss from equity me\nthod investments(6,946) (1,025) Provision for (benefit fro\nm) income taxes(192) (492) Loss from equity method invest\nments(34) (37) Net loss including non-controlling interests\n(6,788) (570) Less: net loss attributab\nle to non-controlling interests, net of tax(20) (74) Net loss attributable to Uber Technologies, Inc.\n$ (6,768) $ (496) 54", - "title": "uber_2021.pdf", - "associatedQuery": "What was Uber's profit in 2020?" + "version": "v0", + "chunk_order": 592, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 84, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" 
} }, { - "fragment_id": "b89e13fa-4d7a-5b0a-87db-d51de1ce777f", - "extraction_id": "45650893-257d-5ae5-8a35-35edd54f7699", + "fragment_id": "d87084d1-c52a-5a4b-96ad-9fc1cb98bfc5", + "extraction_id": "8361bf60-bce2-56c2-b982-376a75e47d58", "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.606526911258698, - "text": "UBER TECHNOLOGIES, INC.NOTES TO CONSOLIDATED \nFINANCIAL STATEMENTSNote 1 Description of Business and Summary of Significant Accounting Policies\nDescription of Business\nUber\n Technologies, Inc. (Uber, we, our, or us) was incorporated in Delaware in July 2010, and is headquartered in San Francisco, California. Uber isa\n technology platform that uses a massive network, leading technology, operational excellence and product expertise to power movement from point A to point B.Uber", + "collection_ids": [], + "score": 0.6283430678639979, + "text": "Year Ended December 31, 2020\n\n1,000 49 1,189 (27) \u2014 (138) (34) 8,939 (4) 3,824\n\n247 125 2,628 (527) (891) (224) 38 1,379 (92) (4,327)\n\n8,209 34 12,067 $\n\n12,067 (349) 7,391 $\n\n332 $ 133\n\n412 $ 82\n\n14,224 4,229 \u2014 251 9 \u2014 \u2014\n\n\u2014 \u2014 \u2014 196 3,898 171 1,634\n\n2021\n\n675 107 1,484 (27) (307) (226) 101 1,780 (69) 65\n\n7,391 349 7,805\n\n449 87\n\n\u2014 \u2014 232 184 1,868 1,018 \u2014\n\nUBER TECHNOLOGIES, INC.\n\nNOTES TO CONSOLIDATED FINANCIAL STATEMENTS\n\nNote 1 \u2013 Description of Business and Summary of Significant Accounting Policies\n\nDescription of Business", "metadata": { - "text": "UBER TECHNOLOGIES, INC.NOTES TO CONSOLIDATED \nFINANCIAL STATEMENTSNote 1 Description of Business and Summary of Significant Accounting Policies\nDescription of Business\nUber\n Technologies, Inc. (Uber, we, our, or us) was incorporated in Delaware in July 2010, and is headquartered in San Francisco, California. 
Uber isa\n technology platform that uses a massive network, leading technology, operational excellence and product expertise to power movement from point A to point B.Uber", - "title": "uber_2021.pdf", - "associatedQuery": "What was Uber's profit in 2020?" + "version": "v0", + "chunk_order": 591, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 83, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" } } ], @@ -325,14 +468,14 @@ "hybrid_rag": { "results": { "completion": { - "id": "chatcmpl-9vwegy0zTZ75kT3Y52UopJ7QYeFFi", + "id": "chatcmpl-A9ggSfpp9sr3LZdfzBYse7JjT8eCK", "choices": [ { "finish_reason": "stop", "index": 0, "logprobs": null, "message": { - "content": "John Snow, referred to as Jon Snow in the context, is Ned Stark\u2019s bastard son. Since Catelyn is not his mother, he is not considered a proper member of the Stark family, which often makes him feel like an outsider. He is also a highly capable swordsman and thinker, known for his piercing observations [1].", + "content": "John Snow is Ned Stark\u2019s bastard son. Since Catelyn is not his mother, he is not a proper member of the Stark family, and he often feels himself an outsider. 
He is also a highly capable swordsman and thinker, with a knack for piercing observations [1].", "refusal": null, "role": "assistant", "function_call": null, @@ -340,197 +483,225 @@ } } ], - "created": 1723597194, + "created": 1726872392, "model": "gpt-4o-2024-05-13", "object": "chat.completion", "service_tier": null, - "system_fingerprint": "fp_3aa7262c27", + "system_fingerprint": "fp_3537616b13", "usage": { - "completion_tokens": 68, - "prompt_tokens": 1147, - "total_tokens": 1215 + "completion_tokens": 57, + "prompt_tokens": 1810, + "total_tokens": 1867, + "completion_tokens_details": { + "reasoning_tokens": 0 + } } }, "search_results": { "vector_search_results": [ { - "fragment_id": "378da9e2-4ba7-503f-b3a5-bbb0c3095f55", + "fragment_id": "7cbdab86-1689-5779-81bd-62f7eb3ab36d", "extraction_id": "866f85a0-b3d6-5fc5-9ca0-dbd2373eac58", "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.0476190476190476, - "text": "Jon Snow\nNed Stark\u2019s bastard son. Since Catelyn is not his mother, he is not a proper member of the Stark family, and he often feels himself an outsider. He is also a highly capable swordsman and thinker, with a knack for piercing observations.\nTyrion (The Imp) Lannister\nA small man with a giant intellect and sharp tongue. Tyrion does not pity himself but rather accepts his shortcomings as a little person and turns them to his advantage. He loves his family but recognizes their greed and ambition.", + "collection_ids": [], + "score": 0.26240772008895874, + "text": "Eddard (Ned) Stark\n\nThe Lord of Winterfell and new Hand of the King. A devoted father and dutiful lord, he is best characterized by his strong sense of honor, and he strives to always do what is right, regardless of his personal feelings.\n\nCatelyn (Cat) Tully\n\nNed\u2019s wife and Lady Stark of Winterfell. 
She is intelligent, strong, and fiercely devoted to her family, leading her to seek out the person responsible for trying to kill her son Bran.\n\nDaenerys Stormborn Targaryen\n\nThe Dothraki khaleesi (queen) and Targaryen princess. She and her brother are the only surviving members of the Targaryen family, and she grows from a frightened girl to a confident ruler, while still maintaining her kindness, over the course of the novel.\n\nJon Snow\n\nNed Stark\u2019s bastard son. Since Catelyn is not his mother, he is not a proper member of the Stark family, and he often feels himself an outsider. He is also a highly capable swordsman and thinker, with a knack for piercing observations.", "metadata": { - "text": "Jon Snow\nNed Stark\u2019s bastard son. Since Catelyn is not his mother, he is not a proper member of the Stark family, and he often feels himself an outsider. He is also a highly capable swordsman and thinker, with a knack for piercing observations.\nTyrion (The Imp) Lannister\nA small man with a giant intellect and sharp tongue. Tyrion does not pity himself but rather accepts his shortcomings as a little person and turns them to his advantage. He loves his family but recognizes their greed and ambition.", - "title": "got.txt", - "semantic_score": 0.289943260553881, - "semantic_rank": 1, - "full_text_score": 0.0, - "full_text_rank": null, - "associatedQuery": "Who is John Snow?" + "version": "v0", + "chunk_order": 0, + "document_type": "txt", + "unstructured_filetype": "text/plain", + "unstructured_languages": [ + "eng" + ], + "partitioned_by_unstructured": true, + "associated_query": "Who is John Snow?" 
} }, { - "fragment_id": "2a4f2509-a218-5919-b5b1-6018760d5c66", - "extraction_id": "866f85a0-b3d6-5fc5-9ca0-dbd2373eac58", + "fragment_id": "8e563fb1-4665-53a8-8a83-63a1f88e2aea", + "extraction_id": "f6bc23b5-bc80-5e49-9b55-25e9abe97073", "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.0454545454545455, - "text": "Maester Aemon\nThe chief man of learning at Castle Black. Despite his blind white eyes, Maester Aemon sees and speaks the truth in cryptic ways. Though few people realize it, Aemon is one of the few surviving members of the Targaryen family, but he has always put his vows to the Night\u2019s Watch ahead of any family loyalties.\nSamwell (Sam) Tarly", + "collection_ids": [], + "score": 0.2610799748027318, + "text": "Jeor Mormont (Commander Mormont)\n\nLord Commander of the Night\u2019s Watch at Castle Black. Commander Mormont is tough, old, and wise, and his men call him \u201cThe Old Bear.\u201d\n\nMaester Aemon\n\nThe chief man of learning at Castle Black. Despite his blind white eyes, Maester Aemon sees and speaks the truth in cryptic ways. Though few people realize it, Aemon is one of the few surviving members of the Targaryen family, but he has always put his vows to the Night\u2019s Watch ahead of any family loyalties.\n\nSamwell (Sam) Tarly\n\nA new recruit to the Night\u2019s Watch who is fat and cowardly but very smart. Sam loves to read and eat but hates to fight, and he quickly becomes one of Jon Snow\u2019s closest companions at the Wall.\n\nSer Allister Thorne\n\nCastle Black\u2019s resentful master-at-arms. He hard on the new recruits to the Night\u2019s Watch and seems to enjoy making them suffer, causing Jon to rebel against him. During Robert\u2019s rebellion against the former king, he was a Targaryen loyalist.", "metadata": { - "text": "Maester Aemon\nThe chief man of learning at Castle Black. 
Despite his blind white eyes, Maester Aemon sees and speaks the truth in cryptic ways. Though few people realize it, Aemon is one of the few surviving members of the Targaryen family, but he has always put his vows to the Night\u2019s Watch ahead of any family loyalties.\nSamwell (Sam) Tarly", - "title": "got.txt", - "semantic_score": 0.285634236671582, - "semantic_rank": 2, - "full_text_score": 0.0, - "full_text_rank": null, - "associatedQuery": "Who is John Snow?" + "version": "v0", + "chunk_order": 7, + "document_type": "txt", + "unstructured_filetype": "text/plain", + "unstructured_languages": [ + "eng" + ], + "partitioned_by_unstructured": true, + "associated_query": "Who is John Snow?" } }, { - "fragment_id": "2acc7019-c9dc-5ff9-bcaa-6df98f2831d9", - "extraction_id": "866f85a0-b3d6-5fc5-9ca0-dbd2373eac58", + "fragment_id": "eaf48cfe-592e-55fa-9f07-613a4f221c45", + "extraction_id": "fdf6127b-e623-58bc-a50b-b7e7b040c03a", "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.0434782608695652, - "text": "Samwell (Sam) Tarly\nA new recruit to the Night\u2019s Watch who is fat and cowardly but very smart. Sam loves to read and eat but hates to fight, and he quickly becomes one of Jon Snow\u2019s closest companions at the Wall.\nSer Allister Thorne\nCastle Black\u2019s resentful master-at-arms. He hard on the new recruits to the Night\u2019s Watch and seems to enjoy making them suffer, causing Jon to rebel against him. During Robert\u2019s rebellion against the former king, he was a Targaryen loyalist.\nIllyrio Mopatis", + "collection_ids": [], + "score": 0.2572833588977643, + "text": "Varys (The Spider)\n\nThe Red Keep\u2019s master of whispers and a eunuch. His role in the court is to run a network of spies and keep the king informed, and he often uses what he knows to manipulate those around him, including the king.\n\nRobert Baratheon\n\nThe corpulent king of Westeros. 
He loves to fight, drink, and sleep with women, and he hates the duties of ruling. He and Ned are long-time friends, and he was engaged to Ned\u2019s sister until she died.\n\nSer Jorah Mormont\n\nAn exiled knight who serves unofficially as Daenerys\u2019s chief advisor. Though he was exiled by Ned Stark for selling slaves, he is intelligent, valiant, and a great fighter. He swears allegiance to Viserys as true king of Westeros, but he also feeds information about the Targaryens back to Varys.", "metadata": { - "text": "Samwell (Sam) Tarly\nA new recruit to the Night\u2019s Watch who is fat and cowardly but very smart. Sam loves to read and eat but hates to fight, and he quickly becomes one of Jon Snow\u2019s closest companions at the Wall.\nSer Allister Thorne\nCastle Black\u2019s resentful master-at-arms. He hard on the new recruits to the Night\u2019s Watch and seems to enjoy making them suffer, causing Jon to rebel against him. During Robert\u2019s rebellion against the former king, he was a Targaryen loyalist.\nIllyrio Mopatis", - "title": "got.txt", - "semantic_score": 0.264474719762802, - "semantic_rank": 3, - "full_text_score": 0.0, - "full_text_rank": null, - "associatedQuery": "Who is John Snow?" + "version": "v0", + "chunk_order": 3, + "document_type": "txt", + "unstructured_filetype": "text/plain", + "unstructured_languages": [ + "eng" + ], + "partitioned_by_unstructured": true, + "associated_query": "Who is John Snow?" } }, { - "fragment_id": "7f3f6e6f-9ec5-5ae0-8879-e89c5a892fcc", - "extraction_id": "866f85a0-b3d6-5fc5-9ca0-dbd2373eac58", + "fragment_id": "3cb1c2db-01e4-5ea8-a39f-31f5949637f8", + "extraction_id": "02b64e7c-5aa5-5380-8fa0-3d8b64866aa8", "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.0416666666666667, - "text": "Ser Jorah Mormont\nAn exiled knight who serves unofficially as Daenerys\u2019s chief advisor. 
Though he was exiled by Ned Stark for selling slaves, he is intelligent, valiant, and a great fighter. He swears allegiance to Viserys as true king of Westeros, but he also feeds information about the Targaryens back to Varys.\nViserys Targaryen", + "collection_ids": [], + "score": 0.20903720205800558, + "text": "Aerys II Targaryen\n\nKing of Westeros before Robert Baratheon. He was known as The Mad King because of his cruelty. Aerys murdered Ned\u2019s older brother, Brandon Stark, in the Red Keep\u2019s throne room. At the end of the war that followed, Jaime Lannister slew Aerys in the same room.\n\nRhaegar Targaryen\n\nThe heir to Aerys and older brother of Daenerys and Viserys. Rhaegar kidnapped Lyanna Stark, Robert\u2019s betrothed, helping to set in motion the events that led to Robert\u2019s Rebellion. The war effectively ended when Robert slew Rhaegar with his warhammer on the Trident River.\n\nJon Arryn\n\nThe recently deceased Lord of the Eyrie and Hand of the King. Jon Arryn fostered Ned Stark and Robert Baratheon at the Eyrie. When Robert became king, Jon Arryn served as his Hand until his murder.", "metadata": { - "text": "Ser Jorah Mormont\nAn exiled knight who serves unofficially as Daenerys\u2019s chief advisor. Though he was exiled by Ned Stark for selling slaves, he is intelligent, valiant, and a great fighter. He swears allegiance to Viserys as true king of Westeros, but he also feeds information about the Targaryens back to Varys.\nViserys Targaryen", - "title": "got.txt", - "semantic_score": 0.259267091751099, - "semantic_rank": 4, - "full_text_score": 0.0, - "full_text_rank": null, - "associatedQuery": "Who is John Snow?" + "version": "v0", + "chunk_order": 10, + "document_type": "txt", + "unstructured_filetype": "text/plain", + "unstructured_languages": [ + "eng" + ], + "partitioned_by_unstructured": true, + "associated_query": "Who is John Snow?" 
} }, { - "fragment_id": "b732d643-eb13-5ed3-a31b-ae2ef7877428", - "extraction_id": "866f85a0-b3d6-5fc5-9ca0-dbd2373eac58", - "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "fragment_id": "ac15f806-8723-5fe7-832d-ed0427bd3550", + "extraction_id": "416b07ed-cdd6-51fd-8f54-4164c0160860", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.04, - "text": "Daenerys Stormborn Targaryen\nThe Dothraki khaleesi (queen) and Targaryen princess. She and her brother are the only surviving members of the Targaryen family, and she grows from a frightened girl to a confident ruler, while still maintaining her kindness, over the course of the novel.\nJon Snow", + "collection_ids": [], + "score": 0.19556865096092224, + "text": "\u201cSanctions\u201d means economic or financial sanctions or trade embargoes imposed, administered or enforced from time to time by (a)\n\nthe U.S. government, including those administered by the Office of Foreign Assets Control of the U.S. Department of the Treasury or the U.S. Department of State, or (b) the United Nations Security Council, the European Union, any European Union member state, Her Majesty\u2019s Treasury of the United Kingdom or other relevant sanctions authority.\n\n\u201cSARON\u201d means, with respect to any Business Day, a rate per annum equal to the Swiss Average Rate Overnight for such Business\n\nDay published by the SARON Administrator on the SARON Administrator\u2019s Website.\n\n\u201cSARON Administrator\u201d means the SIX Swiss Exchange AG (or any successor administrator of the Swiss Average Rate", "metadata": { - "text": "Daenerys Stormborn Targaryen\nThe Dothraki khaleesi (queen) and Targaryen princess. 
She and her brother are the only surviving members of the Targaryen family, and she grows from a frightened girl to a confident ruler, while still maintaining her kindness, over the course of the novel.\nJon Snow", - "title": "got.txt", - "semantic_score": 0.253841363237027, - "semantic_rank": 5, - "full_text_score": 0.0, - "full_text_rank": null, - "associatedQuery": "Who is John Snow?" + "version": "v0", + "chunk_order": 1221, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 208, + "partitioned_by_unstructured": true, + "associated_query": "Who is John Snow?" } }, { - "fragment_id": "2252087b-54d3-5df8-9e0f-f3718e7f42e3", - "extraction_id": "866f85a0-b3d6-5fc5-9ca0-dbd2373eac58", + "fragment_id": "5d09c80f-ba83-5204-a6b3-f08831e150b0", + "extraction_id": "f86a905c-8d82-52ff-ad72-a800ca3af6f4", "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.0384615384615385, - "text": "Robert Baratheon\nThe corpulent king of Westeros. He loves to fight, drink, and sleep with women, and he hates the duties of ruling. He and Ned are long-time friends, and he was engaged to Ned\u2019s sister until she died.\nSer Jorah Mormont", + "collection_ids": [], + "score": 0.19119779765605927, + "text": "Illyrio Mopatis\n\nAn obese merchant from the Free Cities who helps Daenerys and Viserys Targaryen. Illyrio is very rich and very well-informed. He is quick to please, especially when there is a possibility that his kindness will help him avoid trouble or gain greater fortune in the future.\n\nSer Barristan Selmy\n\nLord Commander of the Kingsguard. He has served kings Jaehaerys, Aerys II, and Robert. Though he has grown old, Barristan \u201cThe Bold\u201d is a formidable fighter. He is, and has always been, an honorable knight.\n\nRenly Baratheon\n\nThe youngest of the three Baratheon brothers. 
Renly is lighthearted and opportunistic, and unexpectedly ambitious. He serves on Robert\u2019s royal council.\n\nStannis Baratheon\n\nThe middle brother of the three Baratheons. Stannis does not appear in A Game of Thrones, but as the brother of the king, he is a potential heir to the throne. Stannis does not seem to be well-liked.", "metadata": { - "text": "Robert Baratheon\nThe corpulent king of Westeros. He loves to fight, drink, and sleep with women, and he hates the duties of ruling. He and Ned are long-time friends, and he was engaged to Ned\u2019s sister until she died.\nSer Jorah Mormont", - "title": "got.txt", - "semantic_score": 0.23582898080349, - "semantic_rank": 6, - "full_text_score": 0.0, - "full_text_rank": null, - "associatedQuery": "Who is John Snow?" + "version": "v0", + "chunk_order": 8, + "document_type": "txt", + "unstructured_filetype": "text/plain", + "unstructured_languages": [ + "eng" + ], + "partitioned_by_unstructured": true, + "associated_query": "Who is John Snow?" } }, { - "fragment_id": "fe72ba41-30c1-5b96-b0ce-d6eebf137d38", - "extraction_id": "9caef060-01ee-51f0-a342-4852adb0ad43", - "document_id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1", + "fragment_id": "b47050a2-9906-5922-b6d4-52e4dedb499f", + "extraction_id": "d436c7f7-d7c7-509e-a383-94a94360e601", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.037037037037037, - "text": "allowed them to work on problems they only half understood (they\nwere literally described as \"clowns\" by one eminent insider), and\ntheir impatience and competitiveness got them to the answer ahead\nof two other groups that would otherwise have found it within the\nnext year, if not the next several months.\n[2]The idea that there could be fierce nerds is an unfamiliar one not\njust to many normal people but even to some young nerds. 
Especially\nearly on, nerds spend so much of their time in ordinary social", + "collection_ids": [], + "score": 0.18726881596221867, + "text": "121", "metadata": { - "text": "allowed them to work on problems they only half understood (they\nwere literally described as \"clowns\" by one eminent insider), and\ntheir impatience and competitiveness got them to the answer ahead\nof two other groups that would otherwise have found it within the\nnext year, if not the next several months.\n[2]The idea that there could be fierce nerds is an unfamiliar one not\njust to many normal people but even to some young nerds. Especially\nearly on, nerds spend so much of their time in ordinary social", - "title": "pg_essay_2.html", - "semantic_score": 0.234802625620444, - "semantic_rank": 7, - "full_text_score": 0.0, - "full_text_rank": null, - "associatedQuery": "Who is John Snow?" + "version": "v0", + "chunk_order": 1719, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 293, + "partitioned_by_unstructured": true, + "associated_query": "Who is John Snow?" } }, { - "fragment_id": "d7ed83c7-0a72-5346-bca9-dc1cb0e956ea", - "extraction_id": "8ee4678a-1fa8-58a6-8d6f-ec88cf10a6dc", - "document_id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c", + "fragment_id": "d143b410-f9a4-5f3b-bb46-fb412eda8201", + "extraction_id": "37f940e2-18f4-50f5-93aa-cec422fc9211", + "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.0357142857142857, - "text": "Structure of Scientific Revolutions, but I also recommend his\nCopernican Revolution, where you can see him at work developing the\nidea.[4]\nThis is one reason people with a touch of Asperger's may have\nan advantage in discovering new ideas. They're always flying on\ninstruments.[5]\nHall, Rupert. From Galileo to Newton. Collins, 1963. 
This\nbook is particularly good at getting into contemporaries' heads.Thanks to Trevor Blackwell, Patrick Collison, Suhail Doshi, Daniel", + "collection_ids": [], + "score": 0.18609081208705902, + "text": "Sandor (The Hound) Clegane\n\nPrince Joff\u2019s unofficial bodyguard. Proud that he is not a knight, The Hound appears to have no scruples whatsoever and does what Joffrey orders, however cruel or unjust, without question. His face is scarred on one side by extensive burning inflicted by his brother, Gregor.\n\nRobb Stark\n\nThe eldest Stark son and thus heir to Ned Stark. Though just fourteen, he is mature beyond his age as well as being brave and dutiful like his father.\n\nMaester Luwin\n\nCounselor to Ned, Catelyn, and Robb. Luwin is old and wise, and his advice proves indispensible to the Starks.\n\nTheon Greyjoy\n\nThe Starks\u2019s ward and Robb\u2019s best friend. Ned Stark took the young Theon, now nineteen, as a ward after putting down a rebellion led by the Greyjoy family, and Theon consequently grew up with the Stark children as something like a brother.", "metadata": { - "text": "Structure of Scientific Revolutions, but I also recommend his\nCopernican Revolution, where you can see him at work developing the\nidea.[4]\nThis is one reason people with a touch of Asperger's may have\nan advantage in discovering new ideas. They're always flying on\ninstruments.[5]\nHall, Rupert. From Galileo to Newton. Collins, 1963. This\nbook is particularly good at getting into contemporaries' heads.Thanks to Trevor Blackwell, Patrick Collison, Suhail Doshi, Daniel", - "title": "pg_essay_3.html", - "semantic_score": 0.230954363942146, - "semantic_rank": 8, - "full_text_score": 0.0, - "full_text_rank": null, - "associatedQuery": "Who is John Snow?" 
+ "version": "v0", + "chunk_order": 5, + "document_type": "txt", + "unstructured_filetype": "text/plain", + "unstructured_languages": [ + "eng" + ], + "partitioned_by_unstructured": true, + "associated_query": "Who is John Snow?" } }, { - "fragment_id": "0de20d43-9e29-5686-9033-3e184a60cf6a", - "extraction_id": "06e9714f-4805-5dc8-a72f-61ecd0241b49", - "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "fragment_id": "09f57c70-e7c6-548b-897f-fb8e9aba31c8", + "extraction_id": "a021aa95-14d9-5301-9252-b06bcb852956", + "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.0344827586206897, - "text": "to economic, social, weather, and regulatory conditions or other circumstances in each of these large metropolitan areas. Outbreaks of contagiousdiseases\n or other viruses, such as COVID-19, could lead to a sustained decline in the desirability of living, working and congregating in metropolitan areas inwhich we operate. Any short-term o\nr long-term shifts in the travel patterns of consumers away from metropolitan areas, due to health concerns regarding epidemicsor", + "collection_ids": [], + "score": 0.17496788948965758, + "text": "Shared and Shared Saver Rides enables unrelated parties traveling along similar routes to benefit from a discounted fare at the cost of possibly longer travel times. With a Shared or Shared Saver Ride, when the first rider requests a ride, our algorithms use the first rider\u2019s destination and attempt to match them with other riders traveling along a similar route. If a match between riders is made, our algorithms re-route the driver to include the pick-up location of the matched rider on the active route. For Shared and Shared Saver Rides, drivers earn a fixed amount based on a number of factors, including the time and distance of the ride, the base fare charged to riders and the level of rider demand. 
We determine the rider fare based on the predicted time and distance of the ride, the level of rider demand and the likelihood of being able to match additional riders along the given route, and such fare is quoted to the riders prior to their commitment to the ride. The fare charged to the riders is decoupled", "metadata": { - "text": "to economic, social, weather, and regulatory conditions or other circumstances in each of these large metropolitan areas. Outbreaks of contagiousdiseases\n or other viruses, such as COVID-19, could lead to a sustained decline in the desirability of living, working and congregating in metropolitan areas inwhich we operate. Any short-term o\nr long-term shifts in the travel patterns of consumers away from metropolitan areas, due to health concerns regarding epidemicsor", - "title": "uber_2021.pdf", - "semantic_score": 0.22635568537448, - "semantic_rank": 9, - "full_text_score": 0.0, - "full_text_rank": null, - "associatedQuery": "Who is John Snow?" + "version": "v0", + "chunk_order": 276, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 36, + "partitioned_by_unstructured": true, + "associated_query": "Who is John Snow?" } }, { - "fragment_id": "78e0fcac-92e2-571f-a5bd-54d8c6da69ee", - "extraction_id": "8ee4678a-1fa8-58a6-8d6f-ec88cf10a6dc", - "document_id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c", + "fragment_id": "dbd5427c-f5ef-5fa6-83ae-a4a8ddbb48c2", + "extraction_id": "14b08757-0819-5105-af37-509686dd6d01", + "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.0333333333333333, - "text": "completely as soon as it was discovered \u2014 that whatever the church thought\nof the heliocentric model, astronomers must have been convinced as\nsoon as Copernicus proposed it. Far, in fact, from it. 
Copernicus\npublished the heliocentric model in 1532, but it wasn't till the\nmid seventeenth century that the balance of scientific opinion\nshifted in its favor.\n[5]Few understand how feeble new ideas look when they first appear.\nSo if you want to have new ideas yourself, one of the most valuable", + "collection_ids": [], + "score": 0.16958434879779816, + "text": "s, drivers, and the communities they serve.", "metadata": { - "text": "completely as soon as it was discovered \u2014 that whatever the church thought\nof the heliocentric model, astronomers must have been convinced as\nsoon as Copernicus proposed it. Far, in fact, from it. Copernicus\npublished the heliocentric model in 1532, but it wasn't till the\nmid seventeenth century that the balance of scientific opinion\nshifted in its favor.\n[5]Few understand how feeble new ideas look when they first appear.\nSo if you want to have new ideas yourself, one of the most valuable", - "title": "pg_essay_3.html", - "semantic_score": 0.224857065098637, - "semantic_rank": 10, - "full_text_score": 0.0, - "full_text_rank": null, - "associatedQuery": "Who is John Snow?" + "version": "v0", + "chunk_order": 77, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 13, + "partitioned_by_unstructured": true, + "unstructured_is_continuation": true, + "associated_query": "Who is John Snow?" 
} } ], @@ -544,7 +715,7 @@ "choices": [ { "message": { - "content": "\"{\\\"fragment_id\\\":\\\"14745860-73d2-5d40-81aa-9f7085c52d25\\\",\\\"extraction_id\\\":\\\"a1ba66bf-a034-5cc6-b88b-0df6f1c38dbc\\\",\\\"document_id\\\":\\\"2f576170-c4f9-5141-a910-a0924f341de4\\\",\\\"user_id\\\":\\\"2acb499e-8428-543b-bd85-0d9098718220\\\",\\\"group_ids\\\":[],\\\"score\\\":0.761440396308899,\\\"text\\\":\\\"Lyft, Inc.Consolidated Statements of Operations\\\\n(in thousands, except for per share data\\\\n) Year Ended December 31,\\\\n2021\\\\n2020 2019 Revenue\\\\n$ 3,208,323 $ 2,364,681 $ 3,615,960 Costs and expenses\\\\nCost of revenue\\\\n1,649,532 1,447,516 2,176,469 Operations and support\\\\n402,233 453,963 636,116 Research and development\\\\n911,946 909,126 1,505,640 Sales and marketing\\\\n411,406 416,331 814,122 General and administrative\\\\n915,638 946,127 1,186,093 Total costs and expenses\\\\n4,290,755 4,173,063 6,318,440 Loss from operations\\\",\\\"metadata\\\":{\\\"text\\\":\\\"Lyft, Inc.Consolidated Statements of Operations\\\\n(in thousands, except for per share data\\\\n) Year Ended December 31,\\\\n2021\\\\n2020 2019 Revenue\\\\n$ 3,208,323 $ 2,364,681 $ 3,615,960 Costs and expenses\\\\nCost of revenue\\\\n1,649,532 1,447,516 2,176,469 Operations and support\\\\n402,233 453,963 636,116 Research and development\\\\n911,946 909,126 1,505,640 Sales and marketing\\\\n411,406 416,331 814,122 General and administrative\\\\n915,638 946,127 1,186,093 Total costs and expenses\\\\n4,290,755 4,173,063 6,318,440 Loss from operations\\\",\\\"title\\\":\\\"lyft_2021.pdf\\\",\\\"associatedQuery\\\":\\\"What was Lyft's profit in 2020?\\\"}}\",\"{\\\"fragment_id\\\":\\\"14478ac4-8f43-5f09-bbe3-7b1ddfdece40\\\",\\\"extraction_id\\\":\\\"12cff898-f0fe-59b8-929b-00d3d56fd73d\\\",\\\"document_id\\\":\\\"2f576170-c4f9-5141-a910-a0924f341de4\\\",\\\"user_id\\\":\\\"2acb499e-8428-543b-bd85-0d9098718220\\\",\\\"group_ids\\\":[],\\\"score\\\":0.69296767157509,\\\"text\\\":\\\"was 
attributable to the divestiture of certain assets related to the Level 5 division and how much was attributable to the commercial agreements for theutilization\\\\n of Lyft rideshare and fleet data. For the year ended December 31, 2021, the Company recognized a $119.3 million pre-tax gain for the divestiture of certainassets\\\",\\\"metadata\\\":{\\\"text\\\":\\\"was attributable to the divestiture of certain assets related to the Level 5 division and how much was attributable to the commercial agreements for theutilization\\\\n of Lyft rideshare and fleet data. For the year ended December 31, 2021, the Company recognized a $119.3 million pre-tax gain for the divestiture of certainassets\\\",\\\"title\\\":\\\"lyft_2021.pdf\\\",\\\"associatedQuery\\\":\\\"What was Lyft's profit in 2020?\\\"}}\",\"{\\\"fragment_id\\\":\\\"1a76991f-fa85-59b2-b522-700d47b2d809\\\",\\\"extraction_id\\\":\\\"0773cd62-b39f-517f-b6f1-be788b38374d\\\",\\\"document_id\\\":\\\"2f576170-c4f9-5141-a910-a0924f341de4\\\",\\\"user_id\\\":\\\"2acb499e-8428-543b-bd85-0d9098718220\\\",\\\"group_ids\\\":[],\\\"score\\\":0.683727383613586,\\\"text\\\":\\\"Lyft, Inc.Consolidated Statements of Cash Flows\\\\n(in thousands)\\\\nYear Ended December 31,\\\\n2021\\\\n2020 2019 Cash flows from operating activities\\\\nNet loss\\\\n$ (1,009,359) $ (1,752,857) $ (2,602,241) Adjustments to reconcile net loss to net cash used in operating activities\\\\nDepreciation and amortization\\\\n139,347 157,353 108,429 Stock-based compensation\\\\n724,560 565,807 1,599,311 Amortization of premium on marketable securities\\\\n4,100 6,461 597 Accretion of discount on marketable securities\\\",\\\"metadata\\\":{\\\"text\\\":\\\"Lyft, Inc.Consolidated Statements of Cash Flows\\\\n(in thousands)\\\\nYear Ended December 31,\\\\n2021\\\\n2020 2019 Cash flows from operating activities\\\\nNet loss\\\\n$ (1,009,359) $ (1,752,857) $ (2,602,241) Adjustments to reconcile net loss to net cash used in operating 
activities\\\\nDepreciation and amortization\\\\n139,347 157,353 108,429 Stock-based compensation\\\\n724,560 565,807 1,599,311 Amortization of premium on marketable securities\\\\n4,100 6,461 597 Accretion of discount on marketable securities\\\",\\\"title\\\":\\\"lyft_2021.pdf\\\",\\\"associatedQuery\\\":\\\"What was Lyft's profit in 2020?\\\"}}\",\"{\\\"fragment_id\\\":\\\"bfc29c59-2925-5621-951a-645df5c892b2\\\",\\\"extraction_id\\\":\\\"94c562ae-c5cc-5622-b91c-34a0effcb6e1\\\",\\\"document_id\\\":\\\"2f576170-c4f9-5141-a910-a0924f341de4\\\",\\\"user_id\\\":\\\"2acb499e-8428-543b-bd85-0d9098718220\\\",\\\"group_ids\\\":[],\\\"score\\\":0.671545108304772,\\\"text\\\":\\\"Lyft, Inc.Consolidated Statements of Comprehensive Loss\\\\n(in thousands) \\\\nYear Ended December 31,\\\\n2021\\\\n2020 2019 Net loss\\\\n$ (1,009,359) $ (1,752,857) $ (2,602,241) Other comprehensive incom\\\\ne (loss)Foreign currency transla\\\\ntion adjustment(931) (2,187) 162 Unrealized gain (loss) on m\\\\narketable securities, net of taxes(1,107) (1,011) 2,430 Other comprehensive incom\\\\ne (loss)(2,038) (3,198) 2,592 Comprehensive loss\\\\n$ (1,011,397) $ (1,756,055) $ (2,599,649) The accompanying notes are\\\",\\\"metadata\\\":{\\\"text\\\":\\\"Lyft, Inc.Consolidated Statements of Comprehensive Loss\\\\n(in thousands) \\\\nYear Ended December 31,\\\\n2021\\\\n2020 2019 Net loss\\\\n$ (1,009,359) $ (1,752,857) $ (2,602,241) Other comprehensive incom\\\\ne (loss)Foreign currency transla\\\\ntion adjustment(931) (2,187) 162 Unrealized gain (loss) on m\\\\narketable securities, net of taxes(1,107) (1,011) 2,430 Other comprehensive incom\\\\ne (loss)(2,038) (3,198) 2,592 Comprehensive loss\\\\n$ (1,011,397) $ (1,756,055) $ (2,599,649) The accompanying notes are\\\",\\\"title\\\":\\\"lyft_2021.pdf\\\",\\\"associatedQuery\\\":\\\"What was Lyft's profit in 
2020?\\\"}}\",\"{\\\"fragment_id\\\":\\\"dbd5427c-f5ef-5fa6-83ae-a4a8ddbb48c2\\\",\\\"extraction_id\\\":\\\"14b08757-0819-5105-af37-509686dd6d01\\\",\\\"document_id\\\":\\\"2f576170-c4f9-5141-a910-a0924f341de4\\\",\\\"user_id\\\":\\\"2acb499e-8428-543b-bd85-0d9098718220\\\",\\\"group_ids\\\":[],\\\"score\\\":0.6639652450074329,\\\"text\\\":\\\"Lyft, Inc.Consolidated Balance Sheets\\\\n(in thousands, except for share and per share da\\\\nta) December 31,\\\\n2021\\\\n2020 Assets\\\\nCurrent assets\\\\nCash and cash equivalents\\\\n$ 457,325 $ 319,734 Short-term investments\\\\n1,796,533 1,931,334 Prepaid expenses and other cur\\\\nrent assets522,212 343,070 Total current assets\\\\n2,776,070 2,594,138 Restricted cash and cash equ\\\\nivalents73,205 118,559 Restricted investments\\\\n1,044,855 1,101,712 Other investments\\\\n80,411 10,000 Property and equipment, net\\\",\\\"metadata\\\":{\\\"text\\\":\\\"Lyft, Inc.Consolidated Balance Sheets\\\\n(in thousands, except for share and per share da\\\\nta) December 31,\\\\n2021\\\\n2020 Assets\\\\nCurrent assets\\\\nCash and cash equivalents\\\\n$ 457,325 $ 319,734 Short-term investments\\\\n1,796,533 1,931,334 Prepaid expenses and other cur\\\\nrent assets522,212 343,070 Total current assets\\\\n2,776,070 2,594,138 Restricted cash and cash equ\\\\nivalents73,205 118,559 Restricted investments\\\\n1,044,855 1,101,712 Other investments\\\\n80,411 10,000 Property and equipment, net\\\",\\\"title\\\":\\\"lyft_2021.pdf\\\",\\\"associatedQuery\\\":\\\"What was Lyft's profit in 2020?\\\"}}\",\"{\\\"fragment_id\\\":\\\"ba4bdf94-9b0d-5548-b3b0-b28595c39b2a\\\",\\\"extraction_id\\\":\\\"12cff898-f0fe-59b8-929b-00d3d56fd73d\\\",\\\"document_id\\\":\\\"2f576170-c4f9-5141-a910-a0924f341de4\\\",\\\"user_id\\\":\\\"2acb499e-8428-543b-bd85-0d9098718220\\\",\\\"group_ids\\\":[],\\\"score\\\":0.657470505539123,\\\"text\\\":\\\"Lyft rideshare and fleet data. 
The Company will receive, in total, approximately $515 million in cash in connection with this transaction, with $165 million paid upfrontand\\\\n $350 million to be paid over a five-year period. As the transaction included multiple elements, management had to estimate how much of the arrangementconsideration\\\",\\\"metadata\\\":{\\\"text\\\":\\\"Lyft rideshare and fleet data. The Company will receive, in total, approximately $515 million in cash in connection with this transaction, with $165 million paid upfrontand\\\\n $350 million to be paid over a five-year period. As the transaction included multiple elements, management had to estimate how much of the arrangementconsideration\\\",\\\"title\\\":\\\"lyft_2021.pdf\\\",\\\"associatedQuery\\\":\\\"What was Lyft's profit in 2020?\\\"}}\",\"{\\\"fragment_id\\\":\\\"68519877-163f-545b-a96f-77e2595ef720\\\",\\\"extraction_id\\\":\\\"9e45b68a-e773-5903-a9e7-05f860a05cf5\\\",\\\"document_id\\\":\\\"2f576170-c4f9-5141-a910-a0924f341de4\\\",\\\"user_id\\\":\\\"2acb499e-8428-543b-bd85-0d9098718220\\\",\\\"group_ids\\\":[],\\\"score\\\":0.644272885067888,\\\"text\\\":\\\"Lyft, Inc.Notes to Consolidated Financial Statements\\\\n1. Description of Business and Basis of Presentation\\\\nOrganization and Description of Business\\\\nLyft,\\\\n Inc. (the Company or Lyft) is incorporated in Delaware with its headquarters in San Francisco, California. The Company operates multimodaltransportation\\\\n networks in the United States and Canada that offer access to a variety of transportation options through the Companys platform and mobile-basedapplications.\\\",\\\"metadata\\\":{\\\"text\\\":\\\"Lyft, Inc.Notes to Consolidated Financial Statements\\\\n1. Description of Business and Basis of Presentation\\\\nOrganization and Description of Business\\\\nLyft,\\\\n Inc. (the Company or Lyft) is incorporated in Delaware with its headquarters in San Francisco, California. 
The Company operates multimodaltransportation\\\\n networks in the United States and Canada that offer access to a variety of transportation options through the Companys platform and mobile-basedapplications.\\\",\\\"title\\\":\\\"lyft_2021.pdf\\\",\\\"associatedQuery\\\":\\\"What was Lyft's profit in 2020?\\\"}}\",\"{\\\"fragment_id\\\":\\\"05ddcc28-fd4b-5318-9224-6f2cd3d16c5c\\\",\\\"extraction_id\\\":\\\"7ce2fcae-d65b-5a4c-b45d-dcd87c4df38e\\\",\\\"document_id\\\":\\\"2f576170-c4f9-5141-a910-a0924f341de4\\\",\\\"user_id\\\":\\\"2acb499e-8428-543b-bd85-0d9098718220\\\",\\\"group_ids\\\":[],\\\"score\\\":0.642513496576135,\\\"text\\\":\\\"Lyft, Inc.Consolidated Statements of Cash Flows\\\\n(in thousands)\\\\nYear Ended December 31,\\\\n2021\\\\n2020 2019 Reconciliation of cash, cash equivalents and restricted cash and cash equivalents to the consolidated balance sheets\\\\nCash and cash equivalents\\\\n$ 457,325 $ 319,734 $ 358,319 Restricted cash and cash equivalents\\\\n73,205 118,559 204,976 Restricted cash, included in prepaid expenses and other current \\\\nassets663 192 1,170 Total cash, cash equivalents and restricted cash and cash equivalents\\\",\\\"metadata\\\":{\\\"text\\\":\\\"Lyft, Inc.Consolidated Statements of Cash Flows\\\\n(in thousands)\\\\nYear Ended December 31,\\\\n2021\\\\n2020 2019 Reconciliation of cash, cash equivalents and restricted cash and cash equivalents to the consolidated balance sheets\\\\nCash and cash equivalents\\\\n$ 457,325 $ 319,734 $ 358,319 Restricted cash and cash equivalents\\\\n73,205 118,559 204,976 Restricted cash, included in prepaid expenses and other current \\\\nassets663 192 1,170 Total cash, cash equivalents and restricted cash and cash equivalents\\\",\\\"title\\\":\\\"lyft_2021.pdf\\\",\\\"associatedQuery\\\":\\\"What was Lyft's profit in 
2020?\\\"}}\",\"{\\\"fragment_id\\\":\\\"e89d60be-9148-52f1-8907-03187fdd2b65\\\",\\\"extraction_id\\\":\\\"ac9c9f74-c545-586e-8acf-5545cf0864d5\\\",\\\"document_id\\\":\\\"2f576170-c4f9-5141-a910-a0924f341de4\\\",\\\"user_id\\\":\\\"2acb499e-8428-543b-bd85-0d9098718220\\\",\\\"group_ids\\\":[],\\\"score\\\":0.633240878582001,\\\"text\\\":\\\"Lyft, Inc. for the fiscal year ended December 31, 2021 fully complies with the requirements of Section 13(a) or 15(d) of the Securities Exchange Act of 1934, asamended,\\\\n and that the information contained in such Annual Report on Form 10-K fairly presents, in all material respects, the financial condition and results of operationsof Lyft, Inc.\\\\nDate:\\\\nFebruary 28, 2022 By: /s/ Logan Green Name:\\\\nLogan Green Title:\\\\nChief Executive Officer (Principal Executive Officer)\\\\nI, Elaine\\\",\\\"metadata\\\":{\\\"text\\\":\\\"Lyft, Inc. for the fiscal year ended December 31, 2021 fully complies with the requirements of Section 13(a) or 15(d) of the Securities Exchange Act of 1934, asamended,\\\\n and that the information contained in such Annual Report on Form 10-K fairly presents, in all material respects, the financial condition and results of operationsof Lyft, Inc.\\\\nDate:\\\\nFebruary 28, 2022 By: /s/ Logan Green Name:\\\\nLogan Green Title:\\\\nChief Executive Officer (Principal Executive Officer)\\\\nI, Elaine\\\",\\\"title\\\":\\\"lyft_2021.pdf\\\",\\\"associatedQuery\\\":\\\"What was Lyft's profit in 2020?\\\"}}\",\"{\\\"fragment_id\\\":\\\"b230ec93-1003-5382-98e3-15505dc5bdba\\\",\\\"extraction_id\\\":\\\"9f117275-9d93-5b4a-97d8-bb270db8b94e\\\",\\\"document_id\\\":\\\"2f576170-c4f9-5141-a910-a0924f341de4\\\",\\\"user_id\\\":\\\"2acb499e-8428-543b-bd85-0d9098718220\\\",\\\"group_ids\\\":[],\\\"score\\\":0.621775843010894,\\\"text\\\":\\\"PART IItem 1. 
Business.\\\\nOur Mission\\\\nImprove peoples lives with \\\\nthe worlds best transportation.Overview\\\\nLyft,\\\\n Inc (the Company or Lyft) started a movement to revolutionize transportation. In 2012, we launched our peer-to-peer marketplace for on-demandridesharing\\\\n and have continued to pioneer innovations aligned with our mission. Today, Lyft is one of the largest multimodal transportation networks in the United Statesand Canada.\\\\nWe\\\",\\\"metadata\\\":{\\\"text\\\":\\\"PART IItem 1. Business.\\\\nOur Mission\\\\nImprove peoples lives with \\\\nthe worlds best transportation.Overview\\\\nLyft,\\\\n Inc (the Company or Lyft) started a movement to revolutionize transportation. In 2012, we launched our peer-to-peer marketplace for on-demandridesharing\\\\n and have continued to pioneer innovations aligned with our mission. Today, Lyft is one of the largest multimodal transportation networks in the United Statesand Canada.\\\\nWe\\\",\\\"title\\\":\\\"lyft_2021.pdf\\\",\\\"associatedQuery\\\":\\\"What was Lyft's profit in 2020?\\\"}}\"Lyft did not make a profit in 2020. Instead, the company reported a net loss of $1,752,857,000 for the year ended December 31, 2020 [3], [4]." + "content": "[{\"fragment_id\": \"94684f2d-fe60-5ba3-b1e8-0a921841bac9\", \"extraction_id\": \"fde39a49-00fc-5622-addd-13eb9c3bad4b\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6675721804840684, \"text\": \"Total Stockholders\\u2019 Equity (Deficit) 1,676,163\\n\\n5,184\\n\\n\\u2014\\n\\n(26,298)\\n\\n28,637\\n\\n(1) 721,710 (2,038) (1,009,359) 1,393,998\\n\\nLyft, Inc. 
Consolidated Statements of Cash Flows (in thousands)\\n\\n2021\\n\\nCash flows from operating activities Net loss Adjustments to reconcile net loss to net cash used in operating activities\\n\\n$\\n\\n(1,009,359)\\n\\nDepreciation and amortization Stock-based compensation Amortization of premium on marketable securities Accretion of discount on marketable securities Amortization of debt discount and issuance costs Deferred income tax from convertible senior notes Loss on sale and disposal of assets, net Gain on divestiture Other Changes in operating assets and liabilities, net effects of acquisition\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 572, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 82, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"0d5c5803-8846-59d4-8ae3-3696b718f162\", \"extraction_id\": \"fa3d2549-593a-5a80-88a2-b2d031d79771\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6647443571534382, \"text\": \"79\\n\\n2019 3,615,960\\n\\n2,176,469 636,116 1,505,640 814,122 1,186,093 6,318,440 (2,702,480) \\u2014 102,595 (2,599,885) 2,356 (2,602,241)\\n\\n(11.44)\\n\\n227,498\\n\\n81,321 75,212 971,941 72,046 398,791\\n\\nLyft, Inc. 
Consolidated Statements of Comprehensive Loss (in thousands)\\n\\nNet loss Other comprehensive income (loss)\\n\\n$\\n\\nYear Ended December 31, 2020 (1,752,857) $\\n\\n2021 (1,009,359) $\\n\\nForeign currency translation adjustment Unrealized gain (loss) on marketable securities, net of taxes\\n\\nOther comprehensive income (loss)\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 567, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 79, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"5d663f0a-c9a9-580f-818b-5ca0a1ca73f2\", \"extraction_id\": \"12916d2c-0691-528c-86aa-6784c1f35c55\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6628832616140414, \"text\": \"Overview\\n\\nLyft, Inc (the \\u201cCompany\\u201d or \\u201cLyft\\u201d) started a movement to revolutionize transportation. In 2012, we launched our peer-to-peer marketplace for on-demand ridesharing and have continued to pioneer innovations aligned with our mission. 
Today, Lyft is one of the largest multimodal transportation networks in the United States and Canada.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 16, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 5, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"7133acca-f147-5f43-b2e0-71228282fda0\", \"extraction_id\": \"212d8290-564d-5039-93cc-00cea31a1771\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6603014862717096, \"text\": \"Revenues from Contracts with Customers (ASC 606)\\n\\nWe generate substantially all our revenue from our ridesharing marketplace that connects drivers and riders. We recognize revenue from fees paid by drivers for use of our Lyft Platform offerings in accordance with ASC 606 as described in Note 2 of the notes to our consolidated financial statements. Drivers enter into terms of service (\\u201cToS\\u201d) with us in order to use our Lyft Driver App.\\n\\n58\\n\\n2019 to 2020 % Change\\n\\n19.0% (1.8)% (6.7)% 2.3%\\n\\nWe provide a service to drivers to complete a successful transportation service for riders. This service includes on-demand lead generation that assists drivers to find, receive and fulfill on-demand requests from riders seeking transportation services and related collection activities using our Lyft Platform. 
As a result, our single performance obligation in the transaction is to connect drivers with riders to facilitate the completion of a successful transportation service for riders.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 459, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 58, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"1a76991f-fa85-59b2-b522-700d47b2d809\", \"extraction_id\": \"0773cd62-b39f-517f-b6f1-be788b38374d\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6477107388714619, \"text\": \"Corporate Information\\n\\nWe were incorporated in 2007 as Bounder Web, Inc., a Delaware corporation. In 2008, we changed our name to Zimride, Inc. We founded Lyft in 2012 and\\n\\nchanged our name to Lyft, Inc. in 2013 when we sold the assets related to our Zimride operations.\\n\\n13\\n\\nAvailable Information\\n\\nOur website is located at www.lyft.com, and our investor relations website is located at investor.lyft.com. Copies of our Annual Report on Form 10-K, Quarterly Reports on Form 10-Q, Current Reports on Form 8-K and amendments to these reports filed or furnished pursuant to Section 13(a) or 15(d) of the Exchange Act, as amended, are available free of charge on our investor relations website as soon as reasonably practicable after we file such material electronically with or furnish it to the Securities and Exchange Commission (the \\u201cSEC\\u201d). 
The SEC also maintains a website that contains our SEC filings at www.sec.gov.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 82, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 13, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"17673edc-6fb7-577d-9bca-457c5745382d\", \"extraction_id\": \"bde94416-baaa-573a-9bc7-86ddf28535b1\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6406270265579269, \"text\": \"We generate substantially all of our revenue from our ridesharing marketplace that connects drivers and riders. We collect service fees and commissions from drivers for their use of our ridesharing marketplace. As drivers accept more rider leads and complete more rides, we earn more revenue. We also generate revenue from riders renting Light Vehicles, drivers renting vehicles through Express Drive, Lyft Rentals renters, Lyft Driver Center and Lyft Auto Care users, and by making our ridesharing marketplace available to organizations through our Lyft Business offerings, such as our Concierge and Corporate Business Travel programs. 
In the second quarter of 2021, we began generating revenues from licensing and data access agreements, primarily with third-party autonomous vehicle companies.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 20, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 5, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"8f6299b7-4582-5bac-8c74-7ca57714aefa\", \"extraction_id\": \"310e9e1f-25d3-5287-a905-5446f661d6da\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6394975757149678, \"text\": \"Revenue Recognition\\n\\nThe Company generates its revenue from its multimodal transportation networks that offer access to a variety of transportation options through the Lyft Platform and mobile-based applications. Substantially all of the Company\\u2019s revenue is generated from its ridesharing marketplace that connects drivers and riders and is recognized in accordance with Accounting Standards Codification Topic 606 (\\u201cASC 606\\u201d). In addition, the Company generates revenue in accordance with ASC 606 from licensing and data access, primarily with third-party autonomous vehicle companies. 
The Company also generates rental revenue from Flexdrive, its network of Light Vehicles and Lyft Rentals, which is recognized in accordance with Accounting Standards Codification Topic 842 (\\u201cASC 842\\u201d).\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 591, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 86, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"ff837ea0-0062-59ca-bb4f-aa7a1c9cecd0\", \"extraction_id\": \"41e4db8a-0478-5015-8263-cde0618ec626\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6308713775353904, \"text\": \"Light Vehicle Rider and Lyft Rentals Renter Incentives\\n\\nIncentives offered to Light Vehicle riders and Lyft Rentals renters were not material for the years ended December 31, 2021 and 2020.\\n\\nFor the years ended December 31, 2021, 2020 and 2019, in relation to the driver, rider, Light Vehicle riders and Lyft Rentals renters incentive programs, the Company recorded $1.3 billion, $390.8 million and $560.3 million as a reduction to revenue and $64.7 million, $135.0 million and $381.5 million as sales and marketing expense, respectively.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 611, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 89, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"c24d144b-c22d-5c08-876f-a03e43620aa4\", \"extraction_id\": \"2a4caab0-6193-5263-8eab-c7763e8f38e8\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6295160430381335, \"text\": \"Software 
Development Costs\\n\\nThe Company incurs costs related to developing the Lyft Platform and related support systems. The Company capitalizes development costs related to the Lyft Platform and related support systems once the preliminary project stage is complete and it is probable that the project will be completed and the software will be used to perform the function intended. The Company capitalized $16.2 million and $12.8 million of software development costs during the year ended December 31, 2021 and 2020, respectively. For the year ended December 31, 2019, capitalized software development costs was not material.\\n\\nInsurance Reserves\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 649, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 94, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"83985ff5-653f-53eb-b137-f616b4292f51\", \"extraction_id\": \"0919e3d5-03b0-5d54-b5f3-7f6ad4534412\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6223346365889701, \"text\": \"32.1\\u2020\\n\\nCertifications of Principal Executive Officer and Principal Financial Officer pursuant to 18 U.S.C. 
Section 1350, as adopted pursuant to Section 906 of the Sarbanes-Oxley Act of 2002.\\n\\n101\\n\\nThe following financial information from Lyft, Inc.\\u2019s Annual Report on Form 10-K for the fiscal year ended December 31, 2021 formatted in Inline XBRL (eXtensible Business Reporting Language): (i) Consolidated Statements of Operations for the fiscal years ended December 31, 2021, 2020 and 2019; (ii) Consolidated Statements of Comprehensive Income (Loss) for the fiscal years ended December 31, 2021, 2020, and 2019; (iii) Consolidated Balance Sheets as of December 31, 2021 and 2020; (iv) Consolidated Statements of Cash Flows for the fiscal years ended December 31, 2021, 2020, and 2019; (v) Consolidated Statements of Redeemable Convertible Preferred Stock and Stockholders\\u2019 Equity for the fiscal years ended December 31, 2021, 2020, and 2019; and (vi) Notes to the Consolidated Financial Statements.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 817, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 127, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}]Lyft's profit in 2020 was a net loss of $1,752,857,000 [2]." 
} } ] diff --git a/py/tests/regression/expected_outputs/test_user_management.json b/py/tests/regression/expected_outputs/test_user_management.json index 971498d6e..8ce54e668 100644 --- a/py/tests/regression/expected_outputs/test_user_management.json +++ b/py/tests/regression/expected_outputs/test_user_management.json @@ -1,48 +1,48 @@ { "register_user": { "results": { - "email": "test_72231655@example.com", - "id": "f36f0a8a-9f12-5979-9416-1fbb7d28a80b", - "group_ids": [], - "hashed_password": "$2b$12$uXTZiDl4gRWPueHa1IEC3OCVRsxZFUJjIgOkajwSFDKFzyRcL2ge.", - "is_superuser": false, + "id": "14466e62-fd0a-5ad3-85b6-6965d7f6d336", + "email": "test_4f3ba5b7@example.com", "is_active": true, + "is_superuser": false, + "created_at": "2024-09-20T22:48:31.575356Z", + "updated_at": "2024-09-20T22:48:31.575356Z", "is_verified": false, + "collection_ids": [], + "hashed_password": "$2b$12$91hUOrNh1OhZZODomjxa0Oc/hGNUVwxFh45CXXBBOfhQP98y.pvFm", "verification_code_expiry": null, "name": null, "bio": null, - "profile_picture": null, - "created_at": "2024-08-14T00:26:00.833888Z", - "updated_at": "2024-08-14T00:26:00.833888Z" + "profile_picture": null } }, "login_user": { "results": { "access_token": { - "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzcyMjMxNjU1QGV4YW1wbGUuY29tIiwiZXhwIjoxNzIzNTk4NzYxLjE5OTA1NCwidG9rZW5fdHlwZSI6ImFjY2VzcyJ9.FN2W4wEKQA8X1OLYFVfjUUJ46f6FsZdCjShsYbiIX_o", + "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzRmM2JhNWI3QGV4YW1wbGUuY29tIiwiZXhwIjoxNzI2ODc2MTExLjgzOTksInRva2VuX3R5cGUiOiJhY2Nlc3MifQ.vW27WohcKi9ipcTC04i6rSg42ZH8NmnXFYUgfXcGAw0", "token_type": "access" }, "refresh_token": { - "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzcyMjMxNjU1QGV4YW1wbGUuY29tIiwiZXhwIjoxNzI0MTk5OTYxLCJ0b2tlbl90eXBlIjoicmVmcmVzaCJ9.ZvX9ASi6RZvKSn8O1t-ZDoJm_dgdtcch9EPT9RSvmtY", + "token": 
"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzRmM2JhNWI3QGV4YW1wbGUuY29tIiwiZXhwIjoxNzI3NDc3MzExLCJ0b2tlbl90eXBlIjoicmVmcmVzaCJ9.NUBrdq0PFM8TbDN4EGEefvke07HMLp0T4W6zUbOxt7w", "token_type": "refresh" } } }, "user_info": { "results": { - "email": "test_72231655@example.com", - "id": "f36f0a8a-9f12-5979-9416-1fbb7d28a80b", - "group_ids": [], - "hashed_password": "$2b$12$uXTZiDl4gRWPueHa1IEC3OCVRsxZFUJjIgOkajwSFDKFzyRcL2ge.", - "is_superuser": false, + "id": "14466e62-fd0a-5ad3-85b6-6965d7f6d336", + "email": "test_4f3ba5b7@example.com", "is_active": true, + "is_superuser": false, + "created_at": "2024-09-20T22:48:31.575356Z", + "updated_at": "2024-09-20T22:48:31.575356Z", "is_verified": true, + "collection_ids": [], + "hashed_password": "$2b$12$91hUOrNh1OhZZODomjxa0Oc/hGNUVwxFh45CXXBBOfhQP98y.pvFm", "verification_code_expiry": null, "name": null, "bio": null, - "profile_picture": null, - "created_at": "2024-08-14T00:26:00.833888Z", - "updated_at": "2024-08-14T00:26:00.833888Z" + "profile_picture": null } }, "change_password": { @@ -52,29 +52,29 @@ }, "update_profile": { "results": { - "email": "test_72231655@example.com", - "id": "f36f0a8a-9f12-5979-9416-1fbb7d28a80b", - "group_ids": [], - "hashed_password": "null", - "is_superuser": false, + "id": "14466e62-fd0a-5ad3-85b6-6965d7f6d336", + "email": "test_4f3ba5b7@example.com", "is_active": true, + "is_superuser": false, + "created_at": "2024-09-20T22:48:31.575356Z", + "updated_at": "2024-09-20T22:48:32.394281Z", "is_verified": true, + "collection_ids": [], + "hashed_password": null, "verification_code_expiry": null, "name": "John Doe", "bio": "R2R enthusiast", - "profile_picture": null, - "created_at": "2024-08-14T00:26:00.833888Z", - "updated_at": "2024-08-14T00:26:01.991559Z" + "profile_picture": null } }, "refresh_token": { "results": { "access_token": { - "token": 
"eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzcyMjMxNjU1QGV4YW1wbGUuY29tIiwiZXhwIjoxNzIzNTk4NzYyLjEzNDQ1NiwidG9rZW5fdHlwZSI6ImFjY2VzcyJ9.zaXBVSgktzEB1BWrg2W9rsjmTW_uw1tEqHJEJ5diPRs", + "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzRmM2JhNWI3QGV4YW1wbGUuY29tIiwiZXhwIjoxNzI2ODc2MTEyLjQyMjYzNywidG9rZW5fdHlwZSI6ImFjY2VzcyJ9.09C6tAMd3WhrEmsqPdFsWx7lxsp0abGSdWGessddSw0", "token_type": "access" }, "refresh_token": { - "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzcyMjMxNjU1QGV4YW1wbGUuY29tIiwiZXhwIjoxNzI0MTk5OTYyLCJ0b2tlbl90eXBlIjoicmVmcmVzaCJ9.09YDDLWNVPrWJAzFaXai3Kul5RJ7W8iNTKmkq-0y4n4", + "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzRmM2JhNWI3QGV4YW1wbGUuY29tIiwiZXhwIjoxNzI3NDc3MzEyLCJ0b2tlbl90eXBlIjoicmVmcmVzaCJ9.yrm_ncSzAScPpdlqYHD2WWhy1AcwuO9CGHcrNLrTNGY", "token_type": "refresh" } } @@ -83,27 +83,74 @@ "results": [ { "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "email": "admin@example.com", + "is_superuser": true, + "is_active": true, + "is_verified": true, + "created_at": "2024-09-20T22:26:10.261930Z", + "updated_at": "2024-09-20T22:26:10.261930Z", + "collection_ids": [], "num_files": 15, - "total_size_in_bytes": 5067615, + "total_size_in_bytes": 8767226, "document_ids": [ - "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", - "716fea3a-826b-5b27-8e59-ffbd1a35455a", - "52e12576-090f-59db-91f4-6d4b2e29ae6c", - "3e157b3a-8469-51db-90d9-52e7d896b49b", + "57eec3df-cf68-5559-a80d-ae3fb55b9af1", + "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c", + "451adbbd-e24e-5c9b-80a8-f424b6c7accb", "01d514a4-5cb1-5c86-be8c-0c5c312a02c9", - "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", "e797da22-8c5d-54e5-bed5-a55954cf6bf9", - "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c", - "2f576170-c4f9-5141-a910-a0924f341de4", + "5b1bd54f-4d70-56b9-a017-a618bc75f94c", "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc", - "57eec3df-cf68-5559-a80d-ae3fb55b9af1", - "db02076e-989a-59cd-98d5-e24e15a0bd27", + "52e12576-090f-59db-91f4-6d4b2e29ae6c", + 
"7b0f40c5-2ace-5781-ae35-ead99ddee8c5", + "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", "d421207a-d799-5806-8d67-46b2005b15d4", - "451adbbd-e24e-5c9b-80a8-f424b6c7accb", - "5b1bd54f-4d70-56b9-a017-a618bc75f94c" + "db02076e-989a-59cd-98d5-e24e15a0bd27", + "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "2f576170-c4f9-5141-a910-a0924f341de4", + "3e157b3a-8469-51db-90d9-52e7d896b49b" ] + }, + { + "user_id": "a0a60c19-7672-5537-a71c-07a771bc4fa3", + "email": "test_40669f1c@example.com", + "is_superuser": false, + "is_active": true, + "is_verified": true, + "created_at": "2024-09-20T22:48:01.453932Z", + "updated_at": "2024-09-20T22:48:01.453932Z", + "collection_ids": [], + "num_files": 0, + "total_size_in_bytes": 0, + "document_ids": [] + }, + { + "user_id": "14466e62-fd0a-5ad3-85b6-6965d7f6d336", + "email": "test_4f3ba5b7@example.com", + "is_superuser": false, + "is_active": true, + "is_verified": true, + "created_at": "2024-09-20T22:48:31.575356Z", + "updated_at": "2024-09-20T22:48:32.394281Z", + "collection_ids": [], + "num_files": 0, + "total_size_in_bytes": 0, + "document_ids": [] + }, + { + "user_id": "a5e4ef83-b9ae-56e0-952b-4116f487b000", + "email": "test_c8a21611@example.com", + "is_superuser": false, + "is_active": true, + "is_verified": true, + "created_at": "2024-09-20T22:48:14.196002Z", + "updated_at": "2024-09-20T22:48:14.196002Z", + "collection_ids": [], + "num_files": 0, + "total_size_in_bytes": 0, + "document_ids": [] } - ] + ], + "total_entries": 4 }, "logout": { "results": { @@ -111,6 +158,8 @@ } }, "delete_account": { - "results": "User account deleted successfully" + "results": { + "message": "User account c8c03ba0-6d1d-524c-be39-7fb6e01b4298 deleted successfully." 
+ } } } diff --git a/py/tests/regression/observed_outputs/test_document_management.json b/py/tests/regression/observed_outputs/test_document_management.json new file mode 100644 index 000000000..a916b77ca --- /dev/null +++ b/py/tests/regression/observed_outputs/test_document_management.json @@ -0,0 +1,1024 @@ +{ + "ingest_sample_files": { + "results": [ + { + "message": "Ingestion task queued successfully.", + "task_id": "234d5f77-3a09-4ee4-873b-dc97db81afe8", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "e361567a-cc8f-47be-914a-0e07286fa3bb", + "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "327685d0-ef5f-4405-a522-bdf8cfc79789", + "document_id": "52e12576-090f-59db-91f4-6d4b2e29ae6c" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "965b2b3f-74aa-4a39-9c36-ab1aad27fb25", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "1a467abd-31bf-4c21-b05d-dcb36822b050", + "document_id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "0ae9a48e-bc5c-4eea-bf86-54bf00b09f8f", + "document_id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "4915b7ed-e255-459b-8af0-3a423e09c881", + "document_id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "96248159-021b-442e-9d14-783a1e80b39f", + "document_id": "7b0f40c5-2ace-5781-ae35-ead99ddee8c5" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "e886cf3f-362a-4b41-ab98-303a03a2292a", + "document_id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "0ae3812e-071b-4b12-ba2d-a425c9d2bacc", + 
"document_id": "2f576170-c4f9-5141-a910-a0924f341de4" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "41c6357d-0657-44a1-a0e4-12e99d552b82", + "document_id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "1985e9a6-9e2c-4f0f-9d3c-99f8be6db02b", + "document_id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "937ea186-fa97-4a08-a13c-240de51f9e9a", + "document_id": "db02076e-989a-59cd-98d5-e24e15a0bd27" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "ceb2f270-595d-4bd6-bfd1-bd2925dc5b96", + "document_id": "d421207a-d799-5806-8d67-46b2005b15d4" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "9c90d5e4-1513-48c3-a598-beaccd8fcf09", + "document_id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "d88a4951-22ec-4778-b65c-298710b8e937", + "document_id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c" + } + ] + }, + "reingest_sample_file": { + "results": [ + { + "message": "Ingestion task queued successfully.", + "task_id": "f0d71ddb-7516-4331-ad7e-c7f0205f6f66", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "d2c55f19-ceb4-4aad-995d-37569419b40e", + "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "9e15204c-4f3a-41cc-9f34-0deff009df46", + "document_id": "52e12576-090f-59db-91f4-6d4b2e29ae6c" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "dadbbaf6-559d-48d4-8db4-881f33760af5", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "0892bd5e-57a5-47f5-9a0a-8c558db7b4c1", + "document_id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9" + }, + { + "message": 
"Ingestion task queued successfully.", + "task_id": "04b91d55-2b46-40a7-9ddb-da362286f6bc", + "document_id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "142a0871-1ab2-4f9b-a9c5-b77ff10141ea", + "document_id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "c60fa8db-33c4-4d66-8317-f4615568835d", + "document_id": "7b0f40c5-2ace-5781-ae35-ead99ddee8c5" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "154593d2-8498-4b92-b765-005dfa9b0de0", + "document_id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "e076c320-2e29-4787-92a6-f60afd2db533", + "document_id": "2f576170-c4f9-5141-a910-a0924f341de4" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "c7bfee83-259d-4663-9dcc-05d8def34632", + "document_id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "96875e20-aa60-4ba9-a0c7-013bfd883e68", + "document_id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "706ea773-4345-48cb-a5c8-b4296d7cf906", + "document_id": "db02076e-989a-59cd-98d5-e24e15a0bd27" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "55182343-2d74-43e7-873b-50ec76f4c6cd", + "document_id": "d421207a-d799-5806-8d67-46b2005b15d4" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "b41719da-cb6f-4667-919e-23fd0b7bf094", + "document_id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb" + }, + { + "message": "Ingestion task queued successfully.", + "task_id": "228230e4-4f54-413e-9655-2c9ae06c6754", + "document_id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c" + } + ] + }, + "documents_overview": { + "results": [ + { + "id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c", + "collection_ids": [], + "user_id": 
"2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_3.html", + "version": "v0", + "size_in_bytes": 166556, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:10.456776Z", + "updated_at": "2024-09-20T22:50:10.456787Z", + "ingestion_attempt_number": null + }, + { + "id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "png", + "metadata": { + "version": "v0" + }, + "title": "screen_shot.png", + "version": "v0", + "size_in_bytes": 1055688, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:10.445592Z", + "updated_at": "2024-09-20T22:50:10.445596Z", + "ingestion_attempt_number": null + }, + { + "id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_2.html", + "version": "v0", + "size_in_bytes": 166816, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.506868Z", + "updated_at": "2024-09-20T22:50:09.506871Z", + "ingestion_attempt_number": null + }, + { + "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "aristotle.txt", + "version": "v0", + "size_in_bytes": 97804, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.476022Z", + "updated_at": "2024-09-20T22:50:09.476024Z", + "ingestion_attempt_number": null + }, + { + "id": "d421207a-d799-5806-8d67-46b2005b15d4", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "yc_companies.txt", + "version": 
"v0", + "size_in_bytes": 62948, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.471883Z", + "updated_at": "2024-09-20T22:50:09.471886Z", + "ingestion_attempt_number": null + }, + { + "id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "test.txt", + "version": "v0", + "size_in_bytes": 28, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.470416Z", + "updated_at": "2024-09-20T22:50:09.470418Z", + "ingestion_attempt_number": null + }, + { + "id": "2f576170-c4f9-5141-a910-a0924f341de4", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "pdf", + "metadata": { + "version": "v0" + }, + "title": "lyft_2021.pdf", + "version": "v0", + "size_in_bytes": 1920404, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.469252Z", + "updated_at": "2024-09-20T22:50:09.469256Z", + "ingestion_attempt_number": null + }, + { + "id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_5.html", + "version": "v0", + "size_in_bytes": 165040, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.465810Z", + "updated_at": "2024-09-20T22:55:12.531882Z", + "ingestion_attempt_number": null + }, + { + "id": "7b0f40c5-2ace-5781-ae35-ead99ddee8c5", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "pdf", + "metadata": { + "version": "v0" + }, + "title": "graphrag.pdf", + "version": "v0", + "size_in_bytes": 2287544, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.455913Z", + 
"updated_at": "2024-09-20T22:55:12.517199Z", + "ingestion_attempt_number": null + }, + { + "id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_4.html", + "version": "v0", + "size_in_bytes": 157484, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:07.036336Z", + "updated_at": "2024-09-20T22:55:12.484462Z", + "ingestion_attempt_number": null + }, + { + "id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "aristotle_v2.txt", + "version": "v0", + "size_in_bytes": 3380, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:05.565359Z", + "updated_at": "2024-09-20T22:55:12.516690Z", + "ingestion_attempt_number": null + }, + { + "id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "mp3", + "metadata": { + "version": "v0" + }, + "title": "sample2.mp3", + "version": "v0", + "size_in_bytes": 96608, + "ingestion_status": "chunking", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:04.622762Z", + "updated_at": "2024-09-20T22:55:12.486232Z", + "ingestion_attempt_number": null + }, + { + "id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "pdf", + "metadata": { + "version": "v0" + }, + "title": "uber_2021.pdf", + "version": "v0", + "size_in_bytes": 2507312, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:04.211252Z", + "updated_at": "2024-09-20T22:55:12.509328Z", + "ingestion_attempt_number": null + }, + { + "id": "52e12576-090f-59db-91f4-6d4b2e29ae6c", + 
"collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "mp3", + "metadata": { + "version": "v0" + }, + "title": "sample.mp3", + "version": "v0", + "size_in_bytes": 162228, + "ingestion_status": "storing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:02.699362Z", + "updated_at": "2024-09-20T22:55:10.516462Z", + "ingestion_attempt_number": null + }, + { + "id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_1.html", + "version": "v0", + "size_in_bytes": 175340, + "ingestion_status": "storing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:02.517969Z", + "updated_at": "2024-09-20T22:55:10.518388Z", + "ingestion_attempt_number": null + }, + { + "id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "got.txt", + "version": "v0", + "size_in_bytes": 12656, + "ingestion_status": "chunking", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:02.516278Z", + "updated_at": "2024-09-20T22:55:10.519709Z", + "ingestion_attempt_number": null + } + ], + "total_entries": 16 + }, + "document_chunks_test": { + "results": [ + { + "fragment_id": "67e7ab57-eaa0-57d8-9276-da273abcdabd", + "extraction_id": "286b3218-517c-50bf-b8ea-1262e8ec6b42", + "document_id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "text": "June 2021 A few days ago, on the way home from school, my nine year old son", + "metadata": { + "version": "v0", + "chunk_order": 0, + "document_type": "html", + "unstructured_filetype": "text/html", + "unstructured_languages": [ + "eng" + ], + "unstructured_text_as_html": "
June 2021 A few days ago, on the way home from school, my nine year old son
", + "partitioned_by_unstructured": true, + "unstructured_is_continuation": true + } + } + ], + "total_entries": 1 + }, + "update_document_test": { + "results": { + "message": "Update task queued successfully.", + "task_id": "ce9a5927-64b8-4431-acf0-fe12ad2230e5", + "document_ids": [ + "db02076e-989a-59cd-98d5-e24e15a0bd27" + ] + } + }, + "rerun_documents_overview_test_1": { + "results": [ + { + "id": "5b1bd54f-4d70-56b9-a017-a618bc75f94c", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_3.html", + "version": "v0", + "size_in_bytes": 166556, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:10.456776Z", + "updated_at": "2024-09-20T22:55:14.529464Z", + "ingestion_attempt_number": null + }, + { + "id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "png", + "metadata": { + "version": "v0" + }, + "title": "screen_shot.png", + "version": "v0", + "size_in_bytes": 1055688, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:10.445592Z", + "updated_at": "2024-09-20T22:55:14.539605Z", + "ingestion_attempt_number": null + }, + { + "id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_2.html", + "version": "v0", + "size_in_bytes": 166816, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.506868Z", + "updated_at": "2024-09-20T22:55:14.533120Z", + "ingestion_attempt_number": null + }, + { + "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": 
"aristotle.txt", + "version": "v0", + "size_in_bytes": 97804, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.476022Z", + "updated_at": "2024-09-20T22:55:14.531133Z", + "ingestion_attempt_number": null + }, + { + "id": "d421207a-d799-5806-8d67-46b2005b15d4", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "yc_companies.txt", + "version": "v0", + "size_in_bytes": 62948, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.471883Z", + "updated_at": "2024-09-20T22:55:14.531772Z", + "ingestion_attempt_number": null + }, + { + "id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "test.txt", + "version": "v0", + "size_in_bytes": 28, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.470416Z", + "updated_at": "2024-09-20T22:55:14.536782Z", + "ingestion_attempt_number": null + }, + { + "id": "2f576170-c4f9-5141-a910-a0924f341de4", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "pdf", + "metadata": { + "version": "v0" + }, + "title": "lyft_2021.pdf", + "version": "v0", + "size_in_bytes": 1920404, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.469252Z", + "updated_at": "2024-09-20T22:55:14.569269Z", + "ingestion_attempt_number": null + }, + { + "id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_5.html", + "version": "v0", + "size_in_bytes": 165040, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": 
"2024-09-20T22:50:09.465810Z", + "updated_at": "2024-09-20T22:55:12.531882Z", + "ingestion_attempt_number": null + }, + { + "id": "7b0f40c5-2ace-5781-ae35-ead99ddee8c5", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "pdf", + "metadata": { + "version": "v0" + }, + "title": "graphrag.pdf", + "version": "v0", + "size_in_bytes": 2287544, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.455913Z", + "updated_at": "2024-09-20T22:55:12.517199Z", + "ingestion_attempt_number": null + }, + { + "id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_4.html", + "version": "v0", + "size_in_bytes": 157484, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:07.036336Z", + "updated_at": "2024-09-20T22:55:12.484462Z", + "ingestion_attempt_number": null + }, + { + "id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "aristotle_v2.txt", + "version": "v0", + "size_in_bytes": 3380, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:05.565359Z", + "updated_at": "2024-09-20T22:55:12.516690Z", + "ingestion_attempt_number": null + }, + { + "id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "mp3", + "metadata": { + "version": "v0" + }, + "title": "sample2.mp3", + "version": "v0", + "size_in_bytes": 96608, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:04.622762Z", + "updated_at": "2024-09-20T22:55:12.486232Z", + "ingestion_attempt_number": null + }, + { + "id": 
"3e157b3a-8469-51db-90d9-52e7d896b49b", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "pdf", + "metadata": { + "version": "v0" + }, + "title": "uber_2021.pdf", + "version": "v0", + "size_in_bytes": 2507312, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:04.211252Z", + "updated_at": "2024-09-20T22:55:12.509328Z", + "ingestion_attempt_number": null + }, + { + "id": "52e12576-090f-59db-91f4-6d4b2e29ae6c", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "mp3", + "metadata": { + "version": "v0" + }, + "title": "sample.mp3", + "version": "v0", + "size_in_bytes": 162228, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:02.699362Z", + "updated_at": "2024-09-20T22:55:10.516462Z", + "ingestion_attempt_number": null + }, + { + "id": "b4ac4dd6-5f27-596e-a55b-7cf242ca30aa", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_1.html", + "version": "v0", + "size_in_bytes": 175340, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:02.517969Z", + "updated_at": "2024-09-20T22:55:10.518388Z", + "ingestion_attempt_number": null + }, + { + "id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "got.txt", + "version": "v0", + "size_in_bytes": 12656, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:02.516278Z", + "updated_at": "2024-09-20T22:55:10.519709Z", + "ingestion_attempt_number": null + } + ], + "total_entries": 16 + }, + "delete_document_test": { + "results": {} + }, + "rerun_documents_overview_test_2": { + "results": [ + { + "id": 
"5b1bd54f-4d70-56b9-a017-a618bc75f94c", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_3.html", + "version": "v0", + "size_in_bytes": 166556, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:10.456776Z", + "updated_at": "2024-09-20T22:55:14.529464Z", + "ingestion_attempt_number": null + }, + { + "id": "451adbbd-e24e-5c9b-80a8-f424b6c7accb", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "png", + "metadata": { + "version": "v0" + }, + "title": "screen_shot.png", + "version": "v0", + "size_in_bytes": 1055688, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:10.445592Z", + "updated_at": "2024-09-20T22:55:14.539605Z", + "ingestion_attempt_number": null + }, + { + "id": "57eec3df-cf68-5559-a80d-ae3fb55b9af1", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_2.html", + "version": "v0", + "size_in_bytes": 166816, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.506868Z", + "updated_at": "2024-09-20T22:55:14.533120Z", + "ingestion_attempt_number": null + }, + { + "id": "db02076e-989a-59cd-98d5-e24e15a0bd27", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "aristotle.txt", + "version": "v0", + "size_in_bytes": 97804, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.476022Z", + "updated_at": "2024-09-20T22:55:14.531133Z", + "ingestion_attempt_number": null + }, + { + "id": "d421207a-d799-5806-8d67-46b2005b15d4", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + 
"metadata": { + "version": "v0" + }, + "title": "yc_companies.txt", + "version": "v0", + "size_in_bytes": 62948, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.471883Z", + "updated_at": "2024-09-20T22:55:14.531772Z", + "ingestion_attempt_number": null + }, + { + "id": "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "test.txt", + "version": "v0", + "size_in_bytes": 28, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.470416Z", + "updated_at": "2024-09-20T22:55:14.536782Z", + "ingestion_attempt_number": null + }, + { + "id": "2f576170-c4f9-5141-a910-a0924f341de4", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "pdf", + "metadata": { + "version": "v0" + }, + "title": "lyft_2021.pdf", + "version": "v0", + "size_in_bytes": 1920404, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.469252Z", + "updated_at": "2024-09-20T22:55:14.569269Z", + "ingestion_attempt_number": null + }, + { + "id": "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_5.html", + "version": "v0", + "size_in_bytes": 165040, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.465810Z", + "updated_at": "2024-09-20T22:55:12.531882Z", + "ingestion_attempt_number": null + }, + { + "id": "7b0f40c5-2ace-5781-ae35-ead99ddee8c5", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "pdf", + "metadata": { + "version": "v0" + }, + "title": "graphrag.pdf", + "version": "v0", + "size_in_bytes": 2287544, + "ingestion_status": "success", + 
"restructuring_status": "pending", + "created_at": "2024-09-20T22:50:09.455913Z", + "updated_at": "2024-09-20T22:55:12.517199Z", + "ingestion_attempt_number": null + }, + { + "id": "e797da22-8c5d-54e5-bed5-a55954cf6bf9", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "html", + "metadata": { + "version": "v0" + }, + "title": "pg_essay_4.html", + "version": "v0", + "size_in_bytes": 157484, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:07.036336Z", + "updated_at": "2024-09-20T22:55:12.484462Z", + "ingestion_attempt_number": null + }, + { + "id": "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "aristotle_v2.txt", + "version": "v0", + "size_in_bytes": 3380, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:05.565359Z", + "updated_at": "2024-09-20T22:55:12.516690Z", + "ingestion_attempt_number": null + }, + { + "id": "01d514a4-5cb1-5c86-be8c-0c5c312a02c9", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "mp3", + "metadata": { + "version": "v0" + }, + "title": "sample2.mp3", + "version": "v0", + "size_in_bytes": 96608, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:04.622762Z", + "updated_at": "2024-09-20T22:55:12.486232Z", + "ingestion_attempt_number": null + }, + { + "id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "pdf", + "metadata": { + "version": "v0" + }, + "title": "uber_2021.pdf", + "version": "v0", + "size_in_bytes": 2507312, + "ingestion_status": "parsing", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:04.211252Z", + "updated_at": "2024-09-20T22:55:12.509328Z", + 
"ingestion_attempt_number": null + }, + { + "id": "52e12576-090f-59db-91f4-6d4b2e29ae6c", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "mp3", + "metadata": { + "version": "v0" + }, + "title": "sample.mp3", + "version": "v0", + "size_in_bytes": 162228, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:02.699362Z", + "updated_at": "2024-09-20T22:55:10.516462Z", + "ingestion_attempt_number": null + }, + { + "id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "collection_ids": [], + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "type": "txt", + "metadata": { + "version": "v0" + }, + "title": "got.txt", + "version": "v0", + "size_in_bytes": 12656, + "ingestion_status": "success", + "restructuring_status": "pending", + "created_at": "2024-09-20T22:50:02.516278Z", + "updated_at": "2024-09-20T22:55:10.519709Z", + "ingestion_attempt_number": null + } + ], + "total_entries": 15 + }, + "rerun_document_chunks_test": { + "results": "{\"detail\":{\"message\":\"An error 'list index out of range' occurred during document_chunks_app\",\"error\":\"list index out of range\",\"error_type\":\"IndexError\"}}" + } +} diff --git a/py/tests/regression/observed_outputs/test_group_management.json b/py/tests/regression/observed_outputs/test_group_management.json new file mode 100644 index 000000000..195daa6fc --- /dev/null +++ b/py/tests/regression/observed_outputs/test_group_management.json @@ -0,0 +1,20 @@ +{ + "create_groups": { + "error": "'R2RClient' object has no attribute 'create_group'" + }, + "add_users_to_groups": { + "error": "'R2RClient' object has no attribute 'add_user_to_group'" + }, + "group_based_document_access": { + "error": "'TestGroupManagement' object has no attribute 'group_id_1'" + }, + "admin_ingest_documents": { + "error": "'TestGroupManagement' object has no attribute 'group_id_1'" + }, + "user_ingest_and_search": { + "error": "'TestGroupManagement' object has no 
attribute 'group_id_1'" + }, + "cleanup": { + "error": "'R2RClient' object has no attribute 'delete_group'" + } +} diff --git a/py/tests/regression/observed_outputs/test_observability.json b/py/tests/regression/observed_outputs/test_observability.json new file mode 100644 index 000000000..458eeb497 --- /dev/null +++ b/py/tests/regression/observed_outputs/test_observability.json @@ -0,0 +1,1214 @@ +{ + "users_overview": { + "results": [ + { + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "email": "admin@example.com", + "is_superuser": true, + "is_active": true, + "is_verified": true, + "created_at": "2024-09-20T22:49:55.902324Z", + "updated_at": "2024-09-20T22:49:55.902324Z", + "collection_ids": [], + "num_files": 15, + "total_size_in_bytes": 8862496, + "document_ids": [ + "3e157b3a-8469-51db-90d9-52e7d896b49b", + "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc", + "7b0f40c5-2ace-5781-ae35-ead99ddee8c5", + "451adbbd-e24e-5c9b-80a8-f424b6c7accb", + "e797da22-8c5d-54e5-bed5-a55954cf6bf9", + "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", + "5b1bd54f-4d70-56b9-a017-a618bc75f94c", + "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "57eec3df-cf68-5559-a80d-ae3fb55b9af1", + "01d514a4-5cb1-5c86-be8c-0c5c312a02c9", + "d421207a-d799-5806-8d67-46b2005b15d4", + "db02076e-989a-59cd-98d5-e24e15a0bd27", + "52e12576-090f-59db-91f4-6d4b2e29ae6c", + "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c", + "2f576170-c4f9-5141-a910-a0924f341de4" + ] + }, + { + "user_id": "05f95944-be0f-5913-b04c-0eaa5cf45a39", + "email": "test_9e56903b@example.com", + "is_superuser": false, + "is_active": true, + "is_verified": true, + "created_at": "2024-09-20T22:58:45.796169Z", + "updated_at": "2024-09-20T22:58:46.618261Z", + "collection_ids": [], + "num_files": 0, + "total_size_in_bytes": 0, + "document_ids": [] + } + ], + "total_entries": 2 + }, + "logs": { + "results": [ + { + "run_id": "1b9efeb6-6dda-5d6a-80e9-6fcb16b174fd", + "run_type": "MANAGEMENT", + "entries": [ + { + "key": "search_latency", + "value": "0.15", + 
"timestamp": "2024-09-20 22:55:36" + }, + { + "key": "error", + "value": "list index out of range", + "timestamp": "2024-09-20 22:55:36" + }, + { + "key": "completion_record", + "value": "{\"message_id\": \"6d775bd7-dc69-5436-a749-e863dc28c968\", \"message_type\": \"assistant\", \"timestamp\": \"2024-09-20T15:49:52.837639\", \"feedback\": null, \"score\": null, \"completion_start_time\": \"2024-09-20T15:55:37.247359\", \"completion_end_time\": \"2024-09-20T15:55:38.224136\", \"search_query\": \"What was Uber's profit in 2020?\", \"search_results\": {\"vector_search_results\": [{\"fragment_id\": \"07aa09c5-81a8-5a48-953a-532064a446f8\", \"extraction_id\": \"d3060c36-85dc-5e8d-b8ff-cfe4c1753ccc\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.7445549521115464, \"text\": \"Revenue was $17.5 billion, or up 57% year-over-year, reflecting the overall growth in our Delivery business and an increase in Freight revenue attributable to the acquisition of Transplace in the fourth quarter of 2021 as well as growth in the number of shippers and carriers on the network combined with an increase in volumes with our top shippers.\\n\\nNet loss attributable to Uber Technologies, Inc. was $496 million, a 93% improvement year-over-year, driven by a $1.6 billion pre-tax gain on the sale of our ATG Business to Aurora, a $1.6 billion pre-tax net benefit relating to Uber\\u2019s equity investments, as well as reductions in our fixed cost structure and increased variable cost efficiencies. Net loss attributable to Uber Technologies, Inc. 
also included $1.2 billion of stock-based compensation expense.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 445, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 53, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"2044e305-c042-5f0d-b05d-a2b97181f7a8\", \"extraction_id\": \"4329441a-5faf-5e9d-801f-ebd753ee1bd3\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.7071289420127869, \"text\": \"Total costs and expenses Loss from operations\\n\\nInterest expense Other income (expense), net Loss before income taxes and loss from equity method investments Provision for (benefit from) income taxes Loss from equity method investments Net loss including non-controlling interests\\n\\nLess: net loss attributable to non-controlling interests, net of tax\\n\\n100 %\\n\\n46 % 16 % 32 % 20 % 24 % 5 % 144 % (44)% (4)% (15)% (62)% (2)% \\u2014 % (61)% \\u2014 % (61)%\\n\\n100 %\\n\\n54 % 11 % 27 % 12 % 13 % 5 % 122 % (22)% (3)% 19 % (6)% (3)% \\u2014 % (3)% \\u2014 % (3)%\\n\\nNet loss attributable to Uber Technologies, Inc.\\n\\n(1)\\n\\nTotals of percentage of revenues may not foot due to rounding.\\n\\nComparison of the Years Ended December 31, 2020 and 2021\\n\\nRevenue\\n\\nYear Ended December 31,\\n\\n(In millions, except percentages)\\n\\n2020\\n\\n2021\\n\\n2020 to 2021 % Change\\n\\nRevenue\\n\\n$\\n\\n11,139 $\\n\\n17,455\\n\\n57 %\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 463, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 57, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"3840834b-7c74-5417-9252-9080e609fb2f\", 
\"extraction_id\": \"cf934fe1-926d-5525-a230-30946961cf28\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6809690201571295, \"text\": \"Year Ended December 31, 2020\\n\\nRevenue Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately below Operations and support Sales and marketing Research and development General and administrative Depreciation and amortization\\n\\nTotal costs and expenses Loss from operations\\n\\nInterest expense Other income (expense), net Loss before income taxes and loss from equity method investments Provision for (benefit from) income taxes Loss from equity method investments Net loss including non-controlling interests\\n\\nLess: net loss attributable to non-controlling interests, net of tax\\n\\n$\\n\\n13,000 $\\n\\n6,061 2,302 4,626 4,836 3,299 472 21,596 (8,596) (559) 722 (8,433) 45 (34) (8,512) (6) (8,506) $\\n\\n11,139 $\\n\\n5,154 1,819 3,583 2,205 2,666 575 16,002 (4,863) (458) (1,625) (6,946) (192) (34) (6,788) (20) (6,768) $\\n\\nNet loss attributable to Uber Technologies, Inc. Net loss per share attributable to Uber Technologies, Inc. 
common stockholders:\\n\\n$\\n\\nBasic\\n\\n$\\n\\n(6.81) $\\n\\n(3.86) $\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 574, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 77, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"e2a37b27-0644-59e4-9746-37d48592a299\", \"extraction_id\": \"6b86ac2f-ce33-5126-83e6-a8731ea677c8\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6805637085605776, \"text\": \"Less: net loss attributable to non-controlling interests, net of tax\\n\\n$\\n\\n11,139 $\\n\\n5,154 1,819 3,583 2,205 2,666 575 16,002 (4,863) (458) (1,625) (6,946) (192) (34) (6,788) (20) (6,768) $\\n\\n17,455\\n\\n9,351 1,877 4,789 2,054 2,316 902 21,289 (3,834) (483) 3,292 (1,025) (492) (37) (570) (74) (496)\\n\\nNet loss attributable to Uber Technologies, Inc.\\n\\n$\\n\\n54\\n\\nThe following table sets forth the components of our consolidated statements of operations for each of the periods presented as a percentage of revenue\\n\\n(1)\\n\\n:\\n\\nYear Ended December 31, 2021 2020\\n\\nRevenue Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately below Operations and support Sales and marketing Research and development General and administrative Depreciation and amortization\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 462, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 56, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"de30c3c9-cdfd-5872-bdaf-4859bef5c3a8\", \"extraction_id\": \"33bc6d8b-9fdc-5df7-be1d-fa7de176a0b5\", \"document_id\": 
\"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6537506580352783, \"text\": \"The Uber Service activities are performed to satisfy our sole performance obligation in the transaction, which is to connect Drivers and Merchants with end-\\n\\nusers to facilitate the completion of a successful transaction.\\n\\nIn 2020, we began charging Mobility end-users a fee to use the platform in certain markets. In these transactions, in addition to a performance obligation to Drivers, we also have a performance obligation to end-users, which is to connect end-users to Drivers in the marketplace. We recognize revenue when a trip is complete. We present revenue on a net basis for these transactions, as we do not control the service provided by Drivers to end-users. For the years ended December 31, 2020 and 2021, we recognized total revenue of $323 million and $336 million, respectively, associated with these fees charged to end-users.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 642, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 90, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"4b6ee3eb-daca-5930-bafe-946cad56cdcc\", \"extraction_id\": \"eb2bc121-0b00-5f70-8eb6-549e1fb1ed72\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6387766599655151, \"text\": \"Other income (expense), net\\n\\n$\\n\\nDuring the year ended December 31, 2020, gain on business divestitures, net represented a $154 million gain on the sale of our Uber Eats India operations to Zomato recognized in the first quarter of 2020 and a $77 million gain on the sale of our European Freight Business to sennder GmbH (\\u201cSennder\\u201d) recognized in 
the fourth quarter of 2020, partially offset by a $27 million loss on the sale of our JUMP operations to Lime recognized in the second quarter of 2020.\\n\\n(1)\\n\\nDuring the year ended December 31, 2021, gain on business divestitures, net represented a $1.6 billion gain on the sale of our ATG Business to Aurora\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 799, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 118, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"af5f93d3-8b2f-5e71-a358-0dd56c2f68ac\", \"extraction_id\": \"acf12622-2e6e-5234-9768-ba448294a81d\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.632179401387013, \"text\": \"2019\\n\\n100.0 %\\n\\n60.2 17.6 41.6 22.5 32.8 174.7 (74.7) \\u2014 2.8 (71.9) 0.1 (72.0)%\\n\\n2019 to 2020 % Change\\n\\n(35) %\\n\\nsecond quarter of 2021. These increases were offset by investments in driver supply by increasing driver incentives recorded as a reduction to revenue by $942.9 million in 2021 as compared to the prior year as rider demand outpaced driver supply during certain periods of the pandemic recovery in 2021. Revenue in 2020 was also higher in the first quarter of 2020 prior to the implementation of shelter-in-place orders and other travel restrictions across North America beginning March 2020.\\n\\nWe expect to see continued recovery in demand for our platform and the resulting positive impacts on revenue as there are more widespread immunity levels, more communities reopen and other restrictive travel and social distancing measures in response to COVID-19 are eased. 
However, we cannot predict the impact of COVID variants and the longer term impact of the pandemic on consumer behavior.\\n\\nCost of Revenue\\n\\n2021\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 493, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 63, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"d5379124-e7ff-509f-b47f-a79152eec2d4\", \"extraction_id\": \"2562b865-e4df-5376-9e70-927be9afbb7e\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.63012705682194, \"text\": \"For additional discussion, see the risk factor titled \\u201c\\u2014If we are unable to attract or maintain a critical mass of Drivers, consumers, merchants, shippers, and carriers, whether as a result of competition or other factors, our platform will become less appealing to platform users, and our financial results would be adversely impacted.\\u201d included in Part I, Item 1A of this Annual Report on Form 10-K as well our 2021 ESG Report and our 2021 People and Culture Report. The information in these reports is not a part of this Form 10-K.\\n\\nAdditional Information\\n\\nWe were founded in 2009 and incorporated as Ubercab, Inc., a Delaware corporation, in July 2010. In February 2011, we changed our name to Uber\\n\\nTechnologies, Inc. 
Our principal executive offices are located at 1515 3rd Street, San Francisco, California 94158, and our telephone number is (415) 612-8582.\\n\\n10\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 77, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 12, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"98a93be5-13ba-5bd6-9a18-e7ceef0fae88\", \"extraction_id\": \"8ab931e3-8f47-5598-90b8-928f387ec256\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6285917009601995, \"text\": \"Uber Technologies, Inc. (\\u201cUber,\\u201d \\u201cwe,\\u201d \\u201cour,\\u201d or \\u201cus\\u201d) was incorporated in Delaware in July 2010, and is headquartered in San Francisco, California. Uber is a technology platform that uses a massive network, leading technology, operational excellence and product expertise to power movement from point A to point B. Uber develops and operates proprietary technology applications supporting a variety of offerings on its platform (\\u201cplatform(s)\\u201d or \\u201cPlatform(s)\\u201d). Uber connects consumers (\\u201cRider(s)\\u201d) with independent providers of ride services (\\u201cMobility Driver(s)\\u201d) for ridesharing services, and connects Riders and other consumers (\\u201cEaters\\u201d) with restaurants, grocers and other stores (collectively, \\u201cMerchants\\u201d) with delivery service providers (\\u201cCouriers\\u201d) for meal preparation, grocery and other delivery services. 
Riders and Eaters are collectively referred to as \\u201cend-user(s)\\u201d or \\u201cconsumer(s).\\u201d Mobility Drivers and Couriers are collectively referred to as \\u201cDriver(s).\\u201d Uber also connects consumers with public\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 592, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 84, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"d87084d1-c52a-5a4b-96ad-9fc1cb98bfc5\", \"extraction_id\": \"8361bf60-bce2-56c2-b982-376a75e47d58\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6283430678639979, \"text\": \"Year Ended December 31, 2020\\n\\n1,000 49 1,189 (27) \\u2014 (138) (34) 8,939 (4) 3,824\\n\\n247 125 2,628 (527) (891) (224) 38 1,379 (92) (4,327)\\n\\n8,209 34 12,067 $\\n\\n12,067 (349) 7,391 $\\n\\n332 $ 133\\n\\n412 $ 82\\n\\n14,224 4,229 \\u2014 251 9 \\u2014 \\u2014\\n\\n\\u2014 \\u2014 \\u2014 196 3,898 171 1,634\\n\\n2021\\n\\n675 107 1,484 (27) (307) (226) 101 1,780 (69) 65\\n\\n7,391 349 7,805\\n\\n449 87\\n\\n\\u2014 \\u2014 232 184 1,868 1,018 \\u2014\\n\\nUBER TECHNOLOGIES, INC.\\n\\nNOTES TO CONSOLIDATED FINANCIAL STATEMENTS\\n\\nNote 1 \\u2013 Description of Business and Summary of Significant Accounting Policies\\n\\nDescription of Business\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 591, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 83, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}], \"kg_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-A9gpFrelYOJObG7VkyoUz23HCc0r2\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": 
null, \"message\": {\"content\": \"Uber's profit in 2020 was actually a net loss. The net loss attributable to Uber Technologies, Inc. for the year ended December 31, 2020, was $6.8 billion [3].\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1726872937, \"model\": \"gpt-4o-2024-05-13\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_3537616b13\", \"usage\": {\"completion_tokens\": 44, \"prompt_tokens\": 2320, \"total_tokens\": 2364, \"completion_tokens_details\": {\"reasoning_tokens\": 0}}}}", + "timestamp": "2024-09-20 22:55:38" + }, + { + "key": "completion_record", + "value": "{\"message_id\": \"ef8f512c-8b5b-5582-b4dd-5347a502ed08\", \"message_type\": \"assistant\", \"timestamp\": \"2024-09-20T15:49:52.837639\", \"feedback\": null, \"score\": null, \"completion_start_time\": \"2024-09-20T15:55:38.483020\", \"completion_end_time\": \"2024-09-20T15:55:39.733865\", \"search_query\": \"Who is John Snow?\", \"search_results\": {\"vector_search_results\": [{\"fragment_id\": \"7cbdab86-1689-5779-81bd-62f7eb3ab36d\", \"extraction_id\": \"866f85a0-b3d6-5fc5-9ca0-dbd2373eac58\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.2625822126865387, \"text\": \"Eddard (Ned) Stark\\n\\nThe Lord of Winterfell and new Hand of the King. A devoted father and dutiful lord, he is best characterized by his strong sense of honor, and he strives to always do what is right, regardless of his personal feelings.\\n\\nCatelyn (Cat) Tully\\n\\nNed\\u2019s wife and Lady Stark of Winterfell. She is intelligent, strong, and fiercely devoted to her family, leading her to seek out the person responsible for trying to kill her son Bran.\\n\\nDaenerys Stormborn Targaryen\\n\\nThe Dothraki khaleesi (queen) and Targaryen princess. 
She and her brother are the only surviving members of the Targaryen family, and she grows from a frightened girl to a confident ruler, while still maintaining her kindness, over the course of the novel.\\n\\nJon Snow\\n\\nNed Stark\\u2019s bastard son. Since Catelyn is not his mother, he is not a proper member of the Stark family, and he often feels himself an outsider. He is also a highly capable swordsman and thinker, with a knack for piercing observations.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 0, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"8e563fb1-4665-53a8-8a83-63a1f88e2aea\", \"extraction_id\": \"f6bc23b5-bc80-5e49-9b55-25e9abe97073\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.2610678750594818, \"text\": \"Jeor Mormont (Commander Mormont)\\n\\nLord Commander of the Night\\u2019s Watch at Castle Black. Commander Mormont is tough, old, and wise, and his men call him \\u201cThe Old Bear.\\u201d\\n\\nMaester Aemon\\n\\nThe chief man of learning at Castle Black. Despite his blind white eyes, Maester Aemon sees and speaks the truth in cryptic ways. Though few people realize it, Aemon is one of the few surviving members of the Targaryen family, but he has always put his vows to the Night\\u2019s Watch ahead of any family loyalties.\\n\\nSamwell (Sam) Tarly\\n\\nA new recruit to the Night\\u2019s Watch who is fat and cowardly but very smart. Sam loves to read and eat but hates to fight, and he quickly becomes one of Jon Snow\\u2019s closest companions at the Wall.\\n\\nSer Allister Thorne\\n\\nCastle Black\\u2019s resentful master-at-arms. He hard on the new recruits to the Night\\u2019s Watch and seems to enjoy making them suffer, causing Jon to rebel against him. 
During Robert\\u2019s rebellion against the former king, he was a Targaryen loyalist.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 7, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"eaf48cfe-592e-55fa-9f07-613a4f221c45\", \"extraction_id\": \"fdf6127b-e623-58bc-a50b-b7e7b040c03a\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.2574058771133423, \"text\": \"Varys (The Spider)\\n\\nThe Red Keep\\u2019s master of whispers and a eunuch. His role in the court is to run a network of spies and keep the king informed, and he often uses what he knows to manipulate those around him, including the king.\\n\\nRobert Baratheon\\n\\nThe corpulent king of Westeros. He loves to fight, drink, and sleep with women, and he hates the duties of ruling. He and Ned are long-time friends, and he was engaged to Ned\\u2019s sister until she died.\\n\\nSer Jorah Mormont\\n\\nAn exiled knight who serves unofficially as Daenerys\\u2019s chief advisor. Though he was exiled by Ned Stark for selling slaves, he is intelligent, valiant, and a great fighter. 
He swears allegiance to Viserys as true king of Westeros, but he also feeds information about the Targaryens back to Varys.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 3, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"3cb1c2db-01e4-5ea8-a39f-31f5949637f8\", \"extraction_id\": \"02b64e7c-5aa5-5380-8fa0-3d8b64866aa8\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.20913409943187866, \"text\": \"Aerys II Targaryen\\n\\nKing of Westeros before Robert Baratheon. He was known as The Mad King because of his cruelty. Aerys murdered Ned\\u2019s older brother, Brandon Stark, in the Red Keep\\u2019s throne room. At the end of the war that followed, Jaime Lannister slew Aerys in the same room.\\n\\nRhaegar Targaryen\\n\\nThe heir to Aerys and older brother of Daenerys and Viserys. Rhaegar kidnapped Lyanna Stark, Robert\\u2019s betrothed, helping to set in motion the events that led to Robert\\u2019s Rebellion. The war effectively ended when Robert slew Rhaegar with his warhammer on the Trident River.\\n\\nJon Arryn\\n\\nThe recently deceased Lord of the Eyrie and Hand of the King. Jon Arryn fostered Ned Stark and Robert Baratheon at the Eyrie. 
When Robert became king, Jon Arryn served as his Hand until his murder.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 10, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"5d09c80f-ba83-5204-a6b3-f08831e150b0\", \"extraction_id\": \"f86a905c-8d82-52ff-ad72-a800ca3af6f4\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.1912464201450348, \"text\": \"Illyrio Mopatis\\n\\nAn obese merchant from the Free Cities who helps Daenerys and Viserys Targaryen. Illyrio is very rich and very well-informed. He is quick to please, especially when there is a possibility that his kindness will help him avoid trouble or gain greater fortune in the future.\\n\\nSer Barristan Selmy\\n\\nLord Commander of the Kingsguard. He has served kings Jaehaerys, Aerys II, and Robert. Though he has grown old, Barristan \\u201cThe Bold\\u201d is a formidable fighter. He is, and has always been, an honorable knight.\\n\\nRenly Baratheon\\n\\nThe youngest of the three Baratheon brothers. Renly is lighthearted and opportunistic, and unexpectedly ambitious. He serves on Robert\\u2019s royal council.\\n\\nStannis Baratheon\\n\\nThe middle brother of the three Baratheons. Stannis does not appear in A Game of Thrones, but as the brother of the king, he is a potential heir to the throne. 
Stannis does not seem to be well-liked.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 8, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"d143b410-f9a4-5f3b-bb46-fb412eda8201\", \"extraction_id\": \"37f940e2-18f4-50f5-93aa-cec422fc9211\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.1862841248512268, \"text\": \"Sandor (The Hound) Clegane\\n\\nPrince Joff\\u2019s unofficial bodyguard. Proud that he is not a knight, The Hound appears to have no scruples whatsoever and does what Joffrey orders, however cruel or unjust, without question. His face is scarred on one side by extensive burning inflicted by his brother, Gregor.\\n\\nRobb Stark\\n\\nThe eldest Stark son and thus heir to Ned Stark. Though just fourteen, he is mature beyond his age as well as being brave and dutiful like his father.\\n\\nMaester Luwin\\n\\nCounselor to Ned, Catelyn, and Robb. Luwin is old and wise, and his advice proves indispensible to the Starks.\\n\\nTheon Greyjoy\\n\\nThe Starks\\u2019s ward and Robb\\u2019s best friend. 
Ned Stark took the young Theon, now nineteen, as a ward after putting down a rebellion led by the Greyjoy family, and Theon consequently grew up with the Stark children as something like a brother.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 5, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}], \"kg_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-A9gpGNU67ZRKhvfOaZSOwJnzbVP2S\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": null, \"message\": {\"content\": \"John Snow is Ned Stark\\u2019s bastard son. Since Catelyn is not his mother, he is not a proper member of the Stark family, and he often feels himself an outsider. He is also a highly capable swordsman and thinker, with a knack for piercing observations [1].\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1726872938, \"model\": \"gpt-4o-2024-05-13\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_3537616b13\", \"usage\": {\"completion_tokens\": 57, \"prompt_tokens\": 1426, \"total_tokens\": 1483, \"completion_tokens_details\": {\"reasoning_tokens\": 0}}}}", + "timestamp": "2024-09-20 22:55:39" + } + ], + "timestamp": "2024-09-20T22:58:48", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "b8ef0f89-7468-538f-9c6e-5d090ffb9f61", + "run_type": "MANAGEMENT", + "entries": [ + { + "key": "error", + "value": "list index out of range", + "timestamp": "2024-09-20 22:31:52" + }, + { + "key": "search_latency", + "value": "0.37", + "timestamp": "2024-09-20 22:46:30" + }, + { + "key": "completion_record", + "value": "{\"message_id\": \"516acc43-11ef-5306-8029-2e215441f1db\", \"message_type\": \"assistant\", \"timestamp\": \"2024-09-20T15:26:07.338533\", \"feedback\": null, \"score\": null, 
\"completion_start_time\": \"2024-09-20T15:46:30.594529\", \"completion_end_time\": \"2024-09-20T15:46:31.915121\", \"search_query\": \"What was Uber's profit in 2020?\", \"search_results\": {\"vector_search_results\": [{\"fragment_id\": \"07aa09c5-81a8-5a48-953a-532064a446f8\", \"extraction_id\": \"d3060c36-85dc-5e8d-b8ff-cfe4c1753ccc\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.7445549521115464, \"text\": \"Revenue was $17.5 billion, or up 57% year-over-year, reflecting the overall growth in our Delivery business and an increase in Freight revenue attributable to the acquisition of Transplace in the fourth quarter of 2021 as well as growth in the number of shippers and carriers on the network combined with an increase in volumes with our top shippers.\\n\\nNet loss attributable to Uber Technologies, Inc. was $496 million, a 93% improvement year-over-year, driven by a $1.6 billion pre-tax gain on the sale of our ATG Business to Aurora, a $1.6 billion pre-tax net benefit relating to Uber\\u2019s equity investments, as well as reductions in our fixed cost structure and increased variable cost efficiencies. Net loss attributable to Uber Technologies, Inc. 
also included $1.2 billion of stock-based compensation expense.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 445, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 53, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"2044e305-c042-5f0d-b05d-a2b97181f7a8\", \"extraction_id\": \"4329441a-5faf-5e9d-801f-ebd753ee1bd3\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.7071289420127869, \"text\": \"Total costs and expenses Loss from operations\\n\\nInterest expense Other income (expense), net Loss before income taxes and loss from equity method investments Provision for (benefit from) income taxes Loss from equity method investments Net loss including non-controlling interests\\n\\nLess: net loss attributable to non-controlling interests, net of tax\\n\\n100 %\\n\\n46 % 16 % 32 % 20 % 24 % 5 % 144 % (44)% (4)% (15)% (62)% (2)% \\u2014 % (61)% \\u2014 % (61)%\\n\\n100 %\\n\\n54 % 11 % 27 % 12 % 13 % 5 % 122 % (22)% (3)% 19 % (6)% (3)% \\u2014 % (3)% \\u2014 % (3)%\\n\\nNet loss attributable to Uber Technologies, Inc.\\n\\n(1)\\n\\nTotals of percentage of revenues may not foot due to rounding.\\n\\nComparison of the Years Ended December 31, 2020 and 2021\\n\\nRevenue\\n\\nYear Ended December 31,\\n\\n(In millions, except percentages)\\n\\n2020\\n\\n2021\\n\\n2020 to 2021 % Change\\n\\nRevenue\\n\\n$\\n\\n11,139 $\\n\\n17,455\\n\\n57 %\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 463, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 57, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"3840834b-7c74-5417-9252-9080e609fb2f\", 
\"extraction_id\": \"cf934fe1-926d-5525-a230-30946961cf28\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6809690201571295, \"text\": \"Year Ended December 31, 2020\\n\\nRevenue Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately below Operations and support Sales and marketing Research and development General and administrative Depreciation and amortization\\n\\nTotal costs and expenses Loss from operations\\n\\nInterest expense Other income (expense), net Loss before income taxes and loss from equity method investments Provision for (benefit from) income taxes Loss from equity method investments Net loss including non-controlling interests\\n\\nLess: net loss attributable to non-controlling interests, net of tax\\n\\n$\\n\\n13,000 $\\n\\n6,061 2,302 4,626 4,836 3,299 472 21,596 (8,596) (559) 722 (8,433) 45 (34) (8,512) (6) (8,506) $\\n\\n11,139 $\\n\\n5,154 1,819 3,583 2,205 2,666 575 16,002 (4,863) (458) (1,625) (6,946) (192) (34) (6,788) (20) (6,768) $\\n\\nNet loss attributable to Uber Technologies, Inc. Net loss per share attributable to Uber Technologies, Inc. 
common stockholders:\\n\\n$\\n\\nBasic\\n\\n$\\n\\n(6.81) $\\n\\n(3.86) $\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 574, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 77, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"e2a37b27-0644-59e4-9746-37d48592a299\", \"extraction_id\": \"6b86ac2f-ce33-5126-83e6-a8731ea677c8\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6805637085605776, \"text\": \"Less: net loss attributable to non-controlling interests, net of tax\\n\\n$\\n\\n11,139 $\\n\\n5,154 1,819 3,583 2,205 2,666 575 16,002 (4,863) (458) (1,625) (6,946) (192) (34) (6,788) (20) (6,768) $\\n\\n17,455\\n\\n9,351 1,877 4,789 2,054 2,316 902 21,289 (3,834) (483) 3,292 (1,025) (492) (37) (570) (74) (496)\\n\\nNet loss attributable to Uber Technologies, Inc.\\n\\n$\\n\\n54\\n\\nThe following table sets forth the components of our consolidated statements of operations for each of the periods presented as a percentage of revenue\\n\\n(1)\\n\\n:\\n\\nYear Ended December 31, 2021 2020\\n\\nRevenue Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately below Operations and support Sales and marketing Research and development General and administrative Depreciation and amortization\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 462, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 56, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"de30c3c9-cdfd-5872-bdaf-4859bef5c3a8\", \"extraction_id\": \"33bc6d8b-9fdc-5df7-be1d-fa7de176a0b5\", \"document_id\": 
\"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6537506580352783, \"text\": \"The Uber Service activities are performed to satisfy our sole performance obligation in the transaction, which is to connect Drivers and Merchants with end-\\n\\nusers to facilitate the completion of a successful transaction.\\n\\nIn 2020, we began charging Mobility end-users a fee to use the platform in certain markets. In these transactions, in addition to a performance obligation to Drivers, we also have a performance obligation to end-users, which is to connect end-users to Drivers in the marketplace. We recognize revenue when a trip is complete. We present revenue on a net basis for these transactions, as we do not control the service provided by Drivers to end-users. For the years ended December 31, 2020 and 2021, we recognized total revenue of $323 million and $336 million, respectively, associated with these fees charged to end-users.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 642, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 90, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"4b6ee3eb-daca-5930-bafe-946cad56cdcc\", \"extraction_id\": \"eb2bc121-0b00-5f70-8eb6-549e1fb1ed72\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6387766599655151, \"text\": \"Other income (expense), net\\n\\n$\\n\\nDuring the year ended December 31, 2020, gain on business divestitures, net represented a $154 million gain on the sale of our Uber Eats India operations to Zomato recognized in the first quarter of 2020 and a $77 million gain on the sale of our European Freight Business to sennder GmbH (\\u201cSennder\\u201d) recognized in 
the fourth quarter of 2020, partially offset by a $27 million loss on the sale of our JUMP operations to Lime recognized in the second quarter of 2020.\\n\\n(1)\\n\\nDuring the year ended December 31, 2021, gain on business divestitures, net represented a $1.6 billion gain on the sale of our ATG Business to Aurora\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 799, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 118, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"af5f93d3-8b2f-5e71-a358-0dd56c2f68ac\", \"extraction_id\": \"acf12622-2e6e-5234-9768-ba448294a81d\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6321083903312683, \"text\": \"2019\\n\\n100.0 %\\n\\n60.2 17.6 41.6 22.5 32.8 174.7 (74.7) \\u2014 2.8 (71.9) 0.1 (72.0)%\\n\\n2019 to 2020 % Change\\n\\n(35) %\\n\\nsecond quarter of 2021. These increases were offset by investments in driver supply by increasing driver incentives recorded as a reduction to revenue by $942.9 million in 2021 as compared to the prior year as rider demand outpaced driver supply during certain periods of the pandemic recovery in 2021. Revenue in 2020 was also higher in the first quarter of 2020 prior to the implementation of shelter-in-place orders and other travel restrictions across North America beginning March 2020.\\n\\nWe expect to see continued recovery in demand for our platform and the resulting positive impacts on revenue as there are more widespread immunity levels, more communities reopen and other restrictive travel and social distancing measures in response to COVID-19 are eased. 
However, we cannot predict the impact of COVID variants and the longer term impact of the pandemic on consumer behavior.\\n\\nCost of Revenue\\n\\n2021\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 493, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 63, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"d5379124-e7ff-509f-b47f-a79152eec2d4\", \"extraction_id\": \"2562b865-e4df-5376-9e70-927be9afbb7e\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.63012705682194, \"text\": \"For additional discussion, see the risk factor titled \\u201c\\u2014If we are unable to attract or maintain a critical mass of Drivers, consumers, merchants, shippers, and carriers, whether as a result of competition or other factors, our platform will become less appealing to platform users, and our financial results would be adversely impacted.\\u201d included in Part I, Item 1A of this Annual Report on Form 10-K as well our 2021 ESG Report and our 2021 People and Culture Report. The information in these reports is not a part of this Form 10-K.\\n\\nAdditional Information\\n\\nWe were founded in 2009 and incorporated as Ubercab, Inc., a Delaware corporation, in July 2010. In February 2011, we changed our name to Uber\\n\\nTechnologies, Inc. 
Our principal executive offices are located at 1515 3rd Street, San Francisco, California 94158, and our telephone number is (415) 612-8582.\\n\\n10\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 77, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 12, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"98a93be5-13ba-5bd6-9a18-e7ceef0fae88\", \"extraction_id\": \"8ab931e3-8f47-5598-90b8-928f387ec256\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6285917009601995, \"text\": \"Uber Technologies, Inc. (\\u201cUber,\\u201d \\u201cwe,\\u201d \\u201cour,\\u201d or \\u201cus\\u201d) was incorporated in Delaware in July 2010, and is headquartered in San Francisco, California. Uber is a technology platform that uses a massive network, leading technology, operational excellence and product expertise to power movement from point A to point B. Uber develops and operates proprietary technology applications supporting a variety of offerings on its platform (\\u201cplatform(s)\\u201d or \\u201cPlatform(s)\\u201d). Uber connects consumers (\\u201cRider(s)\\u201d) with independent providers of ride services (\\u201cMobility Driver(s)\\u201d) for ridesharing services, and connects Riders and other consumers (\\u201cEaters\\u201d) with restaurants, grocers and other stores (collectively, \\u201cMerchants\\u201d) with delivery service providers (\\u201cCouriers\\u201d) for meal preparation, grocery and other delivery services. 
Riders and Eaters are collectively referred to as \\u201cend-user(s)\\u201d or \\u201cconsumer(s).\\u201d Mobility Drivers and Couriers are collectively referred to as \\u201cDriver(s).\\u201d Uber also connects consumers with public\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 592, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 84, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"d87084d1-c52a-5a4b-96ad-9fc1cb98bfc5\", \"extraction_id\": \"8361bf60-bce2-56c2-b982-376a75e47d58\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6283430678639979, \"text\": \"Year Ended December 31, 2020\\n\\n1,000 49 1,189 (27) \\u2014 (138) (34) 8,939 (4) 3,824\\n\\n247 125 2,628 (527) (891) (224) 38 1,379 (92) (4,327)\\n\\n8,209 34 12,067 $\\n\\n12,067 (349) 7,391 $\\n\\n332 $ 133\\n\\n412 $ 82\\n\\n14,224 4,229 \\u2014 251 9 \\u2014 \\u2014\\n\\n\\u2014 \\u2014 \\u2014 196 3,898 171 1,634\\n\\n2021\\n\\n675 107 1,484 (27) (307) (226) 101 1,780 (69) 65\\n\\n7,391 349 7,805\\n\\n449 87\\n\\n\\u2014 \\u2014 232 184 1,868 1,018 \\u2014\\n\\nUBER TECHNOLOGIES, INC.\\n\\nNOTES TO CONSOLIDATED FINANCIAL STATEMENTS\\n\\nNote 1 \\u2013 Description of Business and Summary of Significant Accounting Policies\\n\\nDescription of Business\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 591, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 83, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}], \"kg_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-A9ggQHI4VJvvkrhyrkBwUtQQ26Ab2\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": 
null, \"message\": {\"content\": \"Uber did not make a profit in 2020. Instead, Uber reported a net loss attributable to Uber Technologies, Inc. of $6.8 billion for the year ended December 31, 2020 [3].\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1726872390, \"model\": \"gpt-4o-2024-05-13\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_3537616b13\", \"usage\": {\"completion_tokens\": 45, \"prompt_tokens\": 2320, \"total_tokens\": 2365, \"completion_tokens_details\": {\"reasoning_tokens\": 0}}}}", + "timestamp": "2024-09-20 22:46:31" + }, + { + "key": "completion_record", + "value": "{\"message_id\": \"7eded38c-be24-5fed-ada1-e24c88cfbb8c\", \"message_type\": \"assistant\", \"timestamp\": \"2024-09-20T15:26:07.338533\", \"feedback\": null, \"score\": null, \"completion_start_time\": \"2024-09-20T15:46:32.161266\", \"completion_end_time\": \"2024-09-20T15:46:33.544364\", \"search_query\": \"Who is John Snow?\", \"search_results\": {\"vector_search_results\": [{\"fragment_id\": \"7cbdab86-1689-5779-81bd-62f7eb3ab36d\", \"extraction_id\": \"866f85a0-b3d6-5fc5-9ca0-dbd2373eac58\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.26240772008895874, \"text\": \"Eddard (Ned) Stark\\n\\nThe Lord of Winterfell and new Hand of the King. A devoted father and dutiful lord, he is best characterized by his strong sense of honor, and he strives to always do what is right, regardless of his personal feelings.\\n\\nCatelyn (Cat) Tully\\n\\nNed\\u2019s wife and Lady Stark of Winterfell. She is intelligent, strong, and fiercely devoted to her family, leading her to seek out the person responsible for trying to kill her son Bran.\\n\\nDaenerys Stormborn Targaryen\\n\\nThe Dothraki khaleesi (queen) and Targaryen princess. 
She and her brother are the only surviving members of the Targaryen family, and she grows from a frightened girl to a confident ruler, while still maintaining her kindness, over the course of the novel.\\n\\nJon Snow\\n\\nNed Stark\\u2019s bastard son. Since Catelyn is not his mother, he is not a proper member of the Stark family, and he often feels himself an outsider. He is also a highly capable swordsman and thinker, with a knack for piercing observations.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 0, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"8e563fb1-4665-53a8-8a83-63a1f88e2aea\", \"extraction_id\": \"f6bc23b5-bc80-5e49-9b55-25e9abe97073\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.2610799748027318, \"text\": \"Jeor Mormont (Commander Mormont)\\n\\nLord Commander of the Night\\u2019s Watch at Castle Black. Commander Mormont is tough, old, and wise, and his men call him \\u201cThe Old Bear.\\u201d\\n\\nMaester Aemon\\n\\nThe chief man of learning at Castle Black. Despite his blind white eyes, Maester Aemon sees and speaks the truth in cryptic ways. Though few people realize it, Aemon is one of the few surviving members of the Targaryen family, but he has always put his vows to the Night\\u2019s Watch ahead of any family loyalties.\\n\\nSamwell (Sam) Tarly\\n\\nA new recruit to the Night\\u2019s Watch who is fat and cowardly but very smart. Sam loves to read and eat but hates to fight, and he quickly becomes one of Jon Snow\\u2019s closest companions at the Wall.\\n\\nSer Allister Thorne\\n\\nCastle Black\\u2019s resentful master-at-arms. He hard on the new recruits to the Night\\u2019s Watch and seems to enjoy making them suffer, causing Jon to rebel against him. 
During Robert\\u2019s rebellion against the former king, he was a Targaryen loyalist.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 7, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"eaf48cfe-592e-55fa-9f07-613a4f221c45\", \"extraction_id\": \"fdf6127b-e623-58bc-a50b-b7e7b040c03a\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.2572833588977643, \"text\": \"Varys (The Spider)\\n\\nThe Red Keep\\u2019s master of whispers and a eunuch. His role in the court is to run a network of spies and keep the king informed, and he often uses what he knows to manipulate those around him, including the king.\\n\\nRobert Baratheon\\n\\nThe corpulent king of Westeros. He loves to fight, drink, and sleep with women, and he hates the duties of ruling. He and Ned are long-time friends, and he was engaged to Ned\\u2019s sister until she died.\\n\\nSer Jorah Mormont\\n\\nAn exiled knight who serves unofficially as Daenerys\\u2019s chief advisor. Though he was exiled by Ned Stark for selling slaves, he is intelligent, valiant, and a great fighter. 
He swears allegiance to Viserys as true king of Westeros, but he also feeds information about the Targaryens back to Varys.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 3, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"3cb1c2db-01e4-5ea8-a39f-31f5949637f8\", \"extraction_id\": \"02b64e7c-5aa5-5380-8fa0-3d8b64866aa8\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.20903720205800558, \"text\": \"Aerys II Targaryen\\n\\nKing of Westeros before Robert Baratheon. He was known as The Mad King because of his cruelty. Aerys murdered Ned\\u2019s older brother, Brandon Stark, in the Red Keep\\u2019s throne room. At the end of the war that followed, Jaime Lannister slew Aerys in the same room.\\n\\nRhaegar Targaryen\\n\\nThe heir to Aerys and older brother of Daenerys and Viserys. Rhaegar kidnapped Lyanna Stark, Robert\\u2019s betrothed, helping to set in motion the events that led to Robert\\u2019s Rebellion. The war effectively ended when Robert slew Rhaegar with his warhammer on the Trident River.\\n\\nJon Arryn\\n\\nThe recently deceased Lord of the Eyrie and Hand of the King. Jon Arryn fostered Ned Stark and Robert Baratheon at the Eyrie. 
When Robert became king, Jon Arryn served as his Hand until his murder.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 10, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"ac15f806-8723-5fe7-832d-ed0427bd3550\", \"extraction_id\": \"416b07ed-cdd6-51fd-8f54-4164c0160860\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.19556865096092224, \"text\": \"\\u201cSanctions\\u201d means economic or financial sanctions or trade embargoes imposed, administered or enforced from time to time by (a)\\n\\nthe U.S. government, including those administered by the Office of Foreign Assets Control of the U.S. Department of the Treasury or the U.S. Department of State, or (b) the United Nations Security Council, the European Union, any European Union member state, Her Majesty\\u2019s Treasury of the United Kingdom or other relevant sanctions authority.\\n\\n\\u201cSARON\\u201d means, with respect to any Business Day, a rate per annum equal to the Swiss Average Rate Overnight for such Business\\n\\nDay published by the SARON Administrator on the SARON Administrator\\u2019s Website.\\n\\n\\u201cSARON Administrator\\u201d means the SIX Swiss Exchange AG (or any successor administrator of the Swiss Average Rate\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 1221, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 208, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"5d09c80f-ba83-5204-a6b3-f08831e150b0\", \"extraction_id\": \"f86a905c-8d82-52ff-ad72-a800ca3af6f4\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": 
\"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.19119779765605927, \"text\": \"Illyrio Mopatis\\n\\nAn obese merchant from the Free Cities who helps Daenerys and Viserys Targaryen. Illyrio is very rich and very well-informed. He is quick to please, especially when there is a possibility that his kindness will help him avoid trouble or gain greater fortune in the future.\\n\\nSer Barristan Selmy\\n\\nLord Commander of the Kingsguard. He has served kings Jaehaerys, Aerys II, and Robert. Though he has grown old, Barristan \\u201cThe Bold\\u201d is a formidable fighter. He is, and has always been, an honorable knight.\\n\\nRenly Baratheon\\n\\nThe youngest of the three Baratheon brothers. Renly is lighthearted and opportunistic, and unexpectedly ambitious. He serves on Robert\\u2019s royal council.\\n\\nStannis Baratheon\\n\\nThe middle brother of the three Baratheons. Stannis does not appear in A Game of Thrones, but as the brother of the king, he is a potential heir to the throne. 
Stannis does not seem to be well-liked.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 8, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"b47050a2-9906-5922-b6d4-52e4dedb499f\", \"extraction_id\": \"d436c7f7-d7c7-509e-a383-94a94360e601\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.18726881596221867, \"text\": \"121\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 1719, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 293, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"d143b410-f9a4-5f3b-bb46-fb412eda8201\", \"extraction_id\": \"37f940e2-18f4-50f5-93aa-cec422fc9211\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.18609081208705902, \"text\": \"Sandor (The Hound) Clegane\\n\\nPrince Joff\\u2019s unofficial bodyguard. Proud that he is not a knight, The Hound appears to have no scruples whatsoever and does what Joffrey orders, however cruel or unjust, without question. His face is scarred on one side by extensive burning inflicted by his brother, Gregor.\\n\\nRobb Stark\\n\\nThe eldest Stark son and thus heir to Ned Stark. Though just fourteen, he is mature beyond his age as well as being brave and dutiful like his father.\\n\\nMaester Luwin\\n\\nCounselor to Ned, Catelyn, and Robb. Luwin is old and wise, and his advice proves indispensible to the Starks.\\n\\nTheon Greyjoy\\n\\nThe Starks\\u2019s ward and Robb\\u2019s best friend. 
Ned Stark took the young Theon, now nineteen, as a ward after putting down a rebellion led by the Greyjoy family, and Theon consequently grew up with the Stark children as something like a brother.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 5, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"09f57c70-e7c6-548b-897f-fb8e9aba31c8\", \"extraction_id\": \"a021aa95-14d9-5301-9252-b06bcb852956\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.17496788948965758, \"text\": \"Shared and Shared Saver Rides enables unrelated parties traveling along similar routes to benefit from a discounted fare at the cost of possibly longer travel times. With a Shared or Shared Saver Ride, when the first rider requests a ride, our algorithms use the first rider\\u2019s destination and attempt to match them with other riders traveling along a similar route. If a match between riders is made, our algorithms re-route the driver to include the pick-up location of the matched rider on the active route. For Shared and Shared Saver Rides, drivers earn a fixed amount based on a number of factors, including the time and distance of the ride, the base fare charged to riders and the level of rider demand. We determine the rider fare based on the predicted time and distance of the ride, the level of rider demand and the likelihood of being able to match additional riders along the given route, and such fare is quoted to the riders prior to their commitment to the ride. 
The fare charged to the riders is decoupled\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 276, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 36, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"dbd5427c-f5ef-5fa6-83ae-a4a8ddbb48c2\", \"extraction_id\": \"14b08757-0819-5105-af37-509686dd6d01\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.16958434879779816, \"text\": \"s, drivers, and the communities they serve.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 77, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 13, \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"Who is John Snow?\"}}], \"kg_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-A9ggSfpp9sr3LZdfzBYse7JjT8eCK\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": null, \"message\": {\"content\": \"John Snow is Ned Stark\\u2019s bastard son. Since Catelyn is not his mother, he is not a proper member of the Stark family, and he often feels himself an outsider. 
He is also a highly capable swordsman and thinker, with a knack for piercing observations [1].\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1726872392, \"model\": \"gpt-4o-2024-05-13\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_3537616b13\", \"usage\": {\"completion_tokens\": 57, \"prompt_tokens\": 1810, \"total_tokens\": 1867, \"completion_tokens_details\": {\"reasoning_tokens\": 0}}}}", + "timestamp": "2024-09-20 22:46:33" + }, + { + "key": "search_latency", + "value": "0.23", + "timestamp": "2024-09-20 22:47:04" + }, + { + "key": "completion_record", + "value": "{\"message_id\": \"e3ef7ab5-9443-5bde-8b53-759cbcc1abcf\", \"message_type\": \"assistant\", \"timestamp\": \"2024-09-20T15:26:07.338533\", \"feedback\": null, \"score\": null, \"completion_start_time\": \"2024-09-20T15:47:05.166511\", \"completion_end_time\": \"2024-09-20T15:47:06.311135\", \"search_query\": \"What was Uber's profit in 2020?\", \"search_results\": {\"vector_search_results\": [{\"fragment_id\": \"07aa09c5-81a8-5a48-953a-532064a446f8\", \"extraction_id\": \"d3060c36-85dc-5e8d-b8ff-cfe4c1753ccc\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.7445549521115464, \"text\": \"Revenue was $17.5 billion, or up 57% year-over-year, reflecting the overall growth in our Delivery business and an increase in Freight revenue attributable to the acquisition of Transplace in the fourth quarter of 2021 as well as growth in the number of shippers and carriers on the network combined with an increase in volumes with our top shippers.\\n\\nNet loss attributable to Uber Technologies, Inc. 
was $496 million, a 93% improvement year-over-year, driven by a $1.6 billion pre-tax gain on the sale of our ATG Business to Aurora, a $1.6 billion pre-tax net benefit relating to Uber\\u2019s equity investments, as well as reductions in our fixed cost structure and increased variable cost efficiencies. Net loss attributable to Uber Technologies, Inc. also included $1.2 billion of stock-based compensation expense.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 445, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 53, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"2044e305-c042-5f0d-b05d-a2b97181f7a8\", \"extraction_id\": \"4329441a-5faf-5e9d-801f-ebd753ee1bd3\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.7071289420127869, \"text\": \"Total costs and expenses Loss from operations\\n\\nInterest expense Other income (expense), net Loss before income taxes and loss from equity method investments Provision for (benefit from) income taxes Loss from equity method investments Net loss including non-controlling interests\\n\\nLess: net loss attributable to non-controlling interests, net of tax\\n\\n100 %\\n\\n46 % 16 % 32 % 20 % 24 % 5 % 144 % (44)% (4)% (15)% (62)% (2)% \\u2014 % (61)% \\u2014 % (61)%\\n\\n100 %\\n\\n54 % 11 % 27 % 12 % 13 % 5 % 122 % (22)% (3)% 19 % (6)% (3)% \\u2014 % (3)% \\u2014 % (3)%\\n\\nNet loss attributable to Uber Technologies, Inc.\\n\\n(1)\\n\\nTotals of percentage of revenues may not foot due to rounding.\\n\\nComparison of the Years Ended December 31, 2020 and 2021\\n\\nRevenue\\n\\nYear Ended December 31,\\n\\n(In millions, except percentages)\\n\\n2020\\n\\n2021\\n\\n2020 to 2021 % Change\\n\\nRevenue\\n\\n$\\n\\n11,139 $\\n\\n17,455\\n\\n57 %\", \"metadata\": 
{\"version\": \"v0\", \"chunk_order\": 463, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 57, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"3840834b-7c74-5417-9252-9080e609fb2f\", \"extraction_id\": \"cf934fe1-926d-5525-a230-30946961cf28\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6809690201571295, \"text\": \"Year Ended December 31, 2020\\n\\nRevenue Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately below Operations and support Sales and marketing Research and development General and administrative Depreciation and amortization\\n\\nTotal costs and expenses Loss from operations\\n\\nInterest expense Other income (expense), net Loss before income taxes and loss from equity method investments Provision for (benefit from) income taxes Loss from equity method investments Net loss including non-controlling interests\\n\\nLess: net loss attributable to non-controlling interests, net of tax\\n\\n$\\n\\n13,000 $\\n\\n6,061 2,302 4,626 4,836 3,299 472 21,596 (8,596) (559) 722 (8,433) 45 (34) (8,512) (6) (8,506) $\\n\\n11,139 $\\n\\n5,154 1,819 3,583 2,205 2,666 575 16,002 (4,863) (458) (1,625) (6,946) (192) (34) (6,788) (20) (6,768) $\\n\\nNet loss attributable to Uber Technologies, Inc. Net loss per share attributable to Uber Technologies, Inc. 
common stockholders:\\n\\n$\\n\\nBasic\\n\\n$\\n\\n(6.81) $\\n\\n(3.86) $\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 574, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 77, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"e2a37b27-0644-59e4-9746-37d48592a299\", \"extraction_id\": \"6b86ac2f-ce33-5126-83e6-a8731ea677c8\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6805637085605776, \"text\": \"Less: net loss attributable to non-controlling interests, net of tax\\n\\n$\\n\\n11,139 $\\n\\n5,154 1,819 3,583 2,205 2,666 575 16,002 (4,863) (458) (1,625) (6,946) (192) (34) (6,788) (20) (6,768) $\\n\\n17,455\\n\\n9,351 1,877 4,789 2,054 2,316 902 21,289 (3,834) (483) 3,292 (1,025) (492) (37) (570) (74) (496)\\n\\nNet loss attributable to Uber Technologies, Inc.\\n\\n$\\n\\n54\\n\\nThe following table sets forth the components of our consolidated statements of operations for each of the periods presented as a percentage of revenue\\n\\n(1)\\n\\n:\\n\\nYear Ended December 31, 2021 2020\\n\\nRevenue Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately below Operations and support Sales and marketing Research and development General and administrative Depreciation and amortization\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 462, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 56, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"de30c3c9-cdfd-5872-bdaf-4859bef5c3a8\", \"extraction_id\": \"33bc6d8b-9fdc-5df7-be1d-fa7de176a0b5\", \"document_id\": 
\"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6537506580352783, \"text\": \"The Uber Service activities are performed to satisfy our sole performance obligation in the transaction, which is to connect Drivers and Merchants with end-\\n\\nusers to facilitate the completion of a successful transaction.\\n\\nIn 2020, we began charging Mobility end-users a fee to use the platform in certain markets. In these transactions, in addition to a performance obligation to Drivers, we also have a performance obligation to end-users, which is to connect end-users to Drivers in the marketplace. We recognize revenue when a trip is complete. We present revenue on a net basis for these transactions, as we do not control the service provided by Drivers to end-users. For the years ended December 31, 2020 and 2021, we recognized total revenue of $323 million and $336 million, respectively, associated with these fees charged to end-users.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 642, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 90, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"4b6ee3eb-daca-5930-bafe-946cad56cdcc\", \"extraction_id\": \"eb2bc121-0b00-5f70-8eb6-549e1fb1ed72\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6387766599655151, \"text\": \"Other income (expense), net\\n\\n$\\n\\nDuring the year ended December 31, 2020, gain on business divestitures, net represented a $154 million gain on the sale of our Uber Eats India operations to Zomato recognized in the first quarter of 2020 and a $77 million gain on the sale of our European Freight Business to sennder GmbH (\\u201cSennder\\u201d) recognized in 
the fourth quarter of 2020, partially offset by a $27 million loss on the sale of our JUMP operations to Lime recognized in the second quarter of 2020.\\n\\n(1)\\n\\nDuring the year ended December 31, 2021, gain on business divestitures, net represented a $1.6 billion gain on the sale of our ATG Business to Aurora\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 799, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 118, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"af5f93d3-8b2f-5e71-a358-0dd56c2f68ac\", \"extraction_id\": \"acf12622-2e6e-5234-9768-ba448294a81d\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6321083903312683, \"text\": \"2019\\n\\n100.0 %\\n\\n60.2 17.6 41.6 22.5 32.8 174.7 (74.7) \\u2014 2.8 (71.9) 0.1 (72.0)%\\n\\n2019 to 2020 % Change\\n\\n(35) %\\n\\nsecond quarter of 2021. These increases were offset by investments in driver supply by increasing driver incentives recorded as a reduction to revenue by $942.9 million in 2021 as compared to the prior year as rider demand outpaced driver supply during certain periods of the pandemic recovery in 2021. Revenue in 2020 was also higher in the first quarter of 2020 prior to the implementation of shelter-in-place orders and other travel restrictions across North America beginning March 2020.\\n\\nWe expect to see continued recovery in demand for our platform and the resulting positive impacts on revenue as there are more widespread immunity levels, more communities reopen and other restrictive travel and social distancing measures in response to COVID-19 are eased. 
However, we cannot predict the impact of COVID variants and the longer term impact of the pandemic on consumer behavior.\\n\\nCost of Revenue\\n\\n2021\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 493, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 63, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"d5379124-e7ff-509f-b47f-a79152eec2d4\", \"extraction_id\": \"2562b865-e4df-5376-9e70-927be9afbb7e\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.63012705682194, \"text\": \"For additional discussion, see the risk factor titled \\u201c\\u2014If we are unable to attract or maintain a critical mass of Drivers, consumers, merchants, shippers, and carriers, whether as a result of competition or other factors, our platform will become less appealing to platform users, and our financial results would be adversely impacted.\\u201d included in Part I, Item 1A of this Annual Report on Form 10-K as well our 2021 ESG Report and our 2021 People and Culture Report. The information in these reports is not a part of this Form 10-K.\\n\\nAdditional Information\\n\\nWe were founded in 2009 and incorporated as Ubercab, Inc., a Delaware corporation, in July 2010. In February 2011, we changed our name to Uber\\n\\nTechnologies, Inc. 
Our principal executive offices are located at 1515 3rd Street, San Francisco, California 94158, and our telephone number is (415) 612-8582.\\n\\n10\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 77, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 12, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"98a93be5-13ba-5bd6-9a18-e7ceef0fae88\", \"extraction_id\": \"8ab931e3-8f47-5598-90b8-928f387ec256\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6285917009601995, \"text\": \"Uber Technologies, Inc. (\\u201cUber,\\u201d \\u201cwe,\\u201d \\u201cour,\\u201d or \\u201cus\\u201d) was incorporated in Delaware in July 2010, and is headquartered in San Francisco, California. Uber is a technology platform that uses a massive network, leading technology, operational excellence and product expertise to power movement from point A to point B. Uber develops and operates proprietary technology applications supporting a variety of offerings on its platform (\\u201cplatform(s)\\u201d or \\u201cPlatform(s)\\u201d). Uber connects consumers (\\u201cRider(s)\\u201d) with independent providers of ride services (\\u201cMobility Driver(s)\\u201d) for ridesharing services, and connects Riders and other consumers (\\u201cEaters\\u201d) with restaurants, grocers and other stores (collectively, \\u201cMerchants\\u201d) with delivery service providers (\\u201cCouriers\\u201d) for meal preparation, grocery and other delivery services. 
Riders and Eaters are collectively referred to as \\u201cend-user(s)\\u201d or \\u201cconsumer(s).\\u201d Mobility Drivers and Couriers are collectively referred to as \\u201cDriver(s).\\u201d Uber also connects consumers with public\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 592, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 84, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"d87084d1-c52a-5a4b-96ad-9fc1cb98bfc5\", \"extraction_id\": \"8361bf60-bce2-56c2-b982-376a75e47d58\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6283430678639979, \"text\": \"Year Ended December 31, 2020\\n\\n1,000 49 1,189 (27) \\u2014 (138) (34) 8,939 (4) 3,824\\n\\n247 125 2,628 (527) (891) (224) 38 1,379 (92) (4,327)\\n\\n8,209 34 12,067 $\\n\\n12,067 (349) 7,391 $\\n\\n332 $ 133\\n\\n412 $ 82\\n\\n14,224 4,229 \\u2014 251 9 \\u2014 \\u2014\\n\\n\\u2014 \\u2014 \\u2014 196 3,898 171 1,634\\n\\n2021\\n\\n675 107 1,484 (27) (307) (226) 101 1,780 (69) 65\\n\\n7,391 349 7,805\\n\\n449 87\\n\\n\\u2014 \\u2014 232 184 1,868 1,018 \\u2014\\n\\nUBER TECHNOLOGIES, INC.\\n\\nNOTES TO CONSOLIDATED FINANCIAL STATEMENTS\\n\\nNote 1 \\u2013 Description of Business and Summary of Significant Accounting Policies\\n\\nDescription of Business\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 591, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 83, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}], \"kg_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-A9ggzDgzYQ95KymrEhTFz9JMYfqRu\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": 
null, \"message\": {\"content\": \"Uber did not make a profit in 2020. Instead, Uber reported a net loss attributable to Uber Technologies, Inc. of $6.8 billion for the year ended December 31, 2020 [3].\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1726872425, \"model\": \"gpt-4o-2024-05-13\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_3537616b13\", \"usage\": {\"completion_tokens\": 45, \"prompt_tokens\": 2320, \"total_tokens\": 2365, \"completion_tokens_details\": {\"reasoning_tokens\": 0}}}}", + "timestamp": "2024-09-20 22:47:06" + }, + { + "key": "completion_record", + "value": "{\"message_id\": \"08b096a7-a6ea-5587-8ccc-00de219de804\", \"message_type\": \"assistant\", \"timestamp\": \"2024-09-20T15:26:07.338533\", \"feedback\": null, \"score\": null, \"completion_start_time\": \"2024-09-20T15:47:06.603771\", \"completion_end_time\": \"2024-09-20T15:47:07.779956\", \"search_query\": \"Who is John Snow?\", \"search_results\": {\"vector_search_results\": [{\"fragment_id\": \"7cbdab86-1689-5779-81bd-62f7eb3ab36d\", \"extraction_id\": \"866f85a0-b3d6-5fc5-9ca0-dbd2373eac58\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.26240772008895874, \"text\": \"Eddard (Ned) Stark\\n\\nThe Lord of Winterfell and new Hand of the King. A devoted father and dutiful lord, he is best characterized by his strong sense of honor, and he strives to always do what is right, regardless of his personal feelings.\\n\\nCatelyn (Cat) Tully\\n\\nNed\\u2019s wife and Lady Stark of Winterfell. She is intelligent, strong, and fiercely devoted to her family, leading her to seek out the person responsible for trying to kill her son Bran.\\n\\nDaenerys Stormborn Targaryen\\n\\nThe Dothraki khaleesi (queen) and Targaryen princess. 
She and her brother are the only surviving members of the Targaryen family, and she grows from a frightened girl to a confident ruler, while still maintaining her kindness, over the course of the novel.\\n\\nJon Snow\\n\\nNed Stark\\u2019s bastard son. Since Catelyn is not his mother, he is not a proper member of the Stark family, and he often feels himself an outsider. He is also a highly capable swordsman and thinker, with a knack for piercing observations.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 0, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"8e563fb1-4665-53a8-8a83-63a1f88e2aea\", \"extraction_id\": \"f6bc23b5-bc80-5e49-9b55-25e9abe97073\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.2610799748027318, \"text\": \"Jeor Mormont (Commander Mormont)\\n\\nLord Commander of the Night\\u2019s Watch at Castle Black. Commander Mormont is tough, old, and wise, and his men call him \\u201cThe Old Bear.\\u201d\\n\\nMaester Aemon\\n\\nThe chief man of learning at Castle Black. Despite his blind white eyes, Maester Aemon sees and speaks the truth in cryptic ways. Though few people realize it, Aemon is one of the few surviving members of the Targaryen family, but he has always put his vows to the Night\\u2019s Watch ahead of any family loyalties.\\n\\nSamwell (Sam) Tarly\\n\\nA new recruit to the Night\\u2019s Watch who is fat and cowardly but very smart. Sam loves to read and eat but hates to fight, and he quickly becomes one of Jon Snow\\u2019s closest companions at the Wall.\\n\\nSer Allister Thorne\\n\\nCastle Black\\u2019s resentful master-at-arms. He hard on the new recruits to the Night\\u2019s Watch and seems to enjoy making them suffer, causing Jon to rebel against him. 
During Robert\\u2019s rebellion against the former king, he was a Targaryen loyalist.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 7, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"eaf48cfe-592e-55fa-9f07-613a4f221c45\", \"extraction_id\": \"fdf6127b-e623-58bc-a50b-b7e7b040c03a\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.2572833588977643, \"text\": \"Varys (The Spider)\\n\\nThe Red Keep\\u2019s master of whispers and a eunuch. His role in the court is to run a network of spies and keep the king informed, and he often uses what he knows to manipulate those around him, including the king.\\n\\nRobert Baratheon\\n\\nThe corpulent king of Westeros. He loves to fight, drink, and sleep with women, and he hates the duties of ruling. He and Ned are long-time friends, and he was engaged to Ned\\u2019s sister until she died.\\n\\nSer Jorah Mormont\\n\\nAn exiled knight who serves unofficially as Daenerys\\u2019s chief advisor. Though he was exiled by Ned Stark for selling slaves, he is intelligent, valiant, and a great fighter. 
He swears allegiance to Viserys as true king of Westeros, but he also feeds information about the Targaryens back to Varys.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 3, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"3cb1c2db-01e4-5ea8-a39f-31f5949637f8\", \"extraction_id\": \"02b64e7c-5aa5-5380-8fa0-3d8b64866aa8\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.20903720205800558, \"text\": \"Aerys II Targaryen\\n\\nKing of Westeros before Robert Baratheon. He was known as The Mad King because of his cruelty. Aerys murdered Ned\\u2019s older brother, Brandon Stark, in the Red Keep\\u2019s throne room. At the end of the war that followed, Jaime Lannister slew Aerys in the same room.\\n\\nRhaegar Targaryen\\n\\nThe heir to Aerys and older brother of Daenerys and Viserys. Rhaegar kidnapped Lyanna Stark, Robert\\u2019s betrothed, helping to set in motion the events that led to Robert\\u2019s Rebellion. The war effectively ended when Robert slew Rhaegar with his warhammer on the Trident River.\\n\\nJon Arryn\\n\\nThe recently deceased Lord of the Eyrie and Hand of the King. Jon Arryn fostered Ned Stark and Robert Baratheon at the Eyrie. 
When Robert became king, Jon Arryn served as his Hand until his murder.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 10, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"ac15f806-8723-5fe7-832d-ed0427bd3550\", \"extraction_id\": \"416b07ed-cdd6-51fd-8f54-4164c0160860\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.19556865096092224, \"text\": \"\\u201cSanctions\\u201d means economic or financial sanctions or trade embargoes imposed, administered or enforced from time to time by (a)\\n\\nthe U.S. government, including those administered by the Office of Foreign Assets Control of the U.S. Department of the Treasury or the U.S. Department of State, or (b) the United Nations Security Council, the European Union, any European Union member state, Her Majesty\\u2019s Treasury of the United Kingdom or other relevant sanctions authority.\\n\\n\\u201cSARON\\u201d means, with respect to any Business Day, a rate per annum equal to the Swiss Average Rate Overnight for such Business\\n\\nDay published by the SARON Administrator on the SARON Administrator\\u2019s Website.\\n\\n\\u201cSARON Administrator\\u201d means the SIX Swiss Exchange AG (or any successor administrator of the Swiss Average Rate\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 1221, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 208, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"5d09c80f-ba83-5204-a6b3-f08831e150b0\", \"extraction_id\": \"f86a905c-8d82-52ff-ad72-a800ca3af6f4\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": 
\"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.19119779765605927, \"text\": \"Illyrio Mopatis\\n\\nAn obese merchant from the Free Cities who helps Daenerys and Viserys Targaryen. Illyrio is very rich and very well-informed. He is quick to please, especially when there is a possibility that his kindness will help him avoid trouble or gain greater fortune in the future.\\n\\nSer Barristan Selmy\\n\\nLord Commander of the Kingsguard. He has served kings Jaehaerys, Aerys II, and Robert. Though he has grown old, Barristan \\u201cThe Bold\\u201d is a formidable fighter. He is, and has always been, an honorable knight.\\n\\nRenly Baratheon\\n\\nThe youngest of the three Baratheon brothers. Renly is lighthearted and opportunistic, and unexpectedly ambitious. He serves on Robert\\u2019s royal council.\\n\\nStannis Baratheon\\n\\nThe middle brother of the three Baratheons. Stannis does not appear in A Game of Thrones, but as the brother of the king, he is a potential heir to the throne. 
Stannis does not seem to be well-liked.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 8, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"b47050a2-9906-5922-b6d4-52e4dedb499f\", \"extraction_id\": \"d436c7f7-d7c7-509e-a383-94a94360e601\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.18726881596221867, \"text\": \"121\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 1719, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 293, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"d143b410-f9a4-5f3b-bb46-fb412eda8201\", \"extraction_id\": \"37f940e2-18f4-50f5-93aa-cec422fc9211\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.18609081208705902, \"text\": \"Sandor (The Hound) Clegane\\n\\nPrince Joff\\u2019s unofficial bodyguard. Proud that he is not a knight, The Hound appears to have no scruples whatsoever and does what Joffrey orders, however cruel or unjust, without question. His face is scarred on one side by extensive burning inflicted by his brother, Gregor.\\n\\nRobb Stark\\n\\nThe eldest Stark son and thus heir to Ned Stark. Though just fourteen, he is mature beyond his age as well as being brave and dutiful like his father.\\n\\nMaester Luwin\\n\\nCounselor to Ned, Catelyn, and Robb. Luwin is old and wise, and his advice proves indispensible to the Starks.\\n\\nTheon Greyjoy\\n\\nThe Starks\\u2019s ward and Robb\\u2019s best friend. 
Ned Stark took the young Theon, now nineteen, as a ward after putting down a rebellion led by the Greyjoy family, and Theon consequently grew up with the Stark children as something like a brother.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 5, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"09f57c70-e7c6-548b-897f-fb8e9aba31c8\", \"extraction_id\": \"a021aa95-14d9-5301-9252-b06bcb852956\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.17496788948965758, \"text\": \"Shared and Shared Saver Rides enables unrelated parties traveling along similar routes to benefit from a discounted fare at the cost of possibly longer travel times. With a Shared or Shared Saver Ride, when the first rider requests a ride, our algorithms use the first rider\\u2019s destination and attempt to match them with other riders traveling along a similar route. If a match between riders is made, our algorithms re-route the driver to include the pick-up location of the matched rider on the active route. For Shared and Shared Saver Rides, drivers earn a fixed amount based on a number of factors, including the time and distance of the ride, the base fare charged to riders and the level of rider demand. We determine the rider fare based on the predicted time and distance of the ride, the level of rider demand and the likelihood of being able to match additional riders along the given route, and such fare is quoted to the riders prior to their commitment to the ride. 
The fare charged to the riders is decoupled\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 276, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 36, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"dbd5427c-f5ef-5fa6-83ae-a4a8ddbb48c2\", \"extraction_id\": \"14b08757-0819-5105-af37-509686dd6d01\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.16958434879779816, \"text\": \"s, drivers, and the communities they serve.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 77, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 13, \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"Who is John Snow?\"}}], \"kg_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-A9gh0dsI6lcYo1d4Mrfj1UU4BNHwB\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": null, \"message\": {\"content\": \"John Snow is Ned Stark\\u2019s bastard son. Since Catelyn is not his mother, he is not a proper member of the Stark family, and he often feels himself an outsider. 
He is also a highly capable swordsman and thinker, with a knack for piercing observations [1].\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1726872426, \"model\": \"gpt-4o-2024-05-13\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_3537616b13\", \"usage\": {\"completion_tokens\": 57, \"prompt_tokens\": 1810, \"total_tokens\": 1867, \"completion_tokens_details\": {\"reasoning_tokens\": 0}}}}", + "timestamp": "2024-09-20 22:47:07" + }, + { + "key": "search_latency", + "value": "0.30", + "timestamp": "2024-09-20 22:47:14" + }, + { + "key": "completion_record", + "value": "{\"message_id\": \"caa18bcc-a0dd-5146-bdba-4bd223901c27\", \"message_type\": \"assistant\", \"timestamp\": \"2024-09-20T15:26:07.338533\", \"feedback\": null, \"score\": null, \"completion_start_time\": \"2024-09-20T15:47:15.194142\", \"completion_end_time\": \"2024-09-20T15:47:16.551823\", \"search_query\": \"What was Uber's profit in 2020?\", \"search_results\": {\"vector_search_results\": [{\"fragment_id\": \"07aa09c5-81a8-5a48-953a-532064a446f8\", \"extraction_id\": \"d3060c36-85dc-5e8d-b8ff-cfe4c1753ccc\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.7445549521115464, \"text\": \"Revenue was $17.5 billion, or up 57% year-over-year, reflecting the overall growth in our Delivery business and an increase in Freight revenue attributable to the acquisition of Transplace in the fourth quarter of 2021 as well as growth in the number of shippers and carriers on the network combined with an increase in volumes with our top shippers.\\n\\nNet loss attributable to Uber Technologies, Inc. 
was $496 million, a 93% improvement year-over-year, driven by a $1.6 billion pre-tax gain on the sale of our ATG Business to Aurora, a $1.6 billion pre-tax net benefit relating to Uber\\u2019s equity investments, as well as reductions in our fixed cost structure and increased variable cost efficiencies. Net loss attributable to Uber Technologies, Inc. also included $1.2 billion of stock-based compensation expense.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 445, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 53, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"2044e305-c042-5f0d-b05d-a2b97181f7a8\", \"extraction_id\": \"4329441a-5faf-5e9d-801f-ebd753ee1bd3\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.7071289420127869, \"text\": \"Total costs and expenses Loss from operations\\n\\nInterest expense Other income (expense), net Loss before income taxes and loss from equity method investments Provision for (benefit from) income taxes Loss from equity method investments Net loss including non-controlling interests\\n\\nLess: net loss attributable to non-controlling interests, net of tax\\n\\n100 %\\n\\n46 % 16 % 32 % 20 % 24 % 5 % 144 % (44)% (4)% (15)% (62)% (2)% \\u2014 % (61)% \\u2014 % (61)%\\n\\n100 %\\n\\n54 % 11 % 27 % 12 % 13 % 5 % 122 % (22)% (3)% 19 % (6)% (3)% \\u2014 % (3)% \\u2014 % (3)%\\n\\nNet loss attributable to Uber Technologies, Inc.\\n\\n(1)\\n\\nTotals of percentage of revenues may not foot due to rounding.\\n\\nComparison of the Years Ended December 31, 2020 and 2021\\n\\nRevenue\\n\\nYear Ended December 31,\\n\\n(In millions, except percentages)\\n\\n2020\\n\\n2021\\n\\n2020 to 2021 % Change\\n\\nRevenue\\n\\n$\\n\\n11,139 $\\n\\n17,455\\n\\n57 %\", \"metadata\": 
{\"version\": \"v0\", \"chunk_order\": 463, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 57, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"3840834b-7c74-5417-9252-9080e609fb2f\", \"extraction_id\": \"cf934fe1-926d-5525-a230-30946961cf28\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6809690201571295, \"text\": \"Year Ended December 31, 2020\\n\\nRevenue Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately below Operations and support Sales and marketing Research and development General and administrative Depreciation and amortization\\n\\nTotal costs and expenses Loss from operations\\n\\nInterest expense Other income (expense), net Loss before income taxes and loss from equity method investments Provision for (benefit from) income taxes Loss from equity method investments Net loss including non-controlling interests\\n\\nLess: net loss attributable to non-controlling interests, net of tax\\n\\n$\\n\\n13,000 $\\n\\n6,061 2,302 4,626 4,836 3,299 472 21,596 (8,596) (559) 722 (8,433) 45 (34) (8,512) (6) (8,506) $\\n\\n11,139 $\\n\\n5,154 1,819 3,583 2,205 2,666 575 16,002 (4,863) (458) (1,625) (6,946) (192) (34) (6,788) (20) (6,768) $\\n\\nNet loss attributable to Uber Technologies, Inc. Net loss per share attributable to Uber Technologies, Inc. 
common stockholders:\\n\\n$\\n\\nBasic\\n\\n$\\n\\n(6.81) $\\n\\n(3.86) $\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 574, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 77, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"e2a37b27-0644-59e4-9746-37d48592a299\", \"extraction_id\": \"6b86ac2f-ce33-5126-83e6-a8731ea677c8\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6805637085605776, \"text\": \"Less: net loss attributable to non-controlling interests, net of tax\\n\\n$\\n\\n11,139 $\\n\\n5,154 1,819 3,583 2,205 2,666 575 16,002 (4,863) (458) (1,625) (6,946) (192) (34) (6,788) (20) (6,768) $\\n\\n17,455\\n\\n9,351 1,877 4,789 2,054 2,316 902 21,289 (3,834) (483) 3,292 (1,025) (492) (37) (570) (74) (496)\\n\\nNet loss attributable to Uber Technologies, Inc.\\n\\n$\\n\\n54\\n\\nThe following table sets forth the components of our consolidated statements of operations for each of the periods presented as a percentage of revenue\\n\\n(1)\\n\\n:\\n\\nYear Ended December 31, 2021 2020\\n\\nRevenue Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately below Operations and support Sales and marketing Research and development General and administrative Depreciation and amortization\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 462, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 56, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"de30c3c9-cdfd-5872-bdaf-4859bef5c3a8\", \"extraction_id\": \"33bc6d8b-9fdc-5df7-be1d-fa7de176a0b5\", \"document_id\": 
\"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6537506580352783, \"text\": \"The Uber Service activities are performed to satisfy our sole performance obligation in the transaction, which is to connect Drivers and Merchants with end-\\n\\nusers to facilitate the completion of a successful transaction.\\n\\nIn 2020, we began charging Mobility end-users a fee to use the platform in certain markets. In these transactions, in addition to a performance obligation to Drivers, we also have a performance obligation to end-users, which is to connect end-users to Drivers in the marketplace. We recognize revenue when a trip is complete. We present revenue on a net basis for these transactions, as we do not control the service provided by Drivers to end-users. For the years ended December 31, 2020 and 2021, we recognized total revenue of $323 million and $336 million, respectively, associated with these fees charged to end-users.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 642, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 90, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"4b6ee3eb-daca-5930-bafe-946cad56cdcc\", \"extraction_id\": \"eb2bc121-0b00-5f70-8eb6-549e1fb1ed72\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6387766599655151, \"text\": \"Other income (expense), net\\n\\n$\\n\\nDuring the year ended December 31, 2020, gain on business divestitures, net represented a $154 million gain on the sale of our Uber Eats India operations to Zomato recognized in the first quarter of 2020 and a $77 million gain on the sale of our European Freight Business to sennder GmbH (\\u201cSennder\\u201d) recognized in 
the fourth quarter of 2020, partially offset by a $27 million loss on the sale of our JUMP operations to Lime recognized in the second quarter of 2020.\\n\\n(1)\\n\\nDuring the year ended December 31, 2021, gain on business divestitures, net represented a $1.6 billion gain on the sale of our ATG Business to Aurora\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 799, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 118, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"af5f93d3-8b2f-5e71-a358-0dd56c2f68ac\", \"extraction_id\": \"acf12622-2e6e-5234-9768-ba448294a81d\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6321083903312683, \"text\": \"2019\\n\\n100.0 %\\n\\n60.2 17.6 41.6 22.5 32.8 174.7 (74.7) \\u2014 2.8 (71.9) 0.1 (72.0)%\\n\\n2019 to 2020 % Change\\n\\n(35) %\\n\\nsecond quarter of 2021. These increases were offset by investments in driver supply by increasing driver incentives recorded as a reduction to revenue by $942.9 million in 2021 as compared to the prior year as rider demand outpaced driver supply during certain periods of the pandemic recovery in 2021. Revenue in 2020 was also higher in the first quarter of 2020 prior to the implementation of shelter-in-place orders and other travel restrictions across North America beginning March 2020.\\n\\nWe expect to see continued recovery in demand for our platform and the resulting positive impacts on revenue as there are more widespread immunity levels, more communities reopen and other restrictive travel and social distancing measures in response to COVID-19 are eased. 
However, we cannot predict the impact of COVID variants and the longer term impact of the pandemic on consumer behavior.\\n\\nCost of Revenue\\n\\n2021\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 493, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 63, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"d5379124-e7ff-509f-b47f-a79152eec2d4\", \"extraction_id\": \"2562b865-e4df-5376-9e70-927be9afbb7e\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.63012705682194, \"text\": \"For additional discussion, see the risk factor titled \\u201c\\u2014If we are unable to attract or maintain a critical mass of Drivers, consumers, merchants, shippers, and carriers, whether as a result of competition or other factors, our platform will become less appealing to platform users, and our financial results would be adversely impacted.\\u201d included in Part I, Item 1A of this Annual Report on Form 10-K as well our 2021 ESG Report and our 2021 People and Culture Report. The information in these reports is not a part of this Form 10-K.\\n\\nAdditional Information\\n\\nWe were founded in 2009 and incorporated as Ubercab, Inc., a Delaware corporation, in July 2010. In February 2011, we changed our name to Uber\\n\\nTechnologies, Inc. 
Our principal executive offices are located at 1515 3rd Street, San Francisco, California 94158, and our telephone number is (415) 612-8582.\\n\\n10\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 77, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 12, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"98a93be5-13ba-5bd6-9a18-e7ceef0fae88\", \"extraction_id\": \"8ab931e3-8f47-5598-90b8-928f387ec256\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6285917009601995, \"text\": \"Uber Technologies, Inc. (\\u201cUber,\\u201d \\u201cwe,\\u201d \\u201cour,\\u201d or \\u201cus\\u201d) was incorporated in Delaware in July 2010, and is headquartered in San Francisco, California. Uber is a technology platform that uses a massive network, leading technology, operational excellence and product expertise to power movement from point A to point B. Uber develops and operates proprietary technology applications supporting a variety of offerings on its platform (\\u201cplatform(s)\\u201d or \\u201cPlatform(s)\\u201d). Uber connects consumers (\\u201cRider(s)\\u201d) with independent providers of ride services (\\u201cMobility Driver(s)\\u201d) for ridesharing services, and connects Riders and other consumers (\\u201cEaters\\u201d) with restaurants, grocers and other stores (collectively, \\u201cMerchants\\u201d) with delivery service providers (\\u201cCouriers\\u201d) for meal preparation, grocery and other delivery services. 
Riders and Eaters are collectively referred to as \\u201cend-user(s)\\u201d or \\u201cconsumer(s).\\u201d Mobility Drivers and Couriers are collectively referred to as \\u201cDriver(s).\\u201d Uber also connects consumers with public\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 592, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 84, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"d87084d1-c52a-5a4b-96ad-9fc1cb98bfc5\", \"extraction_id\": \"8361bf60-bce2-56c2-b982-376a75e47d58\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6283430678639979, \"text\": \"Year Ended December 31, 2020\\n\\n1,000 49 1,189 (27) \\u2014 (138) (34) 8,939 (4) 3,824\\n\\n247 125 2,628 (527) (891) (224) 38 1,379 (92) (4,327)\\n\\n8,209 34 12,067 $\\n\\n12,067 (349) 7,391 $\\n\\n332 $ 133\\n\\n412 $ 82\\n\\n14,224 4,229 \\u2014 251 9 \\u2014 \\u2014\\n\\n\\u2014 \\u2014 \\u2014 196 3,898 171 1,634\\n\\n2021\\n\\n675 107 1,484 (27) (307) (226) 101 1,780 (69) 65\\n\\n7,391 349 7,805\\n\\n449 87\\n\\n\\u2014 \\u2014 232 184 1,868 1,018 \\u2014\\n\\nUBER TECHNOLOGIES, INC.\\n\\nNOTES TO CONSOLIDATED FINANCIAL STATEMENTS\\n\\nNote 1 \\u2013 Description of Business and Summary of Significant Accounting Policies\\n\\nDescription of Business\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 591, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 83, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}], \"kg_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-A9gh9rEwOfdzePGRR2obbBMyp0RRa\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": 
null, \"message\": {\"content\": \"Uber did not make a profit in 2020. Instead, Uber reported a net loss attributable to Uber Technologies, Inc. of $6.8 billion for the year ended December 31, 2020 [3].\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1726872435, \"model\": \"gpt-4o-2024-05-13\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_3537616b13\", \"usage\": {\"completion_tokens\": 45, \"prompt_tokens\": 2320, \"total_tokens\": 2365, \"completion_tokens_details\": {\"reasoning_tokens\": 0}}}}", + "timestamp": "2024-09-20 22:47:16" + }, + { + "key": "completion_record", + "value": "{\"message_id\": \"78d50a85-4cae-513c-b0eb-d65b16e5e957\", \"message_type\": \"assistant\", \"timestamp\": \"2024-09-20T15:26:07.338533\", \"feedback\": null, \"score\": null, \"completion_start_time\": \"2024-09-20T15:47:16.818722\", \"completion_end_time\": \"2024-09-20T15:47:17.990428\", \"search_query\": \"Who is John Snow?\", \"search_results\": {\"vector_search_results\": [{\"fragment_id\": \"7cbdab86-1689-5779-81bd-62f7eb3ab36d\", \"extraction_id\": \"866f85a0-b3d6-5fc5-9ca0-dbd2373eac58\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.26240772008895874, \"text\": \"Eddard (Ned) Stark\\n\\nThe Lord of Winterfell and new Hand of the King. A devoted father and dutiful lord, he is best characterized by his strong sense of honor, and he strives to always do what is right, regardless of his personal feelings.\\n\\nCatelyn (Cat) Tully\\n\\nNed\\u2019s wife and Lady Stark of Winterfell. She is intelligent, strong, and fiercely devoted to her family, leading her to seek out the person responsible for trying to kill her son Bran.\\n\\nDaenerys Stormborn Targaryen\\n\\nThe Dothraki khaleesi (queen) and Targaryen princess. 
She and her brother are the only surviving members of the Targaryen family, and she grows from a frightened girl to a confident ruler, while still maintaining her kindness, over the course of the novel.\\n\\nJon Snow\\n\\nNed Stark\\u2019s bastard son. Since Catelyn is not his mother, he is not a proper member of the Stark family, and he often feels himself an outsider. He is also a highly capable swordsman and thinker, with a knack for piercing observations.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 0, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"8e563fb1-4665-53a8-8a83-63a1f88e2aea\", \"extraction_id\": \"f6bc23b5-bc80-5e49-9b55-25e9abe97073\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.2610799748027318, \"text\": \"Jeor Mormont (Commander Mormont)\\n\\nLord Commander of the Night\\u2019s Watch at Castle Black. Commander Mormont is tough, old, and wise, and his men call him \\u201cThe Old Bear.\\u201d\\n\\nMaester Aemon\\n\\nThe chief man of learning at Castle Black. Despite his blind white eyes, Maester Aemon sees and speaks the truth in cryptic ways. Though few people realize it, Aemon is one of the few surviving members of the Targaryen family, but he has always put his vows to the Night\\u2019s Watch ahead of any family loyalties.\\n\\nSamwell (Sam) Tarly\\n\\nA new recruit to the Night\\u2019s Watch who is fat and cowardly but very smart. Sam loves to read and eat but hates to fight, and he quickly becomes one of Jon Snow\\u2019s closest companions at the Wall.\\n\\nSer Allister Thorne\\n\\nCastle Black\\u2019s resentful master-at-arms. He hard on the new recruits to the Night\\u2019s Watch and seems to enjoy making them suffer, causing Jon to rebel against him. 
During Robert\\u2019s rebellion against the former king, he was a Targaryen loyalist.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 7, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"eaf48cfe-592e-55fa-9f07-613a4f221c45\", \"extraction_id\": \"fdf6127b-e623-58bc-a50b-b7e7b040c03a\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.2572833588977643, \"text\": \"Varys (The Spider)\\n\\nThe Red Keep\\u2019s master of whispers and a eunuch. His role in the court is to run a network of spies and keep the king informed, and he often uses what he knows to manipulate those around him, including the king.\\n\\nRobert Baratheon\\n\\nThe corpulent king of Westeros. He loves to fight, drink, and sleep with women, and he hates the duties of ruling. He and Ned are long-time friends, and he was engaged to Ned\\u2019s sister until she died.\\n\\nSer Jorah Mormont\\n\\nAn exiled knight who serves unofficially as Daenerys\\u2019s chief advisor. Though he was exiled by Ned Stark for selling slaves, he is intelligent, valiant, and a great fighter. 
He swears allegiance to Viserys as true king of Westeros, but he also feeds information about the Targaryens back to Varys.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 3, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"3cb1c2db-01e4-5ea8-a39f-31f5949637f8\", \"extraction_id\": \"02b64e7c-5aa5-5380-8fa0-3d8b64866aa8\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.20903720205800558, \"text\": \"Aerys II Targaryen\\n\\nKing of Westeros before Robert Baratheon. He was known as The Mad King because of his cruelty. Aerys murdered Ned\\u2019s older brother, Brandon Stark, in the Red Keep\\u2019s throne room. At the end of the war that followed, Jaime Lannister slew Aerys in the same room.\\n\\nRhaegar Targaryen\\n\\nThe heir to Aerys and older brother of Daenerys and Viserys. Rhaegar kidnapped Lyanna Stark, Robert\\u2019s betrothed, helping to set in motion the events that led to Robert\\u2019s Rebellion. The war effectively ended when Robert slew Rhaegar with his warhammer on the Trident River.\\n\\nJon Arryn\\n\\nThe recently deceased Lord of the Eyrie and Hand of the King. Jon Arryn fostered Ned Stark and Robert Baratheon at the Eyrie. 
When Robert became king, Jon Arryn served as his Hand until his murder.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 10, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"ac15f806-8723-5fe7-832d-ed0427bd3550\", \"extraction_id\": \"416b07ed-cdd6-51fd-8f54-4164c0160860\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.19556865096092224, \"text\": \"\\u201cSanctions\\u201d means economic or financial sanctions or trade embargoes imposed, administered or enforced from time to time by (a)\\n\\nthe U.S. government, including those administered by the Office of Foreign Assets Control of the U.S. Department of the Treasury or the U.S. Department of State, or (b) the United Nations Security Council, the European Union, any European Union member state, Her Majesty\\u2019s Treasury of the United Kingdom or other relevant sanctions authority.\\n\\n\\u201cSARON\\u201d means, with respect to any Business Day, a rate per annum equal to the Swiss Average Rate Overnight for such Business\\n\\nDay published by the SARON Administrator on the SARON Administrator\\u2019s Website.\\n\\n\\u201cSARON Administrator\\u201d means the SIX Swiss Exchange AG (or any successor administrator of the Swiss Average Rate\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 1221, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 208, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"5d09c80f-ba83-5204-a6b3-f08831e150b0\", \"extraction_id\": \"f86a905c-8d82-52ff-ad72-a800ca3af6f4\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": 
\"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.19119779765605927, \"text\": \"Illyrio Mopatis\\n\\nAn obese merchant from the Free Cities who helps Daenerys and Viserys Targaryen. Illyrio is very rich and very well-informed. He is quick to please, especially when there is a possibility that his kindness will help him avoid trouble or gain greater fortune in the future.\\n\\nSer Barristan Selmy\\n\\nLord Commander of the Kingsguard. He has served kings Jaehaerys, Aerys II, and Robert. Though he has grown old, Barristan \\u201cThe Bold\\u201d is a formidable fighter. He is, and has always been, an honorable knight.\\n\\nRenly Baratheon\\n\\nThe youngest of the three Baratheon brothers. Renly is lighthearted and opportunistic, and unexpectedly ambitious. He serves on Robert\\u2019s royal council.\\n\\nStannis Baratheon\\n\\nThe middle brother of the three Baratheons. Stannis does not appear in A Game of Thrones, but as the brother of the king, he is a potential heir to the throne. 
Stannis does not seem to be well-liked.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 8, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"b47050a2-9906-5922-b6d4-52e4dedb499f\", \"extraction_id\": \"d436c7f7-d7c7-509e-a383-94a94360e601\", \"document_id\": \"3e157b3a-8469-51db-90d9-52e7d896b49b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.18726881596221867, \"text\": \"121\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 1719, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 293, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"d143b410-f9a4-5f3b-bb46-fb412eda8201\", \"extraction_id\": \"37f940e2-18f4-50f5-93aa-cec422fc9211\", \"document_id\": \"716fea3a-826b-5b27-8e59-ffbd1a35455a\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.18609081208705902, \"text\": \"Sandor (The Hound) Clegane\\n\\nPrince Joff\\u2019s unofficial bodyguard. Proud that he is not a knight, The Hound appears to have no scruples whatsoever and does what Joffrey orders, however cruel or unjust, without question. His face is scarred on one side by extensive burning inflicted by his brother, Gregor.\\n\\nRobb Stark\\n\\nThe eldest Stark son and thus heir to Ned Stark. Though just fourteen, he is mature beyond his age as well as being brave and dutiful like his father.\\n\\nMaester Luwin\\n\\nCounselor to Ned, Catelyn, and Robb. Luwin is old and wise, and his advice proves indispensible to the Starks.\\n\\nTheon Greyjoy\\n\\nThe Starks\\u2019s ward and Robb\\u2019s best friend. 
Ned Stark took the young Theon, now nineteen, as a ward after putting down a rebellion led by the Greyjoy family, and Theon consequently grew up with the Stark children as something like a brother.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 5, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"09f57c70-e7c6-548b-897f-fb8e9aba31c8\", \"extraction_id\": \"a021aa95-14d9-5301-9252-b06bcb852956\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.17496788948965758, \"text\": \"Shared and Shared Saver Rides enables unrelated parties traveling along similar routes to benefit from a discounted fare at the cost of possibly longer travel times. With a Shared or Shared Saver Ride, when the first rider requests a ride, our algorithms use the first rider\\u2019s destination and attempt to match them with other riders traveling along a similar route. If a match between riders is made, our algorithms re-route the driver to include the pick-up location of the matched rider on the active route. For Shared and Shared Saver Rides, drivers earn a fixed amount based on a number of factors, including the time and distance of the ride, the base fare charged to riders and the level of rider demand. We determine the rider fare based on the predicted time and distance of the ride, the level of rider demand and the likelihood of being able to match additional riders along the given route, and such fare is quoted to the riders prior to their commitment to the ride. 
The fare charged to the riders is decoupled\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 276, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 36, \"partitioned_by_unstructured\": true, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"dbd5427c-f5ef-5fa6-83ae-a4a8ddbb48c2\", \"extraction_id\": \"14b08757-0819-5105-af37-509686dd6d01\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.16958434879779816, \"text\": \"s, drivers, and the communities they serve.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 77, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 13, \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"Who is John Snow?\"}}], \"kg_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-A9ghBuKmJiN8fBXaFZuAe7soQzK91\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": null, \"message\": {\"content\": \"John Snow is Ned Stark\\u2019s bastard son. Since Catelyn is not his mother, he is not a proper member of the Stark family, and he often feels himself an outsider. 
He is also a highly capable swordsman and thinker, with a knack for piercing observations [1].\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1726872437, \"model\": \"gpt-4o-2024-05-13\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_3537616b13\", \"usage\": {\"completion_tokens\": 57, \"prompt_tokens\": 1810, \"total_tokens\": 1867, \"completion_tokens_details\": {\"reasoning_tokens\": 0}}}}", + "timestamp": "2024-09-20 22:47:17" + } + ], + "timestamp": "2024-09-20T22:49:22", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "1cf018a8-7152-5a74-b4de-dbcb4418d19d", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-20T22:25:58", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "ff899df9-42a3-58be-9a09-dd3a36759d22", + "run_type": "MANAGEMENT", + "entries": [ + { + "key": "search_latency", + "value": "0.27", + "timestamp": "2024-09-20 22:08:47" + }, + { + "key": "search_latency", + "value": "0.23", + "timestamp": "2024-09-20 22:08:53" + }, + { + "key": "completion_record", + "value": "{\"message_id\": \"c9c8de19-a132-5479-8ec2-b2dcd9bd8285\", \"message_type\": \"assistant\", \"timestamp\": \"2024-09-20T15:08:05.831308\", \"feedback\": null, \"score\": null, \"completion_start_time\": \"2024-09-20T15:09:08.616542\", \"completion_end_time\": \"2024-09-20T15:09:13.432280\", \"search_query\": \"What was Uber's profit in 2020?\", \"search_results\": {\"vector_search_results\": [{\"fragment_id\": \"6896b4b8-4dd8-5d9f-a2c0-8319b809d323\", \"extraction_id\": \"c61ee221-dffc-5b8f-871a-fffe855838a3\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.08705742690480622, \"text\": \"Though Aristotle wrote many elegant treatises and dialogues for publication, only around a third of his original output has survived, 
none of it intended for publication. Aristotle provided a complex synthesis of the various philosophies existing prior to him. His teachings and methods of inquiry have had a significant impact across the world, and remain a subject of contemporary philosophical discussion.\\n\\nAristotle's views profoundly shaped medieval scholarship. The influence of his physical science extended from late antiquity and the Early Middle Ages into the Renaissance, and was not replaced systematically until the Enlightenment and theories such as classical mechanics were developed. He influenced Judeo-Islamic philosophies during the Middle Ages, as well as Christian theology, especially the Neoplatonism of the Early Church and the scholastic tradition of the Catholic Church.\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v1\", \"chunk_order\": 2, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"fd867a27-cb48-5cae-b036-a241d6d3a248\", \"extraction_id\": \"7d8cdc4c-f5d0-52f3-a01d-ef523c267e48\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.07870185927130524, \"text\": \"Aristotle[A] (Greek: \\u1f08\\u03c1\\u03b9\\u03c3\\u03c4\\u03bf\\u03c4\\u03ad\\u03bb\\u03b7\\u03c2 Aristot\\u00e9l\\u0113s, pronounced [aristot\\u00e9l\\u025b\\u02d0s]; 384\\u2013322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. 
As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v1\", \"chunk_order\": 0, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"1f652125-10b3-51fa-9e17-bd9e3d575af9\", \"extraction_id\": \"5d156c5a-ecdd-5014-838d-c72034c9ce33\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.05540380046975024, \"text\": \"Aristotle was revered among medieval Muslim scholars as \\\"The First Teacher\\\", and among medieval Christians like Thomas Aquinas as simply \\\"The Philosopher\\\", while the poet Dante called him \\\"the master of those who know\\\". His works contain the earliest known formal study of logic, and were studied by medieval scholars such as Peter Abelard and Jean Buridan. Aristotle's influence on logic continued well into the 19th century. 
In addition, his ethics, although always influential, gained renewed interest with the modern advent of virtue ethics.\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v1\", \"chunk_order\": 3, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}, {\"fragment_id\": \"03b79bda-bd92-5ba5-a731-de67df33f6be\", \"extraction_id\": \"40d04128-0a7b-5849-b009-259a43fa546e\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.0551895797252655, \"text\": \"Little is known about Aristotle's life. He was born in the city of Stagira in northern Greece during the Classical period. His father, Nicomachus, died when Aristotle was a child, and he was brought up by a guardian. At 17 or 18, he joined Plato's Academy in Athens and remained there until the age of 37 (c.\\u2009347 BC). Shortly after Plato died, Aristotle left Athens and, at the request of Philip II of Macedon, tutored his son Alexander the Great beginning in 343 BC. He established a library in the Lyceum, which helped him to produce many of his hundreds of books on papyrus scrolls.\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v1\", \"chunk_order\": 1, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Uber's profit in 2020?\"}}], \"kg_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-A9g6HQBxt87MJfSIXTivvIqifZne8\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": null, \"message\": {\"content\": \"The provided context does not contain any information about Uber's profit in 2020. The context is focused on Aristotle and his influence, life, and works. 
Therefore, I cannot provide an answer to the query about Uber's profit in 2020 based on the given context.\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1726870149, \"model\": \"gpt-4o-2024-05-13\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_157b3831f5\", \"usage\": {\"completion_tokens\": 56, \"prompt_tokens\": 674, \"total_tokens\": 730, \"completion_tokens_details\": {\"reasoning_tokens\": 0}}}}", + "timestamp": "2024-09-20 22:09:13" + }, + { + "key": "completion_record", + "value": "{\"message_id\": \"b0888016-8ffa-52f4-a9d6-42f5defe0592\", \"message_type\": \"assistant\", \"timestamp\": \"2024-09-20T15:08:05.831308\", \"feedback\": null, \"score\": null, \"completion_start_time\": \"2024-09-20T15:09:15.002436\", \"completion_end_time\": \"2024-09-20T15:09:16.057564\", \"search_query\": \"Who is John Snow?\", \"search_results\": {\"vector_search_results\": [{\"fragment_id\": \"fd867a27-cb48-5cae-b036-a241d6d3a248\", \"extraction_id\": \"7d8cdc4c-f5d0-52f3-a01d-ef523c267e48\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.01700653594771242, \"text\": \"Aristotle[A] (Greek: \\u1f08\\u03c1\\u03b9\\u03c3\\u03c4\\u03bf\\u03c4\\u03ad\\u03bb\\u03b7\\u03c2 Aristot\\u00e9l\\u0113s, pronounced [aristot\\u00e9l\\u025b\\u02d0s]; 384\\u2013322 BC) was an Ancient Greek philosopher and polymath. His writings cover a broad range of subjects spanning the natural sciences, philosophy, linguistics, economics, politics, psychology, and the arts. 
As the founder of the Peripatetic school of philosophy in the Lyceum in Athens, he began the wider Aristotelian tradition that followed, which set the groundwork for the development of modern science.\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v1\", \"chunk_order\": 0, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"semantic_rank\": 1, \"full_text_rank\": 200, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"6896b4b8-4dd8-5d9f-a2c0-8319b809d323\", \"extraction_id\": \"c61ee221-dffc-5b8f-871a-fffe855838a3\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.016692307692307694, \"text\": \"Though Aristotle wrote many elegant treatises and dialogues for publication, only around a third of his original output has survived, none of it intended for publication. Aristotle provided a complex synthesis of the various philosophies existing prior to him. His teachings and methods of inquiry have had a significant impact across the world, and remain a subject of contemporary philosophical discussion.\\n\\nAristotle's views profoundly shaped medieval scholarship. The influence of his physical science extended from late antiquity and the Early Middle Ages into the Renaissance, and was not replaced systematically until the Enlightenment and theories such as classical mechanics were developed. 
He influenced Judeo-Islamic philosophies during the Middle Ages, as well as Christian theology, especially the Neoplatonism of the Early Church and the scholastic tradition of the Catholic Church.\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v1\", \"chunk_order\": 2, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"semantic_rank\": 2, \"full_text_rank\": 200, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"1f652125-10b3-51fa-9e17-bd9e3d575af9\", \"extraction_id\": \"5d156c5a-ecdd-5014-838d-c72034c9ce33\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.01638993710691824, \"text\": \"Aristotle was revered among medieval Muslim scholars as \\\"The First Teacher\\\", and among medieval Christians like Thomas Aquinas as simply \\\"The Philosopher\\\", while the poet Dante called him \\\"the master of those who know\\\". His works contain the earliest known formal study of logic, and were studied by medieval scholars such as Peter Abelard and Jean Buridan. Aristotle's influence on logic continued well into the 19th century. 
In addition, his ethics, although always influential, gained renewed interest with the modern advent of virtue ethics.\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v1\", \"chunk_order\": 3, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"semantic_rank\": 3, \"full_text_rank\": 200, \"associated_query\": \"Who is John Snow?\"}}, {\"fragment_id\": \"03b79bda-bd92-5ba5-a731-de67df33f6be\", \"extraction_id\": \"40d04128-0a7b-5849-b009-259a43fa546e\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.016098765432098764, \"text\": \"Little is known about Aristotle's life. He was born in the city of Stagira in northern Greece during the Classical period. His father, Nicomachus, died when Aristotle was a child, and he was brought up by a guardian. At 17 or 18, he joined Plato's Academy in Athens and remained there until the age of 37 (c.\\u2009347 BC). Shortly after Plato died, Aristotle left Athens and, at the request of Philip II of Macedon, tutored his son Alexander the Great beginning in 343 BC. He established a library in the Lyceum, which helped him to produce many of his hundreds of books on papyrus scrolls.\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v1\", \"chunk_order\": 1, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"semantic_rank\": 4, \"full_text_rank\": 200, \"associated_query\": \"Who is John Snow?\"}}], \"kg_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-A9g6NG2IXLeGOv1AEeEm0AVnk2gRn\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": null, \"message\": {\"content\": \"The provided context does not contain any information about John Snow. 
All the references [1], [2], [3], and [4] pertain to Aristotle and his contributions to philosophy and science. Therefore, I cannot provide information about John Snow based on the given context.\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1726870155, \"model\": \"gpt-4o-2024-05-13\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_52a7f40b0b\", \"usage\": {\"completion_tokens\": 55, \"prompt_tokens\": 659, \"total_tokens\": 714, \"completion_tokens_details\": {\"reasoning_tokens\": 0}}}}", + "timestamp": "2024-09-20 22:09:16" + }, + { + "key": "error", + "value": "list index out of range", + "timestamp": "2024-09-20 22:19:21" + } + ], + "timestamp": "2024-09-20T22:21:34", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "7ddf2f5b-371d-5d3b-bf48-3ceb1de3106e", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-20T22:07:53", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "b5d1aae6-20c8-57b8-ada5-41d22d60b884", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-20T22:07:19", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "5c32b0ce-47e1-54e0-8fe5-0f26f06ddc91", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-20T22:06:22", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "b9ff1873-e86d-5db5-9d8f-4377ff1468ec", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-20T22:06:03", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "ba267530-d625-5dd6-bbbc-69979097955b", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-20T22:05:02", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "a23d8952-7eb2-55ad-85fc-b3357779a041", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-20T22:04:42", + "user_id": 
"2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "b51deaea-5c13-5d63-99b8-815d6d1ade65", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-20T22:04:17", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "3340a301-2021-5e9b-8e92-96f86b45824f", + "run_type": "MANAGEMENT", + "entries": [ + { + "key": "error", + "value": "0", + "timestamp": "2024-09-20 22:01:55" + } + ], + "timestamp": "2024-09-20T22:01:55", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "c02ac9d9-53cd-5fb6-9195-a3117c932932", + "run_type": "MANAGEMENT", + "entries": [ + { + "key": "search_latency", + "value": "0.27", + "timestamp": "2024-09-20 21:58:52" + }, + { + "key": "search_latency", + "value": "0.23", + "timestamp": "2024-09-20 22:00:05" + }, + { + "key": "search_latency", + "value": "0.19", + "timestamp": "2024-09-20 22:00:16" + }, + { + "key": "error", + "value": "0", + "timestamp": "2024-09-20 22:00:44" + }, + { + "key": "error", + "value": "0", + "timestamp": "2024-09-20 22:01:28" + } + ], + "timestamp": "2024-09-20T22:01:28", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "f6d75420-dd50-56c7-8a30-14d012d4bcc0", + "run_type": "MANAGEMENT", + "entries": [ + { + "key": "error", + "value": "'dict' object has no attribute 'replace'", + "timestamp": "2024-09-20 21:56:31" + } + ], + "timestamp": "2024-09-20T21:56:31", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "15eab53f-6057-5748-be6d-d4da7d4e3754", + "run_type": "MANAGEMENT", + "entries": [ + { + "key": "search_latency", + "value": "0.23", + "timestamp": "2024-09-20 21:54:42" + }, + { + "key": "search_latency", + "value": "0.23", + "timestamp": "2024-09-20 21:55:14" + }, + { + "key": "error", + "value": "'dict' object has no attribute 'replace'", + "timestamp": "2024-09-20 21:55:28" + } + ], + "timestamp": "2024-09-20T21:55:28", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": 
"6f4d56c8-29aa-56e0-8455-ca58b53e2644", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "search_latency", + "value": "0.40", + "timestamp": "2024-09-20 21:10:41" + }, + { + "key": "error", + "value": "0", + "timestamp": "2024-09-20 21:31:35" + }, + { + "key": "error", + "value": "0", + "timestamp": "2024-09-20 21:32:28" + }, + { + "key": "error", + "value": "0", + "timestamp": "2024-09-20 21:32:29" + }, + { + "key": "error", + "value": "'dict' object has no attribute 'replace'", + "timestamp": "2024-09-20 21:32:29" + }, + { + "key": "search_latency", + "value": "0.41", + "timestamp": "2024-09-20 21:45:17" + }, + { + "key": "search_latency", + "value": "0.26", + "timestamp": "2024-09-20 21:46:19" + } + ], + "timestamp": "2024-09-20T21:46:19", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "f5b31cd5-3dcb-5b00-8377-cbfa595a6cf8", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "completion_record", + "value": "{\"message_id\": \"716bc004-3d1b-5547-a317-0d5404bc2a9e\", \"message_type\": \"assistant\", \"timestamp\": \"2024-09-19T19:14:02.641680\", \"feedback\": null, \"score\": null, \"completion_start_time\": \"2024-09-19T19:14:10.367949\", \"completion_end_time\": \"2024-09-19T19:14:12.049757\", \"search_query\": \"whoami\", \"search_results\": {\"vector_search_results\": [{\"fragment_id\": \"7af6c227-7e0a-51d8-844d-123688e793ae\", \"extraction_id\": \"b3e01927-0dce-57b5-844f-dca923b4b4f0\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.1970618457477743, \"text\": \"rom the Middle Ages \\u2013 is accurate, he may also have had an er\\u00f4menos, Palaephatus of Abydus. 
[14]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 8, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"a0fe7d25-56fb-5d1a-bd3d-264fef543989\", \"extraction_id\": \"4a601788-294d-57ac-99f2-faec957e3e10\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.1940603620934055, \"text\": \"t of the end of the potentiality. Because the end (telos) is the principle of every change, and potentiality exists for the sake of the end, actuality, accordingly, is the end. Referring then to the previous example, it can be said that an actuality is when a plant does one of the activities that plants do. [34]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 22, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"afc15a4b-b4fe-524a-91ce-b5547563a091\", \"extraction_id\": \"010c18f1-9e6a-5044-9f3a-7e6a6f7bc10b\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.1800131690494321, \"text\": \"For that for the sake of which (to hou heneka) a thing is, is its principle, and the becoming is for the sake of the end; and the actuality is the end, and it is for the sake of this that the potentiality is acquired. For animals do not see in order that they may have sight, but they have sight that they may see. 
[38]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 23, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"aa11b010-7592-54d6-adf1-1b7d66104d6f\", \"extraction_id\": \"840428b3-d7bb-5679-96fa-a811199b7f17\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.14735030688791606, \"text\": \"at causes change of what is changed' and so suggests all sorts of agents, non-living or living, acting as the sources of change or movement or rest. Representing the current understanding of causality as the relation of cause and effect, this covers the modern definitions of \\\"cause\\\" as either the agent or agency or particular events or states of affairs. In the case of two dominoes, when the first is knocked over it causes the second also to fall over.\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 35, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"26dd5b44-79f0-5748-b32d-05b4340278c8\", \"extraction_id\": \"906c25ab-0d39-57d2-948d-78fdcb1b5ef1\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.1457018231508943, \"text\": \"y perception of the things remembered and of the time passed. 
[108]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 58, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"044c345d-4d30-5ed7-b2ed-cb221779c629\", \"extraction_id\": \"5a529619-a7e7-5f8a-bde3-ad6969f4b948\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.1379266299053057, \"text\": \"Aristotle argued that a capability like playing the flute could be acquired \\u2013 the potential made actual \\u2013 by learning. Coming-to-be is a change where the substrate of the thing that has undergone the change has itself changed. In that particular change he introduces the concept of potentiality (dynamis) and actuality (entelecheia) in association with the matter and the form. Referring to potentiality, this is what a thing is capable of doing or being acted upon if the conditions are right and it is not prevented by something else. For example, the seed of a plant in the soil is potentially (dynamei) a plant, and if it is not prevented by something, it will become a plant. Potentially, beings can either 'act' (poiein) or 'be acted upon' (paschein), which can be either innate or learned. For example, the eyes possess the potentiality of sight (innate \\u2013 being acted upon), while the capability of playing the flute can be possessed by learning (exercise \\u2013 acting). 
Actuality is the fulfilment of the end of the\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 21, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"43011a9c-1c28-5c0b-938b-77286da0e61c\", \"extraction_id\": \"c0a185bf-d7ec-56ff-b033-63733ffa27bc\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.12206629299919969, \"text\": \"a common ancestor, or that one kind can change into another, or that kinds can become extinct. [73]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 43, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"3f7f806f-e328-51cb-890c-8ac2e2a14b39\", \"extraction_id\": \"7b73d820-0d97-52db-8a24-11438a3a3dce\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.12004935025874697, \"text\": \"hell) Larva-bearing insects\\tAnt, cicada\\twithout\\t6 legs\\tS, V\\tCold, Dry Spontaneously generating\\tSponges, worms\\twithout\\tnone\\tS, V\\tCold, Wet or Dry, from earth Plants\\tFig\\twithout\\tnone\\tV\\tCold, Dry Minerals\\tIron\\twithout\\tnone\\tnone\\tCold, Dry Psychology Soul Further information: On the Soul\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 51, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": 
\"66262e77-577d-57c3-a7d9-27d535fbb8a7\", \"extraction_id\": \"a4850741-4545-56ef-bb7d-1febadfbe9f5\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.11991938233856092, \"text\": \"[111] This leads the person to believe the dream is real, even when the dreams are absurd in nature. [111] In De Anima iii 3, Aristotle ascribes the ability to create, to store, and to recall images in the absence of perception to the faculty of imagination, phantasia. [11]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 63, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"853cc80f-11a4-5a44-b1ab-42624cee9834\", \"extraction_id\": \"d3450c7b-8f6d-5faa-875d-a22dc911dc72\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.11793822921426989, \"text\": \"[86] Philosophers of science have generally concluded that Aristotle was not interested in taxonomy,[87][88] but zoologists who studied this question in the early 21st century think otherwise. [89][90][91] He believed that purposive final causes guided all natural processes; this teleological view justified his observed data as an expression of formal design. 
[92]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 49, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}], \"kg_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-A9NRqyRZNDi8TO6lA9Wme7DMDSPGq\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": null, \"message\": {\"content\": \"The provided context does not contain any direct information or relevant references to answer the query \\\"whoami.\\\" The context primarily includes excerpts related to Aristotle's philosophy, causality, potentiality and actuality, and some other unrelated topics. Therefore, there is no specific line item that addresses the query \\\"whoami.\\\"\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1726798450, \"model\": \"gpt-4o-2024-05-13\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_3537616b13\", \"usage\": {\"completion_tokens\": 61, \"prompt_tokens\": 926, \"total_tokens\": 987, \"completion_tokens_details\": {\"reasoning_tokens\": 0}}}}", + "timestamp": "2024-09-20 02:14:12" + }, + { + "key": "search_latency", + "value": "0.47", + "timestamp": "2024-09-20 19:29:31" + }, + { + "key": "search_latency", + "value": "0.25", + "timestamp": "2024-09-20 19:33:40" + } + ], + "timestamp": "2024-09-20T19:33:40", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "0444aec2-ea7e-5ea0-841c-3001d205bde6", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "completion_record", + "value": "{\"message_id\": \"6dfe2ba9-61a9-5a2c-80aa-1b54c1db682e\", \"message_type\": \"assistant\", \"timestamp\": \"2024-09-19T18:45:35.157907\", \"feedback\": null, \"score\": null, \"completion_start_time\": \"2024-09-19T18:45:41.489449\", \"completion_end_time\": 
\"2024-09-19T18:45:42.794740\", \"search_query\": \"whoami\", \"search_results\": {\"vector_search_results\": [{\"fragment_id\": \"7af6c227-7e0a-51d8-844d-123688e793ae\", \"extraction_id\": \"b3e01927-0dce-57b5-844f-dca923b4b4f0\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.1970618457477743, \"text\": \"rom the Middle Ages \\u2013 is accurate, he may also have had an er\\u00f4menos, Palaephatus of Abydus. [14]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 8, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"a0fe7d25-56fb-5d1a-bd3d-264fef543989\", \"extraction_id\": \"4a601788-294d-57ac-99f2-faec957e3e10\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.1940603620934055, \"text\": \"t of the end of the potentiality. Because the end (telos) is the principle of every change, and potentiality exists for the sake of the end, actuality, accordingly, is the end. Referring then to the previous example, it can be said that an actuality is when a plant does one of the activities that plants do. 
[34]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 22, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"afc15a4b-b4fe-524a-91ce-b5547563a091\", \"extraction_id\": \"010c18f1-9e6a-5044-9f3a-7e6a6f7bc10b\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.1800131690494321, \"text\": \"For that for the sake of which (to hou heneka) a thing is, is its principle, and the becoming is for the sake of the end; and the actuality is the end, and it is for the sake of this that the potentiality is acquired. For animals do not see in order that they may have sight, but they have sight that they may see. [38]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 23, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"aa11b010-7592-54d6-adf1-1b7d66104d6f\", \"extraction_id\": \"840428b3-d7bb-5679-96fa-a811199b7f17\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.14735030688791606, \"text\": \"at causes change of what is changed' and so suggests all sorts of agents, non-living or living, acting as the sources of change or movement or rest. Representing the current understanding of causality as the relation of cause and effect, this covers the modern definitions of \\\"cause\\\" as either the agent or agency or particular events or states of affairs. 
In the case of two dominoes, when the first is knocked over it causes the second also to fall over.\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 35, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"26dd5b44-79f0-5748-b32d-05b4340278c8\", \"extraction_id\": \"906c25ab-0d39-57d2-948d-78fdcb1b5ef1\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.1457018231508943, \"text\": \"y perception of the things remembered and of the time passed. [108]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 58, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"044c345d-4d30-5ed7-b2ed-cb221779c629\", \"extraction_id\": \"5a529619-a7e7-5f8a-bde3-ad6969f4b948\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.1379266299053057, \"text\": \"Aristotle argued that a capability like playing the flute could be acquired \\u2013 the potential made actual \\u2013 by learning. Coming-to-be is a change where the substrate of the thing that has undergone the change has itself changed. In that particular change he introduces the concept of potentiality (dynamis) and actuality (entelecheia) in association with the matter and the form. Referring to potentiality, this is what a thing is capable of doing or being acted upon if the conditions are right and it is not prevented by something else. 
For example, the seed of a plant in the soil is potentially (dynamei) a plant, and if it is not prevented by something, it will become a plant. Potentially, beings can either 'act' (poiein) or 'be acted upon' (paschein), which can be either innate or learned. For example, the eyes possess the potentiality of sight (innate \\u2013 being acted upon), while the capability of playing the flute can be possessed by learning (exercise \\u2013 acting). Actuality is the fulfilment of the end of the\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 21, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"43011a9c-1c28-5c0b-938b-77286da0e61c\", \"extraction_id\": \"c0a185bf-d7ec-56ff-b033-63733ffa27bc\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.12206629299919969, \"text\": \"a common ancestor, or that one kind can change into another, or that kinds can become extinct. 
[73]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 43, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"3f7f806f-e328-51cb-890c-8ac2e2a14b39\", \"extraction_id\": \"7b73d820-0d97-52db-8a24-11438a3a3dce\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.12004935025874697, \"text\": \"hell) Larva-bearing insects\\tAnt, cicada\\twithout\\t6 legs\\tS, V\\tCold, Dry Spontaneously generating\\tSponges, worms\\twithout\\tnone\\tS, V\\tCold, Wet or Dry, from earth Plants\\tFig\\twithout\\tnone\\tV\\tCold, Dry Minerals\\tIron\\twithout\\tnone\\tnone\\tCold, Dry Psychology Soul Further information: On the Soul\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 51, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"66262e77-577d-57c3-a7d9-27d535fbb8a7\", \"extraction_id\": \"a4850741-4545-56ef-bb7d-1febadfbe9f5\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.11991938233856092, \"text\": \"[111] This leads the person to believe the dream is real, even when the dreams are absurd in nature. [111] In De Anima iii 3, Aristotle ascribes the ability to create, to store, and to recall images in the absence of perception to the faculty of imagination, phantasia. 
[11]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 63, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"853cc80f-11a4-5a44-b1ab-42624cee9834\", \"extraction_id\": \"d3450c7b-8f6d-5faa-875d-a22dc911dc72\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.11793822921426989, \"text\": \"[86] Philosophers of science have generally concluded that Aristotle was not interested in taxonomy,[87][88] but zoologists who studied this question in the early 21st century think otherwise. [89][90][91] He believed that purposive final causes guided all natural processes; this teleological view justified his observed data as an expression of formal design. [92]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 49, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}], \"kg_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-A9N0IUw0FU71iBT33HoNB5jyFFn83\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": null, \"message\": {\"content\": \"The provided context does not contain any information directly related to the query \\\"whoami.\\\" The context includes excerpts discussing various philosophical concepts, Aristotle's theories, and other unrelated topics. 
Therefore, there is no specific line item reference that answers the query \\\"whoami.\\\"\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1726796742, \"model\": \"gpt-4o-2024-05-13\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_3537616b13\", \"usage\": {\"completion_tokens\": 52, \"prompt_tokens\": 926, \"total_tokens\": 978, \"completion_tokens_details\": {\"reasoning_tokens\": 0}}}}", + "timestamp": "2024-09-20 01:45:42" + } + ], + "timestamp": "2024-09-20T01:45:41", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "02861813-36a4-5f34-878f-41564b66e265", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "completion_record", + "value": "{\"message_id\": \"74046a07-fd49-5349-9e2c-f7c7caddb79d\", \"message_type\": \"assistant\", \"timestamp\": \"2024-09-19T17:53:28.810027\", \"feedback\": null, \"score\": null, \"completion_start_time\": \"2024-09-19T17:53:44.119869\", \"completion_end_time\": \"2024-09-19T17:53:45.961894\", \"search_query\": \"whoami\", \"search_results\": {\"vector_search_results\": [{\"fragment_id\": \"7af6c227-7e0a-51d8-844d-123688e793ae\", \"extraction_id\": \"b3e01927-0dce-57b5-844f-dca923b4b4f0\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.01700653594771242, \"text\": \"rom the Middle Ages \\u2013 is accurate, he may also have had an er\\u00f4menos, Palaephatus of Abydus. 
[14]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 8, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"semantic_rank\": 1, \"full_text_rank\": 200, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"a0fe7d25-56fb-5d1a-bd3d-264fef543989\", \"extraction_id\": \"4a601788-294d-57ac-99f2-faec957e3e10\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.016692307692307694, \"text\": \"t of the end of the potentiality. Because the end (telos) is the principle of every change, and potentiality exists for the sake of the end, actuality, accordingly, is the end. Referring then to the previous example, it can be said that an actuality is when a plant does one of the activities that plants do. [34]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 22, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"semantic_rank\": 2, \"full_text_rank\": 200, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"afc15a4b-b4fe-524a-91ce-b5547563a091\", \"extraction_id\": \"010c18f1-9e6a-5044-9f3a-7e6a6f7bc10b\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.01638993710691824, \"text\": \"For that for the sake of which (to hou heneka) a thing is, is its principle, and the becoming is for the sake of the end; and the actuality is the end, and it is for the sake of this that the potentiality is acquired. For animals do not see in order that they may have sight, but they have sight that they may see. 
[38]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 23, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"semantic_rank\": 3, \"full_text_rank\": 200, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"aa11b010-7592-54d6-adf1-1b7d66104d6f\", \"extraction_id\": \"840428b3-d7bb-5679-96fa-a811199b7f17\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.016098765432098764, \"text\": \"at causes change of what is changed' and so suggests all sorts of agents, non-living or living, acting as the sources of change or movement or rest. Representing the current understanding of causality as the relation of cause and effect, this covers the modern definitions of \\\"cause\\\" as either the agent or agency or particular events or states of affairs. In the case of two dominoes, when the first is knocked over it causes the second also to fall over.\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 35, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"semantic_rank\": 4, \"full_text_rank\": 200, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"26dd5b44-79f0-5748-b32d-05b4340278c8\", \"extraction_id\": \"906c25ab-0d39-57d2-948d-78fdcb1b5ef1\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.015818181818181818, \"text\": \"y perception of the things remembered and of the time passed. 
[108]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 58, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"semantic_rank\": 5, \"full_text_rank\": 200, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"044c345d-4d30-5ed7-b2ed-cb221779c629\", \"extraction_id\": \"5a529619-a7e7-5f8a-bde3-ad6969f4b948\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.015547619047619046, \"text\": \"Aristotle argued that a capability like playing the flute could be acquired \\u2013 the potential made actual \\u2013 by learning. Coming-to-be is a change where the substrate of the thing that has undergone the change has itself changed. In that particular change he introduces the concept of potentiality (dynamis) and actuality (entelecheia) in association with the matter and the form. Referring to potentiality, this is what a thing is capable of doing or being acted upon if the conditions are right and it is not prevented by something else. For example, the seed of a plant in the soil is potentially (dynamei) a plant, and if it is not prevented by something, it will become a plant. Potentially, beings can either 'act' (poiein) or 'be acted upon' (paschein), which can be either innate or learned. For example, the eyes possess the potentiality of sight (innate \\u2013 being acted upon), while the capability of playing the flute can be possessed by learning (exercise \\u2013 acting). 
Actuality is the fulfilment of the end of the\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 21, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"semantic_rank\": 6, \"full_text_rank\": 200, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"43011a9c-1c28-5c0b-938b-77286da0e61c\", \"extraction_id\": \"c0a185bf-d7ec-56ff-b033-63733ffa27bc\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.01528654970760234, \"text\": \"a common ancestor, or that one kind can change into another, or that kinds can become extinct. [73]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 43, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"semantic_rank\": 7, \"full_text_rank\": 200, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"3f7f806f-e328-51cb-890c-8ac2e2a14b39\", \"extraction_id\": \"7b73d820-0d97-52db-8a24-11438a3a3dce\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.01503448275862069, \"text\": \"hell) Larva-bearing insects\\tAnt, cicada\\twithout\\t6 legs\\tS, V\\tCold, Dry Spontaneously generating\\tSponges, worms\\twithout\\tnone\\tS, V\\tCold, Wet or Dry, from earth Plants\\tFig\\twithout\\tnone\\tV\\tCold, Dry Minerals\\tIron\\twithout\\tnone\\tnone\\tCold, Dry Psychology Soul Further information: On the Soul\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 51, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, 
\"unstructured_is_continuation\": true, \"semantic_rank\": 8, \"full_text_rank\": 200, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"66262e77-577d-57c3-a7d9-27d535fbb8a7\", \"extraction_id\": \"a4850741-4545-56ef-bb7d-1febadfbe9f5\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.0147909604519774, \"text\": \"[111] This leads the person to believe the dream is real, even when the dreams are absurd in nature. [111] In De Anima iii 3, Aristotle ascribes the ability to create, to store, and to recall images in the absence of perception to the faculty of imagination, phantasia. [11]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 63, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"semantic_rank\": 9, \"full_text_rank\": 200, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"853cc80f-11a4-5a44-b1ab-42624cee9834\", \"extraction_id\": \"d3450c7b-8f6d-5faa-875d-a22dc911dc72\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.014555555555555556, \"text\": \"[86] Philosophers of science have generally concluded that Aristotle was not interested in taxonomy,[87][88] but zoologists who studied this question in the early 21st century think otherwise. [89][90][91] He believed that purposive final causes guided all natural processes; this teleological view justified his observed data as an expression of formal design. 
[92]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 49, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"semantic_rank\": 10, \"full_text_rank\": 200, \"associated_query\": \"whoami\"}}], \"kg_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-A9MC0H6xvVYpdFZaJlVEJKhdBkMKq\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": null, \"message\": {\"content\": \"The provided context does not contain any direct information or definition related to the query \\\"whoami.\\\" The context includes various excerpts discussing topics such as Aristotle's philosophy, causality, potentiality and actuality, and other unrelated subjects. Therefore, there is no specific line item reference that directly answers the query \\\"whoami.\\\"\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1726793624, \"model\": \"gpt-4o-2024-05-13\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_3537616b13\", \"usage\": {\"completion_tokens\": 63, \"prompt_tokens\": 926, \"total_tokens\": 989, \"completion_tokens_details\": {\"reasoning_tokens\": 0}}}}", + "timestamp": "2024-09-20 00:53:45" + } + ], + "timestamp": "2024-09-20T00:53:44", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "8bad29fd-d728-5953-93f3-687f595679ea", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "completion_record", + "value": "{\"message_id\": \"bffa759b-c8f6-52df-b335-f465d0901665\", \"message_type\": \"assistant\", \"timestamp\": \"2024-09-19T17:53:28.810027\", \"feedback\": null, \"score\": null, \"completion_start_time\": \"2024-09-19T17:53:33.610163\", \"completion_end_time\": \"2024-09-19T17:53:35.575833\", \"search_query\": \"whoami\", \"search_results\": {\"vector_search_results\": [{\"fragment_id\": 
\"7af6c227-7e0a-51d8-844d-123688e793ae\", \"extraction_id\": \"b3e01927-0dce-57b5-844f-dca923b4b4f0\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.1970618457477743, \"text\": \"rom the Middle Ages \\u2013 is accurate, he may also have had an er\\u00f4menos, Palaephatus of Abydus. [14]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 8, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"a0fe7d25-56fb-5d1a-bd3d-264fef543989\", \"extraction_id\": \"4a601788-294d-57ac-99f2-faec957e3e10\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.1940603620934055, \"text\": \"t of the end of the potentiality. Because the end (telos) is the principle of every change, and potentiality exists for the sake of the end, actuality, accordingly, is the end. Referring then to the previous example, it can be said that an actuality is when a plant does one of the activities that plants do. 
[34]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 22, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"afc15a4b-b4fe-524a-91ce-b5547563a091\", \"extraction_id\": \"010c18f1-9e6a-5044-9f3a-7e6a6f7bc10b\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.1800131690494321, \"text\": \"For that for the sake of which (to hou heneka) a thing is, is its principle, and the becoming is for the sake of the end; and the actuality is the end, and it is for the sake of this that the potentiality is acquired. For animals do not see in order that they may have sight, but they have sight that they may see. [38]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 23, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"aa11b010-7592-54d6-adf1-1b7d66104d6f\", \"extraction_id\": \"840428b3-d7bb-5679-96fa-a811199b7f17\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.14735030688791606, \"text\": \"at causes change of what is changed' and so suggests all sorts of agents, non-living or living, acting as the sources of change or movement or rest. Representing the current understanding of causality as the relation of cause and effect, this covers the modern definitions of \\\"cause\\\" as either the agent or agency or particular events or states of affairs. 
In the case of two dominoes, when the first is knocked over it causes the second also to fall over.\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 35, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"26dd5b44-79f0-5748-b32d-05b4340278c8\", \"extraction_id\": \"906c25ab-0d39-57d2-948d-78fdcb1b5ef1\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.1457018231508943, \"text\": \"y perception of the things remembered and of the time passed. [108]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 58, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"044c345d-4d30-5ed7-b2ed-cb221779c629\", \"extraction_id\": \"5a529619-a7e7-5f8a-bde3-ad6969f4b948\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.1379266299053057, \"text\": \"Aristotle argued that a capability like playing the flute could be acquired \\u2013 the potential made actual \\u2013 by learning. Coming-to-be is a change where the substrate of the thing that has undergone the change has itself changed. In that particular change he introduces the concept of potentiality (dynamis) and actuality (entelecheia) in association with the matter and the form. Referring to potentiality, this is what a thing is capable of doing or being acted upon if the conditions are right and it is not prevented by something else. 
For example, the seed of a plant in the soil is potentially (dynamei) a plant, and if it is not prevented by something, it will become a plant. Potentially, beings can either 'act' (poiein) or 'be acted upon' (paschein), which can be either innate or learned. For example, the eyes possess the potentiality of sight (innate \\u2013 being acted upon), while the capability of playing the flute can be possessed by learning (exercise \\u2013 acting). Actuality is the fulfilment of the end of the\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 21, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"43011a9c-1c28-5c0b-938b-77286da0e61c\", \"extraction_id\": \"c0a185bf-d7ec-56ff-b033-63733ffa27bc\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.12206629299919969, \"text\": \"a common ancestor, or that one kind can change into another, or that kinds can become extinct. 
[73]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 43, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"3f7f806f-e328-51cb-890c-8ac2e2a14b39\", \"extraction_id\": \"7b73d820-0d97-52db-8a24-11438a3a3dce\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.12004935025874697, \"text\": \"hell) Larva-bearing insects\\tAnt, cicada\\twithout\\t6 legs\\tS, V\\tCold, Dry Spontaneously generating\\tSponges, worms\\twithout\\tnone\\tS, V\\tCold, Wet or Dry, from earth Plants\\tFig\\twithout\\tnone\\tV\\tCold, Dry Minerals\\tIron\\twithout\\tnone\\tnone\\tCold, Dry Psychology Soul Further information: On the Soul\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 51, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"66262e77-577d-57c3-a7d9-27d535fbb8a7\", \"extraction_id\": \"a4850741-4545-56ef-bb7d-1febadfbe9f5\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.11991938233856092, \"text\": \"[111] This leads the person to believe the dream is real, even when the dreams are absurd in nature. [111] In De Anima iii 3, Aristotle ascribes the ability to create, to store, and to recall images in the absence of perception to the faculty of imagination, phantasia. 
[11]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 63, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"853cc80f-11a4-5a44-b1ab-42624cee9834\", \"extraction_id\": \"d3450c7b-8f6d-5faa-875d-a22dc911dc72\", \"document_id\": \"9fbe403b-c11c-5aae-8ade-ef22980c3ad1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.11793822921426989, \"text\": \"[86] Philosophers of science have generally concluded that Aristotle was not interested in taxonomy,[87][88] but zoologists who studied this question in the early 21st century think otherwise. [89][90][91] He believed that purposive final causes guided all natural processes; this teleological view justified his observed data as an expression of formal design. [92]\", \"metadata\": {\"title\": \"aristotle.txt\", \"version\": \"v0\", \"chunk_order\": 49, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}], \"kg_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-A9MBqQlCrJXvv3QidbMelZO2Tff0y\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": null, \"message\": {\"content\": \"The provided context does not contain any direct information or references that answer the query \\\"whoami.\\\" The context consists of various excerpts discussing topics related to Aristotle's philosophy, causality, potentiality and actuality, and other unrelated subjects. 
None of these excerpts provide information relevant to identifying or defining \\\"whoami\\\" in this context.\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1726793614, \"model\": \"gpt-4o-2024-05-13\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_3537616b13\", \"usage\": {\"completion_tokens\": 65, \"prompt_tokens\": 926, \"total_tokens\": 991, \"completion_tokens_details\": {\"reasoning_tokens\": 0}}}}", + "timestamp": "2024-09-20 00:53:35" + } + ], + "timestamp": "2024-09-20T00:53:33", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "ac1d1a1b-7a89-52df-b0a5-75c40b79dcde", + "run_type": "RETRIEVAL", + "entries": [], + "timestamp": "2024-09-20T00:53:08", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "97d50297-594a-50d2-853a-3e56426c90a3", + "run_type": "RETRIEVAL", + "entries": [], + "timestamp": "2024-09-20T00:52:34", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "7e2e2799-8824-5699-a741-77930709c675", + "run_type": "RETRIEVAL", + "entries": [], + "timestamp": "2024-09-20T00:51:13", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "87c7a629-eb7d-5720-9ef1-f918f52db2be", + "run_type": "RETRIEVAL", + "entries": [], + "timestamp": "2024-09-20T00:50:29", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "2182195b-87d9-5287-a27a-36210625e1ae", + "run_type": "RETRIEVAL", + "entries": [], + "timestamp": "2024-09-20T00:49:07", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "05ab7cbf-bdcf-5929-a275-c06eb59c7bc5", + "run_type": "RETRIEVAL", + "entries": [], + "timestamp": "2024-09-20T00:48:05", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "7e9b57d3-8c61-5cf9-9910-cee4452c30ad", + "run_type": "RETRIEVAL", + "entries": [], + "timestamp": "2024-09-20T00:43:21", + "user_id": 
"2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "285c5018-0694-54b3-acd1-acbba6fa0bcf", + "run_type": "RETRIEVAL", + "entries": [], + "timestamp": "2024-09-20T00:41:39", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "f76ead61-d907-58f5-9ff1-c8e6d882ba41", + "run_type": "RETRIEVAL", + "entries": [], + "timestamp": "2024-09-20T00:40:38", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "84f58954-b5f9-5d6b-85da-f2e68d4ddcc9", + "run_type": "RETRIEVAL", + "entries": [], + "timestamp": "2024-09-20T00:38:23", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "0a0a8f34-e83b-5beb-88e9-229631e4249c", + "run_type": "RETRIEVAL", + "entries": [], + "timestamp": "2024-09-20T00:38:12", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "680d0d47-8fab-587f-a177-f4d6c06feb3f", + "run_type": "RETRIEVAL", + "entries": [], + "timestamp": "2024-09-20T00:37:44", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "6b5d2a0b-860a-581e-bab5-8caaf015d7ae", + "run_type": "RETRIEVAL", + "entries": [], + "timestamp": "2024-09-20T00:37:00", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "a3f81877-9665-5c50-97cc-f1bab9e084df", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "search_latency", + "value": "0.30", + "timestamp": "2024-09-20 00:36:54" + } + ], + "timestamp": "2024-09-20T00:36:53", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "aa4b3c3c-1e45-5b7d-b6f3-58c4db58d4fe", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "search_latency", + "value": "0.25", + "timestamp": "2024-09-20 00:06:48" + } + ], + "timestamp": "2024-09-20T00:06:48", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "671a54a3-d357-534f-b538-7518c6b7ee08", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-20T00:06:27", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + 
"run_id": "b9588f6a-3f67-53d9-9644-c5eaf43a9c47", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-20T00:05:49", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "fa868c02-5bdf-5bdf-ab18-7d75f818c800", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-20T00:04:39", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "8474dd2e-667c-5b25-871a-9d873a1582fd", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-20T00:04:11", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "ae87b4e5-f206-5b84-8bab-a85bbd31acad", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-20T00:02:31", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "8d1589e3-2188-5d4b-b0f9-1e353d1dc569", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-20T00:01:09", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "36d3cb24-7ccf-54b8-8ca1-5f8d0ade217b", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-19T23:57:15", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "4456b54b-f062-5ee4-b8f4-8e65ef289566", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-19T23:56:35", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "e0a08fee-22a2-5935-8436-f816722fa659", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-19T23:55:34", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "715f8b90-1870-5248-9403-05ee6d3cdf9c", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-19T23:54:34", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "e6165c88-0b36-5e62-a7b4-59e23d5c3a85", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-19T23:54:05", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "6aa9528f-60c6-5acb-9f21-8cc94adef6f0", 
+ "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-19T23:53:05", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "cbbcab33-9a63-52ff-969a-daaffcfc20f8", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "search_latency", + "value": "0.31", + "timestamp": "2024-09-19 23:53:01" + } + ], + "timestamp": "2024-09-19T23:53:01", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "8f370ebc-90db-56df-840c-db626d5f30dd", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "search_latency", + "value": "0.54", + "timestamp": "2024-09-19 23:52:20" + } + ], + "timestamp": "2024-09-19T23:52:19", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "43cc7d2a-7dd5-5290-a0f3-51173eca55e9", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "error", + "value": "object async_generator can't be used in 'await' expression", + "timestamp": "2024-09-19 23:52:00" + } + ], + "timestamp": "2024-09-19T23:52:00", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "419b0758-bed2-5b4d-9916-693af8e55bba", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "error", + "value": "object async_generator can't be used in 'await' expression", + "timestamp": "2024-09-19 23:51:04" + } + ], + "timestamp": "2024-09-19T23:51:04", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "d2794946-9889-5e46-8745-9d3a49f93d4b", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "error", + "value": "object async_generator can't be used in 'await' expression", + "timestamp": "2024-09-19 23:50:08" + } + ], + "timestamp": "2024-09-19T23:50:08", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "b8607d1e-8181-5aaf-8d8d-e32ef7c6938e", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "error", + "value": "object async_generator can't be used in 'await' expression", + "timestamp": "2024-09-19 23:49:17" + } + ], + "timestamp": "2024-09-19T23:49:17", + 
"user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "06847c6e-e677-55d6-886e-fab35debaed8", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "error", + "value": "object async_generator can't be used in 'await' expression", + "timestamp": "2024-09-19 23:48:46" + } + ], + "timestamp": "2024-09-19T23:48:46", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "eb18f507-c0fc-5124-a344-c03acc513d12", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "error", + "value": "local variable 'state' referenced before assignment", + "timestamp": "2024-09-19 23:48:01" + } + ], + "timestamp": "2024-09-19T23:48:01", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "8760ec84-e170-5887-b5af-6fb19f106bcb", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "error", + "value": "SearchPipeline.run() missing 1 required positional argument: 'state'", + "timestamp": "2024-09-19 23:47:20" + } + ], + "timestamp": "2024-09-19T23:47:20", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "5021d77f-c1d3-54fe-b717-3af09d023604", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "search_latency", + "value": "0.47", + "timestamp": "2024-09-19 17:53:36" + } + ], + "timestamp": "2024-09-19T17:53:35", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "2da8728a-1624-58eb-a6fc-69e203adb6b8", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-19T17:53:26", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "17e0fe25-8966-5cbe-b6e4-b51305cc0843", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-19T15:49:11", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "9da41279-aa41-5615-a926-01399f602cd3", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T15:48:11", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "85f52831-f67c-509a-aaf0-18e59b70decc", + 
"run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T15:48:11", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "6fa29906-2ebf-5fe6-bbca-b07d3e8a171b", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T15:47:11", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "3fb5bb51-0de4-546c-a0a4-4b8f8852739a", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T15:47:11", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "4f48cffd-c2d9-50b9-b8c9-a9bcc6479ff8", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T15:46:11", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "6678c056-c680-5307-bccb-87754aec6490", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T15:46:11", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "852beb94-0ae6-5319-bcce-812585dac93d", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T15:45:11", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "a2ec6bfa-82ca-55d1-8b6e-b69495947db1", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T15:45:11", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "bc037058-af5b-55ec-b69d-29d929146c7a", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T15:44:59", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "5493c808-7c1f-5875-9378-d8fb3691e8cb", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T15:44:56", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "079cd57b-5d16-5082-96b6-20bc3ea9be1e", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T15:44:53", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "d2c14a18-ed1f-59cd-bdaa-20b891d8e0b9", + "run_type": "MANAGEMENT", + "entries": 
[], + "timestamp": "2024-09-19T15:44:52", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "006d5f1c-733e-5dbe-aa51-2ab75bda6156", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T15:44:52", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "e7c5a7a8-fbd8-5795-af52-842a170782b2", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T15:44:52", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "e5e07dc0-c104-5d83-b071-f80f4e577bf0", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-19T15:44:46", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "9fc7f9fd-1ecd-543d-a0ef-6866a764ff9e", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T15:44:11", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "3d8ef7ea-62b7-5872-8d65-bbc547d47392", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T15:44:11", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "a0f3b95e-3fce-52af-bca3-750f92f17ce9", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T15:44:11", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "3a37f62a-063b-51e5-9958-a964b42efacc", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T15:44:11", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "6e7e5362-fca8-506f-acdc-8f13cbc2a21d", + "run_type": "RETRIEVAL", + "entries": [], + "timestamp": "2024-09-19T01:57:12", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "6ec58fae-ab4d-5a75-a758-c476f7fd6ddd", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "completion_record", + "value": "{\"message_id\": \"5745350d-8ee4-5e4c-a32c-988d241d759e\", \"message_type\": \"assistant\", \"timestamp\": \"2024-09-18T18:43:26.293447\", \"feedback\": null, \"score\": null, 
\"completion_start_time\": \"2024-09-18T18:54:09.107896\", \"completion_end_time\": \"2024-09-18T18:54:23.917531\", \"search_query\": \"--stream\", \"search_results\": {\"vector_search_results\": [{\"fragment_id\": \"887a1ec4-6264-5548-a652-73651a02537b\", \"extraction_id\": \"170f5a46-b448-525d-aa34-2679b3064ba8\", \"document_id\": \"ddd85f9a-3b3c-5c11-9656-8ae828cde61b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.2866899712908254, \"text\": \"### Zach Janicki\\n\\nCTO and cofounder of Offstream. CS from Notre Dame. Previously 1st engineering hire at Repool (YC S21) building hedge fund software, and early engineer at Pathpoint building insurance brokerage software. I've spent my career turning paperwork into software\\n\\n### Varsha Ramesh Walsh\\n\\nCEO and co-founder at Offstream. HBS '23. Previously business lead at Indigo Ag where I launched a traceable cotton business where I closed contracts that got the business to $50M revenue bookings in a year. At Indigo, I experienced the pain point we are now solving at Offstream first-hand. 
We hired 80 ppl and spent 50M to just get carbon compliant.\\n\\n### Company Launches\\n\\n[### Offstream - Streamlined carbon compliance for project developers](\\n\\nWe\\u2019re Varsha and Zach, the co-founders of [Offstream]( We\\u2019re automating carbon compliance for project developers.\\n\\n\\ud83d\\udc65\\u00a0*\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 1, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"--stream\"}}, {\"fragment_id\": \"cf499846-4068-5a2b-91da-34f685114858\", \"extraction_id\": \"10bc9f1d-66fa-525c-8fe9-253ca21e56a0\", \"document_id\": \"ef2bd5ad-e961-5436-b1a1-0423b8ccbaef\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.2814340461155209, \"text\": \"### YC S24 Application Video\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 11, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"--stream\"}}, {\"fragment_id\": \"acb32977-1ec1-5ef1-b8ae-d183890df01d\", \"extraction_id\": \"8e21e5ef-f804-5944-9e27-feec2bc142c0\", \"document_id\": \"ddd85f9a-3b3c-5c11-9656-8ae828cde61b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.2810347378253937, \"text\": \"The Solution**\\n\\nOffstream is a one-stop shop for carbon compliance, making it simple for carbon project developers to get compliant and stay compliant with regulatory and voluntary market compliance standards.\\n\\n[\\n\\nUsers set up and generate life cycle assessments (LCA), which are a core component of all compliance standards. 
Offstream builds off the LCA to generate compliance documentation at a fraction of the cost and time of alternatives.\\n\\n[Are you ready to stop spending time on paperwork and spend more time scaling decarbonization? Join us. ](\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 4, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"--stream\"}}, {\"fragment_id\": \"0e51162b-0a16-51f0-8725-112cb5a71917\", \"extraction_id\": \"45a850e8-ff00-519c-b2d1-d15344593e77\", \"document_id\": \"6825c713-8c68-5a8c-8a2a-385eba6562c1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.2801004946231842, \"text\": \"What? **\\n\\nAny JavaScript class can be marked as [@cloudstate]( and:\\n\\n1. The data in it is now persisted. You query the data with regular JavaScript. 2. The methods on it can now be called from the front end. No Express/API Routes/tRPC needed!\\n\\n**Coolest Side Effect:** By building our tooling around Fullstack TypeScript, we enable **full-stack feature packages,** a way for JavaScript developers to package any feature they build and share it in a way that it can be embedded in other projects. Think **npm install mui**, except it\\u2019s **npm install chat**, or **npm install auth**. Imagine a world where every third-party SaaS provider today can be an open source package that runs in your application.\\n\\nOur Ask:**\\n\\n**Build with us! 
** Contribute to our [Open Source Repository,]( or try [deploying an app with us](\\n\\n### Other Company Launches\\n\\n### Freestyle: TypeScript for EVERYTHING\\n\\nA fundamental shift in how we approach web development\\n\\n[Read Launch \\u203a](\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 2, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"--stream\"}}, {\"fragment_id\": \"5542e6dc-4cf8-5cac-8137-11b769bd44b1\", \"extraction_id\": \"6b40be10-b8f7-5222-bcdf-5687ceda1c40\", \"document_id\": \"577031ec-3b15-5be6-9e33-57e4e22943f3\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.2792621850967407, \"text\": \"The Solution:**\\n\\nHaystack attempts to address this in the following ways:\\n\\n1. It allows you to explore your codebase as a directed graph of functions, classes, etc, on the canvas. We feel like this better fits how your mind understands your codebase and helps you find and alter functional flows more intuitively. 2. It has a navigational copilot that makes edits across files or functions much easier. After you make some changes, Haystack will try to predict your next action and create functions/methods or refactor upstream/downstream code for you. Haystack will surface these speculative edits on the canvas in a way that you can easily dismiss or incorporate them, allowing you to make large-scale changes with a few clicks or keystrokes.\\n\\n[See Haystack in action! 
]( class=\\\"embed-container youtube\\\"\\\\>\\n\\n**Our Ask, Again:** Download and use Haystack at [\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 3, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"--stream\"}}, {\"fragment_id\": \"ac0db056-e308-5404-ba49-e9635f490464\", \"extraction_id\": \"40f13073-8791-5d4c-aff4-07d72da2d548\", \"document_id\": \"577031ec-3b15-5be6-9e33-57e4e22943f3\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.2755703874442339, \"text\": \"**Our Ask**: Engineers have really enjoyed the smooth experience of editing their codebase in Haystack. Join them at [ It takes just a single click to import your VS Code extensions and settings, so you can get to coding straight away.\\n\\nHi everyone! We\\u2019re [Akshay Subramaniam]( and [Jake Yatvitskiy]( and we\\u2019re building Haystack.\\n\\n**The Problem:** Haystack was born out of our frustrations with working in large and mature codebases, specifically with navigating and editing functional flows. 
A great example of a functional flow is the code flow for adding an item to the Amazon shopping cart \\u2014 from the database layer all the way to the frontend UI.\\n\\nOftentimes dealing with such flows would involve navigating a maze of files and functions, and making any edits would involve a lengthy process of doing corresponding downstream/upstream plumbing.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 2, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"--stream\"}}, {\"fragment_id\": \"951795b4-d570-5bd2-ba08-0d75d50991bc\", \"extraction_id\": \"b23ce51d-5406-5d0c-89e1-e85d3441c735\", \"document_id\": \"ddd85f9a-3b3c-5c11-9656-8ae828cde61b\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.26440533995628357, \"text\": \"Title: Offstream: Vanta for carbon compliance | Y Combinator\\n\\nURL Source:\\n\\nMarkdown Content:\\n\\n### Vanta for carbon compliance\\n\\nOffstream is Vanta for carbon compliance. We have grown 20+% MoM since joining the YC batch and have 10 clean energy developers using our platform to get and stay compliant with regulations. Today, every new infrastructure project, from data centers to biomass power plants has a complicated mess of local, state, federal, and private sector standards that they must understand and comply with to make their projects profitable. Offstream solves this by using LLMs to parse regulations, generate technical reports for customers, and manage 100,000+ operational data points on project\\u2019s lifecycles. 
We cut the time customers spend on compliance by 90% and at 1/5th of the cost of alternatives.\\n\\nOffstream\\n\\nFounded:2023\\n\\nTeam Size:3\\n\\nLocation:San Francisco\\n\\n### Active Founders\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 0, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"--stream\"}}, {\"fragment_id\": \"2a47f5ed-9827-5f0e-b5ae-b554c93db974\", \"extraction_id\": \"5bec113d-922a-5a09-9652-56c7f2aeb60f\", \"document_id\": \"5abbb31c-39b6-5acd-b8d4-d0502b023777\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.2634134888648987, \"text\": \"Empires like Splunk have been built on the premise that messy logs are a fact of life and you therefore need to fork out for best-in-class indexing and querying. We say it\\u2019s time to tackle this problem at the source.\\n\\nPatchwork: Logging, _Un_\\\\\\n\\nimagined**\\n\\nWith Patchwork, developers can focus on building features while our product ensures that every log is context-rich, actionable, and secure. 
Developers can un-imagine logs, forever.\\n\\nPatchwork works in the background, reviewing your existing codebase and stepping in during continuous integration to ensure that your new code meets your engineering standards.\\n\\nOur product is flexible, but in our experience, good logs:\\n\\nAre context\\n\\nrich and actionable,\\n\\nExplain what the failed method was attempting (the intent),\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 3, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"--stream\"}}, {\"fragment_id\": \"6d268889-e06f-5db4-bb63-0611f2a13630\", \"extraction_id\": \"4d9f1146-9899-541e-bcd9-5bd790ad30c7\", \"document_id\": \"4667ad24-1166-5517-a98f-7eef4c9f78fa\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.240000339150372, \"text\": \"Most software agent companies have deeply optimized for swe-bench. However, this is not a representative task. We are building tooling we actually use. It\\u2019s written parts of itself already and we are extremely excited to share it with you.\\n\\n**How are we building this? **\\n\\nOur approach is to start with the IDE. Developers are most comfortable in the ide, so that\\u2019s where we are starting. We are integrating our agent directly, and it is able to make changes across massive codebases (below is it writing a feature in the cal\\\\[.\\\\]com codebase which is 1,155,075 lines long at the time of writing).\\n\\nSo, we are making an agent that is capable of _living in your codebase_ with almost perfect context.\\n\\nIt\\u2019s not just autocomplete. It\\u2019s not just chat with your codebase. It\\u2019s something beyond a software workflow. 
Our tooling makes the code feel fluid.\\n\\n_And thats special._\\n\\nHere it is editing the codebase:\\n\\nHere is the resulting component:\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 2, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"--stream\"}}, {\"fragment_id\": \"2ebda6c7-7d63-5908-a30c-0392231e5e04\", \"extraction_id\": \"1fa76175-4ae3-5d17-aaf1-05c973847c64\", \"document_id\": \"9d5f2ac0-57b0-5aff-bb80-9cd48d1dc290\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.23899090936698797, \"text\": \"### Selected answers from Mineflow's original YC application for the S24 Batch\\n\\n#### Describe what your company does in 50 characters or less.\\n\\nAI\\n\\ndriven geological modeling.\\n\\n### YC S24 Application Video\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 6, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"--stream\"}}], \"kg_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-A90f8uoIMm6kzwPGjgNn6Q7JzMDTi\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": null, \"message\": {\"content\": \"The query \\\"--stream\\\" appears to be related to the company Offstream, which focuses on automating carbon compliance for project developers. Offstream is co-founded by Zach Janicki and Varsha Ramesh Walsh [1]. The company aims to simplify the compliance process for carbon project developers by generating life cycle assessments (LCA) and compliance documentation more efficiently and cost-effectively [3]. 
Offstream has been described as \\\"Vanta for carbon compliance\\\" and has shown significant growth since joining the YC batch [7].\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1726710862, \"model\": \"gpt-4o-2024-05-13\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_e375328146\", \"usage\": {\"completion_tokens\": 104, \"prompt_tokens\": 1644, \"total_tokens\": 1748, \"completion_tokens_details\": {\"reasoning_tokens\": 0}}}}", + "timestamp": "2024-09-19 01:54:23" + } + ], + "timestamp": "2024-09-19T01:54:09", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "db3bff8b-4ff1-5bb4-92be-a1b091547a04", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "completion_record", + "value": "{\"message_id\": \"9fbd7aac-ce42-58ad-a807-a3770253294f\", \"message_type\": \"assistant\", \"timestamp\": \"2024-09-18T18:43:26.293447\", \"feedback\": null, \"score\": null, \"completion_start_time\": \"2024-09-18T18:53:49.514627\", \"completion_end_time\": \"2024-09-18T18:54:24.912781\", \"search_query\": \"whoami\", \"search_results\": {\"vector_search_results\": [{\"fragment_id\": \"8d28893a-41ab-5449-9382-9ae096957494\", \"extraction_id\": \"4f66a1e7-cd0a-565c-8df6-62403ebbc73e\", \"document_id\": \"1766ba53-4918-5f6b-8098-cbb8411c742d\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.37434355173447487, \"text\": \"**](mailto:info@ionworks.com) for a demo\\n\\nSubmit a [\\n\\n**PyBaMM Testimonial\\n\\n**]( for a chance to win some [swag](\\n\\nStar\\u00a0[\\n\\n**PyBaMM on GitHub\\n\\n**](\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 7, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"b6009e33-ca50-5104-8219-b8ef1d269ee4\", \"extraction_id\": 
\"8513b3d4-a057-5c57-8cea-99d40f3f8025\", \"document_id\": \"89e3d644-03b8-5281-b8d8-e7c95ae96818\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.3742204904556301, \"text\": \"136% MoM. With AI, we've been able to repurpose that same automation to tackle refunds in the flight industry and the class-action lawsuit landscape. Why us? (1) We're obsessed with reclaiming what's rightfully yours: Kamal and Anthony grew up in Lebanon, where people's money was unfairly taken away. (2) Weijie was the lead infra engineer for Wish's recommendation system, the feature that fueled their IPO. Our combined experience includes Apple, Addepar, Bolt, Wish, and StackAdapt.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 2, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"3f0b286d-6175-5ea9-a447-7e3bf3e446ef\", \"extraction_id\": \"8b17d20a-3aae-5dbf-91c2-05b92b3184a1\", \"document_id\": \"89e3d644-03b8-5281-b8d8-e7c95ae96818\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.34553650942227243, \"text\": \"Our Team \\ud83d\\udc68\\u200d\\ud83d\\udc68\\u200d\\ud83d\\udc66\\n\\n-----------------\\n\\n[Weijie]( (CTO) spent 6 years in e\\n\\ncommerce and was the lead infra engineer for Wish\\u2019s recommendation system, the product that fueled their IPO. He was a Senior SWE at Bolt and StackAdapt. He graduated from Waterloo & UIUC.\\n\\n[Kamal]( (CEO) earned a B.S. in Electrical Engineering and Computer Sciences from UC Berkeley, worked at Apple as a Security/ML software engineer intern, and met hundreds of engineers while working at Human Capital. 
He grew up in Lebanon, where he led operations resulting in 230+ families fed for an entire month and 25+ civilians housed.\\n\\n[Anthony]( (COO) graduated from USC in Computer Engineering and Computer Sciences and worked at Addepar as a software engineer intern. He also grew up in Lebanon, where he won a national hackathon, built a 5\\n\\nfigure affiliate marketing business, and led an engineering team at the country\\u2019s largest retailer.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 7, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"7cfddfe5-a6b5-5753-ac25-57c398161788\", \"extraction_id\": \"5444a3f4-0458-5d90-9199-3c6bc6d12d89\", \"document_id\": \"ef2bd5ad-e961-5436-b1a1-0423b8ccbaef\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.33084922042938536, \"text\": \"Founders*\\n\\n------------\\n\\n[Shaun Modi]( CEO. One of the first product designers hired at Airbnb. Worked side by side with CEO Brian Chesky crafting the user experience of Airbnb. Fmr Google, Fmr Nasa, named one of Business Insider's Top 75 Designers in Technology. BFA from RISD.\\n\\n[Tom Hallaran]( CTO. Fmr Director US AI Center of Excellence, Fmr VP of engineering at acquired adtech startup Selectable Media, Initial technical lead and architect of the largest political payment app in US. 
Undergrad from Columbia.\\n\\n[X]( [Linkedin]( [Instagram](\\n\\n#### YC Sign Photo\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 6, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"e05a37f1-02eb-5c7b-a06e-3944839f5625\", \"extraction_id\": \"3722cd74-ae41-586e-a878-a226ec54b3dd\", \"document_id\": \"62b51549-c42a-5f65-8325-c9f533917107\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.3276761379628017, \"text\": \"### Amy Xiao\\n\\nCo-founder & CEO at Aviary. ex Snapchat, Arize AI, AWS I\\u2019ve been in ML since 2016, starting as one of the first 10 hires at Borealis AI as an ML Research Engineer. At Snap, I helped build the ML infrastructure on the Perception team, where we delivered Scan, a real-time visual search product. Most recently, as an ML Solutions Architect at Arize, I worked with top ML teams, across tech and Fortune 200 companies, to advise on observing LLMs/ML models in production and land deals.\\n\\n### Edward Zhou\\n\\nCofounder & CTO @ Aviary. ex @ Notion, Pinterest At Notion, I founded the search team with a coworker in 2021, and later on went to lead search ranking and ML infra across the company. Also did lots of large scale search infra (\\\\>50b docs) & some product eng (Notion comments) At Pinterest, I led a team to build a 0-1 ads optimization recommendation system. 
Also worked on ML serving and realtime infra powering our billion+ dollar ads system, and built a few 0-1 mobile and web ad products.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 1, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"bb623da9-c0fc-5408-82ed-8d1a66bf0e65\", \"extraction_id\": \"05b1d344-f607-579a-b594-35be021fa6d6\", \"document_id\": \"013e0043-0bb7-5f52-bd21-738513c46a59\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.32682364263252706, \"text\": \"cast announcements, and keep important links top of mind every time a new browser tab is opened. \\u2022 All Hands Q&A: Crowdsource questions, anonymously if desired, for large team meetings to make sure everyone's voice is heard, and the most top of mind questions are answered and recorded. \\u2022 Smart Groups: Create rules based groups of people and use them to make sure the right people are in the right conversations and have the right access.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 2, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"unstructured_is_continuation\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"a3094b9f-a30c-5129-9998-3e4521d58995\", \"extraction_id\": \"a63267ca-cfaf-53d0-bdf2-4534e8e89847\", \"document_id\": \"02452258-207d-5fbd-8db6-b04dd0838f6d\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.3257497160733085, \"text\": \"### Resources\\n\\n[Startup Directory](\\n\\n[Startup Library](\\n\\n[Investors](\\n\\n[SAFE](\\n\\n[Hacker News](\\n\\n[Launch YC](\\n\\n[YC Deals](\\n\\n### Make something people want.\\n\\n[Apply](\\n\\n[Twitter](\\n\\n\\u00a9 2024 Y Combinator\", \"metadata\": {\"version\": 
\"v0\", \"chunk_order\": 8, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"551bfb45-a4d5-519d-8b82-119bcdab42e6\", \"extraction_id\": \"be19344d-d209-53b4-8657-1d857a05cb55\", \"document_id\": \"89e3d644-03b8-5281-b8d8-e7c95ae96818\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.32335275411606057, \"text\": \"Title: pap! : The world owes you money. We get it for you. | Y Combinator\\n\\nURL Source:\\n\\nMarkdown Content:\\n\\n### The world owes you money. We get it for you.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 0, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"4818de84-8eeb-5baa-b22e-3825fe56c005\", \"extraction_id\": \"82b49090-2fd0-5d2b-babe-229cb3ea5c86\", \"document_id\": \"4bdafeb6-49c6-5471-9900-7521e44d72d1\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.32218984829247965, \"text\": \"Location:New York\\n\\n### Active Founders\\n\\n### Yooni Ahn\\n\\nYooni is the CEO and co-founder @ Odo. She built products and business at Robinhood (Robinhood Gold, Customer Care during Gamestop, Original Content) for five years. Before that, she did consulting at Oliver Wyman where she spent endless hours writing government proposals. She studied public policy at Princeton University.\\n\\n### Andrew Wagner\\n\\nAndrew is the CTO and Co-founder of Odo. He's a generalist software engineer with experience across iOS and full stack web development. He worked for Robinhood, Garmin, and also spent many years freelancing for small companies. 
Working for those companies, he developed outsized skill in taking projects from 0 to 1 extremely quickly and also built a deep empathy for businesses struggling to balance billable work with finding new contracts.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 1, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}, {\"fragment_id\": \"10479397-4fa5-5601-9784-84350b3fdf38\", \"extraction_id\": \"d47f24fe-41dc-52ec-8ba3-a9b6ecb315cf\", \"document_id\": \"27fe2d03-d33d-50a6-a6a2-ffe1e212365c\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.3196453452110313, \"text\": \"Founder\\n\\nAlan is the co-founder and CEO of Callback. He was previously a product manager at Microsoft and AWS and the co-founder of an entertainment analytics startup. He studied CS at Harvard and Columbia and has an MBA from Harvard Business School.\\n\\n### Company Launches\\n\\n[### Callback\\n\\nAI business process outsourcing](\\n\\n**TLDR**: [**Callback**]( **is the first AI-native BPO built using a modern tech stack. **\\n\\nOur software platform helps our customers stay focused on their core business by automating their ops processes with humans in the loop. 
We\\u2019ve handled tasks such as image labeling, invoice extraction, and PDF report parsing \\u2014 with greater accuracy and speed than traditional BPOs or off-the-shelf software solutions.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 1, \"document_type\": \"txt\", \"unstructured_filetype\": \"text/plain\", \"unstructured_languages\": [\"eng\"], \"partitioned_by_unstructured\": true, \"associated_query\": \"whoami\"}}], \"kg_search_results\": null}, \"llm_response\": {\"id\": \"chatcmpl-A90f6hwFQGEhvJZS2BNUWlTC2mkAd\", \"choices\": [{\"finish_reason\": \"stop\", \"index\": 0, \"logprobs\": null, \"message\": {\"content\": \"The query \\\"whoami\\\" does not directly correspond to any specific information provided in the context. However, based on the context provided, here are some possible interpretations:\\n\\n1. If you are Kamal, you are the CEO who earned a B.S. in Electrical Engineering and Computer Sciences from UC Berkeley, worked at Apple as a Security/ML software engineer intern, and have experience leading operations in Lebanon [3].\\n2. If you are Weijie, you are the CTO who spent 6 years in e-commerce, was the lead infra engineer for Wish\\u2019s recommendation system, and have worked as a Senior SWE at Bolt and StackAdapt [3].\\n3. If you are Anthony, you are the COO who graduated from USC in Computer Engineering and Computer Sciences, worked at Addepar as a software engineer intern, and have experience leading an engineering team at Lebanon\\u2019s largest retailer [3].\\n4. If you are Shaun Modi, you are the CEO who was one of the first product designers hired at Airbnb, worked at Google and NASA, and have a BFA from RISD [4].\\n5. If you are Tom Hallaran, you are the CTO who was the Director of the US AI Center of Excellence, VP of engineering at Selectable Media, and have an undergrad degree from Columbia [4].\\n6. 
If you are Amy Xiao, you are the Co-founder & CEO at Aviary with experience at Snapchat, Arize AI, and AWS, and have been in ML since 2016 [5].\\n7. If you are Edward Zhou, you are the Cofounder & CTO at Aviary with experience at Notion and Pinterest, and have worked on large-scale search infra and ML serving [5].\\n8. If you are Yooni Ahn, you are the CEO and co-founder at Odo with experience at Robinhood and Oliver Wyman, and studied public policy at Princeton University [9].\\n9. If you are Andrew Wagner, you are the CTO and Co-founder at Odo with experience at Robinhood, Garmin, and freelancing, and have skills in taking projects from 0 to 1 quickly [9].\\n10. If you are Alan, you are the co-founder and CEO of Callback with experience as a product manager at Microsoft and AWS, and have studied CS at Harvard and Columbia and have an MBA from Harvard Business School [10].\\n\\nPlease specify more details if you need a more precise identification.\", \"refusal\": null, \"role\": \"assistant\", \"function_call\": null, \"tool_calls\": null}}], \"created\": 1726710860, \"model\": \"gpt-4o-2024-05-13\", \"object\": \"chat.completion\", \"service_tier\": null, \"system_fingerprint\": \"fp_e375328146\", \"usage\": {\"completion_tokens\": 492, \"prompt_tokens\": 1413, \"total_tokens\": 1905, \"completion_tokens_details\": {\"reasoning_tokens\": 0}}}}", + "timestamp": "2024-09-19 01:54:24" + } + ], + "timestamp": "2024-09-19T01:53:49", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "ceefe8e4-c8fc-51f4-82e2-de0c0c1fe58c", + "run_type": "RETRIEVAL", + "entries": [ + { + "key": "search_latency", + "value": "0.28", + "timestamp": "2024-09-19 01:53:45" + } + ], + "timestamp": "2024-09-19T01:53:44", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "a80ddb89-90dc-53ea-bace-e1def56aa31b", + "run_type": "RESTRUCTURE", + "entries": [], + "timestamp": "2024-09-19T01:50:12", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + 
{ + "run_id": "6538caba-05a8-5d2d-ba96-d0dce032cb7b", + "run_type": "RESTRUCTURE", + "entries": [], + "timestamp": "2024-09-19T01:46:39", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "8f4432c2-b19e-57fa-8749-2c3db52edea6", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T01:46:16", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "fb090f47-9bed-5c6d-bd41-cc2cfc333c44", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T01:46:06", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "64d8dfe1-3337-531b-9b54-1d8797f19a70", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T01:45:55", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "79721a21-72b6-5f0c-8c6d-8e80c58af197", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T01:45:45", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "191f5e2e-40f0-5853-b8dc-b76ffe625996", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T01:45:34", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "c7695a69-8759-5a31-901d-adf868603ba2", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T01:45:24", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "4da3a262-9683-5b34-8d06-b5db0e010f93", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T01:45:13", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "bcef0a5a-d826-5e33-ab26-751447d9c6a9", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T01:45:03", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "295d6933-d51d-5c98-a564-4fd96a57eb87", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-19T01:44:52", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": 
"bea16293-e78b-5950-9a71-9e2ba3f944e7", + "run_type": "MANAGEMENT", + "entries": [], + "timestamp": "2024-09-19T01:44:52", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "7504ce73-fdaa-5d41-90e7-788c8e3503e3", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-19T01:44:51", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "2084e8ca-597c-54d0-943d-b71fbaed0499", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-19T01:44:50", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "59e0a6a8-1403-54a1-ac7b-37dfa4f3c418", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-19T01:44:50", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "9a6b99b3-24dc-5922-8de8-65eb1c66f7c2", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-19T01:44:49", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "6b762506-e72e-5ea7-adb7-b46e4a469d2f", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-19T01:44:49", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + }, + { + "run_id": "6c60a5f9-7459-5bd6-8bd0-bc88ff840745", + "run_type": "INGESTION", + "entries": [], + "timestamp": "2024-09-19T01:44:48", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220" + } + ] + }, + "analytics": { + "results": { + "analytics_data": { + "search_latencies": { + "Mean": 0.302, + "Median": 0.27, + "Mode": 0.23, + "Standard Deviation": 0.1, + "Variance": 0.01 + } + }, + "filtered_logs": { + "search_latencies": [ + { + "run_id": "1b9efeb6-6dda-5d6a-80e9-6fcb16b174fd", + "key": "search_latency", + "value": "0.15", + "timestamp": "2024-09-20 22:55:36" + }, + { + "run_id": "b8ef0f89-7468-538f-9c6e-5d090ffb9f61", + "key": "search_latency", + "value": "0.30", + "timestamp": "2024-09-20 22:47:14" + }, + { + "run_id": "b8ef0f89-7468-538f-9c6e-5d090ffb9f61", + "key": "search_latency", + "value": "0.23", + "timestamp": 
"2024-09-20 22:47:04" + }, + { + "run_id": "b8ef0f89-7468-538f-9c6e-5d090ffb9f61", + "key": "search_latency", + "value": "0.37", + "timestamp": "2024-09-20 22:46:30" + }, + { + "run_id": "ff899df9-42a3-58be-9a09-dd3a36759d22", + "key": "search_latency", + "value": "0.23", + "timestamp": "2024-09-20 22:08:53" + }, + { + "run_id": "ff899df9-42a3-58be-9a09-dd3a36759d22", + "key": "search_latency", + "value": "0.27", + "timestamp": "2024-09-20 22:08:47" + }, + { + "run_id": "c02ac9d9-53cd-5fb6-9195-a3117c932932", + "key": "search_latency", + "value": "0.19", + "timestamp": "2024-09-20 22:00:16" + }, + { + "run_id": "c02ac9d9-53cd-5fb6-9195-a3117c932932", + "key": "search_latency", + "value": "0.23", + "timestamp": "2024-09-20 22:00:05" + }, + { + "run_id": "c02ac9d9-53cd-5fb6-9195-a3117c932932", + "key": "search_latency", + "value": "0.27", + "timestamp": "2024-09-20 21:58:52" + }, + { + "run_id": "15eab53f-6057-5748-be6d-d4da7d4e3754", + "key": "search_latency", + "value": "0.23", + "timestamp": "2024-09-20 21:55:14" + }, + { + "run_id": "15eab53f-6057-5748-be6d-d4da7d4e3754", + "key": "search_latency", + "value": "0.23", + "timestamp": "2024-09-20 21:54:42" + }, + { + "run_id": "6f4d56c8-29aa-56e0-8455-ca58b53e2644", + "key": "search_latency", + "value": "0.26", + "timestamp": "2024-09-20 21:46:19" + }, + { + "run_id": "6f4d56c8-29aa-56e0-8455-ca58b53e2644", + "key": "search_latency", + "value": "0.41", + "timestamp": "2024-09-20 21:45:17" + }, + { + "run_id": "6f4d56c8-29aa-56e0-8455-ca58b53e2644", + "key": "search_latency", + "value": "0.40", + "timestamp": "2024-09-20 21:10:41" + }, + { + "run_id": "f5b31cd5-3dcb-5b00-8377-cbfa595a6cf8", + "key": "search_latency", + "value": "0.25", + "timestamp": "2024-09-20 19:33:40" + }, + { + "run_id": "f5b31cd5-3dcb-5b00-8377-cbfa595a6cf8", + "key": "search_latency", + "value": "0.47", + "timestamp": "2024-09-20 19:29:31" + }, + { + "run_id": "a3f81877-9665-5c50-97cc-f1bab9e084df", + "key": "search_latency", + "value": 
"0.30", + "timestamp": "2024-09-20 00:36:54" + }, + { + "run_id": "aa4b3c3c-1e45-5b7d-b6f3-58c4db58d4fe", + "key": "search_latency", + "value": "0.25", + "timestamp": "2024-09-20 00:06:48" + }, + { + "run_id": "cbbcab33-9a63-52ff-969a-daaffcfc20f8", + "key": "search_latency", + "value": "0.31", + "timestamp": "2024-09-19 23:53:01" + }, + { + "run_id": "8f370ebc-90db-56df-840c-db626d5f30dd", + "key": "search_latency", + "value": "0.54", + "timestamp": "2024-09-19 23:52:20" + }, + { + "run_id": "5021d77f-c1d3-54fe-b717-3af09d023604", + "key": "search_latency", + "value": "0.47", + "timestamp": "2024-09-19 17:53:36" + }, + { + "run_id": "ceefe8e4-c8fc-51f4-82e2-de0c0c1fe58c", + "key": "search_latency", + "value": "0.28", + "timestamp": "2024-09-19 01:53:45" + } + ] + } + } + } +} diff --git a/py/tests/regression/observed_outputs/test_retrieval.json b/py/tests/regression/observed_outputs/test_retrieval.json new file mode 100644 index 000000000..8ea8e66f9 --- /dev/null +++ b/py/tests/regression/observed_outputs/test_retrieval.json @@ -0,0 +1,640 @@ +{ + "search": { + "results": { + "vector_search_results": [ + { + "fragment_id": "392ab9b4-c4bc-5894-8edf-332fcd9245bb", + "extraction_id": "cd49a88d-92e5-59f1-8331-3d3d3ecb7f3a", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.23822619091116293, + "text": "Finance Leases\n\n2022 2023 2024 2025 2026 Thereafter Total undiscounted lease payments Less: imputed interest\n\n$\n\n280 $ 312 264 214 198 2,067 3,335 (1,506) 1,829 $\n\n140 60 34 9 \u2014 1 244 (10) 234\n\nTotal lease liabilities\n\n$\n\nAs of December 31, 2021, we had additional operating leases and finance leases, primarily for corporate offices and servers, that have not yet commenced of $421 million and $19 million, respectively. 
These operating and finance leases will commence between fiscal year 2022 and fiscal year 2023 with lease terms of 2 years to 13 years.\n\nMission Bay 1 & 2\n\nIn 2015, we entered into a joint venture (\u201cJV\u201d) agreement with a real estate developer (\u201cJV Partner\u201d) to develop land (\u201cthe Land\u201d) in San Francisco to construct our new headquarters (the \u201cHeadquarters\u201d). The Headquarters consists of two adjacent office buildings totaling approximately 423,000 rentable square feet. In connection with the JV arrangement, we acquired a 49% interest in the JV, the principal asset of which was the Land.", + "metadata": { + "version": "v0", + "chunk_order": 759, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 109, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" + } + }, + { + "fragment_id": "fd8508db-c444-5ed9-afce-67340354fb1e", + "extraction_id": "7f16fa20-9bc1-5841-ba74-95cdbb27e9fb", + "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.21572618390761744, + "text": "(c)\n\nCommuting and Corporate Housing Expenses. During your Employment, the Company shall cover the cost of your reasonable and substantiated expenses for travel between your primary residence and the Company\u2019s headquarters in San Francisco and corporate housing in the San Francisco Bay Area, up to a pre-tax maximum of $200,000 per year in the aggregate. 
All expense reimbursements shall be made in accordance with the Company\u2019s expense reimbursement policy.\n\n(d)", + "metadata": { + "version": "v0", + "chunk_order": 971, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 170, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" + } + }, + { + "fragment_id": "76d010b2-1498-531a-bf89-66aa17331203", + "extraction_id": "f31920df-e1db-5a2c-9b8f-9c7b845a21c1", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.1947217732667923, + "text": "45-2647441 (I.R.S. Employer Identification No.)\n\n1515 3rd Street San Francisco, California 94158 (Address of principal executive offices, including zip code) (415) 612-8582 (Registrant\u2019s telephone number, including area code) ____________________________________________\n\nSecurities registered pursuant to Section 12(b) of the Act:\n\nTitle of each class Common Stock, par value $0.00001 per share", + "metadata": { + "version": "v0", + "chunk_order": 1, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 1, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" + } + }, + { + "fragment_id": "c96ee688-6e36-5abb-b066-d87779be1cf6", + "extraction_id": "eb08b70d-2e82-5de3-90ee-98537a761ea8", + "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.19264961779117584, + "text": "Item 2. Properties.\n\nOur corporate headquarters are located in San Francisco, California, and consist of approximately 420,000 square feet under lease agreements through May 31,\n\n2030. We maintain additional offices in multiple locations in the U.S. 
and internationally in Montreal, Canada, Munich, Germany and Minsk, Belarus.\n\nWe lease all of our facilities and do not own any real property. We believe our facilities are adequate and suitable for our current needs and that, should it be\n\nneeded, suitable additional or alternative space will be available to accommodate our operations.\n\n53\n\nItem 3. Legal Proceedings.\n\nSee discussion under the heading Legal Proceedings in Note 9 to the consolidated financial statements included in Part II, Item 8 of this report.\n\nItem 4. Mine Safety Disclosures.\n\nNot applicable.\n\n54\n\nPART II\n\nItem 5. Market for Registrant\u2019s Common Equity, Related Stockholder Matters and Issuer Purchases of Equity Securities.", + "metadata": { + "version": "v0", + "chunk_order": 434, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 53, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" + } + }, + { + "fragment_id": "cfc07310-2da0-5cfb-ba9b-b3c669084882", + "extraction_id": "376938c5-338f-5c56-b599-efcf94ace67a", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.18162215163575735, + "text": "Property and equipment, net\n\n$\n\nWe capitalized $76 million and $55 million in internal-use software costs during the years ended December 31, 2020 and 2021, respectively, which is included in property and equipment, net on the consolidated balance sheets. 
Amortization of capitalized software development costs was $22 million, $55 million, and $69 million for the years ended December 31, 2019, 2020 and 2021, respectively.\n\nAmounts in construction in progress represent buildings, leasehold improvements, assets under construction, and other assets not placed in service.", + "metadata": { + "version": "v0", + "chunk_order": 754, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 107, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" + } + }, + { + "fragment_id": "2654f646-222c-50af-bd1c-c7311e6a9dc9", + "extraction_id": "b25d210b-1b58-578a-b038-34f76d77f377", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.18122072927828292, + "text": "\u201cCanadian BA Rate Borrowing\u201d refers to a Borrowing bearing interest at a rate determined by reference to the Canadian BA Rate.\n\n10\n\n\u201cCanadian BA Rate Loan\u201d refers to a Loan bearing interest at a rate determined by reference to the Canadian BA Rate.\n\n\u201cCanadian Dollars\u201d means the lawful currency of Canada.\n\n\u201cCapital Lease Obligations\u201d of any Person means the obligations of such Person to pay rent or other amounts under any lease of (or", + "metadata": { + "version": "v0", + "chunk_order": 1085, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 182, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" 
+ } + }, + { + "fragment_id": "309f729d-78eb-569a-837c-50367c20e898", + "extraction_id": "dfc368a6-efaf-5f4d-a20b-0fd6059a5f35", + "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.1806427240371704, + "text": "principal payments on finance lease obligations for $35.5 million.\n\nCash provided by financing activities was $512.6 million for the year ended December 31, 2020, which primarily consisted of proceeds from issuance of our 2025\n\nNotes of $734.1 million offset by the purchase of the Capped Calls for $132.7 million.\n\nLiquidity and Capital Resources", + "metadata": { + "version": "v0", + "chunk_order": 531, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 71, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" + } + }, + { + "fragment_id": "a0a98796-ca8a-5c8b-9063-a15064a914d4", + "extraction_id": "411a2659-a240-5c20-b11a-305b2b66987d", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.16736936076696907, + "text": "Depreciation and Amortization\n\nDepreciation and amortization expenses primarily consist of depreciation on buildings, site improvements, computer and network equipment, software, leasehold improvements, furniture and fixtures, and amortization of intangible assets. Depreciation includes expenses associated with buildings, site improvements, computer and network equipment, leased vehicles, and furniture, fixtures, as well as leasehold improvements. 
Amortization includes expenses associated with our capitalized internal-use software and acquired intangible assets.\n\nAs our business recovers from the impacts of COVID-19, we would anticipate depreciation and amortization expenses to increase as we continue to build out\n\nour network infrastructure and building locations.", + "metadata": { + "version": "v0", + "chunk_order": 457, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 55, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" + } + }, + { + "fragment_id": "b31fdb40-3d2e-54ba-8064-321fd1420f38", + "extraction_id": "ff6c949e-ff32-5c38-983c-eba10f3a9609", + "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.1666801671171001, + "text": "Our future capital requirements will depend on many factors, including, but not limited to our growth, our ability to maintain profitability on an Adjusted EBITDA basis, our ability to attract and retain drivers and riders on our platform, the continuing market acceptance of our offerings, the timing and extent of spending to support our efforts to develop our platform, actual insurance payments for which we have made reserves, measures we take in response to the COVID-19 pandemic, our ability to maintain demand for and confidence in the safety of our platform during and following the COVID-19 pandemic, and the expansion of sales and marketing activities. As noted above, we expect to see continued suppression of demand for our platform and the resultant negative impacts on revenue for so long as the travel restrictions and other social distancing measures in response to COVID-19 remain in place. 
Further, we may in the future enter into arrangements to acquire or invest in businesses, products, services and", + "metadata": { + "version": "v0", + "chunk_order": 538, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 72, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" + } + }, + { + "fragment_id": "ff0eb83c-ea67-51e2-8201-4211aa9d591a", + "extraction_id": "69e31e99-75df-5850-b71b-2b973d4cf244", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.1657480697932172, + "text": "(1)\n\n(2)\n\nFor the year ended December 31, 2019, the increase in the valuation allowance was primarily attributable to a step-up in the tax basis of intellectual property rights, an increase in U.S. federal, state and Netherlands deferred tax assets resulting from the loss from operations, and tax credits generated during the year.\n\nFor the year ended December 31, 2020, the increase in the valuation allowance was primarily attributable to an increase in tax rate in Netherlands, an increase\n\nin U.S. federal, state and Netherlands deferred tax assets resulting from the loss from operations, and tax credits generated during the year.", + "metadata": { + "version": "v0", + "chunk_order": 969, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 148, + "partitioned_by_unstructured": true, + "associated_query": "What is the capital of France?" + } + } + ], + "kg_search_results": null + } + }, + "basic_rag": { + "results": { + "completion": { + "id": "chatcmpl-A9gpFrelYOJObG7VkyoUz23HCc0r2", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "Uber's profit in 2020 was actually a net loss. 
The net loss attributable to Uber Technologies, Inc. for the year ended December 31, 2020, was $6.8 billion [3].", + "refusal": null, + "role": "assistant", + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1726872937, + "model": "gpt-4o-2024-05-13", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_3537616b13", + "usage": { + "completion_tokens": 44, + "prompt_tokens": 2320, + "total_tokens": 2364, + "completion_tokens_details": { + "reasoning_tokens": 0 + } + } + }, + "search_results": { + "vector_search_results": [ + { + "fragment_id": "07aa09c5-81a8-5a48-953a-532064a446f8", + "extraction_id": "d3060c36-85dc-5e8d-b8ff-cfe4c1753ccc", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.7445549521115464, + "text": "Revenue was $17.5 billion, or up 57% year-over-year, reflecting the overall growth in our Delivery business and an increase in Freight revenue attributable to the acquisition of Transplace in the fourth quarter of 2021 as well as growth in the number of shippers and carriers on the network combined with an increase in volumes with our top shippers.\n\nNet loss attributable to Uber Technologies, Inc. was $496 million, a 93% improvement year-over-year, driven by a $1.6 billion pre-tax gain on the sale of our ATG Business to Aurora, a $1.6 billion pre-tax net benefit relating to Uber\u2019s equity investments, as well as reductions in our fixed cost structure and increased variable cost efficiencies. Net loss attributable to Uber Technologies, Inc. also included $1.2 billion of stock-based compensation expense.", + "metadata": { + "version": "v0", + "chunk_order": 445, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 53, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" 
+ } + }, + { + "fragment_id": "2044e305-c042-5f0d-b05d-a2b97181f7a8", + "extraction_id": "4329441a-5faf-5e9d-801f-ebd753ee1bd3", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.7071289420127869, + "text": "Total costs and expenses Loss from operations\n\nInterest expense Other income (expense), net Loss before income taxes and loss from equity method investments Provision for (benefit from) income taxes Loss from equity method investments Net loss including non-controlling interests\n\nLess: net loss attributable to non-controlling interests, net of tax\n\n100 %\n\n46 % 16 % 32 % 20 % 24 % 5 % 144 % (44)% (4)% (15)% (62)% (2)% \u2014 % (61)% \u2014 % (61)%\n\n100 %\n\n54 % 11 % 27 % 12 % 13 % 5 % 122 % (22)% (3)% 19 % (6)% (3)% \u2014 % (3)% \u2014 % (3)%\n\nNet loss attributable to Uber Technologies, Inc.\n\n(1)\n\nTotals of percentage of revenues may not foot due to rounding.\n\nComparison of the Years Ended December 31, 2020 and 2021\n\nRevenue\n\nYear Ended December 31,\n\n(In millions, except percentages)\n\n2020\n\n2021\n\n2020 to 2021 % Change\n\nRevenue\n\n$\n\n11,139 $\n\n17,455\n\n57 %", + "metadata": { + "version": "v0", + "chunk_order": 463, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 57, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" 
+ } + }, + { + "fragment_id": "3840834b-7c74-5417-9252-9080e609fb2f", + "extraction_id": "cf934fe1-926d-5525-a230-30946961cf28", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.6809690201571295, + "text": "Year Ended December 31, 2020\n\nRevenue Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately below Operations and support Sales and marketing Research and development General and administrative Depreciation and amortization\n\nTotal costs and expenses Loss from operations\n\nInterest expense Other income (expense), net Loss before income taxes and loss from equity method investments Provision for (benefit from) income taxes Loss from equity method investments Net loss including non-controlling interests\n\nLess: net loss attributable to non-controlling interests, net of tax\n\n$\n\n13,000 $\n\n6,061 2,302 4,626 4,836 3,299 472 21,596 (8,596) (559) 722 (8,433) 45 (34) (8,512) (6) (8,506) $\n\n11,139 $\n\n5,154 1,819 3,583 2,205 2,666 575 16,002 (4,863) (458) (1,625) (6,946) (192) (34) (6,788) (20) (6,768) $\n\nNet loss attributable to Uber Technologies, Inc. Net loss per share attributable to Uber Technologies, Inc. common stockholders:\n\n$\n\nBasic\n\n$\n\n(6.81) $\n\n(3.86) $", + "metadata": { + "version": "v0", + "chunk_order": 574, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 77, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" 
+ } + }, + { + "fragment_id": "e2a37b27-0644-59e4-9746-37d48592a299", + "extraction_id": "6b86ac2f-ce33-5126-83e6-a8731ea677c8", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.6805637085605776, + "text": "Less: net loss attributable to non-controlling interests, net of tax\n\n$\n\n11,139 $\n\n5,154 1,819 3,583 2,205 2,666 575 16,002 (4,863) (458) (1,625) (6,946) (192) (34) (6,788) (20) (6,768) $\n\n17,455\n\n9,351 1,877 4,789 2,054 2,316 902 21,289 (3,834) (483) 3,292 (1,025) (492) (37) (570) (74) (496)\n\nNet loss attributable to Uber Technologies, Inc.\n\n$\n\n54\n\nThe following table sets forth the components of our consolidated statements of operations for each of the periods presented as a percentage of revenue\n\n(1)\n\n:\n\nYear Ended December 31, 2021 2020\n\nRevenue Costs and expenses Cost of revenue, exclusive of depreciation and amortization shown separately below Operations and support Sales and marketing Research and development General and administrative Depreciation and amortization", + "metadata": { + "version": "v0", + "chunk_order": 462, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 56, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" 
+ } + }, + { + "fragment_id": "de30c3c9-cdfd-5872-bdaf-4859bef5c3a8", + "extraction_id": "33bc6d8b-9fdc-5df7-be1d-fa7de176a0b5", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.6537506580352783, + "text": "The Uber Service activities are performed to satisfy our sole performance obligation in the transaction, which is to connect Drivers and Merchants with end-\n\nusers to facilitate the completion of a successful transaction.\n\nIn 2020, we began charging Mobility end-users a fee to use the platform in certain markets. In these transactions, in addition to a performance obligation to Drivers, we also have a performance obligation to end-users, which is to connect end-users to Drivers in the marketplace. We recognize revenue when a trip is complete. We present revenue on a net basis for these transactions, as we do not control the service provided by Drivers to end-users. For the years ended December 31, 2020 and 2021, we recognized total revenue of $323 million and $336 million, respectively, associated with these fees charged to end-users.", + "metadata": { + "version": "v0", + "chunk_order": 642, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 90, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" 
+ } + }, + { + "fragment_id": "4b6ee3eb-daca-5930-bafe-946cad56cdcc", + "extraction_id": "eb2bc121-0b00-5f70-8eb6-549e1fb1ed72", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.6387766599655151, + "text": "Other income (expense), net\n\n$\n\nDuring the year ended December 31, 2020, gain on business divestitures, net represented a $154 million gain on the sale of our Uber Eats India operations to Zomato recognized in the first quarter of 2020 and a $77 million gain on the sale of our European Freight Business to sennder GmbH (\u201cSennder\u201d) recognized in the fourth quarter of 2020, partially offset by a $27 million loss on the sale of our JUMP operations to Lime recognized in the second quarter of 2020.\n\n(1)\n\nDuring the year ended December 31, 2021, gain on business divestitures, net represented a $1.6 billion gain on the sale of our ATG Business to Aurora", + "metadata": { + "version": "v0", + "chunk_order": 799, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 118, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" + } + }, + { + "fragment_id": "af5f93d3-8b2f-5e71-a358-0dd56c2f68ac", + "extraction_id": "acf12622-2e6e-5234-9768-ba448294a81d", + "document_id": "2f576170-c4f9-5141-a910-a0924f341de4", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.632179401387013, + "text": "2019\n\n100.0 %\n\n60.2 17.6 41.6 22.5 32.8 174.7 (74.7) \u2014 2.8 (71.9) 0.1 (72.0)%\n\n2019 to 2020 % Change\n\n(35) %\n\nsecond quarter of 2021. 
These increases were offset by investments in driver supply by increasing driver incentives recorded as a reduction to revenue by $942.9 million in 2021 as compared to the prior year as rider demand outpaced driver supply during certain periods of the pandemic recovery in 2021. Revenue in 2020 was also higher in the first quarter of 2020 prior to the implementation of shelter-in-place orders and other travel restrictions across North America beginning March 2020.\n\nWe expect to see continued recovery in demand for our platform and the resulting positive impacts on revenue as there are more widespread immunity levels, more communities reopen and other restrictive travel and social distancing measures in response to COVID-19 are eased. However, we cannot predict the impact of COVID variants and the longer term impact of the pandemic on consumer behavior.\n\nCost of Revenue\n\n2021", + "metadata": { + "version": "v0", + "chunk_order": 493, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 63, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" + } + }, + { + "fragment_id": "d5379124-e7ff-509f-b47f-a79152eec2d4", + "extraction_id": "2562b865-e4df-5376-9e70-927be9afbb7e", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.63012705682194, + "text": "For additional discussion, see the risk factor titled \u201c\u2014If we are unable to attract or maintain a critical mass of Drivers, consumers, merchants, shippers, and carriers, whether as a result of competition or other factors, our platform will become less appealing to platform users, and our financial results would be adversely impacted.\u201d included in Part I, Item 1A of this Annual Report on Form 10-K as well our 2021 ESG Report and our 2021 People and Culture Report. 
The information in these reports is not a part of this Form 10-K.\n\nAdditional Information\n\nWe were founded in 2009 and incorporated as Ubercab, Inc., a Delaware corporation, in July 2010. In February 2011, we changed our name to Uber\n\nTechnologies, Inc. Our principal executive offices are located at 1515 3rd Street, San Francisco, California 94158, and our telephone number is (415) 612-8582.\n\n10", + "metadata": { + "version": "v0", + "chunk_order": 77, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 12, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" + } + }, + { + "fragment_id": "98a93be5-13ba-5bd6-9a18-e7ceef0fae88", + "extraction_id": "8ab931e3-8f47-5598-90b8-928f387ec256", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.6285917009601995, + "text": "Uber Technologies, Inc. (\u201cUber,\u201d \u201cwe,\u201d \u201cour,\u201d or \u201cus\u201d) was incorporated in Delaware in July 2010, and is headquartered in San Francisco, California. Uber is a technology platform that uses a massive network, leading technology, operational excellence and product expertise to power movement from point A to point B. Uber develops and operates proprietary technology applications supporting a variety of offerings on its platform (\u201cplatform(s)\u201d or \u201cPlatform(s)\u201d). Uber connects consumers (\u201cRider(s)\u201d) with independent providers of ride services (\u201cMobility Driver(s)\u201d) for ridesharing services, and connects Riders and other consumers (\u201cEaters\u201d) with restaurants, grocers and other stores (collectively, \u201cMerchants\u201d) with delivery service providers (\u201cCouriers\u201d) for meal preparation, grocery and other delivery services. 
Riders and Eaters are collectively referred to as \u201cend-user(s)\u201d or \u201cconsumer(s).\u201d Mobility Drivers and Couriers are collectively referred to as \u201cDriver(s).\u201d Uber also connects consumers with public", + "metadata": { + "version": "v0", + "chunk_order": 592, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 84, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" + } + }, + { + "fragment_id": "d87084d1-c52a-5a4b-96ad-9fc1cb98bfc5", + "extraction_id": "8361bf60-bce2-56c2-b982-376a75e47d58", + "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.6283430678639979, + "text": "Year Ended December 31, 2020\n\n1,000 49 1,189 (27) \u2014 (138) (34) 8,939 (4) 3,824\n\n247 125 2,628 (527) (891) (224) 38 1,379 (92) (4,327)\n\n8,209 34 12,067 $\n\n12,067 (349) 7,391 $\n\n332 $ 133\n\n412 $ 82\n\n14,224 4,229 \u2014 251 9 \u2014 \u2014\n\n\u2014 \u2014 \u2014 196 3,898 171 1,634\n\n2021\n\n675 107 1,484 (27) (307) (226) 101 1,780 (69) 65\n\n7,391 349 7,805\n\n449 87\n\n\u2014 \u2014 232 184 1,868 1,018 \u2014\n\nUBER TECHNOLOGIES, INC.\n\nNOTES TO CONSOLIDATED FINANCIAL STATEMENTS\n\nNote 1 \u2013 Description of Business and Summary of Significant Accounting Policies\n\nDescription of Business", + "metadata": { + "version": "v0", + "chunk_order": 591, + "document_type": "pdf", + "unstructured_filetype": "application/pdf", + "unstructured_languages": [ + "eng" + ], + "unstructured_page_number": 83, + "partitioned_by_unstructured": true, + "associated_query": "What was Uber's profit in 2020?" 
+ } + } + ], + "kg_search_results": null + } + } + }, + "hybrid_rag": { + "results": { + "completion": { + "id": "chatcmpl-A9gpGNU67ZRKhvfOaZSOwJnzbVP2S", + "choices": [ + { + "finish_reason": "stop", + "index": 0, + "logprobs": null, + "message": { + "content": "John Snow is Ned Stark\u2019s bastard son. Since Catelyn is not his mother, he is not a proper member of the Stark family, and he often feels himself an outsider. He is also a highly capable swordsman and thinker, with a knack for piercing observations [1].", + "refusal": null, + "role": "assistant", + "function_call": null, + "tool_calls": null + } + } + ], + "created": 1726872938, + "model": "gpt-4o-2024-05-13", + "object": "chat.completion", + "service_tier": null, + "system_fingerprint": "fp_3537616b13", + "usage": { + "completion_tokens": 57, + "prompt_tokens": 1426, + "total_tokens": 1483, + "completion_tokens_details": { + "reasoning_tokens": 0 + } + } + }, + "search_results": { + "vector_search_results": [ + { + "fragment_id": "7cbdab86-1689-5779-81bd-62f7eb3ab36d", + "extraction_id": "866f85a0-b3d6-5fc5-9ca0-dbd2373eac58", + "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.2625822126865387, + "text": "Eddard (Ned) Stark\n\nThe Lord of Winterfell and new Hand of the King. A devoted father and dutiful lord, he is best characterized by his strong sense of honor, and he strives to always do what is right, regardless of his personal feelings.\n\nCatelyn (Cat) Tully\n\nNed\u2019s wife and Lady Stark of Winterfell. She is intelligent, strong, and fiercely devoted to her family, leading her to seek out the person responsible for trying to kill her son Bran.\n\nDaenerys Stormborn Targaryen\n\nThe Dothraki khaleesi (queen) and Targaryen princess. 
She and her brother are the only surviving members of the Targaryen family, and she grows from a frightened girl to a confident ruler, while still maintaining her kindness, over the course of the novel.\n\nJon Snow\n\nNed Stark\u2019s bastard son. Since Catelyn is not his mother, he is not a proper member of the Stark family, and he often feels himself an outsider. He is also a highly capable swordsman and thinker, with a knack for piercing observations.", + "metadata": { + "version": "v0", + "chunk_order": 0, + "document_type": "txt", + "unstructured_filetype": "text/plain", + "unstructured_languages": [ + "eng" + ], + "partitioned_by_unstructured": true, + "associated_query": "Who is John Snow?" + } + }, + { + "fragment_id": "8e563fb1-4665-53a8-8a83-63a1f88e2aea", + "extraction_id": "f6bc23b5-bc80-5e49-9b55-25e9abe97073", + "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.2610678750594818, + "text": "Jeor Mormont (Commander Mormont)\n\nLord Commander of the Night\u2019s Watch at Castle Black. Commander Mormont is tough, old, and wise, and his men call him \u201cThe Old Bear.\u201d\n\nMaester Aemon\n\nThe chief man of learning at Castle Black. Despite his blind white eyes, Maester Aemon sees and speaks the truth in cryptic ways. Though few people realize it, Aemon is one of the few surviving members of the Targaryen family, but he has always put his vows to the Night\u2019s Watch ahead of any family loyalties.\n\nSamwell (Sam) Tarly\n\nA new recruit to the Night\u2019s Watch who is fat and cowardly but very smart. Sam loves to read and eat but hates to fight, and he quickly becomes one of Jon Snow\u2019s closest companions at the Wall.\n\nSer Allister Thorne\n\nCastle Black\u2019s resentful master-at-arms. He hard on the new recruits to the Night\u2019s Watch and seems to enjoy making them suffer, causing Jon to rebel against him. 
During Robert\u2019s rebellion against the former king, he was a Targaryen loyalist.", + "metadata": { + "version": "v0", + "chunk_order": 7, + "document_type": "txt", + "unstructured_filetype": "text/plain", + "unstructured_languages": [ + "eng" + ], + "partitioned_by_unstructured": true, + "associated_query": "Who is John Snow?" + } + }, + { + "fragment_id": "eaf48cfe-592e-55fa-9f07-613a4f221c45", + "extraction_id": "fdf6127b-e623-58bc-a50b-b7e7b040c03a", + "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.2574058771133423, + "text": "Varys (The Spider)\n\nThe Red Keep\u2019s master of whispers and a eunuch. His role in the court is to run a network of spies and keep the king informed, and he often uses what he knows to manipulate those around him, including the king.\n\nRobert Baratheon\n\nThe corpulent king of Westeros. He loves to fight, drink, and sleep with women, and he hates the duties of ruling. He and Ned are long-time friends, and he was engaged to Ned\u2019s sister until she died.\n\nSer Jorah Mormont\n\nAn exiled knight who serves unofficially as Daenerys\u2019s chief advisor. Though he was exiled by Ned Stark for selling slaves, he is intelligent, valiant, and a great fighter. He swears allegiance to Viserys as true king of Westeros, but he also feeds information about the Targaryens back to Varys.", + "metadata": { + "version": "v0", + "chunk_order": 3, + "document_type": "txt", + "unstructured_filetype": "text/plain", + "unstructured_languages": [ + "eng" + ], + "partitioned_by_unstructured": true, + "associated_query": "Who is John Snow?" 
+ } + }, + { + "fragment_id": "3cb1c2db-01e4-5ea8-a39f-31f5949637f8", + "extraction_id": "02b64e7c-5aa5-5380-8fa0-3d8b64866aa8", + "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.20913409943187866, + "text": "Aerys II Targaryen\n\nKing of Westeros before Robert Baratheon. He was known as The Mad King because of his cruelty. Aerys murdered Ned\u2019s older brother, Brandon Stark, in the Red Keep\u2019s throne room. At the end of the war that followed, Jaime Lannister slew Aerys in the same room.\n\nRhaegar Targaryen\n\nThe heir to Aerys and older brother of Daenerys and Viserys. Rhaegar kidnapped Lyanna Stark, Robert\u2019s betrothed, helping to set in motion the events that led to Robert\u2019s Rebellion. The war effectively ended when Robert slew Rhaegar with his warhammer on the Trident River.\n\nJon Arryn\n\nThe recently deceased Lord of the Eyrie and Hand of the King. Jon Arryn fostered Ned Stark and Robert Baratheon at the Eyrie. When Robert became king, Jon Arryn served as his Hand until his murder.", + "metadata": { + "version": "v0", + "chunk_order": 10, + "document_type": "txt", + "unstructured_filetype": "text/plain", + "unstructured_languages": [ + "eng" + ], + "partitioned_by_unstructured": true, + "associated_query": "Who is John Snow?" + } + }, + { + "fragment_id": "5d09c80f-ba83-5204-a6b3-f08831e150b0", + "extraction_id": "f86a905c-8d82-52ff-ad72-a800ca3af6f4", + "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.1912464201450348, + "text": "Illyrio Mopatis\n\nAn obese merchant from the Free Cities who helps Daenerys and Viserys Targaryen. Illyrio is very rich and very well-informed. 
He is quick to please, especially when there is a possibility that his kindness will help him avoid trouble or gain greater fortune in the future.\n\nSer Barristan Selmy\n\nLord Commander of the Kingsguard. He has served kings Jaehaerys, Aerys II, and Robert. Though he has grown old, Barristan \u201cThe Bold\u201d is a formidable fighter. He is, and has always been, an honorable knight.\n\nRenly Baratheon\n\nThe youngest of the three Baratheon brothers. Renly is lighthearted and opportunistic, and unexpectedly ambitious. He serves on Robert\u2019s royal council.\n\nStannis Baratheon\n\nThe middle brother of the three Baratheons. Stannis does not appear in A Game of Thrones, but as the brother of the king, he is a potential heir to the throne. Stannis does not seem to be well-liked.", + "metadata": { + "version": "v0", + "chunk_order": 8, + "document_type": "txt", + "unstructured_filetype": "text/plain", + "unstructured_languages": [ + "eng" + ], + "partitioned_by_unstructured": true, + "associated_query": "Who is John Snow?" + } + }, + { + "fragment_id": "d143b410-f9a4-5f3b-bb46-fb412eda8201", + "extraction_id": "37f940e2-18f4-50f5-93aa-cec422fc9211", + "document_id": "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "collection_ids": [], + "score": 0.1862841248512268, + "text": "Sandor (The Hound) Clegane\n\nPrince Joff\u2019s unofficial bodyguard. Proud that he is not a knight, The Hound appears to have no scruples whatsoever and does what Joffrey orders, however cruel or unjust, without question. His face is scarred on one side by extensive burning inflicted by his brother, Gregor.\n\nRobb Stark\n\nThe eldest Stark son and thus heir to Ned Stark. Though just fourteen, he is mature beyond his age as well as being brave and dutiful like his father.\n\nMaester Luwin\n\nCounselor to Ned, Catelyn, and Robb. 
Luwin is old and wise, and his advice proves indispensible to the Starks.\n\nTheon Greyjoy\n\nThe Starks\u2019s ward and Robb\u2019s best friend. Ned Stark took the young Theon, now nineteen, as a ward after putting down a rebellion led by the Greyjoy family, and Theon consequently grew up with the Stark children as something like a brother.", + "metadata": { + "version": "v0", + "chunk_order": 5, + "document_type": "txt", + "unstructured_filetype": "text/plain", + "unstructured_languages": [ + "eng" + ], + "partitioned_by_unstructured": true, + "associated_query": "Who is John Snow?" + } + } + ], + "kg_search_results": null + } + } + }, + "streaming_rag": { + "results": { + "completion": { + "choices": [ + { + "message": { + "content": "[{\"fragment_id\": \"94684f2d-fe60-5ba3-b1e8-0a921841bac9\", \"extraction_id\": \"fde39a49-00fc-5622-addd-13eb9c3bad4b\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6675721804840684, \"text\": \"Total Stockholders\\u2019 Equity (Deficit) 1,676,163\\n\\n5,184\\n\\n\\u2014\\n\\n(26,298)\\n\\n28,637\\n\\n(1) 721,710 (2,038) (1,009,359) 1,393,998\\n\\nLyft, Inc. 
Consolidated Statements of Cash Flows (in thousands)\\n\\n2021\\n\\nCash flows from operating activities Net loss Adjustments to reconcile net loss to net cash used in operating activities\\n\\n$\\n\\n(1,009,359)\\n\\nDepreciation and amortization Stock-based compensation Amortization of premium on marketable securities Accretion of discount on marketable securities Amortization of debt discount and issuance costs Deferred income tax from convertible senior notes Loss on sale and disposal of assets, net Gain on divestiture Other Changes in operating assets and liabilities, net effects of acquisition\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 572, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 82, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"0d5c5803-8846-59d4-8ae3-3696b718f162\", \"extraction_id\": \"fa3d2549-593a-5a80-88a2-b2d031d79771\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6647443571534382, \"text\": \"79\\n\\n2019 3,615,960\\n\\n2,176,469 636,116 1,505,640 814,122 1,186,093 6,318,440 (2,702,480) \\u2014 102,595 (2,599,885) 2,356 (2,602,241)\\n\\n(11.44)\\n\\n227,498\\n\\n81,321 75,212 971,941 72,046 398,791\\n\\nLyft, Inc. 
Consolidated Statements of Comprehensive Loss (in thousands)\\n\\nNet loss Other comprehensive income (loss)\\n\\n$\\n\\nYear Ended December 31, 2020 (1,752,857) $\\n\\n2021 (1,009,359) $\\n\\nForeign currency translation adjustment Unrealized gain (loss) on marketable securities, net of taxes\\n\\nOther comprehensive income (loss)\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 567, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 79, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"5d663f0a-c9a9-580f-818b-5ca0a1ca73f2\", \"extraction_id\": \"12916d2c-0691-528c-86aa-6784c1f35c55\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6628832616140414, \"text\": \"Overview\\n\\nLyft, Inc (the \\u201cCompany\\u201d or \\u201cLyft\\u201d) started a movement to revolutionize transportation. In 2012, we launched our peer-to-peer marketplace for on-demand ridesharing and have continued to pioneer innovations aligned with our mission. 
Today, Lyft is one of the largest multimodal transportation networks in the United States and Canada.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 16, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 5, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"7133acca-f147-5f43-b2e0-71228282fda0\", \"extraction_id\": \"212d8290-564d-5039-93cc-00cea31a1771\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6603014862717096, \"text\": \"Revenues from Contracts with Customers (ASC 606)\\n\\nWe generate substantially all our revenue from our ridesharing marketplace that connects drivers and riders. We recognize revenue from fees paid by drivers for use of our Lyft Platform offerings in accordance with ASC 606 as described in Note 2 of the notes to our consolidated financial statements. Drivers enter into terms of service (\\u201cToS\\u201d) with us in order to use our Lyft Driver App.\\n\\n58\\n\\n2019 to 2020 % Change\\n\\n19.0% (1.8)% (6.7)% 2.3%\\n\\nWe provide a service to drivers to complete a successful transportation service for riders. This service includes on-demand lead generation that assists drivers to find, receive and fulfill on-demand requests from riders seeking transportation services and related collection activities using our Lyft Platform. 
As a result, our single performance obligation in the transaction is to connect drivers with riders to facilitate the completion of a successful transportation service for riders.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 459, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 58, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"1a76991f-fa85-59b2-b522-700d47b2d809\", \"extraction_id\": \"0773cd62-b39f-517f-b6f1-be788b38374d\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6477650017878516, \"text\": \"Corporate Information\\n\\nWe were incorporated in 2007 as Bounder Web, Inc., a Delaware corporation. In 2008, we changed our name to Zimride, Inc. We founded Lyft in 2012 and\\n\\nchanged our name to Lyft, Inc. in 2013 when we sold the assets related to our Zimride operations.\\n\\n13\\n\\nAvailable Information\\n\\nOur website is located at www.lyft.com, and our investor relations website is located at investor.lyft.com. Copies of our Annual Report on Form 10-K, Quarterly Reports on Form 10-Q, Current Reports on Form 8-K and amendments to these reports filed or furnished pursuant to Section 13(a) or 15(d) of the Exchange Act, as amended, are available free of charge on our investor relations website as soon as reasonably practicable after we file such material electronically with or furnish it to the Securities and Exchange Commission (the \\u201cSEC\\u201d). 
The SEC also maintains a website that contains our SEC filings at www.sec.gov.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 82, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 13, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"17673edc-6fb7-577d-9bca-457c5745382d\", \"extraction_id\": \"bde94416-baaa-573a-9bc7-86ddf28535b1\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6406270265579269, \"text\": \"We generate substantially all of our revenue from our ridesharing marketplace that connects drivers and riders. We collect service fees and commissions from drivers for their use of our ridesharing marketplace. As drivers accept more rider leads and complete more rides, we earn more revenue. We also generate revenue from riders renting Light Vehicles, drivers renting vehicles through Express Drive, Lyft Rentals renters, Lyft Driver Center and Lyft Auto Care users, and by making our ridesharing marketplace available to organizations through our Lyft Business offerings, such as our Concierge and Corporate Business Travel programs. 
In the second quarter of 2021, we began generating revenues from licensing and data access agreements, primarily with third-party autonomous vehicle companies.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 20, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 5, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"8f6299b7-4582-5bac-8c74-7ca57714aefa\", \"extraction_id\": \"310e9e1f-25d3-5287-a905-5446f661d6da\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6395331787506555, \"text\": \"Revenue Recognition\\n\\nThe Company generates its revenue from its multimodal transportation networks that offer access to a variety of transportation options through the Lyft Platform and mobile-based applications. Substantially all of the Company\\u2019s revenue is generated from its ridesharing marketplace that connects drivers and riders and is recognized in accordance with Accounting Standards Codification Topic 606 (\\u201cASC 606\\u201d). In addition, the Company generates revenue in accordance with ASC 606 from licensing and data access, primarily with third-party autonomous vehicle companies. 
The Company also generates rental revenue from Flexdrive, its network of Light Vehicles and Lyft Rentals, which is recognized in accordance with Accounting Standards Codification Topic 842 (\\u201cASC 842\\u201d).\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 591, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 86, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"ff837ea0-0062-59ca-bb4f-aa7a1c9cecd0\", \"extraction_id\": \"41e4db8a-0478-5015-8263-cde0618ec626\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6308713775353904, \"text\": \"Light Vehicle Rider and Lyft Rentals Renter Incentives\\n\\nIncentives offered to Light Vehicle riders and Lyft Rentals renters were not material for the years ended December 31, 2021 and 2020.\\n\\nFor the years ended December 31, 2021, 2020 and 2019, in relation to the driver, rider, Light Vehicle riders and Lyft Rentals renters incentive programs, the Company recorded $1.3 billion, $390.8 million and $560.3 million as a reduction to revenue and $64.7 million, $135.0 million and $381.5 million as sales and marketing expense, respectively.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 611, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 89, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"c24d144b-c22d-5c08-876f-a03e43620aa4\", \"extraction_id\": \"2a4caab0-6193-5263-8eab-c7763e8f38e8\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6294938325882002, \"text\": \"Software 
Development Costs\\n\\nThe Company incurs costs related to developing the Lyft Platform and related support systems. The Company capitalizes development costs related to the Lyft Platform and related support systems once the preliminary project stage is complete and it is probable that the project will be completed and the software will be used to perform the function intended. The Company capitalized $16.2 million and $12.8 million of software development costs during the year ended December 31, 2021 and 2020, respectively. For the year ended December 31, 2019, capitalized software development costs was not material.\\n\\nInsurance Reserves\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 649, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 94, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}, {\"fragment_id\": \"83985ff5-653f-53eb-b137-f616b4292f51\", \"extraction_id\": \"0919e3d5-03b0-5d54-b5f3-7f6ad4534412\", \"document_id\": \"2f576170-c4f9-5141-a910-a0924f341de4\", \"user_id\": \"2acb499e-8428-543b-bd85-0d9098718220\", \"collection_ids\": [], \"score\": 0.6223346365889701, \"text\": \"32.1\\u2020\\n\\nCertifications of Principal Executive Officer and Principal Financial Officer pursuant to 18 U.S.C. 
Section 1350, as adopted pursuant to Section 906 of the Sarbanes-Oxley Act of 2002.\\n\\n101\\n\\nThe following financial information from Lyft, Inc.\\u2019s Annual Report on Form 10-K for the fiscal year ended December 31, 2021 formatted in Inline XBRL (eXtensible Business Reporting Language): (i) Consolidated Statements of Operations for the fiscal years ended December 31, 2021, 2020 and 2019; (ii) Consolidated Statements of Comprehensive Income (Loss) for the fiscal years ended December 31, 2021, 2020, and 2019; (iii) Consolidated Balance Sheets as of December 31, 2021 and 2020; (iv) Consolidated Statements of Cash Flows for the fiscal years ended December 31, 2021, 2020, and 2019; (v) Consolidated Statements of Redeemable Convertible Preferred Stock and Stockholders\\u2019 Equity for the fiscal years ended December 31, 2021, 2020, and 2019; and (vi) Notes to the Consolidated Financial Statements.\", \"metadata\": {\"version\": \"v0\", \"chunk_order\": 817, \"document_type\": \"pdf\", \"unstructured_filetype\": \"application/pdf\", \"unstructured_languages\": [\"eng\"], \"unstructured_page_number\": 127, \"partitioned_by_unstructured\": true, \"associated_query\": \"What was Lyft's profit in 2020?\"}}]Lyft's profit in 2020 was a net loss of $1,752,857,000 [2]." 
+ } + } + ] + } + } + } +} diff --git a/py/tests/regression/observed_outputs/test_user_management.json b/py/tests/regression/observed_outputs/test_user_management.json new file mode 100644 index 000000000..9b334e838 --- /dev/null +++ b/py/tests/regression/observed_outputs/test_user_management.json @@ -0,0 +1,139 @@ +{ + "register_user": { + "results": { + "id": "05f95944-be0f-5913-b04c-0eaa5cf45a39", + "email": "test_9e56903b@example.com", + "is_active": true, + "is_superuser": false, + "created_at": "2024-09-20T22:58:45.796169Z", + "updated_at": "2024-09-20T22:58:45.796169Z", + "is_verified": false, + "collection_ids": [], + "hashed_password": "$2b$12$febFeILwiLIv21raMwdHGuNNl3zKXbxymHlnqMw/BUEvtAzdHa1OK", + "verification_code_expiry": null, + "name": null, + "bio": null, + "profile_picture": null + } + }, + "login_user": { + "results": { + "access_token": { + "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzllNTY5MDNiQGV4YW1wbGUuY29tIiwiZXhwIjoxNzI2ODc2NzI2LjA2ODg5NiwidG9rZW5fdHlwZSI6ImFjY2VzcyJ9.smZcUPGlas4ql_OFpxa9uaZwIEagcERBMuemGIAerh0", + "token_type": "access" + }, + "refresh_token": { + "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzllNTY5MDNiQGV4YW1wbGUuY29tIiwiZXhwIjoxNzI3NDc3OTI2LCJ0b2tlbl90eXBlIjoicmVmcmVzaCJ9.u_TcrN_i0KTWRPwkb1-jsbKjJbsu8T6I3h0m96pwIx4", + "token_type": "refresh" + } + } + }, + "user_info": { + "results": { + "id": "05f95944-be0f-5913-b04c-0eaa5cf45a39", + "email": "test_9e56903b@example.com", + "is_active": true, + "is_superuser": false, + "created_at": "2024-09-20T22:58:45.796169Z", + "updated_at": "2024-09-20T22:58:45.796169Z", + "is_verified": true, + "collection_ids": [], + "hashed_password": "$2b$12$febFeILwiLIv21raMwdHGuNNl3zKXbxymHlnqMw/BUEvtAzdHa1OK", + "verification_code_expiry": null, + "name": null, + "bio": null, + "profile_picture": null + } + }, + "change_password": { + "results": { + "message": "Password changed successfully" + } + }, + "update_profile": { + "results": { + "id": 
"05f95944-be0f-5913-b04c-0eaa5cf45a39", + "email": "test_9e56903b@example.com", + "is_active": true, + "is_superuser": false, + "created_at": "2024-09-20T22:58:45.796169Z", + "updated_at": "2024-09-20T22:58:46.618261Z", + "is_verified": true, + "collection_ids": [], + "hashed_password": null, + "verification_code_expiry": null, + "name": "John Doe", + "bio": "R2R enthusiast", + "profile_picture": null + } + }, + "refresh_token": { + "results": { + "access_token": { + "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzllNTY5MDNiQGV4YW1wbGUuY29tIiwiZXhwIjoxNzI2ODc2NzI2LjY0MzM0NSwidG9rZW5fdHlwZSI6ImFjY2VzcyJ9.UYcp5cwnOvEtCNm-nqQzI8TWw7mmVn_cSEcEDtTltLI", + "token_type": "access" + }, + "refresh_token": { + "token": "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9.eyJzdWIiOiJ0ZXN0XzllNTY5MDNiQGV4YW1wbGUuY29tIiwiZXhwIjoxNzI3NDc3OTI2LCJ0b2tlbl90eXBlIjoicmVmcmVzaCJ9.u_TcrN_i0KTWRPwkb1-jsbKjJbsu8T6I3h0m96pwIx4", + "token_type": "refresh" + } + } + }, + "superuser_test": { + "results": [ + { + "user_id": "2acb499e-8428-543b-bd85-0d9098718220", + "email": "admin@example.com", + "is_superuser": true, + "is_active": true, + "is_verified": true, + "created_at": "2024-09-20T22:49:55.902324Z", + "updated_at": "2024-09-20T22:49:55.902324Z", + "collection_ids": [], + "num_files": 15, + "total_size_in_bytes": 8862496, + "document_ids": [ + "3e157b3a-8469-51db-90d9-52e7d896b49b", + "30f950f0-c692-57c5-b6ec-ff78ccf5ccdc", + "7b0f40c5-2ace-5781-ae35-ead99ddee8c5", + "451adbbd-e24e-5c9b-80a8-f424b6c7accb", + "e797da22-8c5d-54e5-bed5-a55954cf6bf9", + "c3291abf-8a4e-5d9d-80fd-232ef6fd8526", + "5b1bd54f-4d70-56b9-a017-a618bc75f94c", + "716fea3a-826b-5b27-8e59-ffbd1a35455a", + "57eec3df-cf68-5559-a80d-ae3fb55b9af1", + "01d514a4-5cb1-5c86-be8c-0c5c312a02c9", + "d421207a-d799-5806-8d67-46b2005b15d4", + "db02076e-989a-59cd-98d5-e24e15a0bd27", + "52e12576-090f-59db-91f4-6d4b2e29ae6c", + "7c1105fc-8f62-5a8f-ac81-fe88f3ec9e4c", + "2f576170-c4f9-5141-a910-a0924f341de4" + ] + }, + { + "user_id": 
"05f95944-be0f-5913-b04c-0eaa5cf45a39", + "email": "test_9e56903b@example.com", + "is_superuser": false, + "is_active": true, + "is_verified": true, + "created_at": "2024-09-20T22:58:45.796169Z", + "updated_at": "2024-09-20T22:58:46.618261Z", + "collection_ids": [], + "num_files": 0, + "total_size_in_bytes": 0, + "document_ids": [] + } + ], + "total_entries": 2 + }, + "logout": { + "results": { + "message": "Logged out successfully" + } + }, + "delete_account": { + "results": { + "message": "User account 24d5aa60-d6c5-5360-af03-f178e12b8722 deleted successfully." + } + } +} diff --git a/py/tests/regression/runner.py b/py/tests/regression/runner.py index d7e0d1796..355a3ffa3 100644 --- a/py/tests/regression/runner.py +++ b/py/tests/regression/runner.py @@ -20,11 +20,11 @@ def __init__( self.client = R2RClient(base_url=base_url) self.tests: List[BaseTest] = [] self.test_order = [ - # "TestDocumentManagement", - # "TestRetrieval", + "TestDocumentManagement", + "TestRetrieval", "TestUserManagement", # "TestObservability", - # "TestGroupManagement", + "TestGroupManagement", ] self.check_only = check_only self.update_expected = update_expected diff --git a/py/tests/regression/test_cases/__init__.py b/py/tests/regression/test_cases/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/py/tests/regression/test_cases/test_document_management.py b/py/tests/regression/test_cases/test_document_management.py index bf3a5ff19..7079408a7 100644 --- a/py/tests/regression/test_cases/test_document_management.py +++ b/py/tests/regression/test_cases/test_document_management.py @@ -1,4 +1,5 @@ import os +import time from core import R2RException from tests.regression.test_cases.base import BaseTest @@ -25,7 +26,7 @@ def get_test_cases(self): client ), "reingest_sample_file": lambda client: self.ingest_sample_files_test( - client + client, do_sleep=False ), "documents_overview": lambda client: self.documents_overview_test( client @@ -36,10 +37,13 @@ def 
get_test_cases(self): "update_document_test": lambda client: self.update_document_test( client ), + "rerun_documents_overview_test_1": lambda client: self.documents_overview_test( + client + ), "delete_document_test": lambda client: self.delete_document_test( client ), - "rerun_documents_overview_test": lambda client: self.documents_overview_test( + "rerun_documents_overview_test_2": lambda client: self.documents_overview_test( client ), "rerun_document_chunks_test": lambda client: self.document_chunks_test( @@ -47,24 +51,27 @@ def get_test_cases(self): ), } - def ingest_sample_files_test(self, client): + def ingest_sample_files_test(self, client, do_sleep=True): file_path = os.path.abspath(__file__) data_path = os.path.join( os.path.dirname(file_path), "..", "..", "..", - "r2r", + "core", "examples", "data", ) try: - return client.ingest_files( + result = client.ingest_files( [ os.path.join(data_path, file_name) for file_name in os.listdir(data_path) ] ) + if do_sleep: + time.sleep(300) + return result except R2RException as e: return {"results": str(e)} @@ -92,7 +99,7 @@ def update_document_test(self, client): "..", "..", "..", - "r2r", + "core", "examples", "data", "aristotle_v2.txt", @@ -100,6 +107,7 @@ def update_document_test(self, client): update_response = client.update_files( [file_path], [TestDocumentManagement.UPDATE_FILE_ID] ) + time.sleep(20) return update_response except R2RException as e: return {"results": str(e)} diff --git a/py/tests/regression/test_cases/test_user_management.py b/py/tests/regression/test_cases/test_user_management.py index 0c6412f60..c405a7511 100644 --- a/py/tests/regression/test_cases/test_user_management.py +++ b/py/tests/regression/test_cases/test_user_management.py @@ -64,19 +64,19 @@ def __init__(self, client): def get_test_cases(self): return { "register_user": lambda client: self.register_user_test(client), - # "login_user": lambda client: self.login_user_test(client), - # "user_info": lambda client: 
self.user_info_test(client), - # "change_password": lambda client: self.change_password_test( - # client - # ), - # # "reset_password": lambda client: self.reset_password_test(client), - # "update_profile": lambda client: self.update_profile_test(client), - # "refresh_token": lambda client: self.refresh_token_test(client), - # "superuser_test": lambda client: self.superuser_test(client), - # "logout": lambda client: self.logout_test(client), - # "delete_account": lambda client: self.delete_user_test(client), - # "login_user": lambda client: self.login_user_test(client), - # "refresh_token": lambda client: self.refresh_token_test(client), + "login_user": lambda client: self.login_user_test(client), + "user_info": lambda client: self.user_info_test(client), + "change_password": lambda client: self.change_password_test( + client + ), + # "reset_password": lambda client: self.reset_password_test(client), + "update_profile": lambda client: self.update_profile_test(client), + "refresh_token": lambda client: self.refresh_token_test(client), + "superuser_test": lambda client: self.superuser_test(client), + "logout": lambda client: self.logout_test(client), + "delete_account": lambda client: self.delete_user_test(client), + "login_user": lambda client: self.login_user_test(client), + "refresh_token": lambda client: self.refresh_token_test(client), } def register_user_test(self, client): diff --git a/py/tests/test_abstractions.py b/py/tests/test_abstractions.py deleted file mode 100644 index b9e81ad1d..000000000 --- a/py/tests/test_abstractions.py +++ /dev/null @@ -1,243 +0,0 @@ -import asyncio -import uuid -from datetime import datetime - -import pytest - -from core import ( - AsyncPipe, - AsyncState, - Prompt, - Vector, - VectorEntry, - VectorSearchResult, - VectorType, - generate_id_from_label, -) -from core.base.abstractions.completion import CompletionRecord, MessageType -from core.base.abstractions.search import AggregateSearchResult - - 
-@pytest.fixture(scope="session", autouse=True) -def event_loop_policy(): - asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) - - -@pytest.fixture(scope="function", autouse=True) -async def cleanup_tasks(): - yield - tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] - [task.cancel() for task in tasks] - await asyncio.gather(*tasks, return_exceptions=True) - - -@pytest.mark.asyncio -async def test_async_state_update_and_get(): - state = AsyncState() - outer_key = "test_key" - values = {"inner_key": "value"} - await state.update(outer_key, values) - result = await state.get(outer_key, "inner_key") - assert result == "value" - - -@pytest.mark.asyncio -async def test_async_state_delete(): - state = AsyncState() - outer_key = "test_key" - values = {"inner_key": "value"} - await state.update(outer_key, values) - await state.delete(outer_key, "inner_key") - result = await state.get(outer_key, "inner_key") - assert result == {}, "Expect empty result after deletion" - - -class MockAsyncPipe(AsyncPipe): - async def _run_logic(self, input, state, run_id, *args, **kwargs): - yield "processed" - - -@pytest.mark.asyncio -async def test_async_pipe_run(): - pipe = MockAsyncPipe() - - async def list_to_generator(lst): - for item in lst: - yield item - - input = pipe.Input(message=list_to_generator(["test"])) - state = AsyncState() - try: - async_generator = await pipe.run(input, state) - results = [result async for result in async_generator] - assert results == ["processed"] - except asyncio.CancelledError: - pass # Task cancelled as expected - - -def test_prompt_initialization_and_formatting(): - prompt = Prompt( - name="greet", template="Hello, {name}!", input_types={"name": "str"} - ) - formatted = prompt.format_prompt({"name": "Alice"}) - assert formatted == "Hello, Alice!" 
- - -def test_prompt_missing_input(): - prompt = Prompt( - name="greet", template="Hello, {name}!", input_types={"name": "str"} - ) - with pytest.raises(ValueError): - prompt.format_prompt({}) - - -def test_prompt_invalid_input_type(): - prompt = Prompt( - name="greet", template="Hello, {name}!", input_types={"name": "int"} - ) - with pytest.raises(TypeError): - prompt.format_prompt({"name": "Alice"}) - - -def test_vector_fixed_length_validation(): - with pytest.raises(ValueError): - Vector(data=[1.0, 2.0], type=VectorType.FIXED, length=3) - - -def test_message_type_enum(): - assert str(MessageType.SYSTEM) == "system" - assert str(MessageType.USER) == "user" - assert str(MessageType.ASSISTANT) == "assistant" - assert str(MessageType.FUNCTION) == "function" - assert str(MessageType.TOOL) == "tool" - - -def test_completion_record_initialization(): - record = CompletionRecord( - message_id=uuid.uuid4(), - message_type=MessageType.USER, - search_query="test query", - llm_response="test response", - ) - assert isinstance(record.message_id, uuid.UUID) - assert record.message_type == MessageType.USER - assert isinstance(record.timestamp, datetime) - assert record.search_query == "test query" - assert record.llm_response == "test response" - - -def test_completion_record_optional_fields(): - record = CompletionRecord( - message_id=uuid.uuid4(), message_type=MessageType.SYSTEM - ) - assert record.feedback is None - assert record.score is None - assert record.completion_start_time is None - assert record.completion_end_time is None - assert record.search_query is None - assert record.search_results is None - assert record.llm_response is None - - -def test_completion_record_to_dict(): - search_results = AggregateSearchResult(vector_search_results=[]) - record = CompletionRecord( - message_id=uuid.uuid4(), - message_type=MessageType.ASSISTANT, - feedback=["Good"], - score=[0.9], - completion_start_time=datetime(2023, 1, 1, 12, 0), - completion_end_time=datetime(2023, 1, 1, 
12, 1), - search_query="test", - search_results=search_results, - llm_response="Response", - ) - record_dict = record.to_dict() - - assert isinstance(record_dict["message_id"], str) - assert record_dict["message_type"] == "assistant" - assert isinstance(record_dict["timestamp"], str) - assert record_dict["feedback"] == ["Good"] - assert record_dict["score"] == [0.9] - assert record_dict["completion_start_time"] == "2023-01-01T12:00:00" - assert record_dict["completion_end_time"] == "2023-01-01T12:01:00" - assert record_dict["search_query"] == "test" - assert isinstance(record_dict["search_results"], dict) - assert record_dict["llm_response"] == "Response" - - -def test_completion_record_to_json(): - record = CompletionRecord( - message_id=uuid.uuid4(), - message_type=MessageType.FUNCTION, - llm_response="JSON test", - ) - json_str = record.to_json() - assert isinstance(json_str, str) - - import json - - parsed_dict = json.loads(json_str) - assert parsed_dict["message_type"] == "function" - assert parsed_dict["llm_response"] == "JSON test" - - -@pytest.mark.parametrize("message_type", list(MessageType)) -def test_completion_record_all_message_types(message_type): - record = CompletionRecord( - message_id=uuid.uuid4(), message_type=message_type - ) - assert record.message_type == message_type - - -def test_completion_record_serialization_with_none_values(): - record = CompletionRecord( - message_id=uuid.uuid4(), message_type=MessageType.TOOL - ) - record_dict = record.to_dict() - for field in [ - "feedback", - "score", - "completion_start_time", - "completion_end_time", - "search_query", - "search_results", - "llm_response", - ]: - assert record_dict[field] is None - - -def test_completion_record_with_complex_search_results(): - search_result = VectorSearchResult( - fragment_id=uuid.uuid4(), - extraction_id=uuid.uuid4(), - document_id=uuid.uuid4(), - user_id=uuid.uuid4(), - group_ids=[], - score=0.95, - text="Sample text", - metadata={"key": "value"}, - ) - 
aggregate_result = AggregateSearchResult( - vector_search_results=[search_result] - ) - record = CompletionRecord( - message_id=uuid.uuid4(), - message_type=MessageType.USER, - search_results=aggregate_result, - ) - record_dict = record.to_dict() - assert isinstance(record_dict["search_results"], dict) - assert isinstance( - record_dict["search_results"]["vector_search_results"], list - ) - assert len(record_dict["search_results"]["vector_search_results"]) == 1 - result = record_dict["search_results"]["vector_search_results"][0] - assert result["score"] == 0.95 - assert result["metadata"] == {"key": "value"} - assert "fragment_id" in result - assert "extraction_id" in result - assert "document_id" in result - assert "user_id" in result - assert result["text"] == "Sample text" - assert result["group_ids"] == [] diff --git a/py/tests/test_auth.py b/py/tests/test_auth.py deleted file mode 100644 index 00471b96e..000000000 --- a/py/tests/test_auth.py +++ /dev/null @@ -1,648 +0,0 @@ -# import random -# from datetime import datetime, timedelta, timezone -# from unittest.mock import Mock, patch - -# import pytest - -# from core import ( -# AuthConfig, -# BCryptConfig, -# BCryptProvider, -# DatabaseConfig, -# PostgresDBProvider, -# R2RAuthProvider, -# R2RException, -# ) -# from core.main.services import AuthService - - -# # Fixture for PostgresDBProvider -# @pytest.fixture -# def pg_vector_db(): -# random_collection_name = ( -# f"test_collection_{random.randint(0, 1_000_000_000)}" -# ) -# config = DatabaseConfig.create( -# provider="postgres", vecs_collection=random_collection_name -# ) -# db = PostgresDBProvider( -# config, crypto_provider=BCryptProvider(BCryptConfig()), dimension=3 -# ) -# yield db -# # Teardown -# db.vx.delete_collection(db.config.vecs_collection) - - -# @pytest.fixture -# def auth_config(): -# return AuthConfig( -# secret_key="wNFbczH3QhUVcPALwtWZCPi0lrDlGV3P1DPRVEQCPbM", -# access_token_lifetime_in_minutes=30, -# refresh_token_lifetime_in_days=7, -# 
require_email_verification=True, -# ) - - -# @pytest.fixture -# def auth_provider(auth_config, pg_vector_db): -# return R2RAuthProvider( -# auth_config, -# crypto_provider=BCryptProvider(BCryptConfig()), -# db_provider=pg_vector_db, -# ) - - -# @pytest.fixture -# def mock_email_provider(): -# mock_email = Mock() -# mock_email.send_verification_email = Mock() -# return mock_email - - -# @pytest.fixture -# def auth_service(auth_provider, auth_config, pg_vector_db): -# # Mock other necessary components for AuthService -# mock_providers = Mock() -# mock_providers.auth = auth_provider -# mock_providers.database = pg_vector_db -# mock_providers.email = mock_email_provider -# mock_pipelines = Mock() -# mock_run_manager = Mock() -# mock_logging_connection = Mock() -# mock_assistants = Mock() - -# return AuthService( -# config=Mock(auth=auth_config), -# providers=mock_providers, -# pipelines=mock_pipelines, -# run_manager=mock_run_manager, -# agents=mock_assistants, -# logging_connection=mock_logging_connection, -# ) - - -# @pytest.mark.asyncio -# async def test_create_user(auth_service, auth_provider): -# new_user = await auth_service.register( -# email="create@example.com", password="password123" -# ) -# assert new_user.email == "create@example.com" -# assert not new_user.is_verified -# fetched_user = auth_provider.db_provider.relational.get_user_by_email( -# new_user.email -# ) -# assert fetched_user.email == new_user.email -# assert fetched_user.is_verified == new_user.is_verified -# assert fetched_user.hashed_password == new_user.hashed_password -# assert fetched_user.is_active == new_user.is_active - - -# @pytest.mark.asyncio -# async def test_verify_user(auth_service, auth_provider): -# # Mock the generate_verification_code method to return a known value -# with patch.object( -# auth_provider.crypto_provider, -# "generate_verification_code", -# return_value="123456", -# ): -# new_user = await auth_service.register( -# email="verify@example.com", 
password="password123" -# ) - -# # mock verification -# assert new_user.email == "verify@example.com" -# assert not new_user.is_verified - -# # Verify the user using the known verification code -# verification = auth_provider.verify_email("123456") -# assert verification["message"] == "Email verified successfully" - -# # Check that the user is now verified -# response = auth_provider.db_provider.relational.get_user_by_email( -# "verify@example.com" -# ) -# assert response.is_verified -# assert response.email == "verify@example.com" - - -# @pytest.mark.asyncio -# async def test_login_success(auth_service, auth_provider): -# # Register a new user -# with patch.object( -# auth_provider.crypto_provider, -# "generate_verification_code", -# return_value="123456", -# ): -# new_user = await auth_service.register( -# email="login_test@example.com", password="correct_password" -# ) - -# # Verify the user -# auth_provider.verify_email("123456") - -# # Attempt login with correct password -# login_result = await auth_service.login( -# "login_test@example.com", "correct_password" -# ) - -# assert "access_token" in login_result -# assert "refresh_token" in login_result -# assert login_result["access_token"].token_type == "access" -# assert login_result["refresh_token"].token_type == "refresh" - - -# @pytest.mark.asyncio -# async def test_login_failure_wrong_password(auth_service, auth_provider): -# # Register a new user -# with patch.object( -# auth_provider.crypto_provider, -# "generate_verification_code", -# return_value="123456", -# ): -# new_user = await auth_service.register( -# email="login_fail@example.com", password="correct_password" -# ) - -# # Verify the user -# auth_provider.verify_email("123456") - -# # Attempt login with incorrect password -# with pytest.raises(R2RException) as exc_info: -# await auth_service.login("login_fail@example.com", "wrong_password") - -# assert exc_info.value.status_code == 401 -# assert exc_info.value.message == "Incorrect email or 
password" - - -# @pytest.mark.asyncio -# async def test_login_failure_unverified_user(auth_service, auth_provider): -# # Register a new user but don't verify -# await auth_service.register( -# email="unverified@example.com", password="password123" -# ) - -# # Attempt login with correct password but unverified account -# with pytest.raises(R2RException) as exc_info: -# await auth_service.login("unverified@example.com", "password123") - -# assert exc_info.value.status_code == 401 -# assert exc_info.value.message == "Email not verified" - - -# @pytest.mark.asyncio -# async def test_login_failure_nonexistent_user(auth_service): -# # Attempt login with non-existent user -# with pytest.raises(R2RException) as exc_info: -# await auth_service.login("nonexistent@example.com", "password123") - -# assert exc_info.value.status_code == 404 -# assert exc_info.value.message == "User not found" - - -# @pytest.mark.asyncio -# async def test_login_with_non_existent_user(auth_service): -# with pytest.raises(R2RException) as exc_info: -# await auth_service.login("nonexistent@example.com", "password123") -# assert "User not found" in str(exc_info.value) - - -# @pytest.mark.asyncio -# async def test_verify_email_with_expired_code(auth_service, auth_provider): -# with patch.object( -# auth_provider.crypto_provider, -# "generate_verification_code", -# return_value="123456", -# ): -# new_user = await auth_service.register( -# email="verify_expired@example.com", password="password123" -# ) - -# # Get the verification code - -# # Manually expire the verification code -# auth_provider.db_provider.relational.expire_verification_code( -# new_user.id -# ) - -# with pytest.raises(R2RException) as exc_info: -# await auth_service.verify_email( -# "verify_expired@example.com", "123456" -# ) -# assert "Invalid or expired verification code" in str(exc_info.value) - - -# @pytest.mark.asyncio -# async def test_refresh_token_flow(auth_service, auth_provider): -# # Register and verify a user -# with 
patch.object( -# auth_provider.crypto_provider, -# "generate_verification_code", -# return_value="123456", -# ): -# new_user = await auth_service.register( -# email="refresh@example.com", password="password123" -# ) - -# await auth_service.verify_email("refresh@example.com", "123456") - -# # Login to get initial tokens -# tokens = await auth_service.login("refresh@example.com", "password123") -# initial_access_token = tokens["access_token"] -# refresh_token = tokens["refresh_token"] - -# # Use refresh token to get new access token -# new_tokens = await auth_service.refresh_access_token(refresh_token.token) -# assert "access_token" in new_tokens -# assert new_tokens["access_token"].token != initial_access_token.token - - -# @pytest.mark.asyncio -# async def test_get_current_user_with_expired_token( -# auth_service, auth_provider -# ): -# with patch.object( -# auth_provider.crypto_provider, -# "generate_verification_code", -# return_value="123456", -# ): -# new_user = await auth_service.register( -# email="expired_token@example.com", password="password123" -# ) - -# await auth_service.verify_email("expired_token@example.com", "123456") - -# # Manually expire the token -# auth_provider.access_token_lifetime_in_minutes = ( -# -1 -# ) # This will create an expired token -# auth_provider.refresh_token_lifetime_in_days = ( -# -1 -# ) # This will create an expired token - -# tokens = await auth_service.login( -# "expired_token@example.com", "password123" -# ) -# access_token = tokens["refresh_token"] - -# with pytest.raises(R2RException) as exc_info: -# result = await auth_service.user(access_token.token) -# assert "Token has expired" in str(exc_info.value) - -# # Reset the token lifetime -# auth_provider.access_token_lifetime_in_minutes = 30 - - -# @pytest.mark.asyncio -# async def test_change_password(auth_service, auth_provider): -# # Register and verify a user -# with patch.object( -# auth_provider.crypto_provider, -# "generate_verification_code", -# 
return_value="123456", -# ): -# new_user = await auth_service.register( -# email="change_password@example.com", password="old_password" -# ) -# await auth_service.verify_email("change_password@example.com", "123456") - -# # Change password -# await auth_service.change_password( -# new_user, "old_password", "new_password" -# ) - -# # Try logging in with old password -# with pytest.raises(R2RException) as exc_info: -# await auth_service.login("change_password@example.com", "old_password") -# assert exc_info.value.status_code == 401 - -# # Login with new password -# login_result = await auth_service.login( -# "change_password@example.com", "new_password" -# ) -# assert "access_token" in login_result - - -# @pytest.mark.asyncio -# async def test_reset_password_flow( -# auth_service, auth_provider, mock_email_provider -# ): -# with patch.object( -# auth_provider.crypto_provider, -# "generate_verification_code", -# return_value="123456", -# ): -# new_user = await auth_service.register( -# email="reset_password@example.com", password="old_password" -# ) -# await auth_service.verify_email("reset_password@example.com", "123456") - -# # Request password reset -# await auth_service.request_password_reset("reset_password@example.com") - -# # Verify that an email was "sent" -# # mock_email_provider.send_reset_email.assert_called_once() - -# # Mock getting the reset token from the email -# reset_token = "mocked_reset_token" -# with patch.object( -# auth_provider.db_provider.relational, -# "get_user_id_by_reset_token", -# return_value=new_user.id, -# ): -# # Confirm password reset -# await auth_service.confirm_password_reset(reset_token, "new_password") - -# # Try logging in with old password -# with pytest.raises(R2RException) as exc_info: -# await auth_service.login("reset_password@example.com", "old_password") -# assert exc_info.value.status_code == 401 - -# # Login with new password -# login_result = await auth_service.login( -# "reset_password@example.com", "new_password" -# 
) -# assert "access_token" in login_result - - -# @pytest.mark.asyncio -# async def test_logout(auth_service, auth_provider): -# # Register and verify a user -# with patch.object( -# auth_provider.crypto_provider, -# "generate_verification_code", -# return_value="123456", -# ): -# new_user = await auth_service.register( -# email="logout@example.com", password="password123" -# ) -# await auth_service.verify_email("logout@example.com", "123456") - -# # Login to get tokens -# tokens = await auth_service.login("logout@example.com", "password123") -# access_token = tokens["access_token"].token - -# # Logout -# await auth_service.logout(access_token) - -# # Try to use the logged out token -# with pytest.raises(R2RException) as exc_info: -# await auth_service.user(access_token) -# assert exc_info.value.status_code == 401 - - -# @pytest.mark.asyncio -# async def test_update_user_profile(auth_service, auth_provider): -# # Register and verify a user -# with patch.object( -# auth_provider.crypto_provider, -# "generate_verification_code", -# return_value="123456", -# ): -# new_user = await auth_service.register( -# email="update_profile@example.com", password="password123" -# ) -# await auth_service.verify_email("update_profile@example.com", "123456") - -# # Update user profile -# updated_profile = await auth_service.update_user( -# new_user.id, -# name="John Doe", -# bio="Test bio", -# profile_picture="http://example.com/pic.jpg", -# ) -# assert updated_profile.name == "John Doe" -# assert updated_profile.bio == "Test bio" -# assert updated_profile.profile_picture == "http://example.com/pic.jpg" - - -# @pytest.mark.asyncio -# async def test_delete_user_account(auth_service, auth_provider): -# # Register and verify a user -# with patch.object( -# auth_provider.crypto_provider, -# "generate_verification_code", -# return_value="123456", -# ): -# new_user = await auth_service.register( -# email="delete_user@example.com", password="password123" -# ) -# await 
auth_service.verify_email("delete_user@example.com", "123456") - -# # Delete user account -# await auth_service.delete_user(new_user.id, "password123") - -# # Try to get the deleted user's profile -# with pytest.raises(R2RException) as exc_info: -# result = auth_provider.db_provider.relational.get_user_by_email( -# "delete_user@example.com" -# ) -# assert exc_info.value.status_code == 404 - -# # Try to login with deleted account -# with pytest.raises(R2RException) as exc_info: -# await auth_service.login("delete_user@example.com", "password123") -# assert exc_info.value.status_code == 404 - - -# @pytest.mark.asyncio -# async def test_token_blacklist_cleanup(auth_service, auth_provider): -# # Register and verify a user -# with patch.object( -# auth_provider.crypto_provider, -# "generate_verification_code", -# return_value="123456", -# ): -# await auth_service.register( -# email="cleanup@example.com", password="password123" -# ) -# await auth_service.verify_email("cleanup@example.com", "123456") - -# # Login and logout to create a blacklisted token -# tokens = await auth_service.login("cleanup@example.com", "password123") -# access_token = tokens["access_token"].token -# await auth_service.logout(access_token) - -# # Manually insert an "old" blacklisted token -# old_token = "old_token" -# # with patch('datetime.datetime') as mock_datetime: -# # mock_datetime.utcnow.return_value = datetime.utcnow() - timedelta(hours=7*25) -# auth_provider.db_provider.relational.blacklist_token( -# old_token, datetime.now(timezone.utc) - timedelta(hours=7 * 25) -# ) - -# # Verify both tokens are in the blacklist before cleanup -# assert auth_provider.db_provider.relational.is_token_blacklisted(old_token) -# assert auth_provider.db_provider.relational.is_token_blacklisted( -# access_token -# ) - -# # Run cleanup (tokens older than 24 hours will be removed) -# await auth_service.clean_expired_blacklisted_tokens() - -# # Check that the old token was removed and the newer one remains -# 
assert not auth_provider.db_provider.relational.is_token_blacklisted( -# old_token -# ) -# assert auth_provider.db_provider.relational.is_token_blacklisted( -# access_token -# ) - - -# @pytest.mark.asyncio -# async def test_register_and_verify(auth_service, auth_provider): -# # new_user = await auth_service.register(user) -# # Mock verification code generation -# with patch.object( -# auth_provider.crypto_provider, -# "generate_verification_code", -# return_value="123456", -# ): -# new_user = await auth_service.register( -# email="newuser@example.com", password="password123" -# ) -# assert new_user.email == "newuser@example.com" -# assert not new_user.is_verified - -# await auth_service.verify_email("newuser@example.com", "123456") - -# new_user = auth_provider.db_provider.relational.get_user_by_email( -# "newuser@example.com" -# ) -# assert new_user.email == "newuser@example.com" -# assert new_user.is_verified - - -# @pytest.mark.asyncio -# async def test_login_logout(auth_service, auth_provider): -# # Mock reset token generation -# with patch.object( -# auth_provider.crypto_provider, -# "generate_verification_code", -# return_value="123456", -# ): -# await auth_service.register( -# email="loginuser@example.com", password="password123" -# ) -# await auth_service.verify_email("loginuser@example.com", "123456") - -# tokens = await auth_service.login("loginuser@example.com", "password123") -# assert "access_token" in tokens -# assert "refresh_token" in tokens - -# logout_result = await auth_service.logout(tokens["access_token"].token) -# assert logout_result["message"] == "Logged out successfully" - - -# @pytest.mark.asyncio -# async def test_refresh_token(auth_service, auth_provider): -# # Mock reset token generation -# with patch.object( -# auth_provider.crypto_provider, -# "generate_verification_code", -# return_value="123456", -# ): -# await auth_service.register( -# email="refreshuser@example.com", password="password123" -# ) -# await 
auth_service.verify_email("refreshuser@example.com", "123456") - -# tokens = await auth_service.login("refreshuser@example.com", "password123") -# new_tokens = await auth_service.refresh_access_token( -# tokens["refresh_token"].token -# ) -# assert new_tokens["access_token"].token != tokens["access_token"].token - - -# @pytest.mark.asyncio -# async def test_change_password(auth_service, auth_provider): -# with patch.object( -# auth_provider.crypto_provider, -# "generate_verification_code", -# return_value="123456", -# ): -# new_user = await auth_service.register( -# email="changepass@example.com", password="oldpassword" -# ) -# await auth_service.verify_email("changepass@example.com", "123456") - -# result = await auth_service.change_password( -# new_user, "oldpassword", "newpassword" -# ) -# assert result["message"] == "Password changed successfully" - -# with pytest.raises(R2RException): -# await auth_service.login("changepass@example.com", "oldpassword") - -# tokens = await auth_service.login("changepass@example.com", "newpassword") -# assert "access_token" in tokens - - -# @pytest.mark.asyncio -# async def test_request_reset_password(auth_service): -# await auth_service.register( -# email="resetpass@example.com", password="password123" -# ) - -# result = await auth_service.request_password_reset("resetpass@example.com") -# assert ( -# result["message"] == "If the email exists, a reset link has been sent" -# ) - - -# @pytest.mark.asyncio -# async def test_confirm_reset_password(auth_service, auth_provider): -# # Mock reset token generation -# with patch.object( -# auth_provider.crypto_provider, -# "generate_verification_code", -# return_value="123456", -# ): -# await auth_service.register( -# email="confirmreset@example.com", password="oldpassword" -# ) -# await auth_service.verify_email("confirmreset@example.com", "123456") -# await auth_service.request_password_reset("confirmreset@example.com") -# result = await auth_service.confirm_password_reset( -# 
"123456", "newpassword" -# ) -# assert result["message"] == "Password reset successfully" - -# tokens = await auth_service.login( -# "confirmreset@example.com", "newpassword" -# ) -# assert "access_token" in tokens - - -# @pytest.mark.asyncio -# async def test_get_user_profile(auth_service, auth_provider): -# await auth_service.register( -# email="profile@example.com", password="password123" -# ) - -# profile = auth_provider.db_provider.relational.get_user_by_email( -# "profile@example.com" -# ) -# assert profile.email == "profile@example.com" - - -# @pytest.mark.asyncio -# async def test_update_user_profile(auth_service): -# new_user = await auth_service.register( -# email="updateprofile@example.com", password="password123" -# ) - -# updated_user = await auth_service.update_user(new_user.id, name="John Doe") -# assert updated_user.name == "John Doe" - - -# @pytest.mark.asyncio -# async def test_delete_user_account_2(auth_service): -# new_user = await auth_service.register( -# email="deleteuser@example.com", password="password123" -# ) - -# result = await auth_service.delete_user(new_user.id, "password123") -# assert "deleted" in result["message"] - -# with pytest.raises(R2RException): -# await auth_service.login("deleteuser@example.com", "password123") diff --git a/py/tests/test_client.py b/py/tests/test_client.py deleted file mode 100644 index b7aa68fa0..000000000 --- a/py/tests/test_client.py +++ /dev/null @@ -1,365 +0,0 @@ -import uuid -from datetime import datetime -from unittest.mock import MagicMock - -import pytest -from fastapi import Body, Depends -from fastapi.security import OAuth2PasswordBearer -from fastapi.testclient import TestClient - -from core import ( - DocumentInfo, - HatchetOrchestrationProvider, - PostgresDBProvider, - R2RAuthProvider, - R2RBuilder, - R2RConfig, - Token, - UnstructuredParsingProvider, - UserResponse, - VectorSearchResult, -) - -# TODO: need to import this from the package, not from the local directory -from r2r import 
R2RClient - -oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token") - - -def create_user(email: str, password: str): - return UserResponse( - id=uuid.UUID("12345678-1234-5678-1234-567812345678"), - email=email, - hashed_password="hashed_" + password, - is_active=True, - is_superuser=False, - is_verified=False, - name="Test User", - bio="Test Bio", - profile_picture="http://example.com/pic.jpg", - created_at=datetime.utcnow(), - updated_at=datetime.utcnow(), - ) - - -@pytest.fixture(scope="function") -def mock_auth_wrapper(): - def auth_wrapper(token: str = Depends(oauth2_scheme)): - return UserResponse( - id=uuid.UUID("12345678-1234-5678-1234-567812345678"), - email="test@example.com", - is_active=True, - hashed_password="xxx", - is_superuser=False, - ) - - return auth_wrapper - - -@pytest.fixture(scope="function") -def mock_auth_wrapper(): - def auth_wrapper(token: str = Depends(oauth2_scheme)): - return UserResponse( - id=uuid.UUID("12345678-1234-5678-1234-567812345678"), - email="test@example.com", - is_active=True, - hashed_password="xxx", - is_superuser=True, - ) - - return auth_wrapper - - -@pytest.fixture(scope="function") -def mock_super_auth_wrapper(): - def auth_wrapper(token: str = Depends(oauth2_scheme)): - return UserResponse( - id=uuid.UUID("12345678-1234-5678-1234-567812345678"), - email="test@example.com", - is_active=True, - hashed_password="xxx", - is_superuser=True, - ) - - return auth_wrapper - - -@pytest.fixture(scope="function") -def mock_db(): - db = MagicMock() - db.relational.get_user_by_email.return_value = ( - None # Simulate empty database - ) - - db.relational.create_user.side_effect = create_user - db.relational.get_user_by_id.return_value = create_user( - email="test@example.com", password="password" - ) - - def update_user(user): - updated_user = create_user(email=user.email, password="password") - updated_user.name = user.name - updated_user.bio = user.bio - updated_user.profile_picture = user.profile_picture - return updated_user - 
- db.relational.update_user.side_effect = update_user - db.relational.documents_in_group.return_value = [ - DocumentInfo( - user_id=uuid.uuid4(), - id=uuid.uuid4(), - title=f"Document {i}", - type="txt", - group_ids=[uuid.uuid4()], - created_at=datetime.utcnow(), - updated_at=datetime.utcnow(), - version="1", - metadata={}, - size_in_bytes=1000, - ) - for i in range(100) - ] - - return db - - -def mock_search(*args, **kwargs): - return [ - VectorSearchResult( - **{ - "fragment_id": "c68dc72e-fc23-5452-8f49-d7bd46088a96", - "extraction_id": "3f3d47f3-8baf-58eb-8bc2-0171fb1c6e09", - "document_id": "3e157b3a-8469-51db-90d9-52e7d896b49b", - "user_id": "2acb499e-8428-543b-bd85-0d9098718220", - "group_ids": [], - "score": 0.23943702876567796, - "text": "Alternate Base Rate means, for any day, a rate per annum equal to the greatest of (i) the Prime Rate in effect on such day, (ii) the Federal Funds Effective Rate in effect on such day \nplus \u00bd of 1% and (iii) the sum of (a) the Adjusted LIBO Rate that would be payable onsuch day for a Eurodollar Borrowing with a one-month interest period", - "metadata": { - "title": "uber_2021.pdf", - "associated_query": "What is the capital of France?", - }, - } - ), - ] - - -@pytest.fixture(scope="function") -def app_client(mock_db, mock_auth_wrapper): - config = R2RConfig.load(config_name="default") - providers = MagicMock() - providers.auth = MagicMock(spec=R2RAuthProvider) - providers.database = MagicMock(spec=PostgresDBProvider) - providers.database.vector = MagicMock() - providers.orchestration = MagicMock(spec=HatchetOrchestrationProvider) - providers.database.vx = MagicMock() - providers.parsing = MagicMock(spec=UnstructuredParsingProvider) - providers.auth.login.return_value = { - "access_token": Token(token="access_token", token_type="access"), - "refresh_token": Token(token="refresh_token", token_type="refresh"), - } - providers.auth.auth_wrapper = mock_auth_wrapper - providers.auth.register.side_effect = 
mock_db.relational.create_user - providers.auth.verify_email.return_value = { - "message": "Email verified successfully" - } - providers.auth.change_password.return_value = { - "message": "Password changed successfully" - } - providers.auth.request_password_reset.return_value = { - "message": "If the email exists, a reset link has been sent" - } - providers.auth.confirm_password_reset.return_value = { - "message": "Password reset successfully" - } - providers.auth.logout.return_value = {"message": "Logged out successfully"} - providers.database.vector.semantic_search = mock_search - providers.database.vector.hybrid_search = mock_search - - app = ( - R2RBuilder(config) - .with_provider("auth_provider_override", providers.auth) - .with_provider( - "orchestration_provider_override", providers.orchestration - ) - .with_provider("database_provider_override", providers.database) - .with_provider("parsing_provider_override", providers.parsing) - .build() - ) - - return TestClient(app.app) - - -@pytest.fixture(scope="function") -def r2r_client(app_client): - return R2RClient(base_url="http://testserver", custom_client=app_client) - - -def test_health_check(r2r_client): - response = r2r_client.health() - print("response = ", response) - assert response["results"] == {"response": "ok"} - - -def test_register_user(r2r_client, mock_db): - user_data = {"email": "test@example.com", "password": "testpassword"} - response = r2r_client.register(**user_data) - assert "results" in response - assert response["results"]["email"] == user_data["email"] - assert "id" in response["results"] - assert "hashed_password" in response["results"] - mock_db.relational.create_user.assert_called_once() - - -def test_login_user(r2r_client, mock_db): - user_data = {"email": "login_test@example.com", "password": "testpassword"} - mock_db.relational.get_user_by_email.return_value = None - response = r2r_client.register(**user_data) - - mock_db.relational.get_user_by_email.return_value = UserResponse( - 
id=uuid.UUID("12345678-1234-5678-1234-567812345678"), - email=user_data["email"], - hashed_password="hashed_" + user_data["password"], - is_active=True, - is_superuser=False, - verification_code_expiry=None, - ) - response = r2r_client.login(**user_data) - assert "results" in response - assert "access_token" in response["results"] - assert "refresh_token" in response["results"] - - -def test_authenticated_search(r2r_client, mock_db): - # Register and login - user_data = { - "email": "search_test@example.com", - "password": "testpassword", - } - r2r_client.register(**user_data) - login_response = r2r_client.login(**user_data) - - # Perform search - search_query = "test query" - search_response = r2r_client.search(search_query) - results = search_response["results"] - print("results = ", results) - assert "vector_search_results" in results - assert len(results["vector_search_results"]) > 0 - assert ( - results["vector_search_results"][0]["fragment_id"] - == "c68dc72e-fc23-5452-8f49-d7bd46088a96" - ) - assert ( - results["vector_search_results"][0]["text"] - == "Alternate Base Rate means, for any day, a rate per annum equal to the greatest of (i) the Prime Rate in effect on such day, (ii) the Federal Funds Effective Rate in effect on such day \nplus \u00bd of 1% and (iii) the sum of (a) the Adjusted LIBO Rate that would be payable onsuch day for a Eurodollar Borrowing with a one-month interest period" - ) - assert results["vector_search_results"][0]["score"] == 0.23943702876567796 - - -@pytest.mark.asyncio -async def test_change_password(r2r_client, mock_db): - # Register and login - user_data = { - "email": "change_pass@example.com", - "password": "old_password", - } - r2r_client.register(**user_data) - r2r_client.login(**user_data) - - # Change password - response = r2r_client.change_password("old_password", "new_password") - print("response = ", response) - assert response["results"]["message"] == "Password changed successfully" - - # Try logging in with new 
password - login_response = r2r_client.login( - email="change_pass@example.com", password="new_password" - ) - assert "access_token" in login_response["results"] - - -@pytest.mark.asyncio -async def test_password_reset_flow(r2r_client, mock_db): - # Register a user - user_data = {"email": "reset_pass@example.com", "password": "old_password"} - r2r_client.register(**user_data) - - # Request password reset - reset_response = r2r_client.request_password_reset( - "reset_pass@example.com" - ) - print("reset_response = ", reset_response) - assert "message" in reset_response["results"] - - # Confirm password reset (we'll need to mock the reset token) - mock_reset_token = "mock_reset_token" - confirm_response = r2r_client.confirm_password_reset( - mock_reset_token, "new_password" - ) - assert ( - confirm_response["results"]["message"] == "Password reset successfully" - ) - - # Try logging in with new password - login_response = r2r_client.login( - email="reset_pass@example.com", password="new_password" - ) - assert "access_token" in login_response["results"] - - -@pytest.mark.asyncio -async def test_logout(r2r_client, mock_db): - # Register and login - user_data = {"email": "logout@example.com", "password": "password123"} - r2r_client.register(**user_data) - r2r_client.login(**user_data) - - # Logout - logout_response = r2r_client.logout() - assert logout_response["results"]["message"] == "Logged out successfully" - - # Ensure client's tokens are cleared - assert r2r_client.access_token is None - assert r2r_client._refresh_token is None - - -# @pytest.mark.asyncio -# async def test_user_profile(r2r_client, mock_db): -# # Register and login -# user_data = {"email": "profile@example.com", "password": "password123"} -# r2r_client.register(**user_data) -# r2r_client.login(**user_data) - -# # Get user profile -# # mock_db.relational.get_user_by_id.return_value = create_user( -# # UserCreate(email="profile@example.com", password="password") -# # ) -# # profile = 
r2r_client.user() - -# # assert profile["results"]["email"] == "profile@example.com" - -# # Update user profile -# updated_profile = r2r_client.update_user(name="John Doe", bio="Test bio") -# assert updated_profile["results"]["name"] == "John Doe" -# assert updated_profile["results"]["bio"] == "Test bio" - - -# @pytest.mark.asyncio -# async def test_documents_in_group(r2r_client, mock_db): -# # Register and login as a superuser -# user_data = {"email": "superuser@example.com", "password": "password123"} -# r2r_client.register(**user_data) - -# # Set the mock user as a superuser -# # mock_db.relational.get_user_by_email.return_value.is_superuser = True - -# r2r_client.login(**user_data) - -# # Get documents in group -# group_id = uuid.uuid4() -# response = r2r_client.documents_in_group(group_id) - -# assert "results" in response -# assert len(response["results"]) == 100 # Default limit -# assert response["results"][0]["title"] == "Document 0" -# assert response["results"][0]["type"] == "txt" diff --git a/py/tests/test_config.py b/py/tests/test_config.py deleted file mode 100644 index 53b36e74e..000000000 --- a/py/tests/test_config.py +++ /dev/null @@ -1,199 +0,0 @@ -import asyncio -from unittest.mock import Mock, mock_open, patch - -import pytest -import toml - -from core import DocumentType, R2RConfig - - -@pytest.fixture(scope="session", autouse=True) -def event_loop_policy(): - asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) - - -@pytest.fixture(scope="function", autouse=True) -async def cleanup_tasks(): - yield - tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] - [task.cancel() for task in tasks] - await asyncio.gather(*tasks, return_exceptions=True) - - -@pytest.fixture(autouse=True) -async def manage_async_pipes(): - async_pipes = [] - yield async_pipes - for pipe in async_pipes: - await pipe.shutdown() - - -@pytest.fixture -def mock_bad_file(): - mock_data = toml.dumps({}) - with patch("builtins.open", 
mock_open(read_data=mock_data)) as m: - yield m - - -@pytest.fixture -def mock_file(): - mock_data = toml.dumps( - { - "auth": { - "provider": "r2r", - "access_token_lifetime_in_minutes": 60, - "refresh_token_lifetime_in_days": 7, - "require_authentication": False, - "require_email_verification": False, - "default_admin_email": "admin@example.com", - "default_admin_password": "change_me_immediately", - }, - "completion": { - "provider": "litellm", - "concurrent_request_limit": 16, - "generation_config": { - "model": "openai/gpt-4o", - "temperature": 0.1, - "top_p": 1, - "max_tokens_to_sample": 1024, - "stream": False, - "add_generation_kwargs": {}, - }, - }, - "crypto": {"provider": "bcrypt"}, - "database": {"provider": "postgres"}, - "parsing": {"provider": "r2r", "excluded_parsers": ["mp4"]}, - "chunking": {"provider": "r2r", "method": "recursive"}, - "embedding": { - "provider": "litellm", - "base_model": "text-embedding-3-small", - "base_dimension": 512, - "batch_size": 128, - "add_title_as_prefix": False, - "rerank_model": "None", - "concurrent_request_limit": 256, - }, - "ingestion": { - "excluded_parsers": ["mp4"], - "override_parsers": [ - {"document_type": "pdf", "parser": "PDFParser"} - ], - "text_splitter": { - "type": "recursive_character", - "chunk_size": 512, - "chunk_overlap": 20, - }, - }, - "kg": {"provider": "None"}, - "logging": { - "provider": "local", - "log_table": "logs", - "log_info_table": "log_info", - }, - "agent": { - "system_instruction_name": "rag_agent", - "tool_names": ["search"], - }, - "prompt": {"provider": "r2r"}, - "file": {"provider": "postgres"}, - } - ) - with patch("builtins.open", mock_open(read_data=mock_data)) as m: - yield m - - -@pytest.mark.asyncio -async def test_r2r_config_loading_required_keys(mock_bad_file): - with pytest.raises(KeyError): - R2RConfig.from_toml("r2r.toml") - - -@pytest.mark.asyncio -async def test_r2r_config_loading(mock_file): - try: - config = R2RConfig.from_toml("r2r.toml") - assert ( - 
config.embedding.provider == "litellm" - ), "Provider should match the mock data" - except Exception as e: - pytest.fail(f"Test failed with exception: {e}") - - -@pytest.fixture -def mock_redis_client(): - return Mock() - - -def test_r2r_config_serialization(mock_file, mock_redis_client): - config = R2RConfig.from_toml("r2r.toml") - config.save_to_redis(mock_redis_client, "test_key") - mock_redis_client.set.assert_called_once() - saved_data = toml.loads(mock_redis_client.set.call_args[0][1]) - assert saved_data["embedding"]["provider"] == "litellm" - - -def test_r2r_config_deserialization(mock_file, mock_redis_client): - config_data = { - "embedding": { - "provider": "litellm", - "base_model": "text-embedding-3-small", - "base_dimension": 512, - "batch_size": 128, - "add_title_as_prefix": False, - "rerank_model": "None", - "concurrent_request_limit": 256, - }, - "kg": {"provider": "None"}, - "parsing": {"provider": "r2r"}, - "chunking": {"provider": "r2r"}, - "completion": {"provider": "litellm"}, - "logging": { - "provider": "local", - "log_table": "logs", - "log_info_table": "log_info", - }, - "prompt": {"provider": "r2r"}, - "database": {"provider": "postgres"}, - } - mock_redis_client.get.return_value = toml.dumps(config_data) - config = R2RConfig.load_from_redis(mock_redis_client, "test_key") - assert config.chunking.provider == "r2r" - - -def test_r2r_config_missing_section(): - invalid_data = { - "embedding": { - "provider": "litellm", - "base_model": "text-embedding-3-small", - "base_dimension": 512, - "batch_size": 128, - "add_title_as_prefix": False, - } - } - with patch("builtins.open", mock_open(read_data=toml.dumps(invalid_data))): - with pytest.raises(KeyError): - R2RConfig.from_toml("r2r.toml") - - -def test_r2r_config_missing_required_key(): - invalid_data = { - "auth": {"access_token_lifetime_in_minutes": 60}, - "embedding": { - "base_model": "text-embedding-3-small", - "base_dimension": 512, - "batch_size": 128, - "add_title_as_prefix": False, - 
}, - "kg": {"provider": "None"}, - "completion": {"provider": "litellm"}, - "logging": { - "provider": "local", - "log_table": "logs", - "log_info_table": "log_info", - }, - "prompt": {"provider": "r2r"}, - "database": {"provider": "postgres"}, - } - with patch("builtins.open", mock_open(read_data=toml.dumps(invalid_data))): - with pytest.raises(KeyError): - R2RConfig.from_toml("r2r.toml") diff --git a/py/tests/test_database.py b/py/tests/test_database.py deleted file mode 100644 index 3f27836a5..000000000 --- a/py/tests/test_database.py +++ /dev/null @@ -1,175 +0,0 @@ -import random -import uuid - -import pytest -from dotenv import load_dotenv - -from core import ( - DatabaseConfig, - DatabaseProvider, - Vector, - VectorEntry, - VectorSearchSettings, - generate_id_from_label, -) -from core.providers import PostgresDBProvider - -load_dotenv() - - -# Sample vector entries -def generate_random_vector_entry(id: str, dimension: int) -> VectorEntry: - vector_data = [random.random() for _ in range(dimension)] - metadata = {"key": f"value_{id}"} - return VectorEntry( - fragment_id=generate_id_from_label(id), - extraction_id=uuid.uuid4(), - document_id=uuid.uuid4(), - user_id=uuid.uuid4(), - group_ids=[uuid.uuid4()], - vector=Vector(data=vector_data), - text=f"Sample text for {id}", - metadata=metadata, - ) - - -dimension = 3 -num_entries = 100 -sample_entries = [ - generate_random_vector_entry(f"id_{i}", dimension) - for i in range(num_entries) -] - - -# Fixture for PostgresDBProvider -@pytest.fixture -def pg_vector_db(): - random_collection_name = ( - f"test_collection_{random.randint(0, 1_000_000_000)}" - ) - config = DatabaseConfig.create( - provider="postgres", vecs_collection=random_collection_name - ) - db = PostgresDBProvider(config, dimension=3) - yield db - # Teardown - db.vx.delete_collection(db.config.vecs_collection) - - -@pytest.mark.parametrize("db_fixture", ["pg_vector_db"]) -def test_db_initialization(request, db_fixture): - db = 
request.getfixturevalue(db_fixture) - assert isinstance(db, DatabaseProvider) - - -@pytest.mark.parametrize("db_fixture", ["pg_vector_db"]) -def test_db_copy_and_search(request, db_fixture): - db = request.getfixturevalue(db_fixture) - db.vector.upsert(sample_entries[0]) - results = db.vector.semantic_search( - query_vector=sample_entries[0].vector.data, - search_settings=VectorSearchSettings(), - ) - assert len(results) == 1 - assert results[0].fragment_id == sample_entries[0].fragment_id - assert results[0].score == pytest.approx(1.0, rel=1e-3) - - -@pytest.mark.parametrize("db_fixture", ["pg_vector_db"]) -def test_db_upsert_and_search(request, db_fixture): - db = request.getfixturevalue(db_fixture) - db.vector.upsert(sample_entries[0]) - results = db.vector.semantic_search( - query_vector=sample_entries[0].vector.data, - search_settings=VectorSearchSettings(), - ) - assert len(results) == 1 - assert results[0].fragment_id == sample_entries[0].fragment_id - assert results[0].score == pytest.approx(1.0, rel=1e-3) - - -@pytest.mark.parametrize("db_fixture", ["pg_vector_db"]) -def test_imperfect_match(request, db_fixture): - db = request.getfixturevalue(db_fixture) - db.vector.upsert(sample_entries[0]) - query_vector = [val + 0.1 for val in sample_entries[0].vector.data] - results = db.vector.semantic_search( - query_vector=query_vector, search_settings=VectorSearchSettings() - ) - assert len(results) == 1 - assert results[0].fragment_id == sample_entries[0].fragment_id - assert results[0].score < 1.0 - - -@pytest.mark.parametrize("db_fixture", ["pg_vector_db"]) -def test_bulk_insert_and_search(request, db_fixture): - db = request.getfixturevalue(db_fixture) - for entry in sample_entries: - db.vector.upsert(entry) - - query_vector = sample_entries[0].vector.data - results = db.vector.semantic_search( - query_vector=query_vector, - search_settings=VectorSearchSettings(search_limit=5), - ) - assert len(results) == 5 - assert results[0].fragment_id == 
sample_entries[0].fragment_id - assert results[0].score == pytest.approx(1.0, rel=1e-3) - - -@pytest.mark.parametrize("db_fixture", ["pg_vector_db"]) -def test_search_with_filters(request, db_fixture): - db = request.getfixturevalue(db_fixture) - for entry in sample_entries: - db.vector.upsert(entry) - - filtered_id = sample_entries[0].metadata["key"] - query_vector = sample_entries[0].vector.data - results = db.vector.semantic_search( - query_vector=query_vector, - search_settings=VectorSearchSettings(filters={"key": filtered_id}), - ) - assert len(results) == 1 - assert results[0].fragment_id == sample_entries[0].fragment_id - assert results[0].metadata["key"] == filtered_id - - -@pytest.mark.parametrize("db_fixture", ["pg_vector_db"]) -def test_delete(request, db_fixture): - db = request.getfixturevalue(db_fixture) - for entry in sample_entries: - db.vector.upsert(entry) - - key_to_delete = sample_entries[0].metadata["key"] - db.vector.delete(filters={"key": {"$eq": key_to_delete}}) - - results = db.vector.semantic_search( - query_vector=sample_entries[0].vector.data, - search_settings=VectorSearchSettings(), - ) - assert all(result.metadata["key"] != key_to_delete for result in results) - - -@pytest.mark.parametrize("db_fixture", ["pg_vector_db"]) -def test_upsert(request, db_fixture): - db = request.getfixturevalue(db_fixture) - db.vector.upsert(sample_entries[0]) - modified_entry = VectorEntry( - fragment_id=sample_entries[0].fragment_id, - extraction_id=sample_entries[0].extraction_id, - document_id=sample_entries[0].document_id, - user_id=sample_entries[0].user_id, - group_ids=sample_entries[0].group_ids, - vector=Vector(data=[0.5, 0.5, 0.5]), - text="Modified text", - metadata={"key": "new_value"}, - ) - db.vector.upsert(modified_entry) - - results = db.vector.semantic_search( - query_vector=[0.5, 0.5, 0.5], search_settings=VectorSearchSettings() - ) - assert len(results) == 1 - assert results[0].fragment_id == sample_entries[0].fragment_id - assert 
results[0].metadata["key"] == "new_value" - assert results[0].text == "Modified text" diff --git a/py/tests/test_embedding.py b/py/tests/test_embedding.py deleted file mode 100644 index d9ffe67c3..000000000 --- a/py/tests/test_embedding.py +++ /dev/null @@ -1,338 +0,0 @@ -import asyncio -import contextlib - -import pytest - -from core import EmbeddingConfig - - -@pytest.fixture(scope="session", autouse=True) -def event_loop_policy(): - asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) - - -@pytest.fixture(scope="function", autouse=True) -async def cleanup_tasks(): - yield - tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] - [task.cancel() for task in tasks] - await asyncio.gather(*tasks, return_exceptions=True) - - -from core.providers import LiteLLMEmbeddingProvider - - -@pytest.fixture -def litellm_provider(): - config = EmbeddingConfig( - provider="litellm", - base_model="text-embedding-3-small", - base_dimension=1536, - ) - return LiteLLMEmbeddingProvider(config) - - -def test_litellm_initialization(litellm_provider): - assert isinstance(litellm_provider, LiteLLMEmbeddingProvider) - assert litellm_provider.base_model == "text-embedding-3-small" - assert litellm_provider.base_dimension == 1536 - - -def test_litellm_invalid_provider_initialization(): - config = EmbeddingConfig(provider="invalid_provider") - with pytest.raises(ValueError): - LiteLLMEmbeddingProvider(config) - - -def test_litellm_get_embedding(litellm_provider): - embedding = litellm_provider.get_embedding("test text") - assert len(embedding) == 1536 - assert isinstance(embedding, list) - - -@pytest.mark.asyncio -async def test_litellm_async_get_embedding(litellm_provider): - with contextlib.suppress(asyncio.CancelledError): - embedding = await litellm_provider.async_get_embedding("test text") - assert len(embedding) == 1536 - assert isinstance(embedding, list) - - -def test_litellm_get_embeddings(litellm_provider): - embeddings = 
litellm_provider.get_embeddings(["text1", "text2"]) - assert len(embeddings) == 2 - assert all(len(emb) == 1536 for emb in embeddings) - - -@pytest.mark.asyncio -async def test_litellm_async_get_embeddings(litellm_provider): - with contextlib.suppress(asyncio.CancelledError): - embeddings = await litellm_provider.async_get_embeddings( - ["text1", "text2"] - ) - assert len(embeddings) == 2 - assert all(len(emb) == 1536 for emb in embeddings) - - -def test_litellm_missing_provider(): - config = EmbeddingConfig() - with pytest.raises(ValueError, match="Must set provider"): - LiteLLMEmbeddingProvider(config) - - -def test_litellm_incorrect_provider(): - config = EmbeddingConfig(provider="not_litellm") - with pytest.raises( - ValueError, match="Provider 'not_litellm' is not supported" - ): - LiteLLMEmbeddingProvider(config) - - -def test_litellm_rerank_model_not_supported(): - config = EmbeddingConfig( - provider="litellm", - base_model="text-embedding-3-small", - base_dimension=1536, - rerank_model="some-model", - ) - with pytest.raises( - ValueError, match="does not support separate reranking" - ): - LiteLLMEmbeddingProvider(config) - - -def test_litellm_unsupported_stage(): - config = EmbeddingConfig( - provider="litellm", - base_model="text-embedding-3-small", - base_dimension=1536, - ) - provider = LiteLLMEmbeddingProvider(config) - with pytest.raises( - ValueError, match="LiteLLMEmbeddingProvider only supports search stage" - ): - provider.get_embedding( - "test", stage=LiteLLMEmbeddingProvider.PipeStage.RERANK - ) - - -@pytest.mark.asyncio -async def test_litellm_async_unsupported_stage(): - config = EmbeddingConfig( - provider="litellm", - base_model="text-embedding-3-small", - base_dimension=1536, - ) - provider = LiteLLMEmbeddingProvider(config) - with pytest.raises( - ValueError, match="LiteLLMEmbeddingProvider only supports search stage" - ): - await provider.async_get_embedding( - "test", stage=LiteLLMEmbeddingProvider.PipeStage.RERANK - ) - - -def 
test_litellm_tokenize_string_not_implemented(): - config = EmbeddingConfig( - provider="litellm", - base_model="text-embedding-3-small", - base_dimension=1536, - ) - provider = LiteLLMEmbeddingProvider(config) - with pytest.raises( - NotImplementedError, - match="Tokenization is not supported by LiteLLMEmbeddingProvider", - ): - provider.tokenize_string( - "test", - "text-embedding-3-small", - LiteLLMEmbeddingProvider.PipeStage.BASE, - ) - - -@pytest.mark.asyncio -async def test_litellm_async_get_embeddings_unsupported_stage(): - config = EmbeddingConfig( - provider="litellm", - base_model="text-embedding-3-small", - base_dimension=1536, - ) - provider = LiteLLMEmbeddingProvider(config) - with pytest.raises( - ValueError, match="LiteLLMEmbeddingProvider only supports search stage" - ): - await provider.async_get_embeddings( - ["test1", "test2"], stage=LiteLLMEmbeddingProvider.PipeStage.RERANK - ) - - -# You might also want to test error handling in get_embedding and get_embeddings -def test_litellm_get_embedding_error_handling(mocker): - config = EmbeddingConfig( - provider="litellm", - base_model="text-embedding-3-small", - base_dimension=1536, - ) - provider = LiteLLMEmbeddingProvider(config) - mocker.patch.object( - provider, "litellm_embedding", side_effect=Exception("Test error") - ) - with pytest.raises(Exception, match="Test error"): - provider.get_embedding("test") - - -@pytest.mark.asyncio -async def test_litellm_async_get_embedding_error_handling(mocker): - config = EmbeddingConfig( - provider="litellm", - base_model="text-embedding-3-small", - base_dimension=1536, - ) - provider = LiteLLMEmbeddingProvider(config) - mocker.patch.object( - provider, "litellm_aembedding", side_effect=Exception("Test error") - ) - with pytest.raises(Exception, match="Test error"): - await provider.async_get_embedding("test") - - -from core.providers import OpenAIEmbeddingProvider - - -@pytest.fixture -def openai_provider(): - config = EmbeddingConfig( - provider="openai", - 
base_model="text-embedding-3-small", - base_dimension=1536, - ) - return OpenAIEmbeddingProvider(config) - - -def test_openai_initialization(openai_provider): - assert isinstance(openai_provider, OpenAIEmbeddingProvider) - assert openai_provider.base_model == "text-embedding-3-small" - assert openai_provider.base_dimension == 1536 - - -def test_openai_invalid_provider_initialization(): - config = EmbeddingConfig(provider="invalid_provider") - with pytest.raises(ValueError): - OpenAIEmbeddingProvider(config) - - -def test_openai_get_embedding(openai_provider): - embedding = openai_provider.get_embedding("test text") - assert len(embedding) == 1536 - assert isinstance(embedding, list) - - -@pytest.mark.asyncio -async def test_openai_async_get_embedding(openai_provider): - with contextlib.suppress(asyncio.CancelledError): - embedding = await openai_provider.async_get_embedding("test text") - assert len(embedding) == 1536 - assert isinstance(embedding, list) - - -def test_openai_get_embeddings(openai_provider): - embeddings = openai_provider.get_embeddings(["text1", "text2"]) - assert len(embeddings) == 2 - assert all(len(emb) == 1536 for emb in embeddings) - - -@pytest.mark.asyncio -async def test_openai_async_get_embeddings(openai_provider): - with contextlib.suppress(asyncio.CancelledError): - embeddings = await openai_provider.async_get_embeddings( - ["text1", "text2"] - ) - assert len(embeddings) == 2 - assert all(len(emb) == 1536 for emb in embeddings) - - -def test_openai_tokenize_string(openai_provider): - tokens = openai_provider.tokenize_string( - "test text", "text-embedding-3-small" - ) - assert isinstance(tokens, list) - assert all(isinstance(token, int) for token in tokens) - - -def test_openai_missing_provider(): - config = EmbeddingConfig() - with pytest.raises(ValueError, match="Must set provider"): - OpenAIEmbeddingProvider(config) - - -def test_openai_incorrect_provider(): - config = EmbeddingConfig(provider="not_openai") - with pytest.raises( - 
ValueError, match="Provider 'not_openai' is not supported" - ): - OpenAIEmbeddingProvider(config) - - -def test_openai_unsupported_model(): - config = EmbeddingConfig( - provider="openai", base_model="unsupported-model", base_dimension=1536 - ) - with pytest.raises(ValueError, match="embedding model .* not supported"): - OpenAIEmbeddingProvider(config) - - -def test_openai_wrong_dimension(): - config = EmbeddingConfig( - provider="openai", - base_model="text-embedding-3-small", - base_dimension=2048, - ) - with pytest.raises(ValueError, match="Dimensions .* are not supported"): - OpenAIEmbeddingProvider(config) - - -def test_openai_missing_model_or_dimension(): - config = EmbeddingConfig(provider="openai") - with pytest.raises( - ValueError, - match="Must set base_model in order to initialize OpenAIEmbeddingProvider.", - ): - OpenAIEmbeddingProvider(config) - - -def test_openai_rerank_model_not_supported(): - config = EmbeddingConfig( - provider="openai", - base_model="text-embedding-3-small", - base_dimension=1536, - rerank_model="some-model", - ) - with pytest.raises( - ValueError, match="does not support separate reranking" - ): - OpenAIEmbeddingProvider(config) - - -from core.providers import OllamaEmbeddingProvider - - -@pytest.fixture -def ollama_provider(): - config = EmbeddingConfig( - provider="ollama", - base_model="mxbai-embed-large", - base_dimension=1024, - ) - return OllamaEmbeddingProvider(config) - - -def test_ollama_initialization(ollama_provider): - assert isinstance(ollama_provider, OllamaEmbeddingProvider) - assert ollama_provider.base_model == "mxbai-embed-large" - assert ollama_provider.base_dimension == 1024 - - -def test_ollama_invalid_provider_initialization(): - config = EmbeddingConfig(provider="invalid_provider") - with pytest.raises(ValueError): - OllamaEmbeddingProvider(config) diff --git a/py/tests/test_end_to_end.py b/py/tests/test_end_to_end.py deleted file mode 100644 index 4d5e1890d..000000000 --- a/py/tests/test_end_to_end.py +++ 
/dev/null @@ -1,388 +0,0 @@ -# import asyncio -# import os -# import uuid - -# import pytest -# from fastapi.datastructures import UploadFile - -# from core import ( -# Document, -# DocumentInfo, -# DocumentType, -# GenerationConfig, -# R2RConfig, -# R2REngine, -# R2RPipeFactory, -# R2RPipelineFactory, -# R2RProviderFactory, -# RunLoggingSingleton, -# UserResponse, -# VectorSearchSettings, -# generate_id_from_label, -# ) - - -# @pytest.fixture(scope="session", autouse=True) -# def event_loop_policy(): -# asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) - - -# @pytest.fixture(scope="function", autouse=True) -# async def cleanup_tasks(): -# yield -# tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] -# [task.cancel() for task in tasks] -# await asyncio.gather(*tasks, return_exceptions=True) - - -# @pytest.fixture(scope="function") -# def app(request): -# config = R2RConfig.from_toml() -# config.logging.provider = "local" -# config.logging.logging_path = uuid.uuid4().hex - -# config.database.provider = "postgres" -# config.database.vecs_collection = config.logging.logging_path -# try: -# providers = R2RProviderFactory(config).create_providers() -# pipes = R2RPipeFactory(config, providers).create_pipes() -# pipelines = R2RPipelineFactory(config, pipes).create_pipelines() - -# r2r = R2REngine( -# config=config, -# providers=providers, -# pipelines=pipelines, -# agents={}, -# ) - -# try: -# if os.path.exists(config.logging.logging_path): -# os.remove(config.logging.logging_path) - -# RunLoggingSingleton.configure(config.logging) -# except: -# RunLoggingSingleton._config.logging_path = ( -# config.logging.logging_path -# ) - -# yield r2r -# finally: -# if os.path.exists(config.logging.logging_path): -# os.remove(config.logging.logging_path) - - -# @pytest.fixture -# def logging_connection(): -# return RunLoggingSingleton() - - -# @pytest.fixture -# def user(): -# return UserResponse( -# id=generate_id_from_label("user"), -# 
email="test@test.com", -# hashed_password="test", -# ) - - -# @pytest.mark.parametrize("app", ["postgres"], indirect=True) -# @pytest.mark.asyncio -# async def test_ingest_txt_document(app, logging_connection): -# user_id = uuid.uuid4() -# group_id = uuid.uuid4() -# doc_id = uuid.uuid4() -# await app.aingest_documents( -# [ -# Document( -# id=doc_id, -# group_ids=[group_id], -# user_id=user_id, -# data="The quick brown fox jumps over the lazy dog.", -# type=DocumentType.TXT, -# metadata={"author": "John Doe"}, -# ), -# ] -# ) - -# # Verify the document was ingested correctly -# docs_overview = await app.adocuments_overview(document_ids=[doc_id]) -# assert len(docs_overview) == 1 -# assert docs_overview[0].id == doc_id -# assert docs_overview[0].group_ids == [group_id] -# assert docs_overview[0].user_id == user_id -# assert docs_overview[0].type == DocumentType.TXT -# assert docs_overview[0].metadata["author"] == "John Doe" -# assert docs_overview[0].ingestion_status == DocumentStatus.SUCCESS - - -# @pytest.mark.parametrize("app", ["postgres"], indirect=True) -# @pytest.mark.asyncio -# async def test_ingest_txt_file(app, user): -# # Prepare the test data -# metadata = {"author": "John Doe"} -# files = [ -# UploadFile( -# filename="test.txt", -# file=open( -# os.path.join( -# os.path.dirname(__file__), -# "..", -# "core", -# "examples", -# "data", -# "test.txt", -# ), -# "rb", -# ), -# ) -# ] -# # Set file size manually -# for file in files: -# file.file.seek(0, 2) # Move to the end of the file -# file.size = file.file.tell() # Get the file size -# file.file.seek(0) # Move back to the start of the file -# await app.ingest_files(metadatas=[metadata], files=files, user=user) - - -# @pytest.mark.parametrize("app", ["postgres"], indirect=True) -# @pytest.mark.asyncio -# async def test_ingest_search_txt_file(app, user, logging_connection): - -# # Convert metadata to JSON string -# run_info = await logging_connection.get_info_logs(run_type_filter="search") - -# # Prepare 
the test data -# metadata = {} -# files = [ -# UploadFile( -# filename="aristotle.txt", -# file=open( -# os.path.join( -# os.path.dirname(__file__), -# "..", -# "core", -# "examples", -# "data", -# "aristotle.txt", -# ), -# "rb", -# ), -# ), -# ] - -# # Set file size manually -# for file in files: -# file.file.seek(0, 2) # Move to the end of the file -# file.size = file.file.tell() # Get the file size -# file.file.seek(0) # Move back to the start of the file - -# # Convert metadata to JSON string -# run_info = await logging_connection.get_info_logs(run_type_filter="search") - -# ingestion_result = await app.ingest_files( -# files=files, user=user, metadatas=[metadata] -# ) - -# run_info = await logging_connection.get_info_logs(run_type_filter="search") - -# search_results = await app.asearch("who was aristotle?") -# print("search results = ", search_results["vector_search_results"][0]) -# assert len(search_results["vector_search_results"]) == 10 -# assert ( -# "was an Ancient Greek philosopher and polymath" -# in search_results["vector_search_results"][0]["text"] -# ) - -# search_results = await app.asearch( -# "who was aristotle?", -# vector_search_settings=VectorSearchSettings(search_limit=20), -# ) -# assert len(search_results["vector_search_results"]) == 20 -# assert ( -# "was an Ancient Greek philosopher and polymath" -# in search_results["vector_search_results"][0]["text"] -# ) -# ## test stream -# response = await app.rag( -# query="Who was aristotle?", -# rag_generation_config=GenerationConfig( -# **{"model": "gpt-4o-mini", "stream": True} -# ), -# ) -# collector = "" -# async for chunk in response: -# collector += chunk -# assert "Aristotle" in collector -# assert "Greek" in collector -# assert "philosopher" in collector -# assert "polymath" in collector -# assert "Ancient" in collector - - -# @pytest.mark.parametrize("app", ["postgres"], indirect=True) -# @pytest.mark.asyncio -# async def test_double_ingest(app, logging_connection): -# await 
app.aingest_documents( -# [ -# Document( -# id=generate_id_from_label("doc_1"), -# group_ids=[generate_id_from_label("group_1")], -# user_id=generate_id_from_label("user_id"), -# data="The quick brown fox jumps over the lazy dog.", -# type=DocumentType.TXT, -# metadata={"author": "John Doe"}, -# ), -# ] -# ) -# search_results = await app.asearch("who was aristotle?") - -# assert len(search_results["vector_search_results"]) == 1 -# with pytest.raises(Exception): -# try: -# await app.aingest_documents( -# [ -# Document( -# id=generate_id_from_label("doc_1"), -# group_ids=[generate_id_from_label("group_1")], -# user_id=generate_id_from_label("user_id"), -# data="The quick brown fox jumps over the lazy dog.", -# type=DocumentType.TXT, -# metadata={"author": "John Doe"}, -# ), -# ] -# ) -# except asyncio.CancelledError: -# pass - - -# @pytest.mark.parametrize("app", ["postgres"], indirect=True) -# @pytest.mark.asyncio -# async def test_ingest_search_then_delete(app, logging_connection): -# # Ingest a document -# await app.aingest_documents( -# [ -# Document( -# id=generate_id_from_label("doc_1"), -# group_ids=[generate_id_from_label("group_1")], -# user_id=generate_id_from_label("user_1"), -# data="The quick brown fox jumps over the lazy dog.", -# type="txt", -# metadata={"author": "John Doe"}, -# ), -# ] -# ) - -# # Search for the document -# search_results = await app.asearch("who was aristotle?") - -# # Verify that the search results are not empty -# assert ( -# len(search_results["vector_search_results"]) > 0 -# ), "Expected search results, but got none" -# assert ( -# search_results["vector_search_results"][0]["text"] -# == "The quick brown fox jumps over the lazy dog." 
-# ) - -# # Delete the document -# delete_result = await app.adelete(filters={"author": {"$eq": "John Doe"}}) - -# # Verify the deletion was successful -# assert delete_result is None -# # Search for the document again -# search_results_2 = await app.asearch("who was aristotle?") - -# # Verify that the search results are empty -# assert ( -# len(search_results_2["vector_search_results"]) == 0 -# ), f"Expected no search results, but got {search_results_2['vector_search_results']}" - - -# @pytest.mark.parametrize("app", ["postgres"], indirect=True) -# @pytest.mark.asyncio -# async def test_ingest_user_documents(app, logging_connection): - -# # user_id_0 = generate_id_from_label("user_0") -# # user_id_1 = generate_id_from_label("user_1") -# user_0 = app.register("user_0@test.com", "password") -# user_id_0 = user_0.id -# user_1 = app.register("user_1@test.com", "password") -# user_id_1 = user_1.id -# doc_id_0 = generate_id_from_label("doc_01") -# doc_id_1 = generate_id_from_label("doc_11") - -# await app.aingest_documents( -# [ -# Document( -# id=doc_id_0, -# group_ids=[generate_id_from_label("group_0")], -# user_id=user_id_0, -# data="The quick brown fox jumps over the lazy dog.", -# type="txt", -# metadata={"author": "John Doe"}, -# ), -# Document( -# id=doc_id_1, -# group_ids=[generate_id_from_label("group_1")], -# user_id=user_id_1, -# data="The lazy dog jumps over the quick brown fox.", -# type="txt", -# metadata={"author": "John Doe"}, -# ), -# ] -# ) -# user_stats_results = await app.ausers_overview([user_id_0, user_id_1]) -# print("user_stats_results = ", user_stats_results) -# user_id_results = [stats.user_id for stats in user_stats_results] -# print("user_id_results = ", user_stats_results) -# assert set([user_id_0, user_id_1]) == set( -# user_id_results -# ), f"Expected user ids {user_id_0} and {user_id_1}, but got {user_id_results}" - -# user_0_docs = await app.adocuments_overview(user_ids=[user_id_0]) -# user_1_docs = await 
app.adocuments_overview(user_ids=[user_id_1]) - -# assert ( -# len(user_0_docs) == 1 -# ), f"Expected 1 document for user {user_id_0}, but got {len(user_0_docs)}" -# assert ( -# len(user_1_docs) == 1 -# ), f"Expected 1 document for user {user_id_1}, but got {len(user_1_docs)}" -# assert ( -# user_0_docs[0].id == doc_id_0 -# ), f"Expected document id {doc_id_0} for user {user_id_0}, but got {user_0_docs[0].id}" -# assert ( -# user_1_docs[0].id == doc_id_1 -# ), f"Expected document id {doc_id_1} for user {user_id_1}, but got {user_1_docs[0].id}" - -# # Clean up -# delete_result = await app.adelete( -# filters={"document_id": {"$in": [doc_id_0, doc_id_1]}} -# ) - -# assert delete_result is None - - -# @pytest.mark.parametrize("app", ["postgres"], indirect=True) -# @pytest.mark.asyncio -# async def test_delete_by_id(app, logging_connection): -# doc_id = generate_id_from_label("doc_0") -# await app.aingest_documents( -# [ -# Document( -# id=doc_id, -# group_ids=[], -# user_id=generate_id_from_label("user_0"), -# data="The quick brown fox jumps over the lazy dog.", -# type="txt", -# metadata={"author": "John Doe"}, -# ), -# ] -# ) -# search_results = await app.asearch("who was aristotle?") - -# assert len(search_results["vector_search_results"]) > 0 -# delete_result = await app.adelete(filters={"document_id": {"$eq": doc_id}}) -# assert delete_result is None -# search_results = await app.asearch("who was aristotle?") -# assert len(search_results["vector_search_results"]) == 0 diff --git a/py/tests/test_groups.py b/py/tests/test_groups.py deleted file mode 100644 index 9393ba272..000000000 --- a/py/tests/test_groups.py +++ /dev/null @@ -1,406 +0,0 @@ -import random -from datetime import datetime -from uuid import UUID - -import pytest - -from core import DatabaseConfig, R2RException -from core.base.abstractions import DocumentInfo, DocumentType, IngestionStatus -from core.providers import BCryptConfig, BCryptProvider, PostgresDBProvider - - -# Add this fixture to create 
test documents -@pytest.fixture -def test_documents(pg_db, test_group): - documents = [] - for i in range(5): - doc = DocumentInfo( - id=UUID(f"00000000-0000-0000-0000-{i:012d}"), - group_ids=[test_group.group_id], - user_id=UUID("00000000-0000-0000-0000-000000000000"), - type=DocumentType.PDF, - metadata={}, - title=f"Test Document {i}", - version="1.0", - size_in_bytes=1000, - ingestion_status=IngestionStatus.PARSING, - ) - pg_db.relational.upsert_documents_overview([doc]) - documents.append(doc) - yield documents - # Clean up documents after the test - for doc in documents: - pg_db.relational.delete_from_documents_overview(doc.id) - - -# Fixture for PostgresDBProvider -@pytest.fixture(scope="function") -def pg_db(): - config = DatabaseConfig.create( - provider="postgres", - vecs_collection=f"test_collection_{random.randint(1, 1_000_000_000_000_000_000)}", - ) - crypto_provider = BCryptProvider(BCryptConfig()) - db = PostgresDBProvider( - config, dimension=3, crypto_provider=crypto_provider - ) - yield db - # Teardown - db.vx.delete_collection(db.collection_name) - - -# Improvement: Use a fixture for creating a test group -@pytest.fixture -def test_group(pg_db): - group = pg_db.relational.create_group( - "Test Group", "This is a test group." - ) - yield group - pg_db.relational.delete_group(group.group_id) - - -# Improvement: Use a fixture for creating a test user -@pytest.fixture -def test_user(pg_db): - - created_user = pg_db.relational.create_user( - email=f"test_{datetime.now().timestamp()}@example.com", - password="password", - ) - yield created_user - pg_db.relational.delete_user(created_user.id) - - -def test_create_group(pg_db): - group_name = "Test Group" - group_description = "This is a test group." 
- group = pg_db.relational.create_group( - name=group_name, description=group_description - ) - assert isinstance(group.group_id, UUID) - assert group.name == group_name - assert group.description == group_description - # Improvement: Check for created_at and updated_at fields - assert isinstance(group.created_at, datetime) - assert isinstance(group.updated_at, datetime) - - -def test_get_group(pg_db, test_group): - fetched_group = pg_db.relational.get_group(test_group.group_id) - assert fetched_group == test_group - - -def test_update_group(pg_db, test_group): - new_name = "Updated Group" - new_description = "This is an updated test group." - updated = pg_db.relational.update_group( - test_group.group_id, name=new_name, description=new_description - ) - assert updated - fetched_group = pg_db.relational.get_group(test_group.group_id) - assert fetched_group.name == new_name - assert fetched_group.description == new_description - # Improvement: Check that updated_at has changed - assert fetched_group.updated_at > test_group.updated_at - - -def test_delete_group(pg_db): - group = pg_db.relational.create_group( - "Temporary Group", "This group will be deleted" - ) - pg_db.relational.delete_group(group.group_id) - with pytest.raises(R2RException): - fetched_group = pg_db.relational.get_group(group.group_id) - - -def test_list_groups(pg_db, test_group): - # First, ensure we have at least two groups - second_group = pg_db.relational.create_group( - "Second Test Group", "This is another test group." 
- ) - - # Now test listing groups - groups = pg_db.relational.list_groups() - assert len(groups) >= 2 - assert any(group.group_id == test_group.group_id for group in groups) - assert any(group.group_id == second_group.group_id for group in groups) - - # Test pagination - first_page = pg_db.relational.list_groups(limit=1) - assert len(first_page) == 1 - second_page = pg_db.relational.list_groups(offset=1, limit=1) - assert len(second_page) == 1 - - # Ensure first and second pages are different - assert first_page[0].group_id != second_page[0].group_id - - # Test requesting more groups than exist - all_groups = pg_db.relational.list_groups(limit=1000) - assert len(all_groups) >= 2 - - # Clean up the second group - pg_db.relational.delete_group(second_group.group_id) - - -def test_add_user_to_group(pg_db, test_group, test_user): - added = pg_db.relational.add_user_to_group( - test_user.id, test_group.group_id - ) - user_groups = pg_db.relational.get_groups_for_user(test_user.id) - assert any(g.group_id == test_group.group_id for g in user_groups) - - test_group = pg_db.relational.create_group( - "Another Group", "Another test group" - ) - # # Improvement: Test adding the same user twice - pg_db.relational.add_user_to_group(test_user.id, test_group.group_id) - - -def test_remove_user_from_group(pg_db, test_group, test_user): - # TODO - modify this test to use a fixture for creating a test group - test_group_ = pg_db.relational.create_group( - "Another Group", "Another test group" - ) - - pg_db.relational.add_user_to_group(test_user.id, test_group_.group_id) - removed = pg_db.relational.remove_user_from_group( - test_user.id, test_group_.group_id - ) - user_groups = pg_db.relational.get_groups_for_user(test_user.id) - assert all(g.group_id != test_group_.group_id for g in user_groups) - # Improvement: Test removing a user that's not in the group - with pytest.raises(R2RException): - pg_db.relational.remove_user_from_group( - test_user.id, test_group_.group_id - ) - - 
-def test_get_users_in_group(pg_db, test_group, test_user): - pg_db.relational.add_user_to_group(test_user.id, test_group.group_id) - users_in_group = pg_db.relational.get_users_in_group(test_group.group_id) - assert any(u.id == test_user.id for u in users_in_group) - # Improvement: Test pagination - first_page = pg_db.relational.get_users_in_group( - test_group.group_id, limit=1 - ) - assert len(first_page) == 1 - - -def test_get_all_groups(pg_db, test_group): - # Create multiple groups - group1 = pg_db.relational.create_group("Group 1", "Description 1") - group2 = pg_db.relational.create_group("Group 2", "Description 2") - - all_groups = pg_db.relational.list_groups() - - assert len(all_groups) >= 3 # Including test_group - assert any(g.group_id == test_group.group_id for g in all_groups) - assert any(g.group_id == group1.group_id for g in all_groups) - assert any(g.group_id == group2.group_id for g in all_groups) - - -def test_get_groups_by_ids(pg_db): - group1 = pg_db.relational.create_group("Group 1", "Description 1") - group2 = pg_db.relational.create_group("Group 2", "Description 2") - - groups = pg_db.relational.get_groups_by_ids( - [group1.group_id, group2.group_id] - ) - - assert len(groups) == 2 - assert any(g.group_id == group1.group_id for g in groups) - assert any(g.group_id == group2.group_id for g in groups) - - -def test_get_groups_overview(pg_db, test_group, test_user): - pg_db.relational.add_user_to_group(test_user.id, test_group.group_id) - - overview = pg_db.relational.get_groups_overview([test_group.group_id]) - - assert len(overview) == 1 - assert overview[0].group_id == test_group.group_id - assert overview[0].name == test_group.name - assert overview[0].description == test_group.description - assert overview[0].user_count == 1 - - -# Test for adding the same user twice (idempotency) -def test_add_user_to_group_idempotency(pg_db, test_group, test_user): - # Add user for the first time - added1 = pg_db.relational.add_user_to_group( - 
test_user.id, test_group.group_id - ) - assert added1 - - # Try to add the same user again - added2 = pg_db.relational.add_user_to_group( - test_user.id, test_group.group_id - ) - assert not added2 # Should return False as user is already in the group - - # Verify user is in the group only once - users_in_group = pg_db.relational.get_users_in_group(test_group.group_id) - assert len([u for u in users_in_group if u.id == test_user.id]) == 1 - - -# Test for removing a user that's not in the group -def test_remove_user_not_in_group(pg_db, test_group, test_user): - # Ensure user is not in the group - pg_db.relational.add_user_to_group(test_user.id, test_group.group_id) - pg_db.relational.remove_user_from_group(test_user.id, test_group.group_id) - - # Try to remove the user again - with pytest.raises(R2RException): - pg_db.relational.remove_user_from_group( - test_user.id, test_group.group_id - ) - - -# Improvement: Add test for non-existent group and user -def test_edge_cases(pg_db): - non_existent_id = UUID("00000000-0000-0000-0000-000000000000") - # assert pg_db.relational.get_group(non_existent_id) is None - # ensure error - with pytest.raises(R2RException): - pg_db.relational.get_group(non_existent_id) - with pytest.raises(R2RException): - pg_db.relational.update_group( - non_existent_id, name="New Name", description="New Description" - ) - with pytest.raises(R2RException): - pg_db.relational.delete_group(non_existent_id) - with pytest.raises(R2RException): - pg_db.relational.add_user_to_group(non_existent_id, non_existent_id) - with pytest.raises(R2RException): - assert not pg_db.relational.remove_user_from_group( - non_existent_id, non_existent_id - ) - with pytest.raises(R2RException): - assert pg_db.relational.get_users_in_group(non_existent_id) == [] - - -def test_get_users_in_group_with_pagination(pg_db, test_group): - # Create multiple users and add them to the group - users = [] - for i in range(5): - user = pg_db.relational.create_user( - 
email=f"test_user_{i}@example.com", password="password" - ) - pg_db.relational.add_user_to_group(user.id, test_group.group_id) - users.append(user) - - # Test first page - first_page = pg_db.relational.get_users_in_group( - test_group.group_id, offset=0, limit=3 - ) - assert len(first_page) == 3 - - # Test second page - second_page = pg_db.relational.get_users_in_group( - test_group.group_id, offset=3, limit=3 - ) - assert len(second_page) == 2 - - # Ensure all users are different - all_users = first_page + second_page - assert len({u.id for u in all_users}) == 5 - - # Clean up - for user in users: - pg_db.relational.delete_user(user.id) - - -def test_get_groups_overview_with_pagination(pg_db): - # Create multiple groups - groups = [ - pg_db.relational.create_group(f"Group {i}", f"Description {i}") - for i in range(5) - ] - - # Test first page - first_page = pg_db.relational.get_groups_overview(offset=0, limit=3) - assert len(first_page) == 3 - - # Test second page - second_page = pg_db.relational.get_groups_overview(offset=3, limit=3) - assert len(second_page) == 2 - - # Ensure all groups are different - all_groups = first_page + second_page - assert len({g.group_id for g in all_groups}) == 5 - - # Clean up - for group in groups: - pg_db.relational.delete_group(group.group_id) - - -def test_get_groups_for_user_with_pagination(pg_db, test_user): - # Create multiple groups and add the user to them - groups = [] - for i in range(5): - group = pg_db.relational.create_group(f"Group {i}", f"Description {i}") - pg_db.relational.add_user_to_group(test_user.id, group.group_id) - groups.append(group) - - # Test first page - first_page = pg_db.relational.get_groups_for_user( - test_user.id, offset=0, limit=3 - ) - assert len(first_page) == 3 - - # Test second page - second_page = pg_db.relational.get_groups_for_user( - test_user.id, offset=3, limit=3 - ) - assert len(second_page) == 2 - - # Ensure all groups are different - all_groups = first_page + second_page - assert 
len({g.group_id for g in all_groups}) == 5 - - # Clean up - for group in groups: - pg_db.relational.delete_group(group.group_id) - - -def test_documents_in_group(pg_db, test_group, test_documents): - # Test getting all documents - all_docs = pg_db.relational.documents_in_group(test_group.group_id) - assert len(all_docs) == 5 - assert all(isinstance(doc, DocumentInfo) for doc in all_docs) - assert all(test_group.group_id in doc.group_ids for doc in all_docs) - - # Test pagination - first page - first_page = pg_db.relational.documents_in_group( - test_group.group_id, offset=0, limit=3 - ) - assert len(first_page) == 3 - - # Test pagination - second page - second_page = pg_db.relational.documents_in_group( - test_group.group_id, offset=3, limit=3 - ) - assert len(second_page) == 2 - - # Ensure all documents are different - all_docs = first_page + second_page - assert len({doc.id for doc in all_docs}) == 5 - - # Test ordering (should be in descending order of created_at) - assert all( - all_docs[i].created_at >= all_docs[i + 1].created_at - for i in range(len(all_docs) - 1) - ) - - # Test with non-existent group - non_existent_id = UUID("00000000-0000-0000-0000-000000000000") - with pytest.raises(R2RException): - pg_db.relational.documents_in_group(non_existent_id) - - # Test with empty group - empty_group = pg_db.relational.create_group("Empty Group", "No documents") - empty_docs = pg_db.relational.documents_in_group(empty_group.group_id) - assert len(empty_docs) == 0 - - # Clean up - pg_db.relational.delete_group(empty_group.group_id) diff --git a/py/tests/test_groups_client.py b/py/tests/test_groups_client.py deleted file mode 100644 index 1c0c15717..000000000 --- a/py/tests/test_groups_client.py +++ /dev/null @@ -1,380 +0,0 @@ -# import uuid -# from datetime import datetime -# from unittest.mock import MagicMock - -# import pytest -# from fastapi import Depends -# from fastapi.security import OAuth2PasswordBearer -# from fastapi.testclient import TestClient - -# 
from core import R2RApp, R2RBuilder, Token, UserResponse -# from core.base import GroupResponse -# from r2r import R2RClient - -# oauth2_scheme = OAuth2PasswordBearer(tokenUrl="token") - - -# def create_superuser(email: str, password: str): -# return UserResponse( -# id=uuid.UUID("12345678-1234-5678-1234-567812345678"), -# email=email, -# hashed_password="hashed_" + password, -# is_active=True, -# is_superuser=True, -# is_verified=True, -# name="Test Superuser", -# bio="Test Superuser Bio", -# profile_picture="http://example.com/superuser_pic.jpg", -# created_at=datetime.utcnow(), -# updated_at=datetime.utcnow(), -# ) - - -# @pytest.fixture(scope="function") -# def mock_auth_wrapper(): -# def auth_wrapper(token: str = Depends(oauth2_scheme)): -# return UserResponse( -# id=uuid.UUID("12345678-1234-5678-1234-567812345678"), -# email="admin@example.com", -# is_active=True, -# hashed_password="xxx", -# is_superuser=True, -# ) - -# return auth_wrapper - - -# @pytest.fixture(scope="function") -# def mock_db(): -# db = MagicMock() -# db.relational.get_user_by_email.return_value = ( -# None # Simulate empty database -# ) -# db.relational.create_user.side_effect = create_superuser -# db.relational.get_user_by_id.return_value = create_superuser( -# email="admin@example.com", password="adminpassword" -# ) - -# def mock_update_user(user): -# return UserResponse( -# id=uuid.UUID("12345678-1234-5678-1234-567812345678"), -# email=user.email, -# hashed_password="hashed_password", -# is_active=True, -# is_superuser=True, -# is_verified=True, -# name=user.name, -# bio=user.bio, -# profile_picture=user.profile_picture, -# created_at=datetime.utcnow(), -# updated_at=datetime.utcnow(), -# ) - -# db.relational.update_user.side_effect = mock_update_user - -# def mock_create_group(**kwargs): -# return GroupResponse( -# group_id=uuid.uuid4(), -# name=kwargs.get("name", "Test Group"), -# description=kwargs.get("description", "A test group"), -# created_at=datetime.utcnow(), -# 
updated_at=datetime.utcnow(), -# ).dict() - -# db.relational.create_group = MagicMock(side_effect=mock_create_group) - -# def mock_get_group(group_id): -# return GroupResponse( -# group_id=group_id, -# name="Test Group", -# description="A test group", -# created_at=datetime.utcnow(), -# updated_at=datetime.utcnow(), -# ).dict() - -# db.relational.get_group = MagicMock(side_effect=mock_get_group) - -# def mock_update_group(group_id, name, description): -# return GroupResponse( -# group_id=group_id, -# name=name, -# description=description, -# created_at=datetime.utcnow(), -# updated_at=datetime.utcnow(), -# ).dict() - -# db.relational.update_group = MagicMock(side_effect=mock_update_group) - -# db.relational.delete_group = MagicMock(return_value=True) - -# def mock_list_groups(offset=0, limit=100): -# return [ -# GroupResponse( -# group_id=uuid.uuid4(), -# name=f"Group {i}", -# description=f"Description {i}", -# created_at=datetime.utcnow(), -# updated_at=datetime.utcnow(), -# ).dict() -# for i in range(1, 3) -# ] - -# db.relational.list_groups = MagicMock(side_effect=mock_list_groups) - -# db.relational.add_user_to_group = MagicMock(return_value=True) -# db.relational.remove_user_from_group = MagicMock(return_value=True) -# db.relational.get_users_in_group = MagicMock( -# return_value=[ -# UserResponse( -# id=uuid.UUID("12345678-1234-5678-1234-567812345678"), -# email="test1@gmail.com", -# hashed_password="hashed_password", -# is_active=True, -# is_superuser=True, -# is_verified=True, -# created_at=datetime.utcnow(), -# updated_at=datetime.utcnow(), -# ), -# UserResponse( -# id=uuid.UUID("12345678-1234-5678-1234-567812345678"), -# email="test2@gmail.com", -# hashed_password="hashed_password", -# is_active=True, -# is_superuser=True, -# is_verified=True, -# created_at=datetime.utcnow(), -# updated_at=datetime.utcnow(), -# ), -# ] -# ) -# db.relational.get_groups_for_user = MagicMock(side_effect=mock_list_groups) - -# def mock_groups_overview(group_ids, offset=0, 
limit=100): -# return [ -# { -# "group_id": str(uuid.uuid4()), -# "name": f"Group {i}", -# "description": f"Description {i}", -# "created_at": datetime.utcnow(), -# "updated_at": datetime.utcnow(), -# "user_count": i * 2, -# "document_count": i * 2, -# } -# for i in range(1, 3) -# ] - -# db.relational.get_groups_overview = MagicMock( -# side_effect=mock_groups_overview -# ) - -# return db - - -# @pytest.fixture(scope="function") -# def r2r_client(app_client): -# return R2RClient(base_url="http://testserver", custom_client=app_client) - - -# def authenticate_superuser(r2r_client, mock_db): -# user_data = {"email": "admin@example.com", "password": "adminpassword"} -# mock_db.relational.get_user_by_email.return_value = None -# r2r_client.register(**user_data) - -# # Create a superuser -# superuser = UserResponse( -# id=uuid.UUID("12345678-1234-5678-1234-567812345678"), -# email=user_data["email"], -# hashed_password="hashed_" + user_data["password"], -# is_active=True, -# is_superuser=True, -# verification_code_expiry=None, -# ) - -# mock_db.relational.get_user_by_email.return_value = superuser -# mock_db.relational.get_user_by_id.return_value = superuser - -# # Login as superuser -# response = r2r_client.login(**user_data) -# assert "access_token" in response["results"] - - -# @pytest.fixture(scope="function") -# def app_client(mock_db, mock_auth_wrapper): -# config = R2RBuilder._get_config("auth") -# providers = MagicMock() -# providers.auth.login.return_value = { -# "access_token": Token(token="access_token", token_type="access"), -# "refresh_token": Token(token="refresh_token", token_type="refresh"), -# } -# providers.auth.auth_wrapper = mock_auth_wrapper -# providers.auth.register.side_effect = mock_db.relational.create_user -# providers.auth.verify_email.return_value = { -# "message": "Email verified successfully" -# } -# providers.auth.change_password.return_value = { -# "message": "Password changed successfully" -# } -# 
providers.auth.request_password_reset.return_value = { -# "message": "If the email exists, a reset link has been sent" -# } -# providers.auth.confirm_password_reset.return_value = { -# "message": "Password reset successfully" -# } -# providers.auth.logout.return_value = {"message": "Logged out successfully"} - -# providers.database = mock_db -# pipelines = MagicMock() -# agents = MagicMock() -# engine = R2REngine( -# config=config, -# providers=providers, -# pipelines=pipelines, -# agents=agents, -# ) - -# async def mock_asearch(*args, **kwargs): -# return { -# "vector_search_results": [ -# { -# "id": "doc1", -# "metadata": {"text": "Sample search result"}, -# "score": 0.95, -# } -# ] -# } - -# engine.asearch = mock_asearch -# app = R2RApp(engine) -# return TestClient(app.app) - - -# @pytest.fixture -# def group_id(): -# return uuid.uuid4() - - -# @pytest.fixture -# def user_id(): -# return uuid.uuid4() - - -# @pytest.mark.asyncio -# async def test_user_profile(r2r_client, mock_db): -# r2r_client.register(email="profile@example.com", password="password123") -# r2r_client.login(email="profile@example.com", password="password123") - -# updated_profile = r2r_client.update_user(name="John Doe", bio="Test bio") -# assert updated_profile["results"]["name"] == "John Doe" -# assert updated_profile["results"]["bio"] == "Test bio" - - -# # TODO - Revive these tests later. 
-# # def test_create_group(r2r_client, mock_db): -# # authenticate_superuser(r2r_client, mock_db) -# # group_data = {"name": "Test Group", "description": "A test group"} -# # response = r2r_client.create_group(**group_data) -# # assert "results" in response -# # print('response = ', response) -# # assert response["results"]["name"] == group_data["name"] -# # assert response["results"]["description"] == group_data["description"] -# # mock_db.relational.create_group.assert_called_once_with(**group_data) - - -# @pytest.mark.asyncio -# async def test_get_group(r2r_client, mock_db, group_id): -# authenticate_superuser(r2r_client, mock_db) -# response = r2r_client.get_group(group_id) -# assert "results" in response -# assert response["results"]["group_id"] == str(group_id) -# assert response["results"]["name"] == "Test Group" -# mock_db.relational.get_group.assert_called_once_with(group_id) - - -# @pytest.mark.asyncio -# async def test_update_group(r2r_client, mock_db, group_id): -# authenticate_superuser(r2r_client, mock_db) -# update_data = { -# "name": "Test Group", -# "description": "An updated test group", -# } -# # mock_db.relational.update_group.return_value = True -# response = r2r_client.update_group(group_id, **update_data) -# assert "results" in response -# assert ( -# response["results"]["description"] == "An updated test group" -# ) # is True -# mock_db.relational.update_group.assert_called_once_with( -# *(group_id, "Test Group", "An updated test group") -# ) - - -# @pytest.mark.asyncio -# async def test_list_groups(r2r_client, mock_db): -# authenticate_superuser(r2r_client, mock_db) -# # mock_db.relational.list_groups.return_value = mock_groups -# response = r2r_client.list_groups(0, 100) -# assert "results" in response -# assert len(response["results"]) == 2 - -# mock_db.relational.list_groups.assert_called_once_with(offset=0, limit=100) - - -# @pytest.mark.asyncio -# async def test_get_users_in_group(r2r_client, mock_db, group_id): -# 
authenticate_superuser(r2r_client, mock_db) -# response = r2r_client.get_users_in_group(group_id) -# assert "results" in response -# assert len(response["results"]) == 2 -# mock_db.relational.get_users_in_group.assert_called_once_with( -# group_id, offset=0, limit=100 -# ) - - -# # @pytest.mark.asyncio -# # async def test_get_groups_for_user(r2r_client, mock_db, user_id): -# # authenticate_superuser(r2r_client, mock_db) -# # # mock_groups = [ -# # # {"id": str(uuid.uuid4()), "name": "Group 1"}, -# # # {"id": str(uuid.uuid4()), "name": "Group 2"}, -# # # ] -# # # mock_db.relational.get_groups_for_user.return_value = mock_groups -# # response = r2r_client.user_groups(user_id) -# # assert "results" in response -# # assert len(response["results"]) == 2 -# # # assert response["results"] == mock_groups -# # mock_db.relational.get_groups_for_user.assert_called_once_with(user_id, offset=0, limit=100) - - -# @pytest.mark.asyncio -# async def test_groups_overview(r2r_client, mock_db): -# authenticate_superuser(r2r_client, mock_db) -# # mock_overview = [ -# # {"id": str(uuid.uuid4()), "name": "Group 1", "member_count": 5}, -# # {"id": str(uuid.uuid4()), "name": "Group 2", "member_count": 3}, -# # ] -# # mock_db.relational.get_groups_overview.return_value = mock_overview -# response = r2r_client.groups_overview() -# assert "results" in response -# assert len(response["results"]) == 2 -# # assert response["results"] == mock_overview -# mock_db.relational.get_groups_overview.assert_called_once_with( -# None, offset=0, limit=100 -# ) - - -# @pytest.mark.asyncio -# async def test_groups_overview_with_ids(r2r_client, mock_db): -# authenticate_superuser(r2r_client, mock_db) -# group_ids = [uuid.uuid4(), uuid.uuid4()] -# # mock_overview = [ -# # {"id": str(group_ids[0]), "name": "Group 1", "member_count": 5}, -# # {"id": str(group_ids[1]), "name": "Group 2", "member_count": 3}, -# # ] -# # mock_db.relational.get_groups_overview.return_value = mock_overview -# response = 
r2r_client.groups_overview(group_ids, 10, 100) -# assert "results" in response -# assert len(response["results"]) == 2 -# # assert response["results"] == mock_overview -# mock_db.relational.get_groups_overview.assert_called_once_with( -# [str(gid) for gid in group_ids], offset=10, limit=100 -# ) diff --git a/py/tests/test_ingestion_service.py b/py/tests/test_ingestion_service.py deleted file mode 100644 index 1dc205ee3..000000000 --- a/py/tests/test_ingestion_service.py +++ /dev/null @@ -1,435 +0,0 @@ -# import asyncio -# import io -# import logging -# import uuid -# from datetime import datetime -# from unittest.mock import AsyncMock, MagicMock, Mock - -# import pytest -# from fastapi import UploadFile - -# from core import R2RAgents -# from core.base import ( -# Document, -# DocumentInfo, -# R2RDocumentProcessingError, -# R2RException, -# RunManager, -# UserResponse, -# generate_id_from_label, -# ) -# from core.main import R2RPipelines, R2RProviders -# from core.main.services.ingestion_service import IngestionService - -# logger = logging.getLogger(__name__) - - -# @pytest.fixture(scope="session", autouse=True) -# def event_loop_policy(): -# asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) - - -# @pytest.fixture(scope="function", autouse=True) -# async def cleanup_tasks(): -# yield -# tasks = [t for t in asyncio.all_tasks() if t is not asyncio.current_task()] -# [task.cancel() for task in tasks] -# await asyncio.gather(*tasks, return_exceptions=True) - - -# @pytest.fixture -# def mock_vector_db(): -# mock_db = MagicMock() -# mock_db.relational = MagicMock() -# mock_db.relational.get_documents_overview.return_value = ( -# [] -# ) # Default to empty list -# return mock_db - - -# @pytest.fixture -# def mock_embedding_model(): -# return Mock() - - -# @pytest.fixture -# def mock_pipes(): -# pipes = Mock() -# pipes.parsing_pipe = AsyncMock() -# pipes.chunking_pipe = AsyncMock() -# pipes.embedding_pipe = AsyncMock() -# pipes.vector_storage_pipe = 
AsyncMock() -# return pipes - - -# @pytest.fixture -# def ingestion_service(mock_vector_db, mock_embedding_model, mock_pipes): -# config = MagicMock() -# config.app.get.return_value = 32 # Default max file size -# providers = Mock(spec=R2RProviders) -# providers.database = mock_vector_db -# providers.embedding_model = mock_embedding_model -# pipelines = Mock(spec=R2RPipelines) -# pipelines.ingestion_pipeline = AsyncMock() -# pipelines.ingestion_pipeline.run.return_value = { -# "embedding_pipeline_output": [] -# } -# run_manager = RunManager(logger) -# logging_connection = Mock() -# agents = Mock(spec=R2RAgents) - -# return IngestionService( -# config, -# providers, -# mock_pipes, -# pipelines, -# agents, -# run_manager, -# logging_connection=logging_connection, -# ) - - -# @pytest.mark.asyncio -# async def test_ingest_single_document( -# ingestion_service, mock_vector_db, mock_pipes -# ): -# document = Document( -# id=generate_id_from_label("test_id"), -# group_ids=[], -# user_id=generate_id_from_label("user_1"), -# type="txt", -# data="Test content", -# metadata={}, -# ) - -# mock_pipes.parsing_pipe.run.return_value = [{"content": "Test content"}] -# mock_pipes.chunking_pipe.run.return_value = [{"chunk": "Test content"}] -# mock_pipes.embedding_pipe.run.return_value = [ -# {"embedding": [0.1, 0.2, 0.3]} -# ] -# mock_pipes.vector_storage_pipe.run.return_value = ["stored_id"] - -# ingestion_service.pipelines.ingestion_pipeline.run.return_value = { -# "embedding_pipeline_output": [(document.id, None)] -# } -# mock_vector_db.relational.get_documents_overview.return_value = ( -# [] -# ) # No existing documents - -# result = await ingestion_service.ingest_documents([document]) - -# assert result["processed_documents"][0].id == generate_id_from_label( -# "test_id" -# ) -# assert not result["failed_documents"] -# assert not result["skipped_documents"] - -# mock_pipes.parsing_pipe.run.assert_called_once() -# mock_pipes.chunking_pipe.run.assert_called_once() -# 
mock_pipes.embedding_pipe.run.assert_called_once() -# mock_pipes.vector_storage_pipe.run.assert_called_once() - - -# @pytest.mark.asyncio -# async def test_ingest_duplicate_document(ingestion_service, mock_vector_db): -# document = Document( -# id=generate_id_from_label("test_id"), -# group_ids=[], -# user_id=generate_id_from_label("user_1"), -# type="txt", -# metadata={}, -# ) -# mock_vector_db.relational.get_documents_overview.return_value = [ -# DocumentInfo( -# id=document.id, -# group_ids=[], -# user_id=generate_id_from_label("user_1"), -# version="v0", -# size_in_bytes=1024, -# metadata={}, -# title=str(document.id), -# type="txt", -# created_at=datetime.now(), -# updated_at=datetime.now(), -# ingestion_status="success", -# ) -# ] - -# with pytest.raises(R2RException) as exc_info: -# await ingestion_service.ingest_documents([document]) - -# assert ( -# f"Document with ID {document.id} was already successfully processed" -# in str(exc_info.value) -# ) - - -# @pytest.mark.asyncio -# async def test_ingest_file(ingestion_service): -# file_content = b"Test content" -# file_mock = UploadFile(filename="test.txt", file=io.BytesIO(file_content)) -# file_mock.file.seek(0) -# file_mock.size = len(file_content) # Set file size manually - -# user = UserResponse( -# id=generate_id_from_label("user1"), -# email="email@test.com", -# hashed_password="password", -# ) -# ingestion_service.pipelines.ingestion_pipeline.run.return_value = { -# "embedding_pipeline_output": [ -# (generate_id_from_label(f"test.txt-{user.id}"), None) -# ] -# } - -# result = await ingestion_service.ingest_files([file_mock], user=user) - -# assert len(result["processed_documents"]) == 1 -# assert not result["failed_documents"] -# assert not result["skipped_documents"] - - -# @pytest.mark.asyncio -# async def test_ingest_mixed_success_and_failure( -# ingestion_service, mock_vector_db -# ): -# documents = [ -# Document( -# id=generate_id_from_label("success_id"), -# data="Success content", -# 
group_ids=[], -# user_id=generate_id_from_label("user_1"), -# type="txt", -# metadata={}, -# ), -# Document( -# id=generate_id_from_label("failure_id"), -# data="Failure content", -# type="txt", -# group_ids=[], -# user_id=generate_id_from_label("user_1"), -# metadata={}, -# ), -# ] - -# ingestion_service.pipelines.ingestion_pipeline.run.return_value = { -# "embedding_pipeline_output": [ -# ( -# documents[0].id, -# f"Processed 1 vectors for document {documents[0].id}.", -# ), -# ( -# documents[1].id, -# R2RDocumentProcessingError( -# error_message="Embedding failed", -# document_id=documents[1].id, -# ), -# ), -# ] -# } - -# result = await ingestion_service.ingest_documents(documents) - -# assert len(result["processed_documents"]) == 1 -# assert len(result["failed_documents"]) == 1 -# assert documents[0].id in [doc.id for doc in result["processed_documents"]] -# assert documents[1].id in [ -# doc["document_id"] for doc in result["failed_documents"] -# ] -# assert "Embedding failed" in str(result["failed_documents"][0]["result"]) - -# assert mock_vector_db.relational.upsert_documents_overview.call_count == 2 -# upserted_docs = ( -# mock_vector_db.relational.upsert_documents_overview.call_args[0][0] -# ) -# assert len(upserted_docs) == 2 -# assert upserted_docs[0].id == documents[0].id -# assert upserted_docs[0].ingestion_status == "success" -# assert upserted_docs[1].id == documents[1].id -# assert upserted_docs[1].ingestion_status == "failure" - - -# @pytest.mark.asyncio -# async def test_ingest_unsupported_file_type(ingestion_service): -# file_mock = UploadFile( -# filename="test.unsupported", file=io.BytesIO(b"Test content") -# ) -# file_mock.file.seek(0) -# file_mock.size = 12 # Set file size manually - -# user = UserResponse( -# id=generate_id_from_label("user1"), -# email="email@test.com", -# hashed_password="password", -# ) - -# with pytest.raises(R2RException) as exc_info: -# await ingestion_service.ingest_files([file_mock], user=user) - -# assert "is not a 
valid DocumentType" in str(exc_info.value) - - -# @pytest.mark.asyncio -# async def test_partial_ingestion_success(ingestion_service, mock_vector_db): -# documents = [ -# Document( -# id=generate_id_from_label("success_1"), -# data="Success content 1", -# group_ids=[], -# user_id=generate_id_from_label("user_1"), -# type="txt", -# metadata={}, -# ), -# Document( -# id=generate_id_from_label("fail"), -# data="Fail content", -# group_ids=[], -# user_id=generate_id_from_label("user_1"), -# type="txt", -# metadata={}, -# ), -# Document( -# id=generate_id_from_label("success_2"), -# data="Success content 2", -# group_ids=[], -# user_id=generate_id_from_label("user_1"), -# type="txt", -# metadata={}, -# ), -# ] - -# ingestion_service.pipelines.ingestion_pipeline.run.return_value = { -# "embedding_pipeline_output": [ -# (documents[0].id, None), -# ( -# documents[1].id, -# R2RDocumentProcessingError( -# error_message="Embedding failed", -# document_id=documents[1].id, -# ), -# ), -# (documents[2].id, None), -# ] -# } - -# result = await ingestion_service.ingest_documents(documents) - -# assert len(result["processed_documents"]) == 2 -# assert len(result["failed_documents"]) == 1 -# assert documents[1].id in [ -# doc["document_id"] for doc in result["failed_documents"] -# ] - - -# @pytest.mark.asyncio -# async def test_version_increment(ingestion_service, mock_vector_db): - -# user = UserResponse( -# id=generate_id_from_label("user1"), -# email="email@test.com", -# hashed_password="password", -# ) - -# document = Document( -# id=generate_id_from_label("test_id"), -# data="Test content", -# group_ids=[], -# user_id=generate_id_from_label("user_1"), -# type="txt", -# metadata={}, -# ) -# mock_vector_db.relational.get_documents_overview.return_value = [ -# DocumentInfo( -# id=document.id, -# group_ids=[], -# user_id=generate_id_from_label("user_1"), -# type="txt", -# version="v2", -# ingestion_status="success", -# size_in_bytes=0, -# metadata={}, -# ) -# ] - -# file_mock = 
UploadFile( -# filename="test.txt", file=io.BytesIO(b"Updated content") -# ) -# await ingestion_service.update_files( -# files=[file_mock], document_ids=[document.id], user=user -# ) - -# calls = mock_vector_db.relational.upsert_documents_overview.call_args_list -# assert len(calls) == 2 -# assert calls[1][0][0][0].version == "v3" - - -# @pytest.mark.asyncio -# async def test_process_ingestion_results_error_handling(ingestion_service): -# document_infos = [ -# DocumentInfo( -# id=uuid.uuid4(), -# group_ids=[], -# user_id=generate_id_from_label("user_1"), -# type="txt", -# version="v0", -# ingestion_status="processing", -# size_in_bytes=0, -# metadata={}, -# ) -# ] -# ingestion_results = { -# "embedding_pipeline_output": [ -# ( -# document_infos[0].id, -# R2RDocumentProcessingError( -# "Unexpected error", -# document_id=document_infos[0].id, -# ), -# ) -# ] -# } - -# result = await ingestion_service._process_ingestion_results( -# ingestion_results, -# document_infos, -# [], -# ) - -# assert len(result["failed_documents"]) == 1 -# assert "Unexpected error" in str(result["failed_documents"][0]) - - -# @pytest.mark.asyncio -# async def test_document_status_update_after_ingestion( -# ingestion_service, mock_vector_db -# ): -# document = Document( -# id=generate_id_from_label("test_id"), -# data="Test content", -# group_ids=[], -# user_id=generate_id_from_label("user_1"), -# type="txt", -# metadata={}, -# ) - -# ingestion_service.pipelines.ingestion_pipeline.run.return_value = { -# "embedding_pipeline_output": [(document.id, None)] -# } -# mock_vector_db.relational.get_documents_overview.return_value = ( -# [] -# ) # No existing documents - -# await ingestion_service.ingest_documents([document]) - -# # Check that upsert_documents_overview was called twice -# assert mock_vector_db.relational.upsert_documents_overview.call_count == 2 - -# # Check the second call to upsert_documents_overview (status update) -# second_call_args = ( -# 
mock_vector_db.relational.upsert_documents_overview.call_args_list[1][ -# 0 -# ][0] -# ) -# assert len(second_call_args) == 1 -# assert second_call_args[0].id == document.id -# assert second_call_args[0].ingestion_status == "success" diff --git a/py/tests/test_llms.py b/py/tests/test_llms.py deleted file mode 100644 index 6b9fb8029..000000000 --- a/py/tests/test_llms.py +++ /dev/null @@ -1,181 +0,0 @@ -import asyncio -import time -from unittest.mock import AsyncMock, MagicMock, patch - -import pytest - -from core import CompletionConfig, GenerationConfig -from core.base.abstractions.llm import ( - LLMChatCompletion, - LLMChatCompletionChunk, -) -from core.providers import LiteCompletionProvider, OpenAICompletionProvider - - -class MockCompletionResponse: - def __init__(self, content): - self.id = "mock_id" - self.created = int(time.time()) - self.model = "gpt-4o-mini" - self.object = "chat.completion" - self.choices = [ - { - "index": 0, - "message": {"role": "assistant", "content": content}, - "finish_reason": "stop", - } - ] - - def dict(self): - return { - "id": self.id, - "created": self.created, - "model": self.model, - "object": self.object, - "choices": self.choices, - } - - -class MockStreamResponse: - def __init__(self, content): - self.id = "mock_id" - self.created = int(time.time()) - self.model = "gpt-4o-mini" - self.object = "chat.completion.chunk" - self.choices = [ - {"index": 0, "delta": {"content": content}, "finish_reason": None} - ] - - def dict(self): - return { - "id": self.id, - "created": self.created, - "model": self.model, - "object": self.object, - "choices": self.choices, - } - - -@pytest.fixture -def lite_llm(): - config = CompletionConfig(provider="litellm") - return LiteCompletionProvider(config) - - -@pytest.fixture -def openai_llm(): - config = CompletionConfig(provider="openai") - return OpenAICompletionProvider(config) - - -@pytest.fixture -def messages(): - return [ - { - "role": "user", - "content": "This is a test, return only 
the word `True`", - } - ] - - -@pytest.fixture -def generation_config(): - return GenerationConfig( - model="gpt-4o-mini", - temperature=0.0, - top_p=0.9, - max_tokens_to_sample=50, - stream=False, - ) - - -@pytest.mark.parametrize("llm_fixture", ["lite_llm", "openai_llm"]) -def test_get_completion(request, llm_fixture, messages, generation_config): - llm = request.getfixturevalue(llm_fixture) - - with patch.object( - llm, "_execute_task_sync", return_value=MockCompletionResponse("True") - ): - completion = llm.get_completion(messages, generation_config) - assert isinstance(completion, LLMChatCompletion) - assert completion.choices[0].message.role == "assistant" - assert completion.choices[0].message.content.strip() == "True" - assert completion.id == "mock_id" - assert completion.model == "gpt-4o-mini" - assert completion.object == "chat.completion" - - -@pytest.mark.parametrize("llm_fixture", ["lite_llm", "openai_llm"]) -def test_get_completion_stream( - request, llm_fixture, messages, generation_config -): - llm = request.getfixturevalue(llm_fixture) - generation_config.stream = True - - mock_responses = [ - MockStreamResponse("T"), - MockStreamResponse("ru"), - MockStreamResponse("e"), - ] - with patch.object(llm, "_execute_task_sync", return_value=mock_responses): - stream = llm.get_completion_stream(messages, generation_config) - chunks = list(stream) - assert all( - isinstance(chunk, LLMChatCompletionChunk) for chunk in chunks - ) - assert len(chunks) == 3 - assert ( - "".join(chunk.choices[0].delta.content for chunk in chunks) - == "True" - ) - assert all(chunk.object == "chat.completion.chunk" for chunk in chunks) - - -@pytest.mark.asyncio -@pytest.mark.parametrize("llm_fixture", ["lite_llm", "openai_llm"]) -async def test_aget_completion( - request, llm_fixture, messages, generation_config -): - llm = request.getfixturevalue(llm_fixture) - - with patch.object( - llm, - "_execute_task", - AsyncMock(return_value=MockCompletionResponse("True")), - ): - 
completion = await llm.aget_completion(messages, generation_config) - assert isinstance(completion, LLMChatCompletion) - assert completion.choices[0].message.role == "assistant" - assert completion.choices[0].message.content.strip() == "True" - assert completion.id == "mock_id" - assert completion.model == "gpt-4o-mini" - assert completion.object == "chat.completion" - - -@pytest.mark.asyncio -@pytest.mark.parametrize("llm_fixture", ["lite_llm", "openai_llm"]) -async def test_aget_completion_stream( - request, llm_fixture, messages, generation_config -): - llm = request.getfixturevalue(llm_fixture) - generation_config.stream = True - - async def mock_stream(): - yield MockStreamResponse("T") - yield MockStreamResponse("ru") - yield MockStreamResponse("e") - - with patch.object( - llm, "_execute_task", AsyncMock(return_value=mock_stream()) - ): - stream = llm.aget_completion_stream(messages, generation_config) - chunks = [chunk async for chunk in stream] - assert all( - isinstance(chunk, LLMChatCompletionChunk) for chunk in chunks - ) - assert len(chunks) == 3 - assert ( - "".join(chunk.choices[0].delta.content for chunk in chunks) - == "True" - ) - assert all(chunk.object == "chat.completion.chunk" for chunk in chunks) diff --git a/py/tests/test_logging.py b/py/tests/test_logging.py deleted file mode 100644 index 860d8a8bc..000000000 --- a/py/tests/test_logging.py +++ /dev/null @@ -1,207 +0,0 @@ -import asyncio -import logging -import os -import uuid -from uuid import UUID - -import pytest - -from core import ( - LocalRunLoggingProvider, - LoggingConfig, - PostgresLoggingConfig, - PostgresRunLoggingProvider, - RedisLoggingConfig, - RedisRunLoggingProvider, - generate_run_id, -) - -logger = logging.getLogger(__name__) - - -@pytest.fixture(scope="session", autouse=True) -def event_loop_policy(): - asyncio.set_event_loop_policy(asyncio.DefaultEventLoopPolicy()) - - -@pytest.fixture(scope="function", autouse=True) -async def cleanup_tasks(): - yield - tasks = [t for t 
in asyncio.all_tasks() if t is not asyncio.current_task()] - [task.cancel() for task in tasks] - await asyncio.gather(*tasks, return_exceptions=True) - - -@pytest.fixture(scope="function") -async def local_provider(): - unique_id = str(uuid.uuid4()) - logging_path = f"test_{unique_id}.sqlite" - provider = LocalRunLoggingProvider( - LoggingConfig(logging_path=logging_path) - ) - await provider._init() - yield provider - await provider.close() - if os.path.exists(logging_path): - os.remove(logging_path) - - -@pytest.fixture(scope="function") -async def postgres_provider(): - log_table = f"logs_{str(uuid.uuid4()).replace('-', '_')}" - log_info_table = f"log_info_{str(uuid.uuid4()).replace('-', '_')}" - provider = PostgresRunLoggingProvider( - PostgresLoggingConfig( - log_table=log_table, log_info_table=log_info_table - ) - ) - await provider._init() - yield provider - await provider.close() - - -@pytest.fixture -async def provider(request): - return request.getfixturevalue(request.param) - - -all_providers = [ - pytest.param("local_provider", id="local"), - pytest.param("postgres_provider", id="postgres"), -] - - -@pytest.mark.asyncio -@pytest.mark.parametrize("provider", all_providers, indirect=True) -async def test_logging(provider): - run_id = generate_run_id() - await provider.log(run_id, "key", "value") - logs = await provider.get_logs([run_id]) - assert len(logs) == 1 - assert logs[0]["key"] == "key" - assert logs[0]["value"] == "value" - - -@pytest.mark.asyncio -@pytest.mark.parametrize("provider", all_providers, indirect=True) -async def test_multiple_log_entries(provider): - run_ids = [generate_run_id() for _ in range(3)] - entries = [ - (run_id, f"key_{i}", f"value_{i}") for i, run_id in enumerate(run_ids) - ] - for run_id, key, value in entries: - await provider.log(run_id, key, value) - - logs = await provider.get_logs(run_ids) - assert len(logs) == 3, f"Expected 3 logs, got {len(logs)}" - - for log in logs: - run_id = log.get("run_id") - assert run_id is 
not None, f"Log entry is missing 'run_id': {log}" - - if isinstance(run_id, str): - run_id = UUID(run_id) - - matching_entries = [entry for entry in entries if entry[0] == run_id] - assert ( - len(matching_entries) == 1 - ), f"No matching entry found for run_id {run_id}" - - selected_entry = matching_entries[0] - assert log["key"] == selected_entry[1] - assert log["value"] == selected_entry[2] - - # Additional check to ensure all entries were logged - logged_ids = set( - ( - UUID(log["run_id"]) - if isinstance(log["run_id"], str) - else log["run_id"] - ) - for log in logs - ) - entry_ids = set(entry[0] for entry in entries) - assert ( - logged_ids == entry_ids - ), f"Mismatch between logged IDs {logged_ids} and entry IDs {entry_ids}" - - -@pytest.mark.asyncio -@pytest.mark.parametrize("provider", all_providers, indirect=True) -async def test_log_retrieval_limit(provider): - run_ids = [] - for i in range(10): - run_ids.append(generate_run_id()) - await provider.log(run_ids[-1], f"key_{i}", f"value_{i}") - - logs = await provider.get_logs(run_ids[:5]) - assert len(logs) == 5 - - -@pytest.mark.asyncio -@pytest.mark.parametrize("provider", all_providers, indirect=True) -async def test_specific_run_type_retrieval(provider): - run_id_0, run_id_1 = generate_run_id(), generate_run_id() - - await provider.log(run_id_0, "run_type", "RETRIEVAL") - await provider.log(run_id_0, "key_0", "value_0") - await provider.log(run_id_1, "run_type", "MANAGEMENT") - await provider.log(run_id_1, "key_1", "value_1") - - # Log info for both run IDs - await provider.info_log(run_id_0, "RETRIEVAL", uuid.uuid4()) - await provider.info_log(run_id_1, "MANAGEMENT", uuid.uuid4()) - - run_info = await provider.get_info_logs(run_type_filter="RETRIEVAL") - assert ( - len(run_info) == 1 - ), f"Expected 1 'RETRIEVAL' log, got {len(run_info)}" - assert ( - run_info[0].run_id == run_id_0 - ), f"Expected run_id {run_id_0}, got {run_info[0].run_id}" - - logs = await provider.get_logs([run.run_id for run in 
run_info]) - assert len(logs) == 2, f"Expected 2 logs for run_id_0, got {len(logs)}" - assert all( - log["run_id"] == run_id_0 for log in logs - ), "All logs should be for run_id_0" - assert any( - log["key"] == "run_type" and log["value"] == "RETRIEVAL" - for log in logs - ), "Should have a 'RETRIEVAL' log" - assert any( - log["key"] == "key_0" and log["value"] == "value_0" for log in logs - ), "Should have a 'key_0' log" - - -@pytest.mark.asyncio -@pytest.mark.parametrize("provider", all_providers, indirect=True) -async def test_info_logging(provider): - run_id = generate_run_id() - user_id = uuid.uuid4() - run_type = "RETRIEVAL" - await provider.info_log(run_id, run_type, user_id) - info_logs = await provider.get_info_logs() - assert len(info_logs) == 1 - assert info_logs[0].run_id == run_id - assert info_logs[0].run_type == run_type - assert info_logs[0].user_id == user_id - - -@pytest.mark.asyncio -@pytest.mark.parametrize("provider", all_providers, indirect=True) -async def test_get_info_logs_with_user_filter(provider): - user_id_1, user_id_2 = uuid.uuid4(), uuid.uuid4() - await provider.info_log(generate_run_id(), "RETRIEVAL", user_id_1) - await provider.info_log(generate_run_id(), "MANAGEMENT", user_id_2) - - info_logs = await provider.get_info_logs(user_ids=[user_id_1]) - assert len(info_logs) == 1 - assert info_logs[0].user_id == user_id_1 - - info_logs = await provider.get_info_logs( - run_type_filter="MANAGEMENT", user_ids=[user_id_2] - ) - assert len(info_logs) == 1 - assert info_logs[0].user_id == user_id_2 - assert info_logs[0].run_type == "MANAGEMENT" diff --git a/py/tests/test_vector_db.py b/py/tests/test_vector_db.py deleted file mode 100644 index 65a8c3f49..000000000 --- a/py/tests/test_vector_db.py +++ /dev/null @@ -1,307 +0,0 @@ -import random -from uuid import uuid4 - -import pytest - -from core.base import DatabaseConfig, Vector, VectorEntry, VectorSearchSettings -from core.providers import PostgresDBProvider - - -@pytest.fixture -def 
vector_db(): - random_collection_name = ( - f"test_collection_{random.randint(0, 1_000_000_000)}" - ) - config = DatabaseConfig.create( - provider="postgres", vecs_collection=random_collection_name - ) - db = PostgresDBProvider(config, dimension=3) - yield db.vector - # Teardown - db.vx.delete_collection(db.config.vecs_collection) - - -@pytest.fixture -def sample_entries(vector_db): - entries = [ - VectorEntry( - fragment_id=uuid4(), - extraction_id=uuid4(), - document_id=uuid4(), - user_id=uuid4(), - group_ids=[uuid4()], - vector=Vector(data=[0.1, 0.2, 0.3]), - text="Apple", - metadata={ - "category": "fruit", - "color": "red", - "price": 1.0, - }, - ), - VectorEntry( - fragment_id=uuid4(), - extraction_id=uuid4(), - document_id=uuid4(), - user_id=uuid4(), - group_ids=[uuid4()], - vector=Vector(data=[0.2, 0.3, 0.4]), - text="Banana", - metadata={ - "category": "fruit", - "color": "yellow", - "price": 0.5, - }, - ), - VectorEntry( - fragment_id=uuid4(), - extraction_id=uuid4(), - document_id=uuid4(), - user_id=uuid4(), - group_ids=[uuid4()], - vector=Vector(data=[0.3, 0.4, 0.5]), - text="Carrot", - metadata={ - "category": "vegetable", - "color": "orange", - "price": 0.75, - }, - ), - VectorEntry( - fragment_id=uuid4(), - extraction_id=uuid4(), - document_id=uuid4(), - user_id=uuid4(), - group_ids=[uuid4()], - vector=Vector(data=[0.4, 0.5, 0.6]), - text="Durian", - metadata={ - "category": "fruit", - "color": "green", - "price": 5.0, - }, - ), - ] - for entry in entries: - vector_db.upsert(entry) - return entries - - -def test_search_equality_filter(vector_db, sample_entries): - query_vector = Vector(data=[0.2, 0.3, 0.4]) - results = vector_db.semantic_search( - query_vector.data, - VectorSearchSettings( - search_limit=10, filters={"category": {"$eq": "fruit"}} - ), - ) - assert len(results) == 3 - assert all(r.metadata["category"] == "fruit" for r in results) - - -def test_search_not_equal_filter(vector_db, sample_entries): - query_vector = Vector(data=[0.2, 0.3, 
0.4]) - results = vector_db.semantic_search( - query_vector.data, - VectorSearchSettings( - search_limit=10, filters={"category": {"$ne": "fruit"}} - ), - ) - assert len(results) == 1 - assert results[0].metadata["category"] == "vegetable" - - -def test_search_greater_than_filter(vector_db, sample_entries): - query_vector = Vector(data=[0.2, 0.3, 0.4]) - results = vector_db.semantic_search( - query_vector.data, - VectorSearchSettings(search_limit=10, filters={"price": {"$gt": 1.0}}), - ) - assert len(results) == 1 - assert results[0].text == "Durian" - - -def test_search_less_than_or_equal_filter(vector_db, sample_entries): - query_vector = Vector(data=[0.2, 0.3, 0.4]) - results = vector_db.semantic_search( - query_vector.data, - VectorSearchSettings( - search_limit=10, filters={"price": {"$lte": 1.0}} - ), - ) - assert len(results) == 3 - assert all(r.metadata["price"] <= 1.0 for r in results) - - -def test_search_in_filter(vector_db, sample_entries): - query_vector = Vector(data=[0.2, 0.3, 0.4]) - results = vector_db.semantic_search( - query_vector.data, - VectorSearchSettings( - search_limit=10, filters={"color": {"$in": ["red", "yellow"]}} - ), - ) - assert len(results) == 2 - assert all(r.metadata["color"] in ["red", "yellow"] for r in results) - - -def test_search_complex_and_filter(vector_db, sample_entries): - query_vector = Vector(data=[0.2, 0.3, 0.4]) - results = vector_db.semantic_search( - query_vector.data, - VectorSearchSettings( - search_limit=10, - filters={ - "$and": [ - {"category": {"$eq": "fruit"}}, - {"price": {"$lt": 2.0}}, - {"color": {"$ne": "yellow"}}, - ] - }, - ), - ) - assert len(results) == 1 - assert results[0].text == "Apple" - - -def test_search_complex_or_filter(vector_db, sample_entries): - query_vector = Vector(data=[0.2, 0.3, 0.4]) - results = vector_db.semantic_search( - query_vector.data, - VectorSearchSettings( - search_limit=10, - filters={ - "$or": [ - {"category": {"$eq": "vegetable"}}, - {"price": {"$gte": 5.0}}, - ] - }, 
- ), - ) - assert len(results) == 2 - assert any(r.metadata["category"] == "vegetable" for r in results) - assert any(r.metadata["price"] >= 5.0 for r in results) - - -def test_search_nested_and_or_filters(vector_db, sample_entries): - query_vector = Vector(data=[0.2, 0.3, 0.4]) - results = vector_db.semantic_search( - query_vector.data, - VectorSearchSettings( - search_limit=10, - filters={ - "$and": [ - {"category": {"$eq": "fruit"}}, - { - "$or": [ - {"color": {"$in": ["red", "yellow"]}}, - {"price": {"$gt": 2.0}}, - ] - }, - ] - }, - ), - ) - assert len(results) == 3 - assert all(r.metadata["category"] == "fruit" for r in results) - assert all( - r.metadata["color"] in ["red", "yellow"] or r.metadata["price"] > 2.0 - for r in results - ) - - -def test_delete_equality(vector_db, sample_entries): - deleted_ids = vector_db.delete({"category": {"$eq": "vegetable"}}) - assert len(deleted_ids) == 1 - remaining = vector_db.semantic_search( - Vector(data=[0.2, 0.3, 0.4]).data, - VectorSearchSettings(search_limit=10), - ) - assert len(remaining) == 3 - assert all(r.metadata["category"] == "fruit" for r in remaining) - - -def test_delete_greater_than(vector_db, sample_entries): - deleted_ids = vector_db.delete({"price": {"$gt": 1.0}}) - assert len(deleted_ids) == 1 - remaining = vector_db.semantic_search( - Vector(data=[0.2, 0.3, 0.4]).data, - VectorSearchSettings(search_limit=10), - ) - assert len(remaining) == 3 - assert all(r.metadata["price"] <= 1.0 for r in remaining) - - -def test_delete_in(vector_db, sample_entries): - deleted_ids = vector_db.delete({"color": {"$in": ["red", "yellow"]}}) - assert len(deleted_ids) == 2 - remaining = vector_db.semantic_search( - Vector(data=[0.2, 0.3, 0.4]).data, - VectorSearchSettings(search_limit=10), - ) - assert len(remaining) == 2 - assert all(r.metadata["color"] not in ["red", "yellow"] for r in remaining) - - -def test_delete_complex_and(vector_db, sample_entries): - deleted_ids = vector_db.delete( - { - "$and": [ - 
{"category": {"$eq": "fruit"}}, - {"price": {"$lt": 1.0}}, - ] - } - ) - assert len(deleted_ids) == 1 - remaining = vector_db.semantic_search( - Vector(data=[0.2, 0.3, 0.4]).data, - VectorSearchSettings(search_limit=10), - ) - assert len(remaining) == 3 - assert not any( - r.metadata["category"] == "fruit" and r.metadata["price"] < 1.0 - for r in remaining - ) - - -def test_delete_complex_or(vector_db, sample_entries): - deleted_ids = vector_db.delete( - { - "$or": [ - {"category": {"$eq": "vegetable"}}, - {"price": {"$gte": 5.0}}, - ] - } - ) - assert len(deleted_ids) == 2 - remaining = vector_db.semantic_search( - Vector(data=[0.2, 0.3, 0.4]).data, - VectorSearchSettings(search_limit=10), - ) - assert len(remaining) == 2 - assert all( - r.metadata["category"] != "vegetable" and r.metadata["price"] < 5.0 - for r in remaining - ) - - -def test_delete_nested_and_or(vector_db, sample_entries): - deleted_ids = vector_db.delete( - { - "$and": [ - {"category": {"$eq": "fruit"}}, - { - "$or": [ - {"color": {"$in": ["red", "yellow"]}}, - {"price": {"$gt": 2.0}}, - ] - }, - ] - } - ) - assert len(deleted_ids) == 3 - remaining = vector_db.semantic_search( - Vector(data=[0.2, 0.3, 0.4]).data, - VectorSearchSettings(search_limit=10), - ) - assert len(remaining) == 1 - assert remaining[0].metadata["category"] == "vegetable" diff --git a/templates/ycombinator_graphrag/python-backend/main.py b/templates/ycombinator_graphrag/python-backend/main.py index fcd93ddac..a68a70b8c 100644 --- a/templates/ycombinator_graphrag/python-backend/main.py +++ b/templates/ycombinator_graphrag/python-backend/main.py @@ -1,3 +1,4 @@ +import yaml import time import uuid import os @@ -26,7 +27,7 @@ def get_dataset(dataset_name, save_folder = '.data', split = "train", column_nam data = load_dataset(dataset_name) data = data[split].select(range(args.num_companies)) for item in data: - file_path = os.path.join(save_folder, f"{item['id']}.txt") + file_path = os.path.join(save_folder, 
f"{item['slug']}.txt") # Check if the item contains JSON data with open(file_path, "w") as f: f.write(item[column_name]) @@ -37,31 +38,73 @@ def generate_id_from_label(label: str) -> uuid.UUID: def wait_till_ready(status_var, status_value): while True: - documents_overview = client.documents_overview()['results'] + documents_overview = client.documents_overview(limit=1000)['results'] + + # print a percentage contribution of each status value value of status var + status_counts = {} + for document in documents_overview: + print(document.get("name"), document.get(status_var)) + status = document.get(status_var) + if status in status_counts: + status_counts[status] += 1 + else: + status_counts[status] = 1 + + # show fraction of each status value + for status, count in status_counts.items(): + print(f"{status}: {count / len(documents_overview) * 100:.2f}%") + + if all(document.get(status_var) == status_value for document in documents_overview): break + else: + # if at least one says failed, exit + if "failure" in status_counts or "enrichment_failure" in status_counts: + print(f"At least one document has failed {status_var} => {status_value}") + for document in documents_overview: + if document.get(status_var) == "failure": + print(document.get("id"), document.get("status")) + exit(1) time.sleep(10) def ingest_data(): + print("Ingesting data...") for text in get_dataset(args.dataset_name, args.save_folder, args.split, args.column_name): client.ingest_files(file_paths=[text]) # wait till all get ingested wait_till_ready("ingestion_status", "success") + print("Ingested data") def create_graph(): - client.create_graph() + print("Creating graph...") + entity_types = ["ORGANIZATION", "GEO", "PERSON", "INDUSTRY_SECTOR", "PRODUCT", "COMPETITOR", "TECHNOLOGY", "ACQUISITION", "INVESTOR", ] + documents_overview = client.documents_overview(limit=1000)['results'] + document_ids = [document.get("id") for document in documents_overview if document.get("restructuring_status") in 
["pending", "failure", "enrichment_failure"]] + client.create_graph(document_ids = document_ids) wait_till_ready("restructuring_status", "success") def enrich_graph(): + print("Enriching graph...") client.enrich_graph() wait_till_ready("restructuring_status", "enriched") +def update_prompts(): + print("Updating prompts...") + prompts = yaml.load(open("prompts.yaml", "r"), Loader=yaml.FullLoader) + for prompt_name, prompt in prompts.items(): + client.update_prompt( + name=prompt_name, + template=prompt["template"], + input_types=prompt["input_types"] + ) + def ingest(): - ingest_data() - create_graph() + update_prompts() + # ingest_data() + # create_graph() enrich_graph() def ask(): @@ -69,6 +112,7 @@ def ask(): print(result) if __name__ == "__main__": + if args.ingest and not args.ask: ingest() print("Ingested data") diff --git a/templates/ycombinator_graphrag/python-backend/prompts.yaml b/templates/ycombinator_graphrag/python-backend/prompts.yaml new file mode 100644 index 000000000..de652edc8 --- /dev/null +++ b/templates/ycombinator_graphrag/python-backend/prompts.yaml @@ -0,0 +1,282 @@ +graphrag_triplet_extraction_zero_shot: + template: > + -Goal- + Given a text document, identify all entities and their entity types from the text and all relationships among the identified entities. + Given the text, extract up to {max_knowledge_triples} entity-relation triplets. + -Steps- + 1. Identify all entities. For each identified entity, extract the following information: + - entity_name: Name of the entity, capitalized + - entity_type: Type of the entity. If the list below is not empty, only extract entities of the given types. If this list is empty, extract all entities. + {entity_types} + - entity_description: Comprehensive description of the entity's attributes and activities that is explicitly mentioned in the text. + Format each entity as ("entity"$$$$$$$$$$$$). + Output additional entities based on the entity_description if they contain more named entities. 
You will later use this to create relationships between them. + 2. From the entities identified in step 1, identify all pairs of (source_entity, target_entity) that are *clearly related* to each other. + For each pair of related entities, extract the following information: + - source_entity: name of the source entity, as identified in step 1 + - target_entity: name of the target entity, as identified in step 1 + - relation: relationship between source_entity and target_entity. If the list below is not empty, only extract relations of the given types. If this list is empty, extract all relations. + {relation_types} + - relationship_description: explanation as to why you think the source entity and the target entity are related to each other + - relationship_weight: a weight between 0 and 10 that represents the strength of the relationship between the source entity and the target entity + Format each relationship as ("relationship"$$$$<source_entity>$$$$<target_entity>$$$$<relation>$$$$<relationship_description>$$$$<relationship_weight>) + 3. Make sure that each entity has at least one relationship. If you are not able to find a relationship from the pair of entities above, first create a new entity based on the source entity description and then create a relationship connecting them. + Again, make sure that each entity has at least one relationship. If you don't do this, you will be fired. + 4. When finished, output in the format given in the examples below. Do not repeat the same entity or relationship multiple times. + + Example 1: + If the list is empty, extract all entities and relations. + Entity_types: + Relation_types: + Text: + The Verdantis's Central Institution is scheduled to meet on Monday and Thursday, with the institution planning to release its latest policy decision on Thursday at 1:30 p.m. PDT, followed by a press conference where Central Institution Chair Martin Smith will take questions. Investors expect the Market Strategy Committee to hold its benchmark interest rate steady in a range of 3.5%-3.75%.
+ ###################### + Output: + ("entity"$$$$Central Institution$$$$Organization$$$$The central bank of Verdantis, responsible for monetary policy and setting interest rates) + ("entity"$$$$Martin Smith$$$$Person$$$$Chair of the Central Institution of Verdantis) + ("entity"$$$$Market Strategy Committee$$$$Organization$$$$Committee within the Central Institution that makes key decisions on monetary policy) + ("entity"$$$$Monday$$$$Time$$$$First meeting day of the Central Institution) + ("entity"$$$$Thursday$$$$Time$$$$Second meeting day of the Central Institution, when policy decisions are announced) + ("entity"$$$$1:30 PM PDT$$$$Time$$$$Scheduled time for the Central Institution's policy decision release on Thursday) + ("entity"$$$$Press Conference$$$$Event$$$$Media briefing held by the Central Institution following the policy decision release) + ("entity"$$$$Interest Rate$$$$Economic Concept$$$$Key monetary policy tool used by the Central Institution to influence the economy) + ("entity"$$$$3.5%-3.75%$$$$Economic Value$$$$Expected range for the benchmark interest rate) + ("relationship"$$$$Martin Smith$$$$Central Institution$$$$Chairs$$$$Martin Smith is the Chair of the Central Institution and will lead the press conference$$$$9) + ("relationship"$$$$Central Institution$$$$Press Conference$$$$Conducts$$$$The Central Institution conducts a press conference following its policy decision release$$$$9) + ("relationship"$$$$Market Strategy Committee$$$$Central Institution$$$$Part Of$$$$The Market Strategy Committee is a key decision-making body within the Central Institution$$$$9) + ("relationship"$$$$Market Strategy Committee$$$$Interest Rate$$$$Sets$$$$The Market Strategy Committee determines the benchmark interest rate$$$$9) + ("relationship"$$$$Central Institution$$$$Interest Rate$$$$Controls$$$$The Central Institution controls interest rates as part of its monetary policy$$$$9) + ("relationship"$$$$3.5%-3.75%$$$$Interest Rate$$$$Expected Range$$$$Investors 
anticipate the benchmark interest rate to remain within this range$$$$8) + ("relationship"$$$$Monday$$$$Central Institution$$$$Meeting Day$$$$The Central Institution holds its first meeting of the week on Monday$$$$7) + ("relationship"$$$$Thursday$$$$Central Institution$$$$Decision Day$$$$The Central Institution announces its policy decision on Thursday$$$$9) + ("relationship"$$$$1:30 PM PDT$$$$Central Institution$$$$Press Conference$$$$The policy decision release at 1:30 PM PDT is followed by the press conference$$$$8) + + ###################### + Example 2: + If the list is empty, extract all entities and relations. + Entity_types: Organization + Relation_types: Formerly Owned By + + Text: + TechGlobal's (TG) stock skyrocketed in its opening day on the Global Exchange Thursday. But IPO experts warn that the semiconductor corporation's debut on the public markets isn't indicative of how other newly listed companies may perform. + + TechGlobal, a formerly public company, was taken private by Vision Holdings in 2014. The well-established chip designer says it powers 85% of premium smartphones. + ###################### + Output: + ("entity"$$$$TECHGLOBAL$$$$Organization$$$$TechGlobal is a stock now listed on the Global Exchange which powers 85% of premium smartphones) + ("entity"$$$$VISION HOLDINGS$$$$Organization$$$$Vision Holdings is a firm that previously owned TechGlobal) + ("relationship"$$$$TECHGLOBAL$$$$VISION HOLDINGS$$$$Formerly Owned By$$$$Vision Holdings formerly owned TechGlobal from 2014 until present$$$$5) + + ###################### + Example 3: + If the list is empty, extract all entities and relations. + Entity_types: Organization,Geo,Person + Relation_types: "" + Text: + Five Aurelians jailed for 8 years in Firuzabad and widely regarded as hostages are on their way home to Aurelia. + + The swap orchestrated by Quintara was finalized when $8bn of Firuzi funds were transferred to financial institutions in Krohaara, the capital of Quintara. 
+ + The exchange initiated in Firuzabad's capital, Tiruzia, led to the four men and one woman, who are also Firuzi nationals, boarding a chartered flight to Krohaara. + + They were welcomed by senior Aurelian officials and are now on their way to Aurelia's capital, Cashion. + + The Aurelians include 39-year-old businessman Samuel Namara, who has been held in Tiruzia's Alhamia Prison, as well as journalist Durke Bataglani, 59, and environmentalist Meggie Tazbah, 53, who also holds Bratinas nationality. + ###################### + Output: + ("entity"$$$$FIRUZABAD$$$$Geo$$$$Firuzabad held Aurelians as hostages) + ("entity"$$$$AURELIA$$$$Geo$$$$Country seeking to release hostages) + ("entity"$$$$QUINTARA$$$$Geo$$$$Country that negotiated a swap of money in exchange for hostages) + ("entity"$$$$TIRUZIA$$$$Geo$$$$Capital of Firuzabad where the Aurelians were being held) + ("entity"$$$$KROHAARA$$$$Geo$$$$Capital city in Quintara) + ("entity"$$$$CASHION$$$$Geo$$$$Capital city in Aurelia) + ("entity"$$$$SAMUEL NAMARA$$$$Person$$$$Aurelian who spent time in Tiruzia's Alhamia Prison) + ("entity"$$$$ALHAMIA PRISON$$$$Geo$$$$Prison in Tiruzia) + ("entity"$$$$DURKE BATAGLANI$$$$Person$$$$Aurelian journalist who was held hostage) + ("entity"$$$$MEGGIE TAZBAH$$$$Person$$$$Bratinas national and environmentalist who was held hostage) + ("relationship"$$$$FIRUZABAD$$$$AURELIA$$$$Negotiated Hostage Exchange$$$$Firuzabad negotiated a hostage exchange with Aurelia$$$$2) + ("relationship"$$$$QUINTARA$$$$AURELIA$$$$Negotiated Hostage Exchange$$$$Quintara brokered the hostage exchange between Firuzabad and Aurelia$$$$2) + ("relationship"$$$$QUINTARA$$$$FIRUZABAD$$$$Negotiated Hostage Exchange$$$$Quintara brokered the hostage exchange between Firuzabad and Aurelia$$$$2) + ("relationship"$$$$SAMUEL NAMARA$$$$ALHAMIA PRISON$$$$Held At Alhamia Prison$$$$Samuel Namara was a prisoner at Alhamia prison$$$$8) + ("relationship"$$$$SAMUEL NAMARA$$$$MEGGIE TAZBAH$$$$Exchanged Hostages$$$$Samuel Namara 
and Meggie Tazbah were exchanged in the same hostage release$$$$2) + ("relationship"$$$$SAMUEL NAMARA$$$$DURKE BATAGLANI$$$$Exchanged Hostages$$$$Samuel Namara and Durke Bataglani were exchanged in the same hostage release$$$$2) + ("relationship"$$$$MEGGIE TAZBAH$$$$DURKE BATAGLANI$$$$Exchanged Hostages$$$$Meggie Tazbah and Durke Bataglani were exchanged in the same hostage release$$$$2) + ("relationship"$$$$SAMUEL NAMARA$$$$FIRUZABAD$$$$Held As Hostage$$$$Samuel Namara was a hostage in Firuzabad$$$$2) + ("relationship"$$$$MEGGIE TAZBAH$$$$FIRUZABAD$$$$Held As Hostage$$$$Meggie Tazbah was a hostage in Firuzabad$$$$2) + ("relationship"$$$$DURKE BATAGLANI$$$$FIRUZABAD$$$$Held As Hostage$$$$Durke Bataglani was a hostage in Firuzabad$$$$2) + + -Real Data- + ###################### + If the list is empty, extract all entities and relations. + Entity_types: {entity_types} + Relation_types: {relation_types} + + Text: + {input} + ###################### + Output: + + input_types: + max_knowledge_triples: int + input: str + entity_types: list[str] + relation_types: list[str] + +graphrag_community_reports_prompt: + template: | + You are an AI assistant that helps a human analyst perform information discovery within the ecosystem of Y Combinator startups. Information discovery is the process of identifying and assessing relevant information associated with certain startups and their interconnections within the Y Combinator network. + + # Goal + Write a comprehensive report of a Y Combinator community, given a list of startups that belong to the community, their relationships, and optional associated claims. The report will be used to inform decision-makers about information related to the startups and their potential impact. The content of this report includes an overview of the community's key startups, their funding status, technical capabilities, reputation, and noteworthy claims. 
+ + # Report Structure + + The report should include the following sections: + + - TITLE: The community's name that represents its key startups - the title should be short but specific. When possible, include representative startup names in the title. + - SUMMARY: An executive summary of the community's overall structure, how its startups are related to each other, and significant information associated with its startups. + - IMPACT SEVERITY RATING: A float score between 0-10 that represents the severity of IMPACT posed by startups within the community. IMPACT is the scored importance of the community. + - RATING EXPLANATION: Provide a single sentence explanation of the IMPACT severity rating. + - DETAILED FINDINGS: A list of 5-10 key insights about the community. Each finding should be a comprehensive statement grounded in the provided data, citing the supporting record ids as shown in the example output below. + + Return output as a well-formed JSON-formatted string with the following format: + ```json + {{ + "title": <report_title>, + "summary": <executive_summary>, + "rating": <impact_severity_rating>, + "rating_explanation": <rating_explanation>, + "findings": [ + "<finding1>", + "<finding2>", + "<finding3>", + "<finding4>", + "<finding5>" + // Additional findings... + ] + }} + ``` + + # Example Input + ----------- + Text: + + Entities + + id,startup,description + 101,OpenAI,OpenAI is an AI research and deployment company. + 102,Stripe,Stripe is a technology company that builds economic infrastructure for the internet. + 103,Airbnb,Airbnb is an online marketplace for lodging and tourism experiences. + + Relationships + + id,source,target,description + 201,OpenAI,Stripe,OpenAI partnered with Stripe to integrate payment solutions. + 202,Stripe,Airbnb,Stripe provides payment processing services to Airbnb. + 203,Airbnb,OpenAI,Airbnb utilizes OpenAI's AI tools for customer service. + 204,Stripe,OpenAI,Stripe invested in OpenAI's latest funding round. + 205,Airbnb,Stripe,Airbnb and Stripe collaborate on expanding global payment options.
+ + Output: + {{ + "title": "OpenAI, Stripe, and Airbnb Community", + "summary": "The community comprises key startups like OpenAI, Stripe, and Airbnb, which are interconnected through strategic partnerships and investments. These relationships highlight a robust network focused on advancing AI technologies, payment infrastructure, and online marketplaces.", + "rating": 7.5, + "rating_explanation": "The impact severity rating is high due to the significant influence these startups have on technology, finance, and the global economy.", + "findings": [ + "OpenAI stands out as a leader in artificial intelligence research and deployment within the Y Combinator community. Its partnerships with companies like Stripe and Airbnb demonstrate its integral role in integrating AI solutions across various industries. OpenAI's influence is further amplified by its involvement in key projects that drive innovation and efficiency. [Data: Entities (101), Relationships (201, 203, 204, +more)]", + "Stripe serves as a critical financial infrastructure provider, facilitating payment processing for startups like Airbnb and partnering with OpenAI to enhance payment solutions. Its strategic investments and collaborations underscore its importance in the Y Combinator ecosystem, enabling seamless financial transactions and supporting startup growth. [Data: Entities (102), Relationships (201, 202, 204, 205, +more)]", + "Airbnb leverages OpenAI's artificial intelligence tools to enhance its customer service capabilities, showcasing the practical application of AI in improving user experience. This integration highlights Airbnb's commitment to innovation and efficiency, positioning it as a forward-thinking leader within the community. [Data: Entities (103), Relationships (203, 205, +more)]", + "Stripe's investment in OpenAI's latest funding round illustrates the strategic financial moves that drive growth and innovation within the Y Combinator community. 
Such investments not only strengthen partnerships but also foster an environment of collaboration and shared success among startups. [Data: Relationships (204)]", + "The collaboration between Airbnb and Stripe to expand global payment options demonstrates a commitment to scalability and accessibility in the Y Combinator ecosystem. This initiative is pivotal in enabling startups to reach a broader international market, thereby increasing their impact and revenue potential. [Data: Relationships (205)]" + ] + }} + + # Real Data + + Use the following text for your answer. Do not make anything up in your answer. + + Text: + {input_text} + + Output: + input_types: + input_text: str + +graphrag_map_system_prompt: + template : | + ---Role--- + You are a helpful assistant responding to questions about data in the tables provided. + ---Goal--- + Generate a response consisting of a list of key points that responds to the user's question, summarizing all relevant information in the input data tables. + You should use the data provided in the data tables below as the primary context for generating the response. + If you don't know the answer or if the input data tables do not contain sufficient information to provide an answer, just say so. Do not make anything up. + Each key point in the response should have the following element: + - Description: A comprehensive description of the point. + - Importance Score: An integer score between 0-100 that indicates how important the point is in answering the user's question. An 'I don't know' type of response should have a score of 0. + The response should be JSON formatted as follows: + {{ + "points": [ + {{"description": "Description of point 1 [Data: Reports (report ids)]", "score": score_value}}, + {{"description": "Description of point 2 [Data: Reports (report ids)]", "score": score_value}} + ] + }} + The response shall preserve the original meaning and use of modal verbs such as "shall", "may" or "will". 
+ Points supported by data should list the relevant reports as references as follows: + "This is an example sentence supported by data references [Data: Reports (report ids)]" + **Do not list more than 5 record ids in a single reference**. Instead, list the top 5 most relevant record ids and add "+more" to indicate that there are more. + For example: + "Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (2, 7, 64, 46, 34, +more)]. He is also CEO of company X [Data: Reports (1, 3)]" + where 1, 2, 3, 7, 34, 46, and 64 represent the id (not the index) of the relevant data report in the provided tables. + Do not include information where the supporting evidence for it is not provided. + ---Data tables--- + {context_data} + ---Input--- + {input} + input_types: + context_data: str + input: str + +graphrag_reduce_system_prompt: + template: | + ---Role--- + You are a helpful assistant responding to questions about a dataset by synthesizing perspectives from multiple analysts. + ---Goal--- + Generate a response of the target length and format that responds to the user's question, summarize all the reports from multiple analysts who focused on different parts of the dataset. + Note that the analysts' reports provided below are ranked in the **descending order of importance**. + If you don't know the answer or if the provided reports do not contain sufficient information to provide an answer, just say so. Do not make anything up. + The final response should remove all irrelevant information from the analysts' reports and merge the cleaned information into a comprehensive answer that provides explanations of all the key points and implications appropriate for the response length and format. + Add sections and commentary to the response as appropriate for the length and format. Style the response in markdown. + The response shall preserve the original meaning and use of modal verbs such as "shall", "may" or "will". 
+ The response should also preserve all the data references previously included in the analysts' reports, but do not mention the roles of multiple analysts in the analysis process. + **Do not list more than 5 record ids in a single reference**. Instead, list the top 5 most relevant record ids and add "+more" to indicate that there are more. + + For example: + "Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (2, 7, 34, 46, 64, +more)]. He is also CEO of company X [Data: Reports (1, 3)]" + where 1, 2, 3, 7, 34, 46, and 64 represent the id (not the index) of the relevant data record. + Do not include information where the supporting evidence for it is not provided. + ---Target response length and format--- + {response_type} + ---Analyst Reports--- + {report_data} + ---Goal--- + Generate a response of the target length and format that responds to the user's question, summarize all the reports from multiple analysts who focused on different parts of the dataset. + Note that the analysts' reports provided below are ranked in the **descending order of importance**. + If you don't know the answer or if the provided reports do not contain sufficient information to provide an answer, just say so. Do not make anything up. + The final response should remove all irrelevant information from the analysts' reports and merge the cleaned information into a comprehensive answer that provides explanations of all the key points and implications appropriate for the response length and format. + The response shall preserve the original meaning and use of modal verbs such as "shall", "may" or "will". + The response should also preserve all the data references previously included in the analysts' reports, but do not mention the roles of multiple analysts in the analysis process. + **Do not list more than 5 record ids in a single reference**. Instead, list the top 5 most relevant record ids and add "+more" to indicate that there are more. 
+ For example: + "Person X is the owner of Company Y and subject to many allegations of wrongdoing [Data: Reports (2, 7, 34, 46, 64, +more)]. He is also CEO of company X [Data: Reports (1, 3)]" + where 1, 2, 3, 7, 34, 46, and 64 represent the id (not the index) of the relevant data record. + Do not include information where the supporting evidence for it is not provided. + ---Target response length and format--- + {response_type} + Add sections and commentary to the response as appropriate for the length and format. Style the response in markdown. + -- Query -- + {input} + input_types: + response_type: str + report_data: str + input: str diff --git a/templates/ycombinator_graphrag/python-backend/yc_scraper.ipynb b/templates/ycombinator_graphrag/python-backend/yc_scraper.ipynb index 15af7589f..be6269ba5 100644 --- a/templates/ycombinator_graphrag/python-backend/yc_scraper.ipynb +++ b/templates/ycombinator_graphrag/python-backend/yc_scraper.ipynb @@ -125,6 +125,26 @@ "print(\"Dataset uploaded successfully to Hugging Face Hub!\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "ename": "", + "evalue": "", + "output_type": "error", + "traceback": [ + "\u001b[1;31mRunning cells with 'z (Python 3.12.6)' requires the ipykernel package.\n", + "\u001b[1;31mRun the following command to install 'ipykernel' into the Python environment. 
\n", + "\u001b[1;31mCommand: 'conda install -p /Users/shreyas/micromamba/envs/z ipykernel --update-deps --force-reinstall'" + ] + } + ], + "source": [ + "# V2" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/templates/ycombinator_graphrag/web-app/.eslintrc.json b/templates/ycombinator_graphrag/web-app/.eslintrc.json new file mode 100644 index 000000000..7d3ff4ebf --- /dev/null +++ b/templates/ycombinator_graphrag/web-app/.eslintrc.json @@ -0,0 +1,4 @@ +{ + "ignorePatterns": ["public/data/**"], + "extends": ["next", "next/core-web-vitals"] +} diff --git a/templates/ycombinator_graphrag/web-app/.gitignore b/templates/ycombinator_graphrag/web-app/.gitignore new file mode 100644 index 000000000..fd3dbb571 --- /dev/null +++ b/templates/ycombinator_graphrag/web-app/.gitignore @@ -0,0 +1,36 @@ +# See https://help.github.com/articles/ignoring-files/ for more about ignoring files. + +# dependencies +/node_modules +/.pnp +.pnp.js +.yarn/install-state.gz + +# testing +/coverage + +# next.js +/.next/ +/out/ + +# production +/build + +# misc +.DS_Store +*.pem + +# debug +npm-debug.log* +yarn-debug.log* +yarn-error.log* + +# local env files +.env*.local + +# vercel +.vercel + +# typescript +*.tsbuildinfo +next-env.d.ts diff --git a/templates/ycombinator_graphrag/web-app/.prettierignore b/templates/ycombinator_graphrag/web-app/.prettierignore new file mode 100644 index 000000000..b8ec90b05 --- /dev/null +++ b/templates/ycombinator_graphrag/web-app/.prettierignore @@ -0,0 +1 @@ +public/data/** diff --git a/templates/ycombinator_graphrag/web-app/.prettierrc b/templates/ycombinator_graphrag/web-app/.prettierrc new file mode 100644 index 000000000..92f97e756 --- /dev/null +++ b/templates/ycombinator_graphrag/web-app/.prettierrc @@ -0,0 +1,6 @@ +{ + "semi": true, + "singleQuote": true, + "tabWidth": 2, + "trailingComma": "es5" +} diff --git a/templates/ycombinator_graphrag/web-app/README.md b/templates/ycombinator_graphrag/web-app/README.md new file mode 100644 
index 000000000..a75ac5248 --- /dev/null +++ b/templates/ycombinator_graphrag/web-app/README.md @@ -0,0 +1,40 @@ +This is a [Next.js](https://nextjs.org/) project bootstrapped with [`create-next-app`](https://github.com/vercel/next.js/tree/canary/packages/create-next-app). + +## Getting Started + +First, run the development server: + +```bash +npm run dev +# or +yarn dev +# or +pnpm dev +# or +bun dev +``` + +Open [http://localhost:3000](http://localhost:3000) with your browser to see the result. + +You can start editing the page by modifying `pages/index.tsx`. The page auto-updates as you edit the file. + +[API routes](https://nextjs.org/docs/api-routes/introduction) can be accessed on [http://localhost:3000/api/hello](http://localhost:3000/api/hello). This endpoint can be edited in `pages/api/hello.ts`. + +The `pages/api` directory is mapped to `/api/*`. Files in this directory are treated as [API routes](https://nextjs.org/docs/api-routes/introduction) instead of React pages. + +This project uses [`next/font`](https://nextjs.org/docs/basic-features/font-optimization) to automatically optimize and load Inter, a custom Google Font. + +## Learn More + +To learn more about Next.js, take a look at the following resources: + +- [Next.js Documentation](https://nextjs.org/docs) - learn about Next.js features and API. +- [Learn Next.js](https://nextjs.org/learn) - an interactive Next.js tutorial. + +You can check out [the Next.js GitHub repository](https://github.com/vercel/next.js/) - your feedback and contributions are welcome! + +## Deploy on Vercel + +The easiest way to deploy your Next.js app is to use the [Vercel Platform](https://vercel.com/new?utm_medium=default-template&filter=next.js&utm_source=create-next-app&utm_campaign=create-next-app-readme) from the creators of Next.js. + +Check out our [Next.js deployment documentation](https://nextjs.org/docs/deployment) for more details. 
diff --git a/templates/ycombinator_graphrag/web-app/components.json b/templates/ycombinator_graphrag/web-app/components.json new file mode 100644 index 000000000..66f6c0d6c --- /dev/null +++ b/templates/ycombinator_graphrag/web-app/components.json @@ -0,0 +1,21 @@ +{ + "$schema": "https://ui.shadcn.com/schema.json", + "style": "new-york", + "rsc": false, + "tsx": true, + "tailwind": { + "config": "tailwind.config.ts", + "css": "styles/globals.css", + "baseColor": "neutral", + "cssVariables": true, + "prefix": "" + }, + "aliases": { + "components": "@/components", + "types": "@/types", + "utils": "@/lib/utils", + "ui": "@/components/ui", + "lib": "@/lib", + "hooks": "@/hooks" + } +} diff --git a/templates/ycombinator_graphrag/web-app/components/Answer.tsx b/templates/ycombinator_graphrag/web-app/components/Answer.tsx new file mode 100644 index 000000000..d5b6c5c7a --- /dev/null +++ b/templates/ycombinator_graphrag/web-app/components/Answer.tsx @@ -0,0 +1,305 @@ +import React, { useState, useEffect } from 'react'; +import { FC } from 'react'; +import Markdown from 'react-markdown'; + +import { + Popover, + PopoverContent, + PopoverTrigger, +} from '@/components/ui/popover'; +import { Skeleton } from '@/components/ui/skeleton'; +import { Logo } from '@/components/Logo'; +import { + Accordion, + AccordionContent, + AccordionItem, + AccordionTrigger, +} from '@/components/ui/accordion'; +import { SearchResults } from '@/components/SearchResults'; +import { KGSearchResult } from '@/types'; + +interface Message { + role: 'system' | 'user' | 'assistant'; + content: string; + id?: string; + timestamp?: number; + isStreaming?: boolean; + sources?: Record; + searchPerformed?: boolean; +} + +interface Source { + id: string; + score: number; + metadata: { + title?: string; + text?: string; + documentid?: string; + snippet?: string; + }; +} + +const AnimatedEllipsis: FC = () => { + const [dots, setDots] = useState(''); + + useEffect(() => { + const interval = setInterval(() => { 
+ setDots((prevDots) => (prevDots.length >= 3 ? '' : prevDots + '.')); + }, 200); + + return () => clearInterval(interval); + }, []); + + return ( + + {dots} + + ); +}; + +const SourceItem: FC<{ source: Source }> = ({ source }) => { + const { score, metadata } = source; + + return ( +
+

+ {metadata.title || 'Untitled'} (Similarity: {score.toFixed(3)}) +

+

+ {metadata.text || 'No content available'} +

+
+ ); +}; + +function formatMarkdownNewLines(markdown: string): string { + return markdown + .replace(/\[(\d+)]/g, '[$1]($1)') + .split(`"queries":`)[0] + .replace(/\\u[\dA-F]{4}/gi, (match: string) => + String.fromCharCode(parseInt(match.replace(/\\u/g, ''), 16)) + ); +} + +const parseVectorSearchSources = (sources: string | object): Source[] => { + if (typeof sources === 'string') { + try { + const cleanedSources = sources; + return JSON.parse(cleanedSources); + } catch (error) { + console.error('Failed to parse sources:', error); + return []; + } + } + return sources as Source[]; +}; + +const parseKGSearchResult = (sources: string | object): KGSearchResult[] => { + if (typeof sources === 'string') { + try { + const cleanedSources = sources; + return JSON.parse(cleanedSources); + } catch (error) { + console.error('Failed to parse sources:', error); + return []; + } + } + return sources as KGSearchResult[]; +}; + +interface KGSearchResultState { + entities: KGSearchResult[]; + communities: KGSearchResult[]; +} + +export const Answer: FC<{ + message: Message; + isStreaming: boolean; + isSearching: boolean; +}> = ({ message, isStreaming, isSearching }) => { + const [isOpen, setIsOpen] = useState(false); + const [parsedVectorSources, setParsedVectorSources] = useState([]); + const [parsedEntities, setParsedEntities] = useState([]); + const [parsedCommunities, setParsedCommunities] = useState( + [] + ); + useEffect(() => { + if (message.sources.vector) { + const parsed = parseVectorSearchSources(message.sources.vector); + setParsedVectorSources(parsed); + } + if (message.sources.kg) { + console.log('message.sources.kg = ', message.sources.kg); + let kgLocalResult: KGSearchResult[] = JSON.parse(message.sources.kg); + + const entitiesArray = kgLocalResult.filter( + (item: any) => item.result_type === 'entity' + ); + const communitiesArray = kgLocalResult.filter( + (item: any) => item.result_type === 'community' + ); + setParsedEntities(entitiesArray); + 
setParsedCommunities(communitiesArray); + // setParsedKGSearchResult(parsedKGSearchResult); + // debugger; + } + }, [message.sources]); + + const renderContent = () => { + const paragraphs = message.content.split('\n\n'); + return paragraphs.map((paragraph, index) => ( +

, + h2: (props) =>

, + h3: (props) =>

, + h4: (props) =>

, + h5: (props) =>

, + h6: (props) =>
, + strong: (props) => ( + + ), + p: ({ children }) => ( +

+ {children} + {isStreaming && index === paragraphs.length - 1 && ( + + )} +

+ ), + li: (props) =>
  • , + blockquote: (props) => ( +
    + ), + em: (props) => , + code: (props) => , + pre: (props) =>
    ,
    +          a: ({ href, ...props }) => {
    +            if (!href) return null;
    +            const source = parsedVectorSources[+href - 1];
    +            if (!source) return null;
    +            const metadata = source.metadata;
    +            return (
    +              
    +                
    +                  
    +                    
    +                      {href}
    +                    
    +                  
    +                  
    +                    
    + {metadata.title ? `Title: ${metadata.title}` : ''} + {metadata?.documentid + ? `, DocumentId: ${metadata.documentid.slice(0, 8)}` + : ''} +
    +
    +
    +
    + {metadata?.snippet ?? ''} +
    +
    + {source.text ?? ''} +
    +
    +
    +
    +
    +
    + ); + }, + }} + > + {formatMarkdownNewLines(paragraph)} + + )); + }; + return ( +
    + {parsedVectorSources.length > 0 || isSearching ? ( + setIsOpen(value === 'answer')} + > + + +
    + + + {isSearching ? ( + + Searching over sources... + + ) : ( + `View ${parsedVectorSources.length} Sources` + )} + +
    +
    + + {!isSearching && ( + + )} + +
    +
    + ) : message.searchPerformed ? ( +
    + + No sources found +
    + ) : null} + +
    + {message.content || isStreaming ? ( +
    + {message.content ? ( + renderContent() + ) : ( +
    + +
    + )} +
    + ) : ( +
    + + + + +
    + )} +
    +
    + ); +}; diff --git a/templates/ycombinator_graphrag/web-app/components/ChatWindow.tsx b/templates/ycombinator_graphrag/web-app/components/ChatWindow.tsx new file mode 100644 index 000000000..f9e481909 --- /dev/null +++ b/templates/ycombinator_graphrag/web-app/components/ChatWindow.tsx @@ -0,0 +1,303 @@ +import React, { FC, useEffect, useState, useRef, useCallback } from 'react'; +import { Alert, AlertDescription, AlertTitle } from '@/components/ui/alert'; +import { Info, X } from 'lucide-react'; + +import MessageBubble from '@/components/MessageBubble'; +import { Answer } from '@/components/Answer'; +import { DefaultQueries } from '@/components/DefaultQueries'; + +const FUNCTION_START_TOKEN = ''; +const FUNCTION_END_TOKEN = ''; +const LLM_START_TOKEN = ''; +const LLM_END_TOKEN = ''; + +interface Message { + role: 'system' | 'user' | 'assistant'; + content: string; + id: string; + timestamp: number; + isStreaming?: boolean; + sources?: Record; + searchPerformed?: boolean; +} + +interface ChatWindowProps { + query: string; + setQuery: (query: string) => void; + agentUrl: string; + messages: Message[]; + setMessages: React.Dispatch>; + isStreaming: boolean; + setIsStreaming: React.Dispatch>; +} + +export const ChatWindow: FC = ({ + query, + setQuery, + agentUrl, + messages, + setMessages, + isStreaming, + setIsStreaming, +}) => { + const [isSearching, setIsSearching] = useState(false); + const [error, setError] = useState(null); + const messagesEndRef = useRef(null); + const [showInfoAlert, setShowInfoAlert] = useState(true); + + const scrollToBottom = useCallback(() => { + messagesEndRef.current?.scrollIntoView({ behavior: 'smooth' }); + }, []); + + useEffect(() => { + if (messages.length > 0) { + scrollToBottom(); + } + }, [messages, scrollToBottom]); + + const updateLastMessage = useCallback( + ( + content?: string, + sources?: Record, + isStreaming?: boolean, + searchPerformed?: boolean + ) => { + setMessages((prevMessages) => { + const updatedMessages = 
[...prevMessages]; + const lastMessage = updatedMessages[updatedMessages.length - 1]; + if (lastMessage.role === 'assistant') { + return [ + ...updatedMessages.slice(0, -1), + { + ...lastMessage, + ...(content !== undefined && { content }), + ...(sources !== undefined && { sources }), + ...(isStreaming !== undefined && { isStreaming }), + ...(searchPerformed !== undefined && { searchPerformed }), + }, + ]; + } + return prevMessages; + }); + }, + [setMessages] + ); + + useEffect(() => { + if (!query || isStreaming) { + return; + } + + setShowInfoAlert(false); + + const parseStreaming = async () => { + setIsStreaming(true); + setIsSearching(true); + setError(null); + + const newUserMessage: Message = { + role: 'user', + content: query, + id: Date.now().toString(), + timestamp: Date.now(), + sources: {}, + }; + + const newAssistantMessage: Message = { + role: 'assistant', + content: '', + id: (Date.now() + 1).toString(), + timestamp: Date.now() + 1, + isStreaming: true, + sources: {}, + searchPerformed: false, + }; + + setMessages((prevMessages) => [ + ...prevMessages, + newUserMessage, + newAssistantMessage, + ]); + + let buffer = ''; + let inLLMResponse = false; + let fullContent = ''; + + try { + const response = await fetch('/api/agent', { + method: 'POST', + headers: { 'Content-Type': 'application/json' }, + body: JSON.stringify({ + messages: [...messages, newUserMessage], + apiUrl: agentUrl, + use_vector_search: true, + search_filters: {}, + search_limit: 20, + do_hybrid_search: false, + use_kg_search: true, + rag_generation_config: { + stream: true, + }, + }), + }); + + const reader = response.body!.getReader(); + const decoder = new TextDecoder(); + + while (true) { + const { done, value } = await reader.read(); + if (done) { + break; + } + + buffer += decoder.decode(value, { stream: true }); + + if (buffer.includes(FUNCTION_END_TOKEN)) { + const [results, rest] = buffer.split(FUNCTION_END_TOKEN); + + console.log('results = ', results); + const 
vectorSearchSources = results.includes('') + ? results.split('')[1].split('')[0] + : null; + + const kgSearchResult = results.includes('') + ? results.split('')[1].split('')[0] + : null; + + updateLastMessage( + undefined, + { vector: vectorSearchSources, kg: kgSearchResult }, + undefined, + true + ); + buffer = rest || ''; + setIsSearching(false); + } + + if (buffer.includes(LLM_START_TOKEN)) { + inLLMResponse = true; + buffer = buffer.split(LLM_START_TOKEN)[1] || ''; + } + + if (inLLMResponse) { + const endTokenIndex = buffer.indexOf(LLM_END_TOKEN); + let chunk = ''; + + if (endTokenIndex !== -1) { + chunk = buffer.slice(0, endTokenIndex); + buffer = buffer.slice(endTokenIndex + LLM_END_TOKEN.length); + inLLMResponse = false; + } else { + chunk = buffer; + buffer = ''; + } + + fullContent += chunk; + updateLastMessage(fullContent, undefined, true); + } + } + } catch (err: unknown) { + console.error('Error in streaming:', err); + setError(err instanceof Error ? err.message : String(err)); + } finally { + setIsStreaming(false); + setIsSearching(false); + updateLastMessage(fullContent, undefined, false); + setQuery(''); + } + }; + + parseStreaming(); + }, [ + query, + agentUrl, + setMessages, + setIsStreaming, + messages, + updateLastMessage, + isStreaming, + setQuery, + ]); + + return ( +
    + {/* Info Alert */} + {showInfoAlert && ( + +
    + + + Powered by R2R's GraphRAG — + + Learn how to implement your own GraphRAG here! + + +
    + + GraphRAG excels at answering complex questions that other methods of + search struggle with. By developing deep understanding of complex + datasets and their relationships, GraphRAG can provide more accurate + and informative answers to your users. +

    + Learn more about GraphRAG from   + + Microsoft's research + +   or from our blog post on   + + production ready GrapRAG + +

    + Here, we've connected to an R2R server and built a knowledge + graph over the profiles of the YC S24 companies. Feel free to ask + any questions you have about the companies, their founders, or + anything else you'd like to know! +
    + +
    + )} + + {/* Chat Messages */} +
    +
    + {messages.map((message, index) => ( + + {message.role === 'user' ? ( + + ) : ( + + )} + + ))} +
    +
    + {error &&
    Error: {error}
    } + {messages.length === 0 && } +
    +
    + ); +}; diff --git a/templates/ycombinator_graphrag/web-app/components/DefaultQueries.tsx b/templates/ycombinator_graphrag/web-app/components/DefaultQueries.tsx new file mode 100644 index 000000000..3abf4b94c --- /dev/null +++ b/templates/ycombinator_graphrag/web-app/components/DefaultQueries.tsx @@ -0,0 +1,53 @@ +import { Lightbulb, FlaskConical, Flame, Earth } from 'lucide-react'; +import { FC } from 'react'; + +import { Logo } from '@/components/Logo'; +import { Alert, AlertDescription } from '@/components/ui/alert'; + +interface DefaultQueriesProps { + setQuery: (query: string) => void; +} + +export const DefaultQueries: FC = ({ setQuery }) => { + const defaultQueries = [ + { + query: 'What does the company Decisional AI do?', + icon: , + }, + { + query: + 'Which companies will most likely succeed and which ones will fail?', + icon: , + }, + { + query: 'Which companies are NOT building AI agents?', + icon: , + }, + { + query: "How can I get in touch with R2R's founders?", + icon: , + }, + ]; + + return ( +
    + +
    + {defaultQueries.map(({ query, icon }, index) => ( + = 2 ? 'hidden sm:flex' : '' + }`} + onClick={() => setQuery(query)} + > +
    {icon}
    + + {query} + +
    + ))} +
    +
    + ); +}; diff --git a/templates/ycombinator_graphrag/web-app/components/DocumentCard.tsx b/templates/ycombinator_graphrag/web-app/components/DocumentCard.tsx new file mode 100644 index 000000000..bf5df15f5 --- /dev/null +++ b/templates/ycombinator_graphrag/web-app/components/DocumentCard.tsx @@ -0,0 +1,59 @@ +import React, { useState } from 'react'; +import Modal from './Modal'; + +interface Document { + id: number; + name: string; + type: string; + url: string; +} + +interface DocumentCardProps { + document: Document; +} + +const DocumentCard: React.FC = ({ document }) => { + const [isPreviewOpen, setIsPreviewOpen] = useState(false); + + const renderPreview = () => { + switch (document.type) { + case 'pdf': + case 'txt': + case 'md': + case 'html': + return