SciPhi-AI · emrgnt-cmplxty · Sep 21, 2024 · Sep 21, 2024 · Sep 21, 2024
diff --git a/.gitignore b/.gitignore
@@ -8,6 +8,8 @@ py/workspace/
 uploads/
 env/
 **/__pycache__
+**/.mypy_cache
+**/.pytest_cache
 dump/*
 .next
 node_modules

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -23,3 +23,10 @@ repos:
         language: system
         types: [python]
         pass_filenames: false
+
+      - id: mypy
+        name: mypy
+        entry: bash -c 'cd py && poetry run mypy .'
+        language: system
+        types: [python]
+        pass_filenames: false
diff --git a/docs/api-reference/openapi.json b/docs/api-reference/openapi.json
diff --git a/docs/cookbooks/graphrag.mdx b/docs/cookbooks/graphrag.mdx
@@ -32,9 +32,9 @@ r2r serve
 [kg]
 provider = "neo4j"
 batch_size = 256
-kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot"
 
   [kg.kg_creation_settings]
+    kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot"
     entity_types = [] # if empty, all entities are extracted
     relation_types = [] # if empty, all relations are extracted
     max_knowledge_triples = 100
@@ -47,15 +47,15 @@ kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot"
     generation_config = { model = "gpt-4o-mini" } # and other params, model used for node description and graph clustering
     leiden_params = { max_levels = 10 } # more params here: https://neo4j.com/docs/graph-data-science/current/algorithms/leiden/
 
-  [kg.kg_search_config]
-    model = "gpt-4o-mini"
+  [kg.kg_search_settings]
+    generation_config = { model = "gpt-4o-mini" }
 ```
 </Accordion>
 </Tab>
 
 <Tab title="Local LLMs">
 ```bash
-r2r serve --config-name=local_llm_neo4j_kg
+r2r serve --config-name=local_llm
 ```
 
 ### Local LLM Setup (Optional)
@@ -72,7 +72,7 @@ When running with local RAG, you must have the Triplex model available locally.
   ```
 
 
-<Accordion icon="gear" title="Configuration: local_llm_neo4j_kg">
+<Accordion icon="gear" title="Configuration: local_llm">
 ``` toml
 [completion]
 provider = "litellm"
@@ -96,9 +96,9 @@ excluded_parsers = [ "gif", "jpeg", "jpg", "png", "svg", "mp3", "mp4" ]
 
 [kg]
 provider = "neo4j"
-kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot"
 
   [kg.kg_creation_settings]
+    kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot"
     entity_types = [] # if empty, all entities are extracted
     relation_types = [] # if empty, all relations are extracted
     max_knowledge_triples = 100
@@ -111,8 +111,8 @@ kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot"
     generation_config = { model = "ollama/llama3.1" } # and other params, model used for node description and graph clustering
     leiden_params = { max_levels = 10 } # more params here: https://neo4j.com/docs/graph-data-science/current/algorithms/leiden/
 
-  [kg.kg_search_config]
-    model = "ollama/llama3.1"
+  [kg.kg_search_settings]
+    generation_config = { model = "ollama/llama3.1" }
 
 [database]
 provider = "postgres"

diff --git a/docs/documentation/configuration/knowledge-graph/enrichment.mdx b/docs/documentation/configuration/knowledge-graph/enrichment.mdx
@@ -11,9 +11,9 @@ You can configure knowledge graph enrichment in the R2R configuration file. To d
 [kg]
 provider = "neo4j"
 batch_size = 256
-kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot"
 
   [kg.kg_creation_settings]
+    kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot"
     entity_types = [] # if empty, all entities are extracted
     relation_types = [] # if empty, all relations are extracted
     fragment_merge_count = 4 # number of fragments to merge into a single extraction
@@ -26,8 +26,8 @@ kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot"
     generation_config = { model = "gpt-4o-mini" } # and other generation params
     leiden_params = { max_levels = 10 } # more params in graspologic/partition/leiden.py
 
-  [kg.kg_search_config]
-    model = "gpt-4o-mini"
+  [kg.kg_search_settings]
+    generation_config = { model = "gpt-4o-mini" }
 ```
 
 Next you can do GraphRAG with the knowledge graph. Find out more about GraphRAG in the [GraphRAG Guide](/cookbooks/graphrag).
diff --git a/docs/documentation/configuration/knowledge-graph/overview.mdx b/docs/documentation/configuration/knowledge-graph/overview.mdx
@@ -30,8 +30,8 @@ kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot"
     generation_config = { model = "gpt-4o-mini" } # and other generation params below
     leiden_params = { max_levels = 10 } # more params in https://neo4j.com/docs/graph-data-science/current/algorithms/leiden/
 
-  [kg.kg_search_config]
-    model = "gpt-4o-mini"
+  [kg.kg_search_settings]
+    generation_config = { model = "gpt-4o-mini" }
 ```
 
 
@@ -47,7 +47,7 @@ Let's break down the knowledge graph configuration options:
 - `kg_enrichment_settings`: Similar configuration for the model used in knowledge graph enrichment.
   - `generation_config`: Configuration for the model used in knowledge graph enrichment.
   - `leiden_params`: Parameters for the Leiden algorithm.
-- `kg_search_config`: Similar configuration for the model used in knowledge graph search operations.
+- `kg_search_settings`: Similar configuration for the model used in knowledge graph search operations.
 
 ### Neo4j Configuration
 
@@ -88,7 +88,7 @@ The Neo4jKGProvider supports various operations:
 
 ### Customization
 
-You can customize the knowledge graph extraction and search processes by modifying the `kg_extraction_prompt` and adjusting the model configurations in `kg_extraction_config` and `kg_search_config`. Moreover, you can customize the LLM models used in various parts of the knowledge graph creation process. All of these options can be selected at runtime, with the only exception being the specified database provider. For more details, refer to the knowledge graph settings in the [search API](/api-reference/endpoint/search).
+You can customize the knowledge graph extraction and search processes by modifying the `kg_extraction_prompt` and adjusting the model configurations in `kg_creation_settings` and `kg_search_settings`. Moreover, you can customize the LLM models used in various parts of the knowledge graph creation process. All of these options can be selected at runtime, with the only exception being the specified database provider. For more details, refer to the knowledge graph settings in the [search API](/api-reference/endpoint/search).
 
 By leveraging the knowledge graph capabilities, you can enhance R2R's understanding of document relationships and improve the quality of search and retrieval operations.
 

diff --git a/docs/documentation/configuration/postgres.mdx b/docs/documentation/configuration/postgres.mdx
@@ -35,7 +35,7 @@ password = "your_postgres_password"
 host = "your_postgres_host"
 port = "your_postgres_port"
 db_name = "your_database_name"
-your_project_name = "your_project_collection_name"
+project_name = "your_project_name"
 ```
 
 2. Alternatively, you can set the following environment variables:
@@ -46,7 +46,7 @@ export POSTGRES_PASSWORD=your_postgres_password
 export POSTGRES_HOST=your_postgres_host
 export POSTGRES_PORT=your_postgres_port
 export POSTGRES_DBNAME=your_database_name
-export POSTGRES_PROJECT_NAME=your_vector_collection_name
+export POSTGRES_PROJECT_NAME=your_project_name
 ```
 
 ## Advanced Postgres Features in R2R

diff --git a/docs/documentation/configuration/rag.mdx b/docs/documentation/configuration/rag.mdx
@@ -44,7 +44,7 @@ kg_search_settings = {
     "use_kg_search": True,
     "kg_search_type": "global",
     "kg_search_level": None,
-    "kg_search_generation_config": {
+    "generation_config": {
         "model": "gpt-4",
         "temperature": 0.1
     },

diff --git a/docs/documentation/configuration/retrieval/knowledge-graph.mdx b/docs/documentation/configuration/retrieval/knowledge-graph.mdx
@@ -9,7 +9,7 @@ kg_search_settings = {
     "use_kg_search": True,
     "kg_search_type": "global",
     "kg_search_level": None,
-    "kg_search_generation_config": {
+    "generation_config": {
         "model": "gpt-4",
         "temperature": 0.1
     },
@@ -28,7 +28,7 @@ response = client.search("query", kg_search_settings=kg_search_settings)
 1. `use_kg_search` (bool): Whether to use knowledge graph search
 2. `kg_search_type` (str): Type of knowledge graph search ('global' or 'local')
 3. `kg_search_level` (Optional[str]): Level of knowledge graph search
-4. `kg_search_generation_config` (Optional[GenerationConfig]): Configuration for knowledge graph search generation
+4. `generation_config` (Optional[GenerationConfig]): Configuration for knowledge graph search generation
 5. `entity_types` (list): Types of entities to search for
 6. `relationships` (list): Types of relationships to search for
 7. `max_community_description_length` (int): Maximum length of community descriptions (default: 65536)

diff --git a/docs/documentation/deep-dive/providers/database.mdx b/docs/documentation/deep-dive/providers/database.mdx
@@ -40,7 +40,7 @@ export POSTGRES_PASSWORD=your_postgres_password
 export POSTGRES_HOST=your_postgres_host
 export POSTGRES_PORT=your_postgres_port
 export POSTGRES_DBNAME=your_database_name
-export POSTGRES_PROJECT_NAME=your_collection_name
+export POSTGRES_PROJECT_NAME=your_project_name
 ```
 Environment variables take precedence over the config settings in case of conflicts. The R2R Docker includes configuration options that facilitate integration with a combined Postgres+pgvector database setup.
 

diff --git a/docs/documentation/deep-dive/providers/knowledge-graph.mdx b/docs/documentation/deep-dive/providers/knowledge-graph.mdx
@@ -29,13 +29,13 @@ These are located in the `r2r.toml` file, under the `[kg]` section.
 [kg]
 provider = "neo4j"
 batch_size = 256
-kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot"
 user = "your_neo4j_user"
 password = "your_neo4j_password"
 url = "your_neo4j_url"
 database = "your_neo4j_database"
 
   [kg.kg_creation_settings]
+    kg_extraction_prompt = "graphrag_triplet_extraction_zero_shot"
     entity_types = ["Person", "Organization", "Location"] # if empty, all entities are extracted
     relation_types = ["works at", "founded by", "invested in"] # if empty, all relations are extracted
     max_knowledge_triples = 100

diff --git a/docs/documentation/js-sdk/retrieval.mdx b/docs/documentation/js-sdk/retrieval.mdx
@@ -109,7 +109,7 @@ const searchResponse = await client.search("What was Uber's profit in 2020?");
     Level of knowledge graph search.
     </ParamField>
 
-    <ParamField path="kg_search_generation_config" type="Optional[GenerationConfig]" default="GenerationConfig()">
+    <ParamField path="generation_config" type="Optional[GenerationConfig]" default="GenerationConfig()">
     Configuration for knowledge graph search generation.
     </ParamField>
 
@@ -331,7 +331,7 @@ const ragResponse = await client.rag("What was Uber's profit in 2020?");
     Level of knowledge graph search.
     </ParamField>
 
-    <ParamField path="kg_search_generation_config" type="Optional[GenerationConfig]" default="GenerationConfig()">
+    <ParamField path="generation_config" type="Optional[GenerationConfig]" default="GenerationConfig()">
     Configuration for knowledge graph search generation.
     </ParamField>
 
@@ -620,7 +620,7 @@ Note that any of the customization seen in AI powered search and RAG documentati
   Whether to use knowledge graph search.
 </ParamField>
 
-<ParamField path="kg_search_generation_config" type="object">
+<ParamField path="generation_config" type="object">
   Optional configuration for knowledge graph search generation.
 </ParamField>
 

diff --git a/docs/documentation/python-sdk/retrieval.mdx b/docs/documentation/python-sdk/retrieval.mdx
@@ -129,7 +129,7 @@ search_response = client.search("What was Uber's profit in 2020?")
     Level of knowledge graph search.
     </ParamField>
 
-    <ParamField path="kg_search_generation_config" type="Optional[GenerationConfig]" default="GenerationConfig()">
+    <ParamField path="generation_config" type="Optional[GenerationConfig]" default="GenerationConfig()">
     Configuration for knowledge graph search generation.
     </ParamField>
 
@@ -206,7 +206,7 @@ kg_search_response = client.search(
       "use_kg_search": True,
       "kg_search_type": "local",
       "kg_search_level": "0",
-      "kg_search_generation_config": {
+      "generation_config": {
           "model": "gpt-4o-mini",
           "temperature": 0.7,
       },
@@ -393,7 +393,7 @@ rag_response = client.rag("What was Uber's profit in 2020?")
     Level of knowledge graph search.
     </ParamField>
 
-    <ParamField path="kg_search_generation_config" type="Optional[GenerationConfig]" default="GenerationConfig()">
+    <ParamField path="generation_config" type="Optional[GenerationConfig]" default="GenerationConfig()">
     Configuration for knowledge graph search generation.
     </ParamField>
 
@@ -703,7 +703,7 @@ Note that any of the customization seen in AI powered search and RAG documentati
     Level of knowledge graph search.
     </ParamField>
 
-    <ParamField path="kg_search_generation_config" type="Optional[GenerationConfig]" default="GenerationConfig()">
+    <ParamField path="generation_config" type="Optional[GenerationConfig]" default="GenerationConfig()">
     Configuration for knowledge graph search generation.
     </ParamField>
 

diff --git a/js/sdk/package-lock.json b/js/sdk/package-lock.json
diff --git a/js/sdk/package.json b/js/sdk/package.json
@@ -1,6 +1,6 @@
 {
   "name": "r2r-js",
-  "version": "0.3.2",
+  "version": "0.3.3",
   "description": "",
   "main": "dist/index.js",
   "browser": "dist/index.browser.js",

diff --git a/js/sdk/src/models.tsx b/js/sdk/src/models.tsx
@@ -40,6 +40,7 @@ export interface VectorSearchSettings {
   use_hybrid_search?: boolean;
   filters?: Record<string, any>;
   search_limit?: number;
+  offset?: number;
   selected_collection_ids?: string[];
   index_measure: IndexMeasure;
   include_values?: boolean;
@@ -54,7 +55,7 @@ export interface KGSearchSettings {
   use_kg_search?: boolean;
   kg_search_type?: "global" | "local";
   kg_search_level?: number | null;
-  kg_search_generation_config?: GenerationConfig;
+  generation_config?: GenerationConfig;
   entity_types?: any[];
   relationships?: any[];
   max_community_description_length?: number;