diff --git a/docs/source/safety_and_evaluation.rst b/docs/source/safety_and_evaluation.rst index 8ef89d3..a5ef50c 100644 --- a/docs/source/safety_and_evaluation.rst +++ b/docs/source/safety_and_evaluation.rst @@ -36,7 +36,7 @@ Synthetic Dataset Generation The evaluation utilizes a synthetic dataset of 1000 queries, carefully structured to test different aspects of the system: - **Regular Situations (80%)**: Divided equally among three levels of detail: - + - Low: 1-2 sentences, brief queries with minimal context - Medium: 2-3 sentences with moderate background information - High: Detailed paragraph with extensive context @@ -165,7 +165,7 @@ Performance Metrics RAGAS Metrics By Category ^^^^^^^^^^^^^^^^^^^^^^^^^^ -Note: These metrics were obtained using a synthetic dataset specifically generated from services in the Greater Toronto Area (GTA). +Note: These metrics were obtained using a synthetic dataset specifically generated from services in the Greater Toronto Area (GTA). The evaluated RAG system used specialized prompts that differ marginally from those of the current system. .. 
list-table:: @@ -279,4 +279,4 @@ To enable re-ranking in your API calls, simply set the `rerank` parameter to `tr "longitude": -79.3832, "radius": 5000, "rerank": true - } \ No newline at end of file + } diff --git a/eval/collect_rag_outputs.py b/eval/collect_rag_outputs.py index c8d8919..611e662 100644 --- a/eval/collect_rag_outputs.py +++ b/eval/collect_rag_outputs.py @@ -2,7 +2,7 @@ import asyncio import json import logging -from typing import Dict, List, Any +from typing import Dict, Any import aiohttp from tqdm.asyncio import tqdm_asyncio @@ -27,7 +27,8 @@ async def fetch_recommendation( "query": query["query"], "answer": result["message"], "context": [ - str(service["id"]) for service in (result.get("services", []) or []) + str(service["id"]) + for service in (result.get("services", []) or []) ], "ground_truth": query["context"], } @@ -39,7 +40,9 @@ async def fetch_recommendation( return None -async def process_samples(samples_file: str, output_file: str, batch_size: int = 5) -> None: +async def process_samples( + samples_file: str, output_file: str, batch_size: int = 5 +) -> None: """Process samples in batches and save results.""" # Load samples with open(samples_file, "r") as f: @@ -90,4 +93,4 @@ def main() -> None: if __name__ == "__main__": - main() \ No newline at end of file + main()