Skip to content

Commit

Permalink
chore: move imports to top of files (#850)
Browse files Browse the repository at this point in the history
* chore: move import to the top of create_app.py

* refactor: move imports to top of SourceDocument.py

* chore: move imports to top of Strategies.py

* chore: move imports to top of Strategies.py and remove circular dependency

* chore: split OrchestrationStrategy symbol to its own file

* chore: move imports to top of Strategies.py
  • Loading branch information
liammoat authored May 8, 2024
1 parent e41a78a commit 78644c4
Show file tree
Hide file tree
Showing 18 changed files with 68 additions and 81 deletions.
3 changes: 1 addition & 2 deletions code/backend/batch/utilities/common/SourceDocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import json
from urllib.parse import urlparse, quote
from ..helpers.AzureBlobStorageClient import AzureBlobStorageClient
from langchain.docstore.document import Document


class SourceDocument:
Expand Down Expand Up @@ -80,8 +81,6 @@ def from_metadata(
)

def convert_to_langchain_document(self):
from langchain.docstore.document import Document

return Document(
page_content=self.content,
metadata={
Expand Down
25 changes: 25 additions & 0 deletions code/backend/batch/utilities/document_chunking/ChunkingStrategy.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
from enum import Enum


class ChunkingStrategy(Enum):
    """Enumeration of the document chunking strategies.

    Each member's value is the lowercase string identifier of the strategy,
    so ``ChunkingStrategy("layout")`` resolves to ``ChunkingStrategy.LAYOUT``.
    """

    LAYOUT = "layout"
    PAGE = "page"
    FIXED_SIZE_OVERLAP = "fixed_size_overlap"
    PARAGRAPH = "paragraph"


class ChunkingSettings:
    """Chunking configuration built from a settings dictionary.

    Args:
        chunking: Mapping that provides the ``"strategy"``, ``"size"`` and
            ``"overlap"`` keys. The strategy value is converted to a
            ``ChunkingStrategy`` member; size and overlap are stored as given.

    Raises:
        KeyError: If one of the three keys is missing from ``chunking``.
        ValueError: If the strategy value is not a ``ChunkingStrategy`` value.
    """

    def __init__(self, chunking: dict):
        self.chunking_strategy = ChunkingStrategy(chunking["strategy"])
        self.chunk_size = chunking["size"]
        self.chunk_overlap = chunking["overlap"]

    def __eq__(self, other: object) -> bool:
        """Return True when ``other`` is a ChunkingSettings with equal fields.

        Any object that is not a ``ChunkingSettings`` compares unequal.
        """
        # The check must be on `other`: testing isinstance(self, type(other))
        # succeeds for any base class instance (e.g. a plain object()) and
        # would then fail with AttributeError on the attribute reads below.
        if not isinstance(other, ChunkingSettings):
            return False
        return (
            self.chunking_strategy == other.chunking_strategy
            and self.chunk_size == other.chunk_size
            and self.chunk_overlap == other.chunk_overlap
        )
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import List
from abc import ABC, abstractmethod
from ..common.SourceDocument import SourceDocument
from .Strategies import ChunkingSettings
from .ChunkingStrategy import ChunkingSettings


class DocumentChunkingBase(ABC):
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import List
from .DocumentChunkingBase import DocumentChunkingBase
from langchain.text_splitter import TokenTextSplitter
from .Strategies import ChunkingSettings
from .ChunkingStrategy import ChunkingSettings
from ..common.SourceDocument import SourceDocument


Expand Down
2 changes: 1 addition & 1 deletion code/backend/batch/utilities/document_chunking/Layout.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import List
from .DocumentChunkingBase import DocumentChunkingBase
from langchain.text_splitter import MarkdownTextSplitter
from .Strategies import ChunkingSettings
from .ChunkingStrategy import ChunkingSettings
from ..common.SourceDocument import SourceDocument


Expand Down
2 changes: 1 addition & 1 deletion code/backend/batch/utilities/document_chunking/Page.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from typing import List
from .DocumentChunkingBase import DocumentChunkingBase
from langchain.text_splitter import MarkdownTextSplitter
from .Strategies import ChunkingSettings
from .ChunkingStrategy import ChunkingSettings
from ..common.SourceDocument import SourceDocument


Expand Down
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from typing import List
from .DocumentChunkingBase import DocumentChunkingBase
from .Strategies import ChunkingSettings
from .ChunkingStrategy import ChunkingSettings
from ..common.SourceDocument import SourceDocument


Expand Down
38 changes: 5 additions & 33 deletions code/backend/batch/utilities/document_chunking/Strategies.py
Original file line number Diff line number Diff line change
@@ -1,46 +1,18 @@
from enum import Enum


class ChunkingStrategy(Enum):
LAYOUT = "layout"
PAGE = "page"
FIXED_SIZE_OVERLAP = "fixed_size_overlap"
PARAGRAPH = "paragraph"
from .ChunkingStrategy import ChunkingStrategy
from .Layout import LayoutDocumentChunking
from .Page import PageDocumentChunking
from .FixedSizeOverlap import FixedSizeOverlapDocumentChunking
from .Paragraph import ParagraphDocumentChunking


def get_document_chunker(chunking_strategy: str):
if chunking_strategy == ChunkingStrategy.LAYOUT.value:
from .Layout import LayoutDocumentChunking

return LayoutDocumentChunking()
elif chunking_strategy == ChunkingStrategy.PAGE.value:
from .Page import PageDocumentChunking

return PageDocumentChunking()
elif chunking_strategy == ChunkingStrategy.FIXED_SIZE_OVERLAP.value:
from .FixedSizeOverlap import FixedSizeOverlapDocumentChunking

return FixedSizeOverlapDocumentChunking()
elif chunking_strategy == ChunkingStrategy.PARAGRAPH.value:
from .Paragraph import ParagraphDocumentChunking

return ParagraphDocumentChunking()
else:
raise Exception(f"Unknown chunking strategy: {chunking_strategy}")


class ChunkingSettings:
def __init__(self, chunking: dict):
self.chunking_strategy = ChunkingStrategy(chunking["strategy"])
self.chunk_size = chunking["size"]
self.chunk_overlap = chunking["overlap"]

def __eq__(self, other: object) -> bool:
if isinstance(self, other.__class__):
return (
self.chunking_strategy == other.chunking_strategy
and self.chunk_size == other.chunk_size
and self.chunk_overlap == other.chunk_overlap
)
else:
return False
12 changes: 4 additions & 8 deletions code/backend/batch/utilities/document_loading/Strategies.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,8 @@
from enum import Enum
from .Layout import LayoutDocumentLoading
from .Read import ReadDocumentLoading
from .Web import WebDocumentLoading
from .WordDocument import WordDocumentLoading


class LoadingStrategy(Enum):
Expand All @@ -10,20 +14,12 @@ class LoadingStrategy(Enum):

def get_document_loader(loader_strategy: str):
if loader_strategy == LoadingStrategy.LAYOUT.value:
from .Layout import LayoutDocumentLoading

return LayoutDocumentLoading()
elif loader_strategy == LoadingStrategy.READ.value:
from .Read import ReadDocumentLoading

return ReadDocumentLoading()
elif loader_strategy == LoadingStrategy.WEB.value:
from .Web import WebDocumentLoading

return WebDocumentLoading()
elif loader_strategy == LoadingStrategy.DOCX.value:
from .WordDocument import WordDocumentLoading

return WordDocumentLoading()
else:
raise Exception(f"Unknown loader strategy: {loader_strategy}")
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
from typing import List
from langchain.docstore.document import Document
from ..document_chunking.Strategies import (
get_document_chunker,
ChunkingSettings,
ChunkingStrategy,
)

from ..document_chunking.ChunkingStrategy import ChunkingSettings, ChunkingStrategy
from ..document_chunking.Strategies import get_document_chunker

__all__ = ["ChunkingStrategy"]

Expand Down
4 changes: 3 additions & 1 deletion code/backend/batch/utilities/helpers/OrchestratorHelper.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from typing import List

from ..orchestrator.OrchestrationStrategy import OrchestrationStrategy
from ..orchestrator import OrchestrationSettings
from ..orchestrator.Strategies import get_orchestrator, OrchestrationStrategy
from ..orchestrator.Strategies import get_orchestrator

__all__ = ["OrchestrationStrategy"]

Expand Down
9 changes: 4 additions & 5 deletions code/backend/batch/utilities/helpers/config/ConfigHelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,14 +2,13 @@
import json
import logging
from string import Template

from ..AzureBlobStorageClient import AzureBlobStorageClient
from ...document_chunking.Strategies import ChunkingSettings, ChunkingStrategy
from ...document_chunking.ChunkingStrategy import ChunkingStrategy, ChunkingSettings
from ...document_loading import LoadingSettings, LoadingStrategy
from .EmbeddingConfig import EmbeddingConfig
from ..OrchestratorHelper import (
OrchestrationSettings,
OrchestrationStrategy,
)
from ...orchestrator.OrchestrationStrategy import OrchestrationStrategy
from ...orchestrator import OrchestrationSettings
from ..EnvHelper import EnvHelper

CONFIG_CONTAINER_NAME = "config"
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from enum import Enum


class OrchestrationStrategy(Enum):
    """Enumeration of the orchestration strategies.

    Each member's value is the lowercase string identifier of the strategy,
    so ``OrchestrationStrategy("langchain")`` resolves to
    ``OrchestrationStrategy.LANGCHAIN``.
    """

    OPENAI_FUNCTION = "openai_function"
    LANGCHAIN = "langchain"
    SEMANTIC_KERNEL = "semantic_kernel"
17 changes: 4 additions & 13 deletions code/backend/batch/utilities/orchestrator/Strategies.py
Original file line number Diff line number Diff line change
@@ -1,24 +1,15 @@
from enum import Enum


class OrchestrationStrategy(Enum):
OPENAI_FUNCTION = "openai_function"
LANGCHAIN = "langchain"
SEMANTIC_KERNEL = "semantic_kernel"
from .OrchestrationStrategy import OrchestrationStrategy
from .OpenAIFunctions import OpenAIFunctionsOrchestrator
from .LangChainAgent import LangChainAgent
from .SemanticKernel import SemanticKernelOrchestrator


def get_orchestrator(orchestration_strategy: str):
if orchestration_strategy == OrchestrationStrategy.OPENAI_FUNCTION.value:
from .OpenAIFunctions import OpenAIFunctionsOrchestrator

return OpenAIFunctionsOrchestrator()
elif orchestration_strategy == OrchestrationStrategy.LANGCHAIN.value:
from .LangChainAgent import LangChainAgent

return LangChainAgent()
elif orchestration_strategy == OrchestrationStrategy.SEMANTIC_KERNEL.value:
from .SemanticKernel import SemanticKernelOrchestrator

return SemanticKernelOrchestrator()
else:
raise Exception(f"Unknown orchestration strategy: {orchestration_strategy}")
2 changes: 1 addition & 1 deletion code/backend/batch/utilities/orchestrator/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
from typing import List
import os.path
import pkgutil
from .Strategies import OrchestrationStrategy
from .OrchestrationStrategy import OrchestrationStrategy


class OrchestrationSettings:
Expand Down
6 changes: 2 additions & 4 deletions code/create_app.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import sys
import functools
from backend.batch.utilities.helpers.EnvHelper import EnvHelper
from backend.batch.utilities.helpers.OrchestratorHelper import Orchestrator
from backend.batch.utilities.helpers.config.ConfigHelper import ConfigHelper
from azure.mgmt.cognitiveservices import CognitiveServicesManagementClient
from azure.identity import DefaultAzureCredential

Expand Down Expand Up @@ -216,14 +218,10 @@ def stream_without_data(response):


def get_message_orchestrator():
from backend.batch.utilities.helpers.OrchestratorHelper import Orchestrator

return Orchestrator()


def get_orchestrator_config():
from backend.batch.utilities.helpers.config.ConfigHelper import ConfigHelper

return ConfigHelper.get_active_config_or_default().orchestrator


Expand Down
2 changes: 1 addition & 1 deletion code/tests/utilities/helpers/test_ConfigHelper.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from unittest.mock import patch, MagicMock
from backend.batch.utilities.helpers.config.ConfigHelper import ConfigHelper, Config
from backend.batch.utilities.helpers.config.EmbeddingConfig import EmbeddingConfig
from backend.batch.utilities.document_chunking.Strategies import ChunkingSettings
from backend.batch.utilities.document_chunking.ChunkingStrategy import ChunkingSettings
from backend.batch.utilities.document_loading import LoadingSettings


Expand Down
6 changes: 3 additions & 3 deletions code/tests/utilities/helpers/test_DocumentChunkingHelper.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
from backend.batch.utilities.common.SourceDocument import SourceDocument
from backend.batch.utilities.helpers.DocumentChunkingHelper import (
DocumentChunking,
ChunkingSettings,
from backend.batch.utilities.helpers.DocumentChunkingHelper import DocumentChunking
from backend.batch.utilities.document_chunking.ChunkingStrategy import (
ChunkingStrategy,
ChunkingSettings,
)

# Create a sample document
Expand Down

0 comments on commit 78644c4

Please sign in to comment.