Skip to content

Commit

Permalink
Merge pull request #30 from mad-cat-lon/autopep8-patches
Browse files (browse the repository at this point in the history)
Fixes by autopep8 action
  • Loading branch information
mad-cat-lon authored Jul 20, 2024
2 parents 60d699f + 82c7ed9 commit bbef6ae
Show file tree
Hide file tree
Showing 2 changed files with 15 additions and 16 deletions.
12 changes: 6 additions & 6 deletions core/prompts.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@

PROMPT = """
<|system|>
You are an expert lawyer analyzing terms of service agreements. Given a statement about the service and 4 pieces of text extracted from its documents, pick the number of the text that directly answers the query in its entirety. Output a valid JSON object containing the choice of text and concise reasoning. If none of the texts can explicitly answer the statement, return 0. If there is a text that answers the question, set the "answer" field to true. In all other cases, set it to false.
Here are some examples:
You are an expert lawyer analyzing terms of service agreements. Given a statement about the service and 4 pieces of text extracted from its documents, pick the number of the text that directly answers the query in its entirety. Output a valid JSON object containing the choice of text and concise reasoning. If none of the texts can explicitly answer the statement, return 0. If there is a text that answers the question, set the "answer" field to true. In all other cases, set it to false.
Here are some examples:
Given the statement "You sign away all moral rights", which of the following texts, if any, answer it fully?
Expand All @@ -21,7 +21,7 @@
"You will not license, sell, or transfer your Account without our prior written approval."
```
4)
```"By submitting Your Content to the Services, you represent and warrant that you have all rights, power, and authority necessary to grant the rights to Your Content contained within these Terms. Because you alone are responsible for Your
```"By submitting Your Content to the Services, you represent and warrant that you have all rights, power, and authority necessary to grant the rights to Your Content contained within these Terms. Because you alone are responsible for Your
Content, you may expose yourself to liability if you post or share Content without all necessary rights."
```
{{
Expand All @@ -30,7 +30,7 @@
"answer": true
}}
Given the statement 'The cookies used only collect anonymous, aggregated data that cannot be linked to a unique identity', which text answers it fully?
Given the statement 'The cookies used only collect anonymous, aggregated data that cannot be linked to a unique identity', which text answers it fully?
1)
```
personalized, unique and relevant offering, as this is why users come to the
Expand Down Expand Up @@ -104,12 +104,13 @@

n_results = 4


class RAGQueryPromptTemplate(StringPromptTemplate, BaseModel):
"""
Custom prompt template that takes in the query (a TOSDR case like "This service can read your messages")
and formats the prompt template to provide the query and the 4 texts returned from the vector store
"""

def format(self, **kwargs) -> str:
prompt = PROMPT.format(
query=kwargs["query"],
Expand All @@ -119,4 +120,3 @@ def format(self, **kwargs) -> str:
result4=kwargs["results"][3],
)
return prompt

19 changes: 9 additions & 10 deletions core/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
load_dotenv()
print("Setting up vector store...")
# Handling vector store
# Initialize persistent client and collection
# Initialize persistent client and collection
embedding_function = SentenceTransformerEmbeddings(
model_name="all-MiniLM-L6-v2"
)
Expand Down Expand Up @@ -133,15 +133,15 @@ async def add_src_document(src_doc: SourceDocument):
{src_doc.service} already exists in the database"
}
)

# Create Langchain Document object from our request
original_doc = Document(
page_content=src_doc.text,
metadata={
"service": src_doc.service,
"url": src_doc.url,
"name": src_doc.name
}
}
)
# Turn HTML of page into markdown
html2text = Html2TextTransformer()
Expand All @@ -157,7 +157,7 @@ async def add_src_document(src_doc: SourceDocument):
headers_to_split_on=headers_to_split_on
)
split_by_headers = md_header_splitter.split_text(md_doc.page_content)

# Go through each markdown chunk and recursively split
recursive_char_splitter = RecursiveCharacterTextSplitter(
chunk_size=500,
Expand Down Expand Up @@ -188,7 +188,7 @@ async def scrape_raw_document_from_url(browser, url, service):
html = await page.content()
# Only get the domain without subdomain to avoid cases
# where the service would be "github.com" but source doc links
# are in "docs.github.com"
# are in "docs.github.com"
name = await page.title()
src_doc = SourceDocument(
service=service,
Expand Down Expand Up @@ -248,7 +248,7 @@ async def make_query(query: LLMQuery):
# print(query_response)
if len(query_response) < 4:
result["error"] = 0
extension_response["results"].append(result)
extension_response["results"].append(result)
continue
# For each returned text from the vector store, insert into prompt,
# send to model and parse response
Expand All @@ -265,9 +265,9 @@ async def make_query(query: LLMQuery):
query=q["text"],
results=[doc.page_content for doc in query_response]
)
print("="*100)
print("=" * 100)
print(prompt)
print("="*100)
print("=" * 100)

llm_response = llm(prompt)
print(llm_response)
Expand All @@ -288,11 +288,10 @@ async def make_query(query: LLMQuery):
if source_text:
result["error"] = None
else:
# Model chose 0
# Model chose 0
result["error"] = 1
except json.JSONDecodeError:
print("Error decoding response from model")
result["error"] = 2
extension_response["results"].append(result)
return extension_response

0 comments on commit bbef6ae

Please sign in to comment.