-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathorch.py
108 lines (79 loc) · 2.61 KB
/
orch.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
from duckduckgo_search import DDGS
import requests
from bs4 import BeautifulSoup
import html2text
from llama_index.core.tools import FunctionTool
#from serp_api import search
from image_generator import generate_image
from llama_index.tools.code_interpreter import CodeInterpreterToolSpec
from llama_index.tools.arxiv import ArxivToolSpec
from llama_index.core.tools.ondemand_loader_tool import OnDemandLoaderTool
from llama_index.readers.wikipedia import WikipediaReader
from llama_index.agent.openai import OpenAIAgent
from llama_index.tools.exa import ExaToolSpec
from copy import copy
import os
# Exa tool spec; the API key is taken from the EXA_API_KEY environment
# variable (None is passed through when the variable is not set).
exa_tool = ExaToolSpec(api_key=os.environ.get("EXA_API_KEY"))

# Code-interpreter tool spec, instantiated once at import time.
code_spec = CodeInterpreterToolSpec()

# Shared DuckDuckGo search client used by search().
ddgs = DDGS()

# Wikipedia reader handed to the on-demand loader tool in get_tools().
reader = WikipediaReader()
def html_to_markdown(html_content: str) -> str:
    """
    Render an HTML document as Markdown text.

    Every <script> and <style> element is detached from the parsed tree
    before conversion, so inline JavaScript and CSS are not carried into
    the output.

    Args:
        html_content (str): Raw HTML markup.

    Returns:
        str: Markdown produced by html2text from the cleaned markup.
    """
    document = BeautifulSoup(html_content, 'html.parser')

    # Strip non-content elements (scripts and stylesheets) from the tree.
    for unwanted in document.find_all(['script', 'style']):
        unwanted.extract()

    # Serialise the cleaned tree back to HTML and hand it to html2text.
    return html2text.html2text(str(document))
def search(query: str, *, max_results: int = 10) -> list:
    """
    Run a DuckDuckGo text search and return the results.

    Uses the module-level ``ddgs`` client.

    Args:
        query (str): Search query.
        max_results (int): Maximum number of results to request from
            DuckDuckGo. Keyword-only; defaults to 10, the limit that was
            previously hard-coded.

    Returns:
        list: The search results, materialised into a list.
    """
    # list() materialises whatever iterable the client returns.
    return list(ddgs.text(query, max_results=max_results))
# Seconds to wait when connecting to the server and when waiting for it to
# send data. Without a timeout, requests waits indefinitely on a stalled host.
_CRAWL_TIMEOUT_SECONDS = 30


def crawl_site(link: str) -> str:
    """
    Fetch a web page and convert its HTML content to Markdown.

    The response body is converted regardless of the HTTP status code, so
    an error page is returned as Markdown rather than raising.

    Args:
        link (str): URL of the page to fetch.

    Returns:
        str: Markdown text produced by html_to_markdown from the response body.

    Raises:
        requests.exceptions.Timeout: If the server does not respond within
            _CRAWL_TIMEOUT_SECONDS.
        requests.exceptions.RequestException: For any other failure raised
            by requests while performing the GET (propagated unchanged).
    """
    # Bounded GET: an unresponsive host must not block the caller forever.
    response = requests.get(link, timeout=_CRAWL_TIMEOUT_SECONDS)

    return html_to_markdown(response.text)
def get_tools():
    """
    Build the list of llama_index tools exposed by this module.

    The list contains, in order: the crawl_site function tool, the
    generate_image function tool, the Wikipedia on-demand loader tool,
    the tools from ArxivToolSpec, and the tools from the module-level
    Exa tool spec.

    Returns:
        list: A newly created list of tool objects on every call.
    """
    # NOTE: the DuckDuckGo `search` function tool and the code-interpreter
    # tools (code_spec) are currently not registered here.
    crawl_tool = FunctionTool.from_defaults(fn=crawl_site)
    image_tool = FunctionTool.from_defaults(fn=generate_image)
    wikipedia_tool = OnDemandLoaderTool.from_defaults(
        reader,
        name="WikipediaTool",
        description="A tool for loading and querying articles from Wikipedia",
    )
    arxiv_spec = ArxivToolSpec()

    # Spec-provided tools are appended after the hand-built ones.
    return [
        crawl_tool,
        image_tool,
        wikipedia_tool,
        *arxiv_spec.to_tool_list(),
        *exa_tool.to_tool_list(),
    ]