Handled JSON format structure

This commit is contained in:
Aparup Ganguly 2025-02-03 00:39:00 +05:30
parent 61b989cc64
commit d5d3df9d10

View File

@@ -37,7 +37,7 @@ def find_relevant_page_via_map(objective, url, app, client):
print(f"{Colors.YELLOW}Analyzing objective to determine optimal search parameter...{Colors.RESET}") print(f"{Colors.YELLOW}Analyzing objective to determine optimal search parameter...{Colors.RESET}")
completion = client.chat.completions.create( completion = client.chat.completions.create(
model="o1-preview", model="o3-mini",
messages=[ messages=[
{ {
"role": "user", "role": "user",
@@ -56,9 +56,28 @@ def find_relevant_page_via_map(objective, url, app, client):
print(f"{Colors.YELLOW}Mapping website using the identified search parameter...{Colors.RESET}") print(f"{Colors.YELLOW}Mapping website using the identified search parameter...{Colors.RESET}")
map_website = app.map_url(url, params={"search": map_search_parameter}) map_website = app.map_url(url, params={"search": map_search_parameter})
# Debug print to see the response structure
print(f"{Colors.MAGENTA}Debug - Map response structure: {json.dumps(map_website, indent=2)}{Colors.RESET}")
print(f"{Colors.GREEN}Website mapping completed successfully.{Colors.RESET}") print(f"{Colors.GREEN}Website mapping completed successfully.{Colors.RESET}")
print(f"{Colors.GREEN}Located {len(map_website)} relevant links.{Colors.RESET}")
return map_website # Handle the response based on its structure
if isinstance(map_website, dict):
# Assuming the links are in a 'urls' or similar key
links = map_website.get('urls', []) or map_website.get('links', [])
elif isinstance(map_website, str):
try:
parsed = json.loads(map_website)
links = parsed.get('urls', []) or parsed.get('links', [])
except json.JSONDecodeError:
links = []
else:
links = map_website if isinstance(map_website, list) else []
print(f"{Colors.GREEN}Located {len(links)} relevant links.{Colors.RESET}")
return links
except Exception as e: except Exception as e:
print(f"{Colors.RED}Error encountered during relevant page identification: {str(e)}{Colors.RESET}") print(f"{Colors.RED}Error encountered during relevant page identification: {str(e)}{Colors.RESET}")
return None return None
@@ -67,7 +86,11 @@ def find_relevant_page_via_map(objective, url, app, client):
def find_objective_in_top_pages(map_website, objective, app, client): def find_objective_in_top_pages(map_website, objective, app, client):
try: try:
# Get top 3 links from the map result # Get top 3 links from the map result
top_links = map_website[:3] if isinstance(map_website, list) else [] if not map_website:
print(f"{Colors.RED}No links found to analyze.{Colors.RESET}")
return None
top_links = map_website[:3]
print(f"{Colors.CYAN}Proceeding to analyze top {len(top_links)} links: {top_links}{Colors.RESET}") print(f"{Colors.CYAN}Proceeding to analyze top {len(top_links)} links: {top_links}{Colors.RESET}")
for link in top_links: for link in top_links:
@@ -93,18 +116,17 @@ def find_objective_in_top_pages(map_website, objective, app, client):
""" """
completion = client.chat.completions.create( completion = client.chat.completions.create(
model="o3-mini", model="o3-mini",
reasoning_effort="medium", messages=[
messages=[ {
{ "role": "user",
"role": "user", "content": [
"content": [ {
{ "type": "text",
"type": "text", "text": check_prompt
"text": check_prompt }
} ]
] }
}
] ]
) )
@@ -129,10 +151,10 @@ def find_objective_in_top_pages(map_website, objective, app, client):
# Main function to execute the process # Main function to execute the process
def main(): def main():
# Get user input # Get user input
url = input(f"{Colors.BLUE}Enter the website to crawl using o3-mini : {Colors.RESET}") url = input(f"{Colors.BLUE}Enter the website to crawl: {Colors.RESET}")
objective = input(f"{Colors.BLUE}Enter your objective: {Colors.RESET}") objective = input(f"{Colors.BLUE}Enter your objective: {Colors.RESET}")
print(f"{Colors.YELLOW}Initiating web crawling process with o3 mini...{Colors.RESET}") print(f"{Colors.YELLOW}Initiating web crawling process...{Colors.RESET}")
# Find the relevant page # Find the relevant page
map_website = find_relevant_page_via_map(objective, url, app, client) map_website = find_relevant_page_via_map(objective, url, app, client)