Skip to content

Commit 7f55ed8

Browse files
Merge pull request #22 from guy-hartstein/feat/crawl
Feat/crawl
2 parents cec8d6c + 83bc3f0 commit 7f55ed8

File tree

4 files changed: 25 additions and 103 deletions

4 files changed: 25 additions and 103 deletions

.env.example

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,3 @@
1+
TAVILY_API_KEY=
2+
OPENAI_API_KEY=
3+
GEMINI_API_KEY=

backend/nodes/grounding.py

Lines changed: 17 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -51,8 +51,14 @@ async def initial_search(self, state: InputState) -> ResearchState:
5151
)
5252

5353
try:
54-
logger.info("Initiating Tavily extraction")
55-
site_extraction = await self.tavily_client.extract(url, extract_depth="basic")
54+
logger.info("Initiating Tavily crawl")
55+
site_extraction = await self.tavily_client.crawl(
56+
url=url,
57+
instructions="Find any pages that will help us understand the company's business, products, services, and any other relevant information.",
58+
max_depth=1,
59+
max_breadth=50,
60+
extract_depth="advanced"
61+
)
5662

5763
raw_contents = []
5864
for item in site_extraction.get("results", []):
@@ -64,19 +70,19 @@ async def initial_search(self, state: InputState) -> ResearchState:
6470
'title': company,
6571
'raw_content': "\n\n".join(raw_contents)
6672
}
67-
logger.info(f"Successfully extracted {len(raw_contents)} content sections")
68-
msg += "\n✅ Successfully extracted content from website"
73+
logger.info(f"Successfully crawled {len(raw_contents)} content sections")
74+
msg += "\n✅ Successfully crawled content from website"
6975
if websocket_manager := state.get('websocket_manager'):
7076
if job_id := state.get('job_id'):
7177
await websocket_manager.send_status_update(
7278
job_id=job_id,
7379
status="processing",
74-
message="Successfully extracted content from website",
80+
message="Successfully crawled and extracted content from website",
7581
result={"step": "Initial Site Scrape"}
7682
)
7783
else:
78-
logger.warning("No content found in extraction results")
79-
msg += "\n⚠️ No content found in website extraction"
84+
logger.warning("No content found in crawl results")
85+
msg += "\n⚠️ No content found in website crawl"
8086
if websocket_manager := state.get('websocket_manager'):
8187
if job_id := state.get('job_id'):
8288
await websocket_manager.send_status_update(
@@ -87,8 +93,8 @@ async def initial_search(self, state: InputState) -> ResearchState:
8793
)
8894
except Exception as e:
8995
error_str = str(e)
90-
logger.error(f"Website extraction error: {error_str}", exc_info=True)
91-
error_msg = f"⚠️ Error extracting website content: {error_str}"
96+
logger.error(f"Website crawl error: {error_str}", exc_info=True)
97+
error_msg = f"⚠️ Error crawling website content: {error_str}"
9298
print(error_msg)
9399
msg += f"\n{error_msg}"
94100
if websocket_manager := state.get('websocket_manager'):
@@ -137,8 +143,8 @@ async def initial_search(self, state: InputState) -> ResearchState:
137143
"job_id": state.get('job_id')
138144
}
139145

140-
# If there was an error in the initial extraction, store it in the state
141-
if "⚠️ Error extracting website content:" in msg:
146+
# If there was an error in the initial crawl, store it in the state
147+
if "⚠️ Error crawling website content:" in msg:
142148
research_state["error"] = error_str
143149

144150
return research_state

requirements.txt

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ protobuf~=4.25.0
77
pydantic==2.10.6
88
pymongo==4.6.3
99
reportlab==4.3.1
10-
tavily_python==0.5.1
10+
tavily_python==0.7.1
1111
uvicorn[standard]==0.34.0
1212
websockets==12.0
1313
google-generativeai==0.8.4

ui/src/components/LocationInput.tsx

Lines changed: 4 additions & 91 deletions
Original file line number | Diff line number | Diff line change
@@ -14,97 +14,6 @@ declare global {
1414
}
1515
}
1616

17-
// Global state to track script loading
18-
let isScriptLoaded = false;
19-
let isScriptLoading = false;
20-
let scriptLoadPromise: Promise<void> | null = null;
21-
22-
const loadGoogleMapsScript = (): Promise<void> => {
23-
// If already loaded, return resolved promise
24-
if (isScriptLoaded && window.google?.maps?.places) {
25-
return Promise.resolve();
26-
}
27-
28-
// If already loading, return existing promise
29-
if (isScriptLoading && scriptLoadPromise) {
30-
return scriptLoadPromise;
31-
}
32-
33-
// Check if script is already in the document
34-
const existingScript = document.querySelector('script[src*="maps.googleapis.com/maps/api/js"]');
35-
if (existingScript && window.google?.maps?.places) {
36-
isScriptLoaded = true;
37-
return Promise.resolve();
38-
}
39-
40-
isScriptLoading = true;
41-
scriptLoadPromise = new Promise<void>((resolve, reject) => {
42-
// If API is already available, resolve immediately
43-
if (window.google?.maps?.places) {
44-
isScriptLoaded = true;
45-
isScriptLoading = false;
46-
resolve();
47-
return;
48-
}
49-
50-
// Define the callback function
51-
window.initGoogleMapsCallback = () => {
52-
isScriptLoaded = true;
53-
isScriptLoading = false;
54-
resolve();
55-
};
56-
57-
// Create script element only if it doesn't exist
58-
if (!existingScript) {
59-
const script = document.createElement('script');
60-
const apiKey = import.meta.env.VITE_GOOGLE_MAPS_API_KEY;
61-
62-
if (!apiKey) {
63-
console.error('Google Maps API key is not defined. Please set VITE_GOOGLE_MAPS_API_KEY in your environment variables.');
64-
isScriptLoading = false;
65-
reject(new Error('Google Maps API key is not defined'));
66-
return;
67-
}
68-
69-
script.src = `https://maps.googleapis.com/maps/api/js?key=${apiKey}&libraries=places&loading=async&callback=initGoogleMapsCallback`;
70-
script.async = true;
71-
script.defer = true;
72-
73-
// Handle errors
74-
script.onerror = (error) => {
75-
console.error('Error loading Google Maps script:', error);
76-
isScriptLoading = false;
77-
scriptLoadPromise = null;
78-
reject(error);
79-
};
80-
81-
// Append to document
82-
document.head.appendChild(script);
83-
} else {
84-
// Script exists but API might not be ready yet
85-
const checkInterval = setInterval(() => {
86-
if (window.google?.maps?.places) {
87-
isScriptLoaded = true;
88-
isScriptLoading = false;
89-
clearInterval(checkInterval);
90-
resolve();
91-
}
92-
}, 100);
93-
94-
// Clear interval after 10 seconds to prevent infinite checking
95-
setTimeout(() => {
96-
clearInterval(checkInterval);
97-
if (!isScriptLoaded) {
98-
isScriptLoading = false;
99-
reject(new Error('Google Maps API failed to load within timeout'));
100-
}
101-
}, 10000);
102-
}
103-
});
104-
105-
return scriptLoadPromise;
106-
};
107-
10817
const LocationInput: React.FC<LocationInputProps> = ({ value, onChange, className }) => {
10918
const inputRef = useRef<HTMLInputElement>(null);
11019
const autocompleteElementRef = useRef<any>(null);
@@ -125,6 +34,10 @@ const LocationInput: React.FC<LocationInputProps> = ({ value, onChange, classNam
12534
setIsApiLoaded(true);
12635
} catch (error) {
12736
console.error('Failed to load Google Maps API:', error);
37+
// Ensure input is visible when Google Maps fails to load
38+
if (inputRef.current) {
39+
inputRef.current.style.display = '';
40+
}
12841
}
12942
};
13043

0 commit comments

Comments
 (0)