8
8
9
9
import logging
10
10
import os
11
- import shutil
12
- import tempfile
13
11
from typing import Dict , Optional
14
12
15
13
from linkedin_scraper .exceptions import (
34
32
# Global driver storage to reuse sessions
35
33
active_drivers : Dict [str , webdriver .Chrome ] = {}
36
34
37
- # Store user data directories for cleanup
38
- user_data_dirs : Dict [str , str ] = {}
39
35
40
36
logger = logging .getLogger (__name__ )
41
37
42
38
43
- def create_chrome_driver ( session_id : str = "default" ) -> webdriver . Chrome :
39
+ def create_chrome_options ( config ) -> Options :
44
40
"""
45
- Create a new Chrome WebDriver instance with proper configuration.
41
+ Create Chrome options with all necessary configuration for LinkedIn scraping .
46
42
47
43
Args:
48
- session_id: Unique identifier for the session (used for cleanup)
44
+ config: AppConfig instance with Chrome configuration
49
45
50
46
Returns:
51
- webdriver.Chrome: Configured Chrome WebDriver instance
52
-
53
- Raises:
54
- WebDriverException: If driver creation fails
47
+ Options: Configured Chrome options object
55
48
"""
56
- config = get_config ()
57
-
58
- # Set up Chrome options
59
49
chrome_options = Options ()
50
+
60
51
logger .info (
61
52
f"Running browser in { 'headless' if config .chrome .headless else 'visible' } mode"
62
53
)
@@ -70,14 +61,15 @@ def create_chrome_driver(session_id: str = "default") -> webdriver.Chrome:
70
61
chrome_options .add_argument ("--window-size=1920,1080" )
71
62
chrome_options .add_argument ("--disable-extensions" )
72
63
chrome_options .add_argument ("--disable-background-timer-throttling" )
73
-
74
- # Create a unique user data directory to avoid conflicts
75
- user_data_dir = tempfile .mkdtemp (prefix = "linkedin_mcp_chrome_" )
76
- chrome_options .add_argument (f"--user-data-dir={ user_data_dir } " )
77
- logger .debug (f"Using Chrome user data directory: { user_data_dir } " )
78
-
79
- # Store the user data directory for cleanup
80
- user_data_dirs [session_id ] = user_data_dir
64
+ chrome_options .add_argument ("--disable-background-networking" )
65
+ chrome_options .add_argument ("--disable-default-apps" )
66
+ chrome_options .add_argument ("--disable-sync" )
67
+ chrome_options .add_argument ("--metrics-recording-only" )
68
+ chrome_options .add_argument ("--no-default-browser-check" )
69
+ chrome_options .add_argument ("--no-first-run" )
70
+ chrome_options .add_argument ("--disable-features=TranslateUI,BlinkGenPropertyTrees" )
71
+ chrome_options .add_argument ("--aggressive-cache-discard" )
72
+ chrome_options .add_argument ("--disable-ipc-flooding-protection" )
81
73
82
74
# Set user agent (configurable with sensible default)
83
75
user_agent = getattr (config .chrome , "user_agent" , DEFAULT_USER_AGENT )
@@ -87,20 +79,96 @@ def create_chrome_driver(session_id: str = "default") -> webdriver.Chrome:
87
79
for arg in config .chrome .browser_args :
88
80
chrome_options .add_argument (arg )
89
81
90
- # Initialize Chrome driver
91
- logger .info ("Initializing Chrome WebDriver..." )
82
+ return chrome_options
92
83
84
+
85
def create_chrome_service(config):
    """
    Build the ChromeDriver service object when an explicit driver path is known.

    The CHROMEDRIVER_PATH environment variable is consulted first; when it is
    unset or empty, config.chrome.chromedriver_path is used instead.

    Args:
        config: Configuration object exposing chrome.chromedriver_path

    Returns:
        Service or None: A Service bound to the resolved path, or None when no
        path is configured (the caller then creates the driver without a
        service argument)
    """
    env_path = os.environ.get("CHROMEDRIVER_PATH")
    # The config attribute is only read when the environment gives nothing
    resolved_path = env_path if env_path else config.chrome.chromedriver_path

    # Guard clause: nothing configured, so hand back None
    if not resolved_path:
        logger.info("Using auto-detected ChromeDriver")
        return None

    logger.info(f"Using ChromeDriver at path: {resolved_path}")
    return Service(executable_path=resolved_path)
106
+
107
+
108
def create_temporary_chrome_driver() -> webdriver.Chrome:
    """
    Spin up a short-lived Chrome WebDriver for one-off operations.

    The returned driver is not registered in the global active_drivers dict,
    so the caller is responsible for cleaning it up.

    Returns:
        webdriver.Chrome: Driver with a page load timeout of 60 and an
        implicit wait of 10 applied

    Raises:
        WebDriverException: If driver creation fails
    """
    app_config = get_config()

    logger.info("Creating temporary Chrome WebDriver...")

    # Options first, then service, both from the shared builder functions
    driver_kwargs = {"options": create_chrome_options(app_config)}
    driver_service = create_chrome_service(app_config)

    # Only pass a service when a ChromeDriver path was actually resolved
    if driver_service:
        driver_kwargs["service"] = driver_service
    temp_driver = webdriver.Chrome(**driver_kwargs)

    logger.info("Temporary Chrome WebDriver created successfully")

    # Safety net against pages that never finish loading
    temp_driver.set_page_load_timeout(60)

    # Shorter implicit wait keeps one-off operations fast
    temp_driver.implicitly_wait(10)

    return temp_driver
146
+
147
+
148
+ def create_chrome_driver () -> webdriver .Chrome :
149
+ """
150
+ Create a new Chrome WebDriver instance with proper configuration.
151
+
152
+ Returns:
153
+ webdriver.Chrome: Configured Chrome WebDriver instance
154
+
155
+ Raises:
156
+ WebDriverException: If driver creation fails
157
+ """
158
+ config = get_config ()
159
+
160
+ logger .info ("Initializing Chrome WebDriver..." )
161
+
162
+ # Create Chrome options using shared function
163
+ chrome_options = create_chrome_options (config )
164
+
165
+ # Create Chrome service using shared function
166
+ service = create_chrome_service (config )
167
+
168
+ # Initialize Chrome driver
169
+ if service :
170
+ driver = webdriver .Chrome (service = service , options = chrome_options )
171
+ else :
104
172
driver = webdriver .Chrome (options = chrome_options )
105
173
106
174
logger .info ("Chrome WebDriver initialized successfully" )
@@ -229,7 +297,7 @@ def get_or_create_driver(authentication: str) -> webdriver.Chrome:
229
297
230
298
try :
231
299
# Create new driver
232
- driver = create_chrome_driver (session_id )
300
+ driver = create_chrome_driver ()
233
301
234
302
# Login to LinkedIn
235
303
login_to_linkedin (driver , authentication )
@@ -261,7 +329,7 @@ def get_or_create_driver(authentication: str) -> webdriver.Chrome:
261
329
262
330
def close_all_drivers () -> None :
263
331
"""Close all active drivers and clean up resources."""
264
- global active_drivers , user_data_dirs
332
+ global active_drivers
265
333
266
334
for session_id , driver in active_drivers .items ():
267
335
try :
@@ -270,21 +338,8 @@ def close_all_drivers() -> None:
270
338
except Exception as e :
271
339
logger .warning (f"Error closing driver { session_id } : { e } " )
272
340
273
- # Clean up user data directory
274
- if session_id in user_data_dirs :
275
- try :
276
- user_data_dir = user_data_dirs [session_id ]
277
- if os .path .exists (user_data_dir ):
278
- shutil .rmtree (user_data_dir )
279
- logger .debug (f"Cleaned up user data directory: { user_data_dir } " )
280
- except Exception as e :
281
- logger .warning (
282
- f"Error cleaning up user data directory for session { session_id } : { e } "
283
- )
284
-
285
341
active_drivers .clear ()
286
- user_data_dirs .clear ()
287
- logger .info ("All Chrome WebDriver sessions closed and cleaned up" )
342
+ logger .info ("All Chrome WebDriver sessions closed" )
288
343
289
344
290
345
def get_active_driver () -> Optional [webdriver .Chrome ]:
0 commit comments