Skip to content

Commit e710584

Browse files
qosha1 and claude committed
Add fast-linkedin-scraper support with scraper adapter pattern
- Add scraper_factory.py for backend selection between linkedin-scraper and fast-linkedin-scraper
- Add scraper_adapter.py with unified interface for both scrapers
- Add playwright_wrapper.py for Playwright session management (used by fast scraper)
- Update configuration to support scraper_type selection
- Update all tools (person, company, job) to use scraper adapter
- Fix type checking issues in config loaders and playwright wrapper
- Maintain backward compatibility with existing linkedin-scraper backend

🤖 Generated with [Claude Code](https://claude.ai/code)

Co-Authored-By: Claude <noreply@anthropic.com>
1 parent 2d47b35 commit e710584

17 files changed

+1482
-256
lines changed

debug_fast_scraper.py

Lines changed: 112 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,112 @@
#!/usr/bin/env python3
"""
Debug script to test fast-linkedin-scraper directly without our wrapper.
"""

import threading
9+
def test_fast_scraper_direct():
    """Test fast-linkedin-scraper directly."""
    print("Testing fast-linkedin-scraper directly...")

    # Import check first: bail out early when the library is not installed.
    try:
        from fast_linkedin_scraper import LinkedInSession

        print("✅ fast-linkedin-scraper imported successfully")

        # Best-effort version report; not every release exposes __version__.
        try:
            import fast_linkedin_scraper

            version = getattr(fast_linkedin_scraper, "__version__", "unknown")
            print(f"📦 fast-linkedin-scraper version: {version}")
        except Exception:
            print("📦 Version info not available")
    except ImportError as exc:
        print(f"❌ Cannot import fast-linkedin-scraper: {exc}")
        return False

    # A bogus cookie is enough to exercise session construction; we only
    # care about HOW it fails, not whether login succeeds.
    fake_cookie = "li_at=dummy_cookie_for_testing"

    try:
        print("🔍 Testing LinkedInSession creation...")
        with LinkedInSession.from_cookie(fake_cookie) as sess:
            print(f"✅ Session created successfully: {type(sess)}")
            print(
                "This would normally fail with invalid cookie, but creation succeeded"
            )

    except Exception as exc:
        lowered = str(exc).lower()
        print(f"❌ Session creation failed: {exc}")

        # The known Playwright incompatibility we are hunting for.
        connection_bug = (
            "'playwrightcontextmanager' object has no attribute '_connection'"
        )
        if connection_bug in lowered:
            print("🐛 This is the _connection attribute error!")
            return False
        if "invalid" in lowered and "cookie" in lowered:
            print(
                "✅ Failed due to invalid cookie (expected), but no _connection error!"
            )
            return True
        print(f"❓ Unknown error: {lowered}")
        return False

    return True
def test_in_thread():
    """Test fast-linkedin-scraper in a separate thread."""
    print("\n" + "=" * 50)
    print("Testing fast-linkedin-scraper in a separate thread...")

    outcome = {}

    def worker():
        # Capture either the boolean result or the raised exception so the
        # main thread can report what happened.
        try:
            outcome["result"] = test_fast_scraper_direct()
        except Exception as exc:
            outcome["error"] = exc

    runner = threading.Thread(target=worker)
    runner.start()
    runner.join()

    if "error" in outcome:
        print(f"❌ Thread test failed: {outcome['error']}")
        return False

    return outcome.get("result", False)
if __name__ == "__main__":
    print("🚀 Fast-LinkedIn-Scraper Debug Test\n")

    # First pass: run the probe on the main thread.
    print("=" * 50)
    direct_ok = test_fast_scraper_direct()

    # Second pass: run it from a worker thread (simulates our async fix).
    thread_ok = test_in_thread()

    print("\n" + "=" * 50)
    print("📊 RESULTS:")
    print(f"Direct test: {'✅ PASSED' if direct_ok else '❌ FAILED'}")
    print(f"Thread test: {'✅ PASSED' if thread_ok else '❌ FAILED'}")

    if direct_ok and thread_ok:
        print("\n🎉 fast-linkedin-scraper works correctly!")
    else:
        print("\n💥 fast-linkedin-scraper has compatibility issues!")
        print("📋 Recommendations:")
        print("   1. Check fast-linkedin-scraper installation")
        print("   2. Ensure playwright is installed: playwright install")
        print(
            "   3. Try upgrading: pip install --upgrade fast-linkedin-scraper playwright"
        )

linkedin_mcp_server/authentication.py

Lines changed: 14 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@ def clear_authentication() -> bool:
8787

8888
def ensure_authentication() -> str:
8989
"""
90-
Ensure authentication is available with clear error messages.
90+
Ensure authentication is available with backend-aware error messages.
9191
9292
Returns:
9393
str: Valid LinkedIn session cookie
@@ -100,6 +100,16 @@ def ensure_authentication() -> str:
100100
except CredentialsNotFoundError:
101101
config = get_config()
102102

103-
raise CredentialsNotFoundError(
104-
ErrorMessages.no_cookie_found(config.is_interactive)
105-
)
103+
# Provide backend-specific guidance
104+
if config.linkedin.scraper_type == "fast-linkedin-scraper":
105+
error_msg = (
106+
f"No LinkedIn cookie found for {config.linkedin.scraper_type}. "
107+
"This scraper requires a valid LinkedIn session cookie. You can:\n"
108+
" 1. Set LINKEDIN_COOKIE environment variable with a valid LinkedIn session cookie\n"
109+
" 2. Use --cookie flag to provide the cookie directly\n"
110+
" 3. Run with linkedin-scraper first using --get-cookie to extract a cookie"
111+
)
112+
else:
113+
error_msg = ErrorMessages.no_cookie_found(config.is_interactive)
114+
115+
raise CredentialsNotFoundError(error_msg)

linkedin_mcp_server/cli_main.py

Lines changed: 32 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,16 @@
3131
get_config,
3232
get_keyring_name,
3333
)
34-
from linkedin_mcp_server.drivers.chrome import close_all_drivers, get_or_create_driver
34+
35+
# Chrome driver imports are now handled by the scraper factory
3536
from linkedin_mcp_server.exceptions import CredentialsNotFoundError, LinkedInMCPError
3637
from linkedin_mcp_server.logging_config import configure_logging
3738
from linkedin_mcp_server.server import create_mcp_server, shutdown_handler
39+
from linkedin_mcp_server.scraper_factory import (
40+
cleanup_scraper_backend,
41+
get_backend_capabilities,
42+
initialize_scraper_backend,
43+
)
3844
from linkedin_mcp_server.setup import run_cookie_extraction_setup, run_interactive_setup
3945

4046
sys.stdout = io.TextIOWrapper(sys.stdout.buffer, encoding="utf-8")
@@ -242,33 +248,38 @@ def ensure_authentication_ready() -> str:
242248
return run_interactive_setup()
243249

244250

245-
def initialize_driver_with_auth(authentication: str) -> None:
251+
def initialize_backend_with_auth(authentication: str) -> None:
246252
"""
247-
Phase 2: Initialize driver using existing authentication.
253+
Phase 2: Initialize scraper backend using existing authentication.
248254
249255
Args:
250-
authentication: LinkedIn session cookie
256+
authentication: LinkedIn session cookie (not used directly, backends get auth via ensure_authentication())
251257
252258
Raises:
253-
Various exceptions if driver creation or login fails
259+
Various exceptions if backend initialization fails
254260
"""
255261
config = get_config()
256262

257263
if config.server.lazy_init:
264+
backend_info = get_backend_capabilities()
258265
logger.info(
259-
"Using lazy initialization - driver will be created on first tool call"
266+
f"Using lazy initialization - {backend_info['backend']} will be created on first tool call"
260267
)
261268
return
262269

263-
logger.info("Initializing Chrome WebDriver and logging in...")
270+
backend_info = get_backend_capabilities()
271+
logger.info(f"Initializing {backend_info['backend']} backend...")
264272

265273
try:
266-
# Create driver and login with provided authentication
267-
get_or_create_driver(authentication)
268-
logger.info("✅ Web driver initialized and authenticated successfully")
274+
# Initialize the appropriate backend (authentication is handled internally)
275+
success = initialize_scraper_backend()
276+
if success:
277+
logger.info("✅ Scraper backend initialized and authenticated successfully")
278+
else:
279+
raise Exception("Backend initialization returned False")
269280

270281
except Exception as e:
271-
logger.error(f"Failed to initialize driver: {e}")
282+
logger.error(f"Failed to initialize scraper backend: {e}")
272283
raise e
273284

274285

@@ -345,21 +356,21 @@ def main() -> None:
345356
print("\n❌ Setup failed - please try again")
346357
sys.exit(1)
347358

348-
# Phase 2: Initialize Driver (if not lazy)
359+
# Phase 2: Initialize Backend (if not lazy)
349360
try:
350-
initialize_driver_with_auth(authentication)
361+
initialize_backend_with_auth(authentication)
351362
except InvalidCredentialsError as e:
352-
logger.error(f"Driver initialization failed with invalid credentials: {e}")
363+
logger.error(f"Backend initialization failed with invalid credentials: {e}")
353364

354-
# Cookie was already cleared in driver layer
365+
# Cookie was already cleared in authentication layer
355366
# In interactive mode, try setup again
356367
if config.is_interactive:
357368
print(f"\n{str(e)}")
358369
print("🔄 Starting interactive setup for new authentication...")
359370
try:
360371
new_authentication = run_interactive_setup()
361372
# Try again with new authentication
362-
initialize_driver_with_auth(new_authentication)
373+
initialize_backend_with_auth(new_authentication)
363374
logger.info("✅ Successfully authenticated with new credentials")
364375
except Exception as setup_error:
365376
logger.error(f"Setup failed: {setup_error}")
@@ -377,13 +388,13 @@ def main() -> None:
377388
RateLimitError,
378389
LoginTimeoutError,
379390
) as e:
380-
logger.error(f"Driver initialization failed: {e}")
391+
logger.error(f"Backend initialization failed: {e}")
381392
print(f"\n{str(e)}")
382393
if not config.server.lazy_init:
383394
sys.exit(1)
384395
except Exception as e:
385-
logger.error(f"Unexpected error during driver initialization: {e}")
386-
print(f"\nDriver initialization failed: {e}")
396+
logger.error(f"Unexpected error during backend initialization: {e}")
397+
print(f"\nBackend initialization failed: {e}")
387398
if not config.server.lazy_init:
388399
sys.exit(1)
389400

@@ -437,8 +448,8 @@ def exit_gracefully(exit_code: int = 0) -> None:
437448
"""Exit the application gracefully, cleaning up resources."""
438449
print("👋 Shutting down LinkedIn MCP server...")
439450

440-
# Clean up drivers
441-
close_all_drivers()
451+
# Clean up scraper backend
452+
cleanup_scraper_backend()
442453

443454
# Clean up server
444455
shutdown_handler()

0 commit comments

Comments
 (0)