Skip to content

Feat 23 add fast linkedin option #43

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 2 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ __pycache__/

# C extensions
*.so

test-config.json
# Distribution / packaging
.Python
build/
Expand Down Expand Up @@ -197,3 +197,4 @@ cython_debug/
# claude code settings
.claude
CLAUDE.md
compose/local/ngrok/ngrok.yml
12 changes: 12 additions & 0 deletions compose/local/ngrok/Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Local-development ngrok image: the upstream ngrok image plus this
# directory's entrypoint.sh wrapper.
# NOTE(review): `latest` is a floating tag, so a rebuild may pull a different
# ngrok version than the one this was tested with.
FROM ngrok/ngrok:latest

# Print the ngrok version during the build; the build fails here if the
# binary cannot be executed.
RUN ngrok --version

# Optional: add a config file (to set up multiple tunnels or use a different config)
# COPY --chown=ngrok ngrok.yml /home/ngrok/.ngrok2/
# Install the wrapper script at the image root.
# NOTE(review): COPY keeps the file mode from the build context - confirm
# entrypoint.sh is committed with the executable bit set.
COPY entrypoint.sh /

# Run as the ngrok user and expose the same name through $USER.
# NOTE(review): assumes the base image already defines an `ngrok` user - confirm.
USER ngrok
ENV USER=ngrok

# Default command for the container.
# NOTE(review): if the base image declares an ENTRYPOINT, this value is passed
# to it as arguments rather than executed directly - confirm against the base image.
CMD ["/entrypoint.sh"]
89 changes: 89 additions & 0 deletions compose/local/ngrok/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
#!/bin/sh -e
#
# Build an ngrok command line from environment variables and exec it.
#
# If any arguments are passed to this script they are executed verbatim
# instead of the generated command.
#
# Environment variables read:
#   NGROK_PORT         target to expose; falls back to HTTPS_PORT, HTTP_PORT,
#                      then APP_PORT, and to 80 for HTTP tunnels
#   NGROK_PROTOCOL     "TCP" selects a tcp tunnel; anything else selects http
#   NGROK_BINDTLS      value for --bind-tls
#   NGROK_AUTH         ngrok auth token, appended to ~/.ngrok2/ngrok.yml
#   NGROK_HOSTNAME     value for --url (requires NGROK_AUTH)
#   NGROK_REMOTE_ADDR  value for --remote-addr (requires NGROK_AUTH)
#   NGROK_REGION       value for --region
#   NGROK_HEADER       value for --host-header
#   NGROK_USERNAME / NGROK_PASSWORD
#                      HTTP basic auth pair for --auth (requires NGROK_AUTH)

# Run an explicitly supplied command instead of the generated one.
# "$#" is tested rather than `[ -n "$@" ]`: the latter is a test error with two
# or more arguments (so the command was silently skipped) and is true with none.
if [ "$#" -gt 0 ]; then
    exec "$@"
fi

# Legacy compatible:
if [ -z "$NGROK_PORT" ]; then
    if [ -n "$HTTPS_PORT" ]; then
        NGROK_PORT="$HTTPS_PORT"
    elif [ -n "$HTTP_PORT" ]; then
        NGROK_PORT="$HTTP_PORT"
    elif [ -n "$APP_PORT" ]; then
        NGROK_PORT="$APP_PORT"
    fi
fi

# ARGS is a space-separated command line; it is word-split on the final exec.
ARGS="ngrok"

# Set the protocol.
if [ "$NGROK_PROTOCOL" = "TCP" ]; then
    ARGS="$ARGS tcp"
else
    ARGS="$ARGS http"
    # Only HTTP tunnels get a default port; TCP tunnels must set one explicitly.
    NGROK_PORT="${NGROK_PORT:-80}"
fi

# Set the TLS binding flag
if [ -n "$NGROK_BINDTLS" ]; then
    ARGS="$ARGS --bind-tls=$NGROK_BINDTLS "
fi

# Set the authorization token.
if [ -n "$NGROK_AUTH" ]; then
    # Make sure the config directory exists before appending to the file.
    mkdir -p ~/.ngrok2
    echo "authtoken: $NGROK_AUTH" >> ~/.ngrok2/ngrok.yml
fi

# We use the forced NGROK_HOSTNAME here.
# This requires a valid Ngrok auth token in $NGROK_AUTH
if [ -n "$NGROK_HOSTNAME" ]; then
    if [ -z "$NGROK_AUTH" ]; then
        echo "You must set NGROK_AUTH (your Ngrok auth token) to use a custom domain."
        exit 1
    fi
    ARGS="$ARGS --url=$NGROK_HOSTNAME "
fi

# Set the remote-addr if specified
if [ -n "$NGROK_REMOTE_ADDR" ]; then
    if [ -z "$NGROK_AUTH" ]; then
        echo "You must specify an authentication token to use reserved IP addresses."
        exit 1
    fi
    ARGS="$ARGS --remote-addr=$NGROK_REMOTE_ADDR "
fi

# Set a custom region
if [ -n "$NGROK_REGION" ]; then
    ARGS="$ARGS --region=$NGROK_REGION "
fi

# Override the Host header sent to the upstream service
if [ -n "$NGROK_HEADER" ]; then
    ARGS="$ARGS --host-header=$NGROK_HEADER "
fi

# HTTP Auth config
# NOTE: when NGROK_AUTH is set but only one of NGROK_USERNAME / NGROK_PASSWORD
# is, no --auth flag is added and no error is raised.
if [ -n "$NGROK_USERNAME" ] && [ -n "$NGROK_PASSWORD" ] && [ -n "$NGROK_AUTH" ]; then
    ARGS="$ARGS --auth=$NGROK_USERNAME:$NGROK_PASSWORD "
elif [ -n "$NGROK_USERNAME" ] || [ -n "$NGROK_PASSWORD" ]; then
    if [ -z "$NGROK_AUTH" ]; then
        echo "You must specify NGROK_USERNAME, NGROK_PASSWORD, and NGROK_AUTH for custom HTTP authentication."
        exit 1
    fi
fi

# Always log to stdout in debug mode
ARGS="$ARGS --log stdout --log-level=debug"

# Set the port.
if [ -z "$NGROK_PORT" ]; then
    echo "You must specify an NGROK_PORT to expose."
    exit 1
fi

# Finally, add the port to the command (dropping any leading tcp:// scheme)
ARGS="$ARGS $(echo "$NGROK_PORT" | sed 's|^tcp://||')"

# Trace the final command line, then replace this shell with ngrok.
set -x
exec $ARGS
112 changes: 112 additions & 0 deletions debug_fast_scraper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
#!/usr/bin/env python3
"""
Debug script to test fast-linkedin-scraper directly without our wrapper.
"""

import threading


def test_fast_scraper_direct():
    """Exercise fast-linkedin-scraper without going through the project wrapper.

    Imports the library, reports its version, then opens a ``LinkedInSession``
    from a dummy cookie and classifies whatever happens.

    Returns:
        bool: ``True`` when the session opens, or when it fails with an error
        whose message mentions both "invalid" and "cookie"; ``False`` when the
        import fails, the ``_connection`` attribute error shows up, or any
        other error occurs.
    """
    print("Testing fast-linkedin-scraper directly...")

    # Import lazily so a missing package is reported instead of crashing.
    try:
        from fast_linkedin_scraper import LinkedInSession
    except ImportError as import_error:
        print(f"❌ Cannot import fast-linkedin-scraper: {import_error}")
        return False

    print("✅ fast-linkedin-scraper imported successfully")

    # Test getting version info
    try:
        import fast_linkedin_scraper

        version = getattr(fast_linkedin_scraper, "__version__", "unknown")
        print(f"📦 fast-linkedin-scraper version: {version}")
    except Exception:
        print("📦 Version info not available")

    # A dummy cookie is enough to see whether the library initializes properly.
    dummy_cookie = "li_at=dummy_cookie_for_testing"

    try:
        print("🔍 Testing LinkedInSession creation...")
        with LinkedInSession.from_cookie(dummy_cookie) as session:
            print(f"✅ Session created successfully: {type(session)}")
            print(
                "This would normally fail with invalid cookie, but creation succeeded"
            )
    except Exception as failure:
        # Matching is done on the lower-cased message text.
        error_msg = str(failure).lower()
        print(f"❌ Session creation failed: {failure}")

        connection_error = (
            "'playwrightcontextmanager' object has no attribute '_connection'"
        )
        if connection_error in error_msg:
            print("🐛 This is the _connection attribute error!")
            return False

        if "invalid" in error_msg and "cookie" in error_msg:
            print(
                "✅ Failed due to invalid cookie (expected), but no _connection error!"
            )
            return True

        print(f"❓ Unknown error: {error_msg}")
        return False

    return True


def test_in_thread():
    """Run ``test_fast_scraper_direct`` on a separate thread and report its result.

    Returns:
        bool: the value returned by ``test_fast_scraper_direct`` when it
        completes, or ``False`` when it raises (the error is printed).
    """
    print("\n" + "=" * 50)
    print("Testing fast-linkedin-scraper in a separate thread...")

    # Filled in by the worker: "result" on success, "error" on failure.
    outcome = {}

    def _run_and_capture():
        try:
            outcome["result"] = test_fast_scraper_direct()
        except Exception as exc:
            outcome["error"] = exc

    worker = threading.Thread(target=_run_and_capture)
    worker.start()
    worker.join()

    if "error" not in outcome:
        return outcome.get("result", False)

    print(f"❌ Thread test failed: {outcome['error']}")
    return False


if __name__ == "__main__":
    # Script entry point: run the direct and threaded checks, then summarize.
    separator = "=" * 50

    print("🚀 Fast-LinkedIn-Scraper Debug Test\n")

    # Test 1: Direct execution
    print(separator)
    direct_result = test_fast_scraper_direct()

    # Test 2: In thread (simulates our async fix)
    thread_result = test_in_thread()

    direct_status = "✅ PASSED" if direct_result else "❌ FAILED"
    thread_status = "✅ PASSED" if thread_result else "❌ FAILED"

    print("\n" + separator)
    print("📊 RESULTS:")
    print(f"Direct test: {direct_status}")
    print(f"Thread test: {thread_status}")

    if not (direct_result and thread_result):
        print("\n💥 fast-linkedin-scraper has compatibility issues!")
        print("📋 Recommendations:")
        recommendations = (
            "   1. Check fast-linkedin-scraper installation",
            "   2. Ensure playwright is installed: playwright install",
            "   3. Try upgrading: pip install --upgrade fast-linkedin-scraper playwright",
        )
        for recommendation in recommendations:
            print(recommendation)
    else:
        print("\n🎉 fast-linkedin-scraper works correctly!")
45 changes: 45 additions & 0 deletions docker-compose.local.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# Local development stack: the LinkedIn MCP server plus an ngrok tunnel to it.
name: 'linkedin-mcp-server'

services:
  linkedin-mcp-server:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: linkedin-mcp-server
    env_file:
      - .env
    ports:
      - "8080:8080"
    volumes:
      # Compose does not perform shell command substitution, so `$(date ...)`
      # is rejected as an invalid interpolation. For a unique profile
      # directory per run, export CHROME_PROFILE_DIR before `docker compose up`,
      # e.g. CHROME_PROFILE_DIR=/tmp/chrome-profile-$(date +%s%N).
      - ${CHROME_PROFILE_DIR:-/tmp/chrome-profile}:${CHROME_PROFILE_DIR:-/tmp/chrome-profile}
      - .:/app:z

    # streamable-http or stdio
    command:
      python linkedin-mcp-server --no-headless --user-agent "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/138.0.0.0 Safari/537.36"

    restart: unless-stopped
    networks:
      - linkedin-mcp-server-network

  ngrok:
    build:
      context: ./compose/local/ngrok
      dockerfile: ./Dockerfile
    restart: unless-stopped
    # Inside this container 127.0.0.1 is the ngrok container itself, so the
    # tunnel targets the server by its service name on the shared network.
    # NOTE(review): the server publishes 8080 above while the tunnel targets
    # 8000 - confirm which port the server actually listens on.
    command: >
      http
      linkedin-mcp-server:8000
    # command: ["ngrok", "start", "--all"]
    environment:
      - NGROK_CONFIG_FILE=/home/ngrok/.ngrok2/ngrok.yml
    ports:
      # ngrok's web inspection interface, published on host port 4041.
      - "4041:4040"
    depends_on:
      - linkedin-mcp-server
    networks:
      - linkedin-mcp-server-network

networks:
  linkedin-mcp-server-network:
    driver: bridge
18 changes: 14 additions & 4 deletions linkedin_mcp_server/authentication.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,7 +87,7 @@ def clear_authentication() -> bool:

def ensure_authentication() -> str:
"""
Ensure authentication is available with clear error messages.
Ensure authentication is available with backend-aware error messages.

Returns:
str: Valid LinkedIn session cookie
Expand All @@ -100,6 +100,16 @@ def ensure_authentication() -> str:
except CredentialsNotFoundError:
config = get_config()

raise CredentialsNotFoundError(
ErrorMessages.no_cookie_found(config.is_interactive)
)
# Provide backend-specific guidance
if config.linkedin.scraper_type == "fast-linkedin-scraper":
error_msg = (
f"No LinkedIn cookie found for {config.linkedin.scraper_type}. "
"This scraper requires a valid LinkedIn session cookie. You can:\n"
" 1. Set LINKEDIN_COOKIE environment variable with a valid LinkedIn session cookie\n"
" 2. Use --cookie flag to provide the cookie directly\n"
" 3. Run with linkedin-scraper first using --get-cookie to extract a cookie"
)
else:
error_msg = ErrorMessages.no_cookie_found(config.is_interactive)

raise CredentialsNotFoundError(error_msg)
Loading
Loading