Commit c8c78dd

fetcher formatted
1 parent 507a0d5 commit c8c78dd

File tree: 1 file changed, +61 -14 lines changed

proxier/utils/fetcher.py

Lines changed: 61 additions & 14 deletions
@@ -4,15 +4,33 @@
 
 class Fetcher:
 
-    def __init__(self, proxy_sites):
+    def __init__(self, proxy_sites: list):
+        # TODO: optimize fetch_proxy and workupload_downloader
+        """ Initialize fetcher class
+
+        Args:
+            proxy_sites (list): sites to scrape
+        """
         self.PROXY_SITES = proxy_sites
         self.PATTERN = r'(\d{1,3})\.(\d{1,3})\.(\d{1,3})\.(\d{1,3}):(\d{1,5})'
         self.FILE_NAME = 'proxy.zip'
 
-    def get_proxies_sites(self):
+    def get_proxies_sites(self) -> list:
+        """ PROXY_SITES getter
+
+        Returns:
+            list: proxy sites list
+        """
         return self.PROXY_SITES
 
-    def worker(self, site, result, cookies=None):
+    def worker(self, site: str, result: list, cookies: dict = None):
+        """ Request worker
+
+        Args:
+            site (str): http request url
+            result (list): callback var
+            cookies (dict, optional): used cookies. Defaults to None.
+        """
         res = requests.get(
             site,
             cookies=cookies,
@@ -22,74 +40,103 @@ def worker(self, site, result, cookies=None):
         if res.status_code == requests.codes.ok:
             result.append(res)
 
-    def workupload_downloader(self, download_id):
+    def workupload_downloader(self, download_id: str) -> any:
+        """ Workupload.com files downloader
+
+        Args:
+            download_id (str): file download ID
+
+        Yields:
+            any: result
+        """
         token_id = []
         self.worker(
             f'https://workupload.com/file/{download_id}',
             token_id
         )
         token_id = token_id[0].cookies['token']
-        temp_result = []
+        swap_result = []
         self.worker(
             f'https://workupload.com/start/{download_id}',
-            temp_result,
+            swap_result,
             {'token': token_id}
         )
-        if temp_result[0].status_code == requests.codes.ok:
-            temp_result = []
+
+        if swap_result[0].status_code == requests.codes.ok:
+            swap_result = []
             self.worker(
                 f'https://workupload.com/api/file/getDownloadServer/{download_id}',
-                temp_result,
+                swap_result,
                 {'token': token_id}
             )
-            temp_result = temp_result[0].json()
-            if temp_result['success']:
+            swap_result = swap_result[0].json()
+
+            if swap_result['success']:
                 result = []
                 self.worker(
-                    temp_result['data']['url'],
+                    swap_result['data']['url'],
                     result,
                     {'token': token_id}
                 )
+
                 yield result[0]
 
-    def fetch_proxy(self, site):
+    def fetch_proxy(self, site: str) -> any:
+        """ Fetch proxies
+
+        Args:
+            site (str): site to scrape
+
+        Yields:
+            any: result
+        """
         try:
             res = []
             th = threading.Thread(target=self.worker, args=(site, res,))
             th.start()
             th.join()
+
             for _ in res:
                 _ = _.text
                 html = BeautifulSoup(_, 'html.parser')
                 posts = html.find_all('h3', {'class': 'post-title'})
+
                 if len(posts) > 0:
+
                     for post in posts:
                         post = post.select('.post-title a')[0]['href']
                         fetched = re.findall(self.PATTERN, requests.get(post).text)
+
                         if len(fetched) > 0:
+
                             for proxy in fetched:
                                 yield '.'.join(proxy[:len(proxy)-1]) + f':{proxy[len(proxy)-1]}'
                         else:
                             post = requests.get(post).text
                             download_id = post.split('="https://workupload.com/')[1].split('" ')[0].split('/')[1]
+
                             for _ in self.workupload_downloader(download_id):
+
                                 with open(self.FILE_NAME, 'wb') as zip_file:
                                     zip_file.write(_.content)
                                     zip_file.close()
                                 archive = zipfile.ZipFile('proxy.zip')
+
                                 for _ in archive.namelist():
+
                                     if '.txt' in _:
                                         fetched = re.findall(self.PATTERN, archive.read(_).decode())
+
                                         for proxy in fetched:
                                             yield '.'.join(proxy[:len(proxy)-1]) + f':{proxy[len(proxy)-1]}'
+
                                 archive.close()
                                 os.remove(self.FILE_NAME)
                 else:
                     fetched = re.findall(self.PATTERN, _)
+
                     for proxy in fetched:
                         yield '.'.join(proxy[:len(proxy)-1]) + f':{proxy[len(proxy)-1]}'
         except Exception as e:
             yield False
 
-
-
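A minimal usage sketch of the reworked class; the import path mirrors the repository layout, and the site URL is a placeholder rather than a real proxy source:

    from proxier.utils.fetcher import Fetcher

    # Hypothetical proxy-list site; Fetcher expects a list of URLs to scrape.
    fetcher = Fetcher(['https://example-proxy-blog.test/'])

    for proxy in fetcher.fetch_proxy(fetcher.get_proxies_sites()[0]):
        if proxy is False:  # fetch_proxy yields False when a request raises
            break
        print(proxy)        # e.g. '203.0.113.7:8080'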