9
9
from dateutil .parser import parse
10
10
import dateutil
11
11
import datetime
12
+ from selenium .webdriver .common .by import By
12
13
except Exception as ex :
13
14
print (ex )
14
15
@@ -59,14 +60,13 @@ def __find_status(post,layout):
59
60
if layout == "old" :
60
61
#aim is to find element that looks like <a href="URL" class="_5pcq"></a>
61
62
#after finding that element, get its href value and pass it to a different method that extracts post_id from that href
62
- status_link = post .find_element_by_class_name ( "_5pcq" ).get_attribute ("href" )
63
+ status_link = post .find_element ( By . CLASS_NAME , "_5pcq" ).get_attribute ("href" )
63
64
#extract out post id from post's url
64
65
status = Scraping_utilities ._Scraping_utilities__extract_id_from_link (status_link )
65
66
elif layout == "new" :
66
- links = post .find_elements_by_css_selector ( "a[role='link']" )
67
- link = Finder . __get_status_link ( links )
67
+ # links = post.find_elements(By.CSS_SELECTOR, "a[role='link']")
68
+ link = post . find_element ( By . CSS_SELECTOR , '.gpro0wi8.b1v8xokw' )
68
69
status_link = link .get_attribute ('href' )
69
- print ("Status Link: " ,status_link )
70
70
status = Scraping_utilities ._Scraping_utilities__extract_id_from_link (
71
71
status_link )
72
72
except NoSuchElementException :
@@ -85,10 +85,10 @@ def __find_share(post,layout):
85
85
try :
86
86
if layout == "old" :
87
87
#aim is to find the element whose data-testid attribute is UFI2SharesCount/root
88
- shares = post .find_element_by_css_selector ( "[data-testid='UFI2SharesCount/root']" ).get_attribute ('textContent' )
88
+ shares = post .find_element ( By . CSS_SELECTOR , "[data-testid='UFI2SharesCount/root']" ).get_attribute ('textContent' )
89
89
shares = Scraping_utilities ._Scraping_utilities__extract_numbers (shares )
90
90
elif layout == "new" :
91
- elements = post .find_elements_by_css_selector ( "div.gtad4xkn" )
91
+ elements = post .find_elements ( By . CSS_SELECTOR , "div.gtad4xkn" )
92
92
shares = "0"
93
93
for element in elements :
94
94
text = element .text
@@ -112,8 +112,7 @@ def __find_reactions(post):
112
112
"""finds all reaction of the facebook post using selenium's webdriver's method"""
113
113
try :
114
114
#find the element whose aria-label attribute is 'See who reacted to this'
115
- reactions_all = post .find_element_by_css_selector (
116
- '[aria-label="See who reacted to this"]' )
115
+ reactions_all = post .find_element (By .CSS_SELECTOR ,'[aria-label="See who reacted to this"]' )
117
116
except NoSuchElementException :
118
117
reactions_all = ""
119
118
except Exception as ex :
@@ -126,11 +125,11 @@ def __find_comments(post,layout):
126
125
try :
127
126
comments = ""
128
127
if layout == "old" :
129
- comments = post .find_element_by_css_selector ( "a._3hg-" ).get_attribute ('textContent' )
128
+ comments = post .find_element ( By . CSS_SELECTOR , "a._3hg-" ).get_attribute ('textContent' )
130
129
#extract numbers from text
131
130
comments = Scraping_utilities ._Scraping_utilities__extract_numbers (comments )
132
131
elif layout == "new" :
133
- elements = post .find_elements_by_css_selector ( "div.gtad4xkn" )
132
+ elements = post .find_elements ( By . CSS_SELECTOR , "div.gtad4xkn" )
134
133
comments = "0"
135
134
for element in elements :
136
135
text = element .text
@@ -164,7 +163,7 @@ def __fetch_post_passage(href):
164
163
@staticmethod
def __element_exists(element, css_selector):
    """Report whether a lookup of `css_selector` on `element` succeeds.

    Args:
        element: object exposing ``find_element(by, value)``
            (presumably a Selenium WebElement or driver; confirm against callers).
        css_selector: selector string passed with ``By.CSS_SELECTOR``.

    Returns:
        True when ``find_element`` returns without raising, False when it
        raises ``NoSuchElementException``. Any other exception propagates.
    """
    try:
        # Only the success of the lookup matters; the match itself is discarded.
        element.find_element(By.CSS_SELECTOR, css_selector)
    except NoSuchElementException:
        return False
    return True
@@ -174,12 +173,12 @@ def __find_content(post,driver,layout):
174
173
"""finds content of the facebook post using selenium's webdriver's method and returns string containing text of the posts"""
175
174
try :
176
175
if layout == "old" :
177
- post_content = post .find_element_by_class_name ( 'userContent' )
176
+ post_content = post .find_element ( By . CLASS_NAME , 'userContent' )
178
177
elif layout == "new" :
179
- post_content = post .find_element_by_css_selector ( '[data-ad-preview="message"]' )
178
+ post_content = post .find_element ( By . CSS_SELECTOR , '[data-ad-preview="message"]' )
180
179
#if 'See more' or 'Continue reading' is present in post
181
180
if Finder ._Finder__element_exists (post_content ,"span.text_exposed_link > a" ):
182
- element = post_content .find_element_by_css_selector ( "span.text_exposed_link > a" ) #grab that element
181
+ element = post_content .find_element ( By . CSS_SELECTOR , "span.text_exposed_link > a" ) #grab that element
183
182
#if element have already the onclick function, that means it is expandable paragraph
184
183
if element .get_attribute ("onclick" ):
185
184
Utilities ._Utilities__click_see_more (driver ,post_content ) #click 'see more' button to get hidden text as well
@@ -209,7 +208,7 @@ def __find_posted_time(post,layout,link_element):
209
208
#extract element that looks like <abbr class='_5ptz' data-utime="some unix timestamp"> </abbr>
210
209
#posted_time = post.find_element_by_css_selector("abbr._5ptz").get_attribute("data-utime")
211
210
if layout == "old" :
212
- posted_time = post .find_element_by_tag_name ( "abbr" ).get_attribute ('data-utime' )
211
+ posted_time = post .find_element ( By . TAG_NAME , "abbr" ).get_attribute ('data-utime' )
213
212
return datetime .datetime .fromtimestamp (float (posted_time )).isoformat ()
214
213
elif layout == "new" :
215
214
aria_label_value = link_element .get_attribute ("aria-label" )
@@ -233,7 +232,7 @@ def __find_video_url(post,page_name,status):
233
232
"""finds video of the facebook post using selenium's webdriver's method"""
234
233
try :
235
234
#if a video is found in the post, then create a video URL by combining page_name with the post's id
236
- video_element = post .find_element_by_tag_name ( "video" )
235
+ video_element = post .find_element ( By . TAG_NAME , "video" )
237
236
video = "https://www.facebook.com/{}/videos/{}" .format (page_name ,status )
238
237
239
238
except NoSuchElementException :
@@ -250,7 +249,7 @@ def __find_image_url(post):
250
249
"""finds all image of the facebook post using selenium's webdriver's method"""
251
250
try :
252
251
#find all img tag that looks like <img class="scaledImageFitWidth img" src="">
253
- images = post .find_elements_by_css_selector ( "img.scaledImageFitWidth.img" )
252
+ images = post .find_elements ( By . CSS_SELECTOR , "img.scaledImageFitWidth.img" )
254
253
#extract src attribute from all the img tag,store it in list
255
254
sources = [image .get_attribute ("src" ) for image in images ] if len (images ) > 0 else []
256
255
except NoSuchElementException :
@@ -268,10 +267,9 @@ def __find_all_posts(driver,layout):
268
267
try :
269
268
#find all posts that looks like <div class="userContentWrapper"> </div>
270
269
if layout == "old" :
271
- all_posts = driver .find_elements_by_css_selector ( "div.userContentWrapper" )
270
+ all_posts = driver .find_elements ( By . CSS_SELECTOR , "div.userContentWrapper" )
272
271
elif layout == "new" :
273
- all_posts = driver .find_elements_by_css_selector (
274
- '[aria-posinset]' )
272
+ all_posts = driver .find_elements (By .CSS_SELECTOR ,'[aria-posinset]' )
275
273
return all_posts
276
274
except NoSuchElementException :
277
275
print ("Cannot find any posts! Exiting!" )
@@ -288,17 +286,17 @@ def __find_name(driver,layout):
288
286
"""finds name of the facebook page using selenium's webdriver's method"""
289
287
try :
290
288
if layout == "old" :
291
- name = driver .find_element_by_css_selector ( 'a._64-f' ).get_attribute ('textContent' )
289
+ name = driver .find_element ( By . CSS_SELECTOR , 'a._64-f' ).get_attribute ('textContent' )
292
290
elif layout == "new" :
293
- name = driver .find_element_by_tag_name ( "strong" ).get_attribute ("textContent" )
291
+ name = driver .find_element ( By . TAG_NAME , "strong" ).get_attribute ("textContent" )
294
292
return name
295
293
except Exception as ex :
296
294
print ("error at __find_name method : {}" .format (ex ))
297
295
298
296
@staticmethod
299
297
def __detect_ui (driver ):
300
298
try :
301
- driver .find_element_by_id ( "pagelet_bluebar" )
299
+ driver .find_element ( By . ID , "pagelet_bluebar" )
302
300
return "old"
303
301
except NoSuchElementException :
304
302
return "new"
@@ -311,10 +309,10 @@ def __detect_ui(driver):
311
309
def __find_reaction (layout , reactions_all ):
312
310
try :
313
311
if layout == "old" :
314
- return reactions_all .find_elements_by_tag_name (
312
+ return reactions_all .find_elements ( By . TAG_NAME ,
315
313
"a" )
316
314
elif layout == "new" :
317
- return reactions_all .find_elements_by_tag_name (
315
+ return reactions_all .find_elements ( By . TAG_NAME ,
318
316
"div" )
319
317
320
318
except Exception as ex :
0 commit comments