@@ -49,11 +49,12 @@ posts_count = 10
49
49
browser = "firefox"
50
50
proxy = "IP:PORT" # if the proxy requires authentication, use user:password@IP:PORT
51
51
timeout = 600 # 600 seconds
52
- meta_ai = Facebook_scraper(page_name,posts_count,browser,proxy = proxy,timeout = timeout)
52
+ headless = True
53
+ meta_ai = Facebook_scraper(page_name, posts_count, browser, proxy = proxy, timeout = timeout, headless = headless)
53
54
54
55
```
55
56
56
- <h3 > Parameters for <code >Facebook_scraper(page_name,posts_count,browser,proxy,timeout) </code > class </h3 >
57
+ <h3 > Parameters for <code >Facebook_scraper(page_name, posts_count, browser, proxy, timeout, headless ) </code > class </h3 >
57
58
<table >
58
59
<th >
59
60
<tr >
@@ -68,10 +69,10 @@ meta_ai = Facebook_scraper(page_name,posts_count,browser,proxy=proxy,timeout=tim
68
69
page_name
69
70
</td >
70
71
<td >
71
- string
72
+ String
72
73
</td >
73
74
<td >
74
- name of the facebook page
75
+ Name of the facebook page
75
76
</td >
76
77
</tr >
77
78
@@ -80,10 +81,10 @@ name of the facebook page
80
81
posts_count
81
82
</td >
82
83
<td >
83
- integer
84
+ Integer
84
85
</td >
85
86
<td >
86
- number of posts to scrap, if not passed default is 10
87
+ Number of posts to scrape. If not passed, the default is 10
87
88
</td >
88
89
</tr >
89
90
@@ -92,10 +93,10 @@ number of posts to scrap, if not passed default is 10
92
93
browser
93
94
</td >
94
95
<td >
95
- string
96
+ String
96
97
</td >
97
98
<td >
98
- which browser to use, either chrome or firefox. if not passed,default is chrome
99
+ Which browser to use, either chrome or firefox. If not passed, the default is chrome
99
100
</td >
100
101
</tr >
101
102
@@ -104,24 +105,36 @@ which browser to use, either chrome or firefox. if not passed,default is chrome
104
105
proxy(optional)
105
106
</td >
106
107
<td >
107
- string
108
+ String
108
109
</td >
109
110
<td >
110
- optional argument, if user wants to set proxy, if proxy requires authentication then the format will be <code > user: password @IP: PORT </code >
111
+ Optional argument for setting a proxy. If the proxy requires authentication, the format is <code >user:password@IP:PORT</code >
111
112
</td >
112
113
</tr >
113
114
<tr >
114
115
<td >
115
116
timeout
116
117
</td >
117
118
<td >
118
- integer
119
+ Integer
119
120
</td >
120
121
<td >
121
122
The maximum amount of time the bot should run for. If not passed, the default timeout is set to 10 minutes
122
123
</code >
123
124
</td >
124
125
</tr >
126
+ <tr >
127
+ <td >
128
+ headless
129
+ </td >
130
+ <td >
131
+ Boolean
132
+ </td >
133
+ <td >
134
+ Whether to run the browser in headless mode. Default is True
135
+ </code >
136
+ </td >
137
+ </tr >
125
138
126
139
</table >
127
140
<br >
@@ -212,7 +225,7 @@ Output Structure for JSON format:
212
225
213
226
filename = "data_file" # file name without the CSV extension, where data will be saved
214
227
directory = "E:\data" # directory where CSV file will be saved
215
- meta_ai.scrap_to_csv(filename,directory)
228
+ meta_ai.scrap_to_csv(filename, directory)
216
229
217
230
```
218
231
@@ -228,7 +241,7 @@ id,name,shares,likes,loves,wow,cares,sad,angry,haha,reactions_count,comments,con
228
241
<hr >
229
242
<br >
230
243
231
- <h3 > Parameters for <code > scrap_to_csv(filename,directory) </code > method. </h3 >
244
+ <h3 > Parameters for <code > scrap_to_csv(filename, directory) </code > method. </h3 >
232
245
233
246
<table >
234
247
<th >
@@ -244,11 +257,11 @@ id,name,shares,likes,loves,wow,cares,sad,angry,haha,reactions_count,comments,con
244
257
filename
245
258
</td >
246
259
<td >
247
- string
260
+ String
248
261
</td >
249
262
250
263
<td >
251
- name of the CSV file where post's data will be saved
264
+ Name of the CSV file where the posts' data will be saved
252
265
</td >
253
266
254
267
</tr >
@@ -258,11 +271,11 @@ name of the CSV file where post's data will be saved
258
271
directory
259
272
</td >
260
273
<td >
261
- string
274
+ String
262
275
</td >
263
276
264
277
<td >
265
- directory where CSV file have to be stored.
278
+ Directory where the CSV file has to be stored.
266
279
</td >
267
280
268
281
</tr >
@@ -305,7 +318,7 @@ Description
305
318
id
306
319
</td >
307
320
<td >
308
- string
321
+ String
309
322
</td >
310
323
<td >
311
324
Post identifier (integer cast to a string)
@@ -319,7 +332,7 @@ Post Identifier(integer casted inside string)
319
332
name
320
333
</td >
321
334
<td >
322
- string
335
+ String
323
336
</td >
324
337
<td >
325
338
Name of the page
@@ -331,10 +344,10 @@ Name of the page
331
344
shares
332
345
</td >
333
346
<td >
334
- integer
347
+ Integer
335
348
</td >
336
349
<td >
337
- share count of post
350
+ Share count of post
338
351
</td >
339
352
</tr >
340
353
@@ -343,10 +356,10 @@ share count of post
343
356
reactions
344
357
</td >
345
358
<td >
346
- dictionary
359
+ Dictionary
347
360
</td >
348
361
<td >
349
- dictionary containing reactions as keys and its count as value. Keys => <code > [ "likes","loves","wow","cares","sad","angry","haha"] </code >
362
+ Dictionary containing reactions as keys and their counts as values. Keys => <code > [ "likes","loves","wow","cares","sad","angry","haha"] </code >
350
363
</td >
351
364
</tr >
352
365
@@ -355,10 +368,10 @@ dictionary containing reactions as keys and its count as value. Keys => <code> [
355
368
reaction_count
356
369
</td >
357
370
<td >
358
- integer
371
+ Integer
359
372
</td >
360
373
<td >
361
- total reaction count of post
374
+ Total reaction count of post
362
375
</td >
363
376
</tr >
364
377
@@ -368,10 +381,10 @@ total reaction count of post
368
381
comments
369
382
</td >
370
383
<td >
371
- integer
384
+ Integer
372
385
</td >
373
386
<td >
374
- comments count of post
387
+ Comments count of post
375
388
</td >
376
389
</tr >
377
390
@@ -380,10 +393,10 @@ comments count of post
380
393
content
381
394
</td >
382
395
<td >
383
- string
396
+ String
384
397
</td >
385
398
<td >
386
- content of post as text
399
+ Content of post as text
387
400
</td >
388
401
</tr >
389
402
@@ -392,7 +405,7 @@ content of post as text
392
405
video
393
406
</td >
394
407
<td >
395
- string
408
+ String
396
409
</td >
397
410
<td >
398
411
URL of video present in that post
@@ -405,10 +418,10 @@ URL of video present in that post
405
418
image
406
419
</td >
407
420
<td >
408
- list
421
+ List
409
422
</td >
410
423
<td >
411
- python's list containing URLs of all images present in the post
424
+ List containing URLs of all images present in the post
412
425
</td >
413
426
</tr >
414
427
@@ -417,10 +430,10 @@ python's list containing URLs of all images present in the post
417
430
posted_on
418
431
</td >
419
432
<td >
420
- datetime
433
+ Datetime
421
434
</td >
422
435
<td >
423
- time at which post was posted(in ISO 8601 format)
436
+ Time at which the post was posted (in ISO 8601 format)
424
437
</td >
425
438
</tr >
426
439
@@ -429,7 +442,7 @@ time at which post was posted(in ISO 8601 format)
429
442
post_url
430
443
</td >
431
444
<td >
432
- string
445
+ String
433
446
</td >
434
447
<td >
435
448
URL for that post
@@ -449,9 +462,10 @@ URL for that post
449
462
<h2 > Tech </h2 >
450
463
<p >This project uses different libraries to work properly.</p >
451
464
<ul >
452
- <li > <a href =" https://www.selenium.dev/ " target =' _blank ' >selenium</a >
453
- <li > <a href =" https://pypi.org/project/webdriver-manager/ " target =' _blank ' >webdriver manager</a >
454
- <li > <a href =" https://pypi.org/project/python-dateutil/ " target =' _blank ' >python dateutil</a >
465
+ <li > <a href =" https://www.selenium.dev/ " target =' _blank ' >Selenium</a ></li >
466
+ <li > <a href =" https://pypi.org/project/webdriver-manager/ " target =' _blank ' >Webdriver Manager</a ></li >
467
+ <li > <a href =" https://pypi.org/project/python-dateutil/ " target =' _blank ' >Python Dateutil</a ></li >
468
+ <li > <a href =" https://pypi.org/project/selenium-wire/ " target =' _blank ' >Selenium-wire</a ></li >
455
469
</ul >
456
470
<br >
457
471
0 commit comments