@@ -81,13 +81,13 @@ class crawler {
81
81
* @return mixed hash-like object or default array $defaults if no config found.
82
82
*/
83
83
public static function get_config () {
84
- $ defaults = array (
84
+ $ defaults = [
85
85
'crawlstart ' => 0 ,
86
86
'crawlend ' => 0 ,
87
87
'crawltick ' => 0 ,
88
88
'retentionperiod ' => 86400 , // 86400 seconds is 1 day (this comment used to say "1 week"); TODO confirm the intended default.
89
- 'recentactivity ' => 1
90
- ) ;
89
+ 'recentactivity ' => 1 ,
90
+ ] ;
91
91
$ config = (object ) array_merge ( $ defaults , (array ) get_config ('tool_crawler ' ) ); // Values from get_config() take precedence over $defaults for matching keys.
92
92
return $ config ;
93
93
}
@@ -105,7 +105,7 @@ public function is_bot_valid() {
105
105
if (!$ botusername ) {
106
106
return get_string ('configmissing ' , 'tool_crawler ' );
107
107
}
108
- $ botuser = $ DB ->get_record ('user ' , array ( 'username ' => $ botusername) );
108
+ $ botuser = $ DB ->get_record ('user ' , [ 'username ' => $ botusername] );
109
109
if ( !$ botuser ) {
110
110
return get_string ('botusermissing ' , 'tool_crawler ' ) .
111
111
' <a href="?action=makebot"> ' . get_string ('autocreate ' , 'tool_crawler ' ) . '</a> ' ;
@@ -118,14 +118,14 @@ public function is_bot_valid() {
118
118
}
119
119
if ($ result ->redirect ) {
120
120
return get_string ('bottestpageredirected ' , 'tool_crawler ' ,
121
- array ( 'resredirect ' => htmlspecialchars ($ result ->redirect , ENT_NOQUOTES | ENT_HTML401 )) );
121
+ [ 'resredirect ' => htmlspecialchars ($ result ->redirect , ENT_NOQUOTES | ENT_HTML401 )] );
122
122
}
123
123
124
124
// When the bot successfully scraped the test page (see above), it was logged in and used its own language. So we have to
125
125
// retrieve the expected string in the language set for the _crawler user_, and not in the _current user’s_ language.
126
126
$ oldforcelang = force_current_language ($ botuser ->lang );
127
127
$ expectedcontent = get_string ('hellorobot ' , 'tool_crawler ' ,
128
- array ( 'botusername ' => self ::get_config ()->botusername ) );
128
+ [ 'botusername ' => self ::get_config ()->botusername ] );
129
129
force_current_language ($ oldforcelang );
130
130
131
131
$ hello = strpos ($ result ->contents , $ expectedcontent );
@@ -144,11 +144,11 @@ public function auto_create_bot() {
144
144
// TODO roles?
145
145
146
146
$ botusername = self ::get_config ()->botusername ;
147
- $ botuser = $ DB ->get_record ('user ' , array ( 'username ' => $ botusername) );
147
+ $ botuser = $ DB ->get_record ('user ' , [ 'username ' => $ botusername] );
148
148
if ($ botuser ) {
149
149
return $ botuser ;
150
150
} else {
151
- $ botuser = (object ) array () ;
151
+ $ botuser = (object ) [] ;
152
152
$ botuser ->username = $ botusername ;
153
153
$ botuser ->password = hash_internal_user_password (self ::get_config ()->botpassword );
154
154
$ botuser ->firstname = 'Link checker ' ;
@@ -217,7 +217,7 @@ public function absolute_url($base, $rel) {
217
217
}
218
218
219
219
// Replace '//' or '/./' or '/foo/../' with '/' */.
220
- $ re = array ( '#(/\.?/)# ' , '#/(?!\.\.)[^/]+/\.\./# ' ) ;
220
+ $ re = [ '#(/\.?/)# ' , '#/(?!\.\.)[^/]+/\.\./# ' ] ;
221
221
do {
222
222
$ abs = preg_replace ($ re , '/ ' , $ abs , -1 , $ n );
223
223
} while ($ n > 0 );
@@ -341,13 +341,13 @@ public function mark_for_crawl($baseurl, $url, $courseid = null, $priority = TOO
341
341
// then avoid scraping the URL at all, if it has been excluded.
342
342
$ shortname = '' ;
343
343
if (preg_match ('/\/course\/(info|view).php\?id=(\d+)/ ' , $ url , $ matches ) ) {
344
- $ course = $ DB ->get_record ('course ' , array ( 'id ' => $ matches [2 ]) );
344
+ $ course = $ DB ->get_record ('course ' , [ 'id ' => $ matches [2 ]] );
345
345
if ($ course ) {
346
346
$ shortname = $ course ->shortname ;
347
347
}
348
348
}
349
349
if (preg_match ('/\/enrol\/index.php\?id=(\d+)/ ' , $ url , $ matches ) ) {
350
- $ course = $ DB ->get_record ('course ' , array ( 'id ' => $ matches [1 ]) );
350
+ $ course = $ DB ->get_record ('course ' , [ 'id ' => $ matches [1 ]] );
351
351
if ($ course ) {
352
352
$ shortname = $ course ->shortname ;
353
353
}
@@ -358,13 +358,13 @@ public function mark_for_crawl($baseurl, $url, $courseid = null, $priority = TOO
358
358
c.shortname
359
359
FROM {course_modules} cm
360
360
JOIN {course} c ON cm.course = c.id
361
- WHERE cm.id = ? " , array ( $ matches [3 ]) );
361
+ WHERE cm.id = ? " , [ $ matches [3 ]] );
362
362
if ($ cm ) {
363
363
$ shortname = $ cm ->shortname ;
364
364
}
365
365
}
366
366
if (preg_match ('/\/course\/(.*?)\// ' , $ url , $ matches ) ) {
367
- $ course = $ DB ->get_record ('course ' , array ( 'shortname ' => $ matches [1 ]) );
367
+ $ course = $ DB ->get_record ('course ' , [ 'shortname ' => $ matches [1 ]] );
368
368
if ($ course ) {
369
369
$ shortname = $ course ->shortname ;
370
370
}
@@ -383,7 +383,7 @@ public function mark_for_crawl($baseurl, $url, $courseid = null, $priority = TOO
383
383
384
384
if (!$ node ) {
385
385
// If not in the queue then add it.
386
- $ node = (object ) array () ;
386
+ $ node = (object ) [] ;
387
387
$ node ->timecreated = time ();
388
388
$ node ->url = $ url ;
389
389
$ node ->externalurl = self ::is_external ($ url );
@@ -439,7 +439,7 @@ public function get_num_links() {
439
439
SELECT COUNT(*)
440
440
FROM {tool_crawler_edge}
441
441
WHERE lastmod >= ? " ,
442
- array ( self ::get_config ()->crawlstart ) );
442
+ [ self ::get_config ()->crawlstart ] );
443
443
}
444
444
445
445
/**
@@ -503,7 +503,7 @@ public function get_old_queue_size() {
503
503
SELECT COUNT(*)
504
504
FROM {tool_crawler_url}
505
505
WHERE lastcrawled < ? " ,
506
- array ( self ::get_config ()->crawlstart ) );
506
+ [ self ::get_config ()->crawlstart ] );
507
507
}
508
508
509
509
/**
@@ -712,7 +712,7 @@ protected static function clean_html_node_content($node) {
712
712
713
713
$ elementname = mb_strtolower ($ node ->tag , 'UTF-8 ' );
714
714
715
- $ ignoredelements = array ( 'script ' , 'style ' ) ;
715
+ $ ignoredelements = [ 'script ' , 'style ' ] ;
716
716
if (in_array ($ elementname , $ ignoredelements )) {
717
717
return '' ;
718
718
} else if ($ elementname == 'img ' ) {
@@ -833,7 +833,7 @@ public function parse_html($node, $external, $verbose = false) {
833
833
}
834
834
835
835
// Finds each link in the html and adds to database.
836
- $ seen = array () ;
836
+ $ seen = [] ;
837
837
838
838
$ links = $ html ->find ('a[href] ' );
839
839
foreach ($ links as $ e ) {
@@ -910,7 +910,7 @@ private function link_from_node_to_url($from, $url, $text, $idattr) {
910
910
}
911
911
912
912
// For this link, insert or update with the current time for last modified.
913
- $ link = $ DB ->get_record ('tool_crawler_edge ' , array ( 'a ' => $ from ->id , 'b ' => $ to ->id ) );
913
+ $ link = $ DB ->get_record ('tool_crawler_edge ' , [ 'a ' => $ from ->id , 'b ' => $ to ->id ] );
914
914
if (!$ link ) {
915
915
$ link = new \stdClass ();
916
916
$ link ->a = $ from ->id ;
@@ -1136,7 +1136,7 @@ public function scrape($url) {
1136
1136
$ sizelimit = TOOL_CRAWLER_REDIRECTION_DOWNLOAD_LIMIT ; // Assume at first that we will be redirected.
1137
1137
$ abortdownload = false ;
1138
1138
1139
- $ chunks = array () ;
1139
+ $ chunks = [] ;
1140
1140
$ targetisexternal = null ; // Cache for whether target resource is external.
1141
1141
$ targetishtml = null ; // Cache for whether target resource is an HTML document.
1142
1142
$ targetlengthknown = null ; // Cache for whether target resource length is known.
@@ -1288,7 +1288,7 @@ public function scrape($url) {
1288
1288
$ method = 'GET ' ;
1289
1289
}
1290
1290
1291
- $ result = (object ) array () ;
1291
+ $ result = (object ) [] ;
1292
1292
$ result ->url = $ url ;
1293
1293
1294
1294
$ needhttprequest = true ; // Whether we have to send (a further) HTTP request.
@@ -1401,7 +1401,7 @@ public function scrape($url) {
1401
1401
$ method = 'GET ' ;
1402
1402
1403
1403
$ sizelimit = TOOL_CRAWLER_REDIRECTION_DOWNLOAD_LIMIT ; // Assume at first that we will be redirected.
1404
- $ chunks = array () ;
1404
+ $ chunks = [] ;
1405
1405
$ firstheaderline = true ;
1406
1406
$ headersize = 0 ;
1407
1407
$ targetisexternal = null ;
@@ -1515,7 +1515,7 @@ public function get_recentcourses() {
1515
1515
1516
1516
// Do not try to fetch recent courses if uselogs setting is not enabled.
1517
1517
if ($ config ->uselogs == false ) {
1518
- return array () ;
1518
+ return [] ;
1519
1519
}
1520
1520
1521
1521
$ startingtimerecentactivity = strtotime ("- $ config ->recentactivity days " , time ());
0 commit comments