Skip to content

Commit d5d725f

Browse files
authored
Merge pull request #204 from catalyst/coursemode-merge-310
Merge MOODLE_310_STABLE branch
2 parents 9226076 + cbae8b5 commit d5d725f

File tree

18 files changed

+149
-170
lines changed

18 files changed

+149
-170
lines changed

.github/workflows/ci.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# .github/workflows/ci.yml
2+
name: ci
3+
4+
on: [push, pull_request]
5+
6+
jobs:
7+
ci:
8+
uses: catalyst/catalyst-moodle-workflows/.github/workflows/ci.yml@main
9+
secrets:
10+
moodle_org_token: ${{ secrets.MOODLE_ORG_TOKEN }}
11+
with:
12+
disable_behat: true

.travis.yml

Lines changed: 0 additions & 114 deletions
This file was deleted.

README.md

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
[![Build Status](https://travis-ci.org/catalyst/moodle-tool_crawler.svg?branch=master)](https://travis-ci.org/catalyst/moodle-tool_crawler)
1+
[![ci](https://github.com/catalyst/moodle-tool_crawler/actions/workflows/ci.yml/badge.svg?branch=MOODLE_310_STABLE)](https://github.com/catalyst/moodle-tool_crawler/actions/workflows/ci.yml?branch=MOODLE_310_STABLE)
2+
3+
# moodle-tool_crawler
24

35
* [What is this?](#what-is-this)
46
* [How does it work?](#how-does-it-work)
@@ -30,23 +32,24 @@ Since the plugin cronjob comes in from outside it needs to authenticate in Moodl
3032

3133
# Branches
3234

33-
| Moodle verion | Branch |
34-
| ----------------- | ----------- |
35-
| Moodle 3.4 to 3.8 | master |
36-
| Totara 12+ | master |
35+
| Moodle version | Branch |
36+
| ----------------- | --------------------- |
37+
| Moodle 3.10+ | MOODLE_310_STABLE |
38+
| Moodle 3.4 to 3.9 | master |
39+
| Totara 12+ | master |
3740

3841
# Installation
3942

4043
The plugin has a dependency on the [moodle-auth_basic](https://moodle.org/plugins/auth_basic) plugin.
4144
To install the dependency plugin as a git submodule:
4245
```
43-
git submodule add https://github.com/catalyst/moodle-auth_basic auth/basic
46+
git submodule add git@github.com:catalyst/moodle-auth_basic.git auth/basic
4447
```
4548

4649

4750
Install the moodle-tool_crawler plugin as a git submodule:
4851
```
49-
git submodule add https://github.com/central-queensland-uni/moodle-tool_crawler.git admin/tool/crawler
52+
git submodule add git@github.com:catalyst/moodle-tool_crawler.git admin/tool/crawler
5053
```
5154
# Configuration
5255

@@ -156,7 +159,7 @@ be able to see the line "You are logged in as ".
156159
Once Basic HTTP auth works, test running the robot task from the CLI:
157160

158161
```
159-
php admin/tool/task/cli/schedule_task.php --execute='\tool_crawler\task\crawl_task'
162+
php admin/cli/scheduled_task.php --execute='\tool_crawler\task\crawl_task'
160163
Execute scheduled task: Parallel crawling task (tool_crawler\task\crawl_task)
161164
... used 22 dbqueries
162165
... used 0.039698123931885 seconds
@@ -168,7 +171,7 @@ will run in parallel, depending on the crawl_task setting.
168171

169172
You can manually run the adhoc tasks from the CLI with:
170173
```
171-
php admin/tool/task/cli/adhoc_task.php --execute
174+
php admin/cli/adhoc_task.php --execute
172175
Execute adhoc task: tool_crawler\task\adhoc_crawl_task
173176
... used 5733 dbqueries
174177
... used 58.239180088043 seconds

classes/helper.php

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -252,18 +252,18 @@ public static function send_email($courseid) {
252252
/**
253253
* Count broken links
254254
*
255-
* @param $courseid
255+
* @param int $courseid
256256
* @throws \dml_exception
257257
*/
258-
public static function count_broken_links($courseid) {
258+
public static function count_broken_links(int $courseid) {
259259
global $DB;
260260
$sql = "SELECT count(1) AS count
261261
FROM {tool_crawler_url} b
262262
LEFT JOIN {tool_crawler_edge} l ON l.b = b.id
263263
LEFT JOIN {tool_crawler_url} a ON l.a = a.id
264264
LEFT JOIN {course} c ON c.id = a.courseid
265-
WHERE b.httpcode != '200' AND c.id = $courseid";
266-
return $DB->count_records_sql($sql);
265+
WHERE b.httpcode != '200' AND c.id = :courseid";
266+
return $DB->count_records_sql($sql, ['courseid'=> $courseid]);
267267
}
268268

269269
}

classes/robot/crawler.php

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,7 @@ public function process_queue($verbose = false) {
529529
// Iterate through the queue.
530530
$cronstart = time();
531531
$cronstop = $cronstart + $config->maxcrontime;
532+
$hastime = true;
532533

533534
// Get an instance of the currently configured lock_factory.
534535
$lockfactory = \core\lock\lock_config::get_lock_factory('tool_crawler_process_queue');
@@ -550,7 +551,16 @@ public function process_queue($verbose = false) {
550551
}
551552
}
552553
// While we are not exceeding the maxcron time, and the queue is not empty.
553-
while (time() < $cronstop) {
554+
while ($hastime) {
555+
556+
if (\core\local\cli\shutdown::should_gracefully_exit() ||
557+
\core\task\manager::static_caches_cleared_since($cronstart)) {
558+
if ($verbose) {
559+
echo "Shutting down crawler early\n";
560+
}
561+
return true;
562+
}
563+
554564
if (empty($nodes)) {
555565
// Grab a list of items from the front of the queue. We need the first 1000
556566
// in case other workers are already locked and processing items at the front of the queue.
@@ -625,6 +635,8 @@ public function process_queue($verbose = false) {
625635
} finally {
626636
$lock->release();
627637
}
638+
639+
$hastime = time() < $cronstop;
628640
}
629641
if ($courselock) {
630642
$courselock->release();
@@ -906,8 +918,9 @@ public function parse_html($node, $external, $verbose = false) {
906918
} while ($walk);
907919

908920
$text = self::clean_html_node_content($e);
921+
$text = trim($text);
909922
if ($verbose > 1) {
910-
printf (" - Found link to: %-20s / %-50s => %-50s\n", $text, $e->href, $href);
923+
printf (" - Found link to: %-30s -> %s\n", "'$text'", $href);
911924
}
912925
$this->link_from_node_to_url($node, $href, $text, $idattr);
913926
}
@@ -1134,7 +1147,12 @@ private static function determine_filesize($curlhandle, $method, $success, $body
11341147
public function scrape($url) {
11351148

11361149
global $CFG;
1137-
$cookiefilelocation = $CFG->dataroot . '/tool_crawler_cookies.txt';
1150+
1151+
static $cookiefilelocation = '';
1152+
if (!$cookiefilelocation) {
1153+
$cookiefilelocation = make_request_directory() . '/tool_crawler_cookies.txt';
1154+
}
1155+
11381156
$config = self::get_config();
11391157

11401158
$version = moodle_major_version();

classes/table/course_links.php

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -30,19 +30,22 @@
3030
use tool_crawler\helper;
3131
use moodle_url;
3232
use html_writer;
33+
use stdClass;
3334

3435
class course_links extends table_sql implements renderable {
3536

3637
private $courseid;
38+
39+
private $page;
3740
/**
3841
* table constructor.
3942
*
40-
* @param $uniqueid table unique id
43+
* @param string $uniqueid table unique id
4144
* @param \moodle_url $url base url
45+
* @param int $courseid course id
4246
* @param int $page current page
4347
* @param int $perpage number of records per page
4448
* @throws \coding_exception
45-
* @throws \coding_exception
4649
*/
4750
public function __construct($uniqueid, \moodle_url $url, $courseid, $page = 0, $perpage = 20) {
4851
parent::__construct($uniqueid);
@@ -165,30 +168,30 @@ public function query_db($pagesize, $useinitialsbar = true) {
165168

166169
/**
167170
*
168-
* @param $row
171+
* @param stdClass $row
169172
* @return string
170173
*/
171-
protected function col_lastcrawledtime($row) {
174+
protected function col_lastcrawledtime(stdClass $row) {
172175
return userdate($row->lastcrawled);
173176
}
174177

175178
/**
176179
*
177-
* @param $row
178-
* @return string
180+
* @param stdClass $row
181+
* @return string
179182
* @throws \coding_exception
180183
*/
181-
protected function col_priority($row) {
184+
protected function col_priority(stdClass $row) {
182185
return tool_crawler_priority_level($row->priority);
183186
}
184187

185188
/**
186189
*
187-
* @param $row
190+
* @param stdClass $row
188191
* @return mixed
189192
* @throws \coding_exception
190193
*/
191-
protected function col_httpcode($row) {
194+
protected function col_httpcode(stdClass $row) {
192195
$text = tool_crawler_http_code($row);
193196
if ($translation = \tool_crawler\helper::translate_httpcode($row->httpcode)) {
194197
$text .= "<br/>" . $translation;
@@ -198,11 +201,11 @@ protected function col_httpcode($row) {
198201

199202
/**
200203
*
201-
* @param $row
204+
* @param stdClass $row
202205
* @return mixed
203206
* @throws \coding_exception
204207
*/
205-
protected function col_target($row) {
208+
protected function col_target(stdClass $row) {
206209
$text = trim($row->text);
207210
if ($text == "") {
208211
$text = get_string('missing', 'tool_crawler');
@@ -216,11 +219,11 @@ protected function col_target($row) {
216219

217220
/**
218221
*
219-
* @param $row
222+
* @param stdClass $row
220223
* @return mixed
221224
* @throws \coding_exception
222225
*/
223-
protected function col_url($row) {
226+
protected function col_url(stdClass $row) {
224227
return tool_crawler_link($row->url, $row->title, $row->redirect, false, $this->courseid);
225228
}
226229

cli/crawler.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@
5252
die();
5353
}
5454

55+
\core\local\cli\shutdown::script_supports_graceful_exit();
56+
5557
tool_crawler_crawl($options['verbose']);
5658
exit(0);
5759

db/uninstall.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
* Link checker robot plugin uninstall script.
1919
*
2020
* @package tool_crawler
21+
* @copyright 2019 Nicolas Roeser
2122
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
2223
*/
2324

lang/en/tool_crawler.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@
8080
$string['crawlend'] = 'Crawl end';
8181
$string['crawlstart'] = 'Crawl start';
8282
$string['cronticks'] = 'Cron ticks';
83+
$string['debugging'] = 'Verbose debugging';
84+
$string['debugoff'] = 'Debugging off';
85+
$string['debugnormal'] = 'Normal debugging';
86+
$string['debugverbose'] = 'Verbose debugging';
87+
$string['debuggingdesc'] = 'This turns on debugging in the task output';
8388
$string['disablebot'] = 'Disable the link crawler robot';
8489
$string['disablebotdesc'] = 'Make the crawler do nothing when a scheduled task is executed. This effectively prevents crawling of links and running of bot cleanup functions. Intended to deactivate or temporarily pause the crawler without having to disable all its scheduled tasks.';
8590
$string['duration'] = 'Duration';

0 commit comments

Comments
 (0)