Skip to content

Commit 611c9d8

Browse files
committed
Merge MOODLE_310_STABLE branch
1 parent 9226076 commit 611c9d8

File tree

15 files changed

+126
-152
lines changed

15 files changed

+126
-152
lines changed

.github/workflows/ci.yml

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
# .github/workflows/ci.yml
2+
name: ci
3+
4+
on: [push, pull_request]
5+
6+
jobs:
7+
ci:
8+
uses: catalyst/catalyst-moodle-workflows/.github/workflows/ci.yml@main
9+
secrets:
10+
moodle_org_token: ${{ secrets.MOODLE_ORG_TOKEN }}
11+
with:
12+
disable_behat: true

.travis.yml

Lines changed: 0 additions & 114 deletions
This file was deleted.

README.md

Lines changed: 12 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,6 @@
1-
[![Build Status](https://travis-ci.org/catalyst/moodle-tool_crawler.svg?branch=master)](https://travis-ci.org/catalyst/moodle-tool_crawler)
1+
[![ci](https://github.com/catalyst/moodle-tool_crawler/actions/workflows/ci.yml/badge.svg?branch=MOODLE_310_STABLE)](https://github.com/catalyst/moodle-tool_crawler/actions/workflows/ci.yml?branch=MOODLE_310_STABLE)
2+
3+
# moodle-tool_crawler
24

35
* [What is this?](#what-is-this)
46
* [How does it work?](#how-does-it-work)
@@ -30,23 +32,24 @@ Since the plugin cronjob comes in from outside it needs to authenticate in Moodl
3032

3133
# Branches
3234

33-
| Moodle verion | Branch |
34-
| ----------------- | ----------- |
35-
| Moodle 3.4 to 3.8 | master |
36-
| Totara 12+ | master |
35+
| Moodle version | Branch |
36+
| ----------------- | --------------------- |
37+
| Moodle 3.10+ | MOODLE_310_STABLE |
38+
| Moodle 3.4 to 3.9 | master |
39+
| Totara 12+ | master |
3740

3841
# Installation
3942

4043
The plugin has a dependency on the [moodle-auth_basic](https://moodle.org/plugins/auth_basic).
4144
To install the dependency plugin as a git submodule:
4245
```
43-
git submodule add https://github.com/catalyst/moodle-auth_basic auth/basic
46+
git submodule add git@github.com:catalyst/moodle-auth_basic.git auth/basic
4447
```
4548

4649

4750
Install plugin moodle-tool_crawler as a git submodule:
4851
```
49-
git submodule add https://github.com/central-queensland-uni/moodle-tool_crawler.git admin/tool/crawler
52+
git submodule add git@github.com:catalyst/moodle-tool_crawler.git admin/tool/crawler
5053
```
5154
# Configuration
5255

@@ -156,7 +159,7 @@ be able to see the line "You are logged in as ".
156159
Once Basic HTTP auth works test running the robot task from the CLI:
157160

158161
```
159-
php admin/tool/task/cli/schedule_task.php --execute='\tool_crawler\task\crawl_task'
162+
php admin/cli/scheduled_task.php --execute='\tool_crawler\task\crawl_task'
160163
Execute scheduled task: Parallel crawling task (tool_crawler\task\crawl_task)
161164
... used 22 dbqueries
162165
... used 0.039698123931885 seconds
@@ -168,7 +171,7 @@ will run in parallel, depending on the crawl_task setting.
168171

169172
You can manually run the adhoc tasks from the CLI with:
170173
```
171-
php admin/tool/task/cli/adhoc_task.php --execute
174+
php admin/cli/adhoc_task.php --execute
172175
Execute adhoc task: tool_crawler\task\adhoc_crawl_task
173176
... used 5733 dbqueries
174177
... used 58.239180088043 seconds

classes/robot/crawler.php

Lines changed: 21 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -529,6 +529,7 @@ public function process_queue($verbose = false) {
529529
// Iterate through the queue.
530530
$cronstart = time();
531531
$cronstop = $cronstart + $config->maxcrontime;
532+
$hastime = true;
532533

533534
// Get an instance of the currently configured lock_factory.
534535
$lockfactory = \core\lock\lock_config::get_lock_factory('tool_crawler_process_queue');
@@ -550,7 +551,16 @@ public function process_queue($verbose = false) {
550551
}
551552
}
552553
// While we are not exceeding the maxcron time, and the queue is not empty.
553-
while (time() < $cronstop) {
554+
while ($hastime) {
555+
556+
if (\core\local\cli\shutdown::should_gracefully_exit() ||
557+
\core\task\manager::static_caches_cleared_since($cronstart)) {
558+
if ($verbose) {
559+
echo "Shutting down crawler early\n";
560+
}
561+
return true;
562+
}
563+
554564
if (empty($nodes)) {
555565
// Grab a list of items from the front of the queue. We need the first 1000
556566
// in case other workers are already locked and processing items at the front of the queue.
@@ -625,6 +635,8 @@ public function process_queue($verbose = false) {
625635
} finally {
626636
$lock->release();
627637
}
638+
639+
$hastime = time() < $cronstop;
628640
}
629641
if ($courselock) {
630642
$courselock->release();
@@ -906,8 +918,9 @@ public function parse_html($node, $external, $verbose = false) {
906918
} while ($walk);
907919

908920
$text = self::clean_html_node_content($e);
921+
$text = trim($text);
909922
if ($verbose > 1) {
910-
printf (" - Found link to: %-20s / %-50s => %-50s\n", $text, $e->href, $href);
923+
printf (" - Found link to: %-30s -> %s\n", "'$text'", $href);
911924
}
912925
$this->link_from_node_to_url($node, $href, $text, $idattr);
913926
}
@@ -1134,7 +1147,12 @@ private static function determine_filesize($curlhandle, $method, $success, $body
11341147
public function scrape($url) {
11351148

11361149
global $CFG;
1137-
$cookiefilelocation = $CFG->dataroot . '/tool_crawler_cookies.txt';
1150+
1151+
static $cookiefilelocation = '';
1152+
if (!$cookiefilelocation) {
1153+
$cookiefilelocation = make_request_directory() . '/tool_crawler_cookies.txt';
1154+
}
1155+
11381156
$config = self::get_config();
11391157

11401158
$version = moodle_major_version();

cli/crawler.php

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,8 @@
5252
die();
5353
}
5454

55+
\core\local\cli\shutdown::script_supports_graceful_exit();
56+
5557
tool_crawler_crawl($options['verbose']);
5658
exit(0);
5759

db/uninstall.php

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
* Link checker robot plugin uninstall script.
1919
*
2020
* @package tool_crawler
21+
* @copyright 2019 Nicolas Roeser
2122
* @license http://www.gnu.org/copyleft/gpl.html GNU GPL v3 or later
2223
*/
2324

lang/en/tool_crawler.php

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,11 @@
8080
$string['crawlend'] = 'Crawl end';
8181
$string['crawlstart'] = 'Crawl start';
8282
$string['cronticks'] = 'Cron ticks';
83+
$string['debugging'] = 'Verbose debugging';
84+
$string['debugoff'] = 'Debugging off';
85+
$string['debugnormal'] = 'Normal debugging';
86+
$string['debugverbose'] = 'Verbose debugging';
87+
$string['debuggingdesc'] = 'This turns on debugging in the task output';
8388
$string['disablebot'] = 'Disable the link crawler robot';
8489
$string['disablebotdesc'] = 'Make the crawler do nothing when a scheduled task is executed. This effectively prevents crawling of links and running of bot cleanup functions. Intended to deactivate or temporarily pause the crawler without having to disable all its scheduled tasks.';
8590
$string['duration'] = 'Duration';

lib.php

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,11 @@ function tool_crawler_crawl($verbose = false) {
4545
$robot = new crawler();
4646
$url = new url();
4747
$config = $robot::get_config();
48+
49+
if ($config->debugging) {
50+
$verbose = $config->debugging;
51+
}
52+
4853
$crawlstart = $config->crawlstart;
4954
$crawlend = $config->crawlend;
5055

@@ -179,7 +184,7 @@ function tool_crawler_extend_navigation_course($navigation, $course, $coursecont
179184
if ($coursereports && ($siteconfig || $courseconfig)) {
180185
$node = $coursereports->add(
181186
get_string('pluginname', 'tool_crawler'),
182-
null,
187+
new moodle_url('/admin/tool/crawler/report.php', array('report' => 'queued', 'course' => $course->id)),
183188
navigation_node::TYPE_CONTAINER,
184189
null,
185190
'linkchecker',

locallib.php

Lines changed: 11 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -45,14 +45,17 @@
4545
* @return string HTML snippet which can be used in output.
4646
*/
4747
function tool_crawler_link($url, $label, $redirect = '', $labelishtml = false, $courseid = 0) {
48+
if (empty($label)) {
49+
// Ensure that label is always at least a string.
50+
$label = '';
51+
}
4852
if (!$labelishtml) {
4953
$label = htmlspecialchars($label, ENT_NOQUOTES | ENT_HTML401);
5054
}
5155

52-
$html = html_writer::link(new moodle_url('url.php', array('courseid' => $courseid, 'url' => $url)), $label) .
53-
' ' .
54-
html_writer::link($url, '', array('target' => 'link')) .
55-
'<br><small>' . htmlspecialchars($url, ENT_NOQUOTES | ENT_HTML401) . '</small>';
56+
$canviewsitelevelreports = has_capability('moodle/site:config', context_system::instance());
57+
$html = $canviewsitelevelreports ? html_writer::link(new moodle_url('url.php', array('courseid' => $courseid, 'url' => $url)), $label) : $label;
58+
$html .= '<br><small>' . html_writer::link($url, htmlspecialchars($url, ENT_NOQUOTES | ENT_HTML401), ['target' => 'link']) . '</small>';
5659

5760
if ($redirect) {
5861
$linkhtmlsnippet = html_writer::link($redirect, htmlspecialchars($redirect, ENT_NOQUOTES | ENT_HTML401));
@@ -175,11 +178,11 @@ function tool_crawler_sql_oversize_filter($tablealias = null) {
175178
$tbl = '';
176179
}
177180

178-
$where = "( ${tbl}filesize > ?
179-
OR ( ${tbl}filesize IS NULL
180-
AND ${tbl}lastcrawled IS NOT NULL
181+
$where = "( {$tbl}filesize > ?
182+
OR ( {$tbl}filesize IS NULL
183+
AND {$tbl}lastcrawled IS NOT NULL
181184
)
182-
OR ${tbl}filesizestatus = ?
185+
OR {$tbl}filesizestatus = ?
183186
)";
184187

185188
$bigfilesize = get_config('tool_crawler', 'bigfilesize');

report.php

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -54,10 +54,13 @@
5454
$coursecontext = context_course::instance($courseid);
5555
require_capability('moodle/course:update', $coursecontext);
5656

57+
$coursename = format_string($course->fullname, true, array('context' => $coursecontext));
5758
$PAGE->set_context($coursecontext);
5859
$PAGE->set_url($navurl);
59-
$PAGE->set_pagelayout('admin');
6060
$PAGE->set_title( get_string($report, 'tool_crawler') );
61+
$PAGE->set_heading($coursename);
62+
$PAGE->set_pagelayout('incourse');
63+
$PAGE->add_body_class('limitedwidth');
6164
$sqlfilter = ' AND c.id = '.$courseid;
6265

6366
} else {

0 commit comments

Comments
 (0)