File tree Expand file tree Collapse file tree 1 file changed +54
-0
lines changed
src/script/webgraph_ranking Expand file tree Collapse file tree 1 file changed +54
-0
lines changed Original file line number Diff line number Diff line change
#!/bin/bash
#
# Join the per-vertex out-/indegrees of a webgraph with the vertex names and
# extract the vertices with the highest outdegree and the highest indegree.
#
# Usage: <this-script> <graph-name> [<type>]
#   graph-name  base name of the webgraph (without the file suffix .graph)
#   type        type (level) of the graph aggregation: domain (default) or host
#
# Reads:
#   <graph-name>.outdegrees, <graph-name>.indegrees
#       (if either is missing, run_webgraph.sh next to this script is invoked
#        with it.unimi.dsi.webgraph.Stats --save-degrees, which is expected
#        to create them)
#   <graph-name>-vertices.txt.gz  when type is "domain"
#   vertices/*.txt.gz             for any other type (relative to the
#                                 current working directory)
# Writes:
#   <graph-name>-outdegrees-indegrees.txt.gz
#   <graph-name>-outdegrees-indegrees-topout.txt.gz
#   <graph-name>-outdegrees-indegrees-topin.txt.gz

set -eo pipefail

NAME="$1"
TYPE="${2:-domain}"

if [ -z "$NAME" ]; then
    # Called without a graph name: print the usage on stderr and fail.
    {
        echo "Usage: $(basename "$0") <graph-name> [<type>]"
        echo -e "\tgraph-name\tbase name of the webgraph (without the file suffix .graph)"
        echo -e "\ttype\ttype (level) of the graph aggregation: domain (default) or host"
    } >&2
    exit 1
fi

# Wrapper script located in the same directory as this script.
WG="$(dirname "$0")/run_webgraph.sh"

# Compute the degree files only if they are not both present already.
if [[ ! -e "$NAME.outdegrees" || ! -e "$NAME.indegrees" ]]; then
    "$WG" it.unimi.dsi.webgraph.Stats --save-degrees "$NAME"
fi

# Drop the first column of the vertices file(s) and prepend the out- and
# indegree columns line by line.
# NOTE(review): this assumes the vertices are listed in the same order as the
# lines of the degree files -- confirm against the graph construction.
# Any TYPE other than "domain" falls through to the host-level vertices.
if [[ "$TYPE" == "domain" ]]; then
    zcat "$NAME-vertices.txt.gz"
else
    zcat vertices/*.txt.gz
fi \
    | cut -f2- \
    | paste "$NAME.outdegrees" "$NAME.indegrees" - \
    | gzip >"$NAME-outdegrees-indegrees.txt.gz"

# Column header of the top lists; domain-level input carries one more column.
HEADER="outdegree\tindegree\tname"
if [[ "$TYPE" == "domain" ]]; then
    HEADER="outdegree\tindegree\tname\tnumsubdomains"
fi

#######################################
# Write the header plus the top 10000 rows whose value in the given column
# exceeds 1000, sorted by that column in descending numeric order.
# Globals:   NAME (read), HEADER (read)
# Arguments: $1 - 1-based column to filter and sort by (1: out-, 2: indegree)
#            $2 - path of the gzip-compressed output file
#######################################
write_top() {
    local column="$1"
    local output="$2"
    # Perl's @F array is 0-based while sort keys are 1-based.
    local field=$((column - 1))
    (
        echo -e "$HEADER"
        # pipefail is switched off inside this subshell only: head stops
        # reading after 10000 lines, which can terminate the upstream
        # commands via SIGPIPE and would otherwise fail the pipeline.
        set +o pipefail
        zcat "$NAME-outdegrees-indegrees.txt.gz" \
            | perl -aF'\t' -lne "print if \$F[$field] > 1000" \
            | sort -k"${column},${column}nr" \
            | head -10000
    ) | gzip >"$output"
}

write_top 1 "$NAME-outdegrees-indegrees-topout.txt.gz"
write_top 2 "$NAME-outdegrees-indegrees-topin.txt.gz"
You can’t perform that action at this time.
0 commit comments