|
| 1 | +#!/usr/bin/env bash |
| 2 | + |
#######################################
# Extract the score from the first "@score: X" annotation in a file.
# Arguments:
#   $1 - path of the file to inspect
# Outputs:
#   The score (integer or decimal, exactly as written in the file) on
#   stdout, or an empty line when the file has no "@score:" annotation.
#######################################
get_score() {
  local file="$1"
  local line
  # The same pattern is used for selecting the line and for capturing the
  # value, so a number that appears earlier on the line (e.g. "v2 @score: 7")
  # is never mistaken for the score.
  local pattern='@score: *([0-9]+(\.[0-9]+)?)'

  # -m1: stop at the first matching line; "--" protects odd file names.
  line=$(grep -m1 -E -- "$pattern" "$file")

  # $pattern is deliberately unquoted so that it is treated as a regex.
  if [[ $line =~ $pattern ]]; then
    echo "${BASH_REMATCH[1]}"
  else
    echo ""
  fi
}
| 14 | + |
# Main script: scan every regular file under the given paths, read each
# file's score via get_score (files without a score count as 0), print
# aggregate statistics plus the derived "CML" figure, and finally suggest
# which file to look at next.
#
# Usage: <script> path1 [path2 ...]
# Exit status: 1 when no path is given or no regular file is found.

if [[ $# -eq 0 ]]; then
  # Diagnostics go to stderr so they never mix with the report on stdout.
  echo "Usage: $0 path1 [path2 ...]" >&2
  exit 1
fi

# Collect all regular files. NUL-delimited so that file names containing
# newlines or other unusual characters are kept intact.
files=()
while IFS= read -r -d '' f; do
  files+=("$f")
done < <(find "$@" -type f -print0)

N=${#files[@]}
if (( N == 0 )); then
  echo "No files found." >&2
  exit 1
fi

# Per-file pass: scores[i] always corresponds to files[i].
declare -a scores=()
file_missing_score=""
oldest_mtime=0

for f in "${files[@]}"; do
  score=$(get_score "$f")

  if [[ -n $score ]]; then
    scores+=("$score")
    continue
  fi

  # Unscored files take part in the statistics with a score of 0.
  scores+=(0)

  # Track the oldest file that has no score yet. The modification time
  # (epoch seconds) is only needed for unscored files; a file that cannot
  # be stat'ed is simply not a candidate.
  mtime=$(stat -c '%Y' -- "$f") || continue
  if [[ -z $file_missing_score || $mtime -lt $oldest_mtime ]]; then
    oldest_mtime=$mtime
    file_missing_score="$f"
  fi
done

# One awk pass computes mean, standard deviation, the number of scores that
# are >= mean and the (0-based) index of the first lowest score. Doing the
# arithmetic inside a single awk process avoids the 6-significant-digit
# rounding that awk's "print" applies to every intermediate value, and
# avoids forking several processes per file.
read -r mean sigma above min_idx < <(
  printf '%s\n' "${scores[@]}" | awk '
    {
      score[NR] = $1 + 0
      total += score[NR]
      # Strict "<" keeps the first file among equally low scores.
      if (NR == 1 || score[NR] < low) {
        low = score[NR]
        low_idx = NR
      }
    }
    END {
      # The mean is rounded to 6 decimals before it is compared against, so
      # that scores equal to the mean are not excluded by floating-point
      # noise (e.g. three scores of 0.1).
      mu = sprintf("%.6f", total / NR) + 0
      for (i = 1; i <= NR; i++) {
        if (score[i] >= mu) hits++
        d = score[i] - mu
        sq += d * d
      }
      # Population standard deviation (divides by the number of files).
      printf "%.6f %.6f %d %d\n", mu, sqrt(sq / NR), hits, low_idx - 1
    }'
)

# Keep the score exactly as it was written in the file for display.
min_score=${scores[min_idx]}
min_score_file=${files[min_idx]}

# Fraction of files whose score is >= mean.
p=$(awk -v a="$above" -v n="$N" 'BEGIN{printf "%.6f", a/n}')

# Normalized variance: 1 - sigma/mean, clamped at 0; defined as 0 when the
# mean itself is 0 to avoid dividing by zero.
var_norm=$(awk -v sigma="$sigma" -v mean="$mean" 'BEGIN{
  if (mean == 0) print 0;
  else {
    v = 1 - (sigma / mean);
    if (v < 0) v = 0;
    printf "%.6f", v;
  }
}')

# CML: product of the >= mean fraction and the normalized variance.
CML=$(awk -v p="$p" -v v="$var_norm" 'BEGIN{printf "%.6f", p * v}')

# Report.
echo "Files analyzed: $N"
echo "Mean score: $mean"
echo "Standard deviation: $sigma"
echo "Files >= mean: $above / $N"
echo "Percentage above mean: $(awk -v p="$p" 'BEGIN{printf "%.2f%%", p*100}')"
echo "Normalized variance: $var_norm"
echo "CML Score: $(awk -v cml="$CML" 'BEGIN{printf "%.2f%%", cml*100}')"
echo

# Suggested next action: score the oldest unscored file if there is one,
# otherwise point at the file with the lowest score.
if [[ -n $file_missing_score ]]; then
  echo "➡️ Next file to score (oldest without score): $file_missing_score"
else
  echo "⚠️ All files have scores. Lowest scored file: $min_score_file (score: $min_score)"
fi
0 commit comments