1
- __version__ = "1.0.5 "
1
+ __version__ = "1.0.6 "
2
2
3
3
import argparse
4
4
import logging
@@ -20,7 +20,7 @@ def version():
20
20
21
21
def main ():
22
22
# Define logger level helper
23
- switcher = {
23
+ logger_switcher = {
24
24
'critical' : 50 ,
25
25
'error' : 40 ,
26
26
'warning' : 30 ,
@@ -39,6 +39,18 @@ def str2bool(v):
39
39
else :
40
40
raise argparse .ArgumentTypeError ('Boolean value expected.' )
41
41
42
+ # helper converting the search_mode argument ("and" / "or") to a boolean
43
def search_mode_helper(v):
    """Convert a ``search_mode`` command-line value to a boolean.

    ``True`` selects "and" mode and ``False`` selects "or" mode.

    Args:
        v: Either a bool, which is returned unchanged, or a string naming
            the mode. Matching ignores case and surrounding whitespace.

    Returns:
        True for 'and', '&', 't', 'y', '1', 'true';
        False for 'or', '|', 'f', 'n', '0', 'false'.

    Raises:
        argparse.ArgumentTypeError: If ``v`` is neither a bool nor one of
            the accepted spellings.
    """
    if isinstance(v, bool):
        return v
    if not isinstance(v, str):
        # Without this guard .lower() would raise AttributeError, which
        # argparse does not convert into a clean usage error.
        raise argparse.ArgumentTypeError('"and" or "or" value expected')

    # Normalise once so stray whitespace or capitalisation is tolerated.
    mode = v.strip().lower()
    if mode in ('and', '&', 't', 'y', '1', 'true'):
        return True
    if mode in ('or', '|', 'f', 'n', '0', 'false'):
        return False
    raise argparse.ArgumentTypeError('"and" or "or" value expected')
52
+
53
+
42
54
# Parse arguments from command line
43
55
argumentParser = argparse .ArgumentParser ()
44
56
argumentParser .add_argument ('--path' , help = 'path to pdf folder or file' , default = "." )
@@ -52,13 +64,18 @@ def str2bool(v):
52
64
argumentParser .add_argument ('--search' , help = 'word to search for' , default = "default" )
53
65
argumentParser .add_argument ('--tessdata' , help = 'location of tesseract data files' , default = "/usr/share/tessdata" )
54
66
argumentParser .add_argument ('--tables' , type = str2bool , help = 'should tables be extracted and searched' , default = True )
67
+ # True -> and mode, False -> or mode
68
+ argumentParser .add_argument ('--search_mode' , type = search_mode_helper , help = 'And or Or search, when multiple '
69
+ 'search words are provided' ,
70
+ default = True )
55
71
56
72
args = vars (argumentParser .parse_args ())
57
73
output_path = args ["out" ]
58
- log_level = switcher .get (args ["log_level" ])
59
- searchWord = args ["search" ]
74
+ log_level = logger_switcher .get (args ["log_level" ])
75
+ search_word = args ["search" ]
60
76
tessdata_location = args ["tessdata" ]
61
77
tables_extract = args ["tables" ]
78
+ search_mode = args ["search_mode" ]
62
79
63
80
# Set up logger
64
81
logger = logging .getLogger (__name__ )
@@ -140,7 +157,9 @@ def signal_handler(sign, frame):
140
157
logger .debug (doc .text )
141
158
logger .info ('Done parsing PDFs' )
142
159
logger .info ('Stopping' )
143
- generate_html (output_path , docs , searchWord )
160
+ generate_html (output_path , docs , search_word , search_mode )
144
161
# clean up temporary directory
145
162
shutil .rmtree (tempfile .gettempdir () + "/PDFScraper" , ignore_errors = True )
146
163
sys .exit (0 )
164
+
165
+
0 commit comments