@@ -472,7 +472,7 @@ def pdf2docx(pdf_path: str, word_path: str, end_pages: int = None,
472
472
file_list = []
473
473
file = True
474
474
count = 0
475
- if os .path .isfile (pdf_path ) and os . path . isfile :
475
+ if os .path .isfile (pdf_path ):
476
476
file_list .append (pdf_path )
477
477
if need_log :
478
478
log ("转换文件{}开始" .format (pdf_path ))
@@ -484,16 +484,13 @@ def pdf2docx(pdf_path: str, word_path: str, end_pages: int = None,
484
484
for ele in file_list :
485
485
if ele .endswith (".pdf" ):
486
486
try :
487
- cv = Converter (ele )
487
+ cv = MyPdf2DocxConverter (ele )
488
488
if start_pages is None :
489
489
start_pages = 0
490
490
if end_pages is None :
491
491
end_pages = len (cv .pages )
492
- if not file :
493
- cv .convert (local_path_generate (word_path ), start = start_pages , end = end_pages )
494
- else :
495
- cv .convert (local_path_generate (word_path , suffix = ".docx" ), start = start_pages , end = end_pages )
496
- count = count + 1
492
+ cv .convert (local_path_generate (word_path , suffix = ".docx" ), start = start_pages , end = end_pages )
493
+ count = count + 1
497
494
log ("总计pdf文件个数{},已经完成{}" .format (len (file_list ), count ))
498
495
except Exception as e :
499
496
log (string = "转换失败文件{},{}" .format (ele , e ), print_file = sys .stderr )
@@ -691,11 +688,6 @@ def getSomePagesFromOnePDF(path: str, out_path: str, page_range: tuple or list,
691
688
iters = None
692
689
if type (page_range ) == tuple :
693
690
new_page_range = ()
694
- for k in page_range :
695
- '''@todo: 完善verify_rule()'''
696
- if not (0 <= k <= pdf_pages_len - 1 ):
697
- log (string = "范围参数有错" , print_file = sys .stderr )
698
- return False
699
691
if len (page_range ) == 0 :
700
692
log (string = "页码范围不明确,返回错误" , print_file = sys .stderr )
701
693
return False
@@ -707,6 +699,17 @@ def getSomePagesFromOnePDF(path: str, out_path: str, page_range: tuple or list,
707
699
new_page_range = (page_range [0 ], page_range [1 ])
708
700
else :
709
701
new_page_range = (page_range [0 ], page_range [1 ])
702
+ # check tuple legal or not
703
+ a = []
704
+ for k in new_page_range :
705
+ if k < 0 :
706
+ a .append (0 )
707
+ elif k > pdf_pages_len :
708
+ a .append (pdf_pages_len )
709
+ a .append (k )
710
+ new_page_range = (a [0 ], a [1 ])
711
+ if new_page_range [1 ] < new_page_range [0 ]:
712
+ new_page_range = (new_page_range [1 ], new_page_range [0 ])
710
713
iters = range (new_page_range [0 ], new_page_range [1 ])
711
714
else :
712
715
# 去重
@@ -809,7 +812,7 @@ def cooperatePdfWithLimit(files: list, page_range: tuple or list = None, out_pat
809
812
for p in range (len (out_path ) - 4 ):
810
813
temp .append (out_path [p ])
811
814
out_path = "" .join (temp )
812
- out_path = (out_path + group_id + ".pdf" ) if len (out_path ) != 0 else local_path_generate ("" )
815
+ out_path = (out_path + "-groupid-" + group_id + ".pdf" ) if len (out_path ) != 0 else local_path_generate ("" )
813
816
else :
814
817
out_path = out_path if len (out_path ) != 0 else local_path_generate ("" )
815
818
count = 0
@@ -837,12 +840,11 @@ def cooperatePdfWithLimit(files: list, page_range: tuple or list = None, out_pat
837
840
if len (page_range ) == 0 :
838
841
if need_log :
839
842
log ("默认全部合并,因为范围为空" )
840
- new_page_range [0 ] = 0
841
- new_page_range [1 ] = pdf_pages_len - 1
843
+ new_page_range = (0 , pdf_pages_len )
842
844
elif len (page_range ) == 1 :
843
845
if need_log :
844
846
log ("使用范围截取,但只有一个参数,结束参数默认为最大值" )
845
- new_page_range = (page_range [0 ], pdf_pages_len - 1 )
847
+ new_page_range = (page_range [0 ], pdf_pages_len )
846
848
elif len (page_range ) > 2 :
847
849
if need_log :
848
850
log ("使用范围参数,但参数数量过多,截取两个" )
@@ -911,13 +913,12 @@ def cooperatePdf(path: str, page_range: tuple or list = None, out_path: str = ""
911
913
:param need_group: 是否需要分组合并
912
914
:return:
913
915
"""
914
- need_log = need_log if not need_processbar else False
915
916
if len (path ) == 0 :
916
917
log ("给定路径为空,合并结束:{}" .format (path ))
917
918
elif os .path .isfile (path ):
918
919
log ("给定的是文件路径,合并结束:{}" .format (path ))
919
920
if page_range is None :
920
- page_range = []
921
+ page_range = ()
921
922
files = getAllFiles (path )
922
923
if need_group :
923
924
if need_processbar :
@@ -928,6 +929,8 @@ def cooperatePdf(path: str, page_range: tuple or list = None, out_path: str = ""
928
929
file_group = []
929
930
while i < len (files ):
930
931
if (i != 0 and ((i % group_number ) == 0 )) or (i == len (files ) - 1 ):
932
+ if i == len (files ) - 1 :
933
+ file_group .append (files [i ])
931
934
cooperatePdfWithLimit (file_group , page_range , out_path , need_log , timeout , str (group_id ))
932
935
group_id = group_id + 1
933
936
file_group .clear ()
0 commit comments