3333from PaperCrawlerUtil .proxypool .processors .tester import Tester
3434from PaperCrawlerUtil .constant import *
3535from tqdm import tqdm
36+ import smtplib
37+ from email .mime .text import MIMEText
38+ from email .utils import formataddr
3639
3740PROXY_POOL_URL = ""
3841log_style = LOG_STYLE_PRINT
@@ -246,17 +249,23 @@ def get_timestamp(split: str or list = ["-", "-", " ", ":", ":"], accuracy: int
246249 return time .strftime (time_style , time .localtime ())
247250
248251
def write_log(string: str = "", print_file: object = sys.stdout, func: callable = None):
    """
    Emit one already-formatted message through a custom sink or the ``logging`` module.

    :param string: the message text to emit
    :param print_file: only used to choose the severity when ``func`` is None:
        ``sys.stdout`` is logged with ``logging.warning``, anything else with
        ``logging.error``
    :param func: optional callable invoked as ``func(string)``; when it is given,
        nothing is sent to ``logging.warning`` / ``logging.error`` by this function
    :return: None
    """
    if func is not None:
        # A caller-supplied sink takes precedence over the stream-based severity choice.
        func(string)
    elif print_file == sys.stdout:
        logging.warning(string)
    else:
        logging.error(string)
254260
255261
256262def log (* string : str or object , print_sep : str = ' ' , print_end : str = "\n " , print_file : object = sys .stdout ,
257- print_flush : bool = None , need_time_stamp : bool = True , is_test_out : bool = False ) -> bool :
263+ print_flush : bool = None , need_time_stamp : bool = True , is_test_out : bool = False ,
264+ funcs : callable = None , level : str = None ) -> bool :
258265 """
259266 本项目的通用输出函数, 使用这个方法可以避免tqdm进度条被中断重新输出
267+ :param level: 日志等级
268+ :param funcs: 日志输出函数
260269 :param is_test_out: 是否是测试输出,正式场合不需要输出,可以通过common_util.basic_config()控制
261270 :param need_time_stamp: 是否需要对于输出的日志添加时间戳
262271 :param process_bar_file: 如果使用process_bar参数并且需要保持格式不变,则设置此项参数
@@ -276,6 +285,7 @@ def log(*string: str or object, print_sep: str = ' ', print_end: str = "\n", pri
276285 if global_val .get_value (IS_LOG_TEST_MODE ) else False
277286 if is_test_out and (not is_test_model ):
278287 return flag
288+ global_log_level = global_val .get_value (GLOBAL_LOG_LEVEL )
279289 s = ""
280290 try :
281291 for k in string :
@@ -290,15 +300,135 @@ def log(*string: str or object, print_sep: str = ' ', print_end: str = "\n", pri
290300 if need_time_stamp and type (s ) == str :
291301 s = get_timestamp (split = ["-" , "-" , " " , ":" , ":" ]) + get_split (lens = 3 , style = " " ) + s
292302 if log_style == LOG_STYLE_LOG :
293- write_log (s , print_file )
303+ write_log (s , print_file , func = funcs )
294304 elif log_style == LOG_STYLE_PRINT :
295- tqdm .write (s = s , file = print_file , end = print_end )
305+ if (LEVEL2NUM [global_log_level ] if type (global_log_level ) == str else global_log_level ) <= \
306+ (LEVEL2NUM [level ] if type (level ) == str else level ):
307+ tqdm .write (s = s , file = print_file , end = print_end )
296308 elif log_style == LOG_STYLE_ALL :
297- write_log (s , print_file )
298- tqdm .write (s = s , file = print_file , end = print_end )
309+ if (LEVEL2NUM [global_log_level ] if type (global_log_level ) == str else global_log_level ) <= \
310+ (LEVEL2NUM [level ] if type (level ) == str else level ):
311+ write_log (s , print_file , func = funcs )
312+ tqdm .write (s = s , file = print_file , end = print_end )
299313 return flag
300314
301315
def send_email(sender_email, sender_password, receiver_email,
               message, subject="default subject"):
    """
    Send a plain-text e-mail over an SMTP-over-SSL connection.

    Only sender addresses ending in ``@163.com`` or ``@qq.com`` are supported.
    Every failure is reported through ``Logs.log_error`` and is not re-raised.

    :param sender_email: address used as the SMTP login name and in the ``From`` header
    :param sender_password: credential passed to ``server.login``
    :param receiver_email: the single recipient address
    :param message: message body, sent as UTF-8 ``text/plain``
    :param subject: value of the ``Subject`` header
    :return: True when the message was handed to the server without an exception,
        False otherwise
    """
    # The connection is opened with smtplib.SMTP_SSL (TLS from the first byte), so
    # both entries must be implicit-TLS ports; port 25 is the plaintext SMTP port.
    ssl_endpoints = {
        '@163.com': ('smtp.163.com', 465),
        '@qq.com': ('smtp.qq.com', 465),
    }
    try:
        for suffix, endpoint in ssl_endpoints.items():
            if sender_email.endswith(suffix):
                smtp_server, smtp_port = endpoint
                break
        else:
            # Stop here: without a server there is nothing to connect to.
            Logs.log_error('Unsupported email domain')
            return False

        msg = MIMEText(message, 'plain', 'utf-8')
        msg['From'] = formataddr(('Sender', sender_email))
        msg['To'] = formataddr(('Receiver', receiver_email))
        msg['Subject'] = subject

        with smtplib.SMTP_SSL(smtp_server, smtp_port) as server:
            server.login(sender_email, sender_password)
            server.sendmail(sender_email, [receiver_email], msg.as_string())

        Logs.log_info('Email sent successfully!')
        return True
    except Exception as e:
        # Best-effort delivery: report the problem instead of propagating it.
        Logs.log_error('Error sending email:', str(e))
        return False
349+
350+
class Logs(object):
    """
    Level-aware front end for the module-level ``log`` function.

    The static ``log_*`` helpers forward to ``log`` with a fixed level. An instance
    additionally stores e-mail credentials so that ``log_email`` can send a message
    through ``send_email``.
    """

    def __init__(self, sender_email, sender_password, receiver_email):
        """
        :param sender_email: sender address forwarded to ``send_email``
        :param sender_password: sender credential forwarded to ``send_email``
        :param receiver_email: recipient address forwarded to ``send_email``
        """
        super().__init__()
        self.sender_email = sender_email
        self.sender_password = sender_password
        self.receiver_email = receiver_email

    class LOG(object):
        """Lookup table of output handlers, addressed by the name ``"log_" + level``."""

        def __init__(self, ):
            super().__init__()

        @staticmethod
        def log_info(string, *args, **keywords):
            logging.info(string, *args, **keywords)

        @staticmethod
        def log_warning(string, *args, **keywords):
            logging.warning(string, *args, **keywords)

        @staticmethod
        def log_error(string, *args, **keywords):
            logging.error(string, *args, **keywords)

        @staticmethod
        def log_debug(string, *args, **keywords):
            logging.debug(string, *args, **keywords)

        @staticmethod
        def log_email(sender_email, sender_password, receiver_email,
                      message, subject="default subject"):
            # Delegate to the module-level send_email and forward the caller's subject.
            send_email(sender_email, sender_password, receiver_email,
                       message, subject=subject)

        def getMethod(self, funcs):
            """
            :param funcs: attribute name of the wanted handler, e.g. ``"log_info"``
            :return: the handler with that name, or ``log_warning`` when no attribute
                with that name exists
            """
            return getattr(self, funcs, self.log_warning)

    @staticmethod
    def log(*string: str or object, print_sep: str = ' ', print_end: str = "\n",
            print_file: object = sys.stdout,
            print_flush: bool = None, need_time_stamp: bool = True, level: str = INFO, is_test_out: bool = False,
            **email_param):
        """
        Output a message through the module-level ``log`` function at the given level.

        :param string: objects to output, forwarded to ``log``
        :param print_sep: forwarded to ``log``
        :param print_end: forwarded to ``log``
        :param print_file: forwarded to ``log``
        :param print_flush: forwarded to ``log``
        :param need_time_stamp: forwarded to ``log``
        :param level: level name; ``"log_" + level`` selects the handler from ``Logs.LOG``
        :param is_test_out: forwarded to ``log``
        :param email_param: keyword arguments for ``Logs.LOG.log_email`` (sender_email,
            sender_password, receiver_email and optionally subject); only accepted
            when the level selects the e-mail handler
        :raises TypeError: if ``email_param`` is given for a level that does not
            select the e-mail handler
        """
        func_factory = Logs.LOG()
        method_name = "log_" + level
        funcs = func_factory.getMethod(method_name)
        if email_param:
            if method_name != "log_email":
                # ``log`` accepts no extra keywords, so unknown ones stay an error.
                raise TypeError("log() got unexpected keyword arguments: "
                                + ", ".join(sorted(email_param)))
            email_handler = funcs

            # NOTE(review): a handler is called with the message as its only argument,
            # so the e-mail settings are bound here rather than forwarded to ``log``
            # -- confirm this matches the intended use of ``email_param``.
            def funcs(message):
                email_handler(message=message, **email_param)
        log(*string, print_sep=print_sep, print_end=print_end, print_file=print_file, print_flush=print_flush,
            need_time_stamp=need_time_stamp, is_test_out=is_test_out, funcs=funcs, level=level)

    @staticmethod
    def log_warn(*string: str or object, print_sep: str = ' ', print_end: str = "\n",
                 print_file: object = sys.stdout,
                 print_flush: bool = None, need_time_stamp: bool = True):
        """Output a message at level ``WARN``; all arguments are forwarded to ``Logs.log``."""
        Logs.log(*string, print_sep=print_sep, print_end=print_end, print_file=print_file, print_flush=print_flush,
                 need_time_stamp=need_time_stamp, is_test_out=False, level=WARN)

    @staticmethod
    def log_info(*string: str or object, print_sep: str = ' ', print_end: str = "\n",
                 print_file: object = sys.stdout,
                 print_flush: bool = None, need_time_stamp: bool = True):
        """Output a message at level ``INFO``; all arguments are forwarded to ``Logs.log``."""
        Logs.log(*string, print_sep=print_sep, print_end=print_end, print_file=print_file, print_flush=print_flush,
                 need_time_stamp=need_time_stamp, is_test_out=False, level=INFO)

    @staticmethod
    def log_debug(*string: str or object, print_sep: str = ' ', print_end: str = "\n",
                  print_file: object = sys.stdout,
                  print_flush: bool = None, need_time_stamp: bool = True):
        """Output a message at level ``DEBUG``; all arguments are forwarded to ``Logs.log``."""
        Logs.log(*string, print_sep=print_sep, print_end=print_end, print_file=print_file, print_flush=print_flush,
                 need_time_stamp=need_time_stamp, is_test_out=False, level=DEBUG)

    @staticmethod
    def log_error(*string: str or object, print_sep: str = ' ', print_end: str = "\n",
                  print_file: object = sys.stdout,
                  print_flush: bool = None, need_time_stamp: bool = True):
        """Output a message at level ``ERROR``; all arguments are forwarded to ``Logs.log``."""
        Logs.log(*string, print_sep=print_sep, print_end=print_end, print_file=print_file, print_flush=print_flush,
                 need_time_stamp=need_time_stamp, is_test_out=False, level=ERROR)

    def log_email(self, message, subject="default subject"):
        """
        Send ``message`` by e-mail using the credentials stored on this instance.

        :param message: message body
        :param subject: subject line
        """
        send_email(sender_email=self.sender_email, sender_password=self.sender_password,
                   receiver_email=self.receiver_email, message=message, subject=subject)
430+
431+
302432class CanStopThread (threading .Thread ):
303433
304434 def __init__ (self ) -> None :
@@ -439,7 +569,7 @@ def stop_thread(thread_list: List[CanStopThread] = []) -> int:
439569
440570
441571def basic_config (log_file_name : str = "crawler_util.log" ,
442- log_level = logging .WARNING ,
572+ log_level = logging .DEBUG ,
443573 proxy_pool_url : str = "" ,
444574 logs_style : str = LOG_STYLE_PRINT ,
445575 require_proxy_pool : bool = False ,
@@ -537,7 +667,8 @@ def basic_config(log_file_name: str = "crawler_util.log",
537667 (ENABLE_TESTER , enable_tester ), (ENABLE_GETTER , enable_getter ),
538668 (ENABLE_SERVER , enable_server ), (TEST_VALID_STATUS , test_valid_stats ), (TEST_ANONYMOUS , test_anonymous ),
539669 (API_THREADED , api_threaded ), (KEEP_PROCESS_BAR_STYLE , keep_process_bar_style ),
540- (KEEP_PROCESS_BAR_STYLE_FILE , keep_process_bar_style_file ), (IS_LOG_TEST_MODE , is_test_out )])
670+ (KEEP_PROCESS_BAR_STYLE_FILE , keep_process_bar_style_file ), (IS_LOG_TEST_MODE , is_test_out ),
671+ (GLOBAL_LOG_LEVEL , log_level )])
541672 PROXY_POOL_URL = proxy_pool_url if len (proxy_pool_url ) != 0 else PROXY_POOL_URL
542673 log_style = logs_style
543674 if require_proxy_pool and PROXY_POOL_CAN_RUN_FLAG and len (
@@ -736,7 +867,7 @@ def deleteSpecialCharFromHtmlElement(html: str = "", sep: str = "") -> str:
736867 从html文本中删除标签,如:”<a>b</a>“ -> "b"
737868 :param html: html文本
738869 :param sep: 在每次去除完一个标签之后,加入的间隔符,如:sep=” “, ”<a>b</a>“ -> " b "
739- :return: 处理玩得字符串
870+ :return: 处理完的字符串
740871 """
741872 names = []
742873 flag = True
@@ -859,5 +990,4 @@ def getAllFiles(target_dir: str, cascade: bool = True) -> list:
859990
860991
if __name__ == "__main__":
    # Manual smoke run: configure the module with print-style output.
    # NOTE(review): EMAIL is not defined in the visible part of this file (presumably
    # it comes from the PaperCrawlerUtil.constant star import) -- confirm that
    # basic_config accepts it as log_level.
    basic_config(logs_style=LOG_STYLE_PRINT, log_level=EMAIL)
0 commit comments