Skip to content

Commit cfd23a1

Browse files
committed
日志系统更新
1 parent 33f2cbb commit cfd23a1

File tree

4 files changed

+205
-16
lines changed

4 files changed

+205
-16
lines changed

README.md

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,30 @@ A set of tools , including
3636
pip install PaperCrawlerUtil
3737
```
3838

39-
## 基本使用
39+
# 基本使用
40+
41+
## 日志系统
42+
43+
```python
44+
from PaperCrawlerUtil.common_util import *
45+
46+
#目前支持qq和163邮箱,建议使用qq,163没有测试过
47+
#邮箱通知是最高级的通知,请慎用防止消息爆炸
48+
"""
49+
可以通过basic_config(logs_style=LOG_STYLE_PRINT, log_level=logging.WARNING)
50+
进行控制:logs_style控制使用命令行输出、日志文件或者两者兼有;log_level控制日志输出的等级
51+
"""
52+
53+
logger = Logs("xxx@qq.com", "xxxxxx", "xx@xx.xx")
54+
logger.log_email("ahufsdihusdfiahu")
55+
logger.log_debug("adfsadf")
56+
logger.log_info("jsdfikaos")
57+
logger.log_warn("fjdiasofjds")
58+
logger.log_error("jdiaofjsoiafjisd")
59+
60+
61+
```
62+
4063
本项目依赖proxypool项目,该项目可以爬取免费的代理,
4164
[proxy pool项目仓库](https://github.com/Python3WebSpider/ProxyPool)
4265
感谢大佬为开源社区做出的贡献

src/PaperCrawlerUtil/common_util.py

Lines changed: 144 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,9 @@
3333
from PaperCrawlerUtil.proxypool.processors.tester import Tester
3434
from PaperCrawlerUtil.constant import *
3535
from tqdm import tqdm
36+
import smtplib
37+
from email.mime.text import MIMEText
38+
from email.utils import formataddr
3639

3740
PROXY_POOL_URL = ""
3841
log_style = LOG_STYLE_PRINT
@@ -246,17 +249,23 @@ def get_timestamp(split: str or list = ["-", "-", " ", ":", ":"], accuracy: int
246249
return time.strftime(time_style, time.localtime())
247250

248251

249-
def write_log(string: str = "", print_file: object = sys.stdout):
250-
if print_file == sys.stdout:
251-
logging.warning(string)
252+
def write_log(string: str = "", print_file: object = sys.stdout, func: callable = None):
253+
if func is not None:
254+
func(string)
252255
else:
253-
logging.error(string)
256+
if print_file == sys.stdout:
257+
logging.warning(string)
258+
else:
259+
logging.error(string)
254260

255261

256262
def log(*string: str or object, print_sep: str = ' ', print_end: str = "\n", print_file: object = sys.stdout,
257-
print_flush: bool = None, need_time_stamp: bool = True, is_test_out: bool = False) -> bool:
263+
print_flush: bool = None, need_time_stamp: bool = True, is_test_out: bool = False,
264+
funcs: callable = None, level: str = None) -> bool:
258265
"""
259266
本项目的通用输出函数, 使用这个方法可以避免tqdm进度条被中断重新输出
267+
:param level: 日志等级
268+
:param funcs: 日志输出函数
260269
:param is_test_out: 是否是测试输出,正式场合不需要输出,可以通过common_util.basic_config()控制
261270
:param need_time_stamp: 是否需要对于输出的日志添加时间戳
262271
:param process_bar_file: 如果使用process_bar参数并且需要保持格式不变,则设置此项参数
@@ -276,6 +285,7 @@ def log(*string: str or object, print_sep: str = ' ', print_end: str = "\n", pri
276285
if global_val.get_value(IS_LOG_TEST_MODE) else False
277286
if is_test_out and (not is_test_model):
278287
return flag
288+
global_log_level = global_val.get_value(GLOBAL_LOG_LEVEL)
279289
s = ""
280290
try:
281291
for k in string:
@@ -290,15 +300,135 @@ def log(*string: str or object, print_sep: str = ' ', print_end: str = "\n", pri
290300
if need_time_stamp and type(s) == str:
291301
s = get_timestamp(split=["-", "-", " ", ":", ":"]) + get_split(lens=3, style=" ") + s
292302
if log_style == LOG_STYLE_LOG:
293-
write_log(s, print_file)
303+
write_log(s, print_file, func=funcs)
294304
elif log_style == LOG_STYLE_PRINT:
295-
tqdm.write(s=s, file=print_file, end=print_end)
305+
if (LEVEL2NUM[global_log_level] if type(global_log_level) == str else global_log_level) <= \
306+
(LEVEL2NUM[level] if type(level) == str else level):
307+
tqdm.write(s=s, file=print_file, end=print_end)
296308
elif log_style == LOG_STYLE_ALL:
297-
write_log(s, print_file)
298-
tqdm.write(s=s, file=print_file, end=print_end)
309+
if (LEVEL2NUM[global_log_level] if type(global_log_level) == str else global_log_level) <= \
310+
(LEVEL2NUM[level] if type(level) == str else level):
311+
write_log(s, print_file, func=funcs)
312+
tqdm.write(s=s, file=print_file, end=print_end)
299313
return flag
300314

301315

316+
def send_email(sender_email, sender_password, receiver_email,
               message, subject="default subject"):
    """
    Send a plain-text UTF-8 e-mail over an SSL SMTP connection.

    The SMTP host is chosen from the suffix of ``sender_email``; only
    ``@163.com`` and ``@qq.com`` senders are handled. Nothing is raised to
    the caller: an unsupported domain or any failure while building or
    sending the message is reported through ``Logs.log_error``.

    :param sender_email: address used both as the SMTP login and the From header
    :param sender_password: credential passed to ``SMTP_SSL.login``
    :param receiver_email: single recipient address
    :param message: body text of the mail
    :param subject: value of the Subject header
    :return: None
    """
    try:
        if sender_email.endswith('@163.com'):
            smtp_server = 'smtp.163.com'
            # The connection below is opened with SMTP_SSL, so an implicit-TLS
            # port is required; the previous value 25 is the plain-text port.
            # NOTE(review): 465 assumed to be 163's SSL port - confirm with the provider.
            smtp_port = 465
        elif sender_email.endswith('@qq.com'):
            smtp_server = 'smtp.qq.com'
            smtp_port = 465
        else:
            Logs.log_error('Unsupported email domain')
            # Stop here: without a server/port the code below would only fail
            # with an UnboundLocalError and log a second, misleading error.
            return

        msg = MIMEText(message, 'plain', 'utf-8')
        msg['From'] = formataddr(('Sender', sender_email))
        msg['To'] = formataddr(('Receiver', receiver_email))
        msg['Subject'] = subject

        # The context manager closes the connection even if login/sendmail fails.
        with smtplib.SMTP_SSL(smtp_server, smtp_port) as server:
            server.login(sender_email, sender_password)
            server.sendmail(sender_email, [receiver_email], msg.as_string())

        Logs.log_info('Email sent successfully!')
    except Exception as e:
        # Deliberate best-effort: a failed notification must not crash the caller.
        Logs.log_error('Error sending email:', str(e))
350+
351+
class Logs(object):
    """
    Levelled logging facade built on the module-level ``log`` function.

    The ``log_*`` static methods tag a message with a level constant and hand
    it to ``log``. An instance additionally stores the mail account details
    used by the instance method ``log_email``.
    """

    def __init__(self, sender_email, sender_password, receiver_email):
        """
        Store the mail settings used by ``log_email``.

        :param sender_email: address the notification mail is sent from
        :param sender_password: credential for the sender account
        :param receiver_email: address the notification mail is sent to
        """
        super().__init__()
        self.sender_email = sender_email
        self.sender_password = sender_password
        self.receiver_email = receiver_email

    class LOG(object):
        """Adapters over the standard ``logging`` module, looked up by name through ``getMethod``."""

        def __init__(self, ):
            super().__init__()

        @staticmethod
        def log_info(string, *args, **keywords):
            """Forward to ``logging.info``."""
            logging.info(string, *args, **keywords)

        @staticmethod
        def log_warning(string, *args, **keywords):
            """Forward to ``logging.warning``."""
            logging.warning(string, *args, **keywords)

        @staticmethod
        def log_error(string, *args, **keywords):
            """Forward to ``logging.error``."""
            logging.error(string, *args, **keywords)

        @staticmethod
        def log_debug(string, *args, **keywords):
            """Forward to ``logging.debug``."""
            logging.debug(string, *args, **keywords)

        @staticmethod
        def log_email(sender_email, sender_password, receiver_email,
                      message, subject="default subject"):
            """Send ``message`` by mail through the module-level ``send_email``."""
            # Previously this called ``sender_email(...)`` (the address string itself)
            # and always passed the default subject instead of the caller's.
            send_email(sender_email, sender_password, receiver_email,
                       message, subject=subject)

        def getMethod(self, funcs):
            """
            Return the attribute named ``funcs``, or ``log_warning`` when it does not exist.

            The fallback is what serves the "warn" level: "log_warn" is not
            defined on this class, so it resolves to ``log_warning``.
            """
            return getattr(self, funcs, self.log_warning)

    @staticmethod
    def log(*string: object, print_sep: str = ' ', print_end: str = "\n",
            print_file: object = sys.stdout,
            print_flush: bool = None, need_time_stamp: bool = True, level: str = INFO, is_test_out: bool = False,
            **email_param):
        """
        Resolve the handler for ``level`` and pass the message on to ``log``.

        :param string: objects that make up the message
        :param print_sep: forwarded to ``log`` as ``print_sep``
        :param print_end: forwarded to ``log`` as ``print_end``
        :param print_file: forwarded to ``log`` as ``print_file``
        :param print_flush: forwarded to ``log`` as ``print_flush``
        :param need_time_stamp: forwarded to ``log`` as ``need_time_stamp``
        :param level: level name; selects the ``LOG.log_<level>`` handler
        :param is_test_out: forwarded to ``log`` as ``is_test_out``
        :param email_param: keyword arguments for ``LOG.log_email`` (sender_email,
            sender_password, receiver_email, optionally subject); only used when
            ``level`` is ``EMAIL``
        """
        func_factory = Logs.LOG()
        funcs = func_factory.getMethod("log_" + level)
        if level == EMAIL:
            # The handler is later invoked with the message as its only argument,
            # so the mail settings have to be bound to it here.
            email_handler = funcs

            def funcs(message):
                return email_handler(message=message, **email_param)
        # ``email_param`` is intentionally not forwarded: ``log`` accepts no such
        # keywords and forwarding them raised TypeError.
        log(*string, print_sep=print_sep, print_end=print_end, print_file=print_file, print_flush=print_flush,
            need_time_stamp=need_time_stamp, is_test_out=is_test_out, funcs=funcs, level=level)

    @staticmethod
    def log_warn(*string: object, print_sep: str = ' ', print_end: str = "\n",
                 print_file: object = sys.stdout,
                 print_flush: bool = None, need_time_stamp: bool = True):
        """Emit a message at the ``WARN`` level."""
        Logs.log(*string, print_sep=print_sep, print_end=print_end, print_file=print_file, print_flush=print_flush,
                 need_time_stamp=need_time_stamp, is_test_out=False, level=WARN)

    @staticmethod
    def log_info(*string: object, print_sep: str = ' ', print_end: str = "\n",
                 print_file: object = sys.stdout,
                 print_flush: bool = None, need_time_stamp: bool = True):
        """Emit a message at the ``INFO`` level."""
        Logs.log(*string, print_sep=print_sep, print_end=print_end, print_file=print_file, print_flush=print_flush,
                 need_time_stamp=need_time_stamp, is_test_out=False, level=INFO)

    @staticmethod
    def log_debug(*string: object, print_sep: str = ' ', print_end: str = "\n",
                  print_file: object = sys.stdout,
                  print_flush: bool = None, need_time_stamp: bool = True):
        """Emit a message at the ``DEBUG`` level."""
        Logs.log(*string, print_sep=print_sep, print_end=print_end, print_file=print_file, print_flush=print_flush,
                 need_time_stamp=need_time_stamp, is_test_out=False, level=DEBUG)

    @staticmethod
    def log_error(*string: object, print_sep: str = ' ', print_end: str = "\n",
                  print_file: object = sys.stdout,
                  print_flush: bool = None, need_time_stamp: bool = True):
        """Emit a message at the ``ERROR`` level."""
        Logs.log(*string, print_sep=print_sep, print_end=print_end, print_file=print_file, print_flush=print_flush,
                 need_time_stamp=need_time_stamp, is_test_out=False, level=ERROR)

    def log_email(self, message, subject="default subject"):
        """
        Mail ``message`` using the account details stored on this instance.

        :param message: body text of the mail
        :param subject: value of the Subject header
        """
        send_email(sender_email=self.sender_email, sender_password=self.sender_password,
                   receiver_email=self.receiver_email, message=message, subject=subject)
430+
431+
302432
class CanStopThread(threading.Thread):
303433

304434
def __init__(self) -> None:
@@ -439,7 +569,7 @@ def stop_thread(thread_list: List[CanStopThread] = []) -> int:
439569

440570

441571
def basic_config(log_file_name: str = "crawler_util.log",
442-
log_level=logging.WARNING,
572+
log_level=logging.DEBUG,
443573
proxy_pool_url: str = "",
444574
logs_style: str = LOG_STYLE_PRINT,
445575
require_proxy_pool: bool = False,
@@ -537,7 +667,8 @@ def basic_config(log_file_name: str = "crawler_util.log",
537667
(ENABLE_TESTER, enable_tester), (ENABLE_GETTER, enable_getter),
538668
(ENABLE_SERVER, enable_server), (TEST_VALID_STATUS, test_valid_stats), (TEST_ANONYMOUS, test_anonymous),
539669
(API_THREADED, api_threaded), (KEEP_PROCESS_BAR_STYLE, keep_process_bar_style),
540-
(KEEP_PROCESS_BAR_STYLE_FILE, keep_process_bar_style_file), (IS_LOG_TEST_MODE, is_test_out)])
670+
(KEEP_PROCESS_BAR_STYLE_FILE, keep_process_bar_style_file), (IS_LOG_TEST_MODE, is_test_out),
671+
(GLOBAL_LOG_LEVEL, log_level)])
541672
PROXY_POOL_URL = proxy_pool_url if len(proxy_pool_url) != 0 else PROXY_POOL_URL
542673
log_style = logs_style
543674
if require_proxy_pool and PROXY_POOL_CAN_RUN_FLAG and len(
@@ -736,7 +867,7 @@ def deleteSpecialCharFromHtmlElement(html: str = "", sep: str = "") -> str:
736867
从html文本中删除标签,如:”<a>b</a>“ -> "b"
737868
:param html: html文本
738869
:param sep: 在每次去除完一个标签之后,加入的间隔符,如:sep=” “, ”<a>b</a>“ -> " b "
739-
:return: 处理玩得字符串
870+
:return: 处理完的字符串
740871
"""
741872
names = []
742873
flag = True
@@ -859,5 +990,4 @@ def getAllFiles(target_dir: str, cascade: bool = True) -> list:
859990

860991

861992
if __name__ == "__main__":
862-
basic_config(logs_style=LOG_STYLE_PRINT, is_test_out=False)
863-
993+
basic_config(logs_style=LOG_STYLE_PRINT, log_level=EMAIL)

src/PaperCrawlerUtil/constant.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,19 @@
3737
KEEP_PROCESS_BAR_STYLE = "keep_process_bar_style"
3838
KEEP_PROCESS_BAR_STYLE_FILE = "keep_process_bar_style_file"
3939
IS_LOG_TEST_MODE = "IS_LOG_TEST_MODE"
40+
GLOBAL_LOG_LEVEL = "global_log_level"
41+
42+
43+
"""
44+
日志级别
45+
"""
46+
INFO = "info"
47+
WARN = "warn"
48+
DEBUG = "debug"
49+
ERROR = "error"
50+
EMAIL = "email"
51+
52+
LEVEL2NUM = {DEBUG: 10, INFO: 20, WARN: 30, ERROR: 40, EMAIL: 50}
4053

4154

4255
"""

src/README.md

Lines changed: 24 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,30 @@ A set of tools , including
3636
pip install PaperCrawlerUtil
3737
```
3838

39-
## 基本使用
39+
# 基本使用
40+
41+
## 日志系统
42+
43+
```python
44+
from PaperCrawlerUtil.common_util import *
45+
46+
#目前支持qq和163邮箱,建议使用qq,163没有测试过
47+
#邮箱通知是最高级的通知,请慎用防止消息爆炸
48+
"""
49+
可以通过basic_config(logs_style=LOG_STYLE_PRINT, log_level=logging.WARNING)
50+
进行控制:logs_style控制使用命令行输出、日志文件或者两者兼有;log_level控制日志输出的等级
51+
"""
52+
53+
logger = Logs("xxx@qq.com", "xxxxxx", "xx@xx.xx")
54+
logger.log_email("ahufsdihusdfiahu")
55+
logger.log_debug("adfsadf")
56+
logger.log_info("jsdfikaos")
57+
logger.log_warn("fjdiasofjds")
58+
logger.log_error("jdiaofjsoiafjisd")
59+
60+
61+
```
62+
4063
本项目依赖proxypool项目,该项目可以爬取免费的代理,
4164
[proxy pool项目仓库](https://github.com/Python3WebSpider/ProxyPool)
4265
感谢大佬为开源社区做出的贡献

0 commit comments

Comments
 (0)