diff --git a/feapder/core/parser_control.py b/feapder/core/parser_control.py index 021d295..1da4592 100644 --- a/feapder/core/parser_control.py +++ b/feapder/core/parser_control.py @@ -33,7 +33,7 @@ class ParserControl(threading.Thread): is_show_tip = False - # 实时统计已做任务数及失败任务数,若失败任务数/已做任务数>0.5 则报警 + # 实时统计请求成功数及失败数,用于计算请求成功率报警 _success_task_count = 0 _failed_task_count = 0 _total_task_count = 0 @@ -455,7 +455,7 @@ def add_parser(self, parser: BaseParser): class AirSpiderParserControl(ParserControl): is_show_tip = False - # 实时统计已做任务数及失败任务数,若失败任务数/已做任务数>0.5 则报警 + # 实时统计请求成功数及失败数,用于计算请求成功率报警 _success_task_count = 0 _failed_task_count = 0 diff --git a/feapder/core/scheduler.py b/feapder/core/scheduler.py index 0177d18..caff471 100644 --- a/feapder/core/scheduler.py +++ b/feapder/core/scheduler.py @@ -330,19 +330,19 @@ def check_task_status(self): else: return - # 检查失败任务数量 超过1000 报警, + # 检查失败请求数量,超过阈值则报警 failed_count = self._redisdb.zget_count(self._tab_failed_requests) if failed_count > setting.WARNING_FAILED_COUNT: # 发送报警 - msg = "《%s》爬虫当前失败任务数:%s, 请检查爬虫是否正常" % (self._spider_name, failed_count) + msg = "《%s》爬虫当前失败请求数:%s, 请检查爬虫是否正常" % (self._spider_name, failed_count) log.error(msg) self.send_msg( msg, level="error", - message_prefix="《%s》爬虫当前失败任务数报警" % (self._spider_name), + message_prefix="《%s》爬虫当前失败请求数报警" % (self._spider_name), ) - # parser_control实时统计已做任务数及失败任务数,若成功率<0.5 则报警 + # parser_control实时统计请求成功数及失败数,若请求成功率低于阈值则报警 ( failed_task_count, success_task_count, @@ -351,9 +351,9 @@ def check_task_status(self): total_count = success_task_count + failed_task_count if total_count > 0: task_success_rate = success_task_count / total_count - if task_success_rate < 0.5: + if task_success_rate < setting.WARNING_SUCCESS_RATE: # 发送报警 - msg = "《%s》爬虫当前任务成功数%s, 失败数%s, 成功率 %.2f, 请检查爬虫是否正常" % ( + msg = "《%s》爬虫当前请求成功数%s, 失败数%s, 成功率 %.2f, 请检查爬虫是否正常" % ( self._spider_name, success_task_count, failed_task_count, @@ -363,7 +363,7 @@ def check_task_status(self): self.send_msg( msg, level="error", - message_prefix="《%s》爬虫当前任务成功率报警" % (self._spider_name), + message_prefix="《%s》爬虫当前请求成功率报警" % (self._spider_name), ) # 判断任务数是否变化 diff --git a/feapder/db/redisdb.py b/feapder/db/redisdb.py index d882e68..97b7d94 100644 --- a/feapder/db/redisdb.py +++ b/feapder/db/redisdb.py @@ -137,15 +137,14 @@ def get_connect(self): else self._ip_ports.split(",") ) if len(ip_ports) > 1: - startup_nodes = [] + parsed_nodes = [] for ip_port in ip_ports: ip, port = ip_port.split(":") - startup_nodes.append({"host": ip, "port": port}) + parsed_nodes.append((ip, int(port))) if self._service_name: # log.debug("使用redis哨兵模式") - hosts = [(node["host"], node["port"]) for node in startup_nodes] - sentinel = Sentinel(hosts, socket_timeout=3, **self._kwargs) + sentinel = Sentinel(parsed_nodes, socket_timeout=3, **self._kwargs) self._redis = sentinel.master_for( self._service_name, password=self._user_pass, @@ -158,10 +157,17 @@ def get_connect(self): else: try: - from rediscluster import RedisCluster - except ModuleNotFoundError as e: - log.error('请安装 pip install "feapder[all]"') - os._exit(0) + from redis.cluster import RedisCluster, ClusterNode + startup_nodes = [ClusterNode(host=ip, port=port) for ip, port in parsed_nodes] + except ModuleNotFoundError: + try: + from rediscluster import RedisCluster + startup_nodes = [{"host": ip, "port": port} for ip, port in parsed_nodes] + except ModuleNotFoundError: + log.error( + '请安装 pip install "feapder[all]",或升级 redis>=4.0,或安装 redis-py-cluster' + ) + os._exit(0) # log.debug("使用redis集群模式") self._redis = RedisCluster( diff --git a/feapder/requirements.txt b/feapder/requirements.txt index 2171767..a882ee8 100644 --- a/feapder/requirements.txt +++ b/feapder/requirements.txt @@ -4,13 +4,12 @@ parsel>=1.5.2 PyExecJS>=1.5.1 pymongo>=3.10.1 PyMySQL>=0.9.3 -redis>=2.10.6,<4.0.0 +redis>=2.10.6,<6.0.0 requests>=2.22.0 selenium>=3.141.0 bs4>=0.0.1 ipython>=7.14.0 bitarray>=1.5.3 -redis-py-cluster>=2.1.0 cryptography>=3.3.2 urllib3>=1.25.8 loguru>=0.5.3 @@ -18,4 +17,4 @@ influxdb>=5.3.1 pyperclip>=1.8.2 webdriver-manager>=4.0.0 terminal-layout>=2.1.3 -playwright \ No newline at end of file +playwright diff --git a/feapder/setting.py b/feapder/setting.py index c52b318..b7b127b 100644 --- a/feapder/setting.py +++ b/feapder/setting.py @@ -193,7 +193,8 @@ # 时间间隔 WARNING_INTERVAL = 3600 # 相同报警的报警时间间隔,防止刷屏; 0表示不去重 WARNING_LEVEL = "DEBUG" # 报警级别, DEBUG / INFO / ERROR -WARNING_FAILED_COUNT = 1000 # 任务失败数 超过WARNING_FAILED_COUNT则报警 +WARNING_FAILED_COUNT = 1000 # 失败请求数 超过WARNING_FAILED_COUNT则报警 +WARNING_SUCCESS_RATE = 0.5 # 请求成功率低于WARNING_SUCCESS_RATE则报警 WARNING_CHECK_TASK_COUNT_INTERVAL = 1200 # 检查已做任务数量的时间间隔,若两次时间间隔之间,任务数无变化则报警 # 日志 diff --git a/feapder/templates/project_template/setting.py b/feapder/templates/project_template/setting.py index 140aaa0..890ec9d 100644 --- a/feapder/templates/project_template/setting.py +++ b/feapder/templates/project_template/setting.py @@ -175,7 +175,8 @@ # # 时间间隔 # WARNING_INTERVAL = 3600 # 相同报警的报警时间间隔,防止刷屏; 0表示不去重 # WARNING_LEVEL = "DEBUG" # 报警级别, DEBUG / INFO / ERROR -# WARNING_FAILED_COUNT = 1000 # 任务失败数 超过WARNING_FAILED_COUNT则报警 +# WARNING_FAILED_COUNT = 1000 # 失败请求数 超过WARNING_FAILED_COUNT则报警 +# WARNING_SUCCESS_RATE = 0.5 # 请求成功率低于WARNING_SUCCESS_RATE则报警 # # LOG_NAME = os.path.basename(os.getcwd()) # LOG_PATH = "log/%s.log" % LOG_NAME # log存储路径 diff --git a/setup.py b/setup.py index cf4fe54..70baf38 100644 --- a/setup.py +++ b/setup.py @@ -38,7 +38,7 @@ "DBUtils>=2.0", "parsel>=1.5.2", "PyMySQL>=0.9.3", - "redis>=2.10.6,<4.0.0", + "redis>=2.10.6,<6.0.0", "requests>=2.22.0", "bs4>=0.0.1", "ipython>=7.14.0", @@ -60,7 +60,6 @@ "bitarray>=1.5.3", "PyExecJS>=1.5.1", "pymongo>=3.10.1", - "redis-py-cluster>=2.1.0", ] + render_requires setuptools.setup(