Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions feapder/core/parser_control.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ class ParserControl(threading.Thread):

is_show_tip = False

# 实时统计已做任务数及失败任务数,若失败任务数/已做任务数>0.5 则报警
# 实时统计请求成功数及失败数,用于计算请求成功率报警
_success_task_count = 0
_failed_task_count = 0
_total_task_count = 0
Expand Down Expand Up @@ -455,7 +455,7 @@ def add_parser(self, parser: BaseParser):
class AirSpiderParserControl(ParserControl):
is_show_tip = False

# 实时统计已做任务数及失败任务数,若失败任务数/已做任务数>0.5 则报警
# 实时统计请求成功数及失败数,用于计算请求成功率报警
_success_task_count = 0
_failed_task_count = 0

Expand Down
14 changes: 7 additions & 7 deletions feapder/core/scheduler.py
Original file line number Diff line number Diff line change
Expand Up @@ -330,19 +330,19 @@ def check_task_status(self):
else:
return

# 检查失败任务数量 超过1000 报警,
# 检查失败请求数量,超过阈值则报警
failed_count = self._redisdb.zget_count(self._tab_failed_requests)
if failed_count > setting.WARNING_FAILED_COUNT:
# 发送报警
msg = "《%s》爬虫当前失败任务数:%s, 请检查爬虫是否正常" % (self._spider_name, failed_count)
msg = "《%s》爬虫当前失败请求数:%s, 请检查爬虫是否正常" % (self._spider_name, failed_count)
log.error(msg)
self.send_msg(
msg,
level="error",
message_prefix="《%s》爬虫当前失败任务数报警" % (self._spider_name),
message_prefix="《%s》爬虫当前失败请求数报警" % (self._spider_name),
)

# parser_control实时统计已做任务数及失败任务数,若成功率<0.5 则报警
# parser_control实时统计请求成功数及失败数,若请求成功率低于阈值则报警
(
failed_task_count,
success_task_count,
Expand All @@ -351,9 +351,9 @@ def check_task_status(self):
total_count = success_task_count + failed_task_count
if total_count > 0:
task_success_rate = success_task_count / total_count
if task_success_rate < 0.5:
if task_success_rate < setting.WARNING_SUCCESS_RATE:
# 发送报警
msg = "《%s》爬虫当前任务成功数%s, 失败数%s, 成功率 %.2f, 请检查爬虫是否正常" % (
msg = "《%s》爬虫当前请求成功数%s, 失败数%s, 成功率 %.2f, 请检查爬虫是否正常" % (
self._spider_name,
success_task_count,
failed_task_count,
Expand All @@ -363,7 +363,7 @@ def check_task_status(self):
self.send_msg(
msg,
level="error",
message_prefix="《%s》爬虫当前任务成功率报警" % (self._spider_name),
message_prefix="《%s》爬虫当前请求成功率报警" % (self._spider_name),
)

# 判断任务数是否变化
Expand Down
22 changes: 14 additions & 8 deletions feapder/db/redisdb.py
Original file line number Diff line number Diff line change
Expand Up @@ -137,15 +137,14 @@ def get_connect(self):
else self._ip_ports.split(",")
)
if len(ip_ports) > 1:
startup_nodes = []
parsed_nodes = []
for ip_port in ip_ports:
ip, port = ip_port.split(":")
startup_nodes.append({"host": ip, "port": port})
parsed_nodes.append((ip, int(port)))

if self._service_name:
# log.debug("使用redis哨兵模式")
hosts = [(node["host"], node["port"]) for node in startup_nodes]
sentinel = Sentinel(hosts, socket_timeout=3, **self._kwargs)
sentinel = Sentinel(parsed_nodes, socket_timeout=3, **self._kwargs)
self._redis = sentinel.master_for(
self._service_name,
password=self._user_pass,
Expand All @@ -158,10 +157,17 @@ def get_connect(self):

else:
try:
from rediscluster import RedisCluster
except ModuleNotFoundError as e:
log.error('请安装 pip install "feapder[all]"')
os._exit(0)
from redis.cluster import RedisCluster, ClusterNode
startup_nodes = [ClusterNode(host=ip, port=port) for ip, port in parsed_nodes]
except ModuleNotFoundError:
try:
from rediscluster import RedisCluster
startup_nodes = [{"host": ip, "port": port} for ip, port in parsed_nodes]
except ModuleNotFoundError:
log.error(
'请安装 pip install "feapder[all]",或升级 redis>=4.0,或安装 redis-py-cluster'
)
os._exit(0)

# log.debug("使用redis集群模式")
self._redis = RedisCluster(
Expand Down
5 changes: 2 additions & 3 deletions feapder/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,18 +4,17 @@ parsel>=1.5.2
PyExecJS>=1.5.1
pymongo>=3.10.1
PyMySQL>=0.9.3
redis>=2.10.6,<4.0.0
redis>=2.10.6,<6.0.0
requests>=2.22.0
selenium>=3.141.0
bs4>=0.0.1
ipython>=7.14.0
bitarray>=1.5.3
redis-py-cluster>=2.1.0
cryptography>=3.3.2
urllib3>=1.25.8
loguru>=0.5.3
influxdb>=5.3.1
pyperclip>=1.8.2
webdriver-manager>=4.0.0
terminal-layout>=2.1.3
playwright
playwright
3 changes: 2 additions & 1 deletion feapder/setting.py
Original file line number Diff line number Diff line change
Expand Up @@ -193,7 +193,8 @@
# 时间间隔
WARNING_INTERVAL = 3600 # 相同报警的报警时间间隔,防止刷屏; 0表示不去重
WARNING_LEVEL = "DEBUG" # 报警级别, DEBUG / INFO / ERROR
WARNING_FAILED_COUNT = 1000 # 任务失败数 超过WARNING_FAILED_COUNT则报警
WARNING_FAILED_COUNT = 1000 # 失败请求数 超过WARNING_FAILED_COUNT则报警
WARNING_SUCCESS_RATE = 0.5 # 请求成功率低于WARNING_SUCCESS_RATE则报警
WARNING_CHECK_TASK_COUNT_INTERVAL = 1200 # 检查已做任务数量的时间间隔,若两次时间间隔之间,任务数无变化则报警

# 日志
Expand Down
3 changes: 2 additions & 1 deletion feapder/templates/project_template/setting.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,8 @@
# # 时间间隔
# WARNING_INTERVAL = 3600 # 相同报警的报警时间间隔,防止刷屏; 0表示不去重
# WARNING_LEVEL = "DEBUG" # 报警级别, DEBUG / INFO / ERROR
# WARNING_FAILED_COUNT = 1000 # 任务失败数 超过WARNING_FAILED_COUNT则报警
# WARNING_FAILED_COUNT = 1000 # 失败请求数 超过WARNING_FAILED_COUNT则报警
# WARNING_SUCCESS_RATE = 0.5 # 请求成功率低于WARNING_SUCCESS_RATE则报警
#
# LOG_NAME = os.path.basename(os.getcwd())
# LOG_PATH = "log/%s.log" % LOG_NAME # log存储路径
Expand Down
3 changes: 1 addition & 2 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
"DBUtils>=2.0",
"parsel>=1.5.2",
"PyMySQL>=0.9.3",
"redis>=2.10.6,<4.0.0",
"redis>=2.10.6,<6.0.0",
"requests>=2.22.0",
"bs4>=0.0.1",
"ipython>=7.14.0",
Expand All @@ -60,7 +60,6 @@
"bitarray>=1.5.3",
"PyExecJS>=1.5.1",
"pymongo>=3.10.1",
"redis-py-cluster>=2.1.0",
] + render_requires

setuptools.setup(
Expand Down