fix: 修复 Docker 中下载事件不触发的根本原因

- 移除 --single-process 参数(破坏 Chromium 多进程下载机制)
- 添加 --no-sandbox(Docker 容器需要)
- 显式设置 accept_downloads=True 和 downloads_path
- 添加文件系统兜底检测:即使 Playwright download 事件未触发,
  也能通过检测下载目录中的新文件来获取结果

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-05-17 18:34:23 +08:00
parent b402612641
commit e95a1723e5
+33 -4
View File
@@ -43,17 +43,20 @@ class SecsionDownloader:
async with async_playwright() as p: async with async_playwright() as p:
# Docker 优化:添加 --disable-dev-shm-usage 避免共享内存不足 # Docker 优化:添加 --disable-dev-shm-usage 避免共享内存不足
# 注意:不能使用 --single-process,它会破坏 Chromium 的下载机制
browser = await p.chromium.launch( browser = await p.chromium.launch(
headless=True, headless=True,
downloads_path=self.download_dir,
args=[ args=[
"--disable-dev-shm-usage", "--disable-dev-shm-usage",
"--disable-gpu", "--disable-gpu",
"--single-process" "--no-sandbox"
] ]
) )
context = await browser.new_context( context = await browser.new_context(
ignore_https_errors=True, ignore_https_errors=True,
viewport={'width': 1280, 'height': 800} viewport={'width': 1280, 'height': 800},
accept_downloads=True
) )
page = await context.new_page() page = await context.new_page()
@@ -188,6 +191,9 @@ class SecsionDownloader:
page.on("response", on_response) page.on("response", on_response)
# 记录下载目录现有文件(用于兜底检测)
existing_files = set(os.listdir(self.download_dir)) if os.path.exists(self.download_dir) else set()
# 点击导出报表并捕获下载 # 点击导出报表并捕获下载
logger.info("点击导出报表...") logger.info("点击导出报表...")
download_timeout = 120000 # 2 分钟 download_timeout = 120000 # 2 分钟
@@ -205,8 +211,22 @@ class SecsionDownloader:
return save_path return save_path
except Exception as download_err: except Exception as download_err:
# 下载事件未触发,进行诊断 # Playwright download 事件未触发,尝试文件系统兜底检测
logger.warning(f"下载事件捕获失败: {download_err}") logger.warning(f"Playwright 下载事件捕获失败: {download_err}")
logger.info("尝试文件系统兜底检测...")
# 等待一小段时间让可能的下载完成
await asyncio.sleep(5)
new_files = self._find_new_files(existing_files)
if new_files:
# 按修改时间取最新的
latest = max(new_files, key=lambda f: os.path.getmtime(os.path.join(self.download_dir, f)))
save_path = os.path.join(self.download_dir, latest)
logger.info(f"文件系统兜底检测到新文件: {save_path}")
return save_path
logger.warning("文件系统兜底检测也未发现新文件")
# 保存调试截图 # 保存调试截图
try: try:
@@ -261,6 +281,15 @@ class SecsionDownloader:
raise raise
def _find_new_files(self, existing_files):
    """Return the names of files that appeared in the download directory.

    Args:
        existing_files: Iterable of file names that were already present
            in ``self.download_dir`` before the download was triggered.

    Returns:
        A sorted list of bare file names (not full paths) that are present
        now but were not in ``existing_files``. In-progress/temporary
        files (``.crdownload``, ``.tmp``), debug artifacts (``debug_``
        prefix) and non-regular entries such as sub-directories are
        excluded. An empty list is returned when the download directory
        does not exist.
    """
    # EAFP: listing directly avoids the race between an existence check
    # and the listing itself if the directory disappears in between.
    try:
        current_files = os.listdir(self.download_dir)
    except FileNotFoundError:
        return []

    # Accept any iterable snapshot (set, list, tuple, ...), not only a set.
    known = set(existing_files)

    candidates = []
    for name in current_files:
        if name in known:
            continue
        # Skip partial downloads and temporary files.
        if name.endswith(('.crdownload', '.tmp')):
            continue
        # Skip debug artifacts written into the same directory.
        if name.startswith('debug_'):
            continue
        # A newly created sub-directory is not a downloaded file.
        if not os.path.isfile(os.path.join(self.download_dir, name)):
            continue
        candidates.append(name)

    # Sort so the result does not depend on directory-listing order.
    return sorted(candidates)
async def _set_date(self, page, input_box, date_str): async def _set_date(self, page, input_box, date_str):
""" """
设置 TDesign 日期选择器的值 设置 TDesign 日期选择器的值