diff --git a/CLAUDE.md b/CLAUDE.md index 67bc115..86016a6 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -SalesShow is a monolithic Flask web application for analyzing sales data from Excel files. It supports manual Excel uploads and automated daily downloads from secsion.com via Playwright browser automation. There is no database — all data lives as Excel files on disk in `uploads/`. +SaleShow is a monolithic Flask web application for analyzing sales data from Excel files. It supports manual Excel uploads and automated daily downloads from secsion.com via Playwright browser automation. There is no database — all data lives as Excel files on disk in `uploads/`. ## Commands @@ -23,6 +23,7 @@ gunicorn -w 4 -b 0.0.0.0:8000 app:app # CLI automation — download reports from secsion.com python -m automation.secsion --start 2026-04-28 --end 2026-04-28 +python -m automation.secsion --start 2026-05-15 --end 2026-05-17 --username yourusername --password yourpassword ``` No test framework or linter is configured in this project. @@ -31,17 +32,19 @@ No test framework or linter is configured in this project. **Backend (Flask, single `app.py`):** - Routes handle file upload (`/upload`), file listing (`/files`), data loading/processing (`/load/`), deletion, and cleanup. -- `process_sales_data()` (~lines 371-575 in `app.py`) is the core logic. It uses a state-machine approach to handle two Excel formats: "flat tables" (each row has code + product) and "hierarchical tables" (code row is a header, product rows are children). Outputs daily summaries with per-product breakdowns. +- `process_sales_data()` (~lines 371-575) is the core logic. It uses a state-machine approach to handle two Excel formats: "flat tables" (each row has code + product) and "hierarchical tables" (code row is a header, product rows are children). Outputs daily summaries with per-product breakdowns. 
- `find_header_row()` dynamically detects the header row by scanning first 20 rows for keyword matches. +- Auto-download routes use a global `download_status` dict and run Playwright in daemon threads via `threading.Thread`. **Automation module (`automation/`):** -- `secsion.py` — `SecsionDownloader` uses Playwright headless Chromium to log into secsion.com, navigate to reports, set date range via TDesign date picker, optionally inject `shop_id` via route interception, and download exports. -- `uploader.py` — copies downloaded files into `uploads/` with timestamp-prefix naming (same convention as manual uploads). -- `scheduler.py` — APScheduler `BackgroundScheduler` with `CronTrigger` runs daily auto-download (default 01:00). +- `secsion.py` — `SecsionDownloader` uses Playwright headless Chromium to log into secsion.com, navigate to reports, set date range via TDesign date picker (requires click → select day → Enter confirm → Escape close sequence), optionally inject `shop_id` via route interception on `**/api/bill/export`, and download exports. Has 3-retry logic with exponential backoff. +- `uploader.py` — copies downloaded files into `uploads/` with `YYYYMMDD_HHMMSS_` prefix naming (same convention as manual uploads). +- `scheduler.py` — APScheduler `BackgroundScheduler` with `CronTrigger` runs daily auto-download (default 01:00). Uses `misfire_grace_time=3600`. **Configuration (`config.py`):** - Three-tier priority: Web UI settings (`data/config.json`) > environment variables (`.env` / system env) > defaults. - `Config` class provides static methods for reading/writing secsion credentials, shop ID, and scheduler settings. +- Passwords are masked (`******`) when returned via the API. **Frontend (vanilla JS/CSS, no build step):** - `main.js` — all client-side interactivity: file upload (drag-and-drop), AJAX to API, data rendering (card/table view), client-side filtering, sorting, pagination (50 items/page), export. 
@@ -52,5 +55,5 @@ No test framework or linter is configured in this project. - No database — Excel files on disk are the data store. - No frontend build step — vanilla JS/CSS served directly via Flask static files. -- Playwright automation runs in daemon threads with a global `download_status` dict for status tracking. -- Passwords are masked (`******`) when returned via the API. +- Playwright automation runs in daemon threads; status tracked via module-level `download_status` dict in `app.py`. +- The secsion.com date picker uses TDesign's `needconfirm="true"` mode — simply calling `.fill()` won't work; must click cell then press Enter. diff --git a/automation/secsion.py b/automation/secsion.py index 0e82f6c..b6383e8 100644 --- a/automation/secsion.py +++ b/automation/secsion.py @@ -159,10 +159,11 @@ class SecsionDownloader: await asyncio.sleep(3) - # 如果配置了 shop_id,拦截导出请求注入 shop_id - if self.shop_id: - import json + # 如果配置了 shop_id,拦截导出请求注入 shop_id,并捕获服务端响应 + import json + export_response = {'status': None, 'body': None, 'content_type': None} + if self.shop_id: async def inject_shop_id(route): request = route.request body = json.loads(request.post_data) @@ -173,18 +174,92 @@ class SecsionDownloader: await page.route('**/api/bill/export', inject_shop_id) logger.info(f"已设置 shop_id 拦截: {self.shop_id}") - # 点击导出报表并捕获下载(Docker 中增加超时到300秒) - logger.info("点击导出报表...") - async with page.expect_download(timeout=300000) as download_info: - await export_btn.click() - logger.info("等待文件下载中...") + # 捕获导出接口的响应(用于调试) + async def on_response(response): + if '/api/bill/export' in response.url: + export_response['status'] = response.status + export_response['content_type'] = response.headers.get('content-type', '') + try: + body = await response.text() + export_response['body'] = body[:2000] if body else '' + except Exception: + export_response['body'] = '(binary or empty)' + logger.info(f"导出接口响应: status={response.status}, content-type={export_response['content_type']}, 
body长度={len(export_response['body'] or '')}") - download = await download_info.value - filename = download.suggested_filename - save_path = os.path.join(self.download_dir, filename) - await download.save_as(save_path) - logger.info(f"报表已保存至: {save_path}") - return save_path + page.on("response", on_response) + + # 点击导出报表并捕获下载 + logger.info("点击导出报表...") + download_timeout = 120000 # 2 分钟 + + try: + async with page.expect_download(timeout=download_timeout) as download_info: + await export_btn.click() + logger.info("等待文件下载中...") + + download = await download_info.value + filename = download.suggested_filename + save_path = os.path.join(self.download_dir, filename) + await download.save_as(save_path) + logger.info(f"报表已保存至: {save_path}") + return save_path + + except Exception as download_err: + # 下载事件未触发,进行诊断 + logger.warning(f"下载事件捕获失败: {download_err}") + + # 保存调试截图 + try: + screenshot_path = os.path.join(self.download_dir, f"debug_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.png") + await page.screenshot(path=screenshot_path, full_page=True) + logger.info(f"调试截图已保存: {screenshot_path}") + except Exception as ss_err: + logger.warning(f"截图保存失败: {ss_err}") + + # 打印捕获到的响应信息 + if export_response['status']: + logger.info(f"服务端实际响应: status={export_response['status']}, content-type={export_response['content_type']}") + if export_response['body']: + logger.info(f"响应内容(前500字): {export_response['body'][:500]}") + else: + logger.warning("未捕获到 /api/bill/export 响应,可能是请求被拦截或未发出") + + # 检查页面是否有错误提示 + try: + error_text = await page.evaluate("""() => { + const msgs = document.querySelectorAll('.t-message--error, .t-notification--error, [class*="error"], .el-message--error'); + return Array.from(msgs).map(el => el.textContent.trim()).filter(Boolean).join(' | '); + }""") + if error_text: + logger.error(f"页面错误提示: {error_text}") + except Exception: + pass + + # 检查是否有新打开的标签页(某些网站通过 window.open 下载) + try: + pages = page.context.pages + if len(pages) > 1: + logger.info(f"检测到 
{len(pages)} 个标签页,检查新标签页...") + for p in pages[1:]: + url = p.url + logger.info(f"新标签页 URL: {url}") + if url.startswith('blob:') or 'download' in url.lower() or 'export' in url.lower(): + # 尝试从新标签页下载 + try: + async with p.expect_download(timeout=30000) as dl_info: + pass + download = await dl_info.value + filename = download.suggested_filename + save_path = os.path.join(self.download_dir, filename) + await download.save_as(save_path) + logger.info(f"从新标签页下载成功: {save_path}") + return save_path + except Exception: + pass + except Exception: + pass + + raise async def _set_date(self, page, input_box, date_str): """