# Flask web application: upload, parse, and summarize Excel sales data.
from flask import Flask, render_template, request, jsonify, send_from_directory
|
|
import pandas as pd
|
|
import os
|
|
from werkzeug.utils import secure_filename
|
|
import json
|
|
from datetime import datetime
|
|
import glob
|
|
import time
|
|
|
|
app = Flask(__name__)

app.config['UPLOAD_FOLDER'] = 'uploads'
app.config['MAX_CONTENT_LENGTH'] = 16 * 1024 * 1024  # 16MB max file size

# Ensure the upload folder exists.  exist_ok=True makes this idempotent and
# avoids the check-then-create race of the previous exists()/makedirs pair.
os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)

# Only Excel workbooks may be uploaded.
ALLOWED_EXTENSIONS = {'xlsx', 'xls'}
|
|
|
|
def allowed_file(filename):
    """Return True if *filename* carries an extension listed in ALLOWED_EXTENSIONS."""
    if '.' not in filename:
        return False
    extension = filename.rsplit('.', 1)[1].lower()
    return extension in ALLOWED_EXTENSIONS
|
|
|
|
@app.route('/')
def index():
    """Serve the single-page UI."""
    template_name = 'index.html'
    return render_template(template_name)
|
|
|
|
@app.route('/files')
def get_files():
    """Return the list of uploaded Excel files as JSON.

    Each entry carries the stored filename, the original filename, the upload
    time, and the file size (raw bytes plus a human-readable string).  Entries
    are sorted newest-first.  Returns HTTP 500 with an error message on failure.
    """
    try:
        upload_folder = app.config['UPLOAD_FOLDER']
        if not os.path.exists(upload_folder):
            return jsonify({'files': []})

        files = []
        # Collect every Excel file in the upload folder.
        all_files = (glob.glob(os.path.join(upload_folder, '*.xlsx'))
                     + glob.glob(os.path.join(upload_folder, '*.xls')))

        for filepath in all_files:
            if not os.path.isfile(filepath):
                continue

            filename = os.path.basename(filepath)
            # Stored names follow YYYYMMDD_HHMMSS_original_filename; recover
            # the upload time and original name from that convention.
            try:
                parts = filename.split('_', 2)
                if len(parts) >= 3:
                    timestamp_str = parts[0] + parts[1]
                    upload_time = datetime.strptime(timestamp_str, '%Y%m%d%H%M%S')
                    original_name = parts[2]
                else:
                    upload_time = datetime.fromtimestamp(os.path.getmtime(filepath))
                    original_name = filename
            except (ValueError, OSError):
                # Name does not follow the timestamp convention (or stat
                # failed): fall back to the file's mtime and full name.
                # Narrowed from a bare `except:` so ^C and real bugs surface.
                upload_time = datetime.fromtimestamp(os.path.getmtime(filepath))
                original_name = filename

            file_size = os.path.getsize(filepath)

            files.append({
                'filename': filename,
                'original_name': original_name,
                'upload_time': upload_time.strftime('%Y-%m-%d %H:%M:%S'),
                'file_size': file_size,
                'file_size_human': f"{file_size / 1024:.1f} KB" if file_size < 1024*1024 else f"{file_size / (1024*1024):.1f} MB"
            })

        # The formatted date strings sort correctly lexicographically.
        files.sort(key=lambda x: x['upload_time'], reverse=True)
        return jsonify({'files': files})
    except Exception as e:
        return jsonify({'error': f'获取文件列表失败: {str(e)}'}), 500
|
|
|
|
@app.route('/load/<filename>')
def load_file(filename):
    """Load a previously uploaded Excel file and return its parsed sales data.

    Returns HTTP 404 when the file does not exist, 400 for an unsupported
    extension, and 500 when parsing fails.
    """
    try:
        # Sanitize the user-supplied name to block path traversal
        # (e.g. "../../etc/passwd"); delete_file already does this,
        # and this route previously did not.
        filename = secure_filename(filename)
        upload_folder = app.config['UPLOAD_FOLDER']
        filepath = os.path.join(upload_folder, filename)

        if not os.path.exists(filepath):
            return jsonify({'error': '文件不存在'}), 404

        if not allowed_file(filename):
            return jsonify({'error': '不支持的文件格式'}), 400

        # Locate the header row dynamically before reading the sheet.
        header_row = find_header_row(filepath)
        df = pd.read_excel(filepath, header=header_row)
        sales_data = process_sales_data(df)

        return jsonify({
            'success': True,
            'filename': filename,
            'data': sales_data
        })
    except Exception as e:
        return jsonify({'error': f'文件加载错误: {str(e)}'}), 500
|
|
|
|
@app.route('/upload', methods=['POST'])
def upload_file():
    """Accept an Excel upload, store it under a timestamped name, and parse it."""
    if 'file' not in request.files:
        return jsonify({'error': '没有选择文件'}), 400

    file = request.files['file']
    if file.filename == '':
        return jsonify({'error': '没有选择文件'}), 400

    # Reject anything that is not an allowed Excel file (guard clause form).
    if not (file and allowed_file(file.filename)):
        return jsonify({'error': '不支持的文件格式'}), 400

    # Prefix the stored name with the upload timestamp so names never clash.
    safe_name = secure_filename(file.filename)
    stored_name = datetime.now().strftime('%Y%m%d_%H%M%S_') + safe_name
    filepath = os.path.join(app.config['UPLOAD_FOLDER'], stored_name)
    file.save(filepath)

    try:
        # Locate the header row dynamically before reading the sheet.
        header_row = find_header_row(filepath)
        df = pd.read_excel(filepath, header=header_row)
        sales_data = process_sales_data(df)
        return jsonify({
            'success': True,
            'filename': stored_name,
            'data': sales_data
        })
    except Exception as e:
        return jsonify({'error': f'文件处理错误: {str(e)}'}), 500
|
|
|
|
@app.route('/delete/<filename>', methods=['POST'])
def delete_file(filename):
    """Delete a single uploaded file.

    The name is sanitized with secure_filename to block path traversal.
    Returns 404 when the file does not exist and 500 on other failures.
    """
    try:
        filename = secure_filename(filename)
        upload_folder = app.config['UPLOAD_FOLDER']
        filepath = os.path.join(upload_folder, filename)

        if os.path.exists(filepath):
            os.remove(filepath)
            # Include the deleted file's name in the success message
            # (the interpolation placeholder was previously lost).
            return jsonify({
                'success': True,
                'message': f'文件 {filename} 已成功删除'
            })
        else:
            return jsonify({'error': '文件不存在'}), 404
    except Exception as e:
        return jsonify({'error': f'删除文件失败: {str(e)}'}), 500
|
|
|
|
@app.route('/cleanup', methods=['POST'])
def cleanup_files():
    """Delete every file in the upload folder (immediate cleanup).

    Files that cannot be removed (e.g. locked by another process) are
    collected into the response message instead of aborting the cleanup.
    """
    try:
        upload_folder = app.config['UPLOAD_FOLDER']
        if not os.path.exists(upload_folder):
            return jsonify({'success': True, 'message': '无需清理'})

        files = os.listdir(upload_folder)
        deleted_count = 0
        errors = []

        for filename in files:
            file_path = os.path.join(upload_folder, filename)
            if os.path.isfile(file_path):  # no extension check: clean everything
                try:
                    os.remove(file_path)
                    deleted_count += 1
                except PermissionError:
                    # Name the locked file (placeholder was previously lost).
                    errors.append(f"{filename} 正在被占用,无法删除")
                except Exception as e:
                    errors.append(f"{filename} 删除失败: {str(e)}")

        message = f'成功清理 {deleted_count} 个文件'
        if errors:
            # Show at most the first three errors, plus a total count.
            message += f'。主要错误: {"; ".join(errors[:3])}'
            if len(errors) > 3:
                message += f" 等共{len(errors)}个错误"

        return jsonify({
            'success': True,
            'message': message,
            'deleted_count': deleted_count
        })

    except Exception as e:
        return jsonify({'error': f'清理文件失败: {str(e)}'}), 500
|
|
|
|
def find_header_row(filepath):
    """Return the 0-based index of the header row in an Excel file.

    Scans the first 20 rows for known header keywords; the first row
    containing at least two keywords is taken to be the header.  Falls back
    to row 0 when no such row is found or the file cannot be read.
    """
    try:
        # Read only the first 20 rows, without assuming any header row.
        df_temp = pd.read_excel(filepath, header=None, nrows=20)

        keywords = ['时间', '日期', '商品', '品名', '数量', '金额', '总价',
                    'Date', 'Product', 'Qty', 'Amount']

        for index, row in df_temp.iterrows():
            # Flatten the row into a single string so substring tests are cheap.
            row_str = " ".join([str(val) for val in row.values])
            # Count how many keywords this row contains.
            match_count = sum(1 for keyword in keywords if keyword in row_str)

            # Two or more keyword hits on one row: treat it as the header.
            if match_count >= 2:
                return index
        return 0  # default: first row
    except Exception:
        # Narrowed from a bare `except:` (which also swallowed
        # KeyboardInterrupt/SystemExit).  Unreadable files fall back to row 0;
        # the caller's pd.read_excel will surface the real error.
        return 0
|
|
|
|
def process_sales_data(df):
    """Parse a sales DataFrame with a block-oriented state machine.

    Steps:
      1. Identify the date/product/quantity/amount/code columns by keyword,
         falling back to fixed column positions when keywords fail.
      2. Rename them to canonical names, coerce quantity/amount to numbers,
         and parse dates.
      3. Walk the rows: a row carrying a code starts a new "block" (its date
         and code become the current context); rows with a product name are
         detail records; code-only rows are per-period summary rows.

    Returns a dict with the matched column names ('columns'), flat detail
    records ('raw_data'), and per-day aggregates ('daily_summary').
    Raises Exception (Chinese message) when the required columns cannot be
    identified or processing fails.
    """
    try:
        # 1. Identify columns by name keywords.
        cols = df.columns.tolist()
        col_map = {}

        # Keyword lists are ordered by priority: earlier entries win.
        keywords = {
            'date': ['时间', '日期', 'Time', 'Date'],
            'product': ['商品', '品名', '详情', 'Product', 'Name', 'Description'],
            'quantity': ['数量', '件数', 'Quantity', 'Qty', 'Count'],
            'amount': ['金额', '总价', 'Amount', 'Price', 'Total'],
            'code': ['编码', '货号', '代码', 'Code', 'No']
        }

        for key, priority_words in keywords.items():
            for word in priority_words:
                found = False
                for col in cols:
                    if word in str(col):
                        col_map[key] = col
                        found = True
                        break  # Found highest priority match
                if found: break

        # Fallback compatible with the old logic: map by column position.
        if 'date' not in col_map and len(cols) > 1: col_map['date'] = cols[1]
        if 'product' not in col_map and len(cols) > 2: col_map['product'] = cols[2]
        if 'quantity' not in col_map and len(cols) > 3: col_map['quantity'] = cols[3]
        if 'amount' not in col_map and len(cols) > 4: col_map['amount'] = cols[4]
        if 'code' not in col_map and len(cols) > 0: col_map['code'] = cols[0]

        if not all(k in col_map for k in ['date', 'product', 'quantity', 'amount']):
            # Looser positional matching as a last resort.
            if len(cols) >= 5:
                col_map['code'] = cols[0]
                col_map['date'] = cols[1]
                col_map['product'] = cols[2]
                col_map['quantity'] = cols[3]
                col_map['amount'] = cols[4]
            else:
                raise Exception("无法识别必要的列(时间、商品、数量、金额),请检查Excel格式或列名")

        # 2. Rename the matched columns to canonical names for processing.
        rename_dict = {
            col_map['date']: 'date',
            col_map['product']: 'product',
            col_map['quantity']: 'quantity',
            col_map['amount']: 'amount'
        }
        if 'code' in col_map:
            rename_dict[col_map['code']] = 'code'

        df = df.rename(columns=rename_dict)

        # Ensure a 'code' column exists even when none was matched.
        if 'code' not in df.columns:
            df['code'] = ''

        # 3. Data cleaning: non-numeric quantity/amount values become 0.
        for col in ['quantity', 'amount']:
            df[col] = pd.to_numeric(df[col], errors='coerce').fillna(0)

        # 4. Date parsing (unparseable values become NaT).
        df['parsed_date'] = pd.to_datetime(df['date'], errors='coerce')

        # 5. "ID block" parsing logic (state machine).

        processed_data = []
        daily_summary = {}

        def get_day_entry(date_str):
            # Lazily create (and return) the aggregate record for this date.
            if date_str not in daily_summary:
                daily_summary[date_str] = {
                    'date': date_str,
                    'total_quantity': 0,
                    'total_amount': 0,
                    'products': [],
                    'summary_info': None
                }
            return daily_summary[date_str]

        # State carried across rows: the current block's date, code, and the
        # header row's totals (kept for reference only).
        current_context = {
            'date_str': None,
            'code': None,
            'header_quantity': 0,  # totals the header row may carry (reference)
            'header_amount': 0
        }

        for index, row in df.iterrows():
            # Does the current row carry an ID (code)?
            has_code = pd.notna(row['code']) and str(row['code']).strip() != ''

            # Does the current row carry a product name?
            has_product = pd.notna(row['product']) and str(row['product']).strip() != ''

            # --- State update ---
            if has_code:
                # This is the start of a new "block" (header row).
                if pd.notna(row['parsed_date']):
                    current_context['date_str'] = row['parsed_date'].strftime('%Y-%m-%d %H:%M:%S')
                else:
                    # A code row without a date: carry over the previous one?
                    # Or is this still the same block?  Assuming code rows must
                    # have a date, a missing one is likely bad data — keeping
                    # the previous date is the safer choice here.
                    pass

                current_context['code'] = str(row['code'])
                current_context['header_quantity'] = float(row['quantity'])
                current_context['header_amount'] = float(row['amount'])

                # If the header row itself has no detail content (quantity/
                # amount sit on the header): in the hierarchical layout the
                # header only carries totals; in the flat layout it is also a
                # detail row.  has_product distinguishes the two:
                #   flat-table row:       code present AND product present
                #   hierarchical header:  code present, product absent

            # --- Record handling ---

            # Without a valid date context the row cannot be filed; skip it.
            if not current_context['date_str']:
                continue

            date_str = current_context['date_str']
            code = current_context['code']

            if has_product:
                # -> Detail record (either a flat-table row or a child row of
                #    a hierarchical block).
                product_name = str(row['product']).strip()
                quantity = float(row['quantity'])
                amount = float(row['amount'])

                # Unit price (0 when quantity is not positive).
                price = amount / quantity if quantity > 0 else 0

                # Accumulate into the day's totals.
                entry = get_day_entry(date_str)
                entry['total_quantity'] += quantity
                entry['total_amount'] += amount

                product_info = {
                    'product': product_name,
                    'quantity': quantity,
                    'amount': amount,
                    'price': price,
                    'code': code
                }
                entry['products'].append(product_info)

                processed_data.append({
                    'date': date_str,
                    'product': product_name,
                    'quantity': quantity,
                    'amount': amount,
                    'price': price,
                    'is_summary': False,
                    'code': code
                })

            elif has_code and not has_product:
                # -> Pure header row (hierarchical layout).
                # Record its totals as reference info, not added to the
                # daily_summary accumulation (unless there are no detail rows
                # at all — handled in the post-processing pass below).  A
                # special entry is still emitted so the UI can show the
                # summary row if needed.

                # Only record when the row actually carries numbers.
                if row['quantity'] > 0 or row['amount'] > 0:
                    entry = get_day_entry(date_str)
                    entry['summary_info'] = {
                        'total_quantity': float(row['quantity']),
                        'total_amount': float(row['amount']),
                        'code': code
                    }

                    processed_data.append({
                        'date': date_str,
                        'product': '【时间段总计】',
                        'quantity': float(row['quantity']),
                        'amount': float(row['amount']),
                        'is_summary': True,
                        'code': code
                    })

        # Post-processing: a day with no detail rows but a summary_info uses
        # the summary's totals as its own.
        for date_str, entry in daily_summary.items():
            if entry['total_quantity'] == 0 and entry['total_amount'] == 0 and entry['summary_info']:
                entry['total_quantity'] = entry['summary_info']['total_quantity']
                entry['total_amount'] = entry['summary_info']['total_amount']

        return {
            'columns': {
                'code': col_map.get('code', ''),
                'date': col_map['date'],
                'product': col_map['product'],
                'quantity': col_map['quantity'],
                'amount': col_map['amount']
            },
            'raw_data': processed_data,
            'daily_summary': list(daily_summary.values())
        }

    except Exception as e:
        import traceback
        traceback.print_exc()
        raise Exception(f"数据处理失败: {str(e)}")
|
|
|
|
if __name__ == '__main__':
    env = os.environ
    listen_port = int(env.get('PORT', 5000))
    # Debug defaults to on; set FLASK_DEBUG=false for production deployments.
    is_debug = env.get('FLASK_DEBUG', 'True').lower() == 'true'
    app.run(debug=is_debug, host='0.0.0.0', port=listen_port)