# NhansuperCa/test_full_system.py

"""
Script demo đầy đủ - Test các chức năng của hệ thống
"""

from predict_staff import StaffPredictor
import pandas as pd

# Column names of the prediction table assembled by test_all_features().
_BUILDING_COL = 'Mã địa điểm'
_SHIFT_COL = 'Ca làm việc'
_PRED_COL = 'Số nhân sự dự đoán'

# Shift labels that are tried for every building in step 5.
_COMMON_SHIFTS = (
    '6h00:14h00',
    '14h00:22h00',
    '6h30-14h30',
    '13h00-21h00',
    '07:00-17:00',
    '8:00-17:00',
)


def _print_section(title):
    """Print a numbered section title followed by a 70-dash rule."""
    print(f"\n{title}")
    print("-"*70)


def _predict_all(predictor, buildings, shifts):
    """Predict staff for every (building, shift) pair; return the successes.

    A pair whose prediction (or the formatting of its result) fails is
    reported on stdout and left out of the returned list of records.
    """
    records = []
    for building in buildings:
        print(f"\n🏢 Tòa nhà: {building}")
        for shift in shifts:
            try:
                pred = predictor.predict_staff(building, shift)
                # The 'd' format assumes predict_staff returns an integer;
                # anything else is reported as an error for this pair.
                print(f"  ⏰ {shift:20s} → 👥 {pred:2d} người")
            except Exception as e:
                # Best-effort demo: one failing pair must not stop the run.
                print(f"  ⏰ {shift:20s} → ❌ Lỗi: {e}")
            else:
                records.append({
                    _BUILDING_COL: building,
                    _SHIFT_COL: shift,
                    _PRED_COL: pred,
                })
    return records


def _print_group_stats(pred_df, group_col):
    """Print sum/mean/min/max of the predicted staff grouped by *group_col*."""
    stats = pred_df.groupby(group_col)[_PRED_COL].agg([
        ('Tổng', 'sum'),
        ('Trung bình', 'mean'),
        ('Min', 'min'),
        ('Max', 'max'),
    ]).round(1)
    print(stats)


def _compare_with_actual(predictor, sample):
    """Print predicted vs. recorded staff for each row of *sample*.

    *sample* must provide the columns 'Mã địa điểm', 'Ca' and 'Number'.
    """
    print(f"\n{'Mã địa điểm':15s} {'Ca làm việc':25s} {'Thực tế':10s} {'Dự đoán':10s} {'Chênh lệch':12s}")
    print("-"*75)

    for _, row in sample.iterrows():
        building = row['Mã địa điểm']
        shift = row['Ca']
        real_staff = row['Number']
        # str() keeps the row label printable even when a code or shift is
        # not a string (e.g. a numeric building code or a missing value).
        label = f"{str(building):15s} {str(shift):25s}"

        try:
            pred_staff = predictor.predict_staff(building, shift)
            diff = pred_staff - real_staff
            # Avoid dividing by zero when no staff was recorded.
            diff_pct = (diff / real_staff * 100) if real_staff > 0 else 0

            print(f"{label} {real_staff:10d} {pred_staff:10d} "
                  f"{diff:+5d} ({diff_pct:+.1f}%)")
        except Exception as e:
            # The fallback line must not use the 'd' format: a non-integer
            # value may be exactly what made the line above fail.
            print(f"{label} {str(real_staff):>10s} {'ERROR':10s} {e}")


def _export_results(pred_df):
    """Write the flat prediction table and its building x shift pivot to CSV."""
    output_file = 'ket_qua_du_doan_day_du.csv'
    # utf-8-sig writes a BOM in front of the Vietnamese headers.
    pred_df.to_csv(output_file, index=False, encoding='utf-8-sig')
    print(f"✅ Đã xuất {len(pred_df)} dự đoán ra file: {output_file}")

    pivot_df = pred_df.pivot_table(
        index=_BUILDING_COL,
        columns=_SHIFT_COL,
        values=_PRED_COL,
        aggfunc='mean'
    ).round(0)

    pivot_file = 'bang_nhan_su_theo_toa_ca.csv'
    pivot_df.to_csv(pivot_file, encoding='utf-8-sig')
    print(f"✅ Đã xuất ma trận pivot ra file: {pivot_file}")


def test_all_features():
    """Run the whole pipeline end to end and print a report.

    Steps: load data, prepare features, train a random-forest model, show
    feature importance, predict staff for every building and common shift,
    print summary statistics, compare predictions with the first 10 rows of
    the shift data, and export the predictions to two CSV files
    ('ket_qua_du_doan_day_du.csv' and 'bang_nhan_su_theo_toa_ca.csv',
    both given as relative paths).

    When no prediction succeeds, the statistics and the export are skipped
    with a warning instead of failing on an empty table.

    Returns:
        tuple: ``(predictor, pred_df)`` - the trained ``StaffPredictor`` and a
        DataFrame with one row per successful (building, shift) prediction.
    """

    print("="*70)
    print("🧪 KIỂM TRA TOÀN BỘ HỆ THỐNG")
    print("="*70)

    # 1. Create the predictor and load the data
    _print_section("1️⃣ KHỞI TẠO VÀ LOAD DỮ LIỆU")
    predictor = StaffPredictor()
    building_df, shift_df = predictor.load_data()

    # 2. Prepare features (the third returned value is not needed here)
    _print_section("2️⃣ CHUẨN BỊ FEATURES")
    X, y, _ = predictor.prepare_features()

    # 3. Train the model
    _print_section("3️⃣ TRAIN MODEL")
    predictor.train_model(X, y, model_type='random_forest')

    # 4. Feature importance
    _print_section("4️⃣ FEATURE IMPORTANCE")
    predictor.show_feature_importance()

    # 5. Predict for ALL buildings
    _print_section("5️⃣ DỰ ĐOÁN CHO TẤT CẢ TÒA NHÀ")
    available_buildings = building_df[_BUILDING_COL].tolist()
    all_predictions = _predict_all(predictor, available_buildings, _COMMON_SHIFTS)

    # Passing the columns explicitly keeps them present even when no
    # prediction succeeded, so later column lookups cannot raise KeyError.
    pred_df = pd.DataFrame(
        all_predictions,
        columns=[_BUILDING_COL, _SHIFT_COL, _PRED_COL],
    )

    # 6. Summary report
    _print_section("6️⃣ BÁO CÁO TỔNG HỢP")
    if pred_df.empty:
        print("⚠️ Không có dự đoán nào thành công - bỏ qua thống kê.")
    else:
        print("\n📊 Tổng nhân sự cần thiết cho mỗi tòa (tất cả các ca):")
        _print_group_stats(pred_df, _BUILDING_COL)

        print("\n📊 Tổng nhân sự cần thiết cho mỗi ca (tất cả các tòa):")
        _print_group_stats(pred_df, _SHIFT_COL)

    # 7. Compare with the recorded data (first 10 rows as a sample)
    _print_section("7️⃣ SO SÁNH VỚI DỮ LIỆU THỰC TẾ")
    _compare_with_actual(predictor, shift_df.head(10))

    # 8. Export the results
    _print_section("8️⃣ XUẤT KẾT QUẢ")
    if pred_df.empty:
        print("⚠️ Không có dự đoán nào để xuất - bỏ qua bước ghi file.")
    else:
        _export_results(pred_df)

    print("\n" + "="*70)
    print("✅ HOÀN THÀNH KIỂM TRA TOÀN BỘ HỆ THỐNG!")
    print("="*70)

    return predictor, pred_df


if __name__ == "__main__":
    # Script entry point: run the full check, then print usage hints.
    # The names `predictor` and `predictions` are kept as-is because the
    # hints printed below refer to them.
    predictor, predictions = test_all_features()

    # One print call with a newline separator emits the same four lines.
    print(
        "\n💡 Bạn có thể sử dụng predictor để dự đoán:",
        "   predictor.predict_staff('559-1', '14h00:22h00')",
        "\n💡 Xem kết quả dự đoán:",
        "   predictions.head()",
        sep="\n",
    )