diff --git a/models/best_model_gradient_boosting.pkl b/models/best_model_gradient_boosting.pkl deleted file mode 100644 index 6f6aec6..0000000 Binary files a/models/best_model_gradient_boosting.pkl and /dev/null differ diff --git a/models/columns_to_scale.txt b/models/columns_to_scale.txt deleted file mode 100644 index 9af1b7e..0000000 --- a/models/columns_to_scale.txt +++ /dev/null @@ -1,49 +0,0 @@ -so_ca_cua_toa -num_tasks -num_cleaning_tasks -num_trash_collection_tasks -num_monitoring_tasks -num_room_cleaning_tasks -num_deep_cleaning_tasks -num_maintenance_tasks -num_support_tasks -num_other_tasks -num_wc_tasks -num_hallway_tasks -num_lobby_tasks -num_patient_room_tasks -num_clinic_room_tasks -num_surgery_room_tasks -num_outdoor_tasks -num_elevator_tasks -num_office_tasks -num_technical_room_tasks -cleaning_ratio -trash_collection_ratio -monitoring_ratio -room_cleaning_ratio -area_diversity -task_complexity_score -so_tang -so_cua_thang_may -dien_tich_ngoai_canh -dien_tich_sanh -dien_tich_hanh_lang -dien_tich_wc -dien_tich_phong -dien_tich_tham -doc_ham -vien_phan_quang -op_tuong -op_chan_tuong -ranh_thoat_nuoc -dien_tich_kinh -hour_start -hour_end -work_hours_numeric -tasks_per_hour -tasks_per_floor -wc_per_floor -cleaning_workload -total_area -area_per_floor \ No newline at end of file diff --git a/models/feature_selection_results.csv b/models/feature_selection_results.csv deleted file mode 100644 index 96661a8..0000000 --- a/models/feature_selection_results.csv +++ /dev/null @@ -1,22 +0,0 @@ -num_features,feature_set,model,val_mae,val_r2 -49,All Features,Gradient Boosting,3.634581261520732,-0.012930925436683394 -20,Top 20,Random Forest,3.666060757466078,0.03739430732677618 -20,Top 20,Gradient Boosting,3.7771892175715056,-0.06012882358872096 -20,Top 20,XGBoost,3.641647574774335,0.015787720680236816 -20,Top 20,LightGBM,3.748396263264872,0.049515999401856026 -25,Top 25,Random Forest,3.614868221684315,0.06809266983693418 -25,Top 25,Gradient Boosting,3.780284523284983,-0.05195262322328653 -25,Top 25,XGBoost,3.760102046004842,-0.01795041561126709 -25,Top 25,LightGBM,3.7433780829414327,0.04227330886423497 -30,Top 30,Random Forest,3.6134273351492454,0.0712211389609152 -30,Top 30,Gradient Boosting,3.8167183021556417,-0.08445111108289871 -30,Top 30,XGBoost,3.8263845290528256,-0.036774635314941406 -30,Top 30,LightGBM,3.6913964071303145,0.0628657702857146 -35,Top 35,Random Forest,3.6403561822073054,0.07301240475906845 -35,Top 35,Gradient Boosting,3.91511098479336,-0.11251667765570872 -35,Top 35,XGBoost,3.884480058477166,-0.052086710929870605 -35,Top 35,LightGBM,3.8490180856510774,0.0009015985532334625 -40,Top 40,Random Forest,3.6265115302924293,0.07854970083459945 -40,Top 40,Gradient Boosting,3.8841932753369783,-0.08916849029447449 -40,Top 40,XGBoost,3.9312105964911117,-0.07247793674468994 -40,Top 40,LightGBM,3.747326913269243,0.029446228984498002 diff --git a/models/final_model_random_forest_top15.pkl b/models/final_model_random_forest_top15.pkl deleted file mode 100644 index 66ed4c8..0000000 Binary files a/models/final_model_random_forest_top15.pkl and /dev/null differ diff --git a/models/final_model_random_forest_top30.pkl b/models/final_model_random_forest_top30.pkl deleted file mode 100644 index 44b9770..0000000 Binary files a/models/final_model_random_forest_top30.pkl and /dev/null differ diff --git a/models/kfold_cv_results.csv b/models/kfold_cv_results.csv deleted file mode 100644 index 40d1704..0000000 --- a/models/kfold_cv_results.csv +++ /dev/null @@ -1,7 +0,0 @@ -Model,Mean_MAE,Std_MAE,Mean_R2,Std_R2 -Gradient Boosting,3.3027367118164057,0.8887355706131049,-0.10233682335888679,0.5154478192871355 -XGBoost,3.323695227191383,0.7236534911368803,-0.14202730655670165,0.5722418063476958 -Random Forest,3.352265185800016,0.8553895357341633,-0.06523395173555772,0.35068316384382126 -LightGBM,3.401600572401869,0.8548539009927945,-0.006448200247211444,0.24861511037048697 -Linear Regression,3.765361427841345,0.8860772035556437,-0.17797005506788818,0.5737173906817382 -Decision Tree,3.8242160456852985,0.8261306554174034,-0.7565366143260073,1.4465792608602213 diff --git a/models/model_comparison.csv b/models/model_comparison.csv deleted file mode 100644 index 8e697ff..0000000 --- a/models/model_comparison.csv +++ /dev/null @@ -1,8 +0,0 @@ -Model,Train_MAE,Train_RMSE,Train_R2,Val_MAE,Val_RMSE,Val_R2 -Mean Baseline,3.956762233641091,6.008718072893705,0.0,5.108172961558765,9.743328619259394,0.0 -Linear Regression,2.8487431324413386,4.306219455232728,0.48639568660771626,4.93124960105569,10.131361272284664,-0.0820337223062475 -Decision Tree,1.4165098388571058,2.927374540160555,0.7626479824388336,3.384117292326248,7.458279107629457,0.41361553592413347 -Random Forest,1.5188249096595474,2.651411767397892,0.8052889029203236,3.557118610267275,8.094827409344068,0.3092508183789415 -Gradient Boosting,0.5002223075020469,1.163143548193567,0.9625283361855776,3.0356483741790528,7.556467123302565,0.39807443890459426 -XGBoost,0.5888747572898865,1.2343001041144357,0.9578033685684204,3.2599263191223145,8.016751870107681,0.3225112557411194 -LightGBM,1.527799546634308,2.6791994654613362,0.8011862391495337,3.602681095009421,8.17893904810045,0.29482138206791697 diff --git a/models/scaler.pkl b/models/scaler.pkl deleted file mode 100644 index d2568b6..0000000 Binary files a/models/scaler.pkl and /dev/null differ diff --git a/models/scaler_final.pkl b/models/scaler_final.pkl deleted file mode 100644 index 1666012..0000000 Binary files a/models/scaler_final.pkl and /dev/null differ diff --git a/models/selected_features_top15.txt b/models/selected_features_top15.txt deleted file mode 100644 index 1b5dae1..0000000 --- a/models/selected_features_top15.txt +++ /dev/null @@ -1,15 +0,0 @@ -op_chan_tuong -loai_hinh -so_cua_thang_may -num_hallway_tasks -cleaning_ratio -op_tuong -area_diversity -num_office_tasks -tong_gio_lam -num_tasks -dien_tich_ngoai_canh -num_clinic_room_tasks -doc_ham -num_elevator_tasks -num_maintenance_tasks \ No newline at end of file diff --git a/models/selected_features_top30.txt b/models/selected_features_top30.txt deleted file mode 100644 index 6010c46..0000000 --- a/models/selected_features_top30.txt +++ /dev/null @@ -1,30 +0,0 @@ -dien_tich_phong -tasks_per_hour -num_trash_collection_tasks -num_surgery_room_tasks -dien_tich_hanh_lang -area_diversity -num_lobby_tasks -hour_end -dien_tich_ngoai_canh -monitoring_ratio -num_patient_room_tasks -num_monitoring_tasks -tasks_per_floor -work_hours_numeric -trash_collection_ratio -dien_tich_tham -so_tang -so_ca_cua_toa -dien_tich_wc -num_wc_tasks -dien_tich_kinh -num_elevator_tasks -num_office_tasks -hour_start -cleaning_ratio -num_technical_room_tasks -ranh_thoat_nuoc -num_room_cleaning_tasks -num_deep_cleaning_tasks -num_tasks \ No newline at end of file diff --git a/models/training_report.json b/models/training_report.json deleted file mode 100644 index 5034dcf..0000000 --- a/models/training_report.json +++ /dev/null @@ -1,77 +0,0 @@ -{ - "best_model": "Gradient Boosting", - "test_mae": 2.715735914594871, - "test_rmse": 4.8725908385837755, - "test_r2": 0.47268268124245805, - "val_mae": 2.9036818323527176, - "val_r2": 0.14702091178419308, - "train_size": 304, - "val_size": 66, - "test_size": 66, - "n_features": 63, - "feature_names": [ - "so_ca_cua_toa", - "num_tasks", - "num_cleaning_tasks", - "num_trash_collection_tasks", - "num_monitoring_tasks", - "num_room_cleaning_tasks", - "num_deep_cleaning_tasks", - "num_maintenance_tasks", - "num_support_tasks", - "num_other_tasks", - "num_wc_tasks", - "num_hallway_tasks", - "num_lobby_tasks", - "num_patient_room_tasks", - "num_clinic_room_tasks", - "num_surgery_room_tasks", - "num_outdoor_tasks", - "num_elevator_tasks", - "num_office_tasks", - "num_technical_room_tasks", - "cleaning_ratio", - "trash_collection_ratio", - "monitoring_ratio", - "room_cleaning_ratio", - "area_diversity", - "task_complexity_score", - "loai_hinh", - "muc_do_luu_luong", - "so_tang", - "so_cua_thang_may", - "dien_tich_ngoai_canh", - "dien_tich_sanh", - "dien_tich_hanh_lang", - "dien_tich_wc", - "dien_tich_phong", - "dien_tich_tham", - "doc_ham", - "vien_phan_quang", - "op_tuong", - "op_chan_tuong", - "ranh_thoat_nuoc", - "dien_tich_kinh", - "hour_start", - "hour_end", - "work_hours_numeric", - "is_morning_shift", - "is_afternoon_shift", - "is_evening_shift", - "is_night_shift", - "tasks_per_hour", - "tasks_per_floor", - "wc_per_floor", - "cleaning_workload", - "total_area", - "area_per_floor", - "has_special_areas", - "loai_ca_24/24", - "loai_ca_Ca chiều", - "loai_ca_Ca gãy", - "loai_ca_Ca sáng", - "loai_ca_Ca đêm", - "loai_ca_Hành chính", - "loai_ca_Part time" - ] -} \ No newline at end of file diff --git a/models/training_report_final.json b/models/training_report_final.json deleted file mode 100644 index ca41da4..0000000 --- a/models/training_report_final.json +++ /dev/null @@ -1,104 +0,0 @@ -{ - "timestamp": "2026-01-08 17:59:10", - "model_name": "Random Forest", - "num_features": 30, - "total_features_available": 49, - "training_samples": 356, - "test_samples": 89, - "test_mae": 2.959679790870802, - "test_mse": 22.52809628510788, - "test_rmse": 4.74637717476265, - "test_r2": 0.4146924609040473, - "baseline_val_mae": 3.634581261520732, - "improvement_vs_baseline": 0.6749014706499299, - "improvement_pct": 18.568892042533307, - "k_fold_cv": "Completed (5-fold)", - "feature_selection_methods": [ - "Random Forest", - "Mutual Information", - "Correlation" - ], - "selected_features": [ - "dien_tich_phong", - "tasks_per_hour", - "num_trash_collection_tasks", - "num_surgery_room_tasks", - "dien_tich_hanh_lang", - "area_diversity", - "num_lobby_tasks", - "hour_end", - "dien_tich_ngoai_canh", - "monitoring_ratio", - "num_patient_room_tasks", - "num_monitoring_tasks", - "tasks_per_floor", - "work_hours_numeric", - "trash_collection_ratio", - "dien_tich_tham", - "so_tang", - "so_ca_cua_toa", - "dien_tich_wc", - "num_wc_tasks", - "dien_tich_kinh", - "num_elevator_tasks", - "num_office_tasks", - "hour_start", - "cleaning_ratio", - "num_technical_room_tasks", - "ranh_thoat_nuoc", - "num_room_cleaning_tasks", - "num_deep_cleaning_tasks", - "num_tasks" - ], - "cols_to_scale": [ - "so_ca_cua_toa", - "num_tasks", - "num_cleaning_tasks", - "num_trash_collection_tasks", - "num_monitoring_tasks", - "num_room_cleaning_tasks", - "num_deep_cleaning_tasks", - "num_maintenance_tasks", - "num_support_tasks", - "num_other_tasks", - "num_wc_tasks", - "num_hallway_tasks", - "num_lobby_tasks", - "num_patient_room_tasks", - "num_clinic_room_tasks", - "num_surgery_room_tasks", - "num_outdoor_tasks", - "num_elevator_tasks", - "num_office_tasks", - "num_technical_room_tasks", - "cleaning_ratio", - "trash_collection_ratio", - "monitoring_ratio", - "room_cleaning_ratio", - "area_diversity", - "task_complexity_score", - "so_tang", - "so_cua_thang_may", - "dien_tich_ngoai_canh", - "dien_tich_sanh", - "dien_tich_hanh_lang", - "dien_tich_wc", - "dien_tich_phong", - "dien_tich_tham", - "doc_ham", - "vien_phan_quang", - "op_tuong", - "op_chan_tuong", - "ranh_thoat_nuoc", - "dien_tich_kinh", - "hour_start", - "hour_end", - "work_hours_numeric", - "tasks_per_hour", - "tasks_per_floor", - "wc_per_floor", - "cleaning_workload", - "total_area", - "area_per_floor" - ] -} \ No newline at end of file