96 lines
2.6 KiB
Python
96 lines
2.6 KiB
Python
import math
import os
import re

import pandas as pd
|
|
|
|
pd.set_option('future.no_silent_downcasting', True)
|
|
|
|
INPUT_DIR = "input_files"
|
|
OUTPUT_DIR = "output_files"
|
|
|
|
|
|
# Runs of line breaks, backslashes and forward slashes act as separators.
_SEPARATOR_RE = re.compile(r'[\n\r\\/]+')
_WHITESPACE_RE = re.compile(r'\s+')


def clean_and_convert(value):
    """Normalise a cell value and convert numeric-looking text to a number.

    Non-string values are returned unchanged. For strings, runs of line
    breaks, backslashes and forward slashes are replaced by a space, every
    whitespace run is collapsed to a single space and the result is stripped.
    If the cleaned text parses as a finite number it is returned as ``int``
    (whole values) or ``float``; otherwise the cleaned text is returned.

    Args:
        value: Any cell value or column label.

    Returns:
        The cleaned string, an ``int``/``float`` parsed from it, or *value*
        itself when it is not a string.
    """
    if not isinstance(value, str):
        return value

    text = _SEPARATOR_RE.sub(' ', value)
    text = _WHITESPACE_RE.sub(' ', text).strip()

    try:
        number = float(text)
    except ValueError:
        return text

    # float() also accepts "nan", "inf" and "infinity" (and overflows huge
    # literals to inf). Keep such cells as text instead of turning them into
    # non-finite floats.
    if not math.isfinite(number):
        return text
    return int(number) if number.is_integer() else number
|
|
|
|
|
|
def deduplicate_columns(df):
    """Make the column labels of *df* unique by suffixing repeated labels.

    The first occurrence of a label is kept as is. Each later occurrence is
    renamed to ``"<label>.<n>"`` with ``n`` counting up from 1. If that name
    is already taken (for example the frame holds ``"A"``, ``"A"`` and
    ``"A.1"``), ``n`` keeps increasing until a free name is found, so the
    resulting labels are always unique.

    Args:
        df: DataFrame whose ``columns`` are rewritten in place.

    Returns:
        The same DataFrame object, with unique column labels.
    """
    taken = set()
    last_suffix = {}
    unique_labels = []

    for label in df.columns.tolist():
        candidate = label
        # Bump the per-label counter until the candidate collides with
        # nothing already emitted, including earlier generated names.
        while candidate in taken:
            last_suffix[label] = last_suffix.get(label, 0) + 1
            candidate = f"{label}.{last_suffix[label]}"
        taken.add(candidate)
        unique_labels.append(candidate)

    df.columns = unique_labels
    return df
|
|
|
|
|
|
# Worksheet that is extracted from every workbook.
_SHEET_NAME = "Khối lượng công việc chi tiết"


def _list_workbooks(directory):
    """Return the sorted ``.xlsx`` file names found directly in *directory*.

    The extension is matched case-insensitively so ``.XLSX`` files are not
    missed. Names starting with ``~$`` are skipped (presumably Excel's
    temporary lock files for open workbooks).
    """
    return sorted(
        name
        for name in os.listdir(directory)
        if name.lower().endswith('.xlsx') and not name.startswith('~$')
    )


def _convert_workbook(input_path, output_path):
    """Read the target worksheet of one workbook and write it as JSON records.

    Raises:
        ValueError: Propagated from pandas, e.g. when the worksheet is missing.
    """
    df = pd.read_excel(
        input_path,
        sheet_name=_SHEET_NAME,
        header=2,  # column labels are taken from the third row of the sheet
        dtype=object,
        engine='openpyxl',
    )

    # The first two rows below the header are discarded
    # (presumably sub-header rows; TODO confirm against the template).
    df = df.iloc[2:].reset_index(drop=True)
    # Cleaning can make two labels identical, so de-duplicate afterwards.
    df.columns = [str(clean_and_convert(col)) for col in df.columns]
    df = deduplicate_columns(df)
    df = df.map(clean_and_convert)
    df = df.fillna(0)

    df.to_json(output_path, orient='records', force_ascii=False, indent=4)


def process_batch():
    """Convert every workbook in ``INPUT_DIR`` to a JSON file in ``OUTPUT_DIR``.

    If ``INPUT_DIR`` does not exist it is created and the function returns so
    the user can drop files into it. Otherwise each ``.xlsx`` workbook is
    converted to ``<name>.json``; a failure in one workbook is reported on
    stdout and does not stop the remaining ones. Progress messages are
    printed throughout.

    Returns:
        None.
    """
    print(f"BATCH CONVERT: {INPUT_DIR} -> {OUTPUT_DIR}")

    if not os.path.exists(INPUT_DIR):
        os.makedirs(INPUT_DIR)
        print(f"Da tao thu muc '{INPUT_DIR}'. Hay copy file Excel vao do va chay lai!")
        return

    os.makedirs(OUTPUT_DIR, exist_ok=True)

    files = _list_workbooks(INPUT_DIR)
    if not files:
        print(f"Thu muc '{INPUT_DIR}' dang trong.")
        return

    print(f"Tim thay {len(files)} file. Bat dau xu ly...")

    for filename in files:
        input_path = os.path.join(INPUT_DIR, filename)
        base_name = os.path.splitext(filename)[0]
        output_path = os.path.join(OUTPUT_DIR, f"{base_name}.json")

        print(f"Dang xu ly: {filename} ...", end=" ")
        try:
            _convert_workbook(input_path, output_path)
        except ValueError as ve:
            # The message is inspected to tell a missing worksheet apart from
            # other ValueErrors.
            if "Worksheet" in str(ve):
                print(f"Loi: Khong co sheet '{_SHEET_NAME}'")
            else:
                print(f"Loi: {ve}")
        except Exception as e:
            # Deliberate best-effort boundary: one broken workbook must not
            # abort the rest of the batch.
            print(f"Loi: {e}")
        else:
            print("Xong")

    print("-" * 50)
    print(f"Hoan tat! Kiem tra ket qua tai: {os.path.abspath(OUTPUT_DIR)}")
|
|
|
|
|
|
# Run the batch conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    process_batch()