import pandas as pd

# Path of the input CSV; replace with the path to your own CSV file.
INPUT_PATH = 'input_data/input_firm_data/firm_amended.csv'

# Path of the Excel report written by main().
OUTPUT_PATH = "企业规模数据描述性统计表.xlsx"

# Field type of every analysed column (assigned by hand here; it could also
# be inferred automatically). Insertion order is the row order of the report.
COLUMN_TYPES = {
    "固定资产原值（万元人民币）": "连续型",
    "固定资产净值（万元人民币）": "连续型",
    "资产总和（万元人民币）": "连续型",
    "存货（万元人民币）": "连续型",
}

# Columns to analyse, derived from COLUMN_TYPES so the two cannot drift apart.
COLUMNS = list(COLUMN_TYPES)

# Column order of the summary table.
SUMMARY_COLUMNS = [
    "字段名",
    "类型",
    "计数（非空）",
    "均值",
    "标准差",
    "最小值",
    "中位数",
    "最大值",
]


def summarize_columns(df: pd.DataFrame, columns: list, column_types: dict) -> pd.DataFrame:
    """Build a descriptive-statistics table with one row per column.

    Args:
        df: Data frame holding the columns to analyse.
        columns: Names of the columns to summarise, in output row order.
        column_types: Mapping from column name to its field-type label.

    Returns:
        A DataFrame whose columns follow SUMMARY_COLUMNS: field name, type
        label, non-null count, mean, standard deviation, minimum, median
        and maximum. Missing values are dropped before computing.

    Raises:
        KeyError: If a name in ``columns`` is missing from ``df`` or from
            ``column_types``.
    """
    rows = []
    for col in columns:
        values = df[col].dropna()
        rows.append({
            "字段名": col,
            "类型": column_types[col],
            "计数（非空）": values.count(),
            "均值": values.mean(),
            "标准差": values.std(),
            "最小值": values.min(),
            "中位数": values.median(),
            "最大值": values.max(),
        })
    # Passing the column order to the constructor (instead of reordering
    # afterwards) keeps the header intact even when ``columns`` is empty.
    return pd.DataFrame(rows, columns=SUMMARY_COLUMNS)


def main(input_path: str = INPUT_PATH, output_path: str = OUTPUT_PATH) -> None:
    """Read the CSV, print the summary table and save it as an Excel file.

    Args:
        input_path: Path of the CSV file to read.
        output_path: Path of the Excel file to write.
    """
    # Load the data.
    df = pd.read_csv(input_path)

    # Compute the statistics.
    summary_df = summarize_columns(df, COLUMNS, COLUMN_TYPES)

    # Print the result.
    print(summary_df)

    # Save as an Excel file.
    summary_df.to_excel(output_path, index=False)
    print(f"统计结果已保存为 Excel 文件：{output_path}")


# Run the report only when executed as a script, so importing this module
# performs no file I/O.
if __name__ == "__main__":
    main()