# Listing header from the original paste: 50 lines, 1.4 KiB, Python
"""Descriptive statistics for selected firm-size columns of a CSV file.

When run as a script, reads the firm data, computes summary statistics for
each configured column, prints the resulting table and saves it as an Excel
workbook.
"""
import pandas as pd

# Columns to analyse.
columns = [
    "固定资产原值(万元人民币)",
    "固定资产净值(万元人民币)",
    "资产总和(万元人民币)",
    "存货(万元人民币)",
]

# Field type of each column (may be set by hand or inferred automatically).
column_types = {
    "固定资产原值(万元人民币)": "连续型",
    "固定资产净值(万元人民币)": "连续型",
    "资产总和(万元人民币)": "连续型",
    "存货(万元人民币)": "连续型",
}

# Column order of the summary table.
SUMMARY_COLUMNS = [
    "字段名",
    "类型",
    "计数(非空)",
    "均值",
    "标准差",
    "最小值",
    "中位数",
    "最大值",
]


def summarize_columns(
    df: pd.DataFrame, columns: list, column_types: dict
) -> pd.DataFrame:
    """Build a descriptive-statistics table for the given columns.

    For every name in ``columns`` the null values are dropped and the count,
    mean, standard deviation (pandas default), minimum, median and maximum
    of the remaining values are computed.

    Args:
        df: Data containing the columns to analyse.
        columns: Names of the columns to summarise, in output order.
        column_types: Mapping from column name to its type label; the label
            is copied into the table unchanged.

    Returns:
        A DataFrame with one row per requested column and the columns listed
        in ``SUMMARY_COLUMNS``. It is empty (but still has those columns)
        when ``columns`` is empty.

    Raises:
        KeyError: If a requested column is absent from ``df`` or has no
            entry in ``column_types``.
    """
    # Fail early with the full list of absent columns rather than on the
    # first failing lookup inside the loop.
    missing = [col for col in columns if col not in df.columns]
    if missing:
        raise KeyError(f"columns not found in data: {missing}")

    rows = []
    for col in columns:
        data = df[col].dropna()
        rows.append({
            "字段名": col,
            "类型": column_types[col],
            "计数(非空)": data.count(),
            "均值": data.mean(),
            "标准差": data.std(),
            "最小值": data.min(),
            "中位数": data.median(),
            "最大值": data.max(),
        })

    # Passing ``columns=`` fixes the column order and keeps the header even
    # when there are no rows, where indexing by label would raise KeyError.
    return pd.DataFrame(rows, columns=SUMMARY_COLUMNS)


if __name__ == "__main__":
    # Read the data (replace with the path to your CSV file).
    df = pd.read_csv('input_data/input_firm_data/firm_amended.csv')

    # Compute the statistics and show them.
    summary_df = summarize_columns(df, columns, column_types)
    print(summary_df)

    # Save as an Excel file.
    # NOTE(review): pandas needs an Excel writer engine (e.g. openpyxl)
    # installed for this call -- confirm it is available in the environment.
    output_path = "企业规模数据描述性统计表.xlsx"
    summary_df.to_excel(output_path, index=False)

    print(f"统计结果已保存为 Excel 文件:{output_path}")