# File-viewer metadata: 37 lines, 1.6 KiB, Python
import os
import random

import numpy as np
import pandas as pd

# Build a table of synthetic company records (170 rows by default).
def generate_test_data(num_rows=170, seed=None):
    """Return a DataFrame of randomly generated company test data.

    Args:
        num_rows: Number of company rows to generate.
        seed: Optional integer seed. When given, the data is drawn from
            private generators seeded with this value, so the same seed
            always yields the same DataFrame and the global ``random`` /
            ``np.random`` state is left untouched. When ``None`` (the
            default), the global generators are used, as before.

    Returns:
        A ``pandas.DataFrame`` with one row per company and the columns
        'Company ID', 'Company Name', '原材料', '库存商品', '固定资产原值',
        'Revenue', 'Total Employees (People)', 'Type_Region',
        'Self-supply Business (Yes/No)', 'Revenue_Log',
        'production_output' and 'demand_quantity'.
    """
    # Without a seed, keep drawing from the module-level generators in the
    # original order, so callers that seed them globally get the same data.
    py_rng = random if seed is None else random.Random(seed)
    np_rng = np.random if seed is None else np.random.RandomState(seed)

    def _amounts(low, high):
        # One uniform draw per row, rounded to two decimals.
        return [round(py_rng.uniform(low, high), 2) for _ in range(num_rows)]

    data = {
        # Sequential company IDs 1..num_rows.
        'Company ID': list(range(1, num_rows + 1)),
        # Company names derived from the ID.
        'Company Name': [f'Company_{i}' for i in range(1, num_rows + 1)],
        # Raw materials.
        '原材料': _amounts(100, 1000),
        # Goods in stock.
        '库存商品': _amounts(100, 1000),
        # Original value of fixed assets.
        '固定资产原值': _amounts(100, 1000),
        # Revenue.
        'Revenue': _amounts(10000, 100000),
        # Total number of employees.
        'Total Employees (People)': [
            py_rng.randint(50, 1000) for _ in range(num_rows)
        ],
        # Region type.
        'Type_Region': [
            py_rng.choice(['Urban', 'Rural', 'Suburban'])
            for _ in range(num_rows)
        ],
        # Whether the company runs a self-supply business.
        'Self-supply Business (Yes/No)': [
            py_rng.choice(['Yes', 'No']) for _ in range(num_rows)
        ],
    }

    df = pd.DataFrame(data)

    # Natural log of revenue.
    df['Revenue_Log'] = np.log(df['Revenue'])
    # Derived columns: a tenth of the base column plus an integer offset
    # drawn from [100, 500).
    df['production_output'] = (
        df['固定资产原值'] / 10 + np_rng.randint(100, 500, size=len(df))
    )
    df['demand_quantity'] = (
        df['原材料'] / 10 + np_rng.randint(100, 500, size=len(df))
    )
    return df
# Generate the default data set.
df_test_data = generate_test_data()

# Show the first few rows.
print(df_test_data.head())

# Save the data to a CSV file. Create the output directory first, because
# to_csv raises OSError when the parent folder does not exist.
os.makedirs('input_data', exist_ok=True)
df_test_data.to_csv('input_data/测试 Firm_amended 170.csv', index=False)