debug average vs max

This commit is contained in:
AgentLabCn
2025-12-01 20:05:31 +08:00
parent d86e16e3a3
commit 135b762a29
7 changed files with 212 additions and 227 deletions

View File

@@ -89,7 +89,7 @@ class SimulationModel(Model):
self.cumulative_production = 0
self.monthly_totals = {}
self.production_log = []
self.mean_abs_error = 0.0
self.error = 0.0
# Allow explicit product_set override; otherwise fall back to config.
self.product_set = self._parse_product_set(product_set if product_set is not None else cfg.get("product_set"))
self.output_enabled = output_enabled
@@ -156,7 +156,7 @@ class SimulationModel(Model):
{
"cumulative_production": lambda m: m.cumulative_production,
"monthly_total": lambda m: m.monthly_totals.get(m.current_month, 0),
"mean_abs_error": lambda m: m.mean_abs_error,
"error": lambda m: m.error,
"fulfill_pct_overall": lambda m: m.overall_fulfill_pct,
"transport_cost": lambda m: m.monthly_transport_cost,
}
@@ -218,20 +218,9 @@ class SimulationModel(Model):
use_max: bool = True,
) -> dict:
"""
Compute error metrics based on cumulative monthly deviation.
For each factory:
- cumulative monthly production vs benchmark
- deviation ratio per month (can be negative)
- factory error = max or mean absolute deviation ratio across 12 months
Returns:
{
"mean_abs_error": float,
"factory_max_ratio": {factory_name: value},
"factory_cum_ratio": {factory_name: pd.Series of length 12 (ratios)}
}
Compute cumulative deviation ratios per factory (no aggregation).
Ratios are unitless (e.g., 0.02 == 2%).
"""
errors = []
factory_max_ratio = {}
factory_cum_ratio = {}
month_cols = [f"{m}" for m in range(1, 13)]
@@ -250,14 +239,10 @@ class SimulationModel(Model):
if actual_total <= 0:
continue
pct_ratio = (prod_cum - bench_cum) / actual_total # signed ratio
agg_val = pct_ratio.abs().max() if use_max else pct_ratio.abs().mean()
errors.append(float(agg_val))
factory_max_ratio[fname] = float(agg_val)
factory_cum_ratio[fname] = pct_ratio
factory_max_ratio[fname] = float(pct_ratio.abs().max())
mean_abs_error = float("inf") if not errors else sum(errors) / len(errors)
return {
"mean_abs_error": mean_abs_error,
"factory_max_ratio": factory_max_ratio,
"factory_cum_ratio": factory_cum_ratio,
}
@@ -864,7 +849,7 @@ class SimulationModel(Model):
def _finalize_factory_errors(self, write_files: bool):
if not self.production_log:
self.mean_abs_error = float("inf")
self.error = float("inf")
return
df = pd.DataFrame(self.production_log)
factory_pivot = (
@@ -887,18 +872,33 @@ class SimulationModel(Model):
benchmark = pd.read_csv(f"data/{filename}/benchmark.csv", encoding="utf-8-sig")
except UnicodeDecodeError:
benchmark = pd.read_csv(f"data/{filename}/benchmark.csv", encoding="gbk")
benchmark = benchmark.rename(columns=lambda c: str(c).strip())
month_cols = [col for col in benchmark.columns if str(col).strip().endswith("") and str(col).strip()[:-1].isdigit()]
month_cols = sorted(month_cols, key=lambda x: int(str(x).strip()[:-1]))
if len(month_cols) < 12:
raise KeyError(f"benchmark.csv 缺少月份列,找到: {month_cols}")
benchmark_sorted = benchmark.sort_values(by=benchmark.columns[0])
total_col = "总计"
month_cols = [f"{m}" for m in range(1, 13)]
if total_col not in benchmark_sorted.columns:
benchmark_sorted[total_col] = benchmark_sorted[month_cols].sum(axis=1)
stats = self._compute_factory_error_stats(factory_pivot, benchmark_sorted, total_col=total_col, use_max=self.use_error_max)
cum_pct_by_factory = {k: v * 100.0 for k, v in stats["factory_cum_ratio"].items()} # to %
if stats["mean_abs_error"] == float("inf"):
self.mean_abs_error = float("inf")
return
self.mean_abs_error = stats["mean_abs_error"]
stats = self._compute_factory_error_stats(
factory_pivot,
benchmark_sorted,
total_col=total_col,
use_max=self.use_error_max,
)
cum_ratio_by_factory = stats["factory_cum_ratio"]
# Per-factory error: max (use_error_max=True) or mean of the per-month absolute cumulative ratio.
factory_errors = []
for ratio_series in cum_ratio_by_factory.values():
metric_val = ratio_series.abs().max() if self.use_error_max else ratio_series.abs().mean()
factory_errors.append(float(metric_val))
if not factory_errors:
self.error = float("inf")
else:
# Aggregate across factories: max when use_error_max=True, otherwise mean.
self.error = max(factory_errors) if self.use_error_max else sum(factory_errors) / len(factory_errors)
if not write_files:
return
@@ -909,28 +909,30 @@ class SimulationModel(Model):
factory_report_path = os.path.join(output_dir, f"factory_report_{timestamp}.csv")
factory_pivot.to_csv(factory_report_path, index=False, encoding="utf-8-sig")
# Error summary based on cumulative deviation percentage (max abs per factory)
# Error summary based on cumulative deviation ratio (unitless, consistent with GA)
err_rows = []
for fname, pct_series in cum_pct_by_factory.items():
max_abs_pct = pct_series.abs().max() if self.use_error_max else pct_series.abs().mean()
err_rows.append({"工厂名称": fname, "最大累积偏差[%]" if self.use_error_max else "平均累积偏差[%]": float(max_abs_pct)})
error_df = pd.DataFrame(err_rows)
metric_col = "最大累计偏差" if self.use_error_max else "平均累计偏差"
for fname, ratio_series in cum_ratio_by_factory.items():
metric_val = ratio_series.abs().max() if self.use_error_max else ratio_series.abs().mean()
err_rows.append({"工厂名称": fname, metric_col: float(metric_val)})
error_df = pd.DataFrame(err_rows) if err_rows else pd.DataFrame(columns=["工厂名称", metric_col])
error_path = os.path.join(output_dir, f"factory_error_{timestamp}.csv")
error_df.to_csv(error_path, index=False, encoding="utf-8-sig")
# Error bar charts by month using cumulative percentage deviations
if cum_pct_by_factory:
if cum_ratio_by_factory:
month_labels = [f"{m}" for m in range(1, 13)]
month_pct_df = pd.DataFrame(
{fname: pct.reset_index(drop=True) for fname, pct in cum_pct_by_factory.items()}
month_ratio_df = pd.DataFrame(
{fname: pct.reset_index(drop=True) for fname, pct in cum_ratio_by_factory.items()}
).transpose()
month_abs_means = month_pct_df.abs().mean(axis=0)
month_abs_means = month_ratio_df.abs().mean(axis=0)
month_abs_means.index = month_labels[: len(month_abs_means)]
month_abs_means_pct = month_abs_means * 100
plt.figure(figsize=(10, 5))
month_abs_means.plot(kind="bar")
plt.ylabel("月度累偏差平均值[%]")
plt.title("按月份的累偏差(取绝对值后平均)")
month_abs_means_pct.plot(kind="bar")
plt.ylabel("月度累偏差平均值[%]")
plt.title("按月份的累偏差(取绝对值后平均)")
plt.tight_layout()
month_plot_path = os.path.join(output_dir, f"error_by_month_{timestamp}.png")
plt.savefig(month_plot_path)
@@ -938,18 +940,20 @@ class SimulationModel(Model):
factory_df = pd.DataFrame(
{
"name": list(cum_pct_by_factory.keys()),
"error_pct": [
(pct.abs().max() if self.use_error_max else pct.abs().mean()) for pct in cum_pct_by_factory.values()
"name": list(cum_ratio_by_factory.keys()),
"error_ratio": [
(pct.abs().max() if self.use_error_max else pct.abs().mean()) for pct in cum_ratio_by_factory.values()
],
}
).sort_values(by="error_pct")
)
factory_df = factory_df.sort_values(by="error_ratio")
factory_df["error_pct"] = factory_df["error_ratio"] * 100
self.factory_error_df = factory_df.reset_index(drop=True)
plt.figure(figsize=(12, 6))
ax = factory_df["error_pct"].reset_index(drop=True).plot(kind="bar")
metric_label = "最大" if self.use_error_max else "平均"
plt.ylabel(f"{metric_label}累积偏差[%]")
plt.title(f"按工厂的{metric_label}累积偏差(百分比)")
plt.ylabel(f"{metric_label}累积偏差[%]")
plt.title(f"按工厂的{metric_label}累积偏差(百分比)")
plt.tight_layout()
for idx, row in factory_df.reset_index(drop=True).iterrows():
val = row["error_pct"]
@@ -966,8 +970,7 @@ class SimulationModel(Model):
factory_plot_path = os.path.join(output_dir, f"error_by_factory_{timestamp}.png")
plt.savefig(factory_plot_path)
plt.close()
# Save histories for inventory, fulfillment pct, unmet backlog
# Save histories for inventory, fulfillment pct, unmet backlog
inv_path = os.path.join(output_dir, f"inventory_history_{timestamp}.csv")
fulfill_path = os.path.join(output_dir, f"fulfill_history_{timestamp}.csv")
unmet_path = os.path.join(output_dir, f"unmet_history_{timestamp}.csv")