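error不一样 — make the factory error metric configurable (max or mean absolute cumulative deviation, selected by is_error_max)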

2025-12-01 18:51:07 +08:00
parent b86290c331
commit d86e16e3a3
6 changed files with 278 additions and 213 deletions
--- a/simulation_model.py
+++ b/simulation_model.py
@@ -75,6 +75,7 @@ class SimulationModel(Model):
        self.ramp_ranges = self._load_product_month_efficiency()
        self.factory_mapping = self._load_factory_mapping()
        self.factory_factors = {}
+        self.use_error_max = self._to_bool(cfg["is_error_max"])
        merged_factors = {}
        merged_factors.update({k: v for k, v in kwargs.items() if k.startswith("factor_")})
        if factory_factors:
@@ -209,6 +210,58 @@ class SimulationModel(Model):
        self.running = True
        self.datacollector.collect(self)

+    def _compute_factory_error_stats(
+        self,
+        factory_pivot: pd.DataFrame,
+        benchmark_sorted: pd.DataFrame,
+        total_col: str = "总计",
+        use_max: bool = True,
+    ) -> dict:
+        """Compute per-factory error metrics from cumulative monthly deviation.
+
+        Each ``factory_pivot`` row is matched by its "工厂名称" value against the
+        first column of ``benchmark_sorted``; unmatched factories and those with
+        a benchmark total <= 0 are skipped. The signed ratio per month is
+        (cumsum(pivot) - cumsum(benchmark)) / total over "1月".."12月", where
+        total is ``total_col`` if present, else the benchmark's 12-month sum.
+
+        Returns a dict: "mean_abs_error" is the mean of the per-factory values
+        (inf when no factory was scored); "factory_max_ratio" maps factory to
+        max |ratio| if ``use_max`` else mean |ratio| (despite the key name);
+        "factory_cum_ratio" maps factory to the signed 12-value pd.Series.
+        Ratios are unitless (e.g., 0.02 == 2%).
+        """
+        errors = []
+        factory_max_ratio = {}
+        factory_cum_ratio = {}
+        month_cols = [f"{m}月" for m in range(1, 13)]
+        bench_name_col = benchmark_sorted.columns[0]  # factory names are looked up in the first benchmark column
+
+        for _, prow in factory_pivot.iterrows():
+            fname = prow["工厂名称"]
+            brow = benchmark_sorted[benchmark_sorted[bench_name_col] == fname]
+            if brow.empty:
+                continue  # no benchmark row for this factory
+            prod_months = pd.Series([float(prow[col]) for col in month_cols])
+            bench_months = pd.Series([float(brow.iloc[0][col]) for col in month_cols])  # only the first match is used
+            prod_cum = prod_months.cumsum()
+            bench_cum = bench_months.cumsum()
+            actual_total = float(brow.iloc[0][total_col]) if total_col in brow.columns else float(bench_cum.iloc[-1])
+            if actual_total <= 0:
+                continue  # a zero or negative total cannot serve as the denominator
+            pct_ratio = (prod_cum - bench_cum) / actual_total  # signed ratio; negative when the pivot lags the benchmark
+            agg_val = pct_ratio.abs().max() if use_max else pct_ratio.abs().mean()
+            errors.append(float(agg_val))
+            factory_max_ratio[fname] = float(agg_val)  # holds the mean rather than the max when use_max is False
+            factory_cum_ratio[fname] = pct_ratio
+
+        mean_abs_error = float("inf") if not errors else sum(errors) / len(errors)  # inf: no factory could be scored
+        return {
+            "mean_abs_error": mean_abs_error,
+            "factory_max_ratio": factory_max_ratio,
+            "factory_cum_ratio": factory_cum_ratio,
+        }
+
    def _get_output_timestamp(self) -> str:
        if not hasattr(self, "_output_timestamp"):
            self._output_timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
@@ -840,28 +893,12 @@ class SimulationModel(Model):
        month_cols = [f"{m}月" for m in range(1, 13)]
        if total_col not in benchmark_sorted.columns:
            benchmark_sorted[total_col] = benchmark_sorted[month_cols].sum(axis=1)
-        errors = []
-        bench_name_col = benchmark_sorted.columns[0]
-        for _, prow in factory_pivot.iterrows():
-            fname = prow["工厂名称"]
-            brow = benchmark_sorted[benchmark_sorted[bench_name_col] == fname]
-            if brow.empty:
-                continue
-            prod_months = pd.Series([float(prow[f"{m}月"]) for m in range(1, 13)])
-            bench_months = pd.Series([float(brow.iloc[0][f"{m}月"]) for m in range(1, 13)])
-            prod_cum = prod_months.cumsum()
-            bench_cum = bench_months.cumsum()
-            actual_total = float(brow.iloc[0][total_col]) if total_col in brow.columns else float(bench_cum.iloc[-1])
-            if actual_total <= 0:
-                continue
-            diff = prod_cum - bench_cum
-            pct = diff.abs() / actual_total
-            err = pct.mean()
-            errors.append(float(err))
-        if not errors:
+        stats = self._compute_factory_error_stats(factory_pivot, benchmark_sorted, total_col=total_col, use_max=self.use_error_max)
+        cum_pct_by_factory = {k: v * 100.0 for k, v in stats["factory_cum_ratio"].items()}  # to %
+        if stats["mean_abs_error"] == float("inf"):
            self.mean_abs_error = float("inf")
            return
-        self.mean_abs_error = sum(errors) / len(errors)
+        self.mean_abs_error = stats["mean_abs_error"]

        if not write_files:
            return
@@ -872,65 +909,63 @@ class SimulationModel(Model):
        factory_report_path = os.path.join(output_dir, f"factory_report_{timestamp}.csv")
        factory_pivot.to_csv(factory_report_path, index=False, encoding="utf-8-sig")

-        error_df = pd.DataFrame(
-            {
-                "工厂名称": factory_pivot["工厂名称"].iloc[:min_len],
-                "仿真总计": prod_total,
-                "基准总计": bench_total,
-                "误差（比例）": rel_errors.round(6),
-                "误差绝对值（比例）": abs_rel_errors.round(6),
-            }
-        )
+        # Error summary based on cumulative deviation percentage (max abs per factory)
+        err_rows = []
+        for fname, pct_series in cum_pct_by_factory.items():
+            max_abs_pct = pct_series.abs().max() if self.use_error_max else pct_series.abs().mean()
+            err_rows.append({"工厂名称": fname, "最大累积偏差[%]" if self.use_error_max else "平均累积偏差[%]": float(max_abs_pct)})
+        error_df = pd.DataFrame(err_rows)
        error_path = os.path.join(output_dir, f"factory_error_{timestamp}.csv")
        error_df.to_csv(error_path, index=False, encoding="utf-8-sig")

-        # Error bar charts by month and by factory (mean error)
-        # Align monthly errors
-        prod_months = factory_pivot[[f"{m}月" for m in range(1, 13)]].iloc[:min_len].astype(float)
-        bench_months = benchmark_sorted[[f"{m}月" for m in range(1, 13)]].iloc[:min_len].astype(float)
-        bench_months_safe = bench_months.replace(0, pd.NA)
-        month_pct_errors = (prod_months - bench_months_safe) / bench_months_safe * 100
-        month_pct_errors = month_pct_errors.fillna(0).infer_objects(copy=False).astype(float)
-        month_error_means = pd.Series(month_pct_errors.mean(axis=0), index=[f"{m}月" for m in range(1, 13)])
+        # Error bar charts by month using cumulative percentage deviations
+        if cum_pct_by_factory:
+            month_labels = [f"{m}月" for m in range(1, 13)]
+            month_pct_df = pd.DataFrame(
+                {fname: pct.reset_index(drop=True) for fname, pct in cum_pct_by_factory.items()}
+            ).transpose()
+            month_abs_means = month_pct_df.abs().mean(axis=0)
+            month_abs_means.index = month_labels[: len(month_abs_means)]

-        plt.figure(figsize=(10, 5))
-        month_error_means.plot(kind="bar")
-        plt.ylabel("平均误差（%）")
-        plt.title("按月份的平均误差（百分比）")
-        plt.tight_layout()
-        month_plot_path = os.path.join(output_dir, f"error_by_month_{timestamp}.png")
-        plt.savefig(month_plot_path)
-        plt.close()
+            plt.figure(figsize=(10, 5))
+            month_abs_means.plot(kind="bar")
+            plt.ylabel("月度累积偏差平均值[%]")
+            plt.title("按月份的累积偏差（取绝对值后平均）")
+            plt.tight_layout()
+            month_plot_path = os.path.join(output_dir, f"error_by_month_{timestamp}.png")
+            plt.savefig(month_plot_path)
+            plt.close()

-        bench_totals_safe = bench_total.replace(0, pd.NA)
-        factory_pct_errors = (prod_total - bench_totals_safe) / bench_totals_safe * 100
-        factory_pct_errors = factory_pct_errors.fillna(0).infer_objects(copy=False).astype(float)
-        factory_names = factory_pivot["工厂名称"].iloc[:min_len].reset_index(drop=True)
-        factory_df = pd.DataFrame({"name": factory_names, "error_pct": factory_pct_errors}).sort_values(
-            by="error_pct"
-        )
-        self.factory_error_df = factory_df.reset_index(drop=True)
-        plt.figure(figsize=(12, 6))
-        ax = factory_df["error_pct"].reset_index(drop=True).plot(kind="bar")
-        plt.ylabel("误差（%）")
-        plt.title("按工厂的误差（总计，百分比）")
-        plt.tight_layout()
-        # Annotate large absolute errors
-        for idx, row in factory_df.reset_index(drop=True).iterrows():
-            val = row["error_pct"]
-            if abs(val) >= 50:  # threshold
-                ax.text(
-                    idx,
-                    val,
-                    row["name"],
-                    rotation=90,
-                    va="bottom" if val >= 0 else "top",
-                    ha="center",
-                    fontsize=8,
-                )
-        factory_plot_path = os.path.join(output_dir, f"error_by_factory_{timestamp}.png")
-        plt.savefig(factory_plot_path)
-        plt.close()
+            factory_df = pd.DataFrame(
+                {
+                    "name": list(cum_pct_by_factory.keys()),
+                    "error_pct": [
+                        (pct.abs().max() if self.use_error_max else pct.abs().mean()) for pct in cum_pct_by_factory.values()
+                    ],
+                }
+            ).sort_values(by="error_pct")
+            self.factory_error_df = factory_df.reset_index(drop=True)
+            plt.figure(figsize=(12, 6))
+            ax = factory_df["error_pct"].reset_index(drop=True).plot(kind="bar")
+            metric_label = "最大" if self.use_error_max else "平均"
+            plt.ylabel(f"{metric_label}累积偏差[%]")
+            plt.title(f"按工厂的{metric_label}累积偏差（百分比）")
+            plt.tight_layout()
+            for idx, row in factory_df.reset_index(drop=True).iterrows():
+                val = row["error_pct"]
+                if abs(val) >= 50:
+                    ax.text(
+                        idx,
+                        val,
+                        row["name"],
+                        rotation=90,
+                        va="bottom" if val >= 0 else "top",
+                        ha="center",
+                        fontsize=8,
+                    )
+            factory_plot_path = os.path.join(output_dir, f"error_by_factory_{timestamp}.png")
+            plt.savefig(factory_plot_path)
+            plt.close()

        # Save histories for inventory, fulfillment pct, unmet backlog
        inv_path = os.path.join(output_dir, f"inventory_history_{timestamp}.csv")