import numpy as np  # 引入NumPy库，用于高效的数值计算
from pySOT.utils import round_vars  # 引入用于四舍五入的函数
from typing import TYPE_CHECKING  # 引入类型检查工具
if TYPE_CHECKING:
    from policy import Policy

class GeneticAlgorithm:
    """Ask/tell style real-coded genetic algorithm for minimization.

    The initial population and its objective values are taken from a
    ``Policy`` object.  Afterwards the caller drives the search one point at
    a time:

    1. ``select_next_point()`` returns the next unevaluated offspring;
    2. the caller evaluates it;
    3. ``receive_sim_value(value)`` hands the result back.

    When every offspring of the current generation has a value, the
    offspring become the new population and a further generation is bred
    through tournament selection, arithmetic crossover and Gaussian mutation.

    ``best_individual`` / ``best_value`` describe the best member of the most
    recently completed generation (no elitism is applied, so this is not a
    running best over all generations).
    """

    def __init__(self, the_policy: 'Policy'):
        """Build the initial population from ``the_policy`` and breed the first offspring.

        Attributes read from ``the_policy``: ``dim``, ``lb``, ``ub``,
        ``int_var``, ``arr_init_doe_points``, ``lst_y_init_doe_points`` and,
        when the initial design has an odd number of rows, ``eval``.
        """
        self.n_variables = the_policy.dim  # problem dimension
        self.lower_boundary = the_policy.lb  # per-variable lower bounds
        self.upper_boundary = the_policy.ub  # per-variable upper bounds
        self.integer_variables = the_policy.int_var  # indices of integer-valued variables

        self.sigma = 0.2  # mutation std-dev, as a fraction of each variable's range
        self.p_mutation = 1.0 / the_policy.dim  # per-gene mutation probability
        self.tournament_size = 5  # number of competitors per tournament
        self.p_cross = 0.9  # probability that a parent pair is recombined

        pop_size = the_policy.arr_init_doe_points.shape[0]
        # NOTE(review): this aliases the policy's own list, so the append in
        # the odd-size branch below also grows the_policy.lst_y_init_doe_points.
        # Confirm that this sharing is intended.
        self.lst_value = the_policy.lst_y_init_doe_points

        # Parents are paired off for crossover, so the population size must be
        # even; an odd-sized initial design is padded with one uniformly
        # random individual inside the bounds.
        if pop_size % 2 == 1:
            arr_random = np.random.rand(1, self.n_variables)
            arr_one_random = self.lower_boundary + arr_random * (self.upper_boundary - self.lower_boundary)
            self.lst_value.append(the_policy.eval(arr_one_random[0, :], is_init_points=True))
            self.population = np.vstack((the_policy.arr_init_doe_points, arr_one_random))
        else:
            self.population = np.copy(the_policy.arr_init_doe_points)

        self.n_individuals = self.population.shape[0]
        assert self.n_individuals == pop_size or self.n_individuals == pop_size + 1, 'Wrong pop size'

        # Snap integer variables to whole numbers.  Rounding can push a value
        # just outside a bound; step it back in by one unit.
        # NOTE(review): the initial objective values were obtained before this
        # rounding, so they may correspond to the unrounded points.
        if len(self.integer_variables) > 0:
            self.population[:, self.integer_variables] = np.round(self.population[:, self.integer_variables])
            for i in self.integer_variables:
                ind = np.where(self.population[:, i] < self.lower_boundary[i])
                self.population[ind, i] += 1
                ind = np.where(self.population[:, i] > self.upper_boundary[i])
                self.population[ind, i] -= 1

        self.ind, self.best_individual, self.best_value = None, None, None
        self.pop_next, self.lst_pop_next_is_evaluated = None, None
        self.update_info()

    def update_info(self) -> None:
        """Record the best member of ``population`` and breed the next offspring.

        Requires ``lst_value[k]`` to be the objective value of
        ``population[k]``.  On return ``pop_next`` holds fresh, unevaluated
        offspring and ``lst_value`` has been reset to an empty list, ready to
        collect their values.
        """
        self.ind = np.argmin(self.lst_value)  # minimization: smallest value wins
        self.best_individual = np.copy(self.population[self.ind, :])
        self.best_value = self.lst_value[self.ind]

        self.pop_next, self.lst_pop_next_is_evaluated = self._generate_next_population()
        self.lst_value = []

    def _generate_next_population(self):
        """Breed ``n_individuals`` offspring from the current population.

        Returns:
            A tuple ``(offspring, flags)`` where ``offspring`` is an array of
            shape ``(n_individuals, n_variables)`` and ``flags`` is a list of
            ``n_individuals`` ``False`` values (nothing evaluated yet).
        """
        half = self.n_individuals // 2

        # Tournament selection: each row is one tournament among randomly
        # drawn individuals (with replacement); the lowest value wins.
        competitors = np.random.randint(0, self.n_individuals, (self.n_individuals, self.tournament_size))
        ind = np.argmin(np.array(self.lst_value)[competitors], axis=1)
        winner_indices = competitors[np.arange(self.n_individuals), ind]

        # Fancy indexing copies the rows, so the parents can be modified in
        # place without touching self.population.
        parent1 = self.population[winner_indices[0:half], :]
        parent2 = self.population[winner_indices[half:self.n_individuals], :]

        # Arithmetic crossover: each selected pair is replaced by two convex
        # combinations of its members, using one random weight per pair.
        cross = np.where(np.random.rand(half) < self.p_cross)[0]
        alpha = np.random.rand(len(cross), 1)
        parent1_new = np.multiply(alpha, parent1[cross, :]) + np.multiply(1 - alpha, parent2[cross, :])
        parent2_new = np.multiply(alpha, parent2[cross, :]) + np.multiply(1 - alpha, parent1[cross, :])
        parent1[cross, :] = parent1_new
        parent2[cross, :] = parent2_new
        arr_new_population = np.concatenate((parent1, parent2))

        # Gaussian mutation: noise scaled to each variable's range, applied
        # only to the genes picked by the mutation mask.
        scale_factors = self.sigma * (self.upper_boundary - self.lower_boundary)
        perturbation = np.random.randn(self.n_individuals, self.n_variables)
        perturbation = np.multiply(perturbation, scale_factors)
        perturbation = np.multiply(
            perturbation, (np.random.rand(self.n_individuals, self.n_variables) < self.p_mutation)
        )
        arr_new_population += perturbation

        # Clamp the offspring back into the box [lower_boundary, upper_boundary].
        arr_new_population = np.clip(
            arr_new_population,
            np.reshape(self.lower_boundary, (1, self.n_variables)),
            np.reshape(self.upper_boundary, (1, self.n_variables)),
        )

        # Integer variables are rounded by the pySOT helper (presumably while
        # keeping them inside the bounds -- see the pySOT documentation).
        if len(self.integer_variables) > 0:
            arr_new_population = round_vars(arr_new_population, self.integer_variables, self.lower_boundary,
                                            self.upper_boundary)

        assert arr_new_population.shape[0] == self.n_individuals, 'Wrong arr_new_population shape'
        return arr_new_population, [False] * self.n_individuals

    def select_next_point(self):
        """Return the first offspring that has not been evaluated yet.

        The returned row is a view into ``pop_next``, not a copy.  Returns
        ``None`` if every offspring is already flagged as evaluated.
        """
        for idx_ind, is_evaluated in enumerate(self.lst_pop_next_is_evaluated):
            if not is_evaluated:
                return self.pop_next[idx_ind, :]
        return None

    def receive_sim_value(self, the_value) -> None:
        """Store the objective value of the point last returned by ``select_next_point``.

        Values must be reported in the order the points were handed out.
        Once the last offspring of the generation has been reported, the
        offspring replace the current population and the next generation is
        bred.
        """
        self.lst_value.append(the_value)
        idx_ind = 0
        for idx_ind, is_evaluated in enumerate(self.lst_pop_next_is_evaluated):
            if not is_evaluated:
                self.lst_pop_next_is_evaluated[idx_ind] = True
                break
        if idx_ind == len(self.lst_pop_next_is_evaluated) - 1:
            assert idx_ind == self.n_individuals - 1, 'Wrong index'
            # The collected values belong to the offspring, so the offspring
            # must become the population before update_info() pairs values
            # with individuals; otherwise the best individual and the next
            # tournament would be taken from the stale previous population.
            self.population = self.pop_next
            self.update_info()