簡體   English   中英

難以理解遺傳算法-Java

[英]Trouble understanding Genetic Algorithm - Java

我目前正在使用一個游戲代碼庫,其中一部分使用遺傳算法來處理某些AI。 這個概念對我來說還很陌生,盡管我已經仔細閱讀了它的工作原理,但是我發現很難將“演化”、“變異”等概念准確地對應到實際代碼上。

這些動作基本上是AI在移動時可以選擇的每一個可能選項。 因此,它正在嘗試針對某個狀態進行演化,並找出應采取的最佳動作。 誰能幫我更清楚地理解它?

// Discount factor: simulate() multiplies the heuristic value by GAMMA^depth.
private static double GAMMA = 0.90;
// simulate() throws TimeoutException once the timer reports fewer than this many milliseconds remaining.
private static long BREAK_MS = 35;
// Number of genes (action indices) per individual; third dimension of genome.
private static int SIMULATION_DEPTH = 7;
// Number of individuals kept per first action; second dimension of genome.
private static int POPULATION_SIZE = 5;

// Per-gene probability that the tournament loser copies the winner's gene.
private static double RECPROB = 0.1;
// Per-gene probability that a loser's gene is replaced by a random action index.
private double MUT = (1.0 / SIMULATION_DEPTH);
// Number of available actions, captured once in the constructor.
private final int N_ACTIONS;

// Timer of the current act() call; assigned at the start of act() and read by simulate().
private ElapsedCpuTimer timer;

// genome[firstAction][individual][step] holds action indices in [0, N_ACTIONS).
private int genome[][][];
// Maps an action index to the corresponding game action.
private final HashMap<Integer, Types.ACTIONS> action_mapping;
// Reverse of action_mapping: maps a game action to its index.
private final HashMap<Types.ACTIONS, Integer> r_action_mapping;
// Source of randomness for genome initialisation, selection, recombination and mutation.
protected Random randomGenerator;

// Number of rollouts completed by simulate() since act() last reset it to zero.
private int numSimulations;

/**
 * Creates the agent: indexes the actions available in the given state in
 * both directions (index to action and action to index), records how many
 * there are, and fills the genome with random action indices.
 *
 * @param stateObs     state observation of the current game.
 * @param elapsedTimer Timer for the controller creation (not read here).
 */
public Agent(StateObservation stateObs, ElapsedCpuTimer elapsedTimer) {
    action_mapping = new HashMap<Integer, Types.ACTIONS>();
    r_action_mapping = new HashMap<Types.ACTIONS, Integer>();

    // Number the actions in the order the state observation reports them.
    int nextIndex = 0;
    for (Types.ACTIONS available : stateObs.getAvailableActions()) {
        action_mapping.put(nextIndex, available);
        r_action_mapping.put(available, nextIndex);
        nextIndex++;
    }
    N_ACTIONS = stateObs.getAvailableActions().size();

    // The generator and N_ACTIONS must both be set before the genome is randomized.
    randomGenerator = new Random();
    initGenome(stateObs);
}


/**
 * Runs one tournament between two distinct, randomly chosen individuals of
 * the given population and evolves the loser in place.
 *
 * Both individuals are scored with {@code simulate}. The one with the
 * strictly higher score wins (on a tie the second pick wins). Each gene of
 * the loser is then overwritten by the winner's gene with probability
 * {@code RECPROB}, and afterwards replaced by a random action index with
 * probability {@code MUT}.
 *
 * Contestants are drawn uniformly from the whole population. The previous
 * {@code (int) ((POPULATION_SIZE - 1) * nextDouble())} form could never
 * select the last individual, so that individual was never evaluated or
 * evolved.
 *
 * @param actionGenome population to evolve, indexed [individual][step];
 *                     rows are expected to share one length.
 * @param stateObs     state from which both individuals are simulated.
 * @param heuristic    heuristic used by {@code simulate} to score the final state.
 * @return the higher of the two simulated scores.
 * @throws TimeoutException         propagated from {@code simulate} when time runs out.
 * @throws IllegalArgumentException if the population has fewer than two individuals.
 */
double microbial_tournament(int[][] actionGenome, StateObservation stateObs, StateHeuristic heuristic) throws TimeoutException {
    final int populationSize = actionGenome.length;
    if (populationSize < 2) {
        // Two distinct contestants cannot be drawn; fail fast instead of looping forever.
        throw new IllegalArgumentException(
                "A tournament needs at least two individuals, got " + populationSize);
    }

    // nextInt(n) is uniform over [0, n), so every individual can be picked.
    final int a = randomGenerator.nextInt(populationSize);
    // Shifting a by 1..n-1 (mod n) yields a uniformly chosen index different from a.
    final int b = (a + 1 + randomGenerator.nextInt(populationSize - 1)) % populationSize;

    final double scoreA = simulate(stateObs, heuristic, actionGenome[a]);
    final double scoreB = simulate(stateObs, heuristic, actionGenome[b]);

    final int winner;
    final int loser;
    if (scoreA > scoreB) {
        winner = a;
        loser = b;
    } else {
        winner = b;
        loser = a;
    }

    final int[] winnerGenes = actionGenome[winner];
    final int[] loserGenes = actionGenome[loser];
    final int geneCount = loserGenes.length;

    // Recombination: the loser inherits some of the winner's genes.
    for (int gene = 0; gene < geneCount; gene++) {
        if (randomGenerator.nextDouble() < RECPROB) {
            loserGenes[gene] = winnerGenes[gene];
        }
    }

    // Mutation: some of the loser's genes are replaced by random action indices.
    for (int gene = 0; gene < geneCount; gene++) {
        if (randomGenerator.nextDouble() < MUT) {
            loserGenes[gene] = randomGenerator.nextInt(N_ACTIONS);
        }
    }

    return Math.max(scoreA, scoreB);
}

/**
 * Allocates the genome as [N_ACTIONS][POPULATION_SIZE][SIMULATION_DEPTH]
 * and fills every gene with a random action index in [0, N_ACTIONS).
 *
 * @param stateObs current state observation (not read by this method).
 */
private void initGenome(StateObservation stateObs) {
    genome = new int[N_ACTIONS][POPULATION_SIZE][SIMULATION_DEPTH];

    for (int[][] population : genome) {
        for (int[] individual : population) {
            for (int step = 0; step < individual.length; step++) {
                individual[step] = randomGenerator.nextInt(N_ACTIONS);
            }
        }
    }
}


/**
 * Plays the given action sequence on a copy of the state and scores the result.
 *
 * The copy is advanced one action at a time until the sequence is exhausted
 * or the game is over. The heuristic value of the resulting state is scaled
 * by GAMMA raised to the number of steps completed without the game ending.
 *
 * @param stateObs  starting state; it is copied, not modified.
 * @param heuristic evaluator applied to the state reached by the rollout.
 * @param policy    action indices to play, in order.
 * @return the discounted heuristic value of the reached state.
 * @throws TimeoutException if fewer than BREAK_MS milliseconds remain on the timer.
 */
private double simulate(StateObservation stateObs, StateHeuristic heuristic, int[] policy) throws TimeoutException {
    // Refuse to start a rollout when the remaining budget is below the safety margin.
    if (timer.remainingTimeMillis() < BREAK_MS) {
        throw new TimeoutException("Timeout");
    }

    StateObservation rollout = stateObs.copy();
    int depth = 0;
    while (depth < policy.length) {
        rollout.advance(action_mapping.get(policy[depth]));
        if (rollout.isGameOver()) {
            // The step that ended the game is not counted in depth.
            break;
        }
        depth++;
    }

    numSimulations++;
    return Math.pow(GAMMA, depth) * heuristic.evaluateState(rollout);
}

/**
 * Chooses the action whose population produced the best tournament score.
 *
 * For up to {@code iterations} rounds, every available action is applied to
 * a copy of the state and one tournament is run on that action's population
 * from the resulting state. The best score seen per action is tracked (each
 * score gets a tiny random offset), and the action with the highest tracked
 * score according to {@code Utils.argmax} is returned. The search stops
 * early as soon as a tournament reports a timeout.
 *
 * @param stateObs   current state observation.
 * @param maxdepth   not read by this method.
 * @param heuristic  heuristic handed to the tournaments.
 * @param iterations maximum number of rounds over all actions.
 * @return the action mapped to the best-scoring index.
 */
private Types.ACTIONS microbial(StateObservation stateObs, int maxdepth, StateHeuristic heuristic, int iterations) {
    final int actionCount = stateObs.getAvailableActions().size();
    double[] bestScores = new double[actionCount];
    for (int slot = 0; slot < actionCount; slot++) {
        bestScores[slot] = Double.NEGATIVE_INFINITY;
    }

    boolean outOfTime = false;
    for (int round = 0; round < iterations && !outOfTime; round++) {
        for (Types.ACTIONS action : stateObs.getAvailableActions()) {
            StateObservation afterAction = stateObs.copy();
            afterAction.advance(action);

            int actionIndex = r_action_mapping.get(action);
            double score;
            try {
                double tournamentBest = microbial_tournament(genome[actionIndex], afterAction, heuristic);
                // Tiny random offset added on top of the tournament score.
                score = tournamentBest + randomGenerator.nextDouble() * 0.00001;
            } catch (TimeoutException timeout) {
                // Budget exhausted: leave both loops and answer with what we have.
                outOfTime = true;
                break;
            }

            if (score > bestScores[actionIndex]) {
                bestScores[actionIndex] = score;
            }
        }
    }

    return action_mapping.get(Utils.argmax(bestScores));
}

/**
 * Chooses the action to play this game step. Stores the supplied timer so
 * that rollouts can check it, resets the rollout counter, and runs the
 * microbial search for at most 100 rounds with a fresh WinScoreHeuristic.
 *
 * @param stateObs     Observation of the current state.
 * @param elapsedTimer Timer when the action returned is due.
 * @return An action for the current state
 */
public Types.ACTIONS act(StateObservation stateObs, ElapsedCpuTimer elapsedTimer) {
    timer = elapsedTimer;
    numSimulations = 0;

    return microbial(stateObs, SIMULATION_DEPTH, new WinScoreHeuristic(stateObs), 100);
}


/**
 * Drawing hook; currently draws nothing.
 *
 * @param g graphics context supplied by the caller.
 */
@Override
public void draw(Graphics2D g) {
    // Debug overlay, disabled:
    //g.drawString("Num Simulations: " + numSimulations, 10, 20);
}

}

genome是解的編碼(基因型),通過simulate將其翻譯到實際的問題空間(表型)。 此外,simulate還會返回適應度評分作為評估的一部分。 其他方法則負責初始化或擾動基因型,以獲得不同的解。

如果您需要更多的信息,請提出更具體的問題,而不是轉儲一大堆代碼然後只說“請解釋”!

暫無
暫無

聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.

 
粵ICP備18138465號  © 2020-2024 STACKOOM.COM