[英]Trouble understanding Genetic Algorithm - Java
我目前正在使用一個游戲代碼庫,其中一部分使用遺傳算法來處理某些AI。 這個概念對我來說還很陌生,盡管我已經仔細閱讀過它的工作原理,但是我發現很難將「正在演化、變異的東西」准確地對應到實際代碼上。
這些動作(actions)基本上是AI可以采取的每一個可能的移動選項。 因此,它是在嘗試對某個狀態進行演化,並找出應該采取的最佳動作。 誰能幫我更清楚地理解它?
private static double GAMMA = 0.90;
private static long BREAK_MS = 35;
private static int SIMULATION_DEPTH = 7;
private static int POPULATION_SIZE = 5;
private static double RECPROB = 0.1;
private double MUT = (1.0 / SIMULATION_DEPTH);
private final int N_ACTIONS;
private ElapsedCpuTimer timer;
private int genome[][][];
private final HashMap<Integer, Types.ACTIONS> action_mapping;
private final HashMap<Types.ACTIONS, Integer> r_action_mapping;
protected Random randomGenerator;
private int numSimulations;
/**
 * Builds the agent: indexes the actions available in the given state and
 * fills the genome with random action indices.
 *
 * @param stateObs state observation of the current game.
 * @param elapsedTimer timer for the controller creation (not read here).
 */
public Agent(StateObservation stateObs, ElapsedCpuTimer elapsedTimer) {
    this.randomGenerator = new Random();
    this.action_mapping = new HashMap<Integer, Types.ACTIONS>();
    this.r_action_mapping = new HashMap<Types.ACTIONS, Integer>();

    for (Types.ACTIONS action : stateObs.getAvailableActions()) {
        // Indices are handed out densely from 0, so the forward map's size
        // is always the next free index.
        int index = this.action_mapping.size();
        this.action_mapping.put(index, action);
        this.r_action_mapping.put(action, index);
    }

    this.N_ACTIONS = stateObs.getAvailableActions().size();
    // Must run after N_ACTIONS and randomGenerator are set: initGenome reads both.
    initGenome(stateObs);
}
/**
 * Runs one tournament between two distinct, randomly chosen individuals of
 * {@code actionGenome}. Both are scored with {@code simulate}; the one with the
 * strictly higher score wins (on a tie the second pick wins). The loser then
 * copies each winner gene with probability {@code RECPROB} and afterwards has
 * each gene replaced by a random action index with probability {@code MUT}.
 * The loser row of {@code actionGenome} is modified in place.
 *
 * <p>Every individual of the population can be selected. A population of one
 * is only evaluated (there is nothing to compete against).
 *
 * @param actionGenome population to evolve, indexed as [individual][step].
 * @param stateObs state every rollout starts from.
 * @param heuristic heuristic used by {@code simulate} to score the rollout.
 * @return the higher of the two contestants' scores.
 * @throws TimeoutException if {@code simulate} reports that time has run out.
 * @throws IllegalArgumentException if {@code actionGenome} has no individuals.
 */
double microbial_tournament(int[][] actionGenome, StateObservation stateObs, StateHeuristic heuristic) throws TimeoutException {
    final int populationSize = actionGenome.length;
    if (populationSize == 0) {
        throw new IllegalArgumentException("actionGenome must contain at least one individual");
    }
    if (populationSize == 1) {
        return simulate(stateObs, heuristic, actionGenome[0]);
    }

    // Uniform pick over the whole population, then a uniform pick over the
    // remaining individuals: draw from one fewer slot and skip over a.
    final int a = randomGenerator.nextInt(populationSize);
    int b = randomGenerator.nextInt(populationSize - 1);
    if (b >= a) {
        b++;
    }

    final double score_a = simulate(stateObs, heuristic, actionGenome[a]);
    final double score_b = simulate(stateObs, heuristic, actionGenome[b]);

    final int winner;
    final int loser;
    if (score_a > score_b) {
        winner = a;
        loser = b;
    } else {
        winner = b;
        loser = a;
    }

    final int geneCount = actionGenome[0].length;

    // Recombination: the loser inherits some of the winner's genes.
    for (int i = 0; i < geneCount; i++) {
        if (randomGenerator.nextDouble() < RECPROB) {
            actionGenome[loser][i] = actionGenome[winner][i];
        }
    }

    // Mutation: some loser genes are replaced by a random action index.
    for (int i = 0; i < geneCount; i++) {
        if (randomGenerator.nextDouble() < MUT) {
            actionGenome[loser][i] = randomGenerator.nextInt(N_ACTIONS);
        }
    }

    return Math.max(score_a, score_b);
}
/**
 * Allocates the genome as [N_ACTIONS][POPULATION_SIZE][SIMULATION_DEPTH] and
 * fills every gene with a random action index in [0, N_ACTIONS).
 *
 * @param stateObs current state observation (not read by this method).
 */
private void initGenome(StateObservation stateObs) {
    int[][][] fresh = new int[N_ACTIONS][POPULATION_SIZE][SIMULATION_DEPTH];
    for (int[][] population : fresh) {
        for (int[] individual : population) {
            for (int step = 0; step < individual.length; step++) {
                individual[step] = randomGenerator.nextInt(N_ACTIONS);
            }
        }
    }
    genome = fresh;
}
/**
 * Plays {@code policy} on a copy of {@code stateObs} and scores the resulting state.
 * The rollout stops early when the game is over; in that case the step counter
 * is not advanced past the terminating action. The heuristic value of the final
 * state is scaled by {@code GAMMA} raised to that counter.
 *
 * @param stateObs start state; it is copied, the argument itself is not advanced.
 * @param heuristic heuristic that evaluates the state reached by the rollout.
 * @param policy sequence of action indices to apply in order.
 * @return the discounted heuristic value of the reached state.
 * @throws TimeoutException if fewer than {@code BREAK_MS} ms remain on the timer.
 */
private double simulate(StateObservation stateObs, StateHeuristic heuristic, int[] policy) throws TimeoutException {
    if (timer.remainingTimeMillis() < BREAK_MS) {
        throw new TimeoutException("Timeout");
    }

    StateObservation rollout = stateObs.copy();
    int step = 0;
    while (step < policy.length) {
        rollout.advance(action_mapping.get(policy[step]));
        if (rollout.isGameOver()) {
            break;
        }
        step++;
    }

    numSimulations++;
    return Math.pow(GAMMA, step) * heuristic.evaluateState(rollout);
}
/**
 * Evolves one population per available first action and returns the action
 * whose population produced the best tournament score seen so far.
 * Each round advances a copy of the state by every available action and runs
 * one tournament on that action's population; a tiny random term is added to
 * each score. The search stops after {@code iterations} rounds or as soon as
 * a tournament times out.
 *
 * @param stateObs current state; only copies of it are advanced.
 * @param maxdepth not read by this method.
 * @param heuristic heuristic handed to the tournaments.
 * @param iterations maximum number of rounds over all actions.
 * @return the action at the index {@code Utils.argmax} selects from the per-action best scores.
 */
private Types.ACTIONS microbial(StateObservation stateObs, int maxdepth, StateHeuristic heuristic, int iterations) {
    final double[] bestPerAction = new double[stateObs.getAvailableActions().size()];
    int slot = 0;
    while (slot < bestPerAction.length) {
        bestPerAction[slot] = Double.NEGATIVE_INFINITY;
        slot++;
    }

    boolean outOfTime = false;
    for (int round = 0; round < iterations && !outOfTime; round++) {
        for (Types.ACTIONS action : stateObs.getAvailableActions()) {
            StateObservation afterAction = stateObs.copy();
            afterAction.advance(action);
            try {
                int actionIndex = r_action_mapping.get(action);
                double score = microbial_tournament(genome[actionIndex], afterAction, heuristic)
                        + randomGenerator.nextDouble() * 0.00001;
                if (score > bestPerAction[actionIndex]) {
                    bestPerAction[actionIndex] = score;
                }
            } catch (TimeoutException e) {
                // Out of time: abandon the whole search and answer with what we have.
                outOfTime = true;
                break;
            }
        }
    }

    return action_mapping.get(Utils.argmax(bestPerAction));
}
/**
 * Chooses the action for this game step. Stores the timer for the rollouts,
 * resets the simulation counter and runs the microbial search with a fresh
 * {@code WinScoreHeuristic}.
 *
 * @param stateObs Observation of the current state.
 * @param elapsedTimer Timer when the action returned is due.
 * @return An action for the current state
 */
public Types.ACTIONS act(StateObservation stateObs, ElapsedCpuTimer elapsedTimer) {
    timer = elapsedTimer;
    numSimulations = 0;

    final int maxRounds = 100;
    StateHeuristic heuristic = new WinScoreHeuristic(stateObs);
    return microbial(stateObs, SIMULATION_DEPTH, heuristic, maxRounds);
}
/**
 * Drawing hook; this agent currently draws nothing.
 *
 * @param g graphics context (unused).
 */
@Override
public void draw(Graphics2D g) {
    // Disabled debug overlay:
    // g.drawString("Num Simulations: " + numSimulations, 10, 20);
}
}
genome
是解(solution)的編碼(基因型),通過 simulate
將其翻譯到實際的問題空間(表現型)。 此外,simulate 還會返回適應度(fitness)評分作為評估的一部分。 其他方法則負責初始化或擾動基因型,以獲得不同的解。
如果您需要更多的信息,請提出更具體的問題,而不是轉儲一大堆代碼然後只說“請解釋”!
聲明:本站的技術帖子網頁,遵循CC BY-SA 4.0協議,如果您需要轉載,請注明本站網址或者原文地址。任何問題請咨詢:yoyou2525@163.com.