Compare commits

...

3 Commits

Author SHA1 Message Date
3fcd55923b
feat: alpha-beta pruning 2024-04-09 18:46:56 +08:00
e7464ceeb4
feat: minimax algorithm 2024-04-09 18:02:17 +08:00
05d32500b2
feat: add capsule score in evaluation 2024-04-09 15:36:35 +08:00

View File

@ -72,12 +72,12 @@ class ReflexAgent(Agent):
newPos = successorGameState.getPacmanPosition() newPos = successorGameState.getPacmanPosition()
newFood = successorGameState.getFood() newFood = successorGameState.getFood()
newGhostStates = successorGameState.getGhostStates() newGhostStates = successorGameState.getGhostStates()
newScaredTimes = [ghostState.scaredTimer for ghostState in newGhostStates]
"*** YOUR CODE HERE ***" "*** YOUR CODE HERE ***"
ans = successorGameState.getScore() ans = successorGameState.getScore()
AVOID_GHOST_SCORE = 10 AVOID_GHOST_SCORE = 25
EAT_FOOD_SCORE = 20 EAT_FOOD_SCORE = 20
EAT_CAPSULE_SCORE = 70
nearest_ghost_distance = 1e9 nearest_ghost_distance = 1e9
for ghostState in newGhostStates: for ghostState in newGhostStates:
@ -88,12 +88,16 @@ class ReflexAgent(Agent):
for foodPos in newFood.asList(): for foodPos in newFood.asList():
nearest_food_distance = min(nearest_food_distance, util.manhattanDistance(foodPos, newPos)+1) nearest_food_distance = min(nearest_food_distance, util.manhattanDistance(foodPos, newPos)+1)
nearest_capsule_distance = 1e9
for capsulePos in successorGameState.getCapsules():
nearest_capsule_distance = min(nearest_capsule_distance, util.manhattanDistance(capsulePos, newPos)+1)
ans -= AVOID_GHOST_SCORE * (1/nearest_ghost_distance) ans -= AVOID_GHOST_SCORE * (1/nearest_ghost_distance)
ans += EAT_FOOD_SCORE * (1/nearest_food_distance) ans += EAT_FOOD_SCORE * (1/nearest_food_distance)
if nearest_capsule_distance != 1e9:
ans += EAT_CAPSULE_SCORE * (1/nearest_capsule_distance)
else:
ans += 500
return ans return ans
@ -158,7 +162,29 @@ class MinimaxAgent(MultiAgentSearchAgent):
Returns whether or not the game state is a losing state Returns whether or not the game state is a losing state
""" """
"*** YOUR CODE HERE ***" "*** YOUR CODE HERE ***"
util.raiseNotDefined() return self.result(gameState, 0, self.depth)[1]
def result(self, state, agentIndex, depth):
    """Return a (value, action) pair for *state* under plain minimax.

    Agent 0 (Pacman) maximizes; every other agent (a ghost) minimizes.
    *depth* counts remaining search plies and is decremented only after
    the last agent in the turn order has moved.

    Args:
        state: current game state (must support isWin/isLose,
            getNumAgents, getLegalActions, generateSuccessor).
        agentIndex: index of the agent to move at this node.
        depth: remaining plies; 0 means evaluate immediately.

    Returns:
        (value, action): the minimax value and the first action that
        achieves it; action is None at terminal / depth-limit nodes.
    """
    # Terminal or depth-limited node: score the state directly.
    if depth == 0 or state.isLose() or state.isWin():
        return self.evaluationFunction(state), None

    # Advance the turn order once, instead of duplicating this in both
    # branches: wrap back to agent 0 and consume a ply after the last
    # agent has moved.
    if agentIndex == state.getNumAgents() - 1:
        nextAgentIndex, nextDepth = 0, depth - 1
    else:
        nextAgentIndex, nextDepth = agentIndex + 1, depth

    maximizing = agentIndex == 0  # Pacman maximizes, ghosts minimize
    # ±inf sentinels are safe for any evaluation-function range,
    # unlike the fixed ±1e9 magic numbers.
    best_reward = float("-inf") if maximizing else float("inf")
    best_action = None
    for action in state.getLegalActions(agentIndex):
        successor = state.generateSuccessor(agentIndex, action)
        reward = self.result(successor, nextAgentIndex, nextDepth)[0]
        # Strict comparison keeps the FIRST best action on ties,
        # matching the original update rule.
        if (maximizing and reward > best_reward) or (
            not maximizing and reward < best_reward
        ):
            best_reward, best_action = reward, action
    return best_reward, best_action
class AlphaBetaAgent(MultiAgentSearchAgent): class AlphaBetaAgent(MultiAgentSearchAgent):
""" """
@ -170,7 +196,35 @@ class AlphaBetaAgent(MultiAgentSearchAgent):
Returns the minimax action using self.depth and self.evaluationFunction Returns the minimax action using self.depth and self.evaluationFunction
""" """
"*** YOUR CODE HERE ***" "*** YOUR CODE HERE ***"
util.raiseNotDefined() return self.result(gameState, 0, self.depth, -1e9, 1e9)[1]
def result(self, state, agentIndex, depth, alpha, beta):
    """Return a (value, action) pair for *state* under alpha-beta minimax.

    Agent 0 (Pacman) maximizes; every other agent (a ghost) minimizes.
    *depth* counts remaining search plies and is decremented only after
    the last agent in the turn order has moved. [alpha, beta] is the
    window of values still relevant to ancestors.

    Args:
        state: current game state (must support isWin/isLose,
            getNumAgents, getLegalActions, generateSuccessor).
        agentIndex: index of the agent to move at this node.
        depth: remaining plies; 0 means evaluate immediately.
        alpha: best value the maximizer can already guarantee.
        beta: best value the minimizer can already guarantee.

    Returns:
        (value, action): the backed-up value and the action that
        achieves it; action is None at terminal / depth-limit nodes.
    """
    # Terminal or depth-limited node: score the state directly.
    if depth == 0 or state.isLose() or state.isWin():
        return self.evaluationFunction(state), None

    # Advance the turn order once, instead of duplicating this in both
    # branches: wrap back to agent 0 and consume a ply after the last
    # agent has moved.
    if agentIndex == state.getNumAgents() - 1:
        nextAgentIndex, nextDepth = 0, depth - 1
    else:
        nextAgentIndex, nextDepth = agentIndex + 1, depth

    if agentIndex == 0:  # Pacman: maximize
        # ±inf sentinel is safe for any evaluation range, unlike ±1e9.
        best_reward, best_action = float("-inf"), None
        for action in state.getLegalActions(agentIndex):
            reward = self.result(
                state.generateSuccessor(agentIndex, action),
                nextAgentIndex, nextDepth, alpha, beta,
            )[0]
            if reward > best_reward:
                best_reward, best_action = reward, action
            # Fail-soft cutoff on STRICT violation only — values equal
            # to the bound are still explored, preserving the original
            # pruning behavior exactly.
            if reward > beta:
                return reward, action
            alpha = max(alpha, best_reward)
        return best_reward, best_action
    else:  # ghost: minimize
        best_reward, best_action = float("inf"), None
        for action in state.getLegalActions(agentIndex):
            reward = self.result(
                state.generateSuccessor(agentIndex, action),
                nextAgentIndex, nextDepth, alpha, beta,
            )[0]
            if reward < best_reward:
                best_reward, best_action = reward, action
            if reward < alpha:
                return reward, action
            beta = min(beta, best_reward)
        return best_reward, best_action
class ExpectimaxAgent(MultiAgentSearchAgent): class ExpectimaxAgent(MultiAgentSearchAgent):
""" """