Compare commits

..

3 Commits

Author SHA1 Message Date
3fcd55923b
feat: alpha-beta pruning 2024-04-09 18:46:56 +08:00
e7464ceeb4
feat: minimax algorithm 2024-04-09 18:02:17 +08:00
05d32500b2
feat: add capsule score in evaluation 2024-04-09 15:36:35 +08:00

View File

@ -72,12 +72,12 @@ class ReflexAgent(Agent):
newPos = successorGameState.getPacmanPosition()
newFood = successorGameState.getFood()
newGhostStates = successorGameState.getGhostStates()
newScaredTimes = [ghostState.scaredTimer for ghostState in newGhostStates]
"*** YOUR CODE HERE ***"
ans = successorGameState.getScore()
AVOID_GHOST_SCORE = 10
AVOID_GHOST_SCORE = 25
EAT_FOOD_SCORE = 20
EAT_CAPSULE_SCORE = 70
nearest_ghost_distance = 1e9
for ghostState in newGhostStates:
@ -88,13 +88,17 @@ class ReflexAgent(Agent):
for foodPos in newFood.asList():
nearest_food_distance = min(nearest_food_distance, util.manhattanDistance(foodPos, newPos)+1)
nearest_capsule_distance = 1e9
for capsulePos in successorGameState.getCapsules():
nearest_capsule_distance = min(nearest_capsule_distance, util.manhattanDistance(capsulePos, newPos)+1)
ans -= AVOID_GHOST_SCORE * (1/nearest_ghost_distance)
ans += EAT_FOOD_SCORE * (1/nearest_food_distance)
if nearest_capsule_distance != 1e9:
ans += EAT_CAPSULE_SCORE * (1/nearest_capsule_distance)
else:
ans += 500
return ans
return successorGameState.getScore()
@ -158,7 +162,29 @@ class MinimaxAgent(MultiAgentSearchAgent):
Returns whether or not the game state is a losing state
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
return self.result(gameState, 0, self.depth)[1]
def result(self, state, agentIndex, depth):
if depth == 0 or state.isLose() or state.isWin():
return self.evaluationFunction(state), None
elif agentIndex == 0: # maximize
(nextAgentIndex, nextDepth) = (0, depth-1) if agentIndex == (state.getNumAgents()-1) else (agentIndex+1, depth)
max_reward, max_action = -1e9, None
for action in state.getLegalActions(agentIndex):
reward = self.result(state.generateSuccessor(agentIndex, action), nextAgentIndex, nextDepth)[0]
if reward > max_reward:
max_reward, max_action = reward, action
return max_reward, max_action
else: # minimize
(nextAgentIndex, nextDepth) = (0, depth-1) if agentIndex == (state.getNumAgents()-1) else (agentIndex+1, depth)
min_reward, min_action = 1e9, None
for action in state.getLegalActions(agentIndex):
reward = self.result(state.generateSuccessor(agentIndex, action), nextAgentIndex, nextDepth)[0]
if reward < min_reward:
min_reward, min_action = reward, action
return min_reward, min_action
class AlphaBetaAgent(MultiAgentSearchAgent):
"""
@ -170,7 +196,35 @@ class AlphaBetaAgent(MultiAgentSearchAgent):
Returns the minimax action using self.depth and self.evaluationFunction
"""
"*** YOUR CODE HERE ***"
util.raiseNotDefined()
return self.result(gameState, 0, self.depth, -1e9, 1e9)[1]
def result(self, state, agentIndex, depth, alpha, beta):
if depth == 0 or state.isLose() or state.isWin():
return self.evaluationFunction(state), None
elif agentIndex == 0: # maximize
(nextAgentIndex, nextDepth) = (0, depth-1) if agentIndex == (state.getNumAgents()-1) else (agentIndex+1, depth)
max_reward, max_action = -1e9, None
for action in state.getLegalActions(agentIndex):
reward = self.result(state.generateSuccessor(agentIndex, action), nextAgentIndex, nextDepth, alpha, beta)[0]
if reward > max_reward:
max_reward, max_action = reward, action
if reward > beta:
return reward, action
alpha = max(alpha, max_reward)
return max_reward, max_action
else: # minimize
(nextAgentIndex, nextDepth) = (0, depth-1) if agentIndex == (state.getNumAgents()-1) else (agentIndex+1, depth)
min_reward, min_action = 1e9, None
for action in state.getLegalActions(agentIndex):
reward = self.result(state.generateSuccessor(agentIndex, action), nextAgentIndex, nextDepth, alpha, beta)[0]
if reward < min_reward:
min_reward, min_action = reward, action
if reward < alpha:
return reward, action
beta = min(beta, min_reward)
return min_reward, min_action
class ExpectimaxAgent(MultiAgentSearchAgent):
"""