强化学习训练井字棋的代码
时间: 2023-10-01 22:02:10 浏览: 88
以下是一个简单的井字棋 Python 代码示例。需要说明的是,示例中的 AI 并不是通过强化学习训练得到的,而是使用 minimax 搜索算法直接计算每一步的最优落子:
```python
import numpy as np
# Empty 3x3 board; a value of 0 marks a free cell.
board = np.zeros(shape=(3, 3), dtype=float)
# Cell values for the two sides: the human plays +1, the AI plays -1.
player_piece, ai_piece = 1, -1
# Game-over condition
def game_over(board):
    """Return True when the game has ended.

    The game ends when any row, column or diagonal is completely filled by
    one side (its three cells sum to +3 or -3), or when no empty cell
    (value 0) is left on the board.

    Args:
        board: 3x3 numpy array holding +1, -1 or 0 in each cell.

    Returns:
        True if either side has three in a line or the board is full,
        otherwise False.
    """
    # Collect the sums of all eight lines: three rows, three columns and
    # the two diagonals.
    line_sums = [board[i, :].sum() for i in range(3)]
    line_sums += [board[:, i].sum() for i in range(3)]
    line_sums.append(board[0, 0] + board[1, 1] + board[2, 2])
    line_sums.append(board[0, 2] + board[1, 1] + board[2, 0])
    # abs() makes a completed line count for either side, not only for +1.
    if any(abs(total) == 3 for total in line_sums):
        return True
    # No winner: the game is over only if every cell is occupied.
    return bool(np.all(board != 0))
# Human player's move
def player_move(board):
    """Ask the human for a cell and place ``player_piece`` on it.

    Row and column are read from standard input as 1-based numbers. The
    prompts repeat until the input is numeric, inside the 3x3 board and
    refers to an empty cell (value 0).

    Args:
        board: 3x3 numpy array; modified in place.
    """
    while True:
        try:
            row = int(input("请输入要下的行数(1~3):")) - 1
            col = int(input("请输入要下的列数(1~3):")) - 1
        except ValueError:
            # Non-numeric input: ask again instead of crashing the game,
            # the same way an out-of-range or occupied cell is handled.
            continue
        if 0 <= row < 3 and 0 <= col < 3 and board[row, col] == 0:
            board[row, col] = player_piece
            break
# AI's move
def ai_move(board):
    """Place ``ai_piece`` on the empty cell with the highest minimax score.

    Every empty cell is tried in turn: the piece is placed, the resulting
    position is scored with ``minimax`` (opponent to move), and the cell is
    cleared again. The first cell reaching the highest score is played.
    If the board has no empty cell, nothing is changed.

    Args:
        board: 3x3 numpy array; modified in place.
    """
    best_score = -np.inf
    best_move = None
    for i in range(3):
        for j in range(3):
            if board[i, j] != 0:
                continue
            # Try the move, score it, then undo it.
            board[i, j] = ai_piece
            score = minimax(board, 0, False)
            board[i, j] = 0
            if score > best_score:
                best_score, best_move = score, (i, j)
    if best_move is None:
        # Full board: there is no legal move, so leave the board untouched
        # rather than failing on indexing None.
        return
    board[best_move] = ai_piece
# Minimax search
def minimax(board, depth, is_maximizing):
    """Score a position by exhaustive minimax search, from the AI's view.

    Terminal positions are scored by their actual outcome: +1 when a line
    is filled with ``ai_piece``, -1 when a line is filled with
    ``player_piece``, and 0 when the board is full without a winner.
    Otherwise every empty cell is tried for the side to move and the best
    (AI) or worst (player) child score is returned.

    Args:
        board: 3x3 numpy array; cells are modified during the search and
            restored before returning.
        depth: Number of plies below the root; passed down incremented by
            one but not used in the score.
        is_maximizing: True when the AI is to move, False when the human
            player is to move.

    Returns:
        1, -1 or 0 as described above.
    """
    # Sums of all eight lines: three rows, three columns, two diagonals.
    line_totals = [board[i, :].sum() for i in range(3)]
    line_totals += [board[:, i].sum() for i in range(3)]
    line_totals.append(board[0, 0] + board[1, 1] + board[2, 2])
    line_totals.append(board[0, 2] + board[1, 1] + board[2, 0])
    # Score by who actually completed a line, not by whose turn it is.
    if 3 * ai_piece in line_totals:
        return 1
    if 3 * player_piece in line_totals:
        return -1

    empty_cells = [(i, j) for i in range(3) for j in range(3) if board[i, j] == 0]
    if not empty_cells:
        # Full board without a winner is a draw.
        return 0

    piece = ai_piece if is_maximizing else player_piece
    choose = max if is_maximizing else min
    scores = []
    for i, j in empty_cells:
        # Try the move, score the resulting position, then undo it.
        board[i, j] = piece
        scores.append(minimax(board, depth + 1, not is_maximizing))
        board[i, j] = 0
    return choose(scores)
# Main game loop: the human moves first, then the AI answers, until
# game_over() reports that the game has ended.
while not game_over(board):
    player_move(board)
    if game_over(board):
        break
    ai_move(board)
    # Show the position after the AI's reply.
    print(board)

# Decide the result from the sums of all eight lines (three rows, three
# columns, two diagonals): +3 is a line of player pieces, -3 a line of AI
# pieces.
line_totals = [board[i, :].sum() for i in range(3)]
line_totals += [board[:, i].sum() for i in range(3)]
line_totals.append(board[0, 0] + board[1, 1] + board[2, 2])
line_totals.append(board[0, 2] + board[1, 1] + board[2, 0])
if 3 in line_totals:
    print("你赢了!")
elif -3 in line_totals:
    print("你输了!")
else:
    print("平局!")
```
这个代码使用了minimax算法来实现AI的决策。minimax算法是一种递归算法,会考虑所有可能的落子情况,并计算每种情况下的得分,然后选择得分最优的落子。由于井字棋的状态空间相对较小,因此使用minimax算法是可行的。在实际的应用中,可能需要使用更复杂的算法(例如 alpha-beta 剪枝、蒙特卡洛树搜索或强化学习)来处理更大的状态空间。
阅读全文