亚马逊棋蒙特卡洛代码c++
时间: 2023-08-28 10:04:44 浏览: 213
以下是一个基于蒙特卡洛树搜索算法的亚马逊棋 AI 代码,使用 C++ 实现:
```c++
#include <bits/stdc++.h>
using namespace std;
// Upper bound on the board side length; sizes every fixed board-shaped array.
const int N = 10;
// Capacity of the flat per-move arrays (dx, dy, cnt).
const int M = 100000;
// Row/column offsets of the eight neighbouring squares, listed row by row.
const int dir[8][2] = {
    {-1, -1}, {-1, 0}, {-1, 1},
    {0, -1},  {0, 1},
    {1, -1},  {1, 0},  {1, 1},
};
// One candidate move: a piece travelling from (x, y) to (nx, ny).
// Field order matters: moves are built with positional aggregate initialisation.
struct Move {
    int x;    // origin, first board index
    int y;    // origin, second board index
    int nx;   // destination, first board index
    int ny;   // destination, second board index
    int cnt;  // size of the flood fill that produced this move
};
// ---- Global game state (zero-initialised at program start) ----
// n, m : board dimensions in use; both must stay within N.
// t    : range limit compared against dist() in get_moves.
// a    : board contents - 0 is empty, 1 and 2 are the two players' pieces.
// f    : distance table filled by init(), indexed [x1][y1][x2][y2].
// vis  : destinations already emitted during one get_moves call.
// st   : per-piece visited marks for the flood fill in get_moves.
int n, m, t, a[N][N], f[N][N][N][N], vis[N][N], st[N][N];
// q      : flood-fill queue of (x, y) pairs used by get_moves.
// hh, tt : queue cursors; get_moves shadows them with locals of the same name.
// dx, dy : not referenced anywhere in the visible code.
// cnt    : per-move counter read by the selection formula in MCTS.
int q[N * N][2], hh, tt, dx[M], dy[M], cnt[M];
// cnts : per-move accumulated score, indexed like the move list. MCTS and main
//        both read and write it, but no declaration existed, so the program
//        could not compile.
int cnts[M];
// Heuristic distance between (x, y) and (nx, ny).
// Squares sharing a row, column or diagonal are max(|dx|, |dy|) apart;
// every other pair costs twice that value. Identical squares give 0.
int dist(int x, int y, int nx, int ny) {
    const int row_gap = std::abs(x - nx);
    const int col_gap = std::abs(y - ny);
    const int longest = std::max(row_gap, col_gap);
    const bool on_straight_line = row_gap == 0 || col_gap == 0 || row_gap == col_gap;
    return on_straight_line ? longest : 2 * longest;
}
// Packs board coordinates (x, y) into one row-major index using the global width m.
int get(int x, int y) {
    const int row_start = x * m;
    return row_start + y;
}
vector<Move> get_moves(int player) {
vector<Move> res;
memset(vis, 0, sizeof vis);
for (int i = 0; i < n; ++i)
for (int j = 0; j < m; ++j)
if (a[i][j] == player) {
for (int k = 0; k < 8; ++k) {
int nx = i + dir[k][0], ny = j + dir[k][1];
if (nx < 0 || nx >= n || ny < 0 || ny >= m || a[nx][ny]) continue;
memset(st, 0, sizeof st);
int hh = 0, tt = -1;
q[++tt][0] = i, q[tt][1] = j, st[i][j] = 1;
while (hh <= tt) {
int x = q[hh][0], y = q[hh++][1];
for (int l = 0; l < 8; ++l) {
int nx = x + dir[l][0], ny = y + dir[l][1];
if (nx < 0 || nx >= n || ny < 0 || ny >= m || st[nx][ny]) continue;
int d = dist(i, j, nx, ny);
if (d >= t) continue;
if (a[nx][ny] == 3 - player) {
tt = -1;
break;
}
q[++tt][0] = nx, q[tt][1] = ny, st[nx][ny] = 1;
}
}
if (tt == -1) continue;
for (int l = 0; l <= tt; ++l)
if (!vis[q[l][0]][q[l][1]]) {
vis[q[l][0]][q[l][1]] = 1;
res.push_back({i, j, q[l][0], q[l][1], tt + 1});
}
}
}
return res;
}
int dfs(int player, int depth) {
if (depth == 0) return 0;
auto moves = get_moves(player);
int res = -1;
for (auto move : moves) {
int x = move.x, y = move.y, nx = move.nx, ny = move.ny, cnt = move.cnt;
a[nx][ny] = player, a[x][y] = 0;
int t = dfs(3 - player, depth - 1);
a[x][y] = player, a[nx][ny] = 0;
if (t == -1) return -1;
res = max(res, cnt - t);
}
return res;
}
// Fills the global table f so that f[sx][sy][tx][ty] is the number of
// eight-neighbour steps needed to get from (sx, sy) to (tx, ty) on the n x m
// board, ignoring pieces. Entries outside the board stay -1.
//
// One breadth-first search is run from every square. The previous single
// search derived each entry from f[x][y][x][y], which is -1 for any square
// without a piece, so the table never held source-to-target distances.
// Covering every square as a source also keeps the table valid after pieces
// have moved away from their starting squares.
void init() {
    memset(f, -1, sizeof f);
    for (int sx = 0; sx < n; ++sx) {
        for (int sy = 0; sy < m; ++sy) {
            auto& d = f[sx][sy];  // distances from this one source square
            queue<int> frontier;
            d[sx][sy] = 0;
            frontier.push(get(sx, sy));
            while (!frontier.empty()) {
                const int cur = frontier.front();
                frontier.pop();
                const int x = cur / m, y = cur % m;
                for (int k = 0; k < 8; ++k) {
                    const int nx = x + dir[k][0], ny = y + dir[k][1];
                    if (nx < 0 || nx >= n || ny < 0 || ny >= m) continue;
                    if (d[nx][ny] != -1) continue;  // already reached by a shorter path
                    d[nx][ny] = d[x][y] + 1;
                    frontier.push(get(nx, ny));
                }
            }
        }
    }
}
// Static evaluation of the global board: adds up f[p][o] for every pairing of
// a player-1 piece p with a player-2 piece o.
int eval() {
    vector<int> ones, twos;
    for (int r = 0; r < n; ++r) {
        for (int c = 0; c < m; ++c) {
            const int cell = r * m + c;  // row-major index, decoded again below
            if (a[r][c] == 1) {
                ones.push_back(cell);
            } else if (a[r][c] == 2) {
                twos.push_back(cell);
            }
        }
    }
    int total = 0;
    for (const int p : ones) {
        for (const int o : twos) {
            total += f[p / m][p % m][o / m][o % m];
        }
    }
    return total;
}
int dfs2(int player, int depth, int lf, int rf) {
if (depth == 0) return eval();
auto moves = get_moves(player);
if (moves.empty()) return lf - rf;
int res = -1e9;
vector<int> cnts;
for (auto move : moves) {
int x = move.x, y = move.y, nx = move.nx, ny = move.ny, cnt = move.cnt;
a[nx][ny] = player, a[x][y] = 0;
cnts.push_back(cnt);
res = max(res, -dfs2(3 - player, depth - 1, -rf, -lf));
a[x][y] = player, a[nx][ny] = 0;
lf = max(lf, res - cnt);
if (lf >= rf) break;
}
return res;
}
void MCTS(int player, int depth) {
auto moves = get_moves(player);
if (moves.empty()) return;
int ucnt = moves.size();
int sum = 0;
for (int i = 0; i < ucnt; ++i) {
int x = moves[i].x, y = moves[i].y, nx = moves[i].nx, ny = moves[i].ny, cnt = moves[i].cnt;
a[nx][ny] = player, a[x][y] = 0;
int t = dfs(3 - player, depth - 1);
a[nx][ny] = 0, a[x][y] = player;
if (t != -1) cnts[i] += cnt - t, sum += cnt - t;
}
double mx = -1e9;
int idx = -1;
for (int i = 0; i < ucnt; ++i) {
double val = 1.0 * cnts[i] / sum + sqrt(log(cnt[i]) / cnts[i]);
if (val > mx) mx = val, idx = i;
}
int x = moves[idx].x, y = moves[idx].y, nx = moves[idx].nx, ny = moves[idx].ny;
a[nx][ny] = player, a[x][y] = 0;
}
int main() {
cin >> n >> m >> t;
for (int i = 0; i < n; ++i)
for (int j = 0; j < m; ++j) cin >> a[i][j];
init();
int depth = 0;
while (1) {
auto moves = get_moves(1);
if (moves.empty()) {
puts("Black loses");
break;
}
if (depth >= 3) {
memset(cnt, 0, sizeof cnt);
for (int i = 0; i < M; ++i) cnt[i] = 1;
for (int i = 0; i < 100; ++i) {
int t = dfs2(1, 3, -1e9, 1e9);
if (t > 0) break;
int idx = rand() % moves.size();
int x = moves[idx].x, y = moves[idx].y, nx = moves[idx].nx, ny = moves[idx].ny;
a[nx][ny] = 1, a[x][y] = 0;
}
} else {
for (int i = 0; i < 10000; ++i) MCTS(1, 5);
int mx = -1e9, idx = -1;
for (int i = 0; i < moves.size(); ++i)
if (cnts[i] > mx) mx = cnts[i], idx = i;
int x = moves[idx].x, y = moves[idx].y, nx = moves[idx].nx, ny = moves[idx].ny;
a[nx][ny] = 1, a[x][y] = 0;
}
++depth;
auto moves2 = get_moves(2);
if (moves2.empty()) {
puts("White loses");
break;
}
int idx = rand() % moves2.size();
int x = moves2[idx].x, y = moves2[idx].y, nx = moves2[idx].nx, ny = moves2[idx].ny;
a[nx][ny] = 2, a[x][y] = 0;
}
return 0;
}
```
该代码实现了亚马逊棋的基本算法,包括:
- 蒙特卡洛树搜索算法;
- 最短路算法;
- 评估函数。
其中,`get_moves` 函数用于获取当前玩家所有候选的移动方式;`dfs` 函数用于在搜索树上搜索到指定深度,并返回该节点的得分(如果当前玩家无子可动,或某个后继节点返回 -1,则返回 -1);`init` 函数用于预处理出最短路;`eval` 函数用于评估当前局面的得分;`dfs2` 函数是带上下界剪枝的定深搜索,在叶子节点调用 `eval`,并返回该节点的得分;`MCTS` 函数对当前玩家的每个候选走法调用 `dfs` 进行估值,把得分累加到 `cnts` 中,再按选择公式挑出一个走法并落子。
阅读全文