c++亚马逊棋蒙特卡洛代码
时间: 2023-08-28 18:04:46 浏览: 108
以下是一个简单的 C++ Monte Carlo Tree Search 实现,用于玩亚马逊棋游戏:
```
#include <bits/stdc++.h>
using namespace std;
const int N = 10, INF = 1e9;
const int dx[] = {-1, -1, -1, 0, 0, 1, 1, 1};
const int dy[] = {-1, 0, 1, -1, 1, -1, 0, 1};
int n, m, turn, tot, cnt;
int a[N][N], f[N * N][N * N], pos[N * N][2], val[N * N], vis[N * N], res[N * N];
double c1 = 1.0, c2 = sqrt(2.0);
mt19937 rnd(time(NULL));
inline bool check(int x, int y) {
return x >= 1 && x <= n && y >= 1 && y <= m && !a[x][y];
}
inline int calc(int x) {
int ans = 0;
for(int i = 0; i < 8; i++) {
int nx = pos[x][0] + dx[i], ny = pos[x][1] + dy[i];
while(check(nx, ny)) ans++, nx += dx[i], ny += dy[i];
}
return ans;
}
inline double calcval(int x) {
return 1.0 * val[x] / (vis[x] + 1) + c1 * sqrt(log(tot) / (vis[x] + 1));
}
inline int expand(int x) {
int ans = 0;
for(int i = 0; i < 8; i++) {
int nx = pos[x][0] + dx[i], ny = pos[x][1] + dy[i];
while(check(nx, ny)) {
ans++;
f[x][++cnt] = nx * m + ny;
val[cnt] = calc(cnt);
nx += dx[i], ny += dy[i];
}
}
return ans;
}
inline int getmax() {
int ans = 1;
double mx = -INF;
for(int i = 1; i <= cnt; i++) {
if(!vis[i]) return i;
double val = calcval(i);
if(val > mx) mx = val, ans = i;
}
return ans;
}
inline int simulate(int x) {
int ans = 0;
for(int i = 0; i < 100; i++) {
int nx = pos[x][0], ny = pos[x][1];
while(1) {
int p = rnd() % 8, xx = nx + dx[p], yy = ny + dy[p];
if(check(xx, yy)) {
nx = xx, ny = yy;
break;
}
}
int t = nx * m + ny;
if(vis[t] || t == x) continue;
vis[t] = 1;
int num = expand(t);
ans += num;
}
return ans;
}
inline int getbest(int x) {
int ans = 1;
double mx = -INF;
for(int i = 1; i <= cnt; i++) {
if(!vis[i]) continue;
double val = 1.0 * res[i] / vis[i] + c2 * sqrt(log(tot) / vis[i]);
if(val > mx) mx = val, ans = i;
}
return ans;
}
inline void backpropagate(int x, int v) {
while(x) {
vis[x]++, res[x] += v, v = -v, x = f[x][0];
}
}
inline void mcts() {
int now = 1;
while(1) {
if(!expand(now)) break;
int idx = getmax();
int num = simulate(idx);
backpropagate(idx, num);
turn = -turn;
tot++;
now = getbest(now);
}
}
int main() {
cin >> n >> m;
for(int i = 1; i <= n; i++) {
for(int j = 1; j <= m; j++) {
cin >> a[i][j];
if(a[i][j] == 1) pos[++cnt][0] = i, pos[cnt][1] = j;
}
}
mcts();
int ans = 1, mx = -INF;
for(int i = 1; i <= cnt; i++) {
if(!vis[i]) continue;
if(res[i] * turn > mx) mx = res[i] * turn, ans = i;
}
cout << pos[ans][0] << " " << pos[ans][1] << endl;
return 0;
}
```
该代码实现了一个简单的 Monte Carlo Tree Search 算法来玩亚马逊棋游戏。具体来说,它首先根据当前局面扩展一个节点,然后选择一个未被访问的子节点进行模拟,得到一个评估值,最后将评估值回溯到从该节点到根节点的所有节点。根据 MCTS 算法中的公式,每个节点的价值由它的胜率和探索程度决定,这里的实现中使用了 UCB1 公式。
阅读全文