用C写一个图上作业法构造与DFA等价正规表达式的程序实现
时间: 2024-05-16 20:17:07 浏览: 33
以下是一个用C语言实现的图上作业法构造与DFA等价正规表达式的程序:
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#define MAX 50 // 定义最大状态数
#define MAX_LEN 1000 // 定义最大正则表达式长度
int n, m, p, q; // n: 状态数,m: 字符集大小,p: 起始状态,q: 终止状态
char re[MAX][MAX_LEN]; // 存储每个状态的正则表达式
char sigma[MAX]; // 存储字符集
int trans[MAX][MAX][MAX]; // 存储状态转移
int priority[MAX][MAX]; // 存储正则表达式优先级
// 比较两个正则表达式的优先级
int cmp(char a, char b) {
if (a == '|') {
return (b != '|');
} else if (a == '.') {
return (b != '|' && b != '.');
} else {
return 1;
}
}
// 将正则表达式转化为后缀表达式
void to_postfix(char *re) {
int len = strlen(re);
char stack[MAX];
int top = -1;
int postfix[MAX_LEN];
int p = 0;
for (int i = 0; i < len; i++) {
if (re[i] >= 'a' && re[i] <= 'z') {
postfix[p++] = re[i];
} else {
if (top == -1 || re[i] == '(' || cmp(re[i], stack[top])) {
stack[++top] = re[i];
} else {
while (top != -1 && !cmp(re[i], stack[top])) {
postfix[p++] = stack[top--];
}
stack[++top] = re[i];
}
}
}
while (top != -1) {
postfix[p++] = stack[top--];
}
postfix[p] = '\0';
strcpy(re, (char *) postfix);
}
// 根据字符集和正则表达式构造DFA
void build_dfa() {
int state[MAX] = {0};
int state_cnt = 0;
for (int i = 0; i < n; i++) {
to_postfix(re[i]);
int len = strlen(re[i]);
char stack[MAX];
int top = -1;
for (int j = 0; j < len; j++) {
if (re[i][j] >= 'a' && re[i][j] <= 'z') {
state[++state_cnt] = 0;
for (int k = 0; k < m; k++) {
if (sigma[k] == re[i][j]) {
trans[i][k][state_cnt] = 1;
}
}
stack[++top] = state_cnt;
} else if (re[i][j] == '|') {
int s1 = stack[top--];
int s2 = stack[top--];
state[++state_cnt] = 0;
for (int k = 0; k < m; k++) {
trans[i][k][s1] = trans[i][k][s2] = 0;
trans[i][k][state_cnt] = trans[i][k][s1] || trans[i][k][s2];
}
stack[++top] = state_cnt;
} else if (re[i][j] == '.') {
int s1 = stack[top--];
int s2 = stack[top--];
for (int k = 0; k < m; k++) {
for (int l = 1; l <= state_cnt; l++) {
trans[i][k][l] = 0;
}
for (int l = 1; l <= state_cnt; l++) {
if (trans[i][k][s1] && trans[i][k][l]) {
trans[i][k][l] = 1;
}
if (trans[i][k][s2] && trans[i][k][l]) {
trans[i][k][l] = 1;
}
}
}
stack[++top] = s2;
} else if (re[i][j] == '*') {
int s = stack[top--];
state[++state_cnt] = 0;
for (int k = 0; k < m; k++) {
trans[i][k][s] = trans[i][k][state_cnt] = 0;
trans[i][k][state_cnt] = 1;
}
for (int k = 1; k <= state_cnt; k++) {
if (trans[i][0][s] && trans[i][0][k]) {
trans[i][0][k] = 1;
}
}
stack[++top] = state_cnt;
}
}
int s = stack[top];
priority[i][s] = 1;
}
}
// 计算两个正则表达式的连接
void concat(char *re1, char *re2, char *res) {
strcpy(res, re1);
strcat(res, ".");
strcat(res, re2);
}
// 计算两个正则表达式的合并
void merge(char *re1, char *re2, char *res) {
strcpy(res, "(");
strcat(res, re1);
strcat(res, "|");
strcat(res, re2);
strcat(res, ")");
}
// 计算正则表达式的闭包
void closure(char *re, char *res) {
strcpy(res, "(");
strcat(res, re);
strcat(res, ")*");
}
// 构造状态i到状态j的正则表达式
void get_re(int i, int j, char *res) {
if (i == j) {
strcpy(res, "ε");
return;
}
if (trans[i][0][j]) {
res[0] = sigma[0];
res[1] = '\0';
return;
}
char tmp[MAX_LEN];
strcpy(res, "");
for (int k = 0; k < m; k++) {
for (int l = 0; l < n; l++) {
if (trans[i][k][l] && trans[l][0][j]) {
sprintf(tmp, "%c", sigma[k]);
merge(res, tmp, res);
} else if (trans[i][k][l]) {
char re1[MAX_LEN], re2[MAX_LEN];
get_re(l, j, re1);
sprintf(re2, "%c", sigma[k]);
concat(re2, re1, re1);
merge(res, re1, res);
}
}
}
}
// 构造最终的正则表达式
void construct_re() {
char res[MAX_LEN];
get_re(p, q, res);
strcpy(re[p], res);
}
int main() {
printf("请输入状态数n、字符集大小m、起始状态p、终止状态q(空格隔开):");
scanf("%d %d %d %d", &n, &m, &p, &q);
printf("请输入字符集(长度为m的小写字母串):");
scanf("%s", sigma);
for (int i = 0; i < n; i++) {
printf("请输入状态 %d 的正则表达式:", i);
scanf("%s", re[i]);
}
build_dfa();
construct_re();
printf("最终的正则表达式为:%s\n", re[p]);
return 0;
}
```
该程序的主要思路是,首先根据给定的正则表达式构造DFA,然后再根据DFA递归地求出起始状态到终止状态的正则表达式。具体实现细节详见代码注释。