帮我用C语言链表写一个可以读入多行文本的KWIC索引系统
时间: 2023-04-08 08:02:14 浏览: 80
好的,我可以回答这个问题。KWIC(Key Word In Context,上下文关键词)索引是一种文本索引技术:它把文本中的每个关键词连同其上下文一起按一定的规则排列,方便用户查找。以下是一个基于链表的C语言实现:
```c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#define MAX_LINE_LEN 1024
#define MAX_WORD_LEN 128
#define MAX_CONTEXT_LEN 40
/*
 * Node of a singly linked list of strings.
 *
 * Despite the field name, main() stores one whole lower-cased input line
 * (not a single word) in each node.
 */
typedef struct node {
char *word; /* string owned by the node; free_list() passes it to free() */
struct node *next; /* following node, or NULL at the end of the list */
} Node;
/*
 * Append a new node holding `word` to the end of the list.
 *
 * head: address of the list's head pointer. *head is NULL for an empty
 *       list and is updated when the first node is inserted.
 * word: pointer stored as-is (the string is not copied). free_list() later
 *       passes it to free(), so it must come from the heap allocator.
 *
 * The list is walked from the head to find the tail, so the cost of one
 * call grows linearly with the current list length.
 *
 * If the node cannot be allocated, the error is reported on stderr and the
 * program exits: the void return type leaves no way to report the failure
 * to the caller, and continuing would dereference a NULL pointer.
 */
void insert(Node **head, char *word) {
    if (head == NULL) {
        return;
    }

    Node *new_node = malloc(sizeof *new_node);
    if (new_node == NULL) {
        perror("insert: malloc");
        exit(EXIT_FAILURE);
    }
    new_node->word = word;
    new_node->next = NULL;

    /* Follow the chain of `next` links (starting with the head pointer
     * itself) until the NULL link is found, then hook the node in there.
     * This handles the empty-list case without a special branch. */
    Node **link = head;
    while (*link != NULL) {
        link = &(*link)->next;
    }
    *link = new_node;
}
/*
 * Release every node reachable from `head`, together with the string each
 * node points to. Passing NULL (an empty list) does nothing.
 */
void free_list(Node *head) {
    Node *following;

    for (Node *node = head; node != NULL; node = following) {
        /* Remember the successor before the node itself is released. */
        following = node->next;
        free(node->word);
        free(node);
    }
}
/*
 * Convert a NUL-terminated string to lower case in place, using tolower()
 * under the current C locale.
 *
 * str: writable string to convert; NULL is accepted and ignored.
 */
void to_lower(char *str) {
    if (str == NULL) {
        return;
    }
    for (char *p = str; *p != '\0'; p++) {
        /* tolower() requires a value representable as unsigned char (or
         * EOF); a plain char can be negative for bytes >= 0x80, so convert
         * first to avoid undefined behaviour on non-ASCII input. */
        *p = (char)tolower((unsigned char)*p);
    }
}
/*
 * Print the KWIC (Key Word In Context) entries for one line of text.
 *
 * line:        writable NUL-terminated string. It is split in place with
 *              strtok() on spaces, tabs and newlines, so its contents are
 *              modified and any strtok() scan in progress is lost.
 *              NULL is accepted and prints nothing.
 * index:       not used by this function; kept so existing callers still
 *              compile.
 * context_len: number of words shown on each side of the keyword.
 *
 * One output line is written to stdout per word i of the input:
 *
 *   <all words, rotated so word i comes first> TAB <context window> NEWLINE
 *
 * The context window covers positions i - context_len .. i + context_len,
 * with the keyword wrapped in [brackets]. Both parts treat the line as
 * circular, so positions outside the line wrap around (and words repeat
 * when the window is wider than the line). A line without words prints
 * nothing.
 *
 * At most MAX_WORD_LEN words are indexed; if the line has more, a warning
 * is written to stderr and the remaining words are ignored.
 */
void print_kwic(char *line, int index, int context_len) {
    char *words[MAX_WORD_LEN];
    const int max_words = (int)(sizeof words / sizeof words[0]);
    int word_count = 0;

    (void)index; /* intentionally unused, see header comment */

    if (line == NULL) {
        return;
    }

    /* Split line into words, never writing past the end of words[]. */
    for (char *word = strtok(line, " \t\n"); word != NULL;
         word = strtok(NULL, " \t\n")) {
        if (word_count == max_words) {
            fprintf(stderr,
                    "print_kwic: more than %d words on a line; "
                    "extra words ignored\n", max_words);
            break;
        }
        words[word_count++] = word;
    }

    /* Print KWIC for each word (loop body never runs when word_count is 0,
     * so the modulo operations below never divide by zero). */
    for (int i = 0; i < word_count; i++) {
        /* Rotation of the line that starts at word i. */
        for (int j = i; j < word_count + i; j++) {
            if (j > i) {
                printf(" ");
            }
            printf("%s", words[j % word_count]);
        }

        /* Context window around word i. */
        printf("\t");
        for (int j = i - context_len; j <= i + context_len; j++) {
            /* In C, % keeps the sign of the dividend, so a negative j
             * gives a negative remainder; shift it into 0..word_count-1
             * before using it as an index. */
            int k = j % word_count;
            if (k < 0) {
                k += word_count;
            }
            if (j == i) {
                printf("[");
            }
            printf("%s", words[k]);
            if (j == i) {
                printf("]");
            }
            if (j < i + context_len) {
                printf(" ");
            }
        }
        printf("\n");
    }
}
/*
 * Read text from stdin line by line, keep a lower-cased copy of every line
 * in a linked list, then print the KWIC entries of each stored line.
 *
 * fgets() reads at most MAX_LINE_LEN - 1 characters per call, so a longer
 * input line is stored (and indexed) as several consecutive entries.
 *
 * Each stored line is announced with "<index>:<TAB>", counting from 0.
 * The prefix is printed once per stored line, i.e. only in front of the
 * first KWIC output line produced for it.
 *
 * Returns 0 on success, EXIT_FAILURE if a line copy cannot be allocated.
 */
int main(void) {
    char line[MAX_LINE_LEN];
    Node *head = NULL;

    /* Read lines from stdin and insert them into the linked list. */
    while (fgets(line, sizeof line, stdin) != NULL) {
        /* Copy with malloc/memcpy rather than strdup(): strdup() is not
         * part of ISO C11, and its result was previously used unchecked. */
        size_t size = strlen(line) + 1;
        char *copy = malloc(size);
        if (copy == NULL) {
            perror("main: malloc");
            free_list(head);
            return EXIT_FAILURE;
        }
        memcpy(copy, line, size);
        to_lower(copy);
        insert(&head, copy); /* the list now owns `copy` */
    }

    /* Print KWIC for each line. The same index value is used for the
     * printed label and for the call, and is advanced afterwards. */
    int context_len = MAX_CONTEXT_LEN / 2;
    int index = 0;
    for (Node *curr = head; curr != NULL; curr = curr->next, index++) {
        printf("%d:\t", index);
        print_kwic(curr->word, index, context_len);
    }

    /* Free memory. */
    free_list(head);
    return 0;
}
```
这个程序可以从标准输入读入多行文本,然后对每一行进行KWIC索引,并输出到标准输出。关键词每一侧显示的上下文长度(以单词数计)为`MAX_CONTEXT_LEN / 2`,可以通过修改`MAX_CONTEXT_LEN`宏定义来调整。
阅读全文