c语言代码根据我的代码
时间: 2023-10-20 13:05:39 浏览: 144
好的,以下是一个基于你之前提供的C++代码实现爬取当当网图书信息的C语言代码示例。请注意,由于C语言的特殊性,代码结构和使用方式与C++会有所不同。
```c
#include <limits.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <curl/curl.h>
#include <libxml/parser.h>
#include <libxml/tree.h>
/* Address that main() downloads via libcurl. */
#define URL "http://book.dangdang.com/"
/* Value passed to CURLOPT_USERAGENT for the request. */
#define USER_AGENT "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3"
/*
 * Book-count limit: parse_xml() uses it to cap how many entries it stores,
 * and main() uses BOOK_COUNT * 3 as its loop bound.
 */
#define BOOK_COUNT 20
/*
 * One book record. parse_xml() fills each field as a NUL-terminated string
 * and main() writes the three fields as one CSV row.
 */
struct Book {
/* Written under the first CSV column (title). */
char name[100];
/* Written under the second CSV column (author). */
char author[100];
/* Written under the third CSV column (price), kept as text. */
char price[20];
};
/* Growable heap buffer that accumulates the downloaded response body. */
struct MemoryStruct {
    char *memory; /* heap buffer; NUL-terminated after every successful append */
    size_t size;  /* payload bytes stored, not counting the terminator */
};

/*
 * libcurl write callback: appends one received chunk to a MemoryStruct.
 *
 * contents: chunk data (size * nmemb bytes, not NUL-terminated).
 * size, nmemb: element size and element count of the chunk.
 * userp: pointer to the struct MemoryStruct registered via CURLOPT_WRITEDATA.
 *
 * Returns the number of bytes consumed (size * nmemb) on success. Returns 0
 * when the buffer cannot grow; in that case the existing buffer is left
 * untouched and still owned by the caller, so nothing is leaked.
 */
static size_t WriteMemoryCallback(void *contents, size_t size, size_t nmemb, void *userp) {
    struct MemoryStruct *mem = userp;

    /* size * nmemb must not wrap around size_t. */
    if (size != 0 && nmemb > SIZE_MAX / size) {
        fprintf(stderr, "Out of memory\n");
        return 0;
    }
    size_t realsize = size * nmemb;

    /* mem->size + realsize + 1 (terminator) must not wrap either. */
    if (realsize >= SIZE_MAX - mem->size) {
        fprintf(stderr, "Out of memory\n");
        return 0;
    }

    /* Keep the old pointer until realloc succeeds so a failure cannot leak it. */
    char *grown = realloc(mem->memory, mem->size + realsize + 1);
    if (grown == NULL) {
        fprintf(stderr, "Out of memory\n");
        return 0;
    }
    mem->memory = grown;

    if (realsize > 0) {
        memcpy(mem->memory + mem->size, contents, realsize);
    }
    mem->size += realsize;
    mem->memory[mem->size] = '\0';
    return realsize;
}
void parse_xml(char *xml_str, struct Book *book_list) {
xmlDocPtr doc = xmlReadMemory(xml_str, strlen(xml_str), NULL, NULL, 0);
xmlNodePtr root_node = xmlDocGetRootElement(doc);
xmlNodePtr sale_node = root_node->children;
xmlNodePtr new_node = sale_node->next;
xmlNodePtr hot_node = new_node->next;
int i = 0;
for (xmlNodePtr book_node = sale_node->children; book_node != NULL; book_node = book_node->next) {
if (i >= BOOK_COUNT) {
break;
}
xmlNodePtr name_node = book_node->children;
xmlNodePtr author_node = name_node->next;
xmlNodePtr price_node = author_node->next;
strcpy(book_list[i].name, xmlNodeGetContent(name_node));
strcpy(book_list[i].author, xmlNodeGetContent(author_node));
strcpy(book_list[i].price, xmlNodeGetContent(price_node));
i++;
}
for (xmlNodePtr book_node = new_node->children; book_node != NULL; book_node = book_node->next) {
if (i >= BOOK_COUNT) {
break;
}
xmlNodePtr name_node = book_node->children;
xmlNodePtr author_node = name_node->next;
xmlNodePtr price_node = author_node->next;
strcpy(book_list[i].name, xmlNodeGetContent(name_node));
strcpy(book_list[i].author, xmlNodeGetContent(author_node));
strcpy(book_list[i].price, xmlNodeGetContent(price_node));
i++;
}
for (xmlNodePtr book_node = hot_node->children; book_node != NULL; book_node = book_node->next) {
if (i >= BOOK_COUNT) {
break;
}
xmlNodePtr name_node = book_node->children;
xmlNodePtr author_node = name_node->next;
xmlNodePtr price_node = author_node->next;
strcpy(book_list[i].name, xmlNodeGetContent(name_node));
strcpy(book_list[i].author, xmlNodeGetContent(author_node));
strcpy(book_list[i].price, xmlNodeGetContent(price_node));
i++;
}
xmlFreeDoc(doc);
xmlCleanupParser();
}
int main() {
CURL *curl_handle;
CURLcode res;
struct MemoryStruct chunk;
chunk.memory = malloc(1);
chunk.size = 0;
curl_global_init(CURL_GLOBAL_ALL);
curl_handle = curl_easy_init();
curl_easy_setopt(curl_handle, CURLOPT_URL, URL);
curl_easy_setopt(curl_handle, CURLOPT_USERAGENT, USER_AGENT);
curl_easy_setopt(curl_handle, CURLOPT_WRITEFUNCTION, WriteMemoryCallback);
curl_easy_setopt(curl_handle, CURLOPT_WRITEDATA, (void *) &chunk);
res = curl_easy_perform(curl_handle);
if (res != CURLE_OK) {
printf("curl_easy_perform() failed: %s\n", curl_easy_strerror(res));
} else {
struct Book book_list[60];
parse_xml(chunk.memory, book_list);
FILE *fp = fopen("book_info.csv", "w");
fprintf(fp, "书名,作者,价格\n");
for (int i = 0; i < BOOK_COUNT * 3; i++) {
fprintf(fp, "%s,%s,%s\n", book_list[i].name, book_list[i].author, book_list[i].price);
}
fclose(fp);
printf("数据已保存至book_info.csv文件中\n");
}
curl_easy_cleanup(curl_handle);
curl_global_cleanup();
free(chunk.memory);
return 0;
}
```
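以上代码使用libcurl库发送HTTP请求,使用libxml2库解析返回的内容。它首先请求当当网页面并把响应体保存在内存中,然后将响应体直接交给libxml2的XML解析器(xmlReadMemory)解析,再把解析出的书名、作者和价格填入结构体数组,最后将结构体数组保存为.csv文件。请注意,当当网返回的是HTML页面而不是严格的XML,直接按XML解析可能失败,实际使用时需要改用HTML解析接口,并根据页面的真实结构调整节点遍历逻辑。另外,代码中使用了动态内存分配,需要在程序结束时手动释放内存。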
阅读全文