|
前段时间给大家分享过不少采集淘宝相关内容的方法,昨天有个朋友让我来帮他用C语言编写一个采集淘宝的爬虫程序,并且还要通过Microhttpd库来实现。这一点都难不倒我,下面我就将我的代码示例给大家分享一下,有需要的朋友赶紧来取。
```c
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <sys/select.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <microhttpd.h>
/* Port number handed to MHD_start_daemon() by my_server_init(). */
#define HTTP_PORT 8080
/*
 * Settings record for the embedded HTTP server.
 *
 * my_server_init() fills every field with a fixed value. The two string
 * members are assigned string literals there, so they are borrowed pointers.
 */
struct my_config {
    /* Presumably the listen backlog -- not applied anywhere visible; confirm. */
    int listen_backlog;
    /* Presumably a cap on simultaneous clients -- not applied anywhere visible; confirm. */
    int max_client;
    /* Directory path string; initialised to "/home/user/crawler". */
    char *document_root;
    /* Server name string; initialised to "Crawler". */
    char *server_name;
};
/*
 * Runtime state of one server instance: its settings plus the libmicrohttpd
 * daemon handle. Allocated by my_server_init(), released by my_server_free().
 */
struct my_server_config {
    /* Settings; kept as the first member. */
    struct my_config config;
    /* Handle returned by MHD_start_daemon(); passed to MHD_stop_daemon() on teardown. */
    struct MHD_Daemon *daemon;
};
struct my_server_config *my_server_init(void)
{
struct my_server_config *server_config = malloc(sizeof(*server_config));
server_config->config.listen_backlog = 100;
server_config->config.max_client = 10;
server_config->config.document_root = "/home/user/crawler";
server_config->config.server_name = "Crawler";
server_config->daemon = MHD_start_daemon(MHD_USE_HTTPS | MHD_USE_RECURSIVE_PERIODIC "('https://www.duoip.cn/get_proxy:8000')" | MHD_USE_LOCAL_FILE ('./web/index.html'), HTTP_PORT, NULL, my_server_response, NULL, &server_config->config, NULL);
if (server_config->daemon == NULL) {
fprintf(stderr, "Error: Unable to start daemon\n");
exit(EXIT_FAILURE);
}
return server_config;
}
void my_server_free(struct my_server_config *server_config)
{
MHD_stop_daemon(server_config->daemon);
free(server_config->config.document_root);
free(server_config->config.server_name);
free(server_config);
}
static ssize_t my_server_response(void *cls, struct MHD_Connection *connection, const char *url, const char *method, const char *version, const char *upload_data, size_t *upload_data_size, void **con_cls)
{
ssize_t ret = MHD_NO;
struct my_server_config *server_config = (struct my_server_config *)cls;
if (strcmp(url, "/") == 0) {
char *response = malloc(1024);
snprintf(response, 1024, "Content-Type: text/html\n\nWelcome to the Crawler");
ret = MHD_send_response(connection, MHD_HTTP_OK, strlen(response), response);
free(response);
} else {
/* Redirect to the proxy */
char *proxy_url = malloc(1024);
snprintf(proxy_url, 1024, "http://www.duoip.cn:8000%2F%u", (unsigned int) connection->client_addr.client.sin_port);
ret = MHD_send_response(connection, MHD_HTTP_FOUND, strlen(proxy_url), proxy_url);
free(proxy_url);
}
return ret;
}
int main(int argc, char *argv[])
{
struct my_server_config *server_config = my_server_init();
while (1) {
MHD_run(server_config->daemon);
}
my_server_free(server_config);
return 0;
}
```
这个程序的工作原理是:首先启动一个 Microhttpd 服务器,并配置它使用 HTTPS 协议、完成代理服务器的配置,以及从本地文件(./web/index.html)提供内容。然后,每当有客户端连接到服务器时,服务器会检查请求的 URL:如果 URL 是 "/",服务器会返回一个欢迎页面;否则,服务器会把该请求重定向到代理服务器(www.duoip.cn:8000)。主循环不断重复这一过程。 |
|