【本文谢绝转载,原文来自http://990487026.blog.51cto.com】
chunli魂斗罗~$ cat main.c #include <stdio.h> #include <stdlib.h> #include <unistd.h> #include <curl/curl.h> #include <string.h> #include <ctype.h> #include <iconv.h> #define TMP_FILE "tmp.html" #define HTML_BUFFER_SIZE 1024*800 void split(char **arr, char *str, const char *del) { char *s = strtok(str, del); while (s != NULL) { *arr++ = s; s = strtok(NULL, del); } } /*将str1字符串中第一次出现的str2字符串替换成str3*/ void replaceFirst(char *str1, char *str2, char *str3) { char str4[strlen(str1) + 1]; char *p; strcpy(str4, str1); if ((p = strstr(str1, str2)) != NULL)/*p指向str2在str1中第一次出现的位置*/ { while (str1 != p && str1 != NULL)/*将str1指针移动到p的位置*/ { str1++; } str1[0] = ‘\0‘; /*将str1指针指向的值变成/0,以此来截断str1,舍弃str2及以后的内容,只保留str2以前的内容*/ strcat(str1, str3); /*在str1后拼接上str3,组成新str1*/ strcat(str1, strstr(str4, str2) + strlen(str2)); /*strstr(str4,str2)是指向str2及以后的内容(包括str2),strstr(str4,str2)+strlen(str2)就是将指针向前移动strlen(str2)位,跳过str2*/ } } /*将str1出现的所有的str2都替换为str3*/ void replace(char *str1, char *str2, char *str3) { while (strstr(str1, str2) != NULL) { replaceFirst(str1, str2, str3); } } /*截取src字符串中,从下标为start开始到end-1(end前面)的字符串保存在dest中(下标从0开始)*/ void substring(char *dest, char *src, int start, int end) { int i = start; if (start > strlen(src))return; if (end > strlen(src)) end = strlen(src); while (i < end) { dest[i - start] = src[i]; i++; } dest[i - start] = ‘\0‘; return; } /*返回src中下标为index的字符*/ char charAt(char *src, int index) { char *p = src; int i = 0; if (index < 0 || index > strlen(src)) return 0; while (i < index)i++; return p[i]; } /*返回str2第一次出现在str1中的位置(下表索引),不存在返回-1*/ int indexOf(char *str1, char *str2) { char *p = str1; int i = 0; p = strstr(str1, str2); if (p == NULL) return -1; else { while (str1 != p) { str1++; i++; } } return i; } /*返回str1中最后一次出现str2的位置(下标),不存在返回-1*/ int lastIndexOf(char *str1, char *str2) { char *p = str1; int i = 0, len = strlen(str2); p = strstr(str1, str2); if (p == NULL)return -1; while (p != NULL) { for (; str1 != p; str1++)i++; p = p + len; p = strstr(p, str2); 
} return i; } /*删除str左边第一个非空白字符前面的空白字符(空格符和横向制表符)*/ void ltrim(char *str) { int i = 0, j, len = strlen(str); while (str[i] != ‘\0‘) { if (str[i] != 32 && str[i] != 9)break; /*32:空格,9:横向制表符*/ i++; } if (i != 0) for (j = 0; j <= len - i; j++) { str[j] = str[j + i]; /*将后面的字符顺势前移,补充删掉的空白位置*/ } } /*删除str最后一个非空白字符后面的所有空白字符(空格符和横向制表符)*/ void rtrim(char *str) { char *p = str; int i = strlen(str) - 1; while (i >= 0) { if (p[i] != 32 && p[i] != 9)break; i--; } str[++i] = ‘\0‘; } /*删除str两端的空白字符*/ void trim(char *str) { ltrim(str); rtrim(str); } //这是libcurl接收数据的回调函数,相当于recv的死循环 //其中stream可以自定义数据类型,这里我传入的是文件保存路径 static size_t write_callback(void *ptr, size_t size, size_t nmemb, void *stream) { int len = size * nmemb; int written = len; FILE *fp = NULL; const char *pathfile = (const char*)stream; if (access(pathfile,0) == -1) { fp = fopen((char*) stream, "wb"); } else { fp = fopen((char*) stream, "ab"); } if (fp) { fwrite(ptr, size, nmemb, fp); } // printf("%s\n",ptr); fclose(fp); return written; } //加上-lcurl库 void test_post(char* url,char* data) { CURL *curl; curl = curl_easy_init(); if (curl) { //www.baidu.com/#wd=java curl_easy_setopt(curl, CURLOPT_URL, url); curl_easy_setopt(curl, CURLOPT_POST, 1L); curl_easy_setopt(curl, CURLOPT_POSTFIELDS, data); curl_easy_perform(curl); curl_easy_cleanup(curl); } fclose(curl); } int file_exists(char *filename) { return (access(filename, 0) == 0); } int GetCharset(char *src_html,char *charCode) { char tmp_html[HTML_BUFFER_SIZE]={0}; int pos = indexOf(src_html, "text/html; charset="); if (pos > 0) { strncpy(tmp_html, src_html + pos + strlen("text/html; charset="), strlen(src_html) - pos); pos = indexOf(tmp_html, "\""); if (pos > 0) { strncpy(charCode, tmp_html, pos); } } return 0; } void test_get(char* url) { CURL *curl; CURLcode res; curl = curl_easy_init(); if (curl) { if (file_exists(TMP_FILE)) remove(TMP_FILE); curl_easy_setopt(curl, CURLOPT_URL, url); //指定回调函数 curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, write_callback); 
//这个变量可作为接收或传递数据的作用 curl_easy_setopt(curl, CURLOPT_WRITEDATA, TMP_FILE); res = curl_easy_perform(curl); char tocode[64] = "UTF-8"; if (CURLE_OK == res) { char *ct; res = curl_easy_getinfo(curl, CURLINFO_CONTENT_TYPE, &ct); if ((CURLE_OK == res) && ct) printf("We received Content-Type: %s\n", ct); //printf("====\n"); //int pos=strcspn(ct,"UTF-8"); int index = indexOf(ct, "="); char* arr[3]; if (index > 0) { split(arr, ct, "="); //printf("%s\n", arr[1]); strcpy(tocode, arr[1]); } FILE *fp = NULL; fp = fopen(TMP_FILE, "r"); char src_html[HTML_BUFFER_SIZE]={0}; char *src_html_p = src_html; size_t srclen = strlen(src_html); char output_html[HTML_BUFFER_SIZE]={0}; char *output_html_p = output_html; size_t outlen = 0; char tmp_html[HTML_BUFFER_SIZE]={0}; if (fp) { fread(src_html, HTML_BUFFER_SIZE, 1, fp); strcpy(tmp_html,src_html); if(index <0) { GetCharset(tmp_html,tocode); printf("%s\n",tocode); } //打开字符集转换 iconv_t hIconv = iconv_open(tocode,"iso-8859-1"); if (-1 == (ssize_t)hIconv) { return ; //打开失败,可能不支持的字符集 } printf("%s\n",src_html); //开始转换 //size_t iconv(iconv_t cd,char **inbuf, size_t *inbytesleft,char **outbuf, size_t *outbytesleft); size_t ret = iconv(hIconv,&src_html_p,&srclen,&output_html_p,&outlen); if(ret == -1) { printf("转换失败\n"); exit(2); } //printf("%s\n", output_html); //printf("ok"); if(strcmp(output_html,"")==0) { printf("%s\n",src_html); } //关闭字符集转换 iconv_close(hIconv); } } curl_easy_cleanup(curl); } } int main(int argc, char** argv) { //test_post("https://www.baidu.com/",""); test_get("http://www.baidu.com/"); return 0; } chunli魂斗罗~$
编译运行:
chunli魂斗罗~$ gcc main.c -Wall -l curl && ./a.out We received Content-Type: text/html UTF-8 <!DOCTYPE html><!--STATUS OK--> <html> <head> <meta http-equiv="content-type" content="text/html;charset=utf-8"> <meta http-equiv="X-UA-Compatible" content="IE=Edge"> <link rel="dns-prefetch" href="//s1.bdstatic.com"/> <link rel="dns-prefetch" href="//t1.baidu.com"/> <link rel="dns-prefetch" href="//t2.baidu.com"/> <link rel="dns-prefetch" href="//t3.baidu.com"/> <link rel="dns-prefetch" href="//t10.baidu.com"/> <link rel="dns-prefetch" href="//t11.baidu.com"/> <link rel="dns-prefetch" href="//t12.baidu.com"/> <link rel="dns-prefetch" href="//b1.bdstatic.com"/> <title>百度一下,你就知道</title> <link href="http://s1.bdstatic.com/r/www/cache/static/home/css/index.css" rel="stylesheet" type="text/css" /> <!--[if lte IE 8]><style index="index" >#content{height:480px\9}#m{top:260px\9}</style><![endif]--> <!--[if IE 8]><style index="index" >#u1 a.mnav,#u1 a.mnav:visited{font-family:simsun}</style><![endif]--> <script>var hashMatch = document.location.href.match(/#+(.*wd=[^&].+)/);if (hashMatch && hashMatch[0] && hashMatch[1]) {document.location.replace("http://"+location.host+"/s?"+hashMatch[1]);}var ns_c = function(){};</script> <script>function h(obj){obj.style.behavior='url(#default#homepage)';var a = obj.setHomePage('//www.baidu.com/');}</script> <noscript><meta http-equiv="refresh" content="0; url=/baidu.html?from=noscript"/></noscript>
【本文谢绝转载,原文来自http://990487026.blog.51cto.com】
时间: 2024-12-14 12:25:08