1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57
| //对网页进行转码 wchar_t* Convert(CString str, int targetCodePage) {
int iunicodeLen = MultiByteToWideChar(targetCodePage, 0, (LPCTSTR)str.GetBuffer(), -1, NULL, 0); wchar_t* pUnicode = NULL; pUnicode = new wchar_t[iunicodeLen + 1]; memset(pUnicode, 0, (iunicodeLen + 1) * sizeof(wchar_t)); MultiByteToWideChar(targetCodePage, 0, (LPCTSTR)str.GetBuffer(), -1, (LPWSTR)pUnicode, iunicodeLen);//映射一个字符串到一个款字节中 return pUnicode; }
/*
 * Downloads the resource at szURL through WinINet and appends its text to
 * getbuf.
 *
 * Parameters:
 *   szURL  - URL handed to InternetOpenUrl.
 *   getbuf - receives the downloaded text, decoded as UTF-8 and appended to
 *            whatever the string already contains.
 *
 * Returns true on success. Returns false when the WinINet session or the URL
 * cannot be opened, when a read fails, or when the data cannot be decoded; in
 * those cases getbuf is left unchanged.
 *
 * The raw bytes are collected first and decoded in a single pass, so a
 * multi-byte UTF-8 sequence that straddles two reads is decoded correctly,
 * and no line breaks are inserted into the page text.
 */
bool GetHtml(LPCTSTR szURL, CString& getbuf)
{
    // Open a WinINet session using the preconfigured proxy settings.
    HINTERNET hInternet = InternetOpen(_T("Mozilla/4.0 (compatible)"),
                                       INTERNET_OPEN_TYPE_PRECONFIG,
                                       NULL, NULL, 0);
    if (hInternet == NULL)
    {
        return false;
    }

    // Open the resource, forcing a fresh download.
    HINTERNET hUrl = InternetOpenUrl(hInternet, szURL, NULL, 0,
                                     INTERNET_FLAG_RELOAD, 0);
    if (hUrl == NULL)
    {
        // Close the session without losing the error code of the failed open.
        DWORD dwError = GetLastError();
        InternetCloseHandle(hInternet);
        SetLastError(dwError);
        return false;
    }

    // Collect the undecoded bytes of the response.
    CStringA rawBytes;
    char buffer[1024];
    DWORD dwBytesRead = 0;
    bool bReadOk = true;
    for (;;)
    {
        if (!InternetReadFile(hUrl, buffer, sizeof(buffer), &dwBytesRead))
        {
            bReadOk = false;
            break;
        }
        if (dwBytesRead == 0)
        {
            break; // a successful read of zero bytes marks the end of the data
        }
        rawBytes.Append(buffer, (int)dwBytesRead);
    }

    InternetCloseHandle(hUrl);
    InternetCloseHandle(hInternet);

    if (!bReadOk)
    {
        return false;
    }

    const int cbRaw = rawBytes.GetLength();
    if (cbRaw == 0)
    {
        return true; // empty response: nothing to append
    }

    // Decode the whole response at once. The first call only measures.
    int cchWide = MultiByteToWideChar(CP_UTF8, 0, rawBytes.GetString(), cbRaw,
                                      NULL, 0);
    if (cchWide <= 0)
    {
        return false;
    }

    CStringW wideText;
    LPWSTR pWide = wideText.GetBuffer(cchWide);
    cchWide = MultiByteToWideChar(CP_UTF8, 0, rawBytes.GetString(), cbRaw,
                                  pWide, cchWide);
    wideText.ReleaseBuffer(cchWide);
    if (cchWide <= 0)
    {
        return false;
    }

    getbuf += wideText.GetString();
    return true;
}
|