/* ************************************************************************************ 项目是分析各视频的真实地址 生成一个dll, 供其它项目使用, 项目中使用了zlib, boost, 要另下载
如果是sohu网站则自己分析地址. 如果是其它视频网站才从flvcd上获取结果. 项目中使用了: 1.gzip解压. 2.UTF8与GB2312转码 3.boost正则表达式 boost查找单个匹配, 查找所有匹配 4.sohu视频地址是分了四类视频分析的. 可以用fiddler查找功能查找到所想要的几个字符串 5.文件获取是使用的MFC中的CHttpFile获取的, 尝试用了WinINet和WinHTTP ms的api访问网络的都不怎么行. chrome浏览器第一个版本是用winhttp访问网络的. 也试过socket访问网络 但要跳转什么的太繁了 6.函数导出, 可以用def文件. 也可以用dllexport 7.多线程CreateThread 注: 网络访问花了相当大的时间 正则表达式boost中的perl正则表达式. "要写成\" \要写成\\, 要多用查找替换. 匹配多个结果时要迭代搜索查询 网络给的数据是压缩的gzip问题也花了好长时间. utf-8与gb2312转换也花了好长时间. buff最后一次读取时, 字符串没法控制. 内存初始化是没置成0就行了 函数导出研究了两种方法, 花了很长时间. 多线程没花多长时间 ************************************************************************************ */
// Analyzer.cpp
vector<string> Analyzer::GetPropertyInIntegratedBrackets(string strPropertyName, string strJson) { vector<string> vect; regex regclipsURL("(?<=(" + strPropertyName + "\":\\[))[^]]+?(?=(]))"); boost::smatch what; string strclipsURL = ""; //转成另一个变量再传,不然出错 强转是强的指针,以前是结构类型,强指针没用 if(regex_search(strJson, what, regclipsURL)) { strclipsURL = what[0]; } int iIndex = 0; while (iIndex >= 0) { iIndex = strclipsURL.find(','); if(iIndex > 0) { vect.push_back(strclipsURL.substr(1, iIndex - 2));//去了两边的双引号 strclipsURL = strclipsURL.substr(iIndex + 1); } else vect.push_back(strclipsURL.substr(1,strlen(strclipsURL.c_str()) - 2)); //去了两边的双引号 } return vect; } int Analyzer::httpgzdecompress(Byte *zdata, uLong nzdata, Byte *data, uLong *ndata) { int err = 0; z_stream d_stream = {0}; /* decompression stream */ static char dummy_head[2] = { 0x8 + 0x7 * 0x10, (((0x8 + 0x7 * 0x10) * 0x100 + 30) / 31 * 31) & 0xFF, }; d_stream.zalloc = (alloc_func)0; d_stream.zfree = (free_func)0; d_stream.opaque = (voidpf)0; d_stream.next_in = zdata; d_stream.avail_in = 0; d_stream.next_out = data; if(inflateInit2(&d_stream, 47) != Z_OK) return -1; while (d_stream.total_out < *ndata && d_stream.total_in < nzdata) { d_stream.avail_in = d_stream.avail_out = 1; /* force small buffers */ if((err = inflate(&d_stream, Z_NO_FLUSH)) == Z_STREAM_END) break; if(err != Z_OK ) { if(err == Z_DATA_ERROR) { d_stream.next_in = (Bytef*) dummy_head; d_stream.avail_in = sizeof(dummy_head); if((err = inflate(&d_stream, Z_NO_FLUSH)) != Z_OK) { return -1; } } else return -1; } } if(inflateEnd(&d_stream) != Z_OK) return -1; *ndata = d_stream.total_out; return 0; } //ms-help://MS.VSCC.v90/MS.MSDNQTR.v90.chs/intl/unicode_81rn.htm //将UTF8字符串转换为gb2312 CString Analyzer::ConvertUTF8toGB2312(const char *pData, size_t size) { size_t n = MultiByteToWideChar(CP_UTF8, 0, pData, (int)size, NULL, 0); WCHAR * pChar = new WCHAR[n+1]; n = MultiByteToWideChar(CP_UTF8, 0, pData, (int)size, pChar, n); pChar[n]=0; n = WideCharToMultiByte(936, 0, 
pChar, -1, 0, 0, 0, 0); char *p = new char[n+1]; n = WideCharToMultiByte(936, 0, pChar, -1, p, (int)n, 0, 0); CString result(p); delete []pChar; delete []p; return result; } CString Analyzer::GetPageHtml(CString strUrl) { CString strHtml = "";//获取HTML try { strUrl = strUrl.Trim(); CInternetSession session("HttpClient"); session.SetOption(INTERNET_OPTION_CONNECT_TIMEOUT, 5000); // 5秒的连接超时 session.SetOption(INTERNET_OPTION_SEND_TIMEOUT, 1000); // 1秒的发送超时 session.SetOption(INTERNET_OPTION_RECEIVE_TIMEOUT, 7000); // 7秒的接收超时 session.SetOption(INTERNET_OPTION_DATA_SEND_TIMEOUT, 1000); // 1秒的发送超时 session.SetOption(INTERNET_OPTION_DATA_RECEIVE_TIMEOUT, 7000); // 7秒的接收超时 session.SetOption(INTERNET_OPTION_CONNECT_RETRIES, 1); // 1次重试 CHttpFile* pFile = (CHttpFile*)session.OpenURL((LPCTSTR)strUrl, 1, INTERNET_FLAG_RELOAD | INTERNET_FLAG_TRANSFER_BINARY); DWORD dwStatusCode; pFile-> QueryInfoStatusCode(dwStatusCode); if(dwStatusCode == HTTP_STATUS_OK) { CString strLength = ""; CString strHeaders = ""; pFile->QueryInfo(HTTP_QUERY_CONTENT_LENGTH, strLength); pFile->QueryInfo(HTTP_QUERY_RAW_HEADERS_CRLF, strHeaders); long lLength = 4096 * 500; byte* pbHtml = new byte[lLength]; //在堆上动态分配内存 memset(pbHtml, 0, lLength); //初始化 byte sRecived[512]; int iIndex = 0; int num = 0; while((num = pFile->Read(sRecived,512)) > 0 ) { memcpy(pbHtml+iIndex, sRecived, num); iIndex+=num; } pbHtml[iIndex] = NULL; if(strHeaders.Find("gzip") > -1) { uLong ulLength = 4096 * 500; byte* pbData = new byte[ulLength]; memset(pbData,0,ulLength); httpgzdecompress(pbHtml, lLength, pbData, &ulLength); pbData[ulLength] = NULL; strHtml = (CHAR*)pbData; delete pbData; } else { strHtml = (CHAR*)pbHtml; if(strHeaders.MakeLower().Find("utf-8") > - 1 || strHtml.MakeLower().Find("utf-8") > -1)//strHtml变成小写了 { strHtml = ConvertUTF8toGB2312((CHAR*)pbHtml,strlen((CHAR*)pbHtml));//编码转换 } else//重新得到大小写区分的 { strHtml = (CHAR*)pbHtml; } } delete pbHtml; } pFile -> Close(); delete pFile; session.Close(); return strHtml; } catch 
(CException* e) { (void)e; this->m_State = Analyzer_State_NetError; return ""; } }