/*
************************************************************************************
项目是分析各视频的真实地址 生成一个dll, 供其它项目使用, 项目中使用了zlib, boost, 要另下载
如果是sohu网站则自己分析地址. 如果是其它视频网站才从flvcd上获取结果.
项目中使用了:
1.gzip解压.
2.UTF8与GB2312转码
3.boost正则表达式 boost查找单个匹配, 查找所有匹配
4.sohu视频地址是分了四类视频分析的. 可以用fiddler查找功能查找到所想要的几个字符串
5.文件获取是使用的MFC中的CHttpFile获取的, 尝试用了WinINet和WinHTTP ms的api访问网络的都不怎么行.
chrome浏览器第一个版本是用winhttp访问网络的. 也试过socket访问网络 但要跳转什么的太繁了
6.函数导出, 可以用def文件. 也可以用__declspec(dllexport)
7.多线程CreateThread
注:
网络访问花了相当大的时间
正则表达式boost中的perl正则表达式. "要写成\" \要写成\\, 要多用查找替换. 匹配多个结果时要迭代搜索查询
网络给的数据是压缩的gzip问题也花了好长时间.
utf-8与gb2312转换也花了好长时间.
buff最后一次读取时, 字符串的结尾没法控制. 内存初始化时置成0就行了
函数导出研究了两种方法, 花了很长时间.
多线程没花多长时间
************************************************************************************
*/
// Analyzer.cpp
/**
 * Extracts the elements of the array stored under strPropertyName in strJson.
 *
 * The text between `<strPropertyName>":[` and the first following `]` is
 * split on ',' and the first and last character of every piece (expected to
 * be the surrounding double quotes) are dropped. This is a plain text split,
 * not a JSON parser: an element that itself contains ',' or ']' is cut there.
 *
 * @param strPropertyName Property name. It is inserted into the regular
 *                        expression verbatim, so regex metacharacters in it
 *                        are interpreted as such.
 * @param strJson         Text to search.
 * @return The array elements in input order. Empty when the property is not
 *         found or its array has no content. A piece shorter than two
 *         characters (e.g. produced by a doubled or trailing comma) yields an
 *         empty string so that positions stay aligned with the input.
 */
vector<string> Analyzer::GetPropertyInIntegratedBrackets(string strPropertyName, string strJson)
{
	vector<string> vect;

	// Look-behind / look-ahead keep only the text between `name":[` and `]`.
	regex regclipsURL("(?<=(" + strPropertyName + "\":\\[))[^]]+?(?=(]))");
	boost::smatch what;
	if (!regex_search(strJson, what, regclipsURL))
	{
		// Previously this fell through to substr(1, ...) on an empty string,
		// which throws std::out_of_range.
		return vect;
	}

	string strclipsURL = "";
	strclipsURL = what[0];

	// Walk the matched text piece by piece. Advancing an offset (instead of
	// re-searching from the start) guarantees progress even when a piece is
	// empty, which used to spin forever.
	string::size_type start = 0;
	for (;;)
	{
		const string::size_type comma = strclipsURL.find(',', start);
		const string::size_type end = (comma == string::npos) ? strclipsURL.size() : comma;
		const string::size_type len = end - start;

		if (len >= 2)
		{
			vect.push_back(strclipsURL.substr(start + 1, len - 2)); // strip the two quotes
		}
		else
		{
			vect.push_back(string()); // too short to hold a quoted value
		}

		if (comma == string::npos)
		{
			break;
		}
		start = comma + 1;
	}
	return vect;
}
/**
 * Inflates an HTTP response body that was sent gzip- or deflate-encoded.
 *
 * The stream is first opened with automatic gzip/zlib header detection. If
 * zlib rejects the data before producing any output, the input is retried
 * once from the beginning as a raw deflate stream (some servers send
 * "deflate" bodies without the zlib wrapper).
 *
 * @param zdata  Compressed input.
 * @param nzdata Number of bytes available at zdata.
 * @param data   Output buffer.
 * @param ndata  In: capacity of data in bytes. Out (on success only): number
 *               of bytes written to data.
 * @return 0 on success, -1 on failure. Success is also reported when the
 *         output buffer filled up or the input ran out before the end of the
 *         stream; in that case the output is a truncated prefix.
 */
int Analyzer::httpgzdecompress(Byte *zdata, uLong nzdata, Byte *data, uLong *ndata)
{
	if (zdata == 0 || data == 0 || ndata == 0)
	{
		return -1;
	}

	// avail_in / avail_out are narrower than uLong on some platforms, so the
	// buffers are handed to zlib in pieces of at most this many bytes.
	const uLong kMaxChunk = 0x40000000UL;
	const uLong outCapacity = *ndata;

	z_stream d_stream = {0}; /* decompression stream */
	d_stream.zalloc = (alloc_func)0;
	d_stream.zfree = (free_func)0;
	d_stream.opaque = (voidpf)0;
	d_stream.next_in = zdata;
	d_stream.avail_in = 0;
	d_stream.next_out = data;
	d_stream.avail_out = 0;

	// 47 = 32 + 15: detect a gzip or zlib header automatically, 32 KiB window.
	if (inflateInit2(&d_stream, 47) != Z_OK)
	{
		return -1;
	}

	bool triedRaw = false;
	while (d_stream.total_out < outCapacity && d_stream.total_in < nzdata)
	{
		const uLong inLeft = nzdata - d_stream.total_in;
		const uLong outLeft = outCapacity - d_stream.total_out;
		d_stream.avail_in = static_cast<unsigned int>(inLeft < kMaxChunk ? inLeft : kMaxChunk);
		d_stream.avail_out = static_cast<unsigned int>(outLeft < kMaxChunk ? outLeft : kMaxChunk);

		const int err = inflate(&d_stream, Z_NO_FLUSH);
		if (err == Z_STREAM_END)
		{
			break;
		}
		if (err == Z_OK)
		{
			continue; // progress was made; feed the remainder
		}

		if (err == Z_DATA_ERROR && !triedRaw && d_stream.total_out == 0)
		{
			// Header not recognised: start over as raw deflate. A stream that
			// has reported a data error cannot simply be fed more bytes, so
			// it is torn down and re-created.
			triedRaw = true;
			inflateEnd(&d_stream);
			d_stream.zalloc = (alloc_func)0;
			d_stream.zfree = (free_func)0;
			d_stream.opaque = (voidpf)0;
			d_stream.next_in = zdata;
			d_stream.avail_in = 0;
			d_stream.next_out = data;
			d_stream.avail_out = 0;
			// -15: raw deflate (no header, no trailer), 32 KiB window.
			if (inflateInit2(&d_stream, -15) != Z_OK)
			{
				return -1;
			}
			continue;
		}

		// Any other result is fatal; release the inflate state before leaving.
		inflateEnd(&d_stream);
		return -1;
	}

	if (inflateEnd(&d_stream) != Z_OK)
	{
		return -1;
	}
	*ndata = d_stream.total_out;
	return 0;
}
//ms-help://MS.VSCC.v90/MS.MSDNQTR.v90.chs/intl/unicode_81rn.htm
/**
 * Converts UTF-8 text to code page 936 (GB2312/GBK) and returns it as a CString.
 *
 * The conversion goes UTF-8 -> UTF-16 -> code page 936 using the Win32
 * MultiByteToWideChar / WideCharToMultiByte functions.
 *
 * @param pData Pointer to the UTF-8 bytes; does not need to be NUL-terminated.
 * @param size  Number of bytes at pData.
 * @return The converted text, or an empty string when the input is null or
 *         empty, or when either conversion step fails.
 */
CString Analyzer::ConvertUTF8toGB2312(const char *pData, size_t size)
{
	if (pData == NULL || size == 0)
	{
		return CString();
	}
	const int srcLen = static_cast<int>(size);

	// Pass 1: ask how many UTF-16 code units are needed, then convert.
	const int wideLen = MultiByteToWideChar(CP_UTF8, 0, pData, srcLen, NULL, 0);
	if (wideLen <= 0)
	{
		return CString();
	}
	// One extra zeroed element terminates the string, because the API does
	// not append a terminator when given an explicit source length.
	std::vector<WCHAR> wide(static_cast<size_t>(wideLen) + 1, 0);
	MultiByteToWideChar(CP_UTF8, 0, pData, srcLen, &wide[0], wideLen);

	// Pass 2: UTF-16 -> code page 936. With -1 as the source length the
	// reported size includes the terminating NUL.
	const int gbLen = WideCharToMultiByte(936, 0, &wide[0], -1, NULL, 0, NULL, NULL);
	if (gbLen <= 0)
	{
		return CString();
	}
	// Zero-filled, so the buffer is a valid empty string even if the call
	// below fails and writes nothing.
	std::vector<char> gb(static_cast<size_t>(gbLen) + 1, 0);
	WideCharToMultiByte(936, 0, &wide[0], -1, &gb[0], gbLen, NULL, NULL);

	return CString(&gb[0]);
}
/**
 * Downloads strUrl and returns the response body as text.
 *
 * Only a response with status HTTP_STATUS_OK yields content. If the raw
 * response headers contain "gzip" the body is inflated with
 * httpgzdecompress(); otherwise, if the headers or the body mention "utf-8"
 * (case-insensitively), the body is converted with ConvertUTF8toGB2312().
 *
 * At most 4096 * 500 bytes of the body are read, and at most the same number
 * of bytes are kept after decompression; anything beyond that is dropped.
 * The body is treated as a C string, so it ends at the first NUL byte.
 *
 * @param strUrl URL to fetch; surrounding whitespace is trimmed.
 * @return The page text, or an empty string when the URL could not be
 *         opened, the status was not HTTP_STATUS_OK, or an MFC exception was
 *         raised. In the exception case m_State is set to
 *         Analyzer_State_NetError.
 */
CString Analyzer::GetPageHtml(CString strUrl)
{
	// Upper bound for both the downloaded and the decompressed body.
	const size_t kMaxPageBytes = 4096 * 500;

	try
	{
		CString strHtml = "";
		strUrl = strUrl.Trim();

		CInternetSession session("HttpClient");
		session.SetOption(INTERNET_OPTION_CONNECT_TIMEOUT, 5000);      // 5 s connect timeout
		session.SetOption(INTERNET_OPTION_SEND_TIMEOUT, 1000);         // 1 s send timeout
		session.SetOption(INTERNET_OPTION_RECEIVE_TIMEOUT, 7000);      // 7 s receive timeout
		session.SetOption(INTERNET_OPTION_DATA_SEND_TIMEOUT, 1000);    // 1 s data-send timeout
		session.SetOption(INTERNET_OPTION_DATA_RECEIVE_TIMEOUT, 7000); // 7 s data-receive timeout
		session.SetOption(INTERNET_OPTION_CONNECT_RETRIES, 1);         // one connect retry

		CHttpFile* pFile = (CHttpFile*)session.OpenURL((LPCTSTR)strUrl, 1, INTERNET_FLAG_RELOAD | INTERNET_FLAG_TRANSFER_BINARY);
		if (pFile != NULL) // OpenURL can return NULL instead of throwing
		{
			try
			{
				DWORD dwStatusCode = 0;
				pFile->QueryInfoStatusCode(dwStatusCode);
				if (dwStatusCode == HTTP_STATUS_OK)
				{
					CString strHeaders = "";
					pFile->QueryInfo(HTTP_QUERY_RAW_HEADERS_CRLF, strHeaders);

					// Read the body in 512-byte pieces, never past the cap.
					std::vector<byte> raw;
					byte chunk[512];
					UINT got = 0;
					while (raw.size() < kMaxPageBytes && (got = pFile->Read(chunk, sizeof(chunk))) > 0)
					{
						const size_t room = kMaxPageBytes - raw.size();
						const size_t take = (static_cast<size_t>(got) < room) ? static_cast<size_t>(got) : room;
						raw.insert(raw.end(), chunk, chunk + take);
					}
					const size_t rawLen = raw.size();
					raw.push_back(0); // terminator, so the bytes can be used as a C string

					if (strHeaders.Find("gzip") > -1)
					{
						// Zero-filled and one byte larger than the capacity
						// given to the decompressor, so the result is always
						// NUL-terminated, even when decompression fails or
						// fills the whole buffer.
						std::vector<byte> plain(kMaxPageBytes + 1, 0);
						uLong ulLength = static_cast<uLong>(kMaxPageBytes);
						// Best effort: whatever was inflated before a failure
						// is still returned.
						httpgzdecompress(&raw[0], static_cast<uLong>(rawLen), &plain[0], &ulLength);
						// NOTE(review): no UTF-8 -> GB2312 conversion happens
						// on this path, unlike the uncompressed one. Confirm
						// whether that is intended.
						strHtml = (CHAR*)&plain[0];
					}
					else
					{
						const CHAR* pszBody = (const CHAR*)&raw[0];
						bool bUtf8 = strHeaders.MakeLower().Find("utf-8") > -1;
						if (!bUtf8)
						{
							// Search a lowered copy so the returned text keeps
							// its original case.
							CString strProbe(pszBody);
							strProbe.MakeLower();
							bUtf8 = strProbe.Find("utf-8") > -1;
						}
						if (bUtf8)
						{
							strHtml = ConvertUTF8toGB2312(pszBody, strlen(pszBody));
						}
						else
						{
							strHtml = pszBody;
						}
					}
				}
			}
			catch (CException*)
			{
				// Release the file before the outer handler sees the exception.
				pFile->Close();
				delete pFile;
				throw;
			}
			pFile->Close();
			delete pFile;
		}
		session.Close();
		return strHtml;
	}
	catch (CException* e)
	{
		e->Delete(); // exceptions caught by pointer must be released by the handler
		this->m_State = Analyzer_State_NetError;
		return "";
	}
}

