Three ways to fetch the source code of a web page: MFC, the WinINet API, and raw sockets

  • 2020-04-02 02:04:39
  • OfStack

The first method uses the Internet classes provided by MFC, which are declared in the following header:

<afxinet.h>


// Downloads the resource at strUrl through an MFC Internet session and
// returns everything that ReadString() yields, concatenated into one CString.
//
// Returns an empty string when OpenURL() gives back no file object.
// Exceptions raised by the MFC Internet classes still propagate to the
// caller, but the file object is released first so that nothing is leaked.
CString GetHttpFileData(CString strUrl)
{
    CInternetSession Session(_T("Internet Explorer"), 0);
    CString strData;

    // OpenURL() returns a CStdioFile-derived object whose concrete type
    // depends on the URL scheme; ReadString() is available on the base class,
    // so no down-cast to CHttpFile is needed.
    CStdioFile *pFile = Session.OpenURL(strUrl);
    if (pFile == NULL)
    {
        return strData;
    }

    try
    {
        CString strClip;
        // Each call may deliver only part of the document, so append every
        // piece to the running result.
        while (pFile->ReadString(strClip))
        {
            strData += strClip;
        }
    }
    catch (...)
    {
        // Release the connection before letting the exception continue.
        pFile->Abort();
        delete pFile;
        throw;
    }

    // The caller of OpenURL() owns the returned object: close and delete it.
    pFile->Close();
    delete pFile;
    return strData;
}

Note that pHttpFile->ReadString() may return only part of the data on each call, depending on network conditions, so every piece that is read is appended to the end of the running total; a CString is used here instead of a manually managed buffer :)
Don't forget to include the header file with #include <afxinet.h>, and enable "Use MFC" in the project settings, or the code won't compile.

The second method uses the plain WinINet API directly


#define MAXBLOCKSIZE 1024
#include <stdio.h>
#include <tchar.h>
#include <windows.h>
#include <wininet.h>
#pragma comment(lib, "wininet.lib")
void GetWebSrcCode(const char *Url);
int _tmain(int argc, _TCHAR* argv[])
{
    GetWebSrcCode("//www.jb51.net/");
    return 0;
}
void GetWebSrcCode(const char *Url)
{
    HINTERNET hSession = InternetOpen("zwt", INTERNET_OPEN_TYPE_PRECONFIG, NULL, NULL, 0);
    if (hSession != NULL)
    {
        HINTERNET hURL = InternetOpenUrl(hSession, Url, NULL, 0, INTERNET_FLAG_DONT_CACHE, 0);
        if (hURL != NULL)
        {
            char Temp[MAXBLOCKSIZE] = {0};
            ULONG Number = 1;
            FILE *stream;
            if( (stream = fopen( "E:\test.html", "wb" )) != NULL )
            {
                while (Number > 0)
                {
                    InternetReadFile(hURL, Temp, MAXBLOCKSIZE - 1, &Number);
                    fwrite(Temp, sizeof (char), Number , stream);
                }
                fclose( stream );
            }
            InternetCloseHandle(hURL);
            hURL = NULL;
        }
        InternetCloseHandle(hSession);
        hSession = NULL;
    }
}

The third method uses raw (unwrapped) Winsock sockets


int main(int argc, char* argv[])
{
    SOCKET hsocket;
    SOCKADDR_IN saServer;
    WSADATA wsadata;
    LPHOSTENT lphostent;
    int nRet;
    char Dest[3000];  
    char* host_name="blog.sina.com.cn";
    char* req="GET /s/blog_44acab2f01016gz3.html HTTP/1.1rn"
        "User-Agent: Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 5.1; Trident/4.0; .NET CLR 1.1.4322; .NET CLR 2.0.50727; .NET4.0C; .NET4.0E; .NET CLR 3.0.4506.2152; .NET CLR 3.5.30729)rn"
        "Host:blog.sina.com.cnrnrn";

    //Initializing the socket & NBSP;
    if(WSAStartup(MAKEWORD(2,2),&wsadata))
        printf(" Initialize the SOCKET Error! ");
    lphostent=gethostbyname(host_name);   
    if(lphostent==NULL)   
        printf("lphostent Is empty! ");   
    hsocket = socket(AF_INET,SOCK_STREAM,IPPROTO_TCP);   
    saServer.sin_family = AF_INET;   
    saServer.sin_port = htons(80);   
    saServer.sin_addr =*((LPIN_ADDR)*lphostent->h_addr_list);   
    //Using SOCKET connection & NBSP; & have spent
    nRet = connect(hsocket,(LPSOCKADDR)&saServer,sizeof(SOCKADDR_IN));   
    if(nRet == SOCKET_ERROR)   
    {
        printf(" Error establishing connection! ");   
        closesocket(hsocket);
        return 0;
    }
    //Send & NBSP; by SOCKET; & have spent
    nRet = send(hsocket,req,strlen(req),0);   
    if(nRet==SOCKET_ERROR)   
    {   
        printf(" Error sending packet! ");   
        closesocket(hsocket);   
    }   
    nRet=1;   
    while(nRet>0)   
    {   
        //Receive return packet & NBSP; & have spent
        nRet=recv(hsocket,(LPSTR)Dest,sizeof(Dest),0);   
        if(nRet>0)
            Dest[nRet]=0;
        else   
            Dest[0]=0;  
        char sDest[3000] = {0};
        UTF8_2_GB2312(sDest,nRet,Dest,nRet);
        //Displays the size, contents & NBSP; of the returned packet;
        //printf("nReceived bytes:%dn",nRet);   
        printf("Result:n%s",sDest);   
    }
}

In addition, the page fetched above may be encoded in UTF-8, which most current sites seem to use. The following code performs the encoding conversion.


/*
 * Decodes one three-byte UTF-8 sequence into a single wide character.
 *
 * pOut  - receives the decoded code unit (U+0800..U+FFFF).
 * pText - points at the lead byte; pText[0], pText[1] and pText[2] are read.
 *
 * Only the three-byte form is handled: the low 4 bits of the lead byte and
 * the low 6 bits of each of the two following bytes are combined. The value
 * is assigned arithmetically, so every byte of *pOut is written and the
 * result does not depend on the byte order or the size of wchar_t.
 */
void UTF_8ToUnicode(wchar_t* pOut,char *pText)
{
    const unsigned char *bytes = (const unsigned char *)pText;
    unsigned int codeUnit = ((unsigned int)(bytes[0] & 0x0F) << 12)
                          | ((unsigned int)(bytes[1] & 0x3F) << 6)
                          |  (unsigned int)(bytes[2] & 0x3F);

    *pOut = (wchar_t)codeUnit;
}
void Gb2312ToUnicode(wchar_t* pOut,char *gbBuffer) 
{   
    ::MultiByteToWideChar(CP_ACP,MB_PRECOMPOSED,gbBuffer,2,pOut,1);
} 
void UTF_8ToGB2312(char*pOut, char *pText, int pLen)  
{   
    char Ctemp[4];   
    memset(Ctemp,0,4); 
    int i =0 ,j = 0; 
    while(i < pLen) 
    {  
        if(pText[i] >= 0)  
        {  
            pOut[j++] = pText[i++]; 
        } 
        else 
        {  
            WCHAR Wtemp; 
            UTF_8ToUnicode(&Wtemp,pText + i);
            UnicodeToGB2312(Ctemp,Wtemp); 
            pOut[j] = Ctemp[0];
            pOut[j + 1] = Ctemp[1];  
            i += 3;   
            j += 2;  
        }   
    } 
    pOut[j] ='n'; 
    return; 
}

The code above converts UTF-8 text to GB2312.


Related articles: