Node.js: using a regular expression to extract all the links in a web page (code example)

  • 2020-03-30 03:12:22
  • OfStack

The implementation code


var http = require('http');
/**
 * Extract the href value of every <a> tag found in an HTML string.
 *
 * Double-quoted, single-quoted and unquoted attribute values are supported,
 * the tag may span several lines, and matching is case-insensitive.
 * Empty href values are not reported. This is a lightweight regex scan,
 * not a full HTML parser.
 *
 * @param {string} htmlstr - HTML source to scan; null/undefined is treated as "".
 * @returns {string[]} The href values in document order (duplicates kept).
 */
var getAHref = function(htmlstr){
    var html = htmlstr == null ? '' : String(htmlstr);
    // "<a" must be followed by whitespace so <abbr>, <area>, ... are not matched,
    // and "href" must be preceded by whitespace so data-href etc. are ignored.
    // [^>] keeps the scan inside a single tag while still crossing newlines.
    var reg = /<a\s(?:[^>]*?\s)?href\s*=\s*(?:"([^"]+)"|'([^']+)'|([^\s"'>]+))/gi;
    var arr = [];
    var tem; // declared locally: the match object must not leak as a global
    while((tem = reg.exec(html)) !== null){
        // Exactly one of the three alternatives captured the value.
        arr.push(tem[1] || tem[2] || tem[3]);
    }
    return arr;
};

// Download the page at qHref and extract the links it contains.
var qHref = "http://xxx";//Set the target url to be queried
var req = http.get(qHref, function(res) {
    var pageData = "";
    res.setEncoding('utf8');
    res.on('error', function (errget) {
        // Report failures while reading the response instead of swallowing them.
        console.error('Error while reading response from ' + qHref + ': ' + errget.message);
    });
    res.on('data', function (chunk) {
        // Accumulate the body; chunks are already decoded as UTF-8 strings.
        pageData += chunk;
    });
    res.on('end', function(){
        //console.dir(pageData);
        var content = pageData;//Get the web content
        var hrefs=getAHref(content);//All links found in the page, ready for further processing
    });
});
// Without this listener a DNS or connection failure is emitted as an
// unhandled 'error' event on the request and terminates the process.
req.on('error', function (errreq) {
    console.error('Request to ' + qHref + ' failed: ' + errreq.message);
});


Related articles: