Node.js example: using a regular expression to extract the href of every link in a web page
- 2020-03-30 03:12:22
- OfStack
The implementation code
var http = require('http');
/**
 * Extract the href value of every <a> tag found in an HTML string.
 *
 * This is a regex-based scan, not a full HTML parser: it recognises
 * double-quoted, single-quoted and unquoted href values, matches tag and
 * attribute names case-insensitively, and tolerates tags that span lines.
 * Empty href values are skipped.
 *
 * @param {string} htmlstr - HTML source text to scan.
 * @returns {string[]} The href values in document order (empty if none).
 */
var getAHref = function(htmlstr){
  // "<a" must be followed by whitespace so tags such as <abbr> or <area> are
  // not mistaken for anchors. The optional group skips earlier attributes and
  // requires whitespace right before "href", so "data-href" is not matched.
  // The value is captured in group 1 ("..."), 2 ('...') or 3 (unquoted).
  var reg = /<a\s(?:[^>]*?\s)?href\s*=\s*(?:"([^"]+)"|'([^']+)'|([^\s"'>]+))/gi;
  var arr = [];
  // Declared locally: assigning to an undeclared name would create an
  // implicit global and throws in strict mode / ES modules.
  var tem;
  while ((tem = reg.exec(htmlstr)) !== null) {
    // Exactly one of the three capture groups participates in each match.
    arr.push(tem[1] || tem[2] || tem[3]);
  }
  return arr;
};
var qHref = "http://xxx";//Set the target url to be queried
// Fetch the page over HTTP, accumulate the body as UTF-8 text and, once the
// response has ended, extract the href of every <a> link it contains.
var req = http.get(qHref, function(res) {
  var pageData = "";
  res.setEncoding('utf8');
  res.on('error', function (errget) {
    // Report failures while reading the response body instead of silently
    // discarding them.
    console.error('Error while reading response from ' + qHref + ':', errget);
  });
  res.on('data', function (chunk) {
    pageData += chunk;
  });
  res.on('end', function(){
    var content = pageData;//Get the web content
    // Array of href strings for the page; consume it here as needed.
    var hrefs = getAHref(content);
  });
});
// Connection-level failures (DNS lookup, refused connection, ...) are emitted
// on the request object; without a listener they are thrown as an unhandled
// 'error' event and crash the process.
req.on('error', function (errreq) {
  console.error('Request to ' + qHref + ' failed:', errreq);
});