Node.js URL validation with url-valid

  • 2020-03-29 23:48:58
  • OfStack

Javascript does url validation, usually using regular expressions to determine whether the format is correct, such as:

/^https?:\/\//.test(url);

Of course, there are better detection methods such as the valid-url library for verification based on RFC 3986, RFC 3966, RFC 4694, RFC 4759, RFC 4904 and other standards.
But of course there's no way to verify the existence of the url based on the format, so we have url-valid, and we validate based on the HTTP request.

Interface design
All we really need is a function to pass in a url address and call back to see if the link is available.
However, the request is prone to unknown errors, so the callback receives an error parameter as its first argument; if it is not null, an error occurred.
We may also want to be able to get the relevant data of the web page, which can be used for information extraction of the page in the future.
The calls should also be chainable if possible.
So the end result is something like this:

// Usage example: every `on` call returns the validator, so handlers can be chained.
valid(url)
  // 'check' reports whether the URL answered successfully.
  .on('check', function (err, status) {
    if (err) throw err;
    if (status) {
      console.log('url Is available ');
    } else {
      // Double quotes: the message itself contains an apostrophe.
      console.log("url It's not available ");
    }
  })
  // Registering a 'data' listener makes the validator download the response body.
  .on('data', function (err, data) {
    console.log(data);
  })
  .on('end', function (err, data) {
    console.log(' End of the request ');
  });

HTTP GET or HTTP HEAD
We originally wanted to use HTTP HEAD requests for this, because they return only the headers and therefore shorten the request time, but not every server supports HEAD requests.
So we end up using HTTP GET and abort the request as soon as we get the correct statusCode.

Deal with 301-303
Since 301 through 303 are redirected, we need to continue to check whether the corresponding Location still exists.

Asynchronous execution using process.nextTick
So that the request only starts after the caller has registered its listeners, we use process.nextTick to defer it by one tick.

Implementation


module.exports = (function () {
  'use strict';
  var http = require('http')
    , https = require('https')
    , EventEmitter = require('events').EventEmitter
    , URL = require('url')
    , urlReg = /^(https?):///;

  
  function Valid(url, callback) {
    var that = this;
    this.url = url;
    this.emitter = new EventEmitter();
    process.nextTick(function () {
      that.get(url);
    });
    this.fetch = false;
    callback && this.emitter.on('check', callback);
  }
  Valid.prototype = {
    constructor: Valid,
    
    get: function (url) {
      var match = url.match(urlReg)
        , that = this;
      if (match) {
        var httpLib = (match[1].toLowerCase() === 'http') ? http : https
          , opts = URL.parse(url)
          , req;
        opts.agent = false;
        opts.method = 'GET';
        req = httpLib.request(opts, function (res) {
          var statusCode = res.statusCode;
          if (statusCode === 200) {
            that.emitter.emit('check', null, true);
            that.fetch ? 
              (res.on('data', function (data) {
                that.emitter.emit('data', null, data);
              }) && res.on('end', function () {
                that.emitter.emit('end');
              })) :
              (req.abort() || that.emitter.emit('end'));
          } else if (300 < statusCode && statusCode < 304) {
            req.abort();
            var emitter = that.emitter
              , valid = one(URL.resolve(url, res.headers.location), function (err, valid) {
                emitter.emit('check', err, valid);
              });
            that.fetch && valid.on('data', function (err, data) {
              emitter.emit('data', err, data);
            });
            valid.on('error', function (err) {
              that.emitter.emit('error', err);
            });
            valid.on('end', function () {
              that.emitter.emit('end');
            });
          } else {
            that.emitter.emit('check', null, false);
          }
          res.on('error', function (err) {
            req.abort();
            that.emitter.emit('data', err);
          });
        });
        req.on('error', function (err) {
          req.abort();
          return that.emitter.emit('check', null, false);
        });
        req.end();
      } else {
        return that.emitter.emit('check', null, false);
      }
    },
    
    on: function (event, callback) {
      (event === 'data') && (this.fetch = true); 
      this.emitter.on(event, callback);
      return this;
    },
    
    destroy: function () {
      this.emitter.removeAllListeners();
      this.url = undefined;
      this.emitter = null;
      this.fetch = undefined;
    },
    
    removeAllListeners: function (event) {
      event ? 
        this.emitter.removeAllListeners(event) :
        this.emitter.removeAllListeners();
      return this;
    },
    
    listeners: function (event) {
      if (event) {
        return this.emitter.listeners(event);
      } else {
        var res = []
          , that = this
          , _push = Array.prototype.push;
        Object.keys(this.emitter._events).forEach(function (key) {
          _push.apply(res, that.emitter.listeners(key));
        });
        return res;
      }
    }
  }
  
  function one(url, callback) {
    return (new Valid(url, callback)); 
  }
  one.one = one;
  return one;
})();

Related articles: