Extract hostname name from string

Try this:

var matches = url.match(/^https?\:\/\/([^\/?#]+)(?:[\/?#]|$)/i);
var domain = matches && matches[1];  // domain will be null if no match is found

If you want to exclude the port from your result, use this expression instead:

/^https?\:\/\/([^\/:?#]+)(?:[\/:?#]|$)/i

Edit: To prevent specific domains from matching, use a negative lookahead. (?!youtube.com)

/^https?\:\/\/(?!(?:www\.)?(?:youtube\.com|youtu\.be))([^\/:?#]+)(?:[\/:?#]|$)/i

I recommend using the npm package psl (Public Suffix List). The "Public Suffix List" is a list of all valid domain suffixes and rules, not just Country Code Top-Level domains, but unicode characters as well that would be considered the root domain (i.e. www.食狮.公司.cn, b.c.kobe.jp, etc.). Read more about it here.

Try:

npm install --save psl

Then with my "extractHostname" implementation run:

let psl = require('psl');
let url = 'http://www.youtube.com/watch?v=ClkQA2Lb_iE';
psl.get(extractHostname(url)); // returns youtube.com

I can't use an npm package, so below only tests extractHostname.

function extractHostname(url) {
  var hostname;
  //find & remove protocol (http, ftp, etc.) and get hostname

  if (url.indexOf("//") > -1) {
    hostname = url.split('/')[2];
  } else {
    hostname = url.split('/')[0];
  }

  //find & remove port number
  hostname = hostname.split(':')[0];
  //find & remove "?"
  hostname = hostname.split('?')[0];

  return hostname;
}

// Warning: you can use this function to extract the "root" domain, but it will not be as accurate as using the psl package.

function extractRootDomain(url) {
  var domain = extractHostname(url),
  splitArr = domain.split('.'),
  arrLen = splitArr.length;

  //extracting the root domain here
  //if there is a subdomain
  if (arrLen > 2) {
    domain = splitArr[arrLen - 2] + '.' + splitArr[arrLen - 1];
    //check to see if it's using a Country Code Top Level Domain (ccTLD) (i.e. ".me.uk")
    if (splitArr[arrLen - 2].length == 2 && splitArr[arrLen - 1].length == 2) {
      //this is using a ccTLD
      domain = splitArr[arrLen - 3] + '.' + domain;
    }
  }
  return domain;
}

const urlHostname = url => {
  try {
    return new URL(url).hostname;
  }
  catch(e) { return e; }
};

const urls = [
    "http://www.blog.classroom.me.uk/index.php",
    "http://www.youtube.com/watch?v=ClkQA2Lb_iE",
    "https://www.youtube.com/watch?v=ClkQA2Lb_iE",
    "www.youtube.com/watch?v=ClkQA2Lb_iE",
    "ftps://ftp.websitename.com/dir/file.txt",
    "websitename.com:1234/dir/file.txt",
    "ftps://websitename.com:1234/dir/file.txt",
    "example.com?param=value",
    "https://facebook.github.io/jest/",
    "//youtube.com/watch?v=ClkQA2Lb_iE",
    "www.食狮.公司.cn",
    "b.c.kobe.jp",
    "a.d.kyoto.or.jp",
    "http://localhost:4200/watch?v=ClkQA2Lb_iE"
];

const test = (method, arr) => console.log(
`=== Testing "${method.name}" ===\n${arr.map(url => method(url)).join("\n")}\n`);

test(extractHostname, urls);
test(extractRootDomain, urls);
test(urlHostname, urls);

Regardless having the protocol or even port number, you can extract the domain. This is a very simplified, non-regex solution, so I think this will do.

URL(url).hostname is a valid solution but it doesn't work well with some edge cases that I have addressed. As you can see in my last test, it doesn't like some of the URLs. You can definitely use a combination of my solutions to make it all work though.

*Thank you @Timmerz, @renoirb, @rineez, @BigDong, @ra00l, @ILikeBeansTacos, @CharlesRobertson for your suggestions! @ross-allen, thank you for reporting the bug!


There is no need to parse the string, just pass your URL as an argument to URL constructor:

const url = 'http://www.youtube.com/watch?v=ClkQA2Lb_iE';
const { hostname } = new URL(url);

console.assert(hostname === 'www.youtube.com');

A neat trick without using regular expressions:

var tmp        = document.createElement ('a');
;   tmp.href   = "http://www.example.com/12xy45";

// tmp.hostname will now contain 'www.example.com'
// tmp.host will now contain hostname and port 'www.example.com:80'

Wrap the above in a function such as the below and you have yourself a superb way of snatching the domain part out of an URI.

function url_domain(data) {
  var    a      = document.createElement('a');
         a.href = data;
  return a.hostname;
}