Get domain name without subdomains using JavaScript?

Following is a solution to extract a domain name without any subdomains. This solution doesn't make any assumptions about the URL format, so it should work for any URL. Since some domain names have one suffix (.com), and some have two or more (.co.uk), to get an accurate result in all cases, we need to parse the hostname using the Public Suffix List, which contains a list of all public domain name suffixes.


Solution

First, include the public suffix list js api in a script tag in your HTML, then in JavaScript to get the hostname you can call:

var parsed = psl.parse('one.two.roothost.co.uk');
console.log(parsed.domain);

...which will return "roothost.co.uk". To get the name from the current page, you can use location.hostname instead of a static string:

var parsed = psl.parse(location.hostname);
console.log(parsed.domain);

Finally, if you need to parse a domain name directly out of a full URL string, you can use the following:

var url = "http://one.two.roothost.co.uk/page.html";
url = url.split("/")[2]; // Get the hostname
var parsed = psl.parse(url); // Parse the domain
document.getElementById("output").textContent = parsed.domain;

JSFiddle Example (it includes the entire minified library in the jsFiddle, so scroll down!): https://jsfiddle.net/6aqdbL71/2/


This works for me:

const firstTLDs = "ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|be|bf|bg|bh|bi|bj|bm|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|cl|cm|cn|co|cr|cu|cv|cw|cx|cz|de|dj|dk|dm|do|dz|ec|ee|eg|es|et|eu|fi|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jo|jp|kg|ki|km|kn|kp|kr|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|na|nc|ne|nf|ng|nl|no|nr|nu|nz|om|pa|pe|pf|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|yt".split('|');

const secondTLDs = "com|edu|gov|net|mil|org|nom|sch|caa|res|off|gob|int|tur|ip6|uri|urn|asn|act|nsw|qld|tas|vic|pro|biz|adm|adv|agr|arq|art|ato|bio|bmd|cim|cng|cnt|ecn|eco|emp|eng|esp|etc|eti|far|fnd|fot|fst|g12|ggf|imb|ind|inf|jor|jus|leg|lel|mat|med|mus|not|ntr|odo|ppg|psc|psi|qsl|rec|slg|srv|teo|tmp|trd|vet|zlg|web|ltd|sld|pol|fin|k12|lib|pri|aip|fie|eun|sci|prd|cci|pvt|mod|idv|rel|sex|gen|nic|abr|bas|cal|cam|emr|fvg|laz|lig|lom|mar|mol|pmn|pug|sar|sic|taa|tos|umb|vao|vda|ven|mie|北海道|和歌山|神奈川|鹿児島|ass|rep|tra|per|ngo|soc|grp|plc|its|air|and|bus|can|ddr|jfk|mad|nrw|nyc|ski|spy|tcm|ulm|usa|war|fhs|vgs|dep|eid|fet|fla|flå|gol|hof|hol|sel|vik|cri|iwi|ing|abo|fam|gok|gon|gop|gos|aid|atm|gsm|sos|elk|waw|est|aca|bar|cpa|jur|law|sec|plo|www|bir|cbg|jar|khv|msk|nov|nsk|ptz|rnd|spb|stv|tom|tsk|udm|vrn|cmw|kms|nkz|snz|pub|fhv|red|ens|nat|rns|rnu|bbs|tel|bel|kep|nhs|dni|fed|isa|nsn|gub|e12|tec|орг|обр|упр|alt|nis|jpn|mex|ath|iki|nid|gda|inc".split('|');

const knownSubdomains = "www|studio|mail|remote|blog|webmail|server|ns1|ns2|smtp|secure|vpn|m|shop|ftp|mail2|test|portal|ns|ww1|host|support|dev|web|bbs|ww42|squatter|mx|email|1|mail1|2|forum|owa|www2|gw|admin|store|mx1|cdn|api|exchange|app|gov|2tty|vps|govyty|hgfgdf|news|1rer|lkjkui";

function removeSubdomain(s) {
    const knownSubdomainsRegExp = new RegExp(`^(${knownSubdomains})\.`, 'i');
    s = s.replace(knownSubdomainsRegExp, '');

    const parts = s.split('.');

    while (parts.length > 3) {
        parts.shift();
    }

    if (parts.length === 3 && ((parts[1].length > 2 && parts[2].length > 2) || (secondTLDs.indexOf(parts[1]) === -1) && firstTLDs.indexOf(parts[2]) === -1)) {
        parts.shift();
    }

    return parts.join('.');
};

var tests = {
  'www.sidanmor.com':             'sidanmor.com',
  'exemple.com':                  'exemple.com',
  'argos.co.uk':                  'argos.co.uk',
  'www.civilwar.museum':          'civilwar.museum',
  'www.sub.civilwar.museum':      'civilwar.museum',
  'www.xxx.sub.civilwar.museum':  'civilwar.museum',
  'www.exemple.com':              'exemple.com',
  'main.testsite.com':            'testsite.com',
  'www.ex-emple.com.ar':          'ex-emple.com.ar',
  'main.test-site.co.uk':         'test-site.co.uk',
  'en.tour.mysite.nl':            'tour.mysite.nl',
  'www.one.lv':                   'one.lv',
  'www.onfdsadfsafde.lv':         'onfdsadfsafde.lv',
  'aaa.onfdsadfsafde.aa':         'onfdsadfsafde.aa',
};

const firstTLDs = "ac|ad|ae|af|ag|ai|al|am|an|ao|aq|ar|as|at|au|aw|ax|az|ba|bb|be|bf|bg|bh|bi|bj|bm|bo|br|bs|bt|bv|bw|by|bz|ca|cc|cd|cf|cg|ch|ci|cl|cm|cn|co|cr|cu|cv|cw|cx|cz|de|dj|dk|dm|do|dz|ec|ee|eg|es|et|eu|fi|fm|fo|fr|ga|gb|gd|ge|gf|gg|gh|gi|gl|gm|gn|gp|gq|gr|gs|gt|gw|gy|hk|hm|hn|hr|ht|hu|id|ie|il|im|in|io|iq|ir|is|it|je|jo|jp|kg|ki|km|kn|kp|kr|ky|kz|la|lb|lc|li|lk|lr|ls|lt|lu|lv|ly|ma|mc|md|me|mg|mh|mk|ml|mn|mo|mp|mq|mr|ms|mt|mu|mv|mw|mx|my|na|nc|ne|nf|ng|nl|no|nr|nu|nz|om|pa|pe|pf|ph|pk|pl|pm|pn|pr|ps|pt|pw|py|qa|re|ro|rs|ru|rw|sa|sb|sc|sd|se|sg|sh|si|sj|sk|sl|sm|sn|so|sr|st|su|sv|sx|sy|sz|tc|td|tf|tg|th|tj|tk|tl|tm|tn|to|tp|tr|tt|tv|tw|tz|ua|ug|uk|us|uy|uz|va|vc|ve|vg|vi|vn|vu|wf|ws|yt".split('|');

const secondTLDs = "com|edu|gov|net|mil|org|nom|sch|caa|res|off|gob|int|tur|ip6|uri|urn|asn|act|nsw|qld|tas|vic|pro|biz|adm|adv|agr|arq|art|ato|bio|bmd|cim|cng|cnt|ecn|eco|emp|eng|esp|etc|eti|far|fnd|fot|fst|g12|ggf|imb|ind|inf|jor|jus|leg|lel|mat|med|mus|not|ntr|odo|ppg|psc|psi|qsl|rec|slg|srv|teo|tmp|trd|vet|zlg|web|ltd|sld|pol|fin|k12|lib|pri|aip|fie|eun|sci|prd|cci|pvt|mod|idv|rel|sex|gen|nic|abr|bas|cal|cam|emr|fvg|laz|lig|lom|mar|mol|pmn|pug|sar|sic|taa|tos|umb|vao|vda|ven|mie|北海道|和歌山|神奈川|鹿児島|ass|rep|tra|per|ngo|soc|grp|plc|its|air|and|bus|can|ddr|jfk|mad|nrw|nyc|ski|spy|tcm|ulm|usa|war|fhs|vgs|dep|eid|fet|fla|flå|gol|hof|hol|sel|vik|cri|iwi|ing|abo|fam|gok|gon|gop|gos|aid|atm|gsm|sos|elk|waw|est|aca|bar|cpa|jur|law|sec|plo|www|bir|cbg|jar|khv|msk|nov|nsk|ptz|rnd|spb|stv|tom|tsk|udm|vrn|cmw|kms|nkz|snz|pub|fhv|red|ens|nat|rns|rnu|bbs|tel|bel|kep|nhs|dni|fed|isa|nsn|gub|e12|tec|орг|обр|упр|alt|nis|jpn|mex|ath|iki|nid|gda|inc".split('|');

const knownSubdomains = "www|studio|mail|remote|blog|webmail|server|ns1|ns2|smtp|secure|vpn|m|shop|ftp|mail2|test|portal|ns|ww1|host|support|dev|web|bbs|ww42|squatter|mx|email|1|mail1|2|forum|owa|www2|gw|admin|store|mx1|cdn|api|exchange|app|gov|2tty|vps|govyty|hgfgdf|news|1rer|lkjkui";

function removeSubdomain(s) {
const knownSubdomainsRegExp = new RegExp(`^(${knownSubdomains})\.`, 'i');
s = s.replace(knownSubdomainsRegExp, '');

const parts = s.split('.');

while (parts.length > 3) {
    parts.shift();
}

if (parts.length === 3 && ((parts[1].length > 2 && parts[2].length > 2) || (secondTLDs.indexOf(parts[1]) === -1) && firstTLDs.indexOf(parts[2]) === -1)) {
    parts.shift();
}

return parts.join('.');
};

for (var test in tests) {
  if (tests.hasOwnProperty(test)) {
    var t = test;
    var e = tests[test];
    var r = removeSubdomain(test);
    var s = e === r;
    if (s) {
      console.log('OK: "' + t + '" should be "' + e + '" and it is really "' + r + '"');
    } else {
      console.log('Fail: "' + t + '" should be "' + e + '" but it is NOT "' + r + '"');
    }
  }
}

Referance:

psl.min.js file

Maximillian Laumeister Answer to this question

The most popular subdomains on the internet


You can use parse-domain to do the heavy lifting for you. This package considers the public suffix list and returns an easy to work with object breaking up the domain.

Here is an example from their readme:

  npm install parse-domain
  import { parseDomain, ParseResultType } from 'parse-domain';

  const parseResult = parseDomain(
    // should be a string with basic latin characters only. more details in the readme
    'www.some.example.co.uk',
  );

  // check if the domain is listed in the public suffix list
  if (parseResult.type === ParseResultType.Listed) {
    const { subDomains, domain, topLevelDomains } = parseResult;

    console.log(subDomains); // ["www", "some"]
    console.log(domain); // "example"
    console.log(topLevelDomains); // ["co", "uk"]
  } else {
    // more about other parseResult types in the readme
  }

Tags:

Javascript