How do I parse a URL into hostname and path in javascript?

found here: https://gist.github.com/jlong/2428561

var parser = document.createElement('a');
parser.href = "http://example.com:3000/pathname/?search=test#hash";

parser.protocol; // => "http:"
parser.host;     // => "example.com:3000"
parser.hostname; // => "example.com"
parser.port;     // => "3000"
parser.pathname; // => "/pathname/"
parser.hash;     // => "#hash"
parser.search;   // => "?search=test"
parser.origin;   // => "http://example.com:3000"

The modern way:

new URL("http://example.com/aa/bb/")

Returns an object with properties hostname and pathname, along with a few others.

The first argument is a relative or absolute URL; if it's relative, then you need to specify the second argument (the base URL). For example, for a URL relative to the current page:

new URL("/aa/bb/", location)

In addition to browsers, this API is also available in Node.js since v7, through require('url').URL.


var getLocation = function(href) {
    var l = document.createElement("a");
    l.href = href;
    return l;
};
var l = getLocation("http://example.com/path");
console.debug(l.hostname)
>> "example.com"
console.debug(l.pathname)
>> "/path"

Here's a simple function using a regexp that imitates the a tag behavior.

Pros

  • predictable behaviour (no cross browser issues)
  • doesn't need the DOM
  • it's really short.

Cons

  • The regexp is a bit difficult to read

-

function getLocation(href) {
    var match = href.match(/^(https?\:)\/\/(([^:\/?#]*)(?:\:([0-9]+))?)([\/]{0,1}[^?#]*)(\?[^#]*|)(#.*|)$/);
    return match && {
        href: href,
        protocol: match[1],
        host: match[2],
        hostname: match[3],
        port: match[4],
        pathname: match[5],
        search: match[6],
        hash: match[7]
    }
}

-

getLocation("http://example.com/");
/*
{
    "protocol": "http:",
    "host": "example.com",
    "hostname": "example.com",
    "port": undefined,
    "pathname": "/"
    "search": "",
    "hash": "",
}
*/

getLocation("http://example.com:3000/pathname/?search=test#hash");
/*
{
    "protocol": "http:",
    "host": "example.com:3000",
    "hostname": "example.com",
    "port": "3000",
    "pathname": "/pathname/",
    "search": "?search=test",
    "hash": "#hash"
}
*/

EDIT:

Here's a breakdown of the regular expression

var reURLInformation = new RegExp([
    '^(https?:)//', // protocol
    '(([^:/?#]*)(?::([0-9]+))?)', // host (hostname and port)
    '(/{0,1}[^?#]*)', // pathname
    '(\\?[^#]*|)', // search
    '(#.*|)$' // hash
].join(''));
var match = href.match(reURLInformation);

Tags:

Javascript

Url