简体   繁体   中英

How do I parse a URL into hostname and path in javascript?

I would like to take a string

var a = "http://example.com/aa/bb/"

and process it into an object such that

a.hostname == "example.com"

and

a.pathname == "/aa/bb"

The modern way:

new URL("http://example.com/aa/bb/")

Returns an object with properties hostname and pathname , along witha few others .

The first argument is a relative or absolute URL; if it's relative, then you need to specify the second argument (the base URL). For example, for a URL relative to the current page:

new URL("/aa/bb/", location)

In addition to browsers, this API is also available in Node.js since v7, through require('url').URL .

var getLocation = function(href) {
    var l = document.createElement("a");
    l.href = href;
    return l;
};
var l = getLocation("http://example.com/path");
console.debug(l.hostname)
>> "example.com"
console.debug(l.pathname)
>> "/path"

found here: https://gist.github.com/jlong/2428561

var parser = document.createElement('a');
parser.href = "http://example.com:3000/pathname/?search=test#hash";

parser.protocol; // => "http:"
parser.host;     // => "example.com:3000"
parser.hostname; // => "example.com"
parser.port;     // => "3000"
parser.pathname; // => "/pathname/"
parser.hash;     // => "#hash"
parser.search;   // => "?search=test"
parser.origin;   // => "http://example.com:3000"

Here's a simple function using a regexp that imitates the a tag behavior.

Pros

  • predictable behaviour (no cross browser issues)
  • doesn't need the DOM
  • it's really short.

Cons

  • The regexp is a bit difficult to read

-

function getLocation(href) {
    var match = href.match(/^(https?\:)\/\/(([^:\/?#]*)(?:\:([0-9]+))?)([\/]{0,1}[^?#]*)(\?[^#]*|)(#.*|)$/);
    return match && {
        href: href,
        protocol: match[1],
        host: match[2],
        hostname: match[3],
        port: match[4],
        pathname: match[5],
        search: match[6],
        hash: match[7]
    }
}

-

getLocation("http://example.com/");
/*
{
    "protocol": "http:",
    "host": "example.com",
    "hostname": "example.com",
    "port": undefined,
    "pathname": "/"
    "search": "",
    "hash": "",
}
*/

getLocation("http://example.com:3000/pathname/?search=test#hash");
/*
{
    "protocol": "http:",
    "host": "example.com:3000",
    "hostname": "example.com",
    "port": "3000",
    "pathname": "/pathname/",
    "search": "?search=test",
    "hash": "#hash"
}
*/

EDIT:

Here's a breakdown of the regular expression

var reURLInformation = new RegExp([
    '^(https?:)//', // protocol
    '(([^:/?#]*)(?::([0-9]+))?)', // host (hostname and port)
    '(/{0,1}[^?#]*)', // pathname
    '(\\?[^#]*|)', // search
    '(#.*|)$' // hash
].join(''));
var match = href.match(reURLInformation);
var loc = window.location;  // => "http://example.com:3000/pathname/?search=test#hash"

returns the currentUrl.

If you want to pass your own string as a url ( doesn't work in IE11 ):

var loc = new URL("http://example.com:3000/pathname/?search=test#hash")

Then you can parse it like:

loc.protocol; // => "http:"
loc.host;     // => "example.com:3000"
loc.hostname; // => "example.com"
loc.port;     // => "3000"
loc.pathname; // => "/pathname/"
loc.hash;     // => "#hash"
loc.search;   // => "?search=test"

freddiefujiwara's answer is pretty good but I also needed to support relative URLs within Internet Explorer. I came up with the following solution:

function getLocation(href) {
    var location = document.createElement("a");
    location.href = href;
    // IE doesn't populate all link properties when setting .href with a relative URL,
    // however .href will return an absolute URL which then can be used on itself
    // to populate these additional fields.
    if (location.host == "") {
      location.href = location.href;
    }
    return location;
};

Now use it to get the needed properties:

var a = getLocation('http://example.com/aa/bb/');
document.write(a.hostname);
document.write(a.pathname);

Example:

 function getLocation(href) { var location = document.createElement("a"); location.href = href; // IE doesn't populate all link properties when setting .href with a relative URL, // however .href will return an absolute URL which then can be used on itself // to populate these additional fields. if (location.host == "") { location.href = location.href; } return location; }; var urlToParse = 'http://example.com/aa/bb/', a = getLocation(urlToParse); document.write('Absolute URL: ' + urlToParse); document.write('<br />'); document.write('Hostname: ' + a.hostname); document.write('<br />'); document.write('Pathname: ' + a.pathname);

js-uri (available on Google Code) takes a string URL and resolves a URI object from it:

var some_uri = new URI("http://www.example.com/foo/bar");

alert(some_uri.authority); // www.example.com
alert(some_uri);           // http://www.example.com/foo/bar

var blah      = new URI("blah");
var blah_full = blah.resolve(some_uri);
alert(blah_full);         // http://www.example.com/foo/blah

What about simple regular expression?

url = "http://www.example.com/path/to/somwhere";
urlParts = /^(?:\w+\:\/\/)?([^\/]+)(.*)$/.exec(url);
hostname = urlParts[1]; // www.example.com
path = urlParts[2]; // /path/to/somwhere

today I meet this problem and I found: URL - MDN Web APIs

var url = new URL("http://test.example.com/dir/subdir/file.html#hash");

This return:

{ hash:"#hash", host:"test.example.com", hostname:"test.example.com", href:"http://test.example.com/dir/subdir/file.html#hash", origin:"http://test.example.com", password:"", pathname:"/dir/subdir/file.html", port:"", protocol:"http:", search: "", username: "" }

Hoping my first contribution helps you !

Here is a version that I copied from https://gist.github.com/1847816 , but rewritten so it's easier to read and debug. The purpose of copying the of the anchor data to another variable named "result" is because the anchor data is pretty long, and so copying a limited number of values to the result will help simplify the result.

/**
 * See: https://gist.github.com/1847816
 * Parse a URI, returning an object similar to Location
 * Usage: var uri = parseUri("hello?search#hash")
 */
function parseUri(url) {

  var result = {};

  var anchor = document.createElement('a');
  anchor.href = url;

  var keys = 'protocol hostname host pathname port search hash href'.split(' ');
  for (var keyIndex in keys) {
    var currentKey = keys[keyIndex]; 
    result[currentKey] = anchor[currentKey];
  }

  result.toString = function() { return anchor.href; };
  result.requestUri = result.pathname + result.search;  
  return result;

}
function parseUrl(url) {
    var m = url.match(/^((?:([^:\/?#]+:)(?:\/\/))?((?:([^\/?#:]*):([^\/?#:]*)@)?([^\/?#:]*)(?::([^\/?#:]*))?))?([^?#]*)(\?[^#]*)?(#.*)?$/),
        r = {
            hash: m[10] || "",                   // #asd
            host: m[3] || "",                    // localhost:257
            hostname: m[6] || "",                // localhost
            href: m[0] || "",                    // http://username:password@localhost:257/deploy/?asd=asd#asd
            origin: m[1] || "",                  // http://username:password@localhost:257
            pathname: m[8] || (m[1] ? "/" : ""), // /deploy/
            port: m[7] || "",                    // 257
            protocol: m[2] || "",                // http:
            search: m[9] || "",                  // ?asd=asd
            username: m[4] || "",                // username
            password: m[5] || ""                 // password
        };
    if (r.protocol.length == 2) {
        r.protocol = "file:///" + r.protocol.toUpperCase();
        r.origin = r.protocol + "//" + r.host;
    }
    r.href = r.origin + r.pathname + r.search + r.hash;
    return r;
};
parseUrl("http://username:password@localhost:257/deploy/?asd=asd#asd");

It works with both absolute and relative urls

Cross-browser URL parsing , works around the relative path problem for IE 6, 7, 8 and 9:

function ParsedUrl(url) {
    var parser = document.createElement("a");
    parser.href = url;

    // IE 8 and 9 dont load the attributes "protocol" and "host" in case the source URL
    // is just a pathname, that is, "/example" and not "http://domain.com/example".
    parser.href = parser.href;

    // IE 7 and 6 wont load "protocol" and "host" even with the above workaround,
    // so we take the protocol/host from window.location and place them manually
    if (parser.host === "") {
        var newProtocolAndHost = window.location.protocol + "//" + window.location.host;
        if (url.charAt(1) === "/") {
            parser.href = newProtocolAndHost + url;
        } else {
            // the regex gets everything up to the last "/"
            // /path/takesEverythingUpToAndIncludingTheLastForwardSlash/thisIsIgnored
            // "/" is inserted before because IE takes it of from pathname
            var currentFolder = ("/"+parser.pathname).match(/.*\//)[0];
            parser.href = newProtocolAndHost + currentFolder + url;
        }
    }

    // copies all the properties to this object
    var properties = ['host', 'hostname', 'hash', 'href', 'port', 'protocol', 'search'];
    for (var i = 0, n = properties.length; i < n; i++) {
      this[properties[i]] = parser[properties[i]];
    }

    // pathname is special because IE takes the "/" of the starting of pathname
    this.pathname = (parser.pathname.charAt(0) !== "/" ? "/" : "") + parser.pathname;
}

Usage ( demo JSFiddle here ):

var myUrl = new ParsedUrl("http://www.example.com:8080/path?query=123#fragment");

Result:

{
    hash: "#fragment"
    host: "www.example.com:8080"
    hostname: "www.example.com"
    href: "http://www.example.com:8080/path?query=123#fragment"
    pathname: "/path"
    port: "8080"
    protocol: "http:"
    search: "?query=123"
}

For those looking for a modern solution that works in IE, Firefox, AND Chrome:

None of these solutions that use a hyperlink element will work the same in chrome. If you pass an invalid (or blank) url to chrome, it will always return the host where the script is called from. So in IE you will get blank, whereas in Chrome you will get localhost (or whatever).

If you are trying to look at the referrer, this is deceitful. You will want to make sure that the host you get back was in the original url to deal with this:

    function getHostNameFromUrl(url) {
        // <summary>Parses the domain/host from a given url.</summary>
        var a = document.createElement("a");
        a.href = url;

        // Handle chrome which will default to domain where script is called from if invalid
        return url.indexOf(a.hostname) != -1 ? a.hostname : '';
    }

The AngularJS way - fiddle here: http://jsfiddle.net/PT5BG/4/

<!DOCTYPE html>
<html>
<head>
    <title>Parse URL using AngularJS</title>
</head>
<body ng-app ng-controller="AppCtrl" ng-init="init()">

<h3>Parse URL using AngularJS</h3>

url: <input type="text" ng-model="url" value="" style="width:780px;">

<ul>
    <li>href = {{parser.href}}</li>
    <li>protocol = {{parser.protocol}}</li>
    <li>host = {{parser.host}}</li>
    <li>hostname = {{parser.hostname}}</li>
    <li>port = {{parser.port}}</li>
    <li>pathname = {{parser.pathname}}</li>
    <li>hash = {{parser.hash}}</li>
    <li>search = {{parser.search}}</li>
</ul>

<script src="https://ajax.googleapis.com/ajax/libs/angularjs/1.0.6/angular.min.js"></script>

<script>
function AppCtrl($scope) {

    $scope.$watch('url', function() {
        $scope.parser.href = $scope.url;
    });

    $scope.init = function() {
        $scope.parser = document.createElement('a');
        $scope.url = window.location;
    }

}
</script>

</body>
</html>

Expanded on acdcjunior solution by adding " searchParam " function
Mimicking the URL object , added " searchParam " to parse query string
Works for IE 6, 7, 8 9, 10, 11

USAGE - ( JSFiddle Link )

// USAGE:
var myUrl = new ParsedUrl("http://www.example.com/path?var1=123&var2=abc#fragment");
console.log(myUrl);
console.log(myUrl.searchParam('var1'));
console.log(myUrl.searchParam('var2'));

OUTPUT - ( JSFiddle Link )

{
  hash: "#fragment",
  host: "www.example.com:8080",
  hostname: "www.example.com",
  href: "http://www.example.com:8080/path?var1=123&amp;var2=abc#fragment",
  pathname: "/path",
  port: "80",
  protocol: "http:",
  search: "?var1=123&amp;var2=abc"
}

"123"
"abc"

CODE - ( JSFiddle Link )

function ParsedUrl(url) {
    var parser = document.createElement("a");
    parser.href = url;
    
    // IE 8 and 9 dont load the attributes "protocol" and "host" in case the source URL
    // is just a pathname, that is, "/example" and not "http://www.example.com/example".
    parser.href = parser.href;
    
    // IE 7 and 6 wont load "protocol" and "host" even with the above workaround,
    // so we take the protocol/host from window.location and place them manually
    if (parser.host === "") {
        var newProtocolAndHost = window.location.protocol + "//" + window.location.host;
        if (url.charAt(1) === "/") {
            parser.href = newProtocolAndHost + url;
        } else {
            // the regex gets everything up to the last "/"
            // /path/takesEverythingUpToAndIncludingTheLastForwardSlash/thisIsIgnored
            // "/" is inserted before because IE takes it of from pathname
            var currentFolder = ("/"+parser.pathname).match(/.*\//)[0];
            parser.href = newProtocolAndHost + currentFolder + url;
        }
    }
    
    // copies all the properties to this object
    var properties = ['host', 'hostname', 'hash', 'href', 'port', 'protocol', 'search'];
    for (var i = 0, n = properties.length; i < n; i++) {
      this[properties[i]] = parser[properties[i]];
    }
    
    // pathname is special because IE takes the "/" of the starting of pathname
    this.pathname = (parser.pathname.charAt(0) !== "/" ? "/" : "") + parser.pathname;
  
  //search Params
  this.searchParam =  function(variable) {
    var query = (this.search.indexOf('?') === 0) ? this.search.substr(1) : this.search;
    var vars = query.split('&');
    for (var i = 0; i < vars.length; i++) {
        var pair = vars[i].split('=');
        if (decodeURIComponent(pair[0]) == variable) {
            return decodeURIComponent(pair[1]);
        }
    }
    console.log('Query variable %s not found', variable);
    return '';
    };
}

You can also use parse_url() function from Locutus project (former php.js).

Code:

parse_url('http://username:password@hostname/path?arg=value#anchor');

Result:

{
  scheme: 'http',
  host: 'hostname',
  user: 'username',
  pass: 'password',
  path: '/path',
  query: 'arg=value',
  fragment: 'anchor'
}

Simple and robust solution using the module pattern. This includes a fix for IE where the pathname does not always have a leading forward-slash ( / ).

I have created a Gist along with a JSFiddle which offers a more dynamic parser. I recommend you check it out and provide feedback.

var URLParser = (function (document) {
    var PROPS = 'protocol hostname host pathname port search hash href'.split(' ');
    var self = function (url) {
        this.aEl = document.createElement('a');
        this.parse(url);
    };
    self.prototype.parse = function (url) {
        this.aEl.href = url;
        if (this.aEl.host == "") {
           this.aEl.href = this.aEl.href;
        }
        PROPS.forEach(function (prop) {
            switch (prop) {
                case 'hash':
                    this[prop] = this.aEl[prop].substr(1);
                    break;
                default:
                    this[prop] = this.aEl[prop];
            }
        }, this);
        if (this.pathname.indexOf('/') !== 0) {
            this.pathname = '/' + this.pathname;
        }
        this.requestUri = this.pathname + this.search;
    };
    self.prototype.toObj = function () {
        var obj = {};
        PROPS.forEach(function (prop) {
            obj[prop] = this[prop];
        }, this);
        obj.requestUri = this.requestUri;
        return obj;
    };
    self.prototype.toString = function () {
        return this.href;
    };
    return self;
})(document);

Demo

 var URLParser = (function(document) { var PROPS = 'protocol hostname host pathname port search hash href'.split(' '); var self = function(url) { this.aEl = document.createElement('a'); this.parse(url); }; self.prototype.parse = function(url) { this.aEl.href = url; if (this.aEl.host == "") { this.aEl.href = this.aEl.href; } PROPS.forEach(function(prop) { switch (prop) { case 'hash': this[prop] = this.aEl[prop].substr(1); break; default: this[prop] = this.aEl[prop]; } }, this); if (this.pathname.indexOf('/') !== 0) { this.pathname = '/' + this.pathname; } this.requestUri = this.pathname + this.search; }; self.prototype.toObj = function() { var obj = {}; PROPS.forEach(function(prop) { obj[prop] = this[prop]; }, this); obj.requestUri = this.requestUri; return obj; }; self.prototype.toString = function() { return this.href; }; return self; })(document); /* Main */ var out = document.getElementById('out'); var urls = [ 'https://www.example.org:5887/foo/bar?a=1&b=2#section-1', 'ftp://www.files.com:22/folder?id=7' ]; var parser = new URLParser(); urls.forEach(function(url) { parser.parse(url); println(out, JSON.stringify(parser.toObj(), undefined, ' '), 0, '#0000A7'); }); /* Utility functions */ function print(el, text, bgColor, fgColor) { var span = document.createElement('span'); span.innerHTML = text; span.style['backgroundColor'] = bgColor || '#FFFFFF'; span.style['color'] = fgColor || '#000000'; el.appendChild(span); } function println(el, text, bgColor, fgColor) { print(el, text, bgColor, fgColor); el.appendChild(document.createElement('br')); }
 body { background: #444; } span { background-color: #fff; border: thin solid black; display: inline-block; } #out { display: block; font-family: Consolas, Menlo, Monaco, Lucida Console, Liberation Mono, DejaVu Sans Mono, Bitstream Vera Sans Mono, Courier New, monospace, serif; font-size: 12px; white-space: pre; }
 <div id="out"></div>

Output

{
 "protocol": "https:",
 "hostname": "www.example.org",
 "host": "www.example.org:5887",
 "pathname": "/foo/bar",
 "port": "5887",
 "search": "?a=1&b=2",
 "hash": "section-1",
 "href": "https://www.example.org:5887/foo/bar?a=1&b=2#section-1",
 "requestUri": "/foo/bar?a=1&b=2"
}
{
 "protocol": "ftp:",
 "hostname": "www.files.com",
 "host": "www.files.com:22",
 "pathname": "/folder",
 "port": "22",
 "search": "?id=7",
 "hash": "",
 "href": "ftp://www.files.com:22/folder?id=7",
 "requestUri": "/folder?id=7"
}

为此使用https://www.npmjs.com/package/uri-parse-lib

var t = parserURI("http://user:pass@example.com:8080/directory/file.ext?query=1&next=4&sed=5#anchor");

Why do not use it?

        $scope.get_location=function(url_str){
        var parser = document.createElement('a');
        parser.href =url_str;//"http://example.com:3000/pathname/?search=test#hash";
        var info={
            protocol:parser.protocol,   
            hostname:parser.hostname, // => "example.com"
            port:parser.port,     // => "3000"
            pathname:parser.pathname, // => "/pathname/"
            search:parser.search,   // => "?search=test"
            hash:parser.hash,     // => "#hash"
            host:parser.host, // => "example.com:3000"      
        }
        return info;
    }
    alert( JSON.stringify( $scope.get_location("http://localhost:257/index.php/deploy/?asd=asd#asd"),null,4 ) );

Stop reinventing the wheel. Use https://github.com/medialize/URI.js/

var uri = new URI("http://example.org:80/foo/hello.html");
// get host
uri.host(); // returns string "example.org:80"
// set host
uri.host("example.org:80");

Just use url.js library (for web and node.js).

https://github.com/websanova/js-url

url: http://example.com?param=test#param=again

url('?param'); // test
url('#param'); // again
url('protocol'); // http
url('port'); // 80
url('domain'); // example.com
url('tld'); // com

etc...

a simple hack with the first answer

var getLocation = function(href=window.location.href) {
    var l = document.createElement("a");
    l.href = href;
    return l;
};

this can used even without argument to figure out the current hostname getLocation().hostname will give current hostname

This doesn't parse the query and hash, but otherwise it works well otherwise.

 const getURIParts = (url) => { const matches = url.match(/^(\w+?:\/\/)?([\w-\.]+(?=\/?))?:?(\d*)?([^:]*)/) return { scheme: matches ? matches[1] : undefined, host: matches ? matches[2] : '', port: matches ? matches[3] : undefined, pathname: matches ? matches[4] : '' } } console.log(getURIParts("")) console.log(getURIParts("http://localhost/bla")) console.log(getURIParts("https://api.spotify.com/")) console.log(getURIParts("https://api.spotify.com")) console.log(getURIParts("wss://wss.slack.com/link/?ticket=1234-5678")) console.log(getURIParts("localhost")) console.log(getURIParts("localhost/bla")) console.log(getURIParts("localhost/")) console.log(getURIParts("api.spotify.com/bla/two")) console.log(getURIParts("api.spotify.com:8000/bla/two")) console.log(getURIParts("https://api.spotify.com:8800/")) console.log(getURIParts("/mp3-preview/f504e6b8e037771318656394f532dede4f9bcaea"))

Try This :

 function getUrlPath(str){ //fakepath when url not have a path var fakepath = "/FakPath"; var url = str+fakepath; var reg = /.+?\:\/\/.+?(\/.+?)(?:#|\?|$)/; var output = reg.exec(url); // check "output" != null return (output) ? output[1].replace(fakepath,"") : fakepath; } var myurl = "https://stackoverflow.com/questions/736513/"; const path = getUrlPath(myurl); console.log( path ); //output : /questions/736513/

Of course in >2016 the right answer is using URL API
For page URL window.location
And for <a href="..."> HTMLAnchorElement API

Also for supporting older browser, using polyfill:

<script crossorigin="anonymous" src="https://polyfill.io/v3/polyfill.min.js?features=URL"></script>

But just as an other option, it can be handle easy and accurate by RegEx pattern :

 function URIinfo(s) { s=s.match(/^(([^/]*?:)\/*((?:([^:]+):([^@]+)@)?([^/:]{2,}|\[[\w:]+])(:\d*)?(?=\/|$))?)?((.*?\/)?(([^/]*?)(\.[^/.]+?)?))(\?.*?)?(#.*)?$/); return {origin:s[1],protocol:s[2],host:s[3],username:s[4],password:s[5],hostname:s[6],port:s[7],path:s[8],folders:s[9],file:s[10],filename:s[11],fileext:s[12],search:s[13],hash:s[14]}; } var sample='http://user:password@my.site.com:8080/onefolder/folder.name/file.min.js?query=http://my.site.com:8080/file.exe#hash-root:/file/1.txt'; console.log (URIinfo(sample)); /* { "origin": "http://user:password@my.site.com:8080", "protocol": "http:", "host": "user:password@my.site.com:8080", "username": "user", "password": "password", "hostname": "my.site.com", "port": ":8080", "path": "/onefolder/folder.name/file.min.js", "folders": "/onefolder/folder.name/", "file": "file.min.js", "filename": "file.min", "fileext": ".js", "search": "?query=http://my.site.com:8080/file.exe", "hash": "#hash-root:/file/1.txt" } */

Work with the RegEx

It is fine with any kind of

  • Absolute / Relative paths
  • IPv4 / IPv6
  • Net protocols / Local Files
  • Query / Hash

And will return all URL options but not searchParams

(+) Also will return file info like PHP pathInfo

How about?

'https://stackoverflow.com/questions/736513/how-do-i-parse-a-url-into-hostname-and-path-in-javascript'.split('//').pop().split('/')[0]

Resulting:

'stackoverflow.com'

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM