简体   繁体   中英

How to calculate the XPath position of an element using Javascript?

Let's say I have a large HTML file with different kinds of tags, similar to the StackOverflow one you're looking at right now.

Now let's say you click an element on the page, what would the Javascript function look like that calculates the most basic XPath that refers to that specific element?

I know there are an infinite ways of refering to that element in XPath, but I'm looking for something that just looks at the DOM tree, with no regard for IDs, classes, etc.

Example:

<html>
<head><title>Fruit</title></head>
<body>
<ol>
  <li>Bananas</li>
  <li>Apples</li>
  <li>Strawberries</li>
</ol>
</body>
</html>

Let's say you click on Apples . The Javascript function would return the following:

/html/body/ol/li[2]

It would basically just work its way upward the DOM tree all the way to the HTML element.

Just to clarify, the 'on-click' event-handler isn't the problem. I can make that work. I'm just not sure how to calculate the element's position within the DOM tree and represent it as an XPath.

PS Any answer with or without the use of the JQuery library is appreciated.

PPS I completely new to XPath, so I might even have made a mistake in the above example, but you'll get the idea.

Edit at August 11, 2010: Looks like somebody else asked a similar question: generate/get the Xpath for a selected textnode

Firebug can do this, and it's open source ( BSD ) so you can reuse their implementation , which does not require any libraries.

3rd party edit

This is an extract from the linked source above. Just in case the link above will change. Please check the source to benefit from changes and updates or the full featureset provided.

Xpath.getElementXPath = function(element)
{
    if (element && element.id)
        return '//*[@id="' + element.id + '"]';
    else
        return Xpath.getElementTreeXPath(element);
};

Above code calls this function. Attention i added some line-wrapping to avoid horizontal scroll bar

Xpath.getElementTreeXPath = function(element)
{
    var paths = [];  // Use nodeName (instead of localName) 
    // so namespace prefix is included (if any).
    for (; element && element.nodeType == Node.ELEMENT_NODE; 
           element = element.parentNode)
    {
        var index = 0;
        var hasFollowingSiblings = false;
        for (var sibling = element.previousSibling; sibling; 
              sibling = sibling.previousSibling)
        {
            // Ignore document type declaration.
            if (sibling.nodeType == Node.DOCUMENT_TYPE_NODE)
                continue;

            if (sibling.nodeName == element.nodeName)
                ++index;
        }

        for (var sibling = element.nextSibling; 
            sibling && !hasFollowingSiblings;
            sibling = sibling.nextSibling)
        {
            if (sibling.nodeName == element.nodeName)
                hasFollowingSiblings = true;
        }

        var tagName = (element.prefix ? element.prefix + ":" : "") 
                          + element.localName;
        var pathIndex = (index || hasFollowingSiblings ? "[" 
                   + (index + 1) + "]" : "");
        paths.splice(0, 0, tagName + pathIndex);
    }

    return paths.length ? "/" + paths.join("/") : null;
};

A function I use to get an XPath similar to your situation, it uses jQuery:

function getXPath( element )
{
    var xpath = '';
    for ( ; element && element.nodeType == 1; element = element.parentNode )
    {
        var id = $(element.parentNode).children(element.tagName).index(element) + 1;
        id > 1 ? (id = '[' + id + ']') : (id = '');
        xpath = '/' + element.tagName.toLowerCase() + id + xpath;
    }
    return xpath;
}

Small, powerfull and pure-js function

It returns xpath for the element and elements iterator for xpath.

https://gist.github.com/iimos/e9e96f036a3c174d0bf4

function xpath(el) {
  if (typeof el == "string") return document.evaluate(el, document, null, 0, null)
  if (!el || el.nodeType != 1) return ''
  if (el.id) return "//*[@id='" + el.id + "']"
  var sames = [].filter.call(el.parentNode.children, function (x) { return x.tagName == el.tagName })
  return xpath(el.parentNode) + '/' + el.tagName.toLowerCase() + (sames.length > 1 ? '['+([].indexOf.call(sames, el)+1)+']' : '')
}

Probably you will need to add a shim for IE8 that don't support the [].filter method: this MDN page gives such code.

Usage

Getting xpath for node:
 var xp = xpath(elementNode)
Executing xpath:
 var iterator = xpath("//h2") var el = iterator.iterateNext(); while (el) { // work with element el = iterator.iterateNext(); }

The firebug implementation can be modified slightly to check for element.id further up the dom tree:

  /**
   * Gets an XPath for an element which describes its hierarchical location.
   */
  var getElementXPath = function(element) {
      if (element && element.id)
          return '//*[@id="' + element.id + '"]';
      else
          return getElementTreeXPath(element);
  };

  var getElementTreeXPath = function(element) {
      var paths = [];

      // Use nodeName (instead of localName) so namespace prefix is included (if any).
      for (; element && element.nodeType == 1; element = element.parentNode)  {
          var index = 0;
          // EXTRA TEST FOR ELEMENT.ID
          if (element && element.id) {
              paths.splice(0, 0, '/*[@id="' + element.id + '"]');
              break;
          }

          for (var sibling = element.previousSibling; sibling; sibling = sibling.previousSibling) {
              // Ignore document type declaration.
              if (sibling.nodeType == Node.DOCUMENT_TYPE_NODE)
                continue;

              if (sibling.nodeName == element.nodeName)
                  ++index;
          }

          var tagName = element.nodeName.toLowerCase();
          var pathIndex = (index ? "[" + (index+1) + "]" : "");
          paths.splice(0, 0, tagName + pathIndex);
      }

      return paths.length ? "/" + paths.join("/") : null;
  };

I have just modified DanS' solution in order to use it with textNodes. Very useful to serialize HTML range object.

/**
 * Gets an XPath for an node which describes its hierarchical location.
 */
var getNodeXPath = function(node) {
    if (node && node.id)
        return '//*[@id="' + node.id + '"]';
    else
        return getNodeTreeXPath(node);
};

var getNodeTreeXPath = function(node) {
    var paths = [];

    // Use nodeName (instead of localName) so namespace prefix is included (if any).
    for (; node && (node.nodeType == 1 || node.nodeType == 3) ; node = node.parentNode)  {
        var index = 0;
        // EXTRA TEST FOR ELEMENT.ID
        if (node && node.id) {
            paths.splice(0, 0, '/*[@id="' + node.id + '"]');
            break;
        }

        for (var sibling = node.previousSibling; sibling; sibling = sibling.previousSibling) {
            // Ignore document type declaration.
            if (sibling.nodeType == Node.DOCUMENT_TYPE_NODE)
                continue;

            if (sibling.nodeName == node.nodeName)
                ++index;
        }

        var tagName = (node.nodeType == 1 ? node.nodeName.toLowerCase() : "text()");
        var pathIndex = (index ? "[" + (index+1) + "]" : "");
        paths.splice(0, 0, tagName + pathIndex);
    }

    return paths.length ? "/" + paths.join("/") : null;
};

There is nothing built in to get the xpath of an HTML element, but the reverse is common for example usingthe jQuery xpath selector .

If you need to determine the xpath of an HTML element you will have to provide a custom function to do this. Here are a couple of example javascript/jQuery impls to calculate the xpath.

The solution below is preferable if you need to reliably determine the absolute XPath of an element.

Some other answers either rely partly on the element id (which is not reliable since there can potentially be multiple elements with identical ids) or they generate XPaths that actually specify more elements than the one given (by erroneously omitting the sibling index in certain circumstances).

The code has been adapted from Firebug's source code by fixing the above-mentioned problems.

getXElementTreeXPath = function( element ) {
    var paths = [];

    // Use nodeName (instead of localName) so namespace prefix is included (if any).
    for ( ; element && element.nodeType == Node.ELEMENT_NODE; element = element.parentNode )  {
        var index = 0;

        for ( var sibling = element.previousSibling; sibling; sibling = sibling.previousSibling ) {
            // Ignore document type declaration.
            if ( sibling.nodeType == Node.DOCUMENT_TYPE_NODE ) {
                continue;
            }

            if ( sibling.nodeName == element.nodeName ) {
                ++index;
            }
        }

        var tagName = element.nodeName.toLowerCase();

        // *always* include the sibling index
        var pathIndex = "[" + (index+1) + "]";

        paths.unshift( tagName + pathIndex );
    }

    return paths.length ? "/" + paths.join( "/") : null;
};

Just for fun, an XPath 2.0 one line implementation:

string-join(ancestor-or-self::*/concat(name(),
                                       '[',
                                       for $x in name() 
                                          return count(preceding-sibling::*
                                                          [name() = $x]) 
                                                 + 1,
                                       ']'),
            '/')
function getPath(event) {
  event = event || window.event;

  var pathElements = [];
  var elem = event.currentTarget;
  var index = 0;
  var siblings = event.currentTarget.parentNode.getElementsByTagName(event.currentTarget.tagName);
  for (var i=0, imax=siblings.length; i<imax; i++) {
      if (event.currentTarget === siblings[i] {
        index = i+1; // add 1 for xpath 1-based
      }
  }


  while (elem.tagName.toLowerCase() != "html") {
    pathElements.unshift(elem.tagName);
    elem = elem.parentNode;
  }
  return pathElements.join("/") + "[" + index + "]";
}

EDITED TO ADD SIBLING INDEX INFORMATION

Use https://github.com/KajeNick/jquery-get-xpath

<script src="https://code.jquery.com/jquery-3.4.1.min.js"></script>
<script src="../src/jquery-get-xpath.js"></script> 

<script>
    jQuery(document).ready(function ($) {

        $('body').on('click', 'ol li', function () {
           let xPath = $(this).jGetXpath();

           console.log(xPath);
        });

    });
</script>

Console will show: /html/body/ol/li[2]

I have came across this problem and found it hard to solve fully. as in my case it was giving half xpath. so i modified it a little to give full path. here is my answer.

window.onclick = (e) => {
    let pathArr = e.path;
    let element = pathArr[0];
    var xpath = '';
if(pathArr.length<=2 && pathArr[0].nodeType!=1){
    for (let i = 0; i < pathArr.length - 1 && pathArr[i].nodeType == 1; i++) {
        element = pathArr[i];
        var id = $(element.parentNode).children(element.tagName).index(element) + 1;
        id > 1 ? (id = '[' + id + ']') : (id = '');
        xpath = '/' + element.tagName.toLowerCase() + id + xpath;
    }
}
else{
  xpath="/html/document"
}
    return xpath;

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM