Parse entire HTML document from string into jQuery

The reason why it does not work is because jQuery expects a DOM node to find the 'title' tags. As you noted, you need to parse the html text first.

From here and here, the solution is to parse the string and append it to a temporary div (or other element):

// Parse the string into DOM nodes, then attach them to a temporary <div> so that .find() has a root to search under.
var tempDom = $('<div></div>').append($.parseHTML(str));

Then, you can manipulate tempDom to find elements.

Working demo: http://codepen.io/anon/pen/wKwLMP


TL;DR ... use the DOMParser API

var htmlString = "<html><head><title>Name</title></head><body><div class='content'>Hello</div></body></html>";
// Use the "text/html" MIME type so the string goes through the HTML parser and
// yields a complete document (html/head/body). "text/xml" would invoke the XML
// parser instead, which rejects markup that is not well-formed XML.
var htmlDoc = (new DOMParser()).parseFromString(htmlString, "text/html");

Unfortunately, the current answers don't handle a lot of edge cases

You should not use $.parseHTML(htmlString), as it is immediately lossy. If we check the source code of $.parseHTML, it calls buildFragment, which creates a temporary DOM element and sets its innerHTML property.

innerHTML Parsing

Element.innerHTML provides an API for:

  • Parsing (string -> DOM) in the write operation
  • Serializing (DOM -> string) in the read operation

And here's the spec for the HTML fragment parsing algorithm

Taking a sample string, here's the result of trying various HTML Parsing approaches:

// Sample full document (html/head/body) fed to every parsing approach below.
var htmlString =
  "<html>" +
    "<head><title>Name</title></head>" +
    "<body><div class='content'>Hello</div></body>" +
  "</html>";

/**
 * Runs five different ways of turning `htmlString` into DOM nodes and logs,
 * for each one, the resulting DOM, its serialized HTML, and the text found
 * for the <title> element and the `.content` element.
 *
 * Reads the outer `htmlString` variable and reports through
 * `console.LogOutput(method, dom, html, title, content)`.
 * Requires jQuery (`$`), `document` and `DOMParser` to be available.
 */
function ParseHtmlTests() {

  /*** $.parseHTML ***/
  var $parseHtml = $.parseHTML(htmlString)

  console.LogOutput(
    '1. $.parseHTML',
    $parseHtml,
    $parseHtml.map(function(el) { return el.outerHTML }),
    $($parseHtml).find("title").text(),
    $($parseHtml).find(".content").text()
  )


  /*** tempDiv.innerHTML ***/
  var tempDiv = document.createElement("div")
  tempDiv.innerHTML = htmlString

  console.LogOutput(
    '2. tempDiv.innerHTML',
    tempDiv,
    tempDiv.outerHTML,
    $(tempDiv).find("title").text(),
    $(tempDiv).find(".content").text()
  )


  /*** divAppendContents ***/
  var $divAppendContents = $('<div></div>').append(htmlString)

  console.LogOutput(
    '3. divAppendContents',
    $divAppendContents,
    $divAppendContents.html(),
    $divAppendContents.find("title").text(),
    $divAppendContents.find(".content").text()
  )


  /*** tempHtml.innerHTML ***/
  var tmpHtml = document.createElement( 'html' );
  tmpHtml.innerHTML = htmlString;

  console.LogOutput(
    '4. tempHtml.innerHTML',
    tmpHtml,
    tmpHtml.outerHTML,
    tmpHtml.getElementsByTagName('title')[0].innerText,
    tmpHtml.getElementsByClassName('content')[0].innerText
  )


  /*** DOMParser.parseFromString ***/
  // "text/html" selects the HTML parser; "text/xml" would run the XML parser,
  // which rejects markup that is not well-formed XML.
  var htmlDoc = (new DOMParser()).parseFromString(htmlString, "text/html");

  console.LogOutput(
    '5. DOMParser.parseFromString',
    htmlDoc,
    htmlDoc.documentElement.outerHTML,
    htmlDoc.documentElement.getElementsByTagName('title')[0].innerHTML,
    htmlDoc.documentElement.getElementsByClassName('content')[0].innerHTML
  )
}

/*** Console helpers: grouping fallbacks plus a labelled result logger ***/

// Provide plain-log substitutes only when the host console lacks grouping.
if (!console.group) {
  console.group = function(msg) {
    console.log(msg);
  };
}
if (!console.groupEnd) {
  console.groupEnd = function(msg) {
    console.log("----------------------------");
  };
}

/**
 * Logs one parsing result as a console group titled `method`, with one
 * labelled line each for the DOM, its HTML, the title text and the content text.
 */
console.LogOutput = function(method, dom, html, title, content) {
  var rows = [
    ["DOM:", dom],
    ["HTML:", html],
    ["Title:", title],
    ["Content:", content]
  ];

  console.group(method);
  rows.forEach(function(row) {
    console.log(row[0], row[1]);
  });
  console.groupEnd();
};

/*** Execute Script: run all five parsing comparisons and log their results ***/
ParseHtmlTests()
<script src="https://cdnjs.cloudflare.com/ajax/libs/jquery/3.3.1/jquery.js"></script>

And here's the output from the above script in Chrome:

Output

The best approach seems to be creating an HTML root element by setting the innerHTML of a temporary html element, or using the DOMParser API.

Further Reading:

  • Parse an HTML string with JS
  • Parsing of html string using jquery
  • jQuery not finding elements in jQuery.parseHTML()

Tags:

Jquery