I'm developing a Google Chrome extension, and I'd like to know how to open a new tab (OK, this part is simple: chrome.tabs.create({'url': chrome.extension.getURL(mypage)}, function(tab) { /* ... */ });) and then retrieve the source code of that page.
I know that I can use AJAX to get the source, but the problem is that the web page contains some JavaScript code that edits the page, and I need the edited page.
Is it possible?
To serialize the full, live HTML document, use the following code:
// @author Rob W <http://stackoverflow.com/users/938089/rob-w>
// Demo: var serialized_html = DOMtoString(document);
/**
 * Serialize the direct children of a document (or other node) to a string.
 *
 * Element children contribute their outerHTML; text, CDATA, comment and
 * doctype children are written back out in markup form. Children of any
 * other node type are skipped.
 *
 * @param {Node} document_root - Node whose direct children are serialized.
 * @returns {string} The concatenated markup of all handled children.
 */
function DOMtoString(document_root) {
    var markup = '';
    var current;

    // Walk the sibling chain starting at the first child.
    for (current = document_root.firstChild; current; current = current.nextSibling) {
        var kind = current.nodeType;

        if (kind === Node.ELEMENT_NODE) {
            markup += current.outerHTML;
        } else if (kind === Node.TEXT_NODE) {
            markup += current.nodeValue;
        } else if (kind === Node.CDATA_SECTION_NODE) {
            markup += '<![CDATA[' + current.nodeValue + ']]>';
        } else if (kind === Node.COMMENT_NODE) {
            markup += '<!--' + current.nodeValue + '-->';
        } else if (kind === Node.DOCUMENT_TYPE_NODE) {
            // (X)HTML documents are identified by public identifiers.
            // The SYSTEM keyword is only written when there is a system
            // identifier but no public identifier.
            var publicPart = current.publicId ? ' PUBLIC "' + current.publicId + '"' : '';
            var systemKeyword = (!current.publicId && current.systemId) ? ' SYSTEM' : '';
            var systemPart = current.systemId ? ' "' + current.systemId + '"' : '';
            markup += '<!DOCTYPE ' + current.name + publicPart + systemKeyword + systemPart + '>\n';
        }
    }

    return markup;
}
Now, in a Chrome extension, you have to add some event listeners in an extension page, such as the background page or a popup page:
/**
 * Get the HTML source for the main frame of a given tab.
 *
 * Waits until the given tab reports the "complete" status, then injects
 * get_source.js into it and passes the first injection result to the
 * callback. The callback receives an empty string instead when the tab is
 * closed before it finished loading, or when the script injection fails.
 *
 * @param {integer} tabId - ID of tab.
 * @param {function} callback - Called with the tab's source upon completion.
 */
function getSourceFromTab(tabId, callback) {
    // Capture the page when it has fully loaded.
    // The handlers are function declarations (hoisted), so they can be
    // registered before their definitions below.
    chrome.tabs.onUpdated.addListener(onUpdated);
    chrome.tabs.onRemoved.addListener(onRemoved);

    function onUpdated(updatedTabId, details) {
        // The event is dispatched for every tab; only react to the tab we
        // were asked about, and only once it has finished loading.
        if (updatedTabId !== tabId || details.status !== 'complete') {
            return;
        }
        removeListeners();
        chrome.tabs.executeScript(tabId, {
            file: 'get_source.js'
        }, function(results) {
            // Reading chrome.runtime.lastError marks the error as handled.
            // When the injection failed there is no usable result, so
            // report an empty source rather than throwing on results[0].
            if (chrome.runtime.lastError || !results || results.length === 0) {
                callback('');
                return;
            }
            callback(results[0]);
        });
    }

    function removeListeners() {
        chrome.tabs.onUpdated.removeListener(onUpdated);
        chrome.tabs.onRemoved.removeListener(onRemoved);
    }

    function onRemoved(removedTabId) {
        // Ignore other tabs being closed.
        if (removedTabId !== tabId) {
            return;
        }
        removeListeners();
        callback(''); // Tab closed, no response.
    }
}
The above function passes the source code of the main frame in a tab to the callback. If you want to get the source of a child frame, call chrome.tabs.executeScript with a frameId parameter.
The next snippet shows an example of how your extension could use the function. Paste the snippet in the background page's console, or declare a browserAction, put the snippet in the onClicked listener, and click on the extension button.
// Page to open in a new tab and capture once it has loaded.
var mypage = 'https://example.com';

// Receives whatever getSourceFromTab passes along and logs it.
var callback = function(html_string) {
    console.log('HTML string, from extension: ', html_string);
};

// Open the page, then request the source of the tab that was created.
chrome.tabs.create({ url: mypage }, function(createdTab) {
    var createdTabId = createdTab.id;
    getSourceFromTab(createdTabId, callback);
});
The referenced get_source.js contains the following code:
function DOMtoString(document_root) {
... see top of the answer...
}
// The value of the last expression of the content script is passed
// to the chrome.tabs.executeScript callback
DOMtoString(document);
Don't forget to add the appropriate host permissions, so that you can read the DOM from the page. In the above example, you have to add "https://example.com/*" to the "permissions" section of manifest.json.
Node - MDN
DocumentType (document.doctype, <!DOCTYPE ... >) - MDN
chrome.tabs.create - Google Chrome Extension docs
chrome.tabs.executeScript - Google Chrome Extension docs
chrome.tabs.onUpdated - Google Chrome Extension docs
chrome.tabs.onRemoved - Google Chrome Extension docs
The technical posts on this site are licensed under CC BY-SA 4.0. If you reprint this content, please indicate the site URL or the original address. For any questions, please contact: yoyou2525@163.com.