Skip to content

JavaScript Techniques

Discover advanced JavaScript techniques to capture links, images, stylesheets, emails, and various types of URLs on web pages. Enhance your web scraping and analysis capabilities with these powerful methods.


Dump All URLs

// Log the href of every <a> element on the page.
// for...of visits only the elements themselves; for...in would also enumerate
// the collection's non-element keys (length, item, namedItem) and log
// undefined for each of them.
const urls = document.getElementsByTagName('a');
for (const anchor of urls) {
    console.log(anchor.href);
}

Capture All Links

// Spread document.links into an array, then log the href of each entry.
[...document.links].forEach((link) => {
  console.log(link.href);
});

Capture All Links (Alternative)

// Select every <a> element, spread the NodeList into an array, and log each href.
[...document.querySelectorAll("a")].forEach(({ href }) => {
  console.log(href);
});

Capture All Links (Alternative)

// Copy the NodeList of <a> elements into an array with Array.from and log each href.
Array.from(document.querySelectorAll("a")).forEach((anchor) => {
  console.log(anchor.href);
});

Capture All Links (Alternative)

// Borrow Array.prototype.forEach to walk the NodeList directly, without copying it first.
Array.prototype.forEach.call(
  document.querySelectorAll("a"),
  ({ href }) => console.log(href),
);

Capture All Links (Alternative)

{
  // Gather every anchor href, drop duplicates through a Set, and print the
  // remaining URLs as a single comma-separated line.
  const hrefs = [...document.querySelectorAll("a")].map(({ href }) => href);
  const distinctHrefs = Array.from(new Set(hrefs));
  console.log(distinctHrefs.join(", "));
}

Capture All Links (Alternative)

// Deduplicate anchor hrefs with a Set, spread it back into an array, and
// print everything as one comma-separated line.
console.log(
  [...new Set([...document.querySelectorAll("a")].map(({ href }) => href))].join(", "),
);

Capture All Links (Alternative)

// Call the generic Array forEach with the NodeList as its receiver and log each anchor's href.
Array.prototype.forEach.call(document.querySelectorAll("a"), (anchor) => {
  console.log(anchor.href);
});

Capture All Links (Alternative)

// The NodeList returned by querySelectorAll is iterable, so for...of can walk it directly.
const links = document.querySelectorAll("a");
for (const { href } of links) {
  console.log(href);
}

Capture All Links (Alternative)

// Convert the NodeList of anchors into a real array, then log each href.
// forEach is used instead of map because the loop runs only for its side
// effect; map would build and discard an array of undefined values.
const linksArray = Array.from(document.querySelectorAll("a"));
linksArray.forEach((link) => {
  console.log(link.href);
});

Capture All Links (Alternative)

// Walk the collection returned by getElementsByTagName by index and log each href.
const links = document.getElementsByTagName("a");
for (let index = 0; index < links.length; index += 1) {
  const { href } = links[index];
  console.log(href);
}

Capture All Links (Alternative)

// Fold the anchors into an array of hrefs, logging each one along the way.
const linksArray = Array.from(document.querySelectorAll("a"));
const linksHref = linksArray.reduce((hrefs, { href }) => {
  console.log(href);
  return [...hrefs, href];
}, []);

Capture All Links (Alternative)

// Snapshot the anchors into an array, then build the list of hrefs while
// echoing each entry to the console.
const linksArray = [...document.querySelectorAll("a")];
const linksHref = linksArray.map(({ href }) => {
  console.log(href);
  return href;
});

Capture All Links (Alternative)

// Collect the href of every link on the page.
const urls = [...document.links].map(({ href }) => href);

// Open a blank tab and list the URLs there as clickable links.
const newTab = window.open();
if (newTab === null) {
  // window.open() returns null when the browser blocks the pop-up.
  console.error("Pop-up blocked: allow pop-ups for this page to list the links in a new tab.");
} else {
  // Build the list with DOM nodes rather than document.write: assigning
  // href/textContent never parses the URL as markup, so a crafted link
  // (e.g. a javascript: URL containing quotes or tags) cannot inject HTML.
  const listing = newTab.document.createDocumentFragment();
  urls.forEach((url) => {
    const anchor = newTab.document.createElement("a");
    anchor.href = url;
    anchor.target = "_blank";
    anchor.rel = "noopener noreferrer";
    anchor.textContent = url;
    listing.append(anchor, newTab.document.createElement("br"));
  });
  newTab.document.body.append(listing);
}

Capture All Links (Alternative)

// Array.from turns the document.links collection into an array; log the href of each link.
Array.from(document.links).forEach((link) => {
  console.log(link.href);
});

Capture All Links (Alternative)

// Array.from's mapping callback extracts each href while copying the collection.
// console.log is wrapped in an arrow function because forEach calls its
// callback with (value, index, array); passing console.log directly would
// print the index and the entire array next to every URL.
Array.from(document.links, ({ href }) => href).forEach((href) => console.log(href));

Capture Links Using getElementsByTagName

// Spread the collection returned by getElementsByTagName into an array and log every href.
[...document.getElementsByTagName('a')].forEach(({ href }) => {
  console.log(href);
});

Capture Links Using getElementsByTagName (Alternative)

// Map anchors to their hrefs while copying (Array.from's second argument), then print each one.
Array.from(document.getElementsByTagName('a'), ({ href }) => href).forEach((href) => {
  console.log(href);
});

Capture Links Using a for Loop

// Copy the <a> elements into an array and log each href with a for...of loop.
// const keeps both bindings block-scoped; the loop variable is named for what
// it holds (an anchor element, not a URL string).
const urls = Array.from(document.getElementsByTagName('a'));
for (const anchor of urls) {
    console.log(anchor.href);
}

Capture Emails Using Regular Expression

// Matches e-mail-like tokens: local part, "@", domain labels, and a
// top-level domain of two or more letters. Inside a character class "|" is a
// literal pipe rather than alternation, so the TLD class is just [A-Za-z].
const regex = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g;
// Search the serialized markup of the whole document.
const html = document.documentElement.innerHTML;
let match;
// With the g flag, exec() resumes from regex.lastIndex on each call and
// returns null once no further match exists.
while ((match = regex.exec(html)) !== null) {
  console.log(match[0]);
}

Capture All Images

// Copy document.images into an array and log the src of every image.
Array.from(document.images).forEach((image) => {
  console.log(image.src);
});

Capture Stylesheets

// Log the URL of every stylesheet loaded from a file.
// Inline <style> sheets have a null href, so they are skipped instead of
// printing "null" among the URLs.
Array.from(document.styleSheets)
  .filter(({ href }) => href !== null)
  .forEach(({ href }) => console.log(href));

Capture Internal Links

// Keep only the links whose hostname matches the current page's hostname, then log them.
Array.from(document.links)
  .filter(({ hostname }) => hostname === location.hostname)
  .forEach((link) => console.log(link.href));

Capture External Links

// Log the links that point to a different host than the current page.
// Links without a host (mailto:, tel:, javascript:) report an empty hostname;
// they are excluded so they are not mistaken for external sites.
Array.from(document.links)
  .filter(({ hostname }) => hostname !== '' && hostname !== location.hostname)
  .forEach(({ href }) => console.log(href));

Capture Unique URLs

// A Set keeps one entry per distinct href, so repeated links are logged only once.
// The binding is never reassigned, hence const; Array.from's mapping callback
// extracts the hrefs without building an intermediate array of elements.
const uniqueURLs = new Set(Array.from(document.links, ({ href }) => href));
uniqueURLs.forEach((url) => console.log(url));

Capture PDF Links

// Log every link whose path ends in ".pdf".
// The check uses pathname (not the full href) so a query string or fragment
// after the file name does not hide the extension, and lower-cases it so
// ".PDF" is matched as well.
Array.from(document.links)
  .filter(({ pathname }) => pathname.toLowerCase().endsWith('.pdf'))
  .forEach(({ href }) => console.log(href));

Capture Download Links

// Select the anchors that carry a download attribute and log each href.
Array.from(document.querySelectorAll('a[download]')).forEach((anchor) => {
  console.log(anchor.href);
});

Capture Mailto Links

// The ^= attribute selector matches anchors whose href attribute starts with "mailto:".
Array.from(document.querySelectorAll('a[href^="mailto:"]')).forEach((anchor) => {
  console.log(anchor.href);
});

Capture Tel Links

// Select anchors whose href attribute starts with "tel:" and log each one.
for (const { href } of Array.from(document.querySelectorAll('a[href^="tel:"]'))) {
  console.log(href);
}