Skip to content

JavaScript Extraction Techniques


Dump All Links (Basic)

// Read the href of every entry in document.links, then print each one.
Array.from(document.links, (link) => link.href)
  .forEach((href) => console.log(href));

Dump All Links Using querySelectorAll

// Select every <a> element and print its href.
document.querySelectorAll("a").forEach((anchor) => {
  console.log(anchor.href);
});

Dump All Links Using for…of

// Walk the <a> elements with for...of, pulling href out of each one.
for (const { href } of document.querySelectorAll("a")) {
  console.log(href);
}

Dump All Links Using getElementsByTagName

// Copy the collection of <a> elements into an array, then print each href.
for (const anchor of Array.from(document.getElementsByTagName("a"))) {
  console.log(anchor.href);
}

Dump All Links (Classic for-loop)

// Index-based walk over the collection of <a> elements.
const links = document.getElementsByTagName("a");
for (let index = 0; index < links.length; index += 1) {
  const { href } = links[index];
  console.log(href);
}

Dump Unique Links

// A Set keeps one copy of each href; print them in first-seen order.
new Set(Array.from(document.querySelectorAll("a"), (anchor) => anchor.href))
  .forEach((url) => console.log(url));

Open All Links in a New Tab (Clickable List)

// Collect every link URL on the page and show them as a clickable list in a new tab.
const urls = [...document.links].map((link) => link.href);
const tab = window.open();
if (tab === null) {
  // window.open() returns null when the browser blocks the pop-up.
  console.warn("Pop-up blocked: allow pop-ups for this page and run the snippet again.");
} else {
  const fragment = tab.document.createDocumentFragment();
  for (const url of urls) {
    const anchor = tab.document.createElement("a");
    anchor.href = url;
    anchor.target = "_blank";
    anchor.rel = "noopener noreferrer";
    // Set the label as text, not markup, so URLs containing &, quotes or
    // angle brackets are shown verbatim and cannot inject HTML into the tab.
    anchor.textContent = url;
    fragment.append(anchor, tab.document.createElement("br"));
  }
  tab.document.body.append(fragment);
}

Internal Links Only

// Print only the links whose hostname equals the current page's hostname.
for (const link of document.links) {
  if (link.hostname === location.hostname) {
    console.log(link.href);
  }
}

External Links Only

// Print only the links that point at a different host than the current page.
// Links without a host (mailto:, tel:, javascript:, ...) have an empty
// hostname; skip them so they are not reported as external sites.
[...document.links]
  .filter((a) => a.hostname !== "" && a.hostname !== location.hostname)
  .forEach((a) => console.log(a.href));

PDF Links

// Print links whose path ends in ".pdf".
// Test the pathname (not the whole href) so a query string or fragment after
// the file name does not hide the extension, and lower-case it so ".PDF" matches.
[...document.links]
  .filter((a) => a.pathname.toLowerCase().endsWith(".pdf"))
  .forEach((a) => console.log(a.href));

Download Attribute Links

// Print the href of every <a> element that carries a download attribute.
Array.from(document.querySelectorAll("a[download]"), (anchor) => anchor.href)
  .forEach((href) => console.log(href));

Mailto Links

// Print every <a> whose href attribute starts with "mailto:".
document.querySelectorAll('a[href^="mailto:"]').forEach((anchor) => {
  console.log(anchor.href);
});

Telephone Links

// Print every <a> whose href attribute starts with "tel:".
for (const { href } of document.querySelectorAll('a[href^="tel:"]')) {
  console.log(href);
}

Media Extraction

Capture All Images

// Print the src of each entry in document.images.
for (const image of document.images) {
  console.log(image.src);
}

Capture Stylesheets

// Read the href of each entry in document.styleSheets, then print them.
Array.from(document.styleSheets, (sheet) => sheet.href)
  .forEach((href) => console.log(href));

Dump All Image URLs to the Console

// Map each image to its src, then print the resulting URLs.
[...document.images]
  .map((image) => image.src)
  .forEach((src) => console.log(src));

List Image Files Matching a Set of File Extensions as Links in a New Tab

// Collect the URLs of all <img> elements whose file extension is in the
// allow-list, then show them as a list of links in a new tab.
const fileExtensions = ['png', 'jpg', 'gif', 'mkv', 'tar', 'zip', 'rar', 'mp4', 'jpeg'];

// Drops any "?query" or "#fragment" so "photo.png?v=2" is treated as "photo.png".
const stripQueryAndHash = (url) => url.split(/[?#]/)[0];

// Lower-cased text after the last "." of the URL, ignoring query and fragment.
const getExtension = (url) => stripQueryAndHash(url).split('.').pop().toLowerCase();

const links = [...document.querySelectorAll('img')]
    .map((img) => img.src)
    .filter((src) => fileExtensions.includes(getExtension(src)));

// Open a new tab with a blank page
const newTab = window.open('about:blank', '_blank');
if (newTab === null) {
    // window.open() returns null when the browser blocks the pop-up.
    console.warn('Pop-up blocked: allow pop-ups for this page and run the snippet again.');
} else {
    const newTabDocument = newTab.document;

    // Create a list of href links
    const ul = newTabDocument.createElement('ul');
    links.forEach((link) => {
        const li = newTabDocument.createElement('li');
        const a = newTabDocument.createElement('a');
        a.href = link;
        // Use the bare file name (no query string or fragment) as the link text
        a.textContent = stripQueryAndHash(link).split('/').pop();
        a.style.display = 'block'; // Display each link on a new line
        li.appendChild(a);
        ul.appendChild(li);
    });
    newTabDocument.body.appendChild(ul);
}

Dump All Images to a New Tab with Previews at Their Default Size

// Show every image on the page in a new tab; each preview links to the image itself.
const images = Array.from(document.images);
const imageUrls = images.map((image) => image.src);
const newTab = window.open();
if (newTab === null) {
  // window.open() returns null when the browser blocks the pop-up.
  console.warn('Pop-up blocked: allow pop-ups for this page and run the snippet again.');
} else {
  const previewDocument = newTab.document;
  const list = previewDocument.createElement('ul');
  list.style.listStyleType = 'none';
  list.style.padding = '0';
  // Build nodes instead of writing an HTML string so image URLs are never
  // parsed as markup in the new tab.
  for (const url of imageUrls) {
    const preview = previewDocument.createElement('img');
    preview.src = url;
    const anchor = previewDocument.createElement('a');
    anchor.href = url;
    anchor.target = '_blank';
    anchor.append(preview);
    const item = previewDocument.createElement('li');
    item.append(anchor);
    list.append(item);
  }
  previewDocument.body.append(list);
}

Email Extraction

Extract Emails Using Regex

// Matches e-mail-like tokens: a local part, "@", a dotted domain, and a final
// label of at least two letters. The final class is [A-Za-z]; writing it as
// [A-Z|a-z] would also accept a literal "|" character.
const regex = /\b[A-Za-z0-9._%+-]+@[A-Za-z0-9.-]+\.[A-Za-z]{2,}\b/g;

/**
 * Returns every e-mail-like substring of `text`, in order of appearance.
 * @param {string} text - Text to scan (for example, a page's HTML source).
 * @returns {string[]} The matched addresses; empty when none are found.
 */
function extractEmails(text) {
  // With a /g pattern, match() returns all matches, or null when there are none.
  return text.match(regex) || [];
}

// Scan the current page when a DOM is available; elsewhere the helper can
// still be called directly on any string.
if (typeof document !== "undefined") {
  extractEmails(document.documentElement.innerHTML)
    .forEach((email) => console.log(email));
}