Puppeteer - NodeJS Scraping: Unterschied zwischen den Versionen

Aus Wikizone
Wechseln zu: Navigation, Suche
 
(35 dazwischenliegende Versionen von 4 Benutzern werden nicht angezeigt)
Zeile 1: Zeile 1:
 +
== Puppeteer ==
 +
Puppeteer Hauptseite.
 +
 +
== Links ==
 +
https://pptr.dev/
 +
https://www.youtube.com/watch?v=CngYXf9aeg8&list=PLGreOtbNU07rDURvnQpDaT3XokxlranUQ
 +
https://blog.risingstack.com/pdf-from-html-node-js-puppeteer/
 +
https://advancedweb.hu/how-to-speed-up-puppeteer-scraping-with-parallelization/
 +
https://jsoverson.medium.com/using-chrome-devtools-protocol-with-puppeteer-737a1300bac0 *
 
== Quickstart ==
 
== Quickstart ==
 
  https://www.youtube.com/watch?v=Sag-Hz9jJNg
 
  https://www.youtube.com/watch?v=Sag-Hz9jJNg
 
Voraussetzung: VisualStudioCode, NodeJS installiert
 
Voraussetzung: VisualStudioCode, NodeJS installiert
  
Ordner erstellen und NodeJS Projekt starten
+
=== Ordner erstellen und NodeJS Projekt starten ===
  
Terminal
+
'''Terminal'''
 
  npm init -y
 
  npm init -y
 
  npm install puppeteer
 
  npm install puppeteer
  
Installiert auch Chromium. Schau mal in die
+
Installiert auch Chromium. Schau mal in die package.json
 +
 
 +
==== Zusätzliche Module ====
 +
Oft benötigt man zusätzliche Module, z. B. zum CSV-Parsen oder für den Zugriff auf das Filesystem. Diese einfach mit npm installieren:
 +
npm install csv-parser
 +
npm install fs
 +
usw.
 +
Dann kann man sie im Skript genauso einbinden wie Puppeteer.
 +
const csv = require('csv-parser');
 +
const fs = require('fs');
 +
const puppeteer = require("puppeteer");
 +
 
 +
=== Grundstruktur ===
 +
Als Basis kommt fast immer ein Konstrukt ähnlich dem folgenden zum Einsatz. Im Wesentlichen passiert folgendes:
  
 
index.js erstellen. Puppeteer laden mit asynchroner Funktion. Diese Funktion
 
index.js erstellen. Puppeteer laden mit asynchroner Funktion. Diese Funktion
 +
 +
<syntaxhighlight lang="javascript">
 +
const puppeteer = require("puppeteer"); //pup
 +
(async () => {
 +
  const browser = await puppeteer.launch({headless: true}); // open a new browser - headless (default) or with displaying
 +
  const page = await browser.newPage();  // open a new tab
 +
  await page.goto("https://schlegel.media/"); // navigate to a url
 +
  // do s.th.
 +
  await browser.close(); // close the browser
 +
}) ();
 +
</syntaxhighlight>
 +
 +
=== Beispiel Screenshot von Seite anfertigen ===
 
<syntaxhighlight lang="javascript">
 
<syntaxhighlight lang="javascript">
 
const puppeteer = require("puppeteer");
 
const puppeteer = require("puppeteer");
 
(async () => {
 
(async () => {
 +
  const browser = await puppeteer.launch({headless: false}) // launch can launch headless or with displaying
 +
  const page = await browser.newPage() // open new tab in browser
 +
  await page.goto("https://schlegel.media")
 +
  await page.screenshot({path: "screenshot.png"})
 +
 +
  await browser.close()
 
}) ();
 
}) ();
 
</syntaxhighlight>
 
</syntaxhighlight>
 +
 +
'''Starten mit'''
 +
node index.js
 +
 +
== Beispiel Skripte ==
 +
Hinweis: Da die Skripte in diesem Setup keine ES Module sind, gab es bei mir Probleme in Node wenn man die Strichpunkte weglässt.
 +
 +
=== DOM Elemente scrapen mit evaluate ===
 +
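Zum Scrapen bietet sich die evaluate-Funktion an: Sie führt eine Funktion im Kontext der Seite aus und liefert deren Rückgabewert an das Skript zurück.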
 +
<syntaxhighlight lang="javascript">
 +
const puppeteer = require("puppeteer")
 +
(async () => {
 +
  const browser = await puppeteer.launch({headless: false}) // launch can launch headless or with displaying
 +
  const page = await browser.newPage() // open new tab in browser
 +
  await page.goto("https://schlegel.media")
 +
 +
  const grabSlogan = await page.evaluate( () => {
 +
    const slogan = document.querySelector(".uk-text-lead")
 +
    //return slogan.innerHTML // with html tags
 +
    return slogan.innerText // only the text
 +
  })
 +
 +
  console.log(grabSlogan)
 +
  await browser.close()
 +
}) ()
 +
</syntaxhighlight>
 +
 +
// grab multiple elements
 +
<syntaxhighlight lang="javascript">
 +
//... wie oben
 +
  const grabList = await page.evaluate( () => {
 +
    const listTags = document.querySelectorAll(".uk-nav-default li")
 +
    let listItems = []
 +
    listTags.forEach((tag) => {
 +
      listItems.push(tag.innerText)
 +
    })
 +
 +
    return listItems
 +
  })
 +
  console.log(grabList)
 +
</syntaxhighlight>
 +
 +
Komplexere DOM-Zugriffe
 +
<syntaxhighlight lang="javascript">
 +
const puppeteer = require("puppeteer");
 +
(async () => {
 +
  const browser = await puppeteer.launch({headless: false}); // launch can launch headless or with displaying
 +
  const page = await browser.newPage(); // open new tab in browser
 +
  await page.goto("https://quotes.toscrape.com/");
 +
 +
  const grab = await page.evaluate( () => {
 +
    let arrElements = [];
 +
    const quotes = document.querySelectorAll(".quote");
 +
    quotes.forEach( (quote) => {
 +
      const quoteSpans = quote.querySelectorAll("span");
 +
      const quoteText = quoteSpans[0].innerHTML;
 +
      const quoteAuthor = quoteSpans[1].querySelector("small").innerHTML;
 +
      arrElements.push({'quote': quoteText, 'author': quoteAuthor});
 +
    });
 +
    return arrElements;
 +
  });
 +
 +
  console.log(grab);
 +
  await browser.close();
 +
}) ();
 +
</syntaxhighlight>
 +
 +
=== User actions simulieren ===
 +
<syntaxhighlight lang="javascript">
 +
const puppeteer = require("puppeteer");
 +
(async () => {
 +
  const browser = await puppeteer.launch({headless: false}); // launch can launch headless or with displaying
 +
  const page = await browser.newPage(); // open new tab in browser
 +
  await page.goto("https://quotes.toscrape.com/");
 +
 +
  await page.click('a[href="/login"]'); // click login link
 +
  await page.type('#username','myUserName',{delay:300});
 +
  await page.type('#password','mySecret');
 +
  await page.click('input[type="submit"]');
 +
  //await browser.close();
 +
}) ();
 +
</syntaxhighlight>
 +
 +
=== Computed Styles von DOM Elementen auslesen ===
 +
'''Styles eines DOM Elements''' finden. Hier nutzen wir mal die $eval Funktion.
 +
<syntaxhighlight lang="javascript">
 +
const puppeteer = require("puppeteer");
 +
(async () => {
 +
 +
  const browser = await puppeteer.launch({headless: true}); // launch can launch headless or with displaying
 +
  const page = await browser.newPage(); // open new tab in browser
 +
  await page.goto("https://schlegel.media/");
 +
 +
  // get styles of element
 +
  const myStyles = await page.$eval('body', el => getComputedStyle(el).getPropertyValue('font-family')
 +
  );
 +
  console.log(myStyles);
 +
 +
  await browser.close();
 +
}) ();
 +
</syntaxhighlight>
 +
Hinweis: Handle Functions sind nicht so performant aber eher menschenähnlich. Bei einem Klick würde der Browser tatsächlich die Maus bewegen statt einfach einen Klick Event zu senden.
 +
 +
'''Evaluate Version''' - besser zu debuggen Unterschiede in der Ausführung. Siehe: https://stackoverflow.com/questions/55664420/page-evaluate-vs-puppeteer-methods
 +
<syntaxhighlight lang="javascript">
 +
const puppeteer = require("puppeteer");
 +
(async () => {
 +
  const browser = await puppeteer.launch({headless: true}); // launch can launch headless or with displaying
 +
  const page = await browser.newPage(); // open new tab in browser
 +
  await page.goto("https://schlegel.media/");
 +
 +
  // get styles of element
 +
  const getStyles = await page.evaluate( () =>{
 +
    const el = document.querySelector('body');
 +
    const myStyle = getComputedStyle(el).getPropertyValue('font-family');
 +
    return myStyle
 +
  });
 +
  console.log(getStyles);
 +
 +
  await browser.close();
 +
}) ();
 +
</syntaxhighlight>
 +
 +
'''So kann man alle Styles auslesen:'''
 +
<syntaxhighlight lang="javascript">
 +
const puppeteer = require("puppeteer");
 +
(async () => {
 +
 +
  const browser = await puppeteer.launch({headless: true}); // launch can launch headless or with displaying
 +
  const page = await browser.newPage(); // open new tab in browser
 +
  await page.goto("https://schlegel.media/");
 +
 +
  // get styles of element
 +
  const getStyles = await page.evaluate( () =>{
 +
    const el = document.querySelector('p');
 +
    //const myStyle = getComputedStyle(el).getPropertyValue('font-family'); // get a specific style
 +
    const stylesObject = getComputedStyle(el);
 +
    const myStyles = {};
 +
    for (const prop in stylesObject) {
 +
      if(stylesObject.hasOwnProperty(prop)){ // filter out
 +
        myStyles[prop] = stylesObject[prop];
 +
      }
 +
    }
 +
    //return myStyle;
 +
    return myStyles;
 +
  });
 +
  console.log(getStyles);
 +
 +
  await browser.close();
 +
}) ();
 +
</syntaxhighlight>
 +
 +
=== PDF generieren ===
 +
Siehe auch
 +
https://blog.risingstack.com/pdf-from-html-node-js-puppeteer/
 +
 +
<syntaxhighlight lang="javascript">
 +
const puppeteer = require("puppeteer");
 +
(async () => {
 +
 +
  const browser = await puppeteer.launch({headless: true}); // launch can launch headless or with displaying
 +
  const page = await browser.newPage(); // open new tab in browser
 +
  await page.goto("https://schlegel.media/");
 +
 +
  // print pdf
 +
  await page.pdf({
 +
    path: "myWebsite.pdf", // mandatory - rest ist optional
 +
    format: 'A4', // default is letter
 +
    margin: {
 +
      top: '100px',
 +
      bottom: '100px'
 +
    },
 +
    printBackground: true,
 +
    displayHeaderFooter: true,
 +
    headerTemplate: `<p style="font-size: 10px; font-family: Arial, Helvetica, sans-serif; margin: 0 auto;"><span class="title"></span></p>`,
 +
    footerTemplate: `<p style="font-size:10px; font-family: Arial, Helvetica, sans-serif; margin: 0 auto;"><span class="pageNumber"></span> of <span class="totalPages"></span></p>`
 +
  })
 +
 +
  await browser.close();
 +
}) ();
 +
</syntaxhighlight>
 +
=== Crawl multiple pages ===
 +
https://stackoverflow.com/questions/46293216/crawling-multiple-urls-in-a-loop-using-puppeteer
 +
<syntaxhighlight lang="javascript">
 +
page.setDefaultNavigationTimeout(0); // prevent timeout after 30s.
 +
//...
 +
urls = ['url','url','url'...]
 +
 +
for (let i = 0; i < urls.length; i++) {
 +
    const url = urls[i];
 +
    await page.goto(`${url}`);
 +
    await page.waitForNavigation({ waitUntil: 'networkidle2' });
 +
}
 +
</syntaxhighlight>
 +
 +
=== Input- und Output-Files ===
 +
https://github.com/Zrce/puppeteer-coverage-report-test/blob/master/index.js
 +
https://stackoverflow.com/questions/59981135/puppeteer-iterate-over-a-csv-file-and-screenshot-for-each-row
 +
 +
'''CSV Datei mit URLs abarbeiten - seriell/parallel'''
 +
[[Puppeteer - CSV Datei sequentiell / parallel a abarbeiten (Beispiel)]]
 +
 +
<syntaxhighlight lang="javascript">
 +
 +
</syntaxhighlight>
 +
 +
=== Bilder und Screenshots ===
 +
==== Screenshots in pdf einbetten ====
 +
* Voraussetzung sind diese Flags für den Filezugriff: ''--allow-file-access-from-files, --enable-local-file-accesses''
 +
* Screenshot anfertigen
 +
* Base64 codieren
 +
* Einbetten
 +
<syntaxhighlight lang="javascript">
 +
 +
</syntaxhighlight>
 +
 +
 +
==== Bilder aus DOM speichern und in pdf umwandeln ====
 +
https://stackoverflow.com/questions/59677228/convert-screenshot-to-pdf-in-puppeteer
 +
<syntaxhighlight lang="javascript">
 +
await page.goto('https://www.chromestatus.com/samples', {waitUntil: 'networkidle0'});
 +
 +
    async function screenshotDOMElement(opts = {}) {
 +
        const padding = 'padding' in opts ? opts.padding : 0;
 +
        const path = 'path' in opts ? opts.path : null;
 +
        const selector = opts.selector;
 +
 +
        if (!selector)
 +
            throw Error('Please provide a selector.');
 +
 +
        const rect = await page.evaluate(selector => {
 +
            const element = document.querySelector(selector);
 +
            if (!element)
 +
                return null;
 +
            const {x, y, width, height} = element.getBoundingClientRect();
 +
            return {left: x, top: y, width, height, id: element.id};
 +
        }, selector);
 +
 +
        if (!rect)
 +
            throw Error(`Could not find element that matches selector: ${selector}.`);
 +
 +
        return await page.screenshot({
 +
            path,
 +
            clip: {
 +
                x: rect.left - padding,
 +
                y: rect.top - padding,
 +
                width: rect.width + padding * 2,
 +
                height: rect.height + padding * 2
 +
            }
 +
        });
 +
    }
 +
 +
    await screenshotDOMElement({
 +
        path: 'element.png',
 +
        selector: 'header aside',
 +
        padding: 16
 +
    });
 +
 +
    browser.close();
 +
    captureDomTOoPDF();
 +
})();
 +
 +
 +
function captureDomTOoPDF(){
 +
    (async () => {
 +
        const browser = await puppeteer.launch({args: ['--allow-file-access-from-files', '--enable-local-file-accesses']});
 +
        const page = await browser.newPage();
 +
        const image = 'data:image/png;base64,' + base64Encode('element.png');
 +
        await page.goto(image, {waitUntil: 'networkidle0'});
 +
        await page.pdf({path: 'output.pdf', format: 'A4'});
 +
   
 +
        await browser.close();
 +
        console.log("done");
 +
    })();
 +
}
 +
</syntaxhighlight>
 +
 +
=== Zertifikate und Puppeteer ===
 +
[[Puppeteer - Zertifikate handeln]]
 +
 +
=== Network Request Control ===
 +
https://github.com/puppeteer/puppeteer/blob/main/examples/block-images.js
 +
[[Puppeteer - RequestInterception]]

Aktuelle Version vom 7. Dezember 2022, 14:16 Uhr

Puppeteer[Bearbeiten]

Puppeteer Hauptseite.

Links[Bearbeiten]

https://pptr.dev/
https://www.youtube.com/watch?v=CngYXf9aeg8&list=PLGreOtbNU07rDURvnQpDaT3XokxlranUQ
https://blog.risingstack.com/pdf-from-html-node-js-puppeteer/
https://advancedweb.hu/how-to-speed-up-puppeteer-scraping-with-parallelization/
https://jsoverson.medium.com/using-chrome-devtools-protocol-with-puppeteer-737a1300bac0 *

Quickstart[Bearbeiten]

https://www.youtube.com/watch?v=Sag-Hz9jJNg

Voraussetzung: VisualStudioCode, NodeJS installiert

Ordner erstellen und NodeJS Projekt starten[Bearbeiten]

Terminal

npm init -y
npm install puppeteer

Installiert auch Chromium. Schau mal in die package.json

Zusätzliche Module[Bearbeiten]

Oft benötigt man zusätzliche Module, z. B. zum CSV-Parsen oder für den Zugriff auf das Filesystem. Diese einfach mit npm installieren:

npm install csv-parser
npm install fs

usw. Dann kann man sie im Skript genauso einbinden wie Puppeteer. Hinweis: fs ist ein Kernmodul von Node.js und muss nicht extra installiert werden.

const csv = require('csv-parser');
const fs = require('fs');
const puppeteer = require("puppeteer");

Grundstruktur[Bearbeiten]

Als Basis kommt fast immer ein Konstrukt ähnlich dem folgenden zum Einsatz. Im Wesentlichen passiert folgendes:

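index.js erstellen. Puppeteer laden und den Ablauf in eine asynchrone Funktion packen. Diese Funktion wird direkt nach ihrer Definition aufgerufen, damit darin await verwendet werden kann.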

const puppeteer = require("puppeteer"); // load the Puppeteer library

// Basic skeleton: launch a browser, open a tab, navigate, do the work, shut down.
(async () => {
  const browser = await puppeteer.launch({headless: true}); // open a new browser - headless or with a visible window
  try {
    const page = await browser.newPage();  // open a new tab
    await page.goto("https://schlegel.media/"); // navigate to a url
    // do s.th.
  } finally {
    // close the browser even when one of the steps above throws,
    // otherwise the launched browser process is left running
    await browser.close();
  }
})().catch((err) => {
  // report the failure instead of leaving the promise rejection unhandled
  console.error(err);
  process.exitCode = 1;
});

Beispiel Screenshot von Seite anfertigen[Bearbeiten]

const puppeteer = require("puppeteer");

// Opens https://schlegel.media and saves a screenshot of it to screenshot.png.
const takeScreenshot = async () => {
  // headless: false launches the browser with a visible window
  const browser = await puppeteer.launch({headless: false});
  const page = await browser.newPage(); // new tab in that browser
  await page.goto("https://schlegel.media");
  await page.screenshot({path: "screenshot.png"});

  await browser.close();
};

takeScreenshot();

Starten mit

node index.js

Beispiel Skripte[Bearbeiten]

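Hinweis: Da die Skripte in diesem Setup keine ES Module sind, gab es bei mir Probleme in Node, wenn man die Strichpunkte weglässt. Ursache: Steht nach der require-Zeile kein Strichpunkt, wird die folgende Zeile "(async () => {" als Funktionsaufruf auf dem Ergebnis von require(...) gelesen und das Skript bricht mit einem TypeError ab. Deshalb Strichpunkte immer setzen.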

DOM Elemente scrapen mit evaluate[Bearbeiten]

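Zum Scrapen bietet sich die evaluate-Funktion an: Sie führt eine Funktion im Kontext der Seite aus und liefert deren Rückgabewert an das Skript zurück.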

// The semicolon after require(...) is mandatory here: without it the "(" of the
// following line is parsed as a call on the value returned by require(...).
const puppeteer = require("puppeteer");

// Logs the text of the first element with the class "uk-text-lead".
(async () => {
  const browser = await puppeteer.launch({headless: false}); // launch can launch headless or with displaying
  try {
    const page = await browser.newPage(); // open new tab in browser
    await page.goto("https://schlegel.media");

    // the callback runs inside the page; its return value is handed back to node
    const grabSlogan = await page.evaluate(() => {
      const slogan = document.querySelector(".uk-text-lead");
      // slogan.innerHTML would return the content including html tags
      return slogan ? slogan.innerText : null; // only the text; null when nothing matches
    });

    console.log(grabSlogan);
  } finally {
    // close the browser even if navigation or evaluation failed
    await browser.close();
  }
})();

// grab multiple elements

//... same setup as above
  // collect the text of every element matching ".uk-nav-default li" into an array
  const grabList = await page.evaluate(() =>
    Array.from(
      document.querySelectorAll(".uk-nav-default li"),
      (item) => item.innerText
    )
  );
  console.log(grabList);

Komplexere DOM-Zugriffe

const puppeteer = require("puppeteer");

// Collects every ".quote" element of quotes.toscrape.com as {quote, author} and logs the list.
async function scrapeQuotes() {
  const browser = await puppeteer.launch({headless: false}); // browser with a visible window
  const page = await browser.newPage();
  await page.goto("https://quotes.toscrape.com/");

  const grab = await page.evaluate(() =>
    Array.from(document.querySelectorAll(".quote"), (quoteNode) => {
      // the first span is read as the quote, the <small> inside the second span as the author
      const [textSpan, authorSpan] = quoteNode.querySelectorAll("span");
      const quoteText = textSpan.innerHTML;
      const quoteAuthor = authorSpan.querySelector("small").innerHTML;
      return {quote: quoteText, author: quoteAuthor};
    })
  );

  console.log(grab);
  await browser.close();
}

scrapeQuotes();

User actions simulieren[Bearbeiten]

const puppeteer = require("puppeteer");

// Simulates a user: opens the login page, fills in the form and submits it.
(async () => {
  const browser = await puppeteer.launch({headless: false}); // launch can launch headless or with displaying
  const page = await browser.newPage(); // open new tab in browser
  await page.goto("https://quotes.toscrape.com/");

  // Clicking the link starts a navigation. The wait is registered together with
  // the click so the form is only typed into once the login page has loaded.
  await Promise.all([
    page.waitForNavigation(),
    page.click('a[href="/login"]'), // click login link
  ]);
  await page.type('#username','myUserName',{delay:300}); // delay option of page.type (time between key presses)
  await page.type('#password','mySecret');
  // submitting the form navigates as well, so wait for that page load too
  await Promise.all([
    page.waitForNavigation(),
    page.click('input[type="submit"]'),
  ]);
  //await browser.close(); // NOTE(review): presumably left open to inspect the result - confirm
})();

Computed Styles von DOM Elementen auslesen[Bearbeiten]

Styles eines DOM Elements finden. Hier nutzen wir mal die $eval Funktion.

const puppeteer = require("puppeteer");

// Logs the computed font-family of the <body> element, read via page.$eval.
const logBodyFont = async () => {
  const browser = await puppeteer.launch({headless: true}); // headless: no browser window
  const page = await browser.newPage();
  await page.goto("https://schlegel.media/");

  // $eval passes the element matching the selector to the callback inside the page
  const readFontFamily = (el) => getComputedStyle(el).getPropertyValue('font-family');
  const myStyles = await page.$eval('body', readFontFamily);
  console.log(myStyles);

  await browser.close();
};

logBodyFont();

Hinweis: Handle Functions sind nicht so performant, aber eher menschenähnlich. Bei einem Klick würde der Browser tatsächlich die Maus bewegen, statt einfach ein Klick-Event zu senden.

Evaluate Version - besser zu debuggen. Es gibt Unterschiede in der Ausführung, siehe: https://stackoverflow.com/questions/55664420/page-evaluate-vs-puppeteer-methods

const puppeteer = require("puppeteer");

// Same result as the $eval variant, but the element lookup happens inside page.evaluate.
async function main() {
  const browser = await puppeteer.launch({headless: true}); // headless: no browser window
  const page = await browser.newPage();
  await page.goto("https://schlegel.media/");

  // look up <body> in the page and return its computed font-family
  const getStyles = await page.evaluate(() =>
    getComputedStyle(document.querySelector('body')).getPropertyValue('font-family')
  );
  console.log(getStyles);

  await browser.close();
}

main();

So kann man alle Styles auslesen:

const puppeteer = require("puppeteer");

// Logs every own enumerable entry of the computed style object of the first <p>.
async function logAllStyles() {
  const browser = await puppeteer.launch({headless: true}); // headless: no browser window
  const page = await browser.newPage();
  await page.goto("https://schlegel.media/");

  const getStyles = await page.evaluate(() => {
    const stylesObject = getComputedStyle(document.querySelector('p'));
    // for a single value use stylesObject.getPropertyValue('font-family') instead
    // Object.keys lists only own enumerable keys, so inherited members are left out
    return Object.fromEntries(
      Object.keys(stylesObject).map((prop) => [prop, stylesObject[prop]])
    );
  });
  console.log(getStyles);

  await browser.close();
}

logAllStyles();

PDF generieren[Bearbeiten]

Siehe auch

https://blog.risingstack.com/pdf-from-html-node-js-puppeteer/
const puppeteer = require("puppeteer");

// Renders https://schlegel.media/ into myWebsite.pdf with a header and a footer.
async function printPdf() {
  const browser = await puppeteer.launch({headless: true}); // headless: no browser window
  const page = await browser.newPage();
  await page.goto("https://schlegel.media/");

  // options for page.pdf, kept in one object for readability
  const pdfOptions = {
    path: "myWebsite.pdf", // output file
    format: 'A4', // default is letter
    margin: {top: '100px', bottom: '100px'},
    printBackground: true,
    displayHeaderFooter: true,
    // the spans with class title / pageNumber / totalPages are placeholders in the templates
    headerTemplate: `<p style="font-size: 10px; font-family: Arial, Helvetica, sans-serif; margin: 0 auto;"><span class="title"></span></p>`,
    footerTemplate: `<p style="font-size:10px; font-family: Arial, Helvetica, sans-serif; margin: 0 auto;"><span class="pageNumber"></span> of <span class="totalPages"></span></p>`,
  };
  await page.pdf(pdfOptions);

  await browser.close();
}

printPdf();

Crawl multiple pages[Bearbeiten]

https://stackoverflow.com/questions/46293216/crawling-multiple-urls-in-a-loop-using-puppeteer
page.setDefaultNavigationTimeout(0); // 0 disables the navigation timeout (otherwise 30s)
//...
const urls = ['url', 'url', 'url' /* , ... */];

// visit the urls one after another in the same tab
for (const url of urls) {
    // goto() already waits for the navigation it starts, so the wait condition is
    // passed to it directly. A separate waitForNavigation() afterwards would wait
    // for another navigation that never starts - forever, with the timeout disabled.
    await page.goto(url, { waitUntil: 'networkidle2' });
}

Input- und Output-Files[Bearbeiten]

https://github.com/Zrce/puppeteer-coverage-report-test/blob/master/index.js
https://stackoverflow.com/questions/59981135/puppeteer-iterate-over-a-csv-file-and-screenshot-for-each-row

CSV Datei mit URLs abarbeiten - seriell/parallel Puppeteer - CSV Datei sequentiell / parallel a abarbeiten (Beispiel)

Bilder und Screenshots[Bearbeiten]

Screenshots in pdf einbetten[Bearbeiten]

  • Voraussetzung sind diese Flags für den Filezugriff: --allow-file-access-from-files, --enable-local-file-accesses
  • Screenshot anfertigen
  • Base64 codieren
  • Einbetten


Bilder aus DOM speichern und in pdf umwandeln[Bearbeiten]

https://stackoverflow.com/questions/59677228/convert-screenshot-to-pdf-in-puppeteer
 await page.goto('https://www.chromestatus.com/samples', {waitUntil: 'networkidle0'});

    /**
     * Takes a screenshot of the first element matching a selector.
     *
     * @param {object} [opts]
     * @param {string} opts.selector - CSS selector of the element (required)
     * @param {number} [opts.padding=0] - extra space added on every side of the element
     * @param {?string} [opts.path=null] - passed through as `path` to page.screenshot
     * @returns {Promise<*>} whatever page.screenshot resolves to
     * @throws {Error} if no selector is given or no element matches it
     */
    async function screenshotDOMElement(opts = {}) {
        let padding = 0;
        if ('padding' in opts) {
            padding = opts.padding;
        }
        let path = null;
        if ('path' in opts) {
            path = opts.path;
        }
        const {selector} = opts;

        if (!selector) {
            throw Error('Please provide a selector.');
        }

        // measure the element inside the page; null signals "no match"
        const box = await page.evaluate((sel) => {
            const node = document.querySelector(sel);
            if (!node) {
                return null;
            }
            const bounds = node.getBoundingClientRect();
            return {
                left: bounds.x,
                top: bounds.y,
                width: bounds.width,
                height: bounds.height,
                id: node.id
            };
        }, selector);

        if (!box) {
            throw Error(`Could not find element that matches selector: ${selector}.`);
        }

        // grow the measured box by the padding on all four sides
        const clip = {
            x: box.left - padding,
            y: box.top - padding,
            width: box.width + padding * 2,
            height: box.height + padding * 2
        };
        const shot = await page.screenshot({path, clip});
        return shot;
    }

    // save the element matched by 'header aside' (plus 16px padding) as element.png
    await screenshotDOMElement({
        path: 'element.png',
        selector: 'header aside',
        padding: 16
    });

    // wait until this browser has shut down before the PDF step launches its own
    await browser.close();
    captureDomTOoPDF();
})();


/**
 * Loads the saved screenshot element.png as a base64 data URL and prints it to output.pdf.
 *
 * @returns {Promise<void>} settles when the PDF has been written and the browser is closed;
 *   callers that do not need to wait can keep ignoring the return value
 */
async function captureDomTOoPDF(){
    const fs = require('fs'); // Node core module, needed to read the screenshot file
    const browser = await puppeteer.launch({args: ['--allow-file-access-from-files', '--enable-local-file-accesses']});
    try {
        const page = await browser.newPage();
        // the snippet never defined base64Encode(), so the file is read and encoded here
        const encoded = fs.readFileSync('element.png').toString('base64');
        const image = `data:image/png;base64,${encoded}`;
        await page.goto(image, {waitUntil: 'networkidle0'});
        await page.pdf({path: 'output.pdf', format: 'A4'});
    } finally {
        // close the browser even if reading, loading or printing failed
        await browser.close();
    }
    console.log("done");
}

Zertifikate und Puppeteer[Bearbeiten]

Puppeteer - Zertifikate handeln

Network Request Control[Bearbeiten]

https://github.com/puppeteer/puppeteer/blob/main/examples/block-images.js
Puppeteer - RequestInterception