import { test, expect } from "@playwright/test";
import fs from "fs/promises";
import Papa from "papaparse";

// Path of the CSV file the scraped rows are written to.
const OUTPUT_FILE = "output.csv";

/**
 * Opens https://example.com, checks that the page has a non-empty <h1>,
 * collects the link name and href of every `ul.list > li` item, and writes
 * the collected rows to OUTPUT_FILE as CSV.
 */
test("scrape data from example site", async ({ page }) => {
  await page.goto("https://example.com");

  // Sanity check: the page rendered a heading with some text in it.
  const heading = await page.locator("h1").textContent();
  console.log({ heading });
  expect(heading).toBeTruthy(); // optional check

  // Wait for the list to load before reading its items.
  await page.waitForSelector("ul.list");

  // This callback runs inside the browser page, so it may only use DOM APIs
  // and must return plain serializable data.
  const documents = await page.$$eval("ul.list > li", (items) =>
    items.map((li) => {
      const anchor = li.querySelector("a");
      const href = anchor?.getAttribute("href") ?? "";
      const nameEl = anchor?.querySelector(".name p");

      // Work on a clone so the live page DOM is left untouched while the
      // <em> elements are removed from the name text.
      const cleanText = nameEl ? (nameEl.cloneNode(true) as HTMLElement) : null;
      if (cleanText) {
        cleanText.querySelectorAll("em").forEach((em) => em.remove());
      }

      // Items without a link or a name element yield empty strings.
      const name = cleanText?.textContent?.trim() ?? "";
      return { name, href };
    })
  );

  // Convert the { name, href } rows to CSV and write them to disk.
  const csv = Papa.unparse(documents);
  await fs.writeFile(OUTPUT_FILE, csv, "utf-8");

  // Report the path that was actually written.
  console.log(`✅ Saved to ${OUTPUT_FILE}`);
});