From 9b1c8a79e85e45426e58ced07e147a66d29b1fb1 Mon Sep 17 00:00:00 2001 From: Ivan <8611739+IRBorisov@users.noreply.github.com> Date: Thu, 22 May 2025 13:02:10 +0300 Subject: [PATCH] Initial setup --- .gitignore | 35 ++++ .vscode/launch.json | 37 +++++ .vscode/settings.json | 3 + output.json | 3 + package-lock.json | 320 ++++++++++++++++++++++++++++++++++++ package.json | 27 +++ playwright.config.ts | 18 ++ src/scrape.spec.ts | 48 ++++++ test-results/.last-run.json | 4 + tsconfig.json | 12 ++ 10 files changed, 507 insertions(+) create mode 100644 .gitignore create mode 100644 .vscode/launch.json create mode 100644 .vscode/settings.json create mode 100644 output.json create mode 100644 package-lock.json create mode 100644 package.json create mode 100644 playwright.config.ts create mode 100644 src/scrape.spec.ts create mode 100644 test-results/.last-run.json create mode 100644 tsconfig.json diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b08e69d --- /dev/null +++ b/.gitignore @@ -0,0 +1,35 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ +.mypy_cache/ + +# React +.DS_* +*.log +*.tsbuildinfo +logs +**/*.backup.* +**/*.back.* + +node_modules +bower_components + +# Environments +/GitExtensions.settings +/playwright-report diff --git a/.vscode/launch.json b/.vscode/launch.json new file mode 100644 index 0000000..d8fbed6 --- /dev/null +++ b/.vscode/launch.json @@ -0,0 +1,37 @@ +{ + // Use IntelliSense to learn about possible attributes. + // Hover to view descriptions of existing attributes. + // For more information, visit: https://go.microsoft.com/fwlink/?linkid=830387 + "version": "0.2.0", + "configurations": [ + { + // Run all + "name": "Run all", + "type": "node", + "request": "launch", + "cwd": "${workspaceFolder}", + "runtimeExecutable": "npx", + "runtimeArgs": ["playwright", "test"], + "console": "integratedTerminal" + }, + { + // Run Tests file Debug mode + "name": "DebugCurrent", + "type": "node", + "request": "launch", + "cwd": "${workspaceFolder}", + "runtimeExecutable": "npx", + "runtimeArgs": [ + "playwright", + "test", + "${fileBasename}", + "--headed", + "--project=Desktop Chrome" + ], + "env": { + "PWDEBUG": "1" + }, + "console": "integratedTerminal" + } + ] +} diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..ba36a27 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,3 @@ +{ + "cSpell.words": ["papaparse", "unparse"] +} diff --git a/output.json b/output.json new file mode 100644 index 0000000..cf310e8 --- /dev/null +++ b/output.json @@ -0,0 +1,3 @@ +{ + "heading": "Example Domain" +} \ No newline at end of file diff --git a/package-lock.json b/package-lock.json new file mode 100644 index 0000000..ca8fd6a --- /dev/null +++ b/package-lock.json @@ -0,0 +1,320 @@ +{ + "name": "concept-garant", + "version": "1.0.0", + "lockfileVersion": 3, + "requires": true, + "packages": { + "": { + "name": "concept-garant", + "version": "1.0.0", + "license": "ISC", + "devDependencies": { + "@playwright/test": "^1.52.0", + "@types/node": "^22.15.21", + "@types/papaparse": "^5.3.16", + "papaparse": "^5.5.3", + "playwright": "^1.52.0", + "ts-node": "^10.9.2", + "typescript": "^5.8.3" + } + }, + "node_modules/@cspotcode/source-map-support": { + "version": "0.8.1", + "resolved": "https://registry.npmjs.org/@cspotcode/source-map-support/-/source-map-support-0.8.1.tgz", + "integrity": "sha512-IchNf6dN4tHoMFIn/7OE8LWZ19Y6q/67Bmf6vnGREv8RSbBVb9LPJxEcnwrcwX6ixSvaiGoomAUvu4YSxXrVgw==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/trace-mapping": "0.3.9" + }, + "engines": { + "node": ">=12" + } + }, + "node_modules/@jridgewell/resolve-uri": { + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@jridgewell/resolve-uri/-/resolve-uri-3.1.2.tgz", + "integrity": "sha512-bRISgCIjP20/tbWSPWMEi54QVPRZExkuD9lJL+UIxUKtwVJA8wW1Trb1jMs1RFXo1CBTNZ/5hpC9QvmKWdopKw==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6.0.0" + } + }, + "node_modules/@jridgewell/sourcemap-codec": { + "version": "1.5.0", + "resolved": "https://registry.npmjs.org/@jridgewell/sourcemap-codec/-/sourcemap-codec-1.5.0.tgz", + "integrity": "sha512-gv3ZRaISU3fjPAgNsriBRqGWQL6quFx04YMPW/zD8XMLsU32mhCCbfbO6KZFLjvYpCZ8zyDEgqsgf+PwPaM7GQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/@jridgewell/trace-mapping": { + "version": "0.3.9", + "resolved": "https://registry.npmjs.org/@jridgewell/trace-mapping/-/trace-mapping-0.3.9.tgz", + "integrity": "sha512-3Belt6tdc8bPgAtbcmdtNJlirVoTmEb5e2gC94PnkwEW9jI6CAHUeoG85tjWP5WquqfavoMtMwiG4P926ZKKuQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@jridgewell/resolve-uri": "^3.0.3", + "@jridgewell/sourcemap-codec": "^1.4.10" + } + }, + "node_modules/@playwright/test": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/@playwright/test/-/test-1.52.0.tgz", + "integrity": "sha512-uh6W7sb55hl7D6vsAeA+V2p5JnlAqzhqFyF0VcJkKZXkgnFcVG9PziERRHQfPLfNGx1C292a4JqbWzhR8L4R1g==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "playwright": "1.52.0" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/@tsconfig/node10": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@tsconfig/node10/-/node10-1.0.11.tgz", + "integrity": "sha512-DcRjDCujK/kCk/cUe8Xz8ZSpm8mS3mNNpta+jGCA6USEDfktlNvm1+IuZ9eTcDbNk41BHwpHHeW+N1lKCz4zOw==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node12": { + "version": "1.0.11", + "resolved": "https://registry.npmjs.org/@tsconfig/node12/-/node12-1.0.11.tgz", + "integrity": "sha512-cqefuRsh12pWyGsIoBKJA9luFu3mRxCA+ORZvA4ktLSzIuCUtWVxGIuXigEwO5/ywWFMZ2QEGKWvkZG1zDMTag==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node14": { + "version": "1.0.3", + "resolved": "https://registry.npmjs.org/@tsconfig/node14/-/node14-1.0.3.tgz", + "integrity": "sha512-ysT8mhdixWK6Hw3i1V2AeRqZ5WfXg1G43mqoYlM2nc6388Fq5jcXyr5mRsqViLx/GJYdoL0bfXD8nmF+Zn/Iow==", + "dev": true, + "license": "MIT" + }, + "node_modules/@tsconfig/node16": { + "version": "1.0.4", + "resolved": "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.4.tgz", + "integrity": "sha512-vxhUy4J8lyeyinH7Azl1pdd43GJhZH/tP2weN8TntQblOY+A0XbT8DJk1/oCPuOOyg/Ja757rG0CgHcWC8OfMA==", + "dev": true, + "license": "MIT" + }, + "node_modules/@types/node": { + "version": "22.15.21", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.15.21.tgz", + "integrity": "sha512-EV/37Td6c+MgKAbkcLG6vqZ2zEYHD7bvSrzqqs2RIhbA6w3x+Dqz8MZM3sP6kGTeLrdoOgKZe+Xja7tUB2DNkQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "undici-types": "~6.21.0" + } + }, + "node_modules/@types/papaparse": { + "version": "5.3.16", + "resolved": "https://registry.npmjs.org/@types/papaparse/-/papaparse-5.3.16.tgz", + "integrity": "sha512-T3VuKMC2H0lgsjI9buTB3uuKj3EMD2eap1MOuEQuBQ44EnDx/IkGhU6EwiTf9zG3za4SKlmwKAImdDKdNnCsXg==", + "dev": true, + "license": "MIT", + "dependencies": { + "@types/node": "*" + } + }, + "node_modules/acorn": { + "version": "8.14.1", + "resolved": "https://registry.npmjs.org/acorn/-/acorn-8.14.1.tgz", + "integrity": "sha512-OvQ/2pUDKmgfCg++xsTX1wGxfTaszcHVcTctW4UJB4hibJx2HXxxO5UmVgyjMa+ZDsiaf5wWLXYpRWMmBI0QHg==", + "dev": true, + "license": "MIT", + "bin": { + "acorn": "bin/acorn" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/acorn-walk": { + "version": "8.3.4", + "resolved": "https://registry.npmjs.org/acorn-walk/-/acorn-walk-8.3.4.tgz", + "integrity": "sha512-ueEepnujpqee2o5aIYnvHU6C0A42MNdsIDeqy5BydrkuC5R1ZuUFnm27EeFJGoEHJQgn3uleRvmTXaJgfXbt4g==", + "dev": true, + "license": "MIT", + "dependencies": { + "acorn": "^8.11.0" + }, + "engines": { + "node": ">=0.4.0" + } + }, + "node_modules/arg": { + "version": "4.1.3", + "resolved": "https://registry.npmjs.org/arg/-/arg-4.1.3.tgz", + "integrity": "sha512-58S9QDqG0Xx27YwPSt9fJxivjYl432YCwfDMfZ+71RAqUrZef7LrKQZ3LHLOwCS4FLNBplP533Zx895SeOCHvA==", + "dev": true, + "license": "MIT" + }, + "node_modules/create-require": { + "version": "1.1.1", + "resolved": "https://registry.npmjs.org/create-require/-/create-require-1.1.1.tgz", + "integrity": "sha512-dcKFX3jn0MpIaXjisoRvexIJVEKzaq7z2rZKxf+MSr9TkdmHmsU4m2lcLojrj/FHl8mk5VxMmYA+ftRkP/3oKQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/diff": { + "version": "4.0.2", + "resolved": "https://registry.npmjs.org/diff/-/diff-4.0.2.tgz", + "integrity": "sha512-58lmxKSA4BNyLz+HHMUzlOEpg09FV+ev6ZMe3vJihgdxzgcwZ8VoEEPmALCZG9LmqfVoNMMKpttIYTVG6uDY7A==", + "dev": true, + "license": "BSD-3-Clause", + "engines": { + "node": ">=0.3.1" + } + }, + "node_modules/fsevents": { + "version": "2.3.2", + "resolved": "https://registry.npmjs.org/fsevents/-/fsevents-2.3.2.tgz", + "integrity": "sha512-xiqMQR4xAeHTuB9uWm+fFRcIOgKBMiOBP+eXiyT7jsgVCq1bkVygt00oASowB7EdtpOHaaPgKt812P9ab+DDKA==", + "dev": true, + "hasInstallScript": true, + "license": "MIT", + "optional": true, + "os": [ + "darwin" + ], + "engines": { + "node": "^8.16.0 || ^10.6.0 || >=11.0.0" + } + }, + "node_modules/make-error": { + "version": "1.3.6", + "resolved": "https://registry.npmjs.org/make-error/-/make-error-1.3.6.tgz", + "integrity": "sha512-s8UhlNe7vPKomQhC1qFelMokr/Sc3AgNbso3n74mVPA5LTZwkB9NlXf4XPamLxJE8h0gh73rM94xvwRT2CVInw==", + "dev": true, + "license": "ISC" + }, + "node_modules/papaparse": { + "version": "5.5.3", + "resolved": "https://registry.npmjs.org/papaparse/-/papaparse-5.5.3.tgz", + "integrity": "sha512-5QvjGxYVjxO59MGU2lHVYpRWBBtKHnlIAcSe1uNFCkkptUh63NFRj0FJQm7nR67puEruUci/ZkjmEFrjCAyP4A==", + "dev": true, + "license": "MIT" + }, + "node_modules/playwright": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/playwright/-/playwright-1.52.0.tgz", + "integrity": "sha512-JAwMNMBlxJ2oD1kce4KPtMkDeKGHQstdpFPcPH3maElAXon/QZeTvtsfXmTMRyO9TslfoYOXkSsvao2nE1ilTw==", + "dev": true, + "license": "Apache-2.0", + "dependencies": { + "playwright-core": "1.52.0" + }, + "bin": { + "playwright": "cli.js" + }, + "engines": { + "node": ">=18" + }, + "optionalDependencies": { + "fsevents": "2.3.2" + } + }, + "node_modules/playwright-core": { + "version": "1.52.0", + "resolved": "https://registry.npmjs.org/playwright-core/-/playwright-core-1.52.0.tgz", + "integrity": "sha512-l2osTgLXSMeuLZOML9qYODUQoPPnUsKsb5/P6LJ2e6uPKXUdPK5WYhN4z03G+YNbWmGDY4YENauNu4ZKczreHg==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "playwright-core": "cli.js" + }, + "engines": { + "node": ">=18" + } + }, + "node_modules/ts-node": { + "version": "10.9.2", + "resolved": "https://registry.npmjs.org/ts-node/-/ts-node-10.9.2.tgz", + "integrity": "sha512-f0FFpIdcHgn8zcPSbf1dRevwt047YMnaiJM3u2w2RewrB+fob/zePZcrOyQoLMMO7aBIddLcQIEK5dYjkLnGrQ==", + "dev": true, + "license": "MIT", + "dependencies": { + "@cspotcode/source-map-support": "^0.8.0", + "@tsconfig/node10": "^1.0.7", + "@tsconfig/node12": "^1.0.7", + "@tsconfig/node14": "^1.0.0", + "@tsconfig/node16": "^1.0.2", + "acorn": "^8.4.1", + "acorn-walk": "^8.1.1", + "arg": "^4.1.0", + "create-require": "^1.1.0", + "diff": "^4.0.1", + "make-error": "^1.1.1", + "v8-compile-cache-lib": "^3.0.1", + "yn": "3.1.1" + }, + "bin": { + "ts-node": "dist/bin.js", + "ts-node-cwd": "dist/bin-cwd.js", + "ts-node-esm": "dist/bin-esm.js", + "ts-node-script": "dist/bin-script.js", + "ts-node-transpile-only": "dist/bin-transpile.js", + "ts-script": "dist/bin-script-deprecated.js" + }, + "peerDependencies": { + "@swc/core": ">=1.2.50", + "@swc/wasm": ">=1.2.50", + "@types/node": "*", + "typescript": ">=2.7" + }, + "peerDependenciesMeta": { + "@swc/core": { + "optional": true + }, + "@swc/wasm": { + "optional": true + } + } + }, + "node_modules/typescript": { + "version": "5.8.3", + "resolved": "https://registry.npmjs.org/typescript/-/typescript-5.8.3.tgz", + "integrity": "sha512-p1diW6TqL9L07nNxvRMM7hMMw4c5XOo/1ibL4aAIGmSAt9slTE1Xgw5KWuof2uTOvCg9BY7ZRi+GaF+7sfgPeQ==", + "dev": true, + "license": "Apache-2.0", + "bin": { + "tsc": "bin/tsc", + "tsserver": "bin/tsserver" + }, + "engines": { + "node": ">=14.17" + } + }, + "node_modules/undici-types": { + "version": "6.21.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.21.0.tgz", + "integrity": "sha512-iwDZqg0QAGrg9Rav5H4n0M64c3mkR59cJ6wQp+7C4nI0gsmExaedaYLNO44eT4AtBBwjbTiGPMlt2Md0T9H9JQ==", + "dev": true, + "license": "MIT" + }, + "node_modules/v8-compile-cache-lib": { + "version": "3.0.1", + "resolved": "https://registry.npmjs.org/v8-compile-cache-lib/-/v8-compile-cache-lib-3.0.1.tgz", + "integrity": "sha512-wa7YjyUGfNZngI/vtK0UHAN+lgDCxBPCylVXGp0zu59Fz5aiGtNXaq3DhIov063MorB+VfufLh3JlF2KdTK3xg==", + "dev": true, + "license": "MIT" + }, + "node_modules/yn": { + "version": "3.1.1", + "resolved": "https://registry.npmjs.org/yn/-/yn-3.1.1.tgz", + "integrity": "sha512-Ux4ygGWsu2c7isFWe8Yu1YluJmqVhxqK2cLXNQA5AcC3QfbGNpM7fu0Y8b/z16pXLnFxZYvWhd3fhBY9DLmC6Q==", + "dev": true, + "license": "MIT", + "engines": { + "node": ">=6" + } + } + } +} diff --git a/package.json b/package.json new file mode 100644 index 0000000..c4fa041 --- /dev/null +++ b/package.json @@ -0,0 +1,27 @@ +{ + "name": "concept-garant", + "version": "1.0.0", + "description": "", + "main": "index.js", + "scripts": { + "start": "playwright test", + "test": "echo \"Error: no test specified\" && exit 1" + }, + "repository": { + "type": "git", + "url": "http://dev.concept.ru:3000/iborisov/Concept-Garant.git" + }, + "keywords": [], + "author": "", + "license": "ISC", + "type": "commonjs", + "devDependencies": { + "@playwright/test": "^1.52.0", + "@types/node": "^22.15.21", + "@types/papaparse": "^5.3.16", + "papaparse": "^5.5.3", + "playwright": "^1.52.0", + "ts-node": "^10.9.2", + "typescript": "^5.8.3" + } +} diff --git a/playwright.config.ts b/playwright.config.ts new file mode 100644 index 0000000..2aa89e5 --- /dev/null +++ b/playwright.config.ts @@ -0,0 +1,18 @@ +import { defineConfig, devices } from "@playwright/test"; + +export default defineConfig({ + testDir: "src", + retries: 0, + reporter: "list", + fullyParallel: true, + projects: [ + { + name: "Desktop Chrome", + use: { ...devices["Desktop Chrome"] }, + }, + ], + use: { + baseURL: "https://internet.garant.ru/", + trace: "on-first-retry", + }, +}); diff --git a/src/scrape.spec.ts b/src/scrape.spec.ts new file mode 100644 index 0000000..9a633fd --- /dev/null +++ b/src/scrape.spec.ts @@ -0,0 +1,48 @@ +import { test, expect } from "@playwright/test"; +import fs from "fs/promises"; +import Papa from "papaparse"; + +const OUTPUT_FILE = "output.csv"; + +test("scrape data from example site", async ({ page }) => { + await page.goto("https://example.com"); + + const heading = await page.locator("h1").textContent(); + console.log({ heading }); + + expect(heading).toBeTruthy(); // optional check + + const data = await page.evaluate(() => { + const heading = document.querySelector("h1")?.textContent?.trim() ?? ""; + return { heading }; + }); + + // Wait for the list to load + await page.waitForSelector("ul.list"); + + const documents = await page.$$eval("ul.list > li", (items) => + items.map((li) => { + const anchor = li.querySelector("a"); + const href = anchor?.getAttribute("href") ?? ""; + + const nameEl = anchor?.querySelector(".name p"); + // Clone the node to strip tags + const cleanText = nameEl ? (nameEl.cloneNode(true) as HTMLElement) : null; + + if (cleanText) { + // Remove tags + cleanText.querySelectorAll("em").forEach((em) => em.remove()); + } + + const name = cleanText?.textContent?.trim() ?? ""; + + return { name, href }; + }) + ); + + // Convert to CSV + const csv = Papa.unparse(documents); + await fs.writeFile(OUTPUT_FILE, csv, "utf-8"); + + console.log("✅ Saved to documents.csv"); +}); diff --git a/test-results/.last-run.json b/test-results/.last-run.json new file mode 100644 index 0000000..cbcc1fb --- /dev/null +++ b/test-results/.last-run.json @@ -0,0 +1,4 @@ +{ + "status": "passed", + "failedTests": [] +} \ No newline at end of file diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000..1d33832 --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,12 @@ +{ + "compilerOptions": { + "target": "ESNext", + "module": "CommonJS", + "strict": true, + "esModuleInterop": true, + "moduleResolution": "node", + "skipLibCheck": true, + "outDir": "dist" + }, + "include": ["src", "playwright.config.ts"] +}