From 4927837ac661ab5eeaed353861a236d8a6a7e4ae Mon Sep 17 00:00:00 2001 From: Ivan <8611739+IRBorisov@users.noreply.github.com> Date: Fri, 23 May 2025 20:37:41 +0300 Subject: [PATCH] F: Improve features --- src/save-auth.ts | 36 +++++++++++++++++++----------------- src/scrape.spec.ts | 17 +++++++++++++---- src/search.ts | 3 +-- 3 files changed, 33 insertions(+), 23 deletions(-) diff --git a/src/save-auth.ts b/src/save-auth.ts index a7f4803..75b54b8 100644 --- a/src/save-auth.ts +++ b/src/save-auth.ts @@ -5,8 +5,8 @@ import { USER_DATA_DIR } from './setup'; const IN_LOGIN = 'iborisov@acconcept.ru'; const IN_PASSWORD = 'PuNS8br2'; -// const IN_LOGIN = "dm9175025694"; -// const IN_PASSWORD = "52547"; +// const IN_LOGIN = 'dm9175025694'; +// const IN_PASSWORD = '52547'; (async () => { // Launch persistent context with visible browser @@ -14,23 +14,25 @@ const IN_PASSWORD = 'PuNS8br2'; headless: false }); - const page = await context.newPage(); + try { + const page = context.pages().length > 0 ? context.pages()[0] : await context.newPage(); - await page.goto('https://account.garant.ru/login'); + await page.goto('https://account.garant.ru/login'); - await page.getByRole('textbox').fill(IN_LOGIN); - await page.getByRole('button', { name: 'Войти' }).click(); - await page.getByRole('textbox').fill(IN_PASSWORD); - await page.getByRole('button', { name: 'Войти' }).click(); + await page.getByRole('textbox').fill(IN_LOGIN); + await page.getByRole('button', { name: 'Войти' }).click(); + await page.getByRole('textbox').fill(IN_PASSWORD); + await page.getByRole('button', { name: 'Войти' }).click(); - await page - .getByRole('button', { - name: 'Выйти из текущего сеанса и сменить пользователя' - }) - .waitFor(); + await page + .getByRole('button', { + name: 'Выйти из текущего сеанса и сменить пользователя' + }) + .waitFor(); - // Save storage state to file - await fs.writeFile('auth.json', JSON.stringify(await page.context().storageState())); - - console.log('✅ Saved login state to auth.json'); + await fs.writeFile('auth.json', JSON.stringify(await page.context().storageState())); + console.log('✅ Saved login state to auth.json'); + } finally { + await context.close(); + } })(); diff --git a/src/scrape.spec.ts b/src/scrape.spec.ts index 73e6a09..1aff85b 100644 --- a/src/scrape.spec.ts +++ b/src/scrape.spec.ts @@ -5,12 +5,12 @@ import { test } from './setup'; import { executeSearch } from './search'; /** Input text prompt */ -const IN_TEXT_PROMPT = 'Министерство труда, занятости и социальной защиты Республики Коми'; +const IN_TEXT_PROMPT = 'Представительство Республики Коми в Северо-Западном регионе Российской Федерации'; const IN_TITLE_PROMPT = ''; /** Output file naming */ const OUT_NAME = 'output'; -const OUT_SUFFIX = '_1'; +const OUT_SUFFIX = '_22'; const OUT_FILENAME = `output/${OUT_NAME}${OUT_SUFFIX}.csv`; interface DocumentInfo { @@ -31,6 +31,7 @@ test('scrape documents list', async ({ page }) => { }); const documents = await readData(page); + if (documents.length === 0) { console.log('No data found'); return; @@ -55,11 +56,19 @@ async function readData(page: Page): Promise { const cleanText = nameEl ? (nameEl.cloneNode(true) as HTMLElement) : null; if (cleanText) { - cleanText.querySelectorAll('em').forEach(em => em.remove()); + cleanText.querySelectorAll('em').forEach(em => { + const parent = em.parentNode; + if (parent) { + while (em.firstChild) { + parent.insertBefore(em.firstChild, em); + } + parent.removeChild(em); + } + }); } const name = cleanText?.textContent?.trim() ?? ''; - return { name: name, href: href }; + return { name: name, href: 'https://internet.garant.ru' + href }; }) .filter(item => !!item.href && !!item.name) ); diff --git a/src/search.ts b/src/search.ts index b126e00..c84c5b4 100644 --- a/src/search.ts +++ b/src/search.ts @@ -4,7 +4,6 @@ const GLOBAL_FILTERS = [ 'Федеральные министерства и ведомства', 'Правительство России и СССР', 'Президент России и СССР', - 'Органы судебной власти РФ и СССР', 'Органы законодательной власти России и СССР' ] as const; @@ -88,8 +87,8 @@ export async function executeSearch(page: Page, options: SearchOptions) { await listItem.locator('img').nth(1).click(); } } - await page.getByRole('button', { name: 'Выбрать', exact: true }).click(); + await page.getByRole('button', { name: 'Найти (Enter)' }).click(); }