'''BRE Portal API'''
import csv
import time
import logging
import warnings

from colorama import init as color_init
from colorama import Fore, Style

from .selenium_wrapper import WebBrowser
from .config import Config
from .info_models import FieldType, FilterType
from .uploader import GreatbookUploader
from .data_reader import DataReader
from .bre_browser_options import get_browser_options
from .crypto import validate_password
from .document import DocxTextProcessor

warnings.filterwarnings('ignore', category=UserWarning, module='openpyxl')


def _log_start():
    logging.info(
        'Start time is ' + Style.BRIGHT + Fore.GREEN + '%s' + Style.RESET_ALL,
        time.strftime('%H:%M:%S')
    )


def _log_end():
    logging.info(
        'Done ... end time is ' + Style.BRIGHT + Fore.GREEN + '%s' + Style.RESET_ALL,
        time.strftime('%H:%M:%S')
    )


def _chunks(lst, n: int):
    """Yield successive n-sized chunks from lst."""
    for i in range(0, len(lst), n):
        yield lst[i:i + n]


def _format_for(status: str):
    '''Pick a console colour for a status label.'''
    if status in ['OK', 'NOT EXISTS']:
        return Style.BRIGHT + Fore.GREEN
    if status in ['EXCEPTION', 'FAIL', 'EXISTS', 'NO TASK']:
        return Style.BRIGHT + Fore.RED
    if status in ['ONLY TASK']:
        return Style.BRIGHT + Fore.BLUE
    return ''


class PortalAPI:
    '''Main entry point to Portal'''

    def __init__(self, config: Config):
        color_init(autoreset=True)
        chrome = WebBrowser()
        chrome_options = get_browser_options(show_window=True)
        chrome.start_chrome(chrome_options)
        self.config = config
        self._browser = chrome
        self._debug = config['Options'].getboolean('Debug')
        self._loader = GreatbookUploader(chrome, config)
        self._reader = None
        self._output_tasks = None
        self._writer_tasks = None
        self._output_content = None
        self._writer_content = None
        self._document_processor = None

    def __del__(self):
        pass

    def validate(self, password: str) -> bool:
        '''Validate the password and log in to the portal'''
        if not validate_password(password):
            return False
        if not self._loader.login():
            return False
        return True

    def set_input(self, input_file: str) -> bool:
        '''Initialize the Excel input file'''
        self._reader = DataReader()
        if not self._reader.load(input_file):
            logging.error('Failed to access %s', input_file)
            return False
        return True

    def set_output_tasks(self, output_file: str) -> bool:
        '''Initialize the CSV output file for task records'''
        self._output_tasks = open(output_file, 'w', newline='', encoding='utf-8')
        self._writer_tasks = csv.writer(self._output_tasks)
        return True

    def set_output_content(self, output_file: str) -> bool:
        '''Initialize the CSV output file for content records'''
        self._output_content = open(output_file, 'w', newline='', encoding='utf-8')
        self._writer_content = csv.writer(self._output_content)
        return True

    def check_existence(self) -> int:
        '''Check existence of card-slots from Excel input'''
        _log_start()
        # CSV columns: word, status, has text, identifier.
        self._writer_tasks.writerow(['Слово', 'Статус', 'Текст', 'Идентификатор'])
        content_it = self._reader.get_content()
        while not content_it.is_done():
            data = content_it.read_row()
            (content, status, has_text, task_id) = self._process_existence(data)
            self._write_task(content, status, has_text, task_id)
            content_it.next()
        self._output_tasks.close()
        _log_end()
        return 0

    def import_cardslots(self) -> int:
        '''Import card-slots from Excel input'''
        _log_start()
        self._writer_tasks.writerow(['Слово', 'Статус', 'Текст', 'Идентификатор'])
        content_it = self._reader.get_content()
        while not content_it.is_done():
            data = content_it.read_row()
            attempts = 0
            # One initial attempt plus up to CardslotRetries retries.
            while attempts <= self.config['Options'].getint('CardslotRetries'):
                if attempts > 0:
                    logging.info('Retrying after failed attempt # %d...', attempts)
                (content, status, has_text, task_id) = self._process_cardslots(data)
                self._write_task(content, status, has_text, task_id)
                if status != 'FAILED':
                    break
                attempts += 1
            content_it.next()
        self._output_tasks.close()
        _log_end()
        return 0

    def import_meta(self) -> int:
        '''Import content metadata'''
        _log_start()
        content_it = self._reader.get_content()
        while not content_it.is_done():
            data = content_it.read_row()
            content = data[FieldType.content_name_db]
            attributes = self._reader.get_attributes_for(content)
            status = self._process_metadata(data, attributes)
            logging.info('%s ... ' + _format_for(status) + '[%s]' + Style.RESET_ALL,
                         content, status)
            content_it.next()
        _log_end()
        return 0

    def update_meta(self) -> int:
        '''Update content metadata'''
        _log_start()
        content_it = self._reader.get_content()
        while not content_it.is_done():
            data = content_it.read_row()
            content = data[FieldType.content_name_db]
            attributes = self._reader.get_attributes_for(content)
            status = self._update_metadata(data, attributes)
            logging.info('%s ... ' + _format_for(status) + '[%s]' + Style.RESET_ALL,
                         content, status)
            content_it.next()
        _log_end()
        return 0

    def load_texts(self) -> int:
        '''Load content text from DOCX files'''
        _log_start()
        self._document_processor = DocxTextProcessor()
        content_it = self._reader.get_content()
        while not content_it.is_done():
            data = content_it.read_row()
            content = data[FieldType.content_name]
            filename = self.config['AppData']['DocxFolder'] + '/' + content.upper() + '.docx'
            if not self._document_processor.process_document(filename):
                status = 'FAIL'
            else:
                authors = self._document_processor.authors
                text = self._document_processor.text
                bibliography = self._document_processor.bibliography
                status = self._load_content_text(content, authors, text, bibliography)
            content_it.next()
            logging.info('%s ... ' + _format_for(status) + '[%s]' + Style.RESET_ALL,
                         content, status)
        _log_end()
        return 0

    def export_tasks(self) -> int:
        '''Export tasks and, optionally, their content'''
        _log_start()
        scan_tasks = self.config['AppData']['ScanTasks'] == 'true'
        scan_content = self.config['AppData']['ScanContent'] == 'true'
        excluded = self.config['AppData']['ExcludeID'].split(';')
        if '' in excluded:
            excluded.remove('')
        included = self.config['AppData']['IncludeID'].split(';')
        if '' in included:
            included.remove('')
        logging.info('Excluded tasks: %s', len(excluded))
        logging.info('Included tasks: %s', len(included))
        logging.info('Scan tasks: %s', scan_tasks)
        logging.info('Scan content: %s', scan_content)
        data = []  # filled below; also referenced by the content scan when ScanTasks is off
        if scan_tasks:
            logging.info('Loading tasks data...')
            filters = []
            for filter_id in FilterType:
                filters.append([s.strip() for s in
                                self.config['AppData'][filter_id.to_config()].split(';')])
            data = self._loader.get_tasks_data(filters)
            logging.info('Loaded %s tasks', len(data))
            self._writer_tasks.writerows(data)
        self._output_tasks.close()
        if not scan_content:
            _log_end()
            return 0
        # Task rows: x[0] is the task type, x[1] the status ('Отменена' = cancelled),
        # x[2] the content marker ('Нет Контента' / 'Нет Медиа' = no content / no media),
        # x[6] the task identifier.
        tasks_with_content = [
            item[6] for item in filter(
                lambda x: x[0] in ['МИКРОПОНЯТИЕ', 'СТАТЬЯ',
                                   'АКТУАЛИЗАЦИЯ СТАТЬИ ИЗ ЭВ БРЭ', 'СЮЖЕТ']
                and x[1] not in ['Отменена']
                and x[2] not in ['Нет Контента', 'Нет Медиа']
                and x[6] not in excluded
                and x[6] not in included,
                data
            )
        ]
        included = included + tasks_with_content
        chunks = list(_chunks(included, 50))
        logging.info('Scanning %s content in %s bundles', len(included), len(chunks))
        for index, tasks_bundle in enumerate(chunks):
            # Split into bundles of 50 to ensure the login stays valid.
            logging.info('%s: Processing bundle %s / %s',
                         time.strftime('%H:%M:%S'), index + 1, len(chunks))
            try:
                content = self._loader.get_tasks_content(tasks_bundle)
                self._writer_content.writerows(content)
                self._output_content.flush()
            except:  # pylint: disable=bare-except
                logging.info('EXCEPTION during processing! Skipping bundle %s', index + 1)
                logging.info('\n'.join(tasks_bundle))
        self._output_content.close()
        _log_end()
        return 0

    def _write_task(self, content: str, status: str, has_text: bool, task_id: str):
        has_text_str = 'Да' if has_text else 'Нет'
        # pylint: disable=logging-not-lazy
        logging.info(
            '%s ... ' + _format_for(status) + '[%s]' + Style.RESET_ALL + ' ... [%s] ... %s',
            content, status, has_text_str, task_id
        )
        self._writer_tasks.writerow([content, status, has_text_str, task_id])
        self._output_tasks.flush()

    def _process_existence(self, data: dict):
        content = data[FieldType.content_name_db]
        has_text = False
        try:
            task_id = self._loader.find_task_id(data[FieldType.task_name])
            if self._loader.content_exists(content):
                status = 'EXISTS'
                has_text = self._loader.content_has_text(content)
            else:
                status = 'ONLY TASK' if task_id != '' else 'NOT EXISTS'
        except:  # pylint: disable=bare-except
            if not self._debug:
                logging.exception('Got exception...')
                task_id = ''
                status = 'EXCEPTION'
            else:
                raise
        if content == 'Нет Контента':
            content = data[FieldType.task_name]
        return (content, status, has_text, task_id)

    def _process_cardslots(self, data: dict):
        content = data[FieldType.content_name_db]
        has_text = False
        try:
            task_id = self._loader.find_task_id(data[FieldType.task_name])
            if self._loader.content_exists(content):
                status = 'EXISTS'
                has_text = self._loader.content_has_text(content)
            elif task_id != '':
                status = 'ONLY TASK'
            else:
                task_id = self._loader.create_task(data)
                status = 'OK' if task_id != '' else 'FAILED'
        except:  # pylint: disable=bare-except
            if not self._debug:
                logging.exception('Got exception...')
                status = 'EXCEPTION'
                task_id = ''
            else:
                raise
        return (content, status, has_text, task_id)

    def _process_metadata(self, data: dict, attributes: list) -> str:
        try:
            task_id = self._loader.find_task_id(data[FieldType.task_name])
            if task_id == '':
                return 'NO TASK'
            if self._loader.fill_metadata(task_id, data, attributes):
                return 'OK'
            else:
                return 'FAIL'
        except:  # pylint: disable=bare-except
            if not self._debug:
                logging.exception('Got exception...')
                return 'EXCEPTION'
            else:
                raise

    def _update_metadata(self, data: dict, attributes: list) -> str:
        try:
            task_id = self._loader.find_task_id(data[FieldType.task_name])
            if task_id == '':
                return 'NO TASK'
            if self._loader.update_metadata(task_id, data, attributes):
                return 'OK'
            else:
                return 'FAIL'
        except:  # pylint: disable=bare-except
            if not self._debug:
                logging.exception('Got exception...')
                return 'EXCEPTION'
            else:
                raise

    def _load_content_text(self, content: str, authors, text: str, bibliography: str) -> str:
        try:
            if self._loader.load_content(content, authors, text, bibliography):
                return 'OK'
            else:
                return 'FAIL'
        except:  # pylint: disable=bare-except
            if not self._debug:
                logging.exception('Got exception...')
                return 'EXCEPTION'
            else:
                raise
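

# Usage sketch (illustrative only): a minimal driver showing the expected call
# order -- construct PortalAPI with a Config, validate the password and log in,
# wire up the input and output files, then run one batch operation. The Config
# construction, the password source, and the file names below are assumptions
# made for the example, not part of this module's contract.
if __name__ == '__main__':
    import sys

    if len(sys.argv) < 2:
        sys.exit('Usage: python -m <package>.portal_api <password>')
    logging.basicConfig(level=logging.INFO, format='%(message)s')
    config = Config()  # assumption: Config loads its settings on construction
    api = PortalAPI(config)
    if not api.validate(sys.argv[1]):  # assumption: password as first CLI argument
        sys.exit('Password validation or portal login failed')
    if not api.set_input('cardslots.xlsx'):  # hypothetical input workbook
        sys.exit('Failed to open input file')
    api.set_output_tasks('tasks_report.csv')  # hypothetical output report
    sys.exit(api.check_existence())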