348 lines
13 KiB
Python
348 lines
13 KiB
Python
'''BRE Portal API'''
|
|
import csv
|
|
import time
|
|
import logging
|
|
import warnings
|
|
|
|
from colorama import init as color_init
|
|
from colorama import Fore, Style
|
|
|
|
from .selenium_wrapper import WebBrowser
|
|
from .config import Config
|
|
from .info_models import FieldType, FilterType
|
|
from .uploader import GreatbookUploader
|
|
from .data_reader import DataReader
|
|
from .bre_browser_options import get_browser_options
|
|
from .crypto import validate_password
|
|
|
|
from .document import DocxTextProcessor
|
|
|
|
warnings.filterwarnings('ignore', category=UserWarning, module='openpyxl')
|
|
|
|
|
|
def _log_start():
|
|
logging.info(
|
|
'Start time is ' + Style.BRIGHT + Fore.GREEN + '%s' + Style.RESET_ALL,
|
|
time.strftime('%H:%M:%S')
|
|
)
|
|
|
|
|
|
def _log_end():
|
|
logging.info(
|
|
'Done ... end time is ' + Style.BRIGHT + Fore.GREEN + '%s' + Style.RESET_ALL,
|
|
time.strftime('%H:%M:%S')
|
|
)
|
|
|
|
|
|
def _chunks(lst, n: int):
|
|
"""Yield successive n-sized chunks from lst."""
|
|
for i in range(0, len(lst), n):
|
|
yield lst[i:i + n]
|
|
|
|
|
|
def _format_for(status: str):
|
|
if status in ['OK', 'NOT EXISTS']:
|
|
return Style.BRIGHT + Fore.GREEN
|
|
if status in ['EXCEPTION', 'FAIL', 'EXISTS', 'NO TASK']:
|
|
return Style.BRIGHT + Fore.RED
|
|
if status in ['ONLY TASK']:
|
|
return Style.BRIGHT + Fore.BLUE
|
|
return ''
|
|
|
|
|
|
class PortalAPI:
|
|
'''Main entrpoint to Portal'''
|
|
def __init__(self, config: Config):
|
|
color_init(autoreset=True)
|
|
chrome = WebBrowser()
|
|
chrome_options = get_browser_options(show_window=True)
|
|
chrome.start_chrome(chrome_options)
|
|
|
|
self.config = config
|
|
self._browser = chrome
|
|
self._debug = config['Options'].getboolean('Debug')
|
|
self._loader = GreatbookUploader(chrome, config)
|
|
|
|
self._reader = None
|
|
self._output_tasks = None
|
|
self._writer_tasks = None
|
|
self._output_content = None
|
|
self._writer_content = None
|
|
|
|
self._document_processor = None
|
|
|
|
def __del__(self):
|
|
pass
|
|
|
|
def validate(self, password: str) -> bool:
|
|
'''Validate API status'''
|
|
if not validate_password(password):
|
|
return False
|
|
if not self._loader.login():
|
|
return False
|
|
return True
|
|
|
|
def set_input(self, input_file: str) -> bool:
|
|
'''Initialize input file'''
|
|
self._reader = DataReader()
|
|
if not self._reader.load(input_file):
|
|
logging.error('Failed to access %s', input_file)
|
|
return False
|
|
return True
|
|
|
|
def set_output_tasks(self, output_file: str) -> bool:
|
|
'''Initialize output file'''
|
|
self._output_tasks = open(output_file, 'w', newline='', encoding='utf-8')
|
|
self._writer_tasks = csv.writer(self._output_tasks)
|
|
return True
|
|
|
|
def set_output_content(self, output_file: str) -> bool:
|
|
'''Initialize output file'''
|
|
self._output_content = open(output_file, 'w', newline='', encoding='utf-8')
|
|
self._writer_content = csv.writer(self._output_content)
|
|
return True
|
|
|
|
def check_existence(self) -> int:
|
|
'''Check existence of card-slots from Excel input'''
|
|
_log_start()
|
|
self._writer_tasks.writerow(['Слово', 'Статус', 'Текст', 'Идентификатор'])
|
|
content_it = self._reader.get_content()
|
|
while not content_it.is_done():
|
|
data = content_it.read_row()
|
|
(content, status, has_text, task_id) = self._process_existence(data)
|
|
self._write_task(content, status, has_text, task_id)
|
|
content_it.next()
|
|
self._output_tasks.close()
|
|
_log_end()
|
|
return 0
|
|
|
|
def import_cardslots(self) -> int:
|
|
'''Check existence of card-slots from Excel input'''
|
|
_log_start()
|
|
self._writer_tasks.writerow(['Слово', 'Статус', 'Текст', 'Идентификатор'])
|
|
content_it = self._reader.get_content()
|
|
while not content_it.is_done():
|
|
data = content_it.read_row()
|
|
attempts = 0
|
|
while attempts <= self.config['Options'].getint('CardslotRetries') + 1:
|
|
if attempts > 0:
|
|
logging.info('Retrying after failed attempt # %d...', attempts)
|
|
(content, status, has_text, task_id) = self._process_cardslots(data)
|
|
self._write_task(content, status, has_text, task_id)
|
|
if status != 'FAILED':
|
|
break
|
|
attempts += 1
|
|
content_it.next()
|
|
self._output_tasks.close()
|
|
_log_end()
|
|
return 0
|
|
|
|
def import_meta(self) -> int:
|
|
'''Import content metadata'''
|
|
_log_start()
|
|
content_it = self._reader.get_content()
|
|
while not content_it.is_done():
|
|
data = content_it.read_row()
|
|
content = data[FieldType.content_name_db]
|
|
attributes = self._reader.get_attributes_for(content)
|
|
status = self._process_metadata(data, attributes)
|
|
logging.info('%s ... ' + _format_for(status) + '[%s]' + Style.RESET_ALL, content, status)
|
|
content_it.next()
|
|
_log_end()
|
|
return 0
|
|
|
|
def update_meta(self) -> int:
|
|
'''Update content metadata'''
|
|
_log_start()
|
|
content_it = self._reader.get_content()
|
|
while not content_it.is_done():
|
|
data = content_it.read_row()
|
|
content = data[FieldType.content_name_db]
|
|
attributes = self._reader.get_attributes_for(content)
|
|
status = self._update_metadata(data, attributes)
|
|
logging.info('%s ... ' + _format_for(status) + '[%s]' + Style.RESET_ALL, content, status)
|
|
content_it.next()
|
|
_log_end()
|
|
return 0
|
|
|
|
def load_texts(self) -> int:
|
|
'''Load content text'''
|
|
_log_start()
|
|
self._document_processor = DocxTextProcessor()
|
|
content_it = self._reader.get_content()
|
|
while not content_it.is_done():
|
|
data = content_it.read_row()
|
|
content = data[FieldType.content_name]
|
|
filename = self.config['AppData']['DocxFolder'] + '/' + content.upper() + '.docx'
|
|
if not self._document_processor.process_document(filename):
|
|
status = 'FAIL'
|
|
else:
|
|
authors = self._document_processor.authors
|
|
text = self._document_processor.text
|
|
bibliography = self._document_processor.bibliography
|
|
status = self._load_content_text(content, authors, text, bibliography)
|
|
content_it.next()
|
|
logging.info('%s ... ' + _format_for(status) + '[%s]' + Style.RESET_ALL, content, status)
|
|
_log_end()
|
|
return 0
|
|
|
|
def export_tasks(self) -> int:
|
|
'''Update content metadata'''
|
|
_log_start()
|
|
scanTasks = self.config['AppData']['ScanTasks'] == 'true'
|
|
scanContent = self.config['AppData']['ScanContent'] == 'true'
|
|
excluded = self.config['AppData']['ExcludeID'].split(';')
|
|
if '' in excluded:
|
|
excluded.remove('')
|
|
included = self.config['AppData']['IncludeID'].split(';')
|
|
if '' in included:
|
|
included.remove('')
|
|
|
|
logging.info('Excluded tasks: %s', len(excluded))
|
|
logging.info('Included tasks: %s', len(included))
|
|
logging.info('Scan tasks: %s', scanTasks)
|
|
logging.info('Scan content: %s', scanContent)
|
|
|
|
if scanTasks:
|
|
logging.info('Loading tasks data...')
|
|
|
|
filters = []
|
|
for filter_id in FilterType:
|
|
filters.append([s.strip() for s in self.config['AppData'][filter_id.to_config()].split(';')])
|
|
|
|
data = self._loader.get_tasks_data(filters)
|
|
logging.info('Loaded %s tasks', len(data))
|
|
self._writer_tasks.writerows(data)
|
|
self._output_tasks.close()
|
|
|
|
if not scanContent:
|
|
_log_end()
|
|
return 0
|
|
|
|
tasks_with_content = [
|
|
item[6] for item in
|
|
filter(
|
|
lambda x: x[0] in ['МИКРОПОНЯТИЕ', 'СТАТЬЯ', 'АКТУАЛИЗАЦИЯ СТАТЬИ ИЗ ЭВ БРЭ', 'СЮЖЕТ']
|
|
and x[1] not in ['Отменена']
|
|
and x[2] not in ['Нет Контента', 'Нет Медиа']
|
|
and x[6] not in excluded
|
|
and x[6] not in included,
|
|
data
|
|
)
|
|
]
|
|
included = included + tasks_with_content
|
|
|
|
chunks = list(_chunks(included, 50))
|
|
logging.info('Scanning %s content in %s bundles', len(included), len(chunks))
|
|
for index, tasks_bundle in enumerate(chunks): # Split in 50 bunches to ensure login is valid
|
|
logging.info('%s: Processing bundle %s / %s', time.strftime('%H:%M:%S'), index + 1, len(chunks))
|
|
try:
|
|
content = self._loader.get_tasks_content(tasks_bundle)
|
|
self._writer_content.writerows(content)
|
|
self._output_content.flush()
|
|
except: # pylint: disable=bare-except
|
|
logging.info('EXCEPTION during processing! Skipping bundle %s', index + 1)
|
|
logging.info('\n'.join(tasks_bundle))
|
|
self._output_content.close()
|
|
_log_end()
|
|
return 0
|
|
|
|
def _write_task(self, content: str, status: str, has_text: bool, task_id: str):
|
|
has_text_str = 'Да' if has_text else 'Нет'
|
|
# pylint: disable=logging-not-lazy
|
|
logging.info(
|
|
'%s ... ' + _format_for(status) + '[%s]' + Style.RESET_ALL + ' ... [%s] ... %s',
|
|
content, status, has_text_str, task_id
|
|
)
|
|
self._writer_tasks.writerow([content, status, has_text_str, task_id])
|
|
self._output_tasks.flush()
|
|
|
|
def _process_existence(self, data: dict):
|
|
content = data[FieldType.content_name_db]
|
|
has_text = False
|
|
try:
|
|
task_id = self._loader.find_task_id(data[FieldType.task_name])
|
|
if self._loader.content_exists(content):
|
|
status = 'EXISTS'
|
|
has_text = self._loader.content_has_text(content)
|
|
else:
|
|
status = 'ONLY TASK' if task_id != '' else 'NOT EXISTS'
|
|
except: # pylint: disable=bare-except
|
|
if not self._debug:
|
|
logging.exception('Got exception...')
|
|
task_id = ''
|
|
status = 'EXCEPTION'
|
|
else:
|
|
raise
|
|
if content == 'Нет Контента':
|
|
content = data[FieldType.task_name]
|
|
return (content, status, has_text, task_id)
|
|
|
|
def _process_cardslots(self, data: dict):
|
|
content = data[FieldType.content_name_db]
|
|
has_text = False
|
|
try:
|
|
task_id = self._loader.find_task_id(data[FieldType.task_name])
|
|
if self._loader.content_exists(content):
|
|
status = 'EXISTS'
|
|
has_text = self._loader.content_has_text(content)
|
|
elif task_id != '':
|
|
status = 'ONLY TASK'
|
|
else:
|
|
task_id = self._loader.create_task(data)
|
|
status = 'OK' if task_id != '' else 'FAILED'
|
|
except: # pylint: disable=bare-except
|
|
if not self._debug:
|
|
logging.exception('Got exception...')
|
|
status = 'EXCEPTION'
|
|
task_id = ''
|
|
else:
|
|
raise
|
|
return (content, status, has_text, task_id)
|
|
|
|
def _process_metadata(self, data: dict, attributes: list) -> str:
|
|
try:
|
|
task_id = self._loader.find_task_id(data[FieldType.task_name])
|
|
if task_id == '':
|
|
return 'NO TASK'
|
|
if self._loader.fill_metadata(task_id, data, attributes):
|
|
return 'OK'
|
|
else:
|
|
return 'FAIL'
|
|
except: # pylint: disable=bare-except
|
|
if not self._debug:
|
|
logging.exception('Got exception...')
|
|
return 'EXCEPTION'
|
|
else:
|
|
raise
|
|
|
|
def _update_metadata(self, data: dict, attributes: list) -> str:
|
|
try:
|
|
task_id = self._loader.find_task_id(data[FieldType.task_name])
|
|
if task_id == '':
|
|
return 'NO TASK'
|
|
if self._loader.update_metadata(task_id, data, attributes):
|
|
return 'OK'
|
|
else:
|
|
return 'FAIL'
|
|
except: # pylint: disable=bare-except
|
|
if not self._debug:
|
|
logging.exception('Got exception...')
|
|
return 'EXCEPTION'
|
|
else:
|
|
raise
|
|
|
|
def _load_content_text(self, content: str, authors, text: str, bibliography: str) -> str:
|
|
try:
|
|
|
|
if self._loader.load_content(content, authors, text, bibliography):
|
|
return 'OK'
|
|
else:
|
|
return 'FAIL'
|
|
except: # pylint: disable=bare-except
|
|
if not self._debug:
|
|
logging.exception('Got exception...')
|
|
return 'EXCEPTION'
|
|
else:
|
|
raise
|