'''Reading data from Excel spreadsheets'''
from enum import IntEnum, unique
from datetime import datetime

import pandas

from .info_models import FieldType, InputMethod, text_to_method


# Field for every column position of the content sheet. The list is indexed by
# the values of _ContentColumns; position 3 holds FieldType.skip because no
# _ContentColumns member has the value 3, i.e. that column is never read.
_ContentFields = [
    FieldType.task_type,
    FieldType.status,
    FieldType.content_name,
    FieldType.skip,
    FieldType.change_score,
    FieldType.biblio_name,
    FieldType.definition,
    FieldType.is_immutable,
    FieldType.object_type,
    FieldType.markers,
    FieldType.tags,
    FieldType.author,
    FieldType.supervisor,
    FieldType.executor,
    FieldType.task_manager,
    FieldType.responsible,
    FieldType.department,
    FieldType.date_target,
    FieldType.source,
    FieldType.electron_bre,
    FieldType.main_page,
    FieldType.is_general,
    FieldType.actualize_period,
    FieldType.age_restriction,
    FieldType.priority,
    FieldType.article_type,
    FieldType.date_exchange,
    FieldType.date_ees1,
    FieldType.date_ex_tools,
    FieldType.date_ees2,
    FieldType.expert,
    FieldType.contract,
    FieldType.comment,
    FieldType.task_id,
    FieldType.content_name_db,
    FieldType.task_name
]

# Fields whose cell text is compared with 'Да' and stored as a bool.
_YES_NO_FIELDS = (
    FieldType.is_immutable,
    FieldType.electron_bre,
    FieldType.main_page,
    FieldType.is_general,
)

_YES = 'Да'
_DATE_FORMAT = '%d.%m.%Y'
_IMMUTABLE_TASK_TEMPLATE = 'Неизменные {} (библиография+корректура+транскрипция)'


@unique
class _ContentColumns(IntEnum):
    '''Zero-based column positions of the content sheet (position 3 is unused)'''
    task_type = 0
    status = 1
    content_name = 2
    change_score = 4
    biblio_name = 5
    definition = 6
    is_immutable = 7
    object_type = 8
    markers = 9
    tags = 10
    author = 11
    supervisor = 12
    executor = 13
    task_manager = 14
    responsible = 15
    department = 16
    date_target = 17
    source = 18
    electron_bre = 19
    main_page = 20
    is_general = 21
    actualize_period = 22
    age_restriction = 23
    priority = 24
    article_type = 25
    date_exchange = 26
    date_ees1 = 27
    date_ex_tools = 28
    date_ees2 = 29
    expert = 30
    contract = 31
    comment = 32
    task_id = 33
    content_name_db = 34
    task_name = 35

    def to_field(self) -> FieldType:
        '''Transform metadata column to FieldType'''
        return _ContentFields[self.value]


def _get_task_name(content_name: str, is_immutable: bool) -> str:
    '''Return the task name: the content name itself, or the "immutable" template around it'''
    if is_immutable:
        return _IMMUTABLE_TASK_TEMPLATE.format(content_name)
    return content_name


def _drop_from_nan(target: pandas.DataFrame) -> pandas.DataFrame:
    '''Cut the frame at the first row whose first column is empty

    Returns the rows located before the first row with a missing value in
    column 0, or the frame unchanged when there is no such row (or the frame
    has no rows/columns at all). The cut is positional, so it does not depend
    on the index labels of the frame.
    '''
    if target.empty:
        return target
    missing = target.iloc[:, 0].isna().values
    if not missing.any():
        return target
    # argmax on a boolean array gives the position of the first True
    return target.iloc[:int(missing.argmax())]


def _convert_cell(field, value):
    '''Convert a non-empty cell value to the representation stored for the field

    Fields with a combo input method are split on ';' into a list of stripped,
    non-empty items; other strings are stripped; dates are formatted as
    DD.MM.YYYY; any other value is returned as is.
    '''
    method = field.input_method()
    if method in (InputMethod.combo_dialog, InputMethod.combo_dialog_simple_list):
        return [item for item in (part.strip() for part in value.split(';')) if item]
    if isinstance(value, str):
        return value.strip()
    # pandas.Timestamp is a datetime subclass, so one check covers both
    if isinstance(value, datetime):
        return value.strftime(_DATE_FORMAT)
    return value


class ContentIterator:
    '''Iterates over metadata sheet rows'''

    def __init__(self, data: pandas.DataFrame):
        self._data = data
        self._row = 0
        self._count = len(self._data.index)

    def __del__(self):
        # Nothing to release; kept so the class interface stays unchanged.
        pass

    def is_done(self) -> bool:
        '''Indicates end of iteration'''
        return self._row >= self._count

    def next(self) -> bool:
        '''Move to the next row; returns False if iteration was already finished'''
        if self.is_done():
            return False
        self._row += 1
        return True

    def read_row(self) -> dict:
        '''Read the current row as a dict keyed by FieldType

        Empty cells are omitted. Yes/no fields become bool, content_name_db and
        task_name get defaults derived from content_name, the department gets
        its prefix (and is copied to the editorial field), and the first
        article type gets its suffix.

        Raises IndexError when called after the iteration is done and KeyError
        when a default is needed but the content name cell is empty.
        '''
        data = {}
        for column in _ContentColumns:
            value = self._data.iat[self._row, column]
            if pandas.isna(value):
                continue
            field = column.to_field()
            data[field] = _convert_cell(field, value)

        for flag in _YES_NO_FIELDS:
            if flag in data:
                data[flag] = data[flag] == _YES

        if FieldType.content_name_db not in data:
            data[FieldType.content_name_db] = data[FieldType.content_name]
        if FieldType.task_name not in data:
            is_immutable = data.get(FieldType.is_immutable, False)
            data[FieldType.task_name] = _get_task_name(data[FieldType.content_name], is_immutable)

        if FieldType.department in data:
            data[FieldType.department] = 'Редакция ' + data[FieldType.department]
            data[FieldType.editorial] = data[FieldType.department]

        # The list may be empty when the cell contained only separators.
        article_types = data.get(FieldType.article_type)
        if article_types:
            article_types[0] = article_types[0] + ' статья'
        return data


class DataReader:
    '''BRE data reader for Excel'''
    _SHEET_CONTENT = 'Контент'
    _SHEET_ATTRIBUTES = 'Признаки'

    def __init__(self):
        self._xls = None
        self._content = None
        self._attributes = None

    def load(self, input_file: str) -> bool:
        '''Load both sheets from the file

        Returns False when the file is missing or pandas raises ValueError
        while opening/reading it; in that case the reader state is left as it
        was before the call.
        '''
        try:
            # The context manager closes the file handle once both sheets are
            # in memory.
            with pandas.ExcelFile(input_file) as workbook:
                content = _drop_from_nan(
                    pandas.read_excel(workbook, DataReader._SHEET_CONTENT))
                attributes = _drop_from_nan(
                    pandas.read_excel(workbook, DataReader._SHEET_ATTRIBUTES))
        except (FileNotFoundError, ValueError):
            return False
        self._xls = workbook  # already closed; the reference is kept only for compatibility
        self._content = content
        self._attributes = attributes
        return True

    def get_content(self) -> ContentIterator:
        '''Return iterator for cards'''
        return ContentIterator(self._content)

    def get_attributes_for(self, content_name) -> list:
        '''Return attributes list for specific content

        Each item is a tuple built from the attribute sheet columns by
        position: (column 1, text_to_method(column 3), column 2).
        '''
        names = self._attributes['Название контента']
        filtered = self._attributes.loc[names == content_name]
        # .iloc keeps the access positional; plain row[1] on a label-indexed
        # Series is deprecated in pandas 2.x.
        return [
            (row.iloc[1], text_to_method(row.iloc[3]), row.iloc[2])
            for _, row in filtered.iterrows()
        ]