197 lines
6.2 KiB
Python
197 lines
6.2 KiB
Python
'''Reading data from Excel spreadsheets'''
|
|
from enum import IntEnum, unique
|
|
from datetime import datetime
|
|
|
|
import pandas
|
|
|
|
from .info_models import FieldType, InputMethod, text_to_method
|
|
|
|
# Field types of the content sheet, listed in sheet-column order: the entry at
# position N belongs to column N, and _ContentColumns.to_field() indexes this
# list by column number.
_ContentFields = [
    # positions 0-9
    FieldType.task_type,
    FieldType.status,
    FieldType.content_name,
    FieldType.skip,  # position 3 has no matching _ContentColumns member
    FieldType.change_score,
    FieldType.biblio_name,
    FieldType.definition,
    FieldType.is_immutable,
    FieldType.object_type,
    FieldType.markers,
    # positions 10-19
    FieldType.tags,
    FieldType.author,
    FieldType.supervisor,
    FieldType.executor,
    FieldType.task_manager,
    FieldType.responsible,
    FieldType.department,
    FieldType.date_target,
    FieldType.source,
    FieldType.electron_bre,
    # positions 20-29
    FieldType.main_page,
    FieldType.is_general,
    FieldType.actualize_period,
    FieldType.age_restriction,
    FieldType.priority,
    FieldType.article_type,
    FieldType.date_exchange,
    FieldType.date_ees1,
    FieldType.date_ex_tools,
    FieldType.date_ees2,
    # positions 30-35
    FieldType.expert,
    FieldType.contract,
    FieldType.comment,
    FieldType.task_id,
    FieldType.content_name_db,
    FieldType.task_name,
]
|
|
|
|
|
|
@unique
class _ContentColumns(IntEnum):
    '''Zero-based column numbers of the content sheet that are read.

    Each member's value is both the column position in the sheet and the
    index of the matching entry in _ContentFields.
    '''

    task_type = 0
    status = 1
    content_name = 2
    # Column 3 is intentionally not listed, so it is never read.
    change_score = 4
    biblio_name = 5
    definition = 6
    is_immutable = 7
    object_type = 8
    markers = 9

    tags = 10
    author = 11
    supervisor = 12
    executor = 13
    task_manager = 14
    responsible = 15
    department = 16
    date_target = 17
    source = 18
    electron_bre = 19

    main_page = 20
    is_general = 21
    actualize_period = 22
    age_restriction = 23
    priority = 24
    article_type = 25
    date_exchange = 26
    date_ees1 = 27
    date_ex_tools = 28
    date_ees2 = 29

    expert = 30
    contract = 31
    comment = 32
    task_id = 33
    content_name_db = 34
    task_name = 35

    def to_field(self) -> FieldType:
        '''Return the FieldType stored at this column's position.'''
        position = int(self)
        return _ContentFields[position]
|
|
|
|
|
|
def _get_task_name(content_name: str, is_immutable: bool) -> str:
    '''Build the task name for a content item.

    Immutable content gets its name wrapped in the fixed "immutable"
    caption; any other content keeps its name unchanged.
    '''
    if is_immutable:
        return 'Неизменные {} (библиография+корректура+транскрипция)'.format(content_name)
    return content_name
|
|
|
|
|
|
def _drop_from_nan(target: pandas.DataFrame) -> pandas.DataFrame:
    '''Cut the frame at the first row whose first column is empty.

    Returns the rows that precede the first row with a missing (NaN/None)
    value in column 0. That row and everything after it are dropped. When
    no such row exists the frame is returned as is.
    '''
    if target.empty:
        # No rows or no columns: nothing to inspect, nothing to cut.
        return target
    missing = target.iloc[:, 0].isna().to_numpy()
    if not missing.any():
        return target
    # argmax() on a boolean array gives the position of the first True.
    # The cut is positional (iloc), so it does not depend on index labels.
    first_missing = int(missing.argmax())
    return target.iloc[:first_missing]
|
|
|
|
|
|
class ContentIterator:
    '''Iterates over metadata sheet rows

    This is a cursor, not a Python iterator: check is_done(), read the
    current row with read_row(), then advance with next().
    '''

    def __init__(self, data: pandas.DataFrame):
        '''Place the cursor on the first row of data.'''
        self._data = data
        self._row = 0
        self._count = len(self._data.index)

    def is_done(self) -> bool:
        '''Indicates end of iteration'''
        return self._row >= self._count

    def next(self) -> bool:
        '''Advance to the next row.

        Returns False (without moving) when the cursor is already past the
        last row, True otherwise.
        '''
        if self.is_done():
            return False
        self._row += 1
        return True

    @staticmethod
    def _convert_cell(value, as_list: bool):
        '''Normalise one non-empty cell value (rules are listed in read_row).'''
        if as_list:
            # NOTE(review): assumes list-type cells are read as text; a numeric
            # cell would raise AttributeError on split() - confirm with real sheets.
            stripped = (item.strip() for item in value.split(';'))
            return [item for item in stripped if item]
        if isinstance(value, str):
            return value.strip()
        if isinstance(value, datetime):
            # pandas.Timestamp is a datetime subclass, so both are handled here.
            return value.strftime('%d.%m.%Y')
        return value

    def read_row(self) -> dict:
        '''Return the current row as a dict keyed by FieldType.

        Empty (NaN) cells are left out. Columns that are absent from the
        sheet are treated as empty. Remaining values are normalised:

        * fields whose input method is combo_dialog or
          combo_dialog_simple_list are split on ';' into a list of
          stripped, non-empty items;
        * other text is stripped;
        * dates are formatted as DD.MM.YYYY;
        * anything else is passed through unchanged.

        After that: is_immutable, electron_bre, main_page and is_general
        become booleans (True only for 'Да'); content_name_db and task_name
        are derived from content_name when their cells are empty; department
        gets the 'Редакция ' prefix and is copied to editorial; the first
        article_type item gets the ' статья' suffix.

        Raises:
            IndexError: the cursor is past the last row.
            KeyError: content_name is empty but is needed to derive
                content_name_db or task_name.
        '''
        list_methods = (InputMethod.combo_dialog, InputMethod.combo_dialog_simple_list)
        column_count = self._data.shape[1]

        data = {}
        for column in _ContentColumns:
            if column >= column_count:
                # The sheet is narrower than the full layout: same as an empty cell.
                continue
            value = self._data.iat[self._row, column]
            if pandas.isna(value):
                continue
            field = column.to_field()
            data[field] = self._convert_cell(value, field.input_method() in list_methods)

        # Yes/no cells hold 'Да' for "yes"; anything else counts as "no".
        yes_no_fields = (
            FieldType.is_immutable,
            FieldType.electron_bre,
            FieldType.main_page,
            FieldType.is_general,
        )
        for field in yes_no_fields:
            if field in data:
                data[field] = data[field] == 'Да'

        if FieldType.content_name_db not in data:
            data[FieldType.content_name_db] = data[FieldType.content_name]
        if FieldType.task_name not in data:
            is_immutable = data.get(FieldType.is_immutable, False)
            data[FieldType.task_name] = _get_task_name(data[FieldType.content_name], is_immutable)

        if FieldType.department in data:
            data[FieldType.department] = 'Редакция ' + data[FieldType.department]
            data[FieldType.editorial] = data[FieldType.department]

        # A cell holding only separators or blanks yields an empty list; there is
        # then no first item to suffix.
        article_types = data.get(FieldType.article_type)
        if article_types:
            article_types[0] = article_types[0] + ' статья'
        return data
|
|
|
|
|
|
class DataReader:
    '''BRE data reader for Excel

    load() reads the content and attributes sheets of a workbook into
    memory; get_content() and get_attributes_for() serve the loaded data.
    '''

    _SHEET_CONTENT = 'Контент'
    _SHEET_ATTRIBUTES = 'Признаки'

    def __init__(self):
        # Attribute kept for backward compatibility; load() closes the workbook
        # as soon as both sheets are read and does not store it here.
        self._xls = None
        self._content = None
        self._attributes = None

    def load(self, input_file: str) -> bool:
        '''Load both sheets from input_file.

        Returns True on success. Returns False when the file does not exist
        or pandas raises ValueError while opening or reading it; in that case
        previously loaded data is left untouched.
        '''
        try:
            # The context manager closes the workbook file even if reading fails.
            with pandas.ExcelFile(input_file) as workbook:
                content = _drop_from_nan(
                    pandas.read_excel(workbook, DataReader._SHEET_CONTENT))
                attributes = _drop_from_nan(
                    pandas.read_excel(workbook, DataReader._SHEET_ATTRIBUTES))
        except (FileNotFoundError, ValueError):
            return False
        # Assign only after both sheets were read, so a failure cannot leave
        # the content of one file paired with the attributes of another.
        self._content = content
        self._attributes = attributes
        return True

    def get_content(self) -> ContentIterator:
        '''Return iterator for cards'''
        return ContentIterator(self._content)

    def get_attributes_for(self, content_name) -> list:
        '''Return attributes list for specific content

        Selects the attribute rows whose 'Название контента' column equals
        content_name and returns, for each of them, the tuple
        (cell 1, text_to_method(cell 3), cell 2), with cells counted from
        zero by position.
        '''
        matches = self._attributes.loc[self._attributes['Название контента'] == content_name]
        # Use .iloc for positional access: plain integer keys on a Series with
        # string labels are deprecated in pandas as positional lookups.
        return [
            (row.iloc[1], text_to_method(row.iloc[3]), row.iloc[2])
            for _, row in matches.iterrows()
        ]
|