# ConceptPortal-public/rsconcept/backend/cctext/resolver.py


''' Reference resolution API. '''
import re
from dataclasses import dataclass, field
from typing import cast, Optional

from .conceptapi import inflect_dependant
from .context import TermContext
from .reference import EntityReference, SyntacticReference, parse_reference, Reference
# Matches a reference of the form @{ENTITY|...} and captures ENTITY as group 1.
# The first character of ENTITY must not be a digit or '-'
# (presumably to skip references whose first field is a numeric offset - confirm
# against parse_reference).
# NOTE(review): '.' also matches '}', so a reference without '|' lets the lazy
# capture run on into a later reference that does contain '|'.
_REF_ENTITY_PATTERN = re.compile(r'@{([^0-9\-].*?)\|.*?}')
def extract_entities(text: str) -> list[str]:
    ''' Collect the distinct entity names referenced in text.

    Names are returned in order of first appearance; repeated references
    to the same entity are reported once.
    '''
    found = re.finditer(_REF_ENTITY_PATTERN, text)
    # dict keys keep insertion order, which gives an ordered de-duplication.
    return list(dict.fromkeys(match.group(1) for match in found))
def resolve_entity(ref: EntityReference, context: TermContext) -> str:
    ''' Produce the text that replaces an entity reference.

    Looks up ref.entity in context and asks the found term for the form
    ref.form. An error marker string is returned instead when the entity
    is not in context or when the term yields an empty string.
    '''
    name = ref.entity
    if name not in context:
        return f'!Неизвестная сущность: {name}!'

    term_text = context[name].get_form(ref.form)
    return term_text if term_text != '' else f'!Отсутствует термин: {name}!'
def resolve_syntactic(ref: SyntacticReference, index: int, allrefs: list['ResolvedReference']) -> str:
    ''' Resolve syntactic reference.

    A syntactic reference points at an entity reference by relative offset:
    offset N > 0 selects the N-th entity reference after position index in
    allrefs, offset N < 0 selects the |N|-th entity reference before it.
    Non-entity references are skipped while counting.

    Parameters:
        ref: syntactic reference being resolved.
        index: position of ref inside allrefs.
        allrefs: all references of the text, in text order; entity
            references are expected to be resolved already, because their
            resolved text is what gets passed to inflect_dependant.

    Returns the result of inflect_dependant(ref.nominal, <target resolved text>),
    or an error marker string when the offset is zero or points outside the
    available entity references.
    '''
    offset = ref.offset
    if offset > 0:
        candidates = allrefs[index + 1:]
    elif offset < 0:
        # Walk backwards starting from the reference just before this one.
        candidates = allrefs[:max(index, 0)][::-1]
    else:
        candidates = []

    mainref: Optional['ResolvedReference'] = None
    remaining = abs(offset)
    for candidate in candidates:
        if not isinstance(candidate.ref, EntityReference):
            continue
        remaining -= 1
        if remaining == 0:
            # Stop at the requested entity; continuing the scan would let
            # farther entity references overwrite the match.
            mainref = candidate
            break

    if mainref is None:
        return f'!Некорректное смещение: {ref.offset}!'
    return inflect_dependant(ref.nominal, mainref.resolved)
@dataclass
class Position:
    ''' Location of a contiguous text segment, counted from 0. '''
    # Index of the first character of the segment.
    start: int = 0
    # End index of the segment; Resolver fills it with the index just past
    # the last character (regex match end / running output length).
    finish: int = 0
@dataclass
class ResolvedReference:
    ''' Resolved reference data.

    Bundles a parsed reference with its replacement text and with the
    segments it occupies in the input and in the output text.
    '''
    # Parsed reference object.
    ref: Reference
    # Replacement text; empty until the reference is resolved.
    resolved: str = ''
    # default_factory gives every instance its own Position. A shared
    # Position() default would be mutated across instances and is rejected
    # by dataclasses on Python 3.11+ because Position is unhashable.
    pos_input: Position = field(default_factory=Position)
    pos_output: Position = field(default_factory=Position)
class Resolver:
    ''' Text reference resolver.

    Finds @{...} references in a text, resolves each of them against the
    supplied term context and builds the output text with references
    replaced by their resolved values. After resolve() the attributes
    refs, input and output hold the data of the last run.
    '''
    # Any @{...} segment; the content is validated by parse_reference.
    REFERENCE_PATTERN = re.compile(r'@{.*?}')

    def __init__(self, context: TermContext):
        ''' Create a resolver bound to the given term context. '''
        self.context = context
        self.refs: list[ResolvedReference] = []
        self.input = ''
        self.output = ''

    def resolve(self, text: str) -> str:
        ''' Resolve references in input text.

        Note: data on references positions is accessed through instance
        attributes (refs[i].pos_input / refs[i].pos_output).

        Returns the text with every parsed reference replaced; the text is
        returned unchanged when it contains no parsable references.
        '''
        self._reset(text)
        self._parse_refs()
        if not self.refs:
            self.output = self.input
        else:
            self._resolve_refs()
            self._combine_output()
        return self.output

    def _reset(self, input_text: str):
        ''' Drop the results of the previous run and store the new input. '''
        self.refs = []
        self.input = input_text
        self.output = ''

    def _parse_refs(self):
        ''' Collect references found in the input together with their input positions.

        Segments that parse_reference rejects (returns None) are not
        recorded and therefore stay in the output as plain text.
        '''
        for segment in Resolver.REFERENCE_PATTERN.finditer(self.input):
            parsed = parse_reference(segment[0])
            if parsed is None:
                continue
            self.refs.append(ResolvedReference(
                ref=parsed,
                resolved='',
                pos_input=Position(segment.start(0), segment.end(0)),
            ))

    def _resolve_refs(self):
        ''' Fill in the resolved text of every collected reference.

        Entity references are resolved in a first pass because syntactic
        references read the resolved text of the entity they point to.
        '''
        for ref in self.refs:
            if isinstance(ref.ref, EntityReference):
                ref.resolved = resolve_entity(ref.ref, self.context)
        for index, ref in enumerate(self.refs):
            if isinstance(ref.ref, SyntacticReference):
                ref.resolved = resolve_syntactic(ref.ref, index, self.refs)

    def _combine_output(self):
        ''' Build the output text and record the output position of each reference. '''
        parts: list[str] = []
        out_len = 0  # length of the output assembled so far
        pos_in = 0   # first input index not yet copied to the output
        for ref in self.refs:
            plain = self.input[pos_in:ref.pos_input.start]
            parts.append(plain)
            out_len += len(plain)

            parts.append(ref.resolved)
            ref.pos_output = Position(out_len, out_len + len(ref.resolved))
            out_len += len(ref.resolved)

            pos_in = ref.pos_input.finish
        # Tail of the input after the last reference.
        parts.append(self.input[pos_in:])
        # Join once instead of growing the string with repeated '+='.
        self.output = ''.join(parts)