Update build process and remove cctext inlining

This commit is contained in:
IRBorisov 2024-06-14 18:33:34 +03:00
parent 2796ffd5f3
commit a210de14ff
14 changed files with 39 additions and 1131 deletions

View File

@ -84,7 +84,7 @@
</PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<ClCompile>
<AdditionalIncludeDirectories>include;header;src\pch;..\ConceptCore\output\include;..\OfficeOLE\include;C:\Program Files (x86)\Python312-32\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>include;header;src\pch;..\GH-ConceptCore\output\include;..\OfficeOLE\include;C:\Program Files (x86)\Python312-32\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>POCO_STATIC; NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PrecompiledHeader>Use</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
@ -101,7 +101,7 @@
<BuildStlModules>false</BuildStlModules>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>lib\x86;..\ConceptCore\output\lib\x86;..\OfficeOLE\output\lib\x86;C:\Program Files (x86)\Python312-32\libs;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories>lib\x86;..\GH-ConceptCore\output\lib\x86;..\OfficeOLE\output\lib\x86;C:\Program Files (x86)\Python312-32\libs;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>Version.lib;OfficeOLE.lib;ConceptCoreLibrary.lib;RSlang.lib;cclGraph.lib;cclLang.lib;oldnames.lib;Htmlhelp.Lib;iphlpapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<SubSystem>Windows</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -121,8 +121,7 @@
<AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ResourceCompile>
<PostBuildEvent>
<Command>xcopy /y /s /q /i "import\cctext\" "$(OutDir)\cctext"
xcopy /y /s /q /i "distr\app" "$(OutDir)"
<Command>xcopy /y /s /q /i "distr\app" "$(OutDir)"
copy "C:\Program Files (x86)\Python312-32\Python312.dll" "$(OutDir)"
copy "C:\Program Files (x86)\Python312-32\Python312.pdb" "$(OutDir)"
if not exist bin\x86 mkdir bin\x86
@ -131,7 +130,7 @@ copy "$(OutDir)Exteor.exe" "bin\x86\Exteor.exe"</Command>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<ClCompile>
<AdditionalIncludeDirectories>include;header;src\pch;..\ConceptCore\output\include;..\OfficeOLE\include;C:\Program Files\Python312\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>include;header;src\pch;..\GH-ConceptCore\output\include;..\OfficeOLE\include;C:\Program Files\Python312\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>POCO_STATIC;NDEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<PrecompiledHeader>Use</PrecompiledHeader>
<WarningLevel>Level4</WarningLevel>
@ -148,7 +147,7 @@ copy "$(OutDir)Exteor.exe" "bin\x86\Exteor.exe"</Command>
<BuildStlModules>false</BuildStlModules>
</ClCompile>
<Link>
<AdditionalLibraryDirectories>lib\x64;..\ConceptCore\output\lib\x64;..\OfficeOLE\output\lib\x64;C:\Program Files\Python312\libs;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories>lib\x64;..\GH-ConceptCore\output\lib\x64;..\OfficeOLE\output\lib\x64;C:\Program Files\Python312\libs;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>Version.lib;OfficeOLE.lib;ConceptCoreLibrary.lib;RSlang.lib;cclGraph.lib;cclLang.lib;oldnames.lib;Htmlhelp.Lib;iphlpapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<SubSystem>Windows</SubSystem>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
@ -168,8 +167,7 @@ copy "$(OutDir)Exteor.exe" "bin\x86\Exteor.exe"</Command>
<AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ResourceCompile>
<PostBuildEvent>
<Command>xcopy /y /s /q /i "import\cctext" "$(OutDir)\cctext\"
xcopy /y /s /q /i "distr\app" "$(OutDir)"
<Command>xcopy /y /s /q /i "distr\app" "$(OutDir)"
if not exist bin\x64 mkdir bin\x64
copy "$(OutDir)Exteor.exe" "bin\x64\Exteor.exe"</Command>
</PostBuildEvent>
@ -182,7 +180,7 @@ copy "$(OutDir)Exteor.exe" "bin\x64\Exteor.exe"</Command>
<SDLCheck>true</SDLCheck>
<LanguageStandard>stdcpplatest</LanguageStandard>
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>include;header;src\pch;..\ConceptCore\output\include;..\OfficeOLE\include;C:\Program Files (x86)\Python312-32\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>include;header;src\pch;..\GH-ConceptCore\output\include;..\OfficeOLE\include;C:\Program Files (x86)\Python312-32\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>POCO_STATIC;_DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
<ObjectFileName>$(IntDir)obj\</ObjectFileName>
@ -191,7 +189,7 @@ copy "$(OutDir)Exteor.exe" "bin\x64\Exteor.exe"</Command>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<AdditionalLibraryDirectories>lib\x86;..\ConceptCore\output\lib\x86;..\OfficeOLE\output\lib\x86;C:\Program Files (x86)\Python312-32\libs;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories>lib\x86;..\GH-ConceptCore\output\lib\x86;..\OfficeOLE\output\lib\x86;C:\Program Files (x86)\Python312-32\libs;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>Version.lib;OfficeOLEd.lib;ConceptCoreLibraryd.lib;cclLangd.lib;RSlangd.lib;cclGraphd.lib;Htmlhelp.Lib;iphlpapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
<ProgramDatabaseFile>$(OutDir)$(ProjectName).pdb</ProgramDatabaseFile>
@ -211,8 +209,7 @@ copy "$(OutDir)Exteor.exe" "bin\x64\Exteor.exe"</Command>
<AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ResourceCompile>
<PostBuildEvent>
<Command>xcopy /y /s /q /i "import\cctext" "$(OutDir)\cctext\"
xcopy /y /s /q /i "distr\app" "$(OutDir)"
<Command>xcopy /y /s /q /i "distr\app" "$(OutDir)"
copy "C:\Program Files (x86)\Python312-32\Python312_d.dll" "$(OutDir)"
copy "C:\Program Files (x86)\Python312-32\Python312_d.pdb" "$(OutDir)"</Command>
</PostBuildEvent>
@ -229,7 +226,7 @@ copy "C:\Program Files (x86)\Python312-32\Python312_d.pdb" "$(OutDir)"</Command>
<SDLCheck>true</SDLCheck>
<LanguageStandard>stdcpplatest</LanguageStandard>
<ConformanceMode>true</ConformanceMode>
<AdditionalIncludeDirectories>include;header;src\pch;..\ConceptCore\output\include;..\OfficeOLE\include;C:\Program Files\Python312\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>include;header;src\pch;..\GH-ConceptCore\output\include;..\OfficeOLE\include;C:\Program Files\Python312\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<PreprocessorDefinitions>POCO_STATIC; _DEBUG;_WINDOWS;%(PreprocessorDefinitions)</PreprocessorDefinitions>
<AssemblerListingLocation>$(IntDir)</AssemblerListingLocation>
<ObjectFileName>$(IntDir)obj\</ObjectFileName>
@ -238,7 +235,7 @@ copy "C:\Program Files (x86)\Python312-32\Python312_d.pdb" "$(OutDir)"</Command>
</ClCompile>
<Link>
<SubSystem>Windows</SubSystem>
<AdditionalLibraryDirectories>lib\x64;..\ConceptCore\output\lib\x64;..\OfficeOLE\output\lib\x64;C:\Program Files\Python312\libs;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories>lib\x64;..\GH-ConceptCore\output\lib\x64;..\OfficeOLE\output\lib\x64;C:\Program Files\Python312\libs;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>Version.lib;OfficeOLEd.lib;ConceptCoreLibraryd.lib;cclLangd.lib;RSlangd.lib;cclGraphd.lib;Htmlhelp.Lib;iphlpapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<OutputFile>$(OutDir)$(TargetName)$(TargetExt)</OutputFile>
<ProgramDatabaseFile>$(OutDir)$(ProjectName).pdb</ProgramDatabaseFile>
@ -257,8 +254,7 @@ copy "C:\Program Files (x86)\Python312-32\Python312_d.pdb" "$(OutDir)"</Command>
<AdditionalIncludeDirectories>$(IntDir);%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
</ResourceCompile>
<PostBuildEvent>
<Command>xcopy /y /s /q /i "import\cctext" "$(OutDir)\cctext\"
xcopy /y /s /q /i "distr\app" "$(OutDir)"</Command>
<Command>xcopy /y /s /q /i "distr\app" "$(OutDir)"</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<ItemGroup>
@ -437,7 +433,7 @@ xcopy /y /s /q /i "distr\app" "$(OutDir)"</Command>
<Text Include="Doc\Порядок выпуска версии.txt" />
</ItemGroup>
<ItemGroup>
<ProjectReference Include="..\ConceptCore\ccl\core\ConceptLibrary.vcxproj">
<ProjectReference Include="..\GH-ConceptCore\ccl\core\ConceptLibrary.vcxproj">
<Project>{b0aba27b-9d39-4b48-9977-aff20925b309}</Project>
</ProjectReference>
<ProjectReference Include="..\OfficeOLE\OfficeOLE.vcxproj">

View File

@ -8,9 +8,9 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "Exteor", "Exteor.vcxproj",
{B0ABA27B-9D39-4B48-9977-AFF20925B309} = {B0ABA27B-9D39-4B48-9977-AFF20925B309}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ConceptCore", "..\ConceptCore\ccl\core\ConceptLibrary.vcxproj", "{B0ABA27B-9D39-4B48-9977-AFF20925B309}"
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ConceptCore", "..\GH-ConceptCore\ccl\core\ConceptLibrary.vcxproj", "{B0ABA27B-9D39-4B48-9977-AFF20925B309}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cclTest", "..\ConceptCore\ccl\core\test\cclTest.vcxproj", "{F87048D4-952A-460E-96E8-1E2E1EAE34FC}"
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cclTest", "..\GH-ConceptCore\ccl\core\test\cclTest.vcxproj", "{F87048D4-952A-460E-96E8-1E2E1EAE34FC}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "XTRCoreTest", "test\XTRCoreTest.vcxproj", "{576D16B8-96BF-4B5A-8B09-E1916375E34F}"
ProjectSection(ProjectDependencies) = postProject
@ -18,19 +18,19 @@ Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "XTRCoreTest", "test\XTRCore
{F87048D4-952A-460E-96E8-1E2E1EAE34FC} = {F87048D4-952A-460E-96E8-1E2E1EAE34FC}
EndProjectSection
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "RSlang", "..\ConceptCore\ccl\rslang\RSlang.vcxproj", "{A8529C63-42F5-43E6-97B8-2EC83F23E1F9}"
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "RSlang", "..\GH-ConceptCore\ccl\rslang\RSlang.vcxproj", "{A8529C63-42F5-43E6-97B8-2EC83F23E1F9}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "RSLangTest", "..\ConceptCore\ccl\rslang\test\rslTest.vcxproj", "{32469CE1-303B-4DB4-8E03-B7EBED5851EB}"
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "RSLangTest", "..\GH-ConceptCore\ccl\rslang\test\rslTest.vcxproj", "{32469CE1-303B-4DB4-8E03-B7EBED5851EB}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cclGraph", "..\ConceptCore\ccl\cclGraph\cclGraph.vcxproj", "{7E1D5338-F819-4C96-B461-9EAAB8D02E1D}"
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cclGraph", "..\GH-ConceptCore\ccl\cclGraph\cclGraph.vcxproj", "{7E1D5338-F819-4C96-B461-9EAAB8D02E1D}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cclGraphTest", "..\ConceptCore\ccl\cclGraph\test\cclGraphTest.vcxproj", "{5A2501C1-FEFB-4B14-A94D-E8F19ADEA239}"
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cclGraphTest", "..\GH-ConceptCore\ccl\cclGraph\test\cclGraphTest.vcxproj", "{5A2501C1-FEFB-4B14-A94D-E8F19ADEA239}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cclCommonsTest", "..\ConceptCore\ccl\cclCommons\test\cclCommonsTest.vcxproj", "{53A380CF-B599-4170-89B1-642F1C3772E1}"
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cclCommonsTest", "..\GH-ConceptCore\ccl\cclCommons\test\cclCommonsTest.vcxproj", "{53A380CF-B599-4170-89B1-642F1C3772E1}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cclLang", "..\ConceptCore\ccl\cclLang\cclLang.vcxproj", "{76B03803-56CC-47C2-A8F0-2241FCAF2898}"
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cclLang", "..\GH-ConceptCore\ccl\cclLang\cclLang.vcxproj", "{76B03803-56CC-47C2-A8F0-2241FCAF2898}"
EndProject
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cclLangTest", "..\ConceptCore\ccl\cclLang\test\cclLangTest.vcxproj", "{4754356B-DC01-4564-A035-270FFB72F6A0}"
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "cclLangTest", "..\GH-ConceptCore\ccl\cclLang\test\cclLangTest.vcxproj", "{4754356B-DC01-4564-A035-270FFB72F6A0}"
EndProject
Project("{2150E333-8FDC-42A3-9474-1A3956D46DE8}") = "libs", "libs", "{DC591058-8A8A-460E-93D0-B57C848DF12B}"
EndProject

View File

@ -1,16 +0,0 @@
''' Concept core text processing library. '''
# pylint: skip-file
from .syntax import RuSyntax, Capitalization
from .rumodel import Morphology, SemanticRole, WordTag, morpho, split_grams, combine_grams
from .ruparser import PhraseParser, WordToken, Collation
from .reference import EntityReference, ReferenceType, SyntacticReference, parse_reference
from .context import TermForm, Entity, TermContext
from .resolver import Reference, Position, Resolver, ResolvedReference, resolve_entity, resolve_syntactic, extract_entities
from .conceptapi import (
parse, normalize,
generate_lexeme, inflect, inflect_context, inflect_substitute, inflect_dependant,
match_all_morpho, find_substr
)
# TODO: implement Part of speech transition for VERB <-> NOUN

View File

@ -1,90 +0,0 @@
'''
Concept API Python functions.
::guarantee:: doesn't raise exceptions and returns workable outputs
'''
from .rumodel import Morphology, split_grams
from .syntax import RuSyntax
from .ruparser import PhraseParser

# Single shared parser used by every API function in this module.
parser = PhraseParser()


def parse(text: str, require_grams: str = '') -> str:
    ''' Determine morpho tags for input text.
    ::returns:: string of comma separated grammar tags or empty string '''
    model = parser.parse(text, require_grams=split_grams(require_grams))
    if model is None:
        return ''
    result = model.get_morpho().to_text()
    # 'UNKN' is the analyzer's marker for an unknown parse - hide it from callers.
    return result if result != 'UNKN' else ''


# def parse_variants(text: str, require_grams: str = '') -> list[tuple[str, str]]:
#     ''' Get all variants of a parse.
#     ::returns:: string of comma separated grammar tags or empty string '''


def generate_lexeme(text_normal: str) -> list[tuple[str, str]]:
    ''' Get all inflected forms belonging to same Lexeme.
    ::returns:: list of (inflected text, morpho tags) pairs; empty when parse fails '''
    model = parser.parse(text_normal)
    if not model:
        return []
    result = []
    for form in model.get_form().lexeme:
        result.append((model.inflect(form.tag.grammemes), Morphology(form.tag).to_text()))
    return result


def normalize(text: str) -> str:
    ''' Generate normal form.
    ::returns:: normal form of input text or text itself if no parse is available '''
    model = parser.parse(text)
    if model is None:
        return text
    return model.normal_form()


def inflect(text: str, target_grams: str) -> str:
    ''' Inflect text to match required tags.
    ::returns:: inflected text or initial text if inflection failed '''
    target_set = split_grams(target_grams)
    model = parser.parse(text)
    if model is None:
        return text
    return model.inflect(target_set)


def inflect_context(target: str, before: str = '', after: str = '') -> str:
    ''' Inflect text in accordance to context before and after. '''
    return parser.inflect_context(target, before, after)


def inflect_substitute(substitute_normal: str, original: str) -> str:
    ''' Inflect substitute to match original form. '''
    return parser.inflect_substitute(substitute_normal, original)


def inflect_dependant(dependant_normal: str, master: str) -> str:
    ''' Inflect dependant to coordinate with master text. '''
    return parser.inflect_dependant(dependant_normal, master)


def match_all_morpho(text: str, filter_grams: str) -> list[list[int]]:
    ''' Search for all words corresponding to tags.
    ::returns:: list of [start, stop] positions of matching words '''
    target_set = split_grams(filter_grams)
    if len(target_set) == 0:
        return []
    result = []
    for elem in RuSyntax.tokenize(text):
        model = parser.parse(elem.text, require_grams=target_set)
        if model:
            result.append([elem.start, elem.stop])
    return result


def find_substr(text: str, sub: str) -> tuple[int, int]:
    ''' Search for substring position in text regardless of morphology. '''
    return parser.find_substr(text, sub)

View File

@ -1,84 +0,0 @@
''' Term context for reference resolution. '''
from typing import Iterable, Optional, TypedDict

from .ruparser import PhraseParser
from .rumodel import WordTag

# Shared parser instance used for generating term forms on demand.
parser = PhraseParser()


class TermForm(TypedDict):
    ''' Represents term in a specific form. '''
    text: str
    grams: Iterable[str]


def _match_grams(query: Iterable[str], test: Iterable[str]) -> bool:
    ''' Check if grams from test fit query. '''
    for gram in test:
        if not gram in query:
            # A missing non-POS grammeme disqualifies the form outright.
            if not gram in WordTag.PARTS_OF_SPEECH:
                return False
            # A missing POS grammeme only disqualifies when the query
            # explicitly requests some part of speech.
            for pos in WordTag.PARTS_OF_SPEECH:
                if pos in query:
                    return False
    return True


def _search_form(query: Iterable[str], data: Iterable[TermForm]) -> Optional[str]:
    ''' Return text of first form in data matching query grammemes, else None. '''
    for form in data:
        if _match_grams(query, form['grams']):
            return form['text']
    return None


class Entity:
    ''' Represents text entity. '''
    def __init__(self, alias: str, nominal: str, manual_forms: Optional[Iterable[TermForm]] = None):
        if manual_forms is None:
            self.manual = []
        else:
            self.manual = list(manual_forms)
        self.alias = alias
        self._nominal = nominal
        self._cached: list[TermForm] = []

    def get_nominal(self) -> str:
        ''' Getter for _nominal. '''
        return self._nominal

    def set_nominal(self, new_text: str):
        ''' Setter for _nominal.
        Note: clears manual and cached forms. '''
        if self._nominal == new_text:
            return
        self._nominal = new_text
        self.manual = []
        self._cached = []

    def get_form(self, grams: Iterable[str]) -> str:
        ''' Get specific term form.
        ::returns:: cached/manual/inflected form, or an inline !error! marker. '''
        # Fix: materialize once. 'grams' may be a one-shot iterator, and the
        # original emptiness check (all(False for _ in grams)) exhausted it
        # before the lookups below, which then saw an empty iterable and the
        # cache stored a spent iterator.
        grams = list(grams)
        if len(grams) == 0:
            return self._nominal
        text = _search_form(grams, self.manual)
        if text is not None:
            return text
        text = _search_form(grams, self._cached)
        if text is not None:
            return text
        model = parser.parse(self._nominal)
        if model is None:
            text = self._nominal
        else:
            try:
                text = model.inflect(grams)
            except ValueError as error:
                # Surface inflection failures inline, localized for end users.
                text = f'!{error}!'.replace('Unknown grammeme', 'Неизвестная граммема')
        self._cached.append({'text': text, 'grams': grams})
        return text


# Represents term context for resolving entity references.
TermContext = dict[str, Entity]

View File

@ -1,60 +0,0 @@
''' Text reference API. '''
from enum import Enum, unique
from typing import Optional, Union


@unique
class ReferenceType(Enum):
    ''' Text reference types. '''
    entity = 'entity'
    syntactic = 'syntax'


class EntityReference:
    ''' Reference to entity. '''
    def __init__(self, identifier: str, form: str):
        self.entity = identifier
        self.form = form

    def get_type(self) -> ReferenceType:
        ''' Kind discriminator for this reference. '''
        return ReferenceType.entity

    def to_text(self) -> str:
        ''' Serialize back into @{alias|form} notation. '''
        return f'@{{{self.entity}|{self.form}}}'


class SyntacticReference:
    ''' Reference to syntactic dependency on EntityReference. '''
    def __init__(self, referral_offset: int, text: str):
        self.nominal = text
        self.offset = referral_offset

    def get_type(self) -> ReferenceType:
        ''' Kind discriminator for this reference. '''
        return ReferenceType.syntactic

    def to_text(self) -> str:
        ''' Serialize back into @{offset|nominal} notation. '''
        return f'@{{{self.offset}|{self.nominal}}}'


Reference = Union[EntityReference, SyntacticReference]


def parse_reference(text: str) -> Optional[Reference]:
    ''' Parse @{...|...} notation into a Reference, or None when malformed. '''
    is_wrapped = len(text) >= 4 and text.startswith('@{') and text.endswith('}')
    if not is_wrapped:
        return None
    parts = [part.strip() for part in text[2:-1].split('|')]
    if len(parts) != 2 or not parts[0] or parts[0][0] in '0':
        return None
    if parts[0][0] not in '-123456789':
        # Entity alias: spaces inside the grammeme list are not significant.
        return EntityReference(parts[0], parts[1].replace(' ', ''))
    if not parts[1]:
        return None
    try:
        return SyntacticReference(int(parts[0]), parts[1])
    except ValueError:
        return None

View File

@ -1,140 +0,0 @@
''' Reference resolution API. '''
import re
from dataclasses import dataclass, field
from typing import cast, Optional

from .conceptapi import inflect_dependant
from .context import TermContext
from .reference import EntityReference, SyntacticReference, parse_reference, Reference
from .rumodel import split_grams
_REF_ENTITY_PATTERN = re.compile(r'@{([^0-9\-][^\}\|\{]*?)\|([^\}\|\{]*?)}')


def extract_entities(text: str) -> list[str]:
    ''' Extract list of entities that are referenced.
    Order of first appearance is preserved; duplicates are dropped. '''
    aliases = (match.group(1) for match in _REF_ENTITY_PATTERN.finditer(text))
    return list(dict.fromkeys(aliases))
def resolve_entity(ref: EntityReference, context: TermContext) -> str:
    ''' Resolve entity reference.
    ::returns:: resolved term text, or a localized !error! marker. '''
    alias = ref.entity
    if alias not in context:
        return f'!Неизвестная сущность: {alias}!'
    resolved = context[alias].get_form(split_grams(ref.form))
    if resolved == '':
        return f'!Отсутствует термин: {alias}!'
    return resolved
# Resolve a syntactic reference by locating its "master" entity reference
# in the ordered list of references, then inflecting the nominal to
# coordinate with the master's already-resolved text.
def resolve_syntactic(ref: SyntacticReference, index: int, references: list['ResolvedReference']) -> str:
''' Resolve syntactic reference.
::param ref:: syntactic reference to resolve.
::param index:: position of ref inside references.
::param references:: all references of the text in order of appearance.
::returns:: coordinated text, or a localized error marker when no master found. '''
offset = ref.offset
master: Optional['ResolvedReference'] = None
# Positive offset: scan forward for the offset-th entity reference.
if offset > 0:
index += 1
while index < len(references):
if isinstance(references[index].ref, EntityReference):
if offset == 1:
# NOTE(review): no break after a match - a later entity reference
# overwrites master, so the LAST forward entity wins; confirm
# whether stopping at the first match was intended.
master = references[index]
else:
offset -= 1
index += 1
else:
# Negative offset: scan backward for the |offset|-th entity reference.
index -= 1
while index >= 0:
if isinstance(references[index].ref, EntityReference):
if offset == -1:
# NOTE(review): same missing-break pattern as the forward scan.
master = references[index]
else:
offset += 1
index -= 1
if master is None:
return f'!Некорректное смещение: {ref.offset}!'
return inflect_dependant(ref.nominal, master.resolved)
@dataclass
class Position:
    ''' 0-indexed contiguous segment position in text. '''
    start: int = 0
    finish: int = 0

    def __hash__(self) -> int:
        return hash((self.start, self.finish))


@dataclass
class ResolvedReference:
    ''' Resolved reference data '''
    ref: Reference
    resolved: str = ''
    # Fix: default_factory instead of a plain `Position()` default - the
    # latter creates ONE shared Position instance reused by every
    # ResolvedReference, so mutating one reference's position (as
    # Resolver._combine_output does) would corrupt all the others.
    pos_input: Position = field(default_factory=Position)
    pos_output: Position = field(default_factory=Position)

    def __hash__(self) -> int:
        return hash((self.resolved, self.pos_input, self.pos_output, self.ref.to_text()))
class Resolver:
    ''' Text reference resolver '''

    # Matches any @{...} segment; contents are validated by parse_reference.
    REFERENCE_PATTERN = re.compile(r'@{[^\}\{]*?}')

    def __init__(self, context: TermContext):
        self.context = context
        self.refs = cast(list[ResolvedReference], [])
        self.input = ''
        self.output = ''

    def resolve(self, text: str) -> str:
        ''' Resolve references in input text.
        Note: data on references positions is accessed through class attributes '''
        self._reset(text)
        self._parse_refs()
        if not self.refs:
            self.output = self.input
        else:
            self._resolve_refs()
            self._combine_output()
        return self.output

    def _reset(self, input_text: str):
        ''' Prepare internal state for a new input text. '''
        self.refs = cast(list[ResolvedReference], [])
        self.input = input_text
        self.output = ''

    def _parse_refs(self):
        ''' Collect well-formed references together with their input positions. '''
        for segment in Resolver.REFERENCE_PATTERN.finditer(self.input):
            parsed = parse_reference(segment[0])
            if parsed is None:
                continue
            position = Position(segment.start(0), segment.end(0))
            self.refs.append(ResolvedReference(ref=parsed, resolved='', pos_input=position))

    def _resolve_refs(self):
        ''' Entity references resolve first so syntactic ones can depend on them. '''
        for ref in self.refs:
            if isinstance(ref.ref, EntityReference):
                ref.resolved = resolve_entity(ref.ref, self.context)
        for (index, ref) in enumerate(self.refs):
            if isinstance(ref.ref, SyntacticReference):
                ref.resolved = resolve_syntactic(ref.ref, index, self.refs)

    def _combine_output(self):
        ''' Rebuild the text, splicing resolved references and recording output positions. '''
        pos_in = 0
        for ref in self.refs:
            self.output += self.input[pos_in : ref.pos_input.start]
            self.output += ref.resolved
            ref.pos_output = Position(len(self.output) - len(ref.resolved), len(self.output))
            pos_in = ref.pos_input.finish
        self.output += self.input[pos_in:]

View File

@ -1,118 +0,0 @@
''' Russian language models. '''
from __future__ import annotations
from enum import Enum, unique
from typing import Iterable, Optional
from pymorphy3 import MorphAnalyzer
from pymorphy3.tagset import OpencorporaTag as WordTag
# ''' Morphology parser. '''
# Shared pymorphy3 analyzer; dictionary loading is expensive, so a single
# module-wide instance is created at import time and reused everywhere.
morpho = MorphAnalyzer()
# Alias for any iterable of grammeme strings (e.g. ['sing', 'nomn']).
Grammemes = Iterable[str]
def split_grams(text: str) -> list[str]:
    ''' Split grammemes string into set of items.
    Empty chunks produced by consecutive commas are dropped. '''
    return [chunk.strip() for chunk in text.split(',') if chunk]
def combine_grams(tags: Iterable[str]) -> str:
    ''' Combine grammemes into string (comma separated, original order). '''
    return str.join(',', tags)
@unique
class SemanticRole(Enum):
    ''' Enumerating semantic types for different parse patterns. '''
    unknwn = 0
    term = 1
    action = 2
    definition = 3

    @staticmethod
    def from_POS(pos: Optional[str]) -> 'SemanticRole':
        ''' Production method: types from part of speech. '''
        if pos in ('NOUN', 'NPRO'):
            return SemanticRole.term
        if pos in ('VERB', 'INFN', 'PRTF', 'PRTS'):
            return SemanticRole.action
        if pos in ('ADJF', 'ADJS'):
            return SemanticRole.definition
        return SemanticRole.unknwn
class Morphology:
    ''' Wrapper for OpencorporaTag expanding functionality for multiword.
    Full morphology tags see http://opencorpora.org/dict.php?act=gram
    '''
    def __init__(self, tag: WordTag, semantic=SemanticRole.unknwn):
        self.tag = tag
        # Derive semantic role from part of speech when not supplied by caller.
        self.semantic = semantic if semantic != SemanticRole.unknwn else SemanticRole.from_POS(tag.POS)

    # Parts of speech / grammemes that exclude specific inflection categories.
    _TAGS_IMMUTABLE = frozenset(['INFN', 'ADVB', 'COMP', 'PNCT', 'PREP', 'CONJ', 'PRCL', 'INTJ'])
    _TAGS_NO_TENSE = frozenset(['NOUN', 'NPRO', 'ADJF', 'ADJS'])
    _TAGS_NO_CASE = frozenset(['GRND', 'VERB', 'ADJS', 'PRTS'])
    _TAGS_NO_NUMBER = frozenset(['GRND'])
    _TAGS_NO_GENDER = frozenset(['GRND', 'NOUN', 'NPRO', 'plur'])
    _TAGS_NO_PERSON = frozenset(['GRND', 'NOUN', 'ADJF', 'ADJS', 'PRTF', 'PRTS', 'past'])

    @property
    def can_coordinate(self) -> bool:
        ''' Check if coordination can change text. '''
        return self.tag.POS in ['NOUN', 'NPRO', 'NUMR', 'ADJF', 'ADJS', 'PRTF', 'PRTS']

    @staticmethod
    def is_dependable(pos: str):
        ''' Check if this morphology can be dependant. '''
        return pos in ['ADJF', 'ADJS', 'PRTF', 'PRTS']

    @property
    def effective_POS(self) -> Optional[str]:
        ''' Access part of speech. Pronouns are considered as nouns '''
        pos: Optional[str] = self.tag.POS
        if pos and self.tag.POS == 'NPRO':
            return 'NOUN'
        return pos

    def complete_grams(self, grams: Iterable[str]) -> set[str]:
        ''' Add missing tags before inflection.
        ::returns:: copy of grams completed with this word's own grammemes. '''
        result = set(grams)
        pos = self.tag.POS
        if pos and result.isdisjoint(WordTag.PARTS_OF_SPEECH):
            # An infinitive inflected with additional tags becomes a finite verb.
            result.add(pos if pos != 'INFN' or len(result) == 0 else 'VERB')
        if not result.isdisjoint(self._TAGS_IMMUTABLE):
            return result
        if self.tag.case and result.isdisjoint(WordTag.CASES) and result.isdisjoint(self._TAGS_NO_CASE):
            result.add(self.tag.case)
        if self.tag.tense and result.isdisjoint(WordTag.TENSES) and result.isdisjoint(self._TAGS_NO_TENSE):
            # Past tense is incompatible with person; present with gender.
            if (self.tag.tense != 'past' or result.isdisjoint(WordTag.PERSONS)) \
                    and (self.tag.tense != 'pres' or result.isdisjoint(WordTag.GENDERS)):
                result.add(self.tag.tense)
        if self.tag.number and result.isdisjoint(WordTag.NUMBERS) and result.isdisjoint(self._TAGS_NO_NUMBER):
            if self.tag.number != 'plur' or result.isdisjoint(WordTag.GENDERS):
                result.add(self.tag.number)
        if self.tag.gender and result.isdisjoint(WordTag.GENDERS) and result.isdisjoint(self._TAGS_NO_GENDER):
            if 'PRTF' in result or 'pres' not in result:
                result.add(self.tag.gender)
        if self.tag.person and result.isdisjoint(WordTag.PERSONS) and result.isdisjoint(self._TAGS_NO_PERSON):
            result.add(self.tag.person)
        if 'plur' in result and not result.isdisjoint(WordTag.GENDERS):
            # Plural forms carry no gender in Russian.
            result = result.difference(WordTag.GENDERS)
        return result

    def coordination_grams(self) -> set[str]:
        ''' Return set of grammemes for inflection to keep coordination. '''
        result = set()
        if self.tag.case:
            result.add(self.tag.case)
        # Fix: guard on the number value itself. The previous `if self.tag:`
        # check was always truthy and could add None to the grammeme set
        # when the tag has no number category.
        if self.tag.number:
            result.add(self.tag.number)
        if self.tag.gender and 'plur' not in result:
            result.add(self.tag.gender)
        return result

    def to_text(self) -> str:
        ''' Produce string of all grammemes. '''
        return combine_grams(self.tag.grammemes)

View File

@ -1,486 +0,0 @@
''' Parsing russian language using pymorphy3 library. '''
from __future__ import annotations
from typing import Optional
from razdel.substring import Substring as Segment
from pymorphy3.analyzer import Parse as WordParse
from .syntax import RuSyntax, Capitalization
from .rumodel import SemanticRole, Morphology, WordTag, morpho, Grammemes
# Sentinel values: "no index found", "word opts out of coordination",
# "no main word identified", respectively.
INDEX_NONE = -1
NO_COORDINATION = -1
WORD_NONE = -1
class WordToken:
    ''' Atomic text token. '''
    def __init__(self, segment: Segment, parse: list[WordParse], main_parse: int = 0):
        self.segment: Segment = segment
        self.forms: list[WordParse] = parse
        self.main: int = main_parse

    def get_parse(self) -> WordParse:
        ''' Access main form. '''
        return self.forms[self.main]

    def get_morpho(self) -> Morphology:
        ''' Return morphology for current token. '''
        return Morphology(self.get_parse().tag)

    def inflect(self, inflection_grams: set[str]) -> Optional[WordParse]:
        ''' Apply inflection to segment text. Does not modify forms '''
        inflected = self.get_parse().inflect(inflection_grams)
        if not inflected:
            return None
        # Re-apply the original capitalization pattern to the inflected word.
        capitalization = Capitalization.from_text(self.segment.text)
        self.segment.text = capitalization.apply_to(inflected.word)
        return inflected
class Collation:
''' Parsed data for input coordinated text. '''
# words: parsed tokens; coordination: per-word link data (NO_COORDINATION
# or relative jumps); main_word: index of the syntactic head in `words`.
def __init__(self, text: str):
self.text = text
self.words: list[WordToken] = []
self.coordination: list[int] = []
self.main_word: int = WORD_NONE
def is_valid(self) -> bool:
''' Check if data is parsed correctly (a main word was identified). '''
return self.main_word != WORD_NONE
def get_form(self) -> WordParse:
''' Access WordParse of the main word. '''
return self.words[self.main_word].get_parse()
def get_morpho(self) -> Morphology:
''' Access parsed main morphology. '''
return self.words[self.main_word].get_morpho()
def add_word(self, segment, forms: list, main_form: int, need_coordination: bool = True):
''' Add word information. '''
self.words.append(WordToken(segment, forms, main_form))
# 0 marks "coordinate with the head"; NO_COORDINATION opts the word out.
self.coordination.append(NO_COORDINATION if not need_coordination else 0)
def inflect(self, target_grams: Grammemes) -> str:
''' Inflect text to match required tags.
::returns:: inflected text, or original text if inflection is impossible. '''
if self.is_valid():
origin = self.get_morpho()
# Skip work when the current form already carries all target grammemes.
if not origin.tag.grammemes.issuperset(target_grams):
if self._apply_inflection(origin, target_grams):
return self._generate_text()
return self.text
def inflect_like(self, base_model: Collation) -> str:
''' Create inflection to substitute base_model form. '''
if self.is_valid():
morph = base_model.get_morpho()
if morph.effective_POS:
tags = set()
tags.add(morph.effective_POS)
tags = morph.complete_grams(tags)
return self.inflect(tags)
return self.text
def inflect_dependant(self, master_model: Collation) -> str:
''' Create inflection to coordinate with master_model form. '''
assert self.is_valid()
morph = master_model.get_morpho()
tags = morph.coordination_grams()
tags = self.get_morpho().complete_grams(tags)
return self.inflect(tags)
def normal_form(self) -> str:
''' Generate normal form. '''
if self.is_valid():
main_form = self.get_form()
new_morpho = Morphology(main_form.normalized.tag)
new_grams = new_morpho.complete_grams(frozenset())
return self.inflect(new_grams)
return self.text
# Walk the linked chain of coordinated words: each entry stores a relative
# jump; the walk ends when it reaches index == len(words).
# NOTE(review): reads coordination[len(self.words)], one slot past what
# add_word appends - presumably a parser finalization step appends the
# chain head entry; confirm against PhraseParser before relying on this.
def _iterate_coordinated(self):
words_count = len(self.words)
current_word = self.coordination[words_count]
while current_word != words_count:
yield self.words[current_word]
current_word += self.coordination[current_word]
def _inflect_main_word(self, origin: Morphology, target_grams: Grammemes) -> Optional[Morphology]:
# Complete the requested grammemes from the head's own tags, then inflect.
full_grams = origin.complete_grams(target_grams)
inflected = self.words[self.main_word].inflect(full_grams)
if not inflected:
return None
return Morphology(inflected.tag)
def _apply_inflection(self, origin: Morphology, target_grams: Grammemes) -> bool:
new_moprho = self._inflect_main_word(origin, target_grams)
if not new_moprho:
return False
# Propagate case/number/gender of the new head form to coordinated words.
inflection_grams = new_moprho.coordination_grams()
if len(inflection_grams) == 0:
return True
for word in self._iterate_coordinated():
word.inflect(inflection_grams)
return True
# Stitch inflected word segments back into the original text, keeping the
# non-word characters between them.
def _generate_text(self) -> str:
current_pos = 0
result = ''
for token in self.words:
if token.segment.start > current_pos:
result += self.text[current_pos: token.segment.start]
result += token.segment.text
current_pos = token.segment.stop
# NOTE(review): the +1 below drops a single trailing character when
# exactly one remains after the last token - verify off-by-one.
if current_pos + 1 < len(self.text):
result += self.text[current_pos:]
return result
class PhraseParser:
    ''' Russian grammar parser. '''

    def __init__(self):
        pass

    def __del__(self):
        pass

    # Parse variants scoring below this threshold are discarded (see _filter_score).
    _FILTER_SCORE = 0.005
    # While scanning single-word variants, stop once a variant's score falls below
    # this fraction of the first accepted variant's score (see _parse_single).
    _SINGLE_SCORE_SEARCH = 0.2
    # Sentinel priority meaning "no main-word candidate yet".
    _PRIORITY_NONE = NO_COORDINATION
    _MAIN_WAIT_LIMIT = 10  # count words until fixing main
    _MAIN_MAX_FOLLOWERS = 3  # count words after main as coordination candidates

    def parse(self, text: str,
              require_index: int = INDEX_NONE,
              require_grams: Optional[Grammemes] = None) -> Optional[Collation]:
        '''
        Determine morpho tags for input text.
        :param require_index: pick a specific parse variant (INDEX_NONE = choose automatically).
        :param require_grams: only accept variants containing all of these grammemes.
        ::returns:: Morphology of a text or None if no suitable form is available
        '''
        segments = list(RuSyntax.tokenize(text))
        if len(segments) == 0:
            return None
        elif len(segments) == 1:
            return self._parse_single(segments[0], require_index, require_grams)
        else:
            return self._parse_multiword(text, segments, require_index, require_grams)

    def normalize(self, text: str) -> str:
        ''' Get normal form for target text. Falls back to the input when unparsable. '''
        processed = self.parse(text)
        if processed:
            return processed.normal_form()
        return text

    def find_substr(self, text: str, sub: str) -> tuple[int, int]:
        ''' Search for substring position in text regardless of morphology.

        Both sub and text are tokenized and each token normalized, so inflected
        occurrences still match. Returns (start, stop) character positions,
        or (0, 0) when no match is found.
        '''
        if not text or not sub:
            return (0, 0)
        query = [self.normalize(elem.text) for elem in RuSyntax.tokenize(sub)]
        query_len = len(query)
        start = 0
        current_index = 0
        for token in RuSyntax.tokenize(text):
            text_word = self.normalize(token.text)
            if text_word != query[current_index]:
                # Mismatch resets the match in progress.
                # NOTE(review): a mismatching token is not re-tried as a new
                # match start — overlapping matches are not detected; confirm intended.
                current_index = 0
            else:
                if current_index == 0:
                    start = token.start
                current_index += 1
                if current_index == query_len:
                    return (start, token.stop)
        return (0, 0)

    def inflect_context(self, text: str, before: str = '', after: str = '') -> str:
        ''' Inflect text in accordance to context before and after.

        Chooses an "etalon" (reference) among the parsed before/after contexts
        and coordinates the target with its tag. Returns text unchanged when
        parsing fails or coordination is impossible.
        '''
        target = self.parse(text)
        if not target:
            return text
        target_morpho = target.get_morpho()
        if not target_morpho or not target_morpho.can_coordinate:
            return text
        model_after = self.parse(after)
        model_before = self.parse(before)
        etalon = PhraseParser._choose_context_etalon(target_morpho, model_before, model_after)
        if not etalon:
            return text
        etalon_moprho = etalon.get_morpho()
        if not etalon_moprho.can_coordinate:
            return text
        new_form = PhraseParser._combine_morpho(target_morpho, etalon_moprho.tag)
        return target.inflect(new_form)

    def inflect_substitute(self, substitute_normal: str, original: str) -> str:
        ''' Inflect substitute to match original form.

        Returns substitute_normal unchanged when either phrase cannot be parsed.
        '''
        original_model = self.parse(original)
        if not original_model:
            return substitute_normal
        substitute_model = self.parse(substitute_normal)
        if not substitute_model:
            return substitute_normal
        return substitute_model.inflect_like(original_model)

    def inflect_dependant(self, dependant_normal: str, master: str) -> str:
        ''' Inflect dependant to coordinate with master text.

        Returns dependant_normal unchanged when either phrase cannot be parsed.
        '''
        master_model = self.parse(master)
        if not master_model:
            return dependant_normal
        dependant_model = self.parse(dependant_normal)
        if not dependant_model:
            return dependant_normal
        return dependant_model.inflect_dependant(master_model)

    def _parse_single(self, segment, require_index: int, require_grams: Optional[Grammemes]) -> Optional[Collation]:
        # Parse a single-token text: pick one morphological variant and wrap it
        # in a one-word Collation (no coordination needed).
        forms = list(self._filtered_parse(segment.text))
        parse_index = INDEX_NONE
        if len(forms) == 0 or require_index >= len(forms):
            return None
        if require_index != INDEX_NONE:
            # Caller demands a specific variant: accept it only if it carries
            # the required grammemes.
            tags = forms[require_index].tag
            if require_grams and not tags.grammemes.issuperset(require_grams):
                return None
            parse_index = require_index
        else:
            current_score = 0
            for (index, form) in enumerate(forms):
                if not require_grams or form.tag.grammemes.issuperset(require_grams):
                    if form.tag.case == 'nomn':
                        # Nominative case wins outright.
                        parse_index = index
                        break
                    elif parse_index == INDEX_NONE:
                        # First acceptable variant becomes the provisional pick.
                        current_score = form.score
                        parse_index = index
                    elif form.score / current_score < self._SINGLE_SCORE_SEARCH:
                        # Remaining variants are too improbable to consider.
                        break
        if parse_index == INDEX_NONE:
            return None
        result = Collation(segment.text)
        result.add_word(segment, [forms[parse_index]], main_form=0, need_coordination=False)
        # Terminator entry for the coordination chain.
        result.coordination.append(len(result.words))
        result.main_word = 0
        return result

    def _parse_multiword(self, text: str, segments: list, require_index: int,
                         require_grams: Optional[Grammemes]) -> Optional[Collation]:
        # Parse a multi-token text: parse each segment, track the best
        # main-word candidate by priority, then finalize coordination links.
        result = Collation(text)
        priority_main: float = self._PRIORITY_NONE
        segment_index = 0
        main_wait = 0
        word_index = 0
        for segment in segments:
            # Give up fixing the main word after too many parsed words.
            if main_wait > PhraseParser._MAIN_WAIT_LIMIT:
                break
            segment_index += 1
            priority = self._parse_segment(result, segment, require_index, require_grams)
            if priority is None:
                continue  # skip non-parsable entities
            main_wait += 1
            if priority > priority_main:
                result.main_word = word_index
                priority_main = priority
            word_index += 1
        if result.main_word == INDEX_NONE:
            return None
        self._finalize_coordination(result)
        if segment_index < len(segments):
            pass  # finish to parse segments after main if needed
        return result

    def _parse_segment(self,
                       output: Collation,
                       segment: Segment,
                       require_index: int,
                       require_grams: Optional[Grammemes]) -> Optional[float]:
        ''' Return priority for this can be a new main word '''
        # Adds the parsed word to output as a side effect; returns None when the
        # segment has no acceptable parse variants.
        forms = list(self._filtered_parse(segment.text))
        if len(forms) == 0:
            return None
        main_index: int = INDEX_NONE
        segment_score: float = self._PRIORITY_NONE
        needs_coordination = False
        local_sum: float = 0
        score_sum: float = 0
        if require_index != INDEX_NONE:
            form = forms[require_index]
            if not require_grams or form.tag.grammemes.issuperset(require_grams):
                (local_max, segment_score) = PhraseParser._get_priorities_for(form.tag)
                main_index = require_index
                needs_coordination = Morphology.is_dependable(form.tag.POS)
        else:
            local_max = self._PRIORITY_NONE
            for (index, form) in enumerate(forms):
                if require_grams and not form.tag.grammemes.issuperset(require_grams):
                    continue
                (local_priority, global_priority) = PhraseParser._get_priorities_for(form.tag)
                needs_coordination = needs_coordination or Morphology.is_dependable(form.tag.POS)
                # Score-weighted average of global priorities over acceptable variants.
                local_sum += global_priority * form.score
                score_sum += form.score
                if local_priority > local_max:
                    local_max = local_priority
                    # segment_score = global_priority
                    main_index = index
            if score_sum == 0:
                return None
            segment_score = local_sum / score_sum
        output.add_word(segment, forms, main_index, needs_coordination)
        return segment_score
        # Alternative: return segment_score
        # penalty_suspicious = 0 if local_max == 0 else (1 - local_sum / local_max) * self._PRIORITY_PENALTY
        # return segment_score - penalty_suspicious

    @classmethod
    def _finalize_coordination(cls, target: Collation):
        # Convert per-word coordination flags into a linked chain of forward
        # offsets anchored at target.coordination[len(target.words)].
        main_morpho: Morphology = target.get_morpho()
        main_coordinate = main_morpho.can_coordinate
        # The main word never coordinates with itself.
        target.coordination[target.main_word] = NO_COORDINATION
        first_change = INDEX_NONE
        current_len = 0
        for (index, word) in enumerate(target.words):
            if target.coordination[index] == NO_COORDINATION or index - target.main_word > cls._MAIN_MAX_FOLLOWERS:
                # Out of coordination range (or explicitly excluded).
                needs_change = False
                if index != target.main_word:
                    word.main = INDEX_NONE
            else:
                word.main = PhraseParser._find_coordination(word.forms, main_morpho.tag, index < target.main_word)
                needs_change = word.main != INDEX_NONE
            if not needs_change or not main_coordinate:
                target.coordination[index] = NO_COORDINATION
                current_len += 1
            if needs_change and main_coordinate:
                # Store distance (in words) since the previous coordinated word.
                target.coordination[index] = current_len
                current_len = 0
                if first_change == INDEX_NONE:
                    first_change = index
        if first_change == INDEX_NONE:
            # No coordinated words: append the chain terminator only.
            target.coordination.append(len(target.words))
            return
        # Reverse the stored backward distances into forward offsets so the
        # chain can be walked front-to-back from the sentinel entry.
        # NOTE(review): relies on NO_COORDINATION/INDEX_NONE sentinel values
        # defined elsewhere — verify loop-termination invariants against them.
        previous_reference = first_change
        current_word = len(target.words)
        target.coordination.append(current_len + 1)
        while target.coordination[current_word] != INDEX_NONE:
            previous_word = current_word - target.coordination[current_word]
            target.coordination[current_word] = previous_reference
            previous_reference = current_word - previous_word
            current_word = previous_word
            if previous_reference == 0 or current_word < 0:
                break

    @staticmethod
    def _find_coordination(forms: list, main_tag: WordTag, before_main: bool) -> int:
        # Find the index of a form (adjective/participle only) whose case can
        # coordinate with the main word's tag; INDEX_NONE when none fits.
        for (index, form) in enumerate(forms):
            pos = form.tag.POS
            case = form.tag.case
            if pos not in ['ADJF', 'ADJS', 'PRTF', 'PRTS']:
                continue
            if SemanticRole.from_POS(pos) == SemanticRole.term and case == 'gent':
                # Genitive term-like form: acceptable only before the main word.
                if before_main:
                    continue
                else:
                    return INDEX_NONE
            if case == main_tag.case:
                return index
            elif main_tag.case in ['accs', 'gent'] and case in ['accs', 'gent']:
                # Accusative and genitive often share surface forms; treat as a match.
                return index
        return INDEX_NONE

    @staticmethod
    def _filtered_parse(text: str):
        # Generator pipeline: raw analyzer output -> score filter -> capitalization filter.
        # NOTE(review): `morpho` is a module-level analyzer defined elsewhere
        # (presumably a pymorphy MorphAnalyzer) — confirm at the import site.
        capital = Capitalization.from_text(text)
        score_filter = PhraseParser._filter_score(morpho.parse(text))
        yield from PhraseParser._filter_capital(score_filter, capital)

    @staticmethod
    def _filter_score(generator):
        # Variants are assumed score-descending; stop at the first one below threshold.
        for form in generator:
            if form.score < PhraseParser._FILTER_SCORE:
                break
            yield form

    @staticmethod
    def _filter_capital(generator, capital: Capitalization):
        # All-caps or mixed-case tokens are treated as abbreviations:
        # keep only variants tagged 'Abbr'. Other capitalizations pass through.
        if capital in [Capitalization.upper_case, Capitalization.mixed]:
            for form in generator:
                if 'Abbr' not in form.tag.grammemes:
                    continue
                yield form
        else:
            yield from generator

    @staticmethod
    def _parse_word(text: str, require_index: int = INDEX_NONE,
                    require_grams: Optional[Grammemes] = None) -> Optional[Morphology]:
        # Parse a single word into a Morphology, honoring the same
        # require_index / require_grams contract as parse().
        parsed_variants = morpho.parse(text)
        if not parsed_variants or require_index >= len(parsed_variants):
            return None
        if require_index != INDEX_NONE:
            tags = parsed_variants[require_index].tag
            if not require_grams or tags.grammemes.issuperset(require_grams):
                return Morphology(tags)
            else:
                return None
        else:
            # First variant satisfying the grammeme requirement wins.
            for variant in parsed_variants:
                tags = variant.tag
                if not require_grams or tags.grammemes.issuperset(require_grams):
                    return Morphology(tags)
            return None

    @staticmethod
    def _get_priorities_for(tag: WordTag) -> tuple[float, float]:
        ''' Return pair of local and global priorities. '''
        # Local priority ranks variants within one word; global priority ranks
        # words as main-word candidates. Verbs and nominative nouns rank highest.
        if tag.POS in ['VERB', 'INFN']:
            return (9, 10)
        if tag.POS in ['NOUN', 'NPRO']:
            return (10, 9) if 'nomn' in tag.grammemes and 'Fixd' not in tag.grammemes else (8, 8)
        if tag.POS in ['PRTF', 'PRTS']:
            return (6, 6)
        if tag.POS in ['ADJF', 'ADJS']:
            return (5, 5)
        if tag.POS == 'ADVB':
            return (7, 4)
        return (0, 0)

    @staticmethod
    def _choose_context_etalon(target: Morphology,
                               before: Optional[Collation],
                               after: Optional[Collation]) -> Optional[Collation]:
        # Pick which context (before/after) should drive target's inflection,
        # based on semantic roles; prefer the side that can coordinate.
        if not before or not before.get_morpho().can_coordinate:
            return after
        if not after or not after.get_morpho().can_coordinate:
            return before
        before_semantic = before.get_morpho().semantic
        after_semantic = after.get_morpho().semantic
        if target.semantic == SemanticRole.definition:
            # A definition preferably coordinates with an adjacent term.
            if after_semantic == SemanticRole.term:
                return after
            if before_semantic == SemanticRole.term:
                return before
            if before_semantic == SemanticRole.definition:
                return before
            return after
        if target.semantic == SemanticRole.term:
            # A term preferably coordinates with an adjacent definition.
            if before_semantic == SemanticRole.definition:
                return before
            if after_semantic == SemanticRole.definition:
                return after
        return before

    @staticmethod
    def _combine_morpho(target: Morphology, etalon: WordTag) -> frozenset[str]:
        # Build the grammeme set for inflecting target towards the etalon tag.
        part_of_speech = target.tag.POS
        number = etalon.number
        if number == 'plur':
            # Plural forms carry no gender distinction.
            return frozenset([part_of_speech, number, etalon.case])
        else:
            # Terms keep their own gender; other roles adopt the etalon's.
            gender = etalon.gender if target.semantic != SemanticRole.term else target.tag.gender
            return frozenset([part_of_speech, number, gender, etalon.case])

View File

@ -1,87 +0,0 @@
''' Russian language syntax incapsulation. '''
from __future__ import annotations
from enum import Enum, unique
from razdel import tokenize
@unique
class Capitalization(Enum):
    ''' Enumerating capitalization types. '''
    unknwn = 0
    lower_case = 1
    upper_case = 2
    first_capital = 3
    mixed = 4

    @staticmethod
    def from_text(text: str) -> Capitalization:
        ''' Factory method to identify capitalization in text.

        Classifies as lower_case / upper_case / first_capital / mixed;
        empty input yields unknwn.
        '''
        if len(text) == 0:
            return Capitalization.unknwn
        first_capital = Capitalization._is_capital(text[0])
        has_mid_capital = False
        has_lower = not first_capital
        for symbol in text[1:]:
            if Capitalization._is_capital(symbol):
                if has_lower:
                    # A capital after a lowercase letter -> mixed.
                    return Capitalization.mixed
                has_mid_capital = True
            else:
                if has_mid_capital:
                    # A lowercase letter after a mid-word capital -> mixed.
                    return Capitalization.mixed
                else:
                    has_lower = True
        if has_mid_capital:
            return Capitalization.upper_case
        elif first_capital:
            return Capitalization.first_capital
        else:
            return Capitalization.lower_case

    def apply_to(self, text: str) -> str:
        ''' Apply capitalization to text. unknwn and mixed leave text unchanged. '''
        if not text or self in [Capitalization.unknwn, Capitalization.mixed]:
            return text
        elif self == Capitalization.lower_case:
            return text.lower()
        elif self == Capitalization.upper_case:
            return text.upper()
        else:
            return text[0].upper() + text[1:]

    @staticmethod
    def _is_capital(symbol: str) -> bool:
        ''' Test whether symbol is an uppercase Russian or Latin letter. '''
        # Bug fix: 'Ё' (U+0401) lies outside the contiguous 'А'..'Я' block,
        # so words like 'ЁЖИК' were misclassified; include it explicitly.
        return 'А' <= symbol <= 'Я' or symbol == 'Ё' or 'A' <= symbol <= 'Z'
class RuSyntax:
    ''' Russian language syntax parser (thin wrapper around razdel.tokenize). '''

    def __init__(self):
        pass

    def __del__(self):
        pass

    @staticmethod
    def is_single_word(text: str) -> bool:
        ''' Test if text is a single word.

        Empty text counts as a single word (no second token exists).
        '''
        # Bug fix: the previous implementation compared razdel token objects
        # to '' — dead, always-false checks. The real logic is simply
        # "fewer than two tokens".
        gen = tokenize(text)
        try:
            next(gen)  # first token (absent for empty text)
            next(gen)  # a second token means multiple words
            return False
        except StopIteration:
            return True

    @staticmethod
    def tokenize(text: str):
        ''' Split text into tokens. Returns an iterable of razdel substrings
        carrying (start, stop, text). '''
        return tokenize(text)

    @staticmethod
    def split_words(text: str) -> list[str]:
        ''' Split text into words. '''
        return [elem.text for elem in tokenize(text)]

View File

@ -60,11 +60,10 @@ Name: quicklaunchicon; Description: "{cm:CreateQuickLaunchIcon}"; GroupDescripti
[Files]
Source: "..\bin\x64\Exteor.exe"; DestDir: "{app}"; Flags: ignoreversion
Source: "..\import\cctext\*"; DestDir: "{app}\cctext"; Flags: ignoreversion
Source: "app\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs
Source: "..\distr\app\DejaVu Sans.ttf"; DestDir: "{app}"; Flags: ignoreversion
Source: "..\script\PymorphyInstall.bat"; DestDir: {tmp}; Flags: deleteafterinstall
Source: "..\script\installDependencies.bat"; DestDir: {tmp}; Flags: deleteafterinstall
[Icons]
Name: "{group}\{#ExteorName}"; Filename: "{app}\{#ExteorEXE}"
@ -101,7 +100,7 @@ Filename: "{app}\{#ExteorEXE}"; Description: "{cm:LaunchProgram,{#StringChange(E
Filename: "{app}\Документация\README.rtf"; Description: "{cm:ReadMe}"; Flags: nowait postinstall skipifsilent unchecked shellexec
Filename: "{tmp}\{#VSRedist}"; Parameters: "/install /quiet /NORESTART"; StatusMsg: {cm:CppRedist}; Check: VCRedistNeedsInstall()
Filename: "{tmp}\{#PythonRedist}"; Parameters: "/quiet InstallAllUsers=1 PrependPath=1 Include_test=0"; StatusMsg: {cm:PythonInstall}; Check: PythonNeedsInstall()
Filename: "{tmp}\PymorphyInstall.bat"; Parameters: """{code:PythonPath}"""
Filename: "{tmp}\installDependencies.bat"; Parameters: """{code:PythonPath}"""
[Code]
function IsPythonMissing: Boolean; forward;

View File

@ -60,11 +60,10 @@ Name: quicklaunchicon; Description: "{cm:CreateQuickLaunchIcon}"; GroupDescripti
[Files]
Source: "..\bin\x64\Exteor.exe"; DestDir: "{app}"; Flags: ignoreversion
Source: "..\import\cctext\*"; DestDir: "{app}\cctext"; Flags: ignoreversion
Source: "app\*"; DestDir: "{app}"; Flags: ignoreversion recursesubdirs
Source: "..\distr\app\DejaVu Sans.ttf"; DestDir: "{app}"; Flags: ignoreversion
Source: "..\script\PymorphyInstall.bat"; DestDir: {tmp}; Flags: deleteafterinstall
Source: "..\script\installDependencies.bat"; DestDir: {tmp}; Flags: deleteafterinstall
[Icons]
Name: "{group}\{#ExteorName}"; Filename: "{app}\{#ExteorEXE}"
@ -101,7 +100,7 @@ Filename: "{app}\{#ExteorEXE}"; Description: "{cm:LaunchProgram,{#StringChange(E
Filename: "{app}\Документация\README.rtf"; Description: "{cm:ReadMe}"; Flags: nowait postinstall skipifsilent unchecked shellexec
Filename: "{tmp}\{#VSRedist}"; Parameters: "/install /quiet /NORESTART"; StatusMsg: {cm:CppRedist}; Check: VCRedistNeedsInstall()
Filename: "{tmp}\{#PythonRedist}"; Parameters: "/quiet InstallAllUsers=1 PrependPath=1 Include_test=0"; StatusMsg: {cm:PythonInstall}; Check: PythonNeedsInstall()
Filename: "{tmp}\PymorphyInstall.bat"; Parameters: """{code:PythonPath}"""
Filename: "{tmp}\installDependencies.bat"; Parameters: """{code:PythonPath}"""
[Code]
function IsPythonMissing: Boolean; forward;

View File

@ -7,8 +7,7 @@ PING -n 1 www.google.com > nul && (goto :SUCCESS) || (goto :FAILURE)
:SUCCESS
@echo off
"%python3Path%Scripts\pip.exe" install razdel || (goto :FAILURE)
"%python3Path%Scripts\pip.exe" install pymorphy3 || (goto :FAILURE)
"%python3Path%Scripts\pip.exe" install cctext || (goto :FAILURE)
goto :END
:FAILURE

View File

@ -154,7 +154,7 @@
<WarningLevel>Level4</WarningLevel>
<SDLCheck>true</SDLCheck>
<ObjectFileName>$(IntDir)obj\</ObjectFileName>
<AdditionalIncludeDirectories>Mock;./;..\include;..\import\include;..\..\ConceptCore\output\include;..\..\OfficeOLE\include;C:\Program Files (x86)\Python312-32\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>Mock;./;..\include;..\import\include;..\..\GH-ConceptCore\output\include;..\..\OfficeOLE\include;C:\Program Files (x86)\Python312-32\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<LanguageStandard>stdcpplatest</LanguageStandard>
<BuildStlModules>false</BuildStlModules>
</ClCompile>
@ -163,12 +163,11 @@
<SubSystem>Console</SubSystem>
<AdditionalDependencies>ConceptCoreLibraryd.lib;RSlangd.lib;cclGraphd.lib;cclLangd.lib;iphlpapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
<AdditionalLibraryDirectories>C:\Program Files (x86)\Python312-32\libs;..\..\ConceptCore\output\lib\x86;..\..\OfficeOLE\output\lib\x86;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories>C:\Program Files (x86)\Python312-32\libs;..\..\GH-ConceptCore\output\lib\x86;..\..\OfficeOLE\output\lib\x86;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalOptions>/ignore:4099 %(AdditionalOptions)</AdditionalOptions>
</Link>
<PostBuildEvent>
<Command>xcopy /y /s /q /i "..\import\cctext" "$(OutDir)\cctext\"
copy "Data\sample_module.py" "$(OutDir)sample_module.py"
<Command>copy "Data\sample_module.py" "$(OutDir)sample_module.py"
copy "C:\Program Files (x86)\Python312-32\Python312_d.dll" "$(OutDir)"
copy "C:\Program Files (x86)\Python312-32\Python312_d.pdb" "$(OutDir)"</Command>
</PostBuildEvent>
@ -184,7 +183,7 @@ copy "C:\Program Files (x86)\Python312-32\Python312_d.pdb" "$(OutDir)"</Command>
<WarningLevel>Level4</WarningLevel>
<SDLCheck>true</SDLCheck>
<ObjectFileName>$(IntDir)obj\</ObjectFileName>
<AdditionalIncludeDirectories>Mock;./;..\include;..\header;..\import\include;..\..\ConceptCore\output\include;..\..\OfficeOLE\include;C:\Program Files\Python312\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>Mock;./;..\include;..\header;..\import\include;..\..\GH-ConceptCore\output\include;..\..\OfficeOLE\include;C:\Program Files\Python312\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<LanguageStandard>stdcpplatest</LanguageStandard>
<BuildStlModules>false</BuildStlModules>
</ClCompile>
@ -193,12 +192,11 @@ copy "C:\Program Files (x86)\Python312-32\Python312_d.pdb" "$(OutDir)"</Command>
<SubSystem>Console</SubSystem>
<AdditionalDependencies>ConceptCoreLibraryd.lib;RSlangd.lib;cclGraphd.lib;cclLangd.lib;iphlpapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
<AdditionalLibraryDirectories>C:\Program Files\Python312\libs;..\..\ConceptCore\output\lib\x64;..\..\OfficeOLE\output\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories>C:\Program Files\Python312\libs;..\..\GH-ConceptCore\output\lib\x64;..\..\OfficeOLE\output\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalOptions>/ignore:4099 %(AdditionalOptions)</AdditionalOptions>
</Link>
<PostBuildEvent>
<Command>xcopy /y /s /q /i "..\import\cctext" "$(OutDir)\cctext\"
copy "Data\sample_module.py" "$(OutDir)sample_module.py"</Command>
<Command>copy "Data\sample_module.py" "$(OutDir)sample_module.py"</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
@ -212,7 +210,7 @@ copy "Data\sample_module.py" "$(OutDir)sample_module.py"</Command>
<TreatWarningAsError>false</TreatWarningAsError>
<SDLCheck>true</SDLCheck>
<ObjectFileName>$(IntDir)obj\</ObjectFileName>
<AdditionalIncludeDirectories>Mock;./;..\include;..\header;..\import\include;..\import\include;..\..\ConceptCore\output\include;..\..\OfficeOLE\include;C:\Program Files (x86)\Python312-32\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>Mock;./;..\include;..\header;..\import\include;..\import\include;..\..\GH-ConceptCore\output\include;..\..\OfficeOLE\include;C:\Program Files (x86)\Python312-32\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<LanguageStandard>stdcpplatest</LanguageStandard>
<BuildStlModules>false</BuildStlModules>
</ClCompile>
@ -223,12 +221,11 @@ copy "Data\sample_module.py" "$(OutDir)sample_module.py"</Command>
<EnableCOMDATFolding>true</EnableCOMDATFolding>
<AdditionalDependencies>ConceptCoreLibrary.lib;RSlang.lib;cclGraph.lib;cclLang.lib;iphlpapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
<AdditionalLibraryDirectories>C:\Program Files (x86)\Python312-32\libs;..\..\ConceptCore\output\lib\x86;..\..\OfficeOLE\output\lib\x86;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories>C:\Program Files (x86)\Python312-32\libs;..\..\GH-ConceptCore\output\lib\x86;..\..\OfficeOLE\output\lib\x86;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalOptions>/ignore:4099 %(AdditionalOptions)</AdditionalOptions>
</Link>
<PostBuildEvent>
<Command>xcopy /y /s /q /i "..\import\cctext" "$(OutDir)\cctext\"
copy "Data\sample_module.py" "$(OutDir)sample_module.py"
<Command>copy "Data\sample_module.py" "$(OutDir)sample_module.py"
copy "C:\Program Files (x86)\Python312-32\Python312.dll" "$(OutDir)"
copy "C:\Program Files (x86)\Python312-32\Python312.pdb" "$(OutDir)"</Command>
</PostBuildEvent>
@ -244,7 +241,7 @@ copy "C:\Program Files (x86)\Python312-32\Python312.pdb" "$(OutDir)"</Command>
<TreatWarningAsError>false</TreatWarningAsError>
<SDLCheck>true</SDLCheck>
<ObjectFileName>$(IntDir)obj\</ObjectFileName>
<AdditionalIncludeDirectories>Mock;./;..\include;..\header;..\import\include;..\import\include;..\..\ConceptCore\output\include;..\..\OfficeOLE\include;C:\Program Files\Python312\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<AdditionalIncludeDirectories>Mock;./;..\include;..\header;..\import\include;..\import\include;..\..\GH-ConceptCore\output\include;..\..\OfficeOLE\include;C:\Program Files\Python312\include;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
<LanguageStandard>stdcpplatest</LanguageStandard>
<BuildStlModules>false</BuildStlModules>
</ClCompile>
@ -256,12 +253,11 @@ copy "C:\Program Files (x86)\Python312-32\Python312.pdb" "$(OutDir)"</Command>
<AdditionalDependencies>ConceptCoreLibrary.lib;RSlang.lib;cclGraph.lib;cclLang.lib;iphlpapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<LinkTimeCodeGeneration>UseLinkTimeCodeGeneration</LinkTimeCodeGeneration>
<ImageHasSafeExceptionHandlers>false</ImageHasSafeExceptionHandlers>
<AdditionalLibraryDirectories>C:\Program Files\Python312\libs;..\..\ConceptCore\output\lib\x64;..\..\OfficeOLE\output\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalLibraryDirectories>C:\Program Files\Python312\libs;..\..\GH-ConceptCore\output\lib\x64;..\..\OfficeOLE\output\lib\x64;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalOptions>/ignore:4099 %(AdditionalOptions)</AdditionalOptions>
</Link>
<PostBuildEvent>
<Command>xcopy /y /s /q /i "..\import\cctext" "$(OutDir)\cctext\"
copy "Data\sample_module.py" "$(OutDir)sample_module.py"</Command>
<Command>copy "Data\sample_module.py" "$(OutDir)sample_module.py"</Command>
</PostBuildEvent>
</ItemDefinitionGroup>
<Target Name="EnsureNuGetPackageBuildImports" BeforeTargets="PrepareForBuild">