// Module: Natural language model declarations. /** * Represents single unit of language Morphology. */ export enum Grammeme { // Неизвестная граммема UNKN = 'UNKN', // Части речи NOUN = 'NOUN', ADJF = 'ADJF', ADJS = 'ADJS', COMP = 'COMP', VERB = 'VERB', INFN = 'INFN', PRTF = 'PRTF', PRTS = 'PRTS', GRND = 'GRND', NUMR = 'NUMR', ADVB = 'ADVB', NPRO = 'NPRO', PRED = 'PRED', PREP = 'PREP', CONJ = 'CONJ', PRCL = 'PRCL', INTJ = 'INTJ', // Одушевленность anim = 'anim', inan = 'inan', // Род masc = 'masc', femn = 'femn', neut = 'neut', // Число sing = 'sing', plur = 'plur', // Падеж (основные) nomn = 'nomn', gent = 'gent', datv = 'datv', accs = 'accs', ablt = 'ablt', loct = 'loct', // Совершенный / несовершенный вид perf = 'perf', impf = 'impf', // Переходность tran = 'tran', intr = 'intr', // Время pres = 'pres', past = 'past', futr = 'futr', // Лицо per1 = '1per', per2 = '2per', per3 = '3per', // Наклонение indc = 'indc', impr = 'impr', // Включение говорящего в действие incl = 'incl', excl = 'excl', // Залог actv = 'actv', pssv = 'pssv', // Стиль речи Infr = 'Infr', // Неформальный Slng = 'Slng', // Жаргон Arch = 'Arch', // Устаревший Litr = 'Litr', // Литературный // Аббревиатура Abbr = 'Abbr' } /** * Represents part of speech language concept. * * Implemented as a list of mututally exclusive {@link Grammeme}s. */ export const PartOfSpeech = [ Grammeme.NOUN, Grammeme.ADJF, Grammeme.ADJS, Grammeme.COMP, Grammeme.VERB, Grammeme.INFN, Grammeme.PRTF, Grammeme.PRTS, Grammeme.GRND, Grammeme.ADVB, Grammeme.NPRO, Grammeme.PRED, Grammeme.PREP, Grammeme.CONJ, Grammeme.PRCL, Grammeme.INTJ ]; /** * Represents gender language concept. * * Implemented as a list of mututally exclusive {@link Grammeme}s. */ export const Gender = [ Grammeme.masc, Grammeme.femn, Grammeme.neut ]; /** * Represents case language concept. * * Implemented as a list of mututally exclusive {@link Grammeme}s. */ export const Case = [ Grammeme.nomn, Grammeme.gent, Grammeme.datv, Grammeme.accs, Grammeme.ablt, Grammeme.loct ]; /** * Represents plurality language concept. * * Implemented as a list of mututally exclusive {@link Grammeme}s. */ export const Plurality = [ Grammeme.sing, Grammeme.plur ]; /** * Represents verb perfectivity language concept. * * Implemented as a list of mututally exclusive {@link Grammeme}s. */ export const Perfectivity = [ Grammeme.perf, Grammeme.impf ]; /** * Represents verb transitivity language concept. * * Implemented as a list of mututally exclusive {@link Grammeme}s. */ export const Transitivity = [ Grammeme.tran, Grammeme.intr ]; /** * Represents verb mood language concept. * * Implemented as a list of mututally exclusive {@link Grammeme}s. */ export const Mood = [ Grammeme.indc, Grammeme.impr ]; /** * Represents verb self-inclusion language concept. * * Implemented as a list of mututally exclusive {@link Grammeme}s. */ export const Inclusion = [ Grammeme.incl, Grammeme.excl ]; /** * Represents verb voice language concept. * * Implemented as a list of mututally exclusive {@link Grammeme}s. */ export const Voice = [ Grammeme.actv, Grammeme.pssv ]; /** * Represents verb tense language concept. * * Implemented as a list of mututally exclusive {@link Grammeme}s. */ export const Tense = [ Grammeme.pres, Grammeme.past, Grammeme.futr ]; /** * Represents verb person language concept. * * Implemented as a list of mututally exclusive {@link Grammeme}s. */ export const Person = [ Grammeme.per1, Grammeme.per2, Grammeme.per3 ]; /** * Represents complete list of language concepts. * * Implemented as a list of lists of {@link Grammeme}s. */ export const GrammemeGroups = [ PartOfSpeech, Gender, Case, Plurality, Perfectivity, Transitivity, Mood, Inclusion, Voice, Tense, Person ]; /** * Represents NOUN-ish list of language concepts. * * Represented concepts can be target of inflection or coalition in a sentence. * * Implemented as a list of lists of {@link Grammeme}s. */ export const NounGrams = [ Grammeme.NOUN, Grammeme.ADJF, Grammeme.ADJS, ...Gender, ...Case, ...Plurality ]; /** * Represents VERB-ish list of language concepts. * * Represented concepts can be target of inflection or coalition in a sentence. * * Implemented as a list of lists of {@link Grammeme}s. */ export const VerbGrams = [ Grammeme.VERB, Grammeme.INFN, Grammeme.PRTF, Grammeme.PRTS, ...Perfectivity, ...Transitivity, ...Mood, ...Inclusion, ...Voice, ...Tense, ...Person ]; /** * Represents {@link Grammeme} parse data. */ export interface IGramData { type: Grammeme data: string } /** * Represents specific wordform attached to {@link Grammeme}s. */ export interface IWordForm { text: string grams: IGramData[] } /** * Equality comparator for {@link IGramData}. Compares text data for unknown grammemes */ export function matchGrammeme(left: IGramData, right: IGramData): boolean { if (left.type !== right.type) { return false; } return left.type !== Grammeme.UNKN || left.data === right.data; } /** * Equality comparator for {@link IWordForm}. Compares a set of Grammemes attached to wordforms */ export function matchWordForm(left: IWordForm, right: IWordForm): boolean { if (left.grams.length !== right.grams.length) { return false; } for (let index = 0; index < left.grams.length; ++index) { if (!matchGrammeme(left.grams[index], right.grams[index])) { return false; } } return true; } function parseSingleGrammeme(text: string): IGramData { if (Object.values(Grammeme).includes(text as Grammeme)) { return { data: text, type: text as Grammeme } } else { return { data: text, type: Grammeme.UNKN } } } export function sortGrammemes(input: TData[]): TData[] { const result: TData[] = []; Object.values(Grammeme).forEach( gram => { const item = input.find(data => data.type === gram); if (item) { result.push(item); } }); return result; } export function parseGrammemes(termForm: string): IGramData[] { const result: IGramData[] = []; const chunks = termForm.split(','); chunks.forEach(chunk => { chunk = chunk.trim(); if (chunk !== '') { result.push(parseSingleGrammeme(chunk)); } }); return sortGrammemes(result); } // ====== Reference resolution ===== export interface IRefsText { text: string } export enum ReferenceType { ENTITY = 'entity', SYNTACTIC = 'syntax' } export interface IEntityReference { entity: string form: string } export interface ISyntacticReference { offset: number nominal: string } export interface ITextPosition { start: number finish: number } export interface IResolvedReference { type: ReferenceType data: IEntityReference | ISyntacticReference pos_input: ITextPosition pos_output: ITextPosition } export interface IReferenceData { input: string output: string refs: IResolvedReference[] }