2023-09-11 20:31:54 +03:00
|
|
|
// Module: Natural language model declarations.
|
|
|
|
|
2023-09-14 16:53:38 +03:00
|
|
|
|
|
|
|
// ====== Morphology ========
|
2023-09-19 17:55:17 +03:00
|
|
|
export enum Grammeme {
|
2023-09-21 14:58:01 +03:00
|
|
|
// Неизвестная граммема
|
|
|
|
UNKN = 'UNKN',
|
|
|
|
|
2023-09-14 16:53:38 +03:00
|
|
|
// Части речи
|
|
|
|
NOUN = 'NOUN',
|
|
|
|
ADJF = 'ADJF',
|
|
|
|
ADJS = 'ADJS',
|
|
|
|
COMP = 'COMP',
|
|
|
|
VERB = 'VERB',
|
|
|
|
INFN = 'INFN',
|
|
|
|
PRTF = 'PRTF',
|
|
|
|
PRTS = 'PRTS',
|
|
|
|
GRND = 'GRND',
|
|
|
|
NUMR = 'NUMR',
|
|
|
|
ADVB = 'ADVB',
|
|
|
|
NPRO = 'NPRO',
|
|
|
|
PRED = 'PRED',
|
|
|
|
PREP = 'PREP',
|
|
|
|
CONJ = 'CONJ',
|
|
|
|
PRCL = 'PRCL',
|
|
|
|
INTJ = 'INTJ',
|
|
|
|
|
|
|
|
// Одушевленность
|
|
|
|
anim = 'anim',
|
|
|
|
inan = 'inan',
|
|
|
|
|
|
|
|
// Род
|
|
|
|
masc = 'masc',
|
|
|
|
femn = 'femn',
|
|
|
|
neut = 'neut',
|
|
|
|
|
|
|
|
// Число
|
|
|
|
sing = 'sing',
|
|
|
|
plur = 'plur',
|
|
|
|
|
|
|
|
// Падеж (основные)
|
|
|
|
nomn = 'nomn',
|
|
|
|
gent = 'gent',
|
|
|
|
datv = 'datv',
|
|
|
|
accs = 'accs',
|
|
|
|
ablt = 'ablt',
|
|
|
|
loct = 'loct',
|
|
|
|
|
|
|
|
// Совершенный / несовершенный вид
|
|
|
|
perf = 'perf',
|
|
|
|
impf = 'impf',
|
|
|
|
|
|
|
|
// Переходность
|
|
|
|
tran = 'tran',
|
|
|
|
intr = 'intr',
|
|
|
|
|
|
|
|
// Время
|
|
|
|
pres = 'pres',
|
|
|
|
past = 'past',
|
|
|
|
futr = 'futr',
|
|
|
|
|
|
|
|
// Лицо
|
|
|
|
per1 = '1per',
|
|
|
|
per2 = '2per',
|
|
|
|
per3 = '3per',
|
|
|
|
|
|
|
|
// Наклонение
|
|
|
|
indc = 'indc',
|
|
|
|
impr = 'impr',
|
|
|
|
|
|
|
|
// Включение говорящего в действие
|
|
|
|
incl = 'incl',
|
|
|
|
excl = 'excl',
|
|
|
|
|
|
|
|
// Залог
|
|
|
|
actv = 'actv',
|
|
|
|
pssv = 'pssv',
|
|
|
|
|
|
|
|
// Стиль речи
|
|
|
|
Infr = 'Infr', // Неформальный
|
|
|
|
Slng = 'Slng', // Жаргон
|
|
|
|
Arch = 'Arch', // Устаревший
|
|
|
|
Litr = 'Litr', // Литературный
|
|
|
|
|
|
|
|
// Аббревиатура
|
|
|
|
Abbr = 'Abbr'
|
|
|
|
}
|
|
|
|
|
|
|
|
export const PartOfSpeech = [
|
2023-09-19 17:55:17 +03:00
|
|
|
Grammeme.NOUN, Grammeme.ADJF, Grammeme.ADJS, Grammeme.COMP,
|
|
|
|
Grammeme.VERB, Grammeme.INFN, Grammeme.PRTF, Grammeme.PRTS,
|
|
|
|
Grammeme.GRND, Grammeme.ADVB, Grammeme.NPRO, Grammeme.PRED,
|
2023-09-21 14:58:01 +03:00
|
|
|
Grammeme.PREP, Grammeme.CONJ, Grammeme.PRCL, Grammeme.INTJ
|
2023-09-19 17:55:17 +03:00
|
|
|
];
|
2023-09-14 16:53:38 +03:00
|
|
|
|
|
|
|
export const Gender = [
|
2023-09-19 17:55:17 +03:00
|
|
|
Grammeme.masc, Grammeme.femn, Grammeme.neut
|
|
|
|
];
|
2023-09-14 16:53:38 +03:00
|
|
|
|
|
|
|
export const Case = [
|
2023-09-19 17:55:17 +03:00
|
|
|
Grammeme.nomn, Grammeme.gent, Grammeme.datv,
|
|
|
|
Grammeme.accs, Grammeme.ablt, Grammeme.loct
|
|
|
|
];
|
|
|
|
|
2023-09-21 14:58:01 +03:00
|
|
|
export const Plurality = [ Grammeme.sing, Grammeme.plur ];
|
|
|
|
|
|
|
|
export const Perfectivity = [ Grammeme.perf, Grammeme.impf ];
|
|
|
|
export const Transitivity = [ Grammeme.tran, Grammeme.intr ];
|
|
|
|
export const Mood = [ Grammeme.indc, Grammeme.impr ];
|
|
|
|
export const Inclusion = [ Grammeme.incl, Grammeme.excl ];
|
|
|
|
export const Voice = [ Grammeme.actv, Grammeme.pssv ];
|
|
|
|
|
|
|
|
export const Tense = [
|
|
|
|
Grammeme.pres,
|
|
|
|
Grammeme.past,
|
|
|
|
Grammeme.futr
|
|
|
|
];
|
|
|
|
|
|
|
|
export const Person = [
|
|
|
|
Grammeme.per1,
|
|
|
|
Grammeme.per2,
|
|
|
|
Grammeme.per3
|
|
|
|
];
|
|
|
|
|
|
|
|
export const GrammemeGroups = [
|
|
|
|
PartOfSpeech, Gender, Case, Plurality, Perfectivity,
|
|
|
|
Transitivity, Mood, Inclusion, Voice, Tense, Person
|
|
|
|
];
|
|
|
|
|
|
|
|
export const NounGrams = [
|
|
|
|
Grammeme.NOUN, Grammeme.ADJF, Grammeme.ADJS,
|
|
|
|
...Gender,
|
|
|
|
...Case,
|
|
|
|
...Plurality
|
|
|
|
];
|
|
|
|
|
|
|
|
export const VerbGrams = [
|
|
|
|
Grammeme.VERB, Grammeme.INFN, Grammeme.PRTF, Grammeme.PRTS,
|
|
|
|
...Perfectivity,
|
|
|
|
...Transitivity,
|
|
|
|
...Mood,
|
|
|
|
...Inclusion,
|
|
|
|
...Voice,
|
|
|
|
...Tense,
|
|
|
|
...Person
|
|
|
|
];
|
|
|
|
|
|
|
|
// Grammeme parse data
|
|
|
|
export interface IGramData {
|
|
|
|
type: Grammeme
|
|
|
|
data: string
|
|
|
|
}
|
|
|
|
|
|
|
|
// Equality comparator for IGramData
|
|
|
|
export function matchGrammeme(value: IGramData, test: IGramData): boolean {
|
|
|
|
if (value.type !== test.type) {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
return value.type !== Grammeme.UNKN || value.data === test.data;
|
|
|
|
}
|
|
|
|
|
|
|
|
function parseSingleGrammeme(text: string): IGramData {
|
|
|
|
if (Object.values(Grammeme).includes(text as Grammeme)) {
|
|
|
|
return {
|
|
|
|
data: text,
|
|
|
|
type: text as Grammeme
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
return {
|
|
|
|
data: text,
|
|
|
|
type: Grammeme.UNKN
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
export function parseGrammemes(termForm: string): IGramData[] {
|
|
|
|
const result: IGramData[] = [];
|
|
|
|
const chunks = termForm.split(',');
|
|
|
|
chunks.forEach(chunk => {
|
|
|
|
chunk = chunk.trim();
|
|
|
|
if (chunk !== '') {
|
|
|
|
result.push(parseSingleGrammeme(chunk));
|
|
|
|
}
|
|
|
|
});
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
export interface IWordForm {
|
|
|
|
text: string
|
|
|
|
grams: IGramData[]
|
|
|
|
}
|
|
|
|
|
2023-09-19 17:55:17 +03:00
|
|
|
// ====== Reference resolution =====
|
|
|
|
export interface IRefsText {
|
|
|
|
text: string
|
|
|
|
}
|
|
|
|
|
|
|
|
export enum ReferenceType {
|
|
|
|
ENTITY = 'entity',
|
|
|
|
SYNTACTIC = 'syntax'
|
|
|
|
}
|
2023-09-21 14:58:01 +03:00
|
|
|
|
2023-09-19 17:55:17 +03:00
|
|
|
export interface IEntityReference {
|
|
|
|
entity: string
|
|
|
|
form: string
|
|
|
|
}
|
|
|
|
|
|
|
|
export interface ISyntacticReference {
|
|
|
|
offset: number
|
|
|
|
nominal: string
|
|
|
|
}
|
|
|
|
|
|
|
|
export interface ITextPosition {
|
|
|
|
start: number
|
|
|
|
finish: number
|
|
|
|
}
|
|
|
|
|
|
|
|
export interface IResolvedReference {
|
|
|
|
type: ReferenceType
|
|
|
|
data: IEntityReference | ISyntacticReference
|
|
|
|
pos_input: ITextPosition
|
|
|
|
pos_output: ITextPosition
|
|
|
|
}
|
|
|
|
|
|
|
|
export interface IReferenceData {
|
|
|
|
input: string
|
|
|
|
output: string
|
|
|
|
refs: IResolvedReference[]
|
|
|
|
}
|