ConceptPortal-public/rsconcept/frontend/src/models/language.ts

230 lines
4.5 KiB
TypeScript
Raw Normal View History

2023-09-11 20:31:54 +03:00
// Module: Natural language model declarations.
2023-09-14 16:53:38 +03:00
// ====== Morphology ========
2023-09-19 17:55:17 +03:00
/**
 * Grammatical tags (grammemes) recognised by the frontend.
 *
 * Every member is a string enum whose value is the tag text itself.
 * The person tags are the only members whose identifier differs from the
 * value (`per1` -> `'1per'`), because an identifier cannot start with a digit.
 *
 * NOTE(review): the tag spellings look like the OpenCorpora / pymorphy2
 * tagset — confirm against the backend that produces them.
 */
export enum Grammeme {
  // Unknown grammeme
  UNKN = 'UNKN',

  // Parts of speech
  NOUN = 'NOUN',
  ADJF = 'ADJF',
  ADJS = 'ADJS',
  COMP = 'COMP',
  VERB = 'VERB',
  INFN = 'INFN',
  PRTF = 'PRTF',
  PRTS = 'PRTS',
  GRND = 'GRND',
  NUMR = 'NUMR',
  ADVB = 'ADVB',
  NPRO = 'NPRO',
  PRED = 'PRED',
  PREP = 'PREP',
  CONJ = 'CONJ',
  PRCL = 'PRCL',
  INTJ = 'INTJ',

  // Animacy
  anim = 'anim',
  inan = 'inan',

  // Gender
  masc = 'masc',
  femn = 'femn',
  neut = 'neut',

  // Number
  sing = 'sing',
  plur = 'plur',

  // Case (main cases only)
  nomn = 'nomn',
  gent = 'gent',
  datv = 'datv',
  accs = 'accs',
  ablt = 'ablt',
  loct = 'loct',

  // Perfective / imperfective aspect
  perf = 'perf',
  impf = 'impf',

  // Transitivity
  tran = 'tran',
  intr = 'intr',

  // Tense
  pres = 'pres',
  past = 'past',
  futr = 'futr',

  // Person
  per1 = '1per',
  per2 = '2per',
  per3 = '3per',

  // Mood
  indc = 'indc',
  impr = 'impr',

  // Inclusion of the speaker in the action
  incl = 'incl',
  excl = 'excl',

  // Voice
  actv = 'actv',
  pssv = 'pssv',

  // Speech style
  Infr = 'Infr', // Informal
  Slng = 'Slng', // Slang
  Arch = 'Arch', // Archaic
  Litr = 'Litr', // Literary

  // Abbreviation
  Abbr = 'Abbr'
}
/**
 * Grammemes that denote a part of speech.
 *
 * Lists every member of the "parts of speech" section of {@link Grammeme},
 * in declaration order. `NUMR` is included so that numerals are classified
 * as a part of speech like the other members of that section.
 */
export const PartOfSpeech = [
  Grammeme.NOUN, Grammeme.ADJF, Grammeme.ADJS, Grammeme.COMP,
  Grammeme.VERB, Grammeme.INFN, Grammeme.PRTF, Grammeme.PRTS,
  Grammeme.GRND, Grammeme.NUMR, Grammeme.ADVB, Grammeme.NPRO,
  Grammeme.PRED, Grammeme.PREP, Grammeme.CONJ, Grammeme.PRCL,
  Grammeme.INTJ
];
2023-09-14 16:53:38 +03:00
/** Grammemes of the gender category: masculine, feminine, neuter. */
export const Gender = [
  Grammeme.masc,
  Grammeme.femn,
  Grammeme.neut
];
2023-09-14 16:53:38 +03:00
/** Grammemes of the case category (the six main cases declared in {@link Grammeme}). */
export const Case = [
  Grammeme.nomn,
  Grammeme.gent,
  Grammeme.datv,
  Grammeme.accs,
  Grammeme.ablt,
  Grammeme.loct
];
2023-09-21 14:58:01 +03:00
/** Grammemes of the number category: singular, plural. */
export const Plurality: Grammeme[] = [
  Grammeme.sing,
  Grammeme.plur
];

/** Grammemes of the aspect category: perfective, imperfective. */
export const Perfectivity: Grammeme[] = [
  Grammeme.perf,
  Grammeme.impf
];

/** Grammemes of the transitivity category. */
export const Transitivity: Grammeme[] = [
  Grammeme.tran,
  Grammeme.intr
];

/** Grammemes of the mood category. */
export const Mood: Grammeme[] = [
  Grammeme.indc,
  Grammeme.impr
];

/** Grammemes marking whether the speaker is included in the action. */
export const Inclusion: Grammeme[] = [
  Grammeme.incl,
  Grammeme.excl
];

/** Grammemes of the voice category. */
export const Voice: Grammeme[] = [
  Grammeme.actv,
  Grammeme.pssv
];
/** Grammemes of the tense category: present, past, future. */
export const Tense: Grammeme[] = [ Grammeme.pres, Grammeme.past, Grammeme.futr ];
/** Grammemes of the person category: first, second, third. */
export const Person: Grammeme[] = [ Grammeme.per1, Grammeme.per2, Grammeme.per3 ];
/**
 * All grammeme category lists declared in this module, one array per category.
 *
 * Grammemes that belong to no category list (e.g. `UNKN`) do not appear
 * in any of the groups.
 */
export const GrammemeGroups: Grammeme[][] = [
  PartOfSpeech,
  Gender,
  Case,
  Plurality,
  Perfectivity,
  Transitivity,
  Mood,
  Inclusion,
  Voice,
  Tense,
  Person
];
/**
 * Flat list made of the NOUN / ADJF / ADJS tags followed by every
 * gender, case and number grammeme.
 *
 * NOTE(review): presumably the set of tags relevant for nominal word forms —
 * confirm against the callers.
 */
export const NounGrams: Grammeme[] = [
  Grammeme.NOUN,
  Grammeme.ADJF,
  Grammeme.ADJS,
  ...Gender, ...Case, ...Plurality
];
/**
 * Flat list made of the VERB / INFN / PRTF / PRTS tags followed by every
 * aspect, transitivity, mood, inclusion, voice, tense and person grammeme.
 *
 * NOTE(review): presumably the set of tags relevant for verbal word forms —
 * confirm against the callers.
 */
export const VerbGrams: Grammeme[] = [
  Grammeme.VERB,
  Grammeme.INFN,
  Grammeme.PRTF,
  Grammeme.PRTS,
  ...Perfectivity, ...Transitivity, ...Mood, ...Inclusion,
  ...Voice, ...Tense, ...Person
];
/**
 * Result of parsing a single grammeme tag.
 */
export interface IGramData {
  /** Recognised grammeme, or `Grammeme.UNKN` when the tag text is not a known grammeme. */
  type: Grammeme;
  /** Tag text the entry was parsed from. */
  data: string;
}
/**
 * Equality comparator for {@link IGramData}.
 *
 * Two entries match when their `type` is the same. For entries of type
 * `Grammeme.UNKN` the `data` strings must be equal as well; for every
 * other type `data` is not compared.
 *
 * @param value - First entry.
 * @param test - Entry to compare against.
 * @returns `true` when the entries are considered equal.
 */
export function matchGrammeme(value: IGramData, test: IGramData): boolean {
  if (value.type === test.type) {
    const isUnknown = value.type === Grammeme.UNKN;
    // Unknown tags carry their identity in `data`, so it has to agree too.
    return isUnknown ? value.data === test.data : true;
  }
  return false;
}
/**
 * Converts one tag string into an {@link IGramData} entry.
 *
 * @param text - Tag text; it is stored in `data` unchanged.
 * @returns An entry typed with the matching `Grammeme` value, or with
 *          `Grammeme.UNKN` when `text` is not one of the enum values.
 */
function parseSingleGrammeme(text: string): IGramData {
  const isKnown = Object.values(Grammeme).includes(text as Grammeme);
  return {
    data: text,
    type: isKnown ? (text as Grammeme) : Grammeme.UNKN
  };
}
/**
 * Parses a comma-separated list of grammeme tags.
 *
 * Each piece is trimmed; pieces that are empty after trimming are skipped.
 *
 * @param termForm - Comma-separated tag string.
 * @returns One {@link IGramData} entry per non-empty tag, in input order.
 */
export function parseGrammemes(termForm: string): IGramData[] {
  return termForm
    .split(',')
    .map(piece => piece.trim())
    .filter(piece => piece !== '')
    .map(piece => parseSingleGrammeme(piece));
}
/**
 * A word form: a text paired with grammeme entries.
 */
export interface IWordForm {
  /** Text of the form. */
  text: string;
  /** Grammeme entries associated with the text. */
  grams: IGramData[];
}
2023-09-19 17:55:17 +03:00
// ====== Reference resolution =====
/**
 * Object wrapping a single text string.
 *
 * NOTE(review): presumably the request payload for reference resolution —
 * confirm against the API that consumes it.
 */
export interface IRefsText {
  /** The wrapped text. */
  text: string;
}
/**
 * Kinds of reference; the enum values are the strings `'entity'` and `'syntax'`.
 */
export enum ReferenceType {
  /** Entity reference. */
  ENTITY = 'entity',
  /** Syntactic reference. */
  SYNTACTIC = 'syntax',
}
2023-09-21 14:58:01 +03:00
2023-09-19 17:55:17 +03:00
/**
 * Data of an entity reference.
 */
export interface IEntityReference {
  /** Referenced entity, as a string (presumably its identifier or alias — confirm). */
  entity: string;
  /** Requested form, as a string (presumably a grammeme tag list — confirm). */
  form: string;
}
/**
 * Data of a syntactic reference.
 */
export interface ISyntacticReference {
  /** Numeric offset (presumably relative to another reference — confirm). */
  offset: number;
  /** Nominal text of the reference. */
  nominal: string;
}
/**
 * A range inside a text, given by two numbers.
 *
 * NOTE(review): units and whether `finish` is inclusive are not visible
 * here — confirm against the producer of these values.
 */
export interface ITextPosition {
  /** Where the range begins. */
  start: number;
  /** Where the range ends. */
  finish: number;
}
/**
 * A single resolved reference together with its positions in the
 * input and output texts.
 */
export interface IResolvedReference {
  /** Kind of the reference. */
  type: ReferenceType;
  /** Reference payload (presumably shaped according to `type` — confirm). */
  data: IEntityReference | ISyntacticReference;
  /** Position of the reference in the input text. */
  pos_input: ITextPosition;
  /** Position of the reference in the output text. */
  pos_output: ITextPosition;
}
/**
 * Input text, output text and the list of references resolved between them.
 */
export interface IReferenceData {
  /** Input text. */
  input: string;
  /** Output text. */
  output: string;
  /** Resolved references. */
  refs: IResolvedReference[];
}