ConceptPortal-public/rsconcept/frontend/src/models/language.ts
2023-09-21 23:09:51 +03:00

314 lines
7.1 KiB
TypeScript

// Module: Natural language model declarations.
/**
 * Represents a single unit of language morphology.
 *
 * Members are grouped by grammatical category. Declaration order is significant:
 * code that iterates `Object.values(Grammeme)` relies on it for ordering.
 */
export enum Grammeme {
  // Unknown grammeme
  UNKN = 'UNKN',

  // Parts of speech
  NOUN = 'NOUN',
  ADJF = 'ADJF',
  ADJS = 'ADJS',
  COMP = 'COMP',
  VERB = 'VERB',
  INFN = 'INFN',
  PRTF = 'PRTF',
  PRTS = 'PRTS',
  GRND = 'GRND',
  NUMR = 'NUMR',
  ADVB = 'ADVB',
  NPRO = 'NPRO',
  PRED = 'PRED',
  PREP = 'PREP',
  CONJ = 'CONJ',
  PRCL = 'PRCL',
  INTJ = 'INTJ',

  // Animacy
  anim = 'anim',
  inan = 'inan',

  // Gender
  masc = 'masc',
  femn = 'femn',
  neut = 'neut',

  // Number
  sing = 'sing',
  plur = 'plur',

  // Case (main ones)
  nomn = 'nomn',
  gent = 'gent',
  datv = 'datv',
  accs = 'accs',
  ablt = 'ablt',
  loct = 'loct',

  // Perfective / imperfective aspect
  perf = 'perf',
  impf = 'impf',

  // Transitivity
  tran = 'tran',
  intr = 'intr',

  // Tense
  pres = 'pres',
  past = 'past',
  futr = 'futr',

  // Person (member names cannot start with a digit, so keys differ from values)
  per1 = '1per',
  per2 = '2per',
  per3 = '3per',

  // Mood
  indc = 'indc',
  impr = 'impr',

  // Inclusion of the speaker in the action
  incl = 'incl',
  excl = 'excl',

  // Voice
  actv = 'actv',
  pssv = 'pssv',

  // Speech style
  Infr = 'Infr', // Informal
  Slng = 'Slng', // Slang
  Arch = 'Arch', // Archaic
  Litr = 'Litr', // Literary

  // Abbreviation
  Abbr = 'Abbr'
}
/**
 * Represents part of speech language concept.
 *
 * Implemented as a list of mutually exclusive {@link Grammeme}s.
 * Lists every part-of-speech member declared in {@link Grammeme}, in declaration
 * order, including `NUMR`, so that no part-of-speech tag is left outside the group.
 */
export const PartOfSpeech = [
  Grammeme.NOUN, Grammeme.ADJF, Grammeme.ADJS, Grammeme.COMP,
  Grammeme.VERB, Grammeme.INFN, Grammeme.PRTF, Grammeme.PRTS,
  Grammeme.GRND, Grammeme.NUMR, Grammeme.ADVB, Grammeme.NPRO,
  Grammeme.PRED, Grammeme.PREP, Grammeme.CONJ, Grammeme.PRCL,
  Grammeme.INTJ
];
/**
 * Represents gender language concept.
 *
 * Implemented as a list of mutually exclusive {@link Grammeme}s.
 */
export const Gender: Grammeme[] = [
  Grammeme.masc,
  Grammeme.femn,
  Grammeme.neut
];
/**
 * Represents case language concept.
 *
 * Implemented as a list of mutually exclusive {@link Grammeme}s.
 */
export const Case: Grammeme[] = [
  Grammeme.nomn,
  Grammeme.gent,
  Grammeme.datv,
  Grammeme.accs,
  Grammeme.ablt,
  Grammeme.loct
];
/**
 * Represents plurality language concept.
 *
 * Implemented as a list of mutually exclusive {@link Grammeme}s.
 */
export const Plurality: Grammeme[] = [
  Grammeme.sing,
  Grammeme.plur
];
/**
 * Represents verb perfectivity language concept.
 *
 * Implemented as a list of mutually exclusive {@link Grammeme}s.
 */
export const Perfectivity: Grammeme[] = [
  Grammeme.perf,
  Grammeme.impf
];
/**
 * Represents verb transitivity language concept.
 *
 * Implemented as a list of mutually exclusive {@link Grammeme}s.
 */
export const Transitivity: Grammeme[] = [
  Grammeme.tran,
  Grammeme.intr
];
/**
 * Represents verb mood language concept.
 *
 * Implemented as a list of mutually exclusive {@link Grammeme}s.
 */
export const Mood: Grammeme[] = [
  Grammeme.indc,
  Grammeme.impr
];
/**
 * Represents verb self-inclusion language concept.
 *
 * Implemented as a list of mutually exclusive {@link Grammeme}s.
 */
export const Inclusion: Grammeme[] = [
  Grammeme.incl,
  Grammeme.excl
];
/**
 * Represents verb voice language concept.
 *
 * Implemented as a list of mutually exclusive {@link Grammeme}s.
 */
export const Voice: Grammeme[] = [
  Grammeme.actv,
  Grammeme.pssv
];
/**
 * Represents verb tense language concept.
 *
 * Implemented as a list of mutually exclusive {@link Grammeme}s.
 */
export const Tense: Grammeme[] = [Grammeme.pres, Grammeme.past, Grammeme.futr];
/**
 * Represents verb person language concept.
 *
 * Implemented as a list of mutually exclusive {@link Grammeme}s.
 */
export const Person: Grammeme[] = [Grammeme.per1, Grammeme.per2, Grammeme.per3];
/**
 * Represents complete list of language concepts.
 *
 * Implemented as a list of lists of {@link Grammeme}s: each inner list is one
 * of the concept groups declared in this module.
 */
export const GrammemeGroups: Grammeme[][] = [
  PartOfSpeech,
  Gender,
  Case,
  Plurality,
  Perfectivity,
  Transitivity,
  Mood,
  Inclusion,
  Voice,
  Tense,
  Person
];
/**
 * Represents NOUN-ish list of language concepts.
 *
 * Represented concepts can be a target of inflection or coalition in a sentence.
 *
 * Implemented as a flat list of {@link Grammeme}s: three part-of-speech tags
 * followed by the members of {@link Gender}, {@link Case} and {@link Plurality}.
 */
export const NounGrams: Grammeme[] = [
  Grammeme.NOUN,
  Grammeme.ADJF,
  Grammeme.ADJS
].concat(Gender, Case, Plurality);
/**
 * Represents VERB-ish list of language concepts.
 *
 * Represented concepts can be a target of inflection or coalition in a sentence.
 *
 * Implemented as a flat list of {@link Grammeme}s: four part-of-speech tags
 * followed by the members of {@link Perfectivity}, {@link Transitivity},
 * {@link Mood}, {@link Inclusion}, {@link Voice}, {@link Tense} and {@link Person}.
 */
export const VerbGrams: Grammeme[] = [
  Grammeme.VERB,
  Grammeme.INFN,
  Grammeme.PRTF,
  Grammeme.PRTS
].concat(Perfectivity, Transitivity, Mood, Inclusion, Voice, Tense, Person);
/**
 * Represents {@link Grammeme} parse data.
 */
export interface IGramData {
  /** Recognized grammeme, or {@link Grammeme.UNKN} when the tag is not a known one. */
  type: Grammeme;

  /** Text of the tag this entry was created from. */
  data: string;
}
/**
 * Represents specific wordform attached to {@link Grammeme}s.
 */
export interface IWordForm {
  /** Text of the wordform. */
  text: string;

  /** Grammemes attached to the wordform. */
  grams: IGramData[];
}
/**
 * Equality comparator for {@link IGramData}.
 *
 * Two entries match when their types are equal. For {@link Grammeme.UNKN} entries
 * the text data must be equal as well.
 *
 * @param left - first entry to compare.
 * @param right - second entry to compare.
 * @returns `true` if the entries are considered equal.
 */
export function matchGrammeme(left: IGramData, right: IGramData): boolean {
  const sameType = left.type === right.type;
  return sameType && (left.type !== Grammeme.UNKN || left.data === right.data);
}
/**
 * Equality comparator for {@link IWordForm}.
 *
 * Only the attached grammemes are compared, the wordform text is ignored.
 * Grammemes are compared pairwise by position using {@link matchGrammeme},
 * so equal sets listed in a different order do not match.
 *
 * @param left - first wordform to compare.
 * @param right - second wordform to compare.
 * @returns `true` if both wordforms carry matching grammemes at every position.
 */
export function matchWordForm(left: IWordForm, right: IWordForm): boolean {
  const leftGrams = left.grams;
  const rightGrams = right.grams;
  return (
    leftGrams.length === rightGrams.length &&
    leftGrams.every((gram, position) => matchGrammeme(gram, rightGrams[position]))
  );
}
/**
 * Converts a single tag into {@link IGramData}.
 *
 * @param text - tag text; it is stored unchanged in `data`.
 * @returns entry typed with the matching {@link Grammeme} value, or with
 * {@link Grammeme.UNKN} when the text is not one of the enum values.
 */
function parseSingleGrammeme(text: string): IGramData {
  const isKnown = Object.values(Grammeme).includes(text as Grammeme);
  return {
    data: text,
    type: isKnown ? (text as Grammeme) : Grammeme.UNKN
  };
}
/**
 * Orders grammeme entries by the declaration order of {@link Grammeme}.
 *
 * For every known grammeme at most one entry is kept (the first one found in the input).
 * Unknown entries ({@link Grammeme.UNKN}) are distinguished by their text: every distinct
 * unknown tag is kept, in input order, instead of only the first one. This prevents
 * unrecognized tags from being silently dropped.
 *
 * The input array is not modified.
 *
 * @param input - entries to order.
 * @returns new array with the ordered entries.
 */
export function sortGrammemes<TData extends IGramData>(input: TData[]): TData[] {
  const result: TData[] = [];
  for (const gram of Object.values(Grammeme)) {
    if (gram !== Grammeme.UNKN) {
      const item = input.find(data => data.type === gram);
      if (item) {
        result.push(item);
      }
      continue;
    }
    // Unknown tags all share one type, so they are told apart by text data.
    const seenUnknown = new Set<string>();
    for (const item of input) {
      if (item.type === Grammeme.UNKN && !seenUnknown.has(item.data)) {
        seenUnknown.add(item.data);
        result.push(item);
      }
    }
  }
  return result;
}
/**
 * Parses a comma-separated list of grammeme tags.
 *
 * Each chunk is trimmed, empty chunks are skipped, the rest are converted with
 * `parseSingleGrammeme` and the result is passed through {@link sortGrammemes}.
 *
 * @param termForm - comma-separated tags.
 * @returns entries as returned by {@link sortGrammemes}.
 */
export function parseGrammemes(termForm: string): IGramData[] {
  const parsed = termForm
    .split(',')
    .map(piece => piece.trim())
    .filter(piece => piece !== '')
    .map(piece => parseSingleGrammeme(piece));
  return sortGrammemes(parsed);
}
// ====== Reference resolution =====
/**
 * Wrapper around a single text value.
 *
 * NOTE(review): presumably the text containing references to be resolved — confirm against usage.
 */
export interface IRefsText {
  /** Text payload. */
  text: string;
}
/**
 * Kinds of references: entity and syntactic.
 */
export enum ReferenceType {
  /** Entity reference, see {@link IEntityReference}. */
  ENTITY = 'entity',

  /** Syntactic reference, see {@link ISyntacticReference}. */
  SYNTACTIC = 'syntax',
}
/**
 * Data of an entity reference.
 */
export interface IEntityReference {
  /** Referenced entity. NOTE(review): presumably its alias or identifier — confirm. */
  entity: string;

  /** Requested form. NOTE(review): presumably a comma-separated grammeme list — confirm. */
  form: string;
}
/**
 * Data of a syntactic reference.
 */
export interface ISyntacticReference {
  /** Numeric offset. NOTE(review): what it is measured relative to is not visible here — confirm. */
  offset: number;

  /** Nominal text of the reference. */
  nominal: string;
}
/**
 * Range inside a text described by two numeric bounds.
 *
 * NOTE(review): whether `finish` is inclusive is not visible here — confirm.
 */
export interface ITextPosition {
  /** Start bound of the range. */
  start: number;

  /** Finish bound of the range. */
  finish: number;
}
/**
 * Single resolved reference: its kind, payload and two text positions.
 */
export interface IResolvedReference {
  /** Kind of the reference. */
  type: ReferenceType;

  /** Reference payload, either entity or syntactic data. */
  data: IEntityReference | ISyntacticReference;

  /** Position of the reference. NOTE(review): presumably within the input text — confirm. */
  pos_input: ITextPosition;

  /** Position of the reference. NOTE(review): presumably within the output text — confirm. */
  pos_output: ITextPosition;
}
/**
 * Result of reference resolution: input text, output text and the list of resolved references.
 */
export interface IReferenceData {
  /** Input text. */
  input: string;

  /** Output text. */
  output: string;

  /** Resolved references. */
  refs: IResolvedReference[];
}