2024-06-07 20:17:03 +03:00
|
|
|
/**
|
|
|
|
* Module: Natural language model API.
|
|
|
|
*/
|
|
|
|
|
2025-02-11 20:56:11 +03:00
|
|
|
import { labelGrammeme } from '../labels';
|
2025-02-12 21:36:03 +03:00
|
|
|
|
2024-06-07 20:17:03 +03:00
|
|
|
import {
|
|
|
|
GramData,
|
|
|
|
Grammeme,
|
|
|
|
GrammemeGroups,
|
|
|
|
IEntityReference,
|
2025-02-10 01:32:16 +03:00
|
|
|
IGrammemeOption,
|
2025-02-15 15:33:37 +03:00
|
|
|
IReference,
|
2024-06-07 20:17:03 +03:00
|
|
|
ISyntacticReference,
|
|
|
|
IWordForm,
|
|
|
|
NounGrams,
|
2025-02-15 15:33:37 +03:00
|
|
|
ReferenceType,
|
2025-02-10 01:32:16 +03:00
|
|
|
supportedGrammemes,
|
2024-06-07 20:17:03 +03:00
|
|
|
VerbGrams
|
|
|
|
} from './language';
|
|
|
|
|
|
|
|
/**
 * Equality comparator for {@link IWordForm}.
 *
 * Only the `grams` lists are inspected: they must have the same length and hold
 * strictly equal items at every position, so the comparison is order-sensitive.
 *
 * @param left - First word form.
 * @param right - Second word form.
 * @returns `true` when both grammeme lists match element by element.
 */
export function wordFormEquals(left: IWordForm, right: IWordForm): boolean {
  if (left.grams.length !== right.grams.length) {
    return false;
  }
  for (const [position, gram] of left.grams.entries()) {
    if (gram !== right.grams[position]) {
      return false;
    }
  }
  return true;
}
|
|
|
|
|
|
|
|
/**
 * Ordering comparator for {@link GramData}.
 *
 * Values found among `Object.values(Grammeme)` sort first, in the order that call
 * yields them. Any other value sorts after all of those, and two such values are
 * ordered with `String.prototype.localeCompare`.
 *
 * @param left - First value to compare.
 * @param right - Second value to compare.
 * @returns Negative when `left` goes first, positive when `right` goes first, `0` when tied.
 */
export function grammemeCompare(left: GramData, right: GramData): number {
  const known = Object.values(Grammeme);
  const leftRank = known.indexOf(left as Grammeme);
  const rightRank = known.indexOf(right as Grammeme);
  const leftKnown = leftRank !== -1;
  const rightKnown = rightRank !== -1;

  if (leftKnown && rightKnown) {
    return leftRank - rightRank;
  }
  if (leftKnown) {
    return -1;
  }
  if (rightKnown) {
    return 1;
  }
  // Neither value is a known grammeme: fall back to alphabetical order.
  return left.localeCompare(right);
}
|
|
|
|
|
|
|
|
/**
 * Parses a comma-separated list of grammemes into a sorted {@link GramData} array.
 *
 * Each chunk is trimmed, empty chunks are dropped, and the remaining items are
 * ordered with {@link grammemeCompare}.
 *
 * @param termForm - Comma-separated grammemes, e.g. `'sing, nomn'`.
 * @returns Sorted list of the non-empty trimmed chunks.
 */
export function parseGrammemes(termForm: string): GramData[] {
  return termForm
    .split(',')
    .map(chunk => chunk.trim())
    .filter(gram => gram !== '')
    .sort(grammemeCompare);
}
|
|
|
|
|
|
|
|
/**
 * Creates a list of {@link Grammeme}s compatible with the given selection.
 *
 * Steps:
 * 1. Every input grammeme found in {@link NounGrams} / {@link VerbGrams} contributes
 *    that whole list to the candidates.
 * 2. For every input grammeme, all members of each {@link GrammemeGroups} group that
 *    contains it are removed from the candidates.
 * 3. If no candidates remain, the union of {@link VerbGrams} and {@link NounGrams}
 *    is returned instead.
 *
 * The result never contains duplicates, even when {@link NounGrams} and
 * {@link VerbGrams} share grammemes. Candidates keep the order in which they were
 * first added.
 *
 * @param input - Grammemes that are already selected.
 * @returns Unique grammemes compatible with `input`.
 */
export function getCompatibleGrams(input: Grammeme[]): Grammeme[] {
  // A Set preserves insertion order while dropping grammemes shared by both lists.
  const candidates = new Set<Grammeme>();
  for (const gram of input) {
    if (candidates.has(gram)) {
      continue;
    }
    if (NounGrams.includes(gram)) {
      NounGrams.forEach(item => candidates.add(item));
    }
    if (VerbGrams.includes(gram)) {
      VerbGrams.forEach(item => candidates.add(item));
    }
  }

  // Drop every group that already has a member in the input.
  for (const gram of input) {
    for (const group of GrammemeGroups) {
      if (group.includes(gram)) {
        group.forEach(item => candidates.delete(item));
      }
    }
  }

  if (candidates.size === 0) {
    return [...new Set<Grammeme>([...VerbGrams, ...NounGrams])];
  }
  return [...candidates];
}
|
|
|
|
|
|
|
|
/**
 * Extracts {@link IEntityReference} from string representation.
 *
 * The first two characters and the last character of `text` are stripped, and the
 * remainder is split on `|`. The first part becomes `entity` and the second becomes
 * `form`, both trimmed. Any further parts are ignored. A missing `form` part
 * (no `|` in the text) yields an empty string instead of throwing.
 *
 * @param text - Reference text in a valid pattern. Must fit format '\@\{GLOBAL_ID|GRAMMEMES\}'
 * @returns Parsed entity reference.
 */
export function parseEntityReference(text: string): IEntityReference {
  // Defaults guard against malformed input that lacks the '|' separator.
  const [entity = '', form = ''] = text.slice(2, -1).split('|');
  return {
    entity: entity.trim(),
    form: form.trim()
  };
}
|
|
|
|
|
|
|
|
/**
 * Extracts {@link ISyntacticReference} from string representation.
 *
 * The first two characters and the last character of `text` are stripped, and the
 * remainder is split on `|`. The first part is trimmed and converted with `Number`
 * into `offset`, so non-numeric text gives `NaN` and empty text gives `0`. The
 * second part is trimmed into `nominal`. A missing `nominal` part (no `|` in the
 * text) yields an empty string instead of throwing.
 *
 * @param text - Reference text in a valid pattern. Must fit format '\@\{OFFSET|NOMINAL_FORM\}'
 * @returns Parsed syntactic reference.
 */
export function parseSyntacticReference(text: string): ISyntacticReference {
  // Defaults guard against malformed input that lacks the '|' separator.
  const [offset = '', nominal = ''] = text.slice(2, -1).split('|');
  return {
    offset: Number(offset.trim()),
    nominal: nominal.trim()
  };
}
|
2025-02-10 01:32:16 +03:00
|
|
|
|
|
|
|
/**
 * Options for the {@link Grammeme} selector.
 *
 * Built once at module load from {@link supportedGrammemes}: each grammeme becomes
 * the option `value`, and the result of {@link labelGrammeme} becomes its `label`.
 */
export const supportedGrammeOptions: IGrammemeOption[] = supportedGrammemes.map(grammeme => {
  const label = labelGrammeme(grammeme);
  return { value: grammeme, label };
});
|
2025-02-15 15:33:37 +03:00
|
|
|
|
|
|
|
/**
 * Serializes {@link IReference} into its textual form.
 *
 * - {@link ReferenceType.ENTITY} produces `@{entity|form}`.
 * - {@link ReferenceType.SYNTACTIC} produces `@{offset|nominal}`.
 *
 * @param ref - Reference whose `data` payload matches its `type`.
 * @returns Text representation of the reference.
 */
export function referenceToString(ref: IReference): string {
  switch (ref.type) {
    case ReferenceType.ENTITY: {
      const { entity, form } = ref.data as IEntityReference;
      return `@{${entity}|${form}}`;
    }
    case ReferenceType.SYNTACTIC: {
      const { offset, nominal } = ref.data as ISyntacticReference;
      return `@{${offset}|${nominal}}`;
    }
  }
}
|