mirror of
https://github.com/IRBorisov/ConceptPortal.git
synced 2025-06-26 13:00:39 +03:00
95 lines
2.4 KiB
Plaintext
95 lines
2.4 KiB
Plaintext
![]() |
{
|
||
|
"cells": [
|
||
|
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# All imports grouped at the top of the cell, ahead of any setup code.\n",
"import pandas as pd\n",
"import pymorphy2\n",
"from nltk.stem import SnowballStemmer\n",
"\n",
"import cctext as ct\n",
"\n",
"# Snowball stemmer configured for Russian.\n",
"# Reference link left by the author: https://www.langust.ru/rus_gram/rus_gr06.shtml\n",
"# Example call (disabled): stemmer.stem(\"обеспечение\")\n",
"stemmer = SnowballStemmer(language=\"russian\")\n",
"\n",
"# `morpho` is used by the next cell; `parser` is created here but is not\n",
"# referenced by any other cell of this notebook.\n",
"morpho = pymorphy2.MorphAnalyzer()\n",
"parser = ct.RuParser()"
]
},
|
||
|
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Parse the word with the pymorphy2 analyzer and display the lexeme\n",
"# attached to the first parse in the returned list.\n",
"parses = morpho.parse('мюсли')\n",
"parses[0].lexeme"
]
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": null,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"ct.get_all_forms('танк')"
|
||
|
]
|
||
|
},
|
||
|
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Read one term per line, run each term through ct.inflect with the 'datv'\n",
"# and 'gent' tags, run those results back through ct.inflect with 'nomn',\n",
"# and write all five columns to output.xlsx.\n",
"# Alternative input file: 'tests/data/thesaurus.txt'\n",
"with open('tests/data/functions.txt', mode='r', encoding='UTF-8') as thesaurus:\n",
"    # The context manager closes the file even if reading raises.\n",
"    content = [term.strip() for term in thesaurus]\n",
"\n",
"inflect_datv = [ct.inflect(term, 'datv') for term in content]\n",
"inflect_nomn1 = [ct.inflect(term, 'nomn') for term in inflect_datv]\n",
"inflect_gent = [ct.inflect(term, 'gent') for term in content]\n",
"inflect_nomn2 = [ct.inflect(term, 'nomn') for term in inflect_gent]\n",
"\n",
"# One row per term; Inverse1 / Inverse2 hold the 'nomn' results computed\n",
"# from the Datv / Gent columns respectively.\n",
"data = zip(content, inflect_datv, inflect_nomn1, inflect_gent, inflect_nomn2)\n",
"df = pd.DataFrame(data, columns=['Term', 'Datv', 'Inverse1', 'Gent', 'Inverse2'])\n",
"df.to_excel('output.xlsx')"
]
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"interpreter": {
|
||
|
"hash": "11938c6bc6919ae2720b4d5011047913343b08a43b18698fd82dedb0d4417594"
|
||
|
},
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3.9.5 64-bit",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"language_info": {
|
||
|
"codemirror_mode": {
|
||
|
"name": "ipython",
|
||
|
"version": 3
|
||
|
},
|
||
|
"file_extension": ".py",
|
||
|
"mimetype": "text/x-python",
|
||
|
"name": "python",
|
||
|
"nbconvert_exporter": "python",
|
||
|
"pygments_lexer": "ipython3",
|
||
|
"version": "3.9.5"
|
||
|
},
|
||
|
"metadata": {
|
||
|
"interpreter": {
|
||
|
"hash": "b8488343e509b415c98a857491a9b4c90395f9a45992da0bb6102fdf120e22ce"
|
||
|
}
|
||
|
},
|
||
|
"orig_nbformat": 2
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 2
|
||
|
}
|