ConceptPortal-public/rsconcept/backend/cctext/tests/manual/TestNotebook.ipynb

95 lines
2.4 KiB
Plaintext
Raw Normal View History

2023-08-16 21:26:59 +03:00
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Third-party dependencies\n",
"import pandas as pd\n",
"import pymorphy2\n",
"from nltk.stem import SnowballStemmer\n",
"\n",
"# Package exercised by this notebook\n",
"import cctext as ct\n",
"\n",
"# Shared analyzer and parser instances\n",
"morpho = pymorphy2.MorphAnalyzer()\n",
"parser = ct.RuParser()\n",
"\n",
"# Russian Snowball stemmer; try e.g. stemmer.stem(\"обеспечение\")\n",
"# Reference: https://www.langust.ru/rus_gram/rus_gr06.shtml\n",
"stemmer = SnowballStemmer(language=\"russian\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Display the lexeme of the first parse pymorphy2 returns for the sample word\n",
"first_parse = morpho.parse('мюсли')[0]\n",
"first_parse.lexeme"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Display what ct.get_all_forms returns for a sample word\n",
"tank_forms = ct.get_all_forms('танк')\n",
"tank_forms"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# Round-trip check: inflect every term to dative and genitive, inflect each result\n",
"# back to nominative, and export all columns to a spreadsheet for manual review.\n",
"# NOTE(review): the relative path presumably requires the kernel's working directory\n",
"# to be the cctext package root - confirm before running.\n",
"# Alternative input file: 'tests/data/thesaurus.txt'\n",
"with open('tests/data/functions.txt', mode='r', encoding='UTF-8') as thesaurus:\n",
"    # The context manager closes the file even if reading raises.\n",
"    content = [term.strip() for term in thesaurus]\n",
"\n",
"inflect_datv = [ct.inflect(term, 'datv') for term in content]\n",
"inflect_nomn1 = [ct.inflect(term, 'nomn') for term in inflect_datv]\n",
"inflect_gent = [ct.inflect(term, 'gent') for term in content]\n",
"inflect_nomn2 = [ct.inflect(term, 'nomn') for term in inflect_gent]\n",
"\n",
"# One row per term: original, dative, dative->nominative, genitive, genitive->nominative\n",
"data = zip(content, inflect_datv, inflect_nomn1, inflect_gent, inflect_nomn2)\n",
"df = pd.DataFrame(data, columns=['Term', 'Datv', 'Inverse1', 'Gent', 'Inverse2'])\n",
"df.to_excel('output.xlsx')"
]
}
],
"metadata": {
"interpreter": {
"hash": "11938c6bc6919ae2720b4d5011047913343b08a43b18698fd82dedb0d4417594"
},
"kernelspec": {
"display_name": "Python 3.9.5 64-bit",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.5"
},
"metadata": {
"interpreter": {
"hash": "b8488343e509b415c98a857491a9b4c90395f9a45992da0bb6102fdf120e22ce"
}
},
"orig_nbformat": 2
},
"nbformat": 4,
"nbformat_minor": 2
}