https://github.com/natasha/natasha
|
||
|
|
||
|
====== install =========
|
||
|
pip install natasha
|
||
|
wget https://storage.yandexcloud.net/natasha-navec/packs/navec_hudlit_v1_12B_500K_300d_100q.tar
|
||
|
wget https://storage.yandexcloud.net/natasha-navec/packs/navec_news_v1_1B_250K_300d_100q.tar
|
||
|
|
||
|
|
||
|
======= config ========
|
||
|
from natasha import (
|
||
|
Segmenter,
|
||
|
MorphVocab,
|
||
|
|
||
|
NewsEmbedding,
|
||
|
NewsMorphTagger,
|
||
|
NewsSyntaxParser,
|
||
|
NewsNERTagger,
|
||
|
|
||
|
PER,
|
||
|
NamesExtractor,
|
||
|
|
||
|
Doc
|
||
|
)
|
||
|
|
||
|
# Pipeline components, created once at module level and reused by the
# usage example further down.

# A single embedding instance is shared by all three News* models.
emb = NewsEmbedding()
ner_tagger = NewsNERTagger(emb)
syntax_parser = NewsSyntaxParser(emb)
morph_tagger = NewsMorphTagger(emb)

# The names extractor is constructed from the morphological vocabulary.
morph_vocab = MorphVocab()
names_extractor = NamesExtractor(morph_vocab)

# Passed to Doc.segment() in the usage example.
segmenter = Segmenter()
|
||
|
|
||
|
|
||
|
============= use ==============
|
||
|
# Usage example: build a document from a short sentence, segment it,
# run the syntax parser, and print the syntax markup of the first sentence.
text = 'Мама мыла раму'
doc = Doc(text)

# Segment first, then run the syntax parser on the segmented document.
doc.segment(segmenter)
doc.parse_syntax(syntax_parser)

# Only the first sentence is printed; the sample text is a single sentence.
doc.sents[0].syntax.print()
|