42 lines
878 B
Plaintext
42 lines
878 B
Plaintext
https://github.com/natasha/natasha
|
|
|
|
====== install =========
|
|
pip install natasha
|
|
wget https://storage.yandexcloud.net/natasha-navec/packs/navec_hudlit_v1_12B_500K_300d_100q.tar
|
|
wget https://storage.yandexcloud.net/natasha-navec/packs/navec_news_v1_1B_250K_300d_100q.tar
|
|
|
|
|
|
======= config ========
|
|
from natasha import (
|
|
Segmenter,
|
|
MorphVocab,
|
|
|
|
NewsEmbedding,
|
|
NewsMorphTagger,
|
|
NewsSyntaxParser,
|
|
NewsNERTagger,
|
|
|
|
PER,
|
|
NamesExtractor,
|
|
|
|
Doc
|
|
)
|
|
|
|
segmenter = Segmenter()
|
|
morph_vocab = MorphVocab()
|
|
|
|
emb = NewsEmbedding()
|
|
morph_tagger = NewsMorphTagger(emb)
|
|
syntax_parser = NewsSyntaxParser(emb)
|
|
ner_tagger = NewsNERTagger(emb)
|
|
|
|
names_extractor = NamesExtractor(morph_vocab)
|
|
|
|
|
|
============= use ==============
|
|
text = 'Мама мыла раму'
|
|
doc = Doc(text)
|
|
|
|
doc.segment(segmenter)
|
|
doc.parse_syntax(syntax_parser)
|
|
doc.sents[0].syntax.print()