如何将预训练的fastText向量转换为gensim模型

Question

How can I convert pretrained fastText vectors to a gensim model? 如何将预训练的fastText向量转换为gensim模型？ I need the predict_output_word method. 我需要predict_output_word方法。

import gensim
from gensim.models import Word2Vec
from gensim.models.wrappers import FastText
（导入gensim；从gensim.models导入Word2Vec；从gensim.models.wrappers导入FastText）

model_wiki = gensim.models.KeyedVectors.load_word2vec_format("wiki.ru.vec")
model3 = Word2Vec(sentences=model_wiki)
（先用load_word2vec_format加载"wiki.ru.vec"词向量，再把得到的model_wiki作为sentences参数传给Word2Vec）

TypeError Traceback (most recent call last) in <module>
----> 1 model3 = Word2Vec(sentences=model_wiki) # train a model from the corpus
TypeError 回溯（最近一次调用在最后） ----> 1 model3 = Word2Vec(sentences=model_wiki) # 从语料库训练模型

~/anaconda3/envs/pym/lib/python3.6/site-packages/gensim/models/word2vec.py in init (self, sentences, corpus_file, size, alpha, window, min_count, max_vocab_size, sample, seed, workers, min_alpha, sg, hs, negative, ns_exponent, cbow_mean, hashfxn, iter, null_word, trim_rule, sorted_vocab, batch_words, compute_loss, callbacks, max_final_vocab) 765 callbacks=callbacks, batch_words=batch_words, trim_rule=trim_rule, sg=sg, alpha=alpha, window=window, 766 seed=seed, hs=hs, negative=negative, cbow_mean=cbow_mean, min_alpha=min_alpha, compute_loss=compute_loss, --> 767 fast_version=FAST_VERSION) 768 769 def _do_train_epoch(self, corpus_file, thread_id, offset, cython_vocab, thread_private_mem, cur_epoch, 〜/ anaconda3 / ENVS /平阳霉素/ lib中/ python3.6 /站点包/ gensim /模型/ word2vec.py中的init（个体经营，句子，corpus_file，大小，α，窗口，min_count，max_vocab_size，样品，种子，工人， min_alpha，sg，hs，负数，ns_exponent，cbow_mean，hashfxn，iter，null_word，trim_rule，sorted_vocab，batch_words，compute_loss，callbacks，max_final_vocab）765回调=回调，batch_words = batch_words，trim_rule = trim，trim = rule = trim ，窗口=窗口，766种子=种子，hs = hs，负=负，cbow_mean = cbow_mean，min_alpha = min_alpha，compute_loss = compute_loss，-> 767 fast_version = FAST_VERSION）768769 def _do_train_epoch（self，corpus_file，thread_id，，cython_vocab，thread_private_mem，cur_epoch，

~/anaconda3/envs/pym/lib/python3.6/site-packages/gensim/models/base_any2vec.py in init (self, sentences, corpus_file, workers, vector_size, epochs, callbacks, batch_words, trim_rule, sg, alpha, window, seed, hs, negative, ns_exponent, cbow_mean, min_alpha, compute_loss, fast_version, **kwargs) 757 raise TypeError("You can't pass a generator as the sentences argument. Try an iterator.") 758 --> 759 self.build_vocab(sentences=sentences, corpus_file=corpus_file, trim_rule=trim_rule) 760 self.train( 761 sentences=sentences, corpus_file=corpus_file, total_examples=self.corpus_count, 〜/ anaconda3 / ENVS /平阳霉素/ lib中/ python3.6 /站点包/ gensim /模型/ base_any2vec.py中的init（个体经营，句子，corpus_file，工人，vector_size，时代，回调，batch_words，trim_rule，SG，α，窗口，种子，hs，负数，ns_exponent，cbow_mean，min_alpha，compute_loss，fast_version，** kwargs）757提高TypeError（“您不能将生成器作为句子参数传递。请尝试使用迭代器。”）758-> 759 self.build_vocab（句子=句子，corpus_file = corpus_file，trim_rule = trim_rule）760 self.train（761句子=句子，corpus_file = corpus_file，total_examples = self.corpus_count，

~/anaconda3/envs/pym/lib/python3.6/site-packages/gensim/models/base_any2vec.py in build_vocab(self, sentences, corpus_file, update, progress_per, keep_raw_vocab, trim_rule, **kwargs) 934 """ 935 total_words, corpus_count = self.vocabulary.scan_vocab( --> 936 sentences=sentences, corpus_file=corpus_file, progress_per=progress_per, trim_rule=trim_rule) 937 self.corpus_count = corpus_count 938 self.corpus_total_words = total_words 〜/ anaconda3 / envs / pym / lib / python3.6 / site-packages / gensim / models / base_any2vec.py在build_vocab中（自己，句子，语料库文件，更新，progress_per，keep_raw_vocab，trim_rule，** kwargs）934“” 935 total_words，corpus_count = self.vocabulary.scan_vocab（-> 936句子=句子，corpus_file = corpus_file，progress_per = progress_per，trim_rule = trim_rule）937 self.corpus_count = corpus_count 938 self.corpus_total_words = total_

~/anaconda3/envs/pym/lib/python3.6/site-packages/gensim/models/word2vec.py in scan_vocab(self, sentences, corpus_file, progress_per, workers, trim_rule) 1569 sentences = LineSentence(corpus_file) 〜/ anaconda3 / envs / pym / lib / python3.6 / site-packages / gensim / models / word2vec.py在scan_vocab中（自己，句子，corpus_file，progress_per，worker，trim_rule）1569句子= LineSentence（corpus_file）
1570 -> 1571 total_words, corpus_count = self._scan_vocab(sentences, progress_per, trim_rule) 1572 1573 logger.info( 1570-> 1571 total_words，corpus_count = self._scan_vocab（句子，progress_per，trim_rule）1572 1573 logger.info（

~/anaconda3/envs/pym/lib/python3.6/site-packages/gensim/models/word2vec.py in _scan_vocab(self, sentences, progress_per, trim_rule) 1538 〜/ anaconda3 / envs / pym / lib / python3.6 / site-packages / gensim / models / word2vec.py in _scan_vocab（自己，句子，progress_per，trim_rule）1538
vocab = defaultdict(int) 1539 checked_string_types = 0 -> 1540 for sentence_no, sentence in enumerate(sentences): 1541 if not checked_string_types: 1542 vocab = defaultdict（int）1539 Checked_string_types = 0-> 1540 for句子_否，枚举（句子）中的句子：1541，如果未选中，则字符串_类型：1542
if isinstance(sentence, string_types): 如果isinstance（sentence，string_types）：

~/anaconda3/envs/pym/lib/python3.6/site-packages/gensim/models/keyedvectors.py in __getitem__(self, entities) 337 return self.get_vector(entities) 338 --> 339 return vstack([self.get_vector(entity) for entity in entities]) 340 341 def __contains__(self, entity): （位于 ~/anaconda3/envs/pym/lib/python3.6/site-packages/gensim/models/keyedvectors.py 的 __getitem__(self, entities) 中：337 return self.get_vector(entities) 338 --> 339 return vstack([self.get_vector(entity) for entity in entities]) 340 341 def __contains__(self, entity):）

TypeError: 'int' object is not iterable TypeError：'int' 对象不可迭代

Answer 1

According to the Gensim docs, you can use the FastText wrapper in gensim.models.wrappers to: 根据Gensim文档，您可以使用gensim.models.wrappers中的FastText包装器来：

Load the input-hidden weight matrix from Facebook's native fastText .bin and .vec output files 从Facebook原生fastText的.bin和.vec输出文件中加载输入层到隐藏层的权重矩阵

Here is an example: 这是一个例子：

from gensim.models.wrappers import FastText

model = FastText.load_fasttext_format('wiki.vec')

如何将预训练的fastText向量转换为gensim模型

问题描述

1 个解决方案

解决方案1
0 2018-12-21 14:16:01

如何将预训练的fastText向量转换为gensim模型

问题描述

1 个解决方案

解决方案1 0 2018-12-21 14:16:01

解决方案1
0 2018-12-21 14:16:01