In [34]:
from gensim.models import word2vec
import gensim
In [35]:
# Load the pretrained word vectors from the word2vec binary format.
# The file is resolved relative to the working directory; change the path here
# if the vectors live elsewhere.
WORD2VEC_PATH = 'GoogleNews-vectors-negative300.bin'

# Word2Vec.load_word2vec_format was deprecated and later removed from gensim;
# pretrained vectors are loaded through KeyedVectors instead. Fall back to the
# legacy loader only on old gensim releases that predate KeyedVectors.
if hasattr(gensim.models, 'KeyedVectors'):
    _load_vectors = gensim.models.KeyedVectors.load_word2vec_format
else:
    _load_vectors = word2vec.Word2Vec.load_word2vec_format

model = _load_vectors(WORD2VEC_PATH, binary=True)
In [37]:
# Nearest neighbours of 'ipad' with the 'screen' vector counted against it:
# words in `positive` pull the query towards them, words in `negative` push away.
positive_terms = ['ipad']
negative_terms = ['screen']
model.most_similar(positive=positive_terms, negative=negative_terms)
Out[37]:
[(u'Iphone', 0.4428732395172119),
 (u'iphone', 0.4421612024307251),
 (u'iPad2', 0.4012947678565979),
 (u'unlock_iphone', 0.3873326778411865),
 (u'apple_iphone', 0.37293773889541626),
 (u'3gs', 0.3723487854003906),
 (u'Ipad', 0.37108615040779114),
 (u'iphone_3gs', 0.36967432498931885),
 (u'itz', 0.36776965856552124),
 (u'xmas', 0.36486053466796875)]
In [38]:
# Odd-one-out query: ask the model which word fits the group least.
candidate_words = "kimchi mandu stake ramyeon kimbap".split()
model.doesnt_match(candidate_words)
Out[38]:
'stake'
In [40]:
# Similarity score between the vectors of the two words.
first_word, second_word = 'ora', 'muda'
model.similarity(first_word, second_word)
Out[40]:
0.38961832366855831