# -*- coding: utf-8 -*- """ Unit tests for nltk.corpus.wordnet See also nltk/test/wordnet.doctest """ from __future__ import unicode_literals from nose import SkipTest import unittest import os from nltk.corpus.reader.wordnet import WordNetCorpusReader from nltk.corpus import wordnet as wn from nltk.corpus import wordnet_ic as wnic from nltk.data import find as find_data wn.ensure_loaded() S = wn.synset L = wn.lemma class WordnNetDemo(unittest.TestCase): def test_retrieve_synset(self): move_synset = S('go.v.21') self.assertEqual(move_synset.name(), "move.v.15") self.assertEqual(move_synset.lemma_names(), ['move', 'go']) self.assertEqual(move_synset.definition(), "have a turn; make one's move in a game") self.assertEqual(move_synset.examples(), ['Can I go now?']) def test_retrieve_synsets(self): self.assertEqual(sorted(wn.synsets('zap', pos='n')), [S('zap.n.01')]) self.assertEqual(sorted(wn.synsets('zap', pos='v')), [S('microwave.v.01'), S('nuke.v.01'), S('zap.v.01'), S('zap.v.02')]) def test_hyperhyponyms(self): # Not every synset as hypernyms() self.assertEqual(S('travel.v.01').hypernyms(), []) self.assertEqual(S('travel.v.02').hypernyms(), [S('travel.v.03')]) self.assertEqual(S('travel.v.03').hypernyms(), []) # Test hyper-/hyponyms. self.assertEqual(S('breakfast.n.1').hypernyms(), [S('meal.n.01')]) first_five_meal_hypo = [S('banquet.n.02'), S('bite.n.04'), S('breakfast.n.01'), S('brunch.n.01'), S('buffet.n.02')] self.assertEqual(sorted(S('meal.n.1').hyponyms()[:5]), first_five_meal_hypo) self.assertEqual(S('Austen.n.1').instance_hypernyms(), [S('writer.n.01')]) first_five_composer_hypo = [S('ambrose.n.01'), S('bach.n.01'), S('barber.n.01'), S('bartok.n.01'), S('beethoven.n.01')] self.assertEqual(S('composer.n.1').instance_hyponyms()[:5], first_five_composer_hypo) # Test root hyper-/hyponyms self.assertEqual(S('person.n.01').root_hypernyms(), [S('entity.n.01')]) self.assertEqual(S('sail.v.01').root_hypernyms(), [S('travel.v.01')]) self.assertEqual(S('fall.v.12').root_hypernyms(), [S('act.v.01'), S('fall.v.17')]) def test_derivationally_related_forms(self): # Test `derivationally_related_forms()` self.assertEqual(L('zap.v.03.nuke').derivationally_related_forms(), [L('atomic_warhead.n.01.nuke')]) self.assertEqual(L('zap.v.03.atomize').derivationally_related_forms(), [L('atomization.n.02.atomization')]) self.assertEqual(L('zap.v.03.atomise').derivationally_related_forms(), [L('atomization.n.02.atomisation')]) self.assertEqual(L('zap.v.03.zap').derivationally_related_forms(), []) def test_meronyms_holonyms(self): # Test meronyms, holonyms. self.assertEqual(S('dog.n.01').member_holonyms(), [S('canis.n.01'), S('pack.n.06')]) self.assertEqual(S('dog.n.01').part_meronyms(), [S('flag.n.07')]) self.assertEqual(S('faculty.n.2').member_meronyms(), [S('professor.n.01')]) self.assertEqual(S('copilot.n.1').member_holonyms(), [S('crew.n.01')]) self.assertEqual(S('table.n.2').part_meronyms(), [S('leg.n.03'), S('tabletop.n.01'), S('tableware.n.01')]) self.assertEqual(S('course.n.7').part_holonyms(), [S('meal.n.01')]) self.assertEqual(S('water.n.1').substance_meronyms(), [S('hydrogen.n.01'), S('oxygen.n.01')]) self.assertEqual(S('gin.n.1').substance_holonyms(), [S('gin_and_it.n.01'), S('gin_and_tonic.n.01'), S('martini.n.01'), S('pink_lady.n.01')]) def test_antonyms(self): # Test antonyms. self.assertEqual(L('leader.n.1.leader').antonyms(), [L('follower.n.01.follower')]) self.assertEqual(L('increase.v.1.increase').antonyms(), [L('decrease.v.01.decrease')]) def test_misc_relations(self): # Test misc relations. self.assertEqual(S('snore.v.1').entailments(), [S('sleep.v.01')]) self.assertEqual(S('heavy.a.1').similar_tos(), [S('dense.s.03'), S('doughy.s.01'), S('heavier-than-air.s.01'), S('hefty.s.02'), S('massive.s.04'), S('non-buoyant.s.01'), S('ponderous.s.02')]) self.assertEqual(S('light.a.1').attributes(), [S('weight.n.01')]) self.assertEqual(S('heavy.a.1').attributes(), [S('weight.n.01')]) # Test pertainyms. self.assertEqual(L('English.a.1.English').pertainyms(), [L('england.n.01.England')]) def test_lch(self): # Test LCH. self.assertEqual(S('person.n.01').lowest_common_hypernyms(S('dog.n.01')), [S('organism.n.01')]) self.assertEqual(S('woman.n.01').lowest_common_hypernyms(S('girlfriend.n.02')), [S('woman.n.01')]) def test_domains(self): # Test domains. self.assertEqual(S('code.n.03').topic_domains(), [S('computer_science.n.01')]) self.assertEqual(S('pukka.a.01').region_domains(), [S('india.n.01')]) self.assertEqual(S('freaky.a.01').usage_domains(), [S('slang.n.02')]) def test_wordnet_similarities(self): # Path based similarities. self.assertAlmostEqual(S('cat.n.01').path_similarity(S('cat.n.01')), 1.0) self.assertAlmostEqual(S('dog.n.01').path_similarity(S('cat.n.01')), 0.2) self.assertAlmostEqual(S('dog.n.01').lch_similarity(S('cat.n.01')), 2.028, places=3) self.assertAlmostEqual(S('dog.n.01').wup_similarity(S('cat.n.01')), 0.8571, places=3) # Information Content similarities. brown_ic = wnic.ic('ic-brown.dat') self.assertAlmostEqual(S('dog.n.01').jcn_similarity(S('cat.n.01'), brown_ic), 0.4497, places=3) semcor_ic = wnic.ic('ic-semcor.dat') self.assertAlmostEqual(S('dog.n.01').lin_similarity(S('cat.n.01'), semcor_ic), 0.8863, places=3)