diff --git a/results/readable-format/model_comparison_boxplot_20ng.pdf b/results/readable-format/model_comparison_boxplot_20ng.pdf new file mode 100644 index 0000000000000000000000000000000000000000..94a04c98afb4b167027da72e0fc2ff3db1ebb217 Binary files /dev/null and b/results/readable-format/model_comparison_boxplot_20ng.pdf differ diff --git a/results/readable-format/model_comparison_boxplot_wiki.pdf b/results/readable-format/model_comparison_boxplot_wiki.pdf new file mode 100644 index 0000000000000000000000000000000000000000..6aadece50854af0a796d096ef1ceb5b81c0da1f2 Binary files /dev/null and b/results/readable-format/model_comparison_boxplot_wiki.pdf differ diff --git a/results/readable-format/npmis_dvae-20ng.txt b/results/readable-format/npmis_dvae-20ng.txt new file mode 100644 index 0000000000000000000000000000000000000000..de703b065d19990ac28caa4cf2048a855f57ee43 --- /dev/null +++ b/results/readable-format/npmis_dvae-20ng.txt @@ -0,0 +1,50 @@ +0.24867697774022374 +0.3396118005925878 +0.29970309841727477 +0.23866633667824944 +0.3699640259380764 +0.42240324296068005 +0.8874487186560454 +0.128691716880522 +0.37589863667431506 +0.6238240724850099 +0.17956630920866357 +0.29258062003294744 +0.3877803368148381 +0.33284345912394303 +0.17428405447076523 +0.4708279864428822 +0.33234795901646436 +0.2659239621234882 +0.4472987543185158 +0.18977108703994489 +0.21801183330846072 +0.45164395443930255 +0.5162468786823781 +0.4219132846940917 +0.37711305638174364 +0.5037645757835705 +0.36761746934438233 +0.6534066162568825 +0.09283550832829766 +0.2860944139426903 +0.3716574301502446 +0.10686569980483274 +0.47924113528456774 +0.3303952745347445 +0.42508422599921253 +0.41984768456307325 +0.40716769678535675 +0.408620814860296 +0.11413587466631586 +0.12248643588268114 +0.23690636746040614 +0.4823283121857407 +0.186655726461628 +0.6961456071854104 +0.45522040363986493 +0.5989490902099036 +0.49535041831334065 +0.5920919493249797 +0.769231707047005 +0.4833977306079935 diff --git a/results/readable-format/npmis_dvae-wiki.txt b/results/readable-format/npmis_dvae-wiki.txt new file mode 100644 index 0000000000000000000000000000000000000000..83dd70f839ecb6b19f6b7521f88d638c91818902 --- /dev/null +++ b/results/readable-format/npmis_dvae-wiki.txt @@ -0,0 +1,50 @@ +0.7217955074040513 +0.8449497046071232 +0.2664973793660157 +0.41015537870684277 +0.44793629011265856 +0.5068716250918338 +0.5869756393954607 +0.40779636218595866 +0.32365262852428495 +0.3706530395260478 +0.6191137102459842 +0.5194978794532137 +0.24563058030745483 +0.626175312158884 +0.32396256683781666 +0.5739839565518935 +0.5284204235749705 +0.5312909044467004 +0.36389183117374196 +0.2464605956066343 +0.36255692884689855 +0.7123711703156873 +0.2751115356544055 +0.4693476885444469 +0.6086562465660189 +0.7296206567911837 +0.539164685906222 +0.4556988939400416 +0.5171653657991538 +0.35983835287120886 +0.4876362794776332 +0.6215960079286554 +0.6037290051569684 +0.5247944845051506 +0.46251880155546765 +0.47585938780808573 +0.22948938920078743 +0.5422161524901976 +0.47674696602922834 +0.462252664781505 +0.48974542792852566 +0.5157172252590485 +0.4766109503971607 +0.3748874274113518 +0.33051162921453836 +0.6729153603615396 +0.5285041126988406 +0.16229422961833365 +0.5807691921228599 +0.3852226832817468 diff --git a/results/readable-format/npmis_etm-20ng.txt b/results/readable-format/npmis_etm-20ng.txt new file mode 100644 index 0000000000000000000000000000000000000000..481db2acf19fee6c05936cfc9bbef286512bd41f --- /dev/null +++ b/results/readable-format/npmis_etm-20ng.txt @@ -0,0 +1,50 @@ +0.07848605495278796 +0.21328584909712833 +0.16799076962198772 +0.12183382621090537 +0.19279748866717403 +0.09666872272011014 +0.08767949691275889 +0.11020999746869907 +0.24171168500496862 +0.20521845532961316 +0.27272151636049047 +0.23137416387552698 +0.1458776787327446 +0.15639292061352442 +0.09666872272011014 +0.08791069303364062 +0.13429349867431195 +0.10610405727202535 +0.39672121139939837 +0.11177041401400996 +0.14383479078798841 +0.08767949691275888 +0.34523811142100447 +0.3999433091980417 +0.1782174324788786 +0.23780285865870993 +0.08767949691275892 +0.1467177646091656 +0.08767949691275892 +0.41888563195253214 +0.08571979438381021 +0.17344625865203767 +0.3053923176182822 +0.16360972802369766 +0.16394144519225215 +0.1175482300339774 +0.15265430531494578 +0.09517116802114771 +0.12178250895220842 +0.11908460298883951 +0.19835765148176882 +0.2603634061718126 +0.277454810526896 +0.18905821329673844 +0.20408578488080548 +0.1963516393456708 +0.37255350028093653 +0.15490547257309728 +0.06754457583744143 +0.34019381396472376 diff --git a/results/readable-format/npmis_etm-wiki.txt b/results/readable-format/npmis_etm-wiki.txt new file mode 100644 index 0000000000000000000000000000000000000000..230384306eabf5e8c9d3fce142ecb77afaa300e3 --- /dev/null +++ b/results/readable-format/npmis_etm-wiki.txt @@ -0,0 +1,50 @@ +0.07969143718808 +0.08459605499894651 +0.0796914371880895 +0.0796914371880895 +0.0796914371880895 +0.0796914371880895 +0.0796914371880895 +0.0796914371880895 +0.0796914371880895 +0.07969143718808949 +0.0796914371880895 +0.0796914371880895 +0.0796914371880895 +0.08459605499894651 +0.0796914371880895 +0.0796914371880895 +0.0796914371880895 +0.09079702052627525 +0.0796914371880895 +0.09540202317228184 +0.08459605499894651 +0.0796914371880895 +0.09137986189292291 +0.09079702052627522 +0.0796914371880895 +0.0796914371880895 +0.07714367183466073 +0.0796914371880895 +0.0913798618929229 +0.08769835441390111 +0.0796914371880895 +0.07969143718808949 +0.09879154828384014 +0.0796914371880895 +0.0796914371880895 +0.0796914371880895 +0.0796914371880895 +0.09079702052627524 +0.07969143718808949 +0.0796914371880895 +0.0796914371880895 +0.09079702052627524 +0.07969143718808949 +0.0796914371880895 +0.0796914371880895 +0.09540202317228184 +0.0796914371880895 +0.0796914371880895 +0.09079702052627525 +0.0796914371880895 diff --git a/results/readable-format/npmis_mallet-20ng.txt b/results/readable-format/npmis_mallet-20ng.txt new file mode 100644 index 0000000000000000000000000000000000000000..0a5ed1ff031cce8a2912da58c1fffdb31355b69a --- /dev/null +++ b/results/readable-format/npmis_mallet-20ng.txt @@ -0,0 +1,50 @@ +0.1314275058044984 +0.12868722938942015 +0.35750602263515147 +0.11305819494914734 +0.11484206083907826 +0.11176888846533804 +0.12732699467385925 +0.2287394936063482 +0.44473939676352114 +0.24127019674935196 +0.32284272065971203 +0.43142111410155054 +0.2910513037578202 +0.18014990648358223 +0.32188953772530043 +0.6603008717784691 +0.30699249515929244 +0.13326988122411804 +0.6421268827929683 +0.30043303281639544 +0.28048604272718247 +0.16676331019961838 +0.3726503665794892 +0.3765487569290983 +0.23026012345157132 +0.13333804691738643 +0.20357481024945415 +0.35850791330099435 +0.38752138070365977 +0.0799560563666676 +0.17852991901351475 +0.2742371586926216 +0.3715196209666211 +0.42261689038903927 +0.38192610751506706 +0.12258062217387942 +0.15059222629790764 +0.2023676323552062 +0.7693309879198367 +0.43051248340180165 +0.20686042222320672 +0.25629312128444554 +0.2010377483499159 +0.14900240808594048 +0.10959274106419682 +0.21311907647038422 +0.1879680802758673 +0.20926676826294122 +0.15470010994497765 +0.1706810915779308 diff --git a/results/readable-format/npmis_mallet-wiki.txt b/results/readable-format/npmis_mallet-wiki.txt new file mode 100644 index 0000000000000000000000000000000000000000..8f2beb5d398908ad9f4f387e47c89a121b27d587 --- /dev/null +++ b/results/readable-format/npmis_mallet-wiki.txt @@ -0,0 +1,50 @@ +0.2689839733873824 +0.233509639067077 +0.2091672244613921 +0.24196489806173224 +0.21101726425596937 +0.14360179775585216 +0.3220178892577338 +0.2533548710417281 +0.20018029382644514 +0.39153268710299616 +0.3161962487259974 +0.1793602730550851 +0.2391022155379864 +0.2634998223646122 +0.24252844038457835 +0.2959826535218742 +0.4000450322417536 +0.264452320626896 +0.13067652448201164 +0.1558377693435571 +0.20791799494943936 +0.13918009957582778 +0.2574165964663429 +0.1738353653525663 +0.19054039886779456 +0.29871310582168886 +0.16920582648998816 +0.27540949931196845 +0.13486620395429627 +0.1523359110603592 +0.3339516490571772 +0.1759393397970967 +0.07472710712092726 +0.18055466664549447 +0.32633642438567984 +0.22545362784105932 +0.17323505371047257 +0.15773617968710743 +0.16755707256328778 +0.2572597350061184 +0.22425789198759724 +0.16858526377784225 +0.31286288749216973 +0.2652862471717779 +0.2644593428594425 +0.21055883414981424 +0.24193722515529006 +0.30243104215986205 +0.21706699924472342 +0.455008584010293 diff --git a/results/readable-format/topics_dvae-20ng.txt b/results/readable-format/topics_dvae-20ng.txt new file mode 100644 index 0000000000000000000000000000000000000000..e4a4143954b6be749cd325de797b1df59a06c7c9 --- /dev/null +++ b/results/readable-format/topics_dvae-20ng.txt @@ -0,0 +1,50 @@ +['bike', 'behanna', 'syl', 'callison', 'uokmax', 'glide', 'fxwg', 'svoboda', 'cousineau', 'ranck'] +['dresden', 'window', 'xpert', 'enterpoop', 'beck', 'xserver', 'mehl', 'abpsoft', 'sigma', 'expose'] +['kth', 'nada', 'sale', 'hemul', 'tte', 'jwa', 'printer', 'sony', 'latonia', 'stereo'] +['horne', 'broward', 'feustel', 'dafco', 'ipser', 'technet', 'newsserver', 'people', 'blh', 'taxes'] +['bike', 'egreen', 'npet', 'pettefar', 'daker', 'darkman', 'ysu', 'biker', 'maidenhead', 'pbwasoh'] +['season', 'game', 'playoffs', 'puck', 'kkeller', 'goalie', 'playoff', 'team', 'lindros', 'games'] +['geb', 'dsl', 'chastity', 'gordon', 'banks', 'shameful', 'intellect', 'cadre', 'pitt', 'skepticism'] +['petch', 'cboesel', 'diablo', 'gvg', 'matusevich', 'mmatusev', 'ching', 'boesel', 'singapore', 'logistician'] +['windows', 'ini', 'louray', 'panayiotakis', 'grp', 'exe', 'truetype', 'gwu', 'dll', 'cica'] +['sandvik', 'god', 'ksand', 'cookamunga', 'jesus', 'alink', 'tourist', 'newton', 'royalroads', 'kent'] +['max', 'encryption', 'catchers', 'lopez', 'catcher', 'pl', 'snichols', 'singer', 'sherri', 'qualcom'] +['graphics', 'pov', 'tga', 'morphing', 'trent', 'tiff', 'pc', 'postscript', 'windows', 'demo'] +['scsi', 'ide', 'drive', 'bus', 'motherboard', 'isa', 'quadra', 'controller', 'simms', 'eisa'] +['crypt', 'key', 'cryptography', 'ftp', 'faq', 'lib', 'rsa', 'ripem', 'motif', 'cipher'] +['max', 'cosmo', 'benson', 'angmar', 'technische', 'muenchen', 'pl', 'pb', 'tu', 'det'] +['livesey', 'schneider', 'solntze', 'caltech', 'cco', 'wpd', 'keith', 'allan', 'yob', 'sccsi'] +['br', 'gun', 'steveh', 'hendricks', 'thor', 'guns', 'linknet', 'magpie', 'homicides', 'isc'] +['gov', 'higgins', 'risc', 'instruction', 'csie', 'tw', 'nasa', 'fnal', 'linux', 'fnalf'] +['players', 'gm', 'laurentian', 'golchowy', 'olchowy', 'nhl', 'team', 'maynard', 'ramsey', 'hockey'] +['mmatusev', 'matusevich', 'cboesel', 'diablo', 'petch', 'holonet', 'logistician', 'coyote', 'ching', 'boesel'] +['dtmedin', 'catbyte', 'medin', 'voltage', 'amp', 'megatest', 'alung', 'mayhew', 'car', 'sensor'] +['god', 'christians', 'truth', 'christianity', 'rutgers', 'christian', 'athos', 'faith', 'religion', 'arrogance'] +['informatik', 'fbihh', 'intercon', 'hamburg', 'tapped', 'bontchev', 'vesselin', 'sternlight', 'amanda', 'strnlght'] +['jesus', 'god', 'bible', 'christ', 'church', 'faith', 'law', 'dlecoint', 'scriptures', 'christians'] +['roby', 'dividian', 'sandvik', 'batf', 'fbi', 'chopin', 'survivors', 'atf', 'uxh', 'stove'] +['msg', 'dyer', 'superstition', 'spdcc', 'noring', 'sensitivity', 'foods', 'food', 'glutamate', 'candida'] +['gld', 'cunixb', 'boyle', 'car', 'cunixc', 'dare', 'cars', 'souviens', 'cactus', 'domi'] +['israel', 'israeli', 'lebanese', 'hernlem', 'israelis', 'arabs', 'arab', 'lebanon', 'hezbollah', 'palestinians'] +['sale', 'obo', 'forsale', 'diack', 'rupin', 'bitzm', 'lancs', 'phakt', 'projector', 'zhenghao'] +['clipper', 'israel', 'encryption', 'government', 'lebanese', 'chip', 'kadie', 'tuinstra', 'escrow', 'key'] +['god', 'jaeger', 'mozumder', 'buphy', 'okcforum', 'benedikt', 'islam', 'osrhe', 'rosenau', 'atheism'] +['sale', 'obo', 'forsale', 'diack', 'phakt', 'bitzm', 'baud', 'zhenghao', 'cosmo', 'dialup'] +['pitching', 'braves', 'alomar', 'hitter', 'obp', 'baseball', 'tedward', 'baerga', 'jays', 'batting'] +['diamond', 'ati', 'windows', 'mouse', 'speedstar', 'card', 'monitor', 'video', 'winmarks', 'winfax'] +['armenian', 'armenians', 'cousineau', 'egreen', 'sumgait', 'serdar', 'zuma', 'argic', 'sera', 'azerbaijani'] +['cramer', 'optilink', 'clayton', 'homosexual', 'promiscuous', 'magnus', 'clas', 'rscharfy', 'dseg', 'hennessy'] +['polygon', 'unx', 'theseus', 'homeopathy', 'merrill', 'fulk', 'sas', 'sasghm', 'wingate', 'mangoe'] +['gtoal', 'chip', 'key', 'toal', 'clipper', 'intercon', 'keyseach', 'escrow', 'amanda', 'serial'] +['dos', 'militia', 'ricardo', 'mcmains', 'qwk', 'db', 'pope', 'rchland', 'arbor', 'desonia'] +['catharines', 'winqvt', 'tdawson', 'alee', 'herringshaw', 'muohio', 'kuleuven', 'bmug', 'jobe', 'wate'] +['espn', 'kkeller', 'keller', 'upenn', 'sepinwall', 'jhunix', 'baseball', 'dwarner', 'homewood', 'hcf'] +['uic', 'gun', 'kratz', 'ifas', 'uicvm', 'guns', 'gnv', 'cops', 'glock', 'revolver'] +['lafibm', 'covington', 'rvesterm', 'decvax', 'mcovingt', 'stankowitz', 'vesterman', 'hplabs', 'lowenstein', 'koufax'] +['cpr', 'apc', 'israeli', 'elias', 'igc', 'davidsson', 'cdp', 'israel', 'iceland', 'unconventional'] +['livesey', 'morality', 'solntze', 'wpd', 'beauchaine', 'schneider', 'objective', 'bobbe', 'horus', 'alexia'] +['henry', 'zoology', 'zoo', 'utzoo', 'spencer', 'kipling', 'moonbase', 'toronto', 'sherzer', 'iti'] +['gld', 'cunixb', 'cunixc', 'souviens', 'dare', 'phds', 'espn', 'domi', 'jtchern', 'thomasp'] +['geb', 'dsl', 'chastity', 'gordon', 'shameful', 'banks', 'intellect', 'nsmca', 'cadre', 'aurora'] +['turkish', 'armenian', 'serdar', 'argic', 'armenians', 'turks', 'zuma', 'turkey', 'greek', 'sera'] +['jpl', 'kelvin', 'baalke', 'nasa', 'space', 'telos', 'higgins', 'jet', 'prb', 'spacecraft'] diff --git a/results/readable-format/topics_dvae-wiki.txt b/results/readable-format/topics_dvae-wiki.txt new file mode 100644 index 0000000000000000000000000000000000000000..504a39a94ca9720b1c56b38323b51379adfa25a3 --- /dev/null +++ b/results/readable-format/topics_dvae-wiki.txt @@ -0,0 +1,50 @@ +['rower', 'hammersmith_bridge', 'rowed', 'mile_post', 'rowing', 'cambridge', 'boat_race', 'chiswick_steps', 'oxford', 'university_of_oxford'] +['spore', 'hyphae', 'basidia', 'cystidia', 'spored', 'spores', 'mycologist', 'fungus', 'hyaline', 'edibility'] +['coaster', 'railway', 'trains', 'locomotives', 'construction', 'railways', 'roller', 'coasters', 'traffic', 'terminus'] +['tissue', 'symptoms', 'cells', 'pregnancy', 'tissues', 'diagnosis', 'arterial', 'medication', 'clinical', 'therapy'] +['castle', 'gatehouse', 'nave', 'chancel', 'stonework', 'ashlar', 'buttresses', 'pilasters', 'architectural', 'storeys'] +['song', 'chart', 'mtv_news', 'certifications', 'video', 'australian_recording_industry_association', 'sal_cinquemani', 'billboard', 'lyrically', 'peaked'] +['album', 'albums', 'vocals', 'band', 'songs', 'guitar', 'rolling_stone', 'lyrics', 'recording', 'guitars'] +['constitutional', 'courts', 'supreme_court', 'judicial', 'jurisdiction', 'legislative', 'court', 'law', 'constitution', 'parliament'] +['organisms', 'mathematical', 'formula', 'impedance', 'equations', 'algebraic', 'molecular', 'mathematics', 'evolutionary', 'infinite'] +['game', 'ign', 'player', 'video', 'metacritic', 'console', 'nintendo', 'games', 'reviewers', 'usb'] +['episode', 'scully', 'mulder', 'fox_mulder', 'dana_scully', 'david_duchovny', 'gillian_anderson', 'episodes', 'simpsons', 'x-files'] +['vishnu', 'shiva', 'purana', 'inscriptions', 'dynasty', 'deity', 'iconography', 'temple', 'parvati', 'hindu'] +['novel', 'doctrines', 'book', 'fleming', 'scripture', 'theologians', 'theology', 'teachings', 'religions', 'sexuality'] +['route', 'highway', 'intersection', 'intersects', 'interchange', 'intersections', 'renumbering', 'concurrency', 'terminus', 'national_highway_system'] +['hergé', 'painting', 'paintings', 'tintin', 'comics', 'fiction', 'illustrations', 'literary', 'novel', 'adventures_of_tintin'] +['subspecies', 'breeding', 'species', 'habitat', 'breed', 'breeds', 'habitats', 'populations', 'plumage', 'underparts'] +['election', 'republicans', 'democrats', 'senate', 'republican', 'legislature', 'nomination', 'democratic', 'candidacy', 'republican_party'] +['compounds', 'hydroxide', 'reacts', 'isotopes', 'oxidation', 'bombarding', 'silvery', 'manhattan_project', 'isotope', 'fission'] +['creek', 'interchange', 'highway', 'watershed', 'freeway', 'tributaries', 'interchanges', 'tributary', 'lanes', 'susquehanna_river'] +['episode', 'storyline', 'championship', 'defeated', 'match', 'storylines', 'rematch', 'backstage', 'singles', 'scripted'] +['siege', 'battle', 'troops', 'continental_army', 'army', 'marched', 'ibn', 'byzantines', 'byzantine', 'caliph'] +['guns', 'armament', 'turrets', 'aft', 'waterline', 'boilers', 'torpedo', 'conning', 'knots', 'mounts'] +['diameter', 'star', 'latitudes', 'surface', 'velocity', 'voyager', 'equatorial', 'temperature', 'planet', 'equator'] +['squadron', 'raaf', 'bombers', 'squadrons', 'aircraft', 'jagdgeschwader', 'geschwaderkommodore', 'commander', 'royal_australian_air_force', 'sorties'] +['serbs', 'croats', 'yugoslav', 'jna', 'croatian', 'serb', 'jews', 'chetnik', 'serbia', 'chetniks'] +['episode', 'rainn_wilson', 'dwight', 'john_krasinski', 'jenna_fischer', 'alan_sepinwall', 'pam', 'jim_halpert', 'dunder', 'ed_helms'] +['aircraft', 'mint', 'coinage', 'numismatic', 'coins', 'mintage', 'obverse', 'engraver', 'philadelphia_mint', 'coin'] +['season', 'nhl', 'games', 'puck', 'tournament', 'hockey', 'canucks', 'playoffs', 'team', 'goaltender'] +['archbishop_of_canterbury', 'ecclesiastical', 'archbishop', 'bishops', 'earl', 'clergy', 'papal', 'henry_ii', 'papacy', 'heir'] +['viewers', 'glee', 'storylines', 'comedy', 'emmy', 'new_directions', 'actress', 'primetime', 'matthew_morrison', 'eastenders'] +['artillery', 'casualties', 'battalion', 'north', 'infantry', 'troops', 'brigade', 'guns', 'battalions', 'reinforcements'] +['species', 'genus', 'specimen', 'fossils', 'genera', 'specimens', 'taxonomy', 'phylogeny', 'clade', 'phylogenetic'] +['album', 'billboard', 'chart', 'song', 'albums', 'allmusic', 'vocals', 'recording_industry_association_of_america', 'certifications', 'albums_chart'] +['mph', 'northeastward', 'tropical', 'flooding', 'hurricane', 'westward', 'rainfall', 'northwestward', 'northward', 'peaking'] +['demography', 'population', 'census', 'councillors', 'wards', 'constituency', 'parish', 'households', 'borough', 'domesday'] +['tour', 'innings', 'wicket', 'england', 'runs', 'wickets', 'batsmen', 'cricket', 'bowled', 'scored'] +['gdp', 'world_bank', 'burger_king', 'capita', 'menu', 'billion', 'ceo', 'sales', 'cuisines', 'economies'] +['film', 'filming', 'rotten_tomatoes', 'grossing', 'films', 'screenplay', 'roger_ebert', 'script', 'grossed', 'blu'] +['ships', 'ship', 'frigates', 'hms', 'cruiser', 'broadside', 'wilhelmshaven', 'frigate', 'admiral', 'convoy'] +['flooding', 'precipitation', 'rainfall', 'mph', 'landslides', 'moisture', 'winds', 'damage', 'national_weather_service', 'flooded'] +['season', 'club', 'uefa', 'relegation', 'stadium', 'champions_league', 'wembley', 'football_league', 'premier_league', 'champions'] +['mph', 'tropical', 'westward', 'southeast', 'northwest', 'northeast', 'convection', 'northeastward', 'rainfall', 'northwestward'] +['season', 'hits', 'games', 'inning', 'pitcher', 'mlb', 'game', 'strikeouts', 'hitter', 'san_francisco_giants'] +['campus', 'alumni', 'faculty', 'students', 'enrollment', 'undergraduate', 'undergraduates', 'university', 'endowment', 'breed'] +['yards', 'halftime', 'game', 'season', 'yard', 'bcs', 'touchdown', 'passes', 'interception', 'intercepted'] +['boilers', 'conning', 'knots', 'guns', 'waterline', 'amidships', 'torpedo', 'tons', 'ship', 'aft'] +['cantata', 'music', 'chorale', 'cantatas', 'alfred_dürr', 'bach', 'continuo', 'bwv', 'premiere', 'musical'] +['shackleton', 'expedition', 'ernest_shackleton', 'murder', 'royal_geographical_society', 'murders', 'detectives', 'puritan', 'duchess', 'gunmen'] +['album', 'billboard', 'song', 'chart', 'certifications', 'recording_industry_association_of_america', 'riaa', 'australian_recording_industry_association', 'lyrically', 'peaked'] +['characters', 'character', 'game', 'manga', 'anime', 'ign', 'square_enix', 'final_fantasy', 'soundtrack', 'rpgfan'] diff --git a/results/readable-format/topics_etm-20ng.txt b/results/readable-format/topics_etm-20ng.txt new file mode 100644 index 0000000000000000000000000000000000000000..8c05a1e9f04688a5c8b81119ebf676f0ebea4f79 --- /dev/null +++ b/results/readable-format/topics_etm-20ng.txt @@ -0,0 +1,50 @@ +['writes', 'article', 'virginia', 'news', 'stanford', 'netcom', 'read', 'jim', 'thing', 'day'] +['car', 'bike', 'dod', 'cars', 'front', 'engine', 'ride', 'road', 'speed', 'article'] +['time', 'problem', 'back', 'work', 'good', 'problems', 'long', 'years', 'ago', 'found'] +['mail', 'hp', 'mark', 'version', 'fax', 'wrote', 'newsreader', 'reply', 'phone', 'systems'] +['people', 'make', 'point', 'post', 'group', 'discussion', 'idea', 'things', 'read', 'fact'] +['university', 'posting', 'host', 'nntp', 'writes', 'ca', 'article', 'distribution', 'reply', 'cs'] +['university', 'posting', 'host', 'nntp', 'ca', 'cs', 'writes', 'article', 'distribution', 'reply'] +['access', 'ibm', 'au', 'org', 'net', 'mil', 'apr', 'digex', 'austin', 'pat'] +['drive', 'card', 'scsi', 'disk', 'video', 'dos', 'mac', 'system', 'hard', 'windows'] +['file', 'email', 'information', 'faq', 'output', 'entry', 'internet', 'list', 'program', 'send'] +['health', 'medical', 'science', 'food', 'water', 'disease', 'patients', 'insurance', 'medicine', 'study'] +['san', 'california', 'university', 'york', 'american', 'information', 'los', 'la', 'angeles', 'april'] +['time', 'good', 'back', 'work', 'problem', 'long', 'years', 'netcom', 'thing', 'make'] +['book', 'graphics', 'books', 'points', 'line', 'software', 'computer', 'reference', 'copy', 'code'] +['writes', 'article', 'posting', 'university', 'host', 'nntp', 'ca', 'distribution', 'reply', 'world'] +['state', 'ohio', 'columbia', 'cleveland', 'pitt', 'cwru', 'article', 'acs', 'reply', 'university'] +['mail', 'mark', 'hp', 'version', 'newsreader', 'systems', 'wrote', 'fax', 'tin', 'internet'] +['writes', 'article', 'university', 'posting', 'host', 'nntp', 'distribution', 'reply', 'news', 'david'] +['israel', 'turkish', 'israeli', 'jews', 'armenian', 'people', 'armenians', 'jewish', 'armenia', 'arab'] +['uk', 'ac', 'de', 'uiuc', 'andrew', 'colorado', 'university', 'cmu', 'cso', 'cs'] +['mail', 'information', 'list', 'computer', 'software', 'send', 'find', 'email', 'post', 'info'] +['posting', 'host', 'nntp', 'university', 'ca', 'cs', 'writes', 'article', 'distribution', 'reply'] +['key', 'chip', 'encryption', 'clipper', 'keys', 'government', 'system', 'security', 'public', 'law'] +['game', 'team', 'year', 'play', 'hockey', 'games', 'season', 'players', 'win', 'nhl'] +['people', 'make', 'good', 'things', 'read', 'give', 'group', 'point', 'time', 'thing'] +['ftp', 'image', 'graphics', 'pub', 'software', 'data', 'version', 'images', 'package', 'information'] +['posting', 'nntp', 'host', 'university', 'ca', 'writes', 'cs', 'article', 'reply', 'distribution'] +['time', 'back', 'good', 'problem', 'work', 'netcom', 'years', 'long', 'ago', 'problems'] +['posting', 'university', 'host', 'nntp', 'writes', 'article', 'ca', 'distribution', 'reply', 'cs'] +['god', 'jesus', 'people', 'christian', 'bible', 'christians', 'life', 'church', 'faith', 'christ'] +['host', 'posting', 'nntp', 'university', 'ca', 'cs', 'writes', 'article', 'distribution', 'reply'] +['ground', 'power', 'wire', 'hot', 'work', 'current', 'high', 'run', 'electrical', 'temperature'] +['people', 'fbi', 'fire', 'children', 'koresh', 'started', 'time', 'told', 'batf', 'happened'] +['time', 'good', 'back', 'problem', 'long', 'work', 'years', 'make', 'real', 'problems'] +['time', 'back', 'problem', 'work', 'good', 'long', 'years', 'left', 'problems', 'hit'] +['hp', 'mail', 'phone', 'mark', 'newsreader', 'tin', 'fax', 'corporation', 'version', 'usa'] +['people', 'make', 'white', 'drugs', 'black', 'things', 'news', 'money', 'good', 'country'] +['writes', 'article', 'university', 'distribution', 'posting', 'virginia', 'nntp', 'news', 'host', 'reply'] +['writes', 'article', 'good', 'make', 'read', 'thing', 'people', 'time', 'give', 'netcom'] +['good', 'time', 'netcom', 'writes', 'article', 'back', 'work', 'find', 'thing', 'make'] +['max', 'ah', 'tm', 'mr', 'air', 'ma', 'cs', 'mi', 'si', 'sp'] +['people', 'morality', 'system', 'caltech', 'keith', 'writes', 'objective', 'moral', 'sgi', 'argument'] +['windows', 'window', 'file', 'files', 'server', 'program', 'display', 'screen', 'mit', 'application'] +['people', 'make', 'time', 'things', 'good', 'point', 'find', 'question', 'case', 'made'] +['president', 'mr', 'clinton', 'people', 'tax', 'jobs', 'government', 'money', 'program', 'time'] +['sale', 'price', 'shipping', 'offer', 'printer', 'audio', 'condition', 'cover', 'sell', 'input'] +['space', 'nasa', 'gov', 'earth', 'moon', 'henry', 'toronto', 'orbit', 'launch', 'jpl'] +['good', 'writes', 'article', 'people', 'make', 'thing', 'give', 'time', 'great', 'things'] +['article', 'writes', 'university', 'news', 'jim', 'posting', 'distribution', 'question', 'host', 'virginia'] +['gun', 'people', 'guns', 'law', 'government', 'state', 'firearms', 'crime', 'police', 'weapons'] diff --git a/results/readable-format/topics_etm-wiki.txt b/results/readable-format/topics_etm-wiki.txt new file mode 100644 index 0000000000000000000000000000000000000000..1af9f596ae1cc0af7df33dd2fbca89ec499ab25a --- /dev/null +++ b/results/readable-format/topics_etm-wiki.txt @@ -0,0 +1,50 @@ +['nameplate', 'prohibit', 'kpa', 'plodding', 'incubating', 'general_electric', 'heal', 'intermingled', 'cupola', 'panama_city'] +['nameplate', 'prohibit', 'intermingled', 'incubating', 'kpa', 'cupola', 'plodding', 'general_electric', 'ito', 'accruing'] +['nameplate', 'incubating', 'prohibit', 'general_electric', 'plodding', 'intermingled', 'kpa', 'panama_city', 'cupola', 'heal'] +['prohibit', 'incubating', 'nameplate', 'intermingled', 'plodding', 'general_electric', 'kpa', 'ito', 'panama_city', 'cupola'] +['prohibit', 'nameplate', 'intermingled', 'incubating', 'kpa', 'general_electric', 'bristle', 'cupola', 'ito', 'heal'] +['nameplate', 'prohibit', 'intermingled', 'incubating', 'cupola', 'plodding', 'accruing', 'heal', 'kpa', 'panama_city'] +['nameplate', 'prohibit', 'intermingled', 'incubating', 'general_electric', 'kpa', 'plodding', 'cupola', 'decimated', 'ito'] +['prohibit', 'nameplate', 'incubating', 'intermingled', 'plodding', 'general_electric', 'kpa', 'panama_city', 'heal', 'evelyn_waugh'] +['prohibit', 'incubating', 'nameplate', 'intermingled', 'plodding', 'general_electric', 'cupola', 'kpa', 'accruing', 'decimated'] +['nameplate', 'incubating', 'prohibit', 'plodding', 'general_electric', 'kpa', 'intermingled', 'cupola', 'ito', 'panama_city'] +['nameplate', 'prohibit', 'incubating', 'intermingled', 'ito', 'cupola', 'plodding', 'kpa', 'general_electric', 'heal'] +['nameplate', 'intermingled', 'incubating', 'prohibit', 'general_electric', 'plodding', 'cupola', 'kpa', 'bristle', 'decimated'] +['prohibit', 'nameplate', 'incubating', 'general_electric', 'plodding', 'cupola', 'intermingled', 'heal', 'kpa', 'panama_city'] +['prohibit', 'nameplate', 'intermingled', 'kpa', 'incubating', 'cupola', 'general_electric', 'plodding', 'panama_city', 'heal'] +['prohibit', 'nameplate', 'intermingled', 'incubating', 'cupola', 'bristle', 'general_electric', 'ito', 'decimated', 'heal'] +['nameplate', 'prohibit', 'incubating', 'plodding', 'intermingled', 'general_electric', 'cupola', 'kpa', 'ito', 'accruing'] +['incubating', 'prohibit', 'nameplate', 'intermingled', 'plodding', 'general_electric', 'kpa', 'decimated', 'accruing', 'cupola'] +['prohibit', 'incubating', 'nameplate', 'plodding', 'cupola', 'general_electric', 'intermingled', 'kpa', 'vida', 'heal'] +['prohibit', 'incubating', 'nameplate', 'intermingled', 'general_electric', 'kpa', 'ito', 'plodding', 'cupola', 'panama_city'] +['incubating', 'nameplate', 'prohibit', 'intermingled', 'plodding', 'kpa', 'heal', 'cupola', 'general_electric', 'panama_city'] +['nameplate', 'prohibit', 'incubating', 'intermingled', 'cupola', 'plodding', 'kpa', 'heal', 'general_electric', 'panama_city'] +['nameplate', 'incubating', 'prohibit', 'plodding', 'general_electric', 'intermingled', 'cupola', 'panama_city', 'decimated', 'kpa'] +['nameplate', 'prohibit', 'general_electric', 'intermingled', 'kpa', 'incubating', 'plodding', 'decimated', 'cupola', 'tutorials'] +['nameplate', 'general_electric', 'prohibit', 'incubating', 'cupola', 'intermingled', 'plodding', 'panama_city', 'kpa', 'decimated'] +['nameplate', 'prohibit', 'incubating', 'intermingled', 'general_electric', 'cupola', 'kpa', 'plodding', 'panama_city', 'ito'] +['prohibit', 'plodding', 'nameplate', 'incubating', 'intermingled', 'accruing', 'cupola', 'bristle', 'kpa', 'ito'] +['incubating', 'nameplate', 'prohibit', 'cupola', 'intermingled', 'general_electric', 'plodding', 'decimated', 'kpa', 'heal'] +['prohibit', 'nameplate', 'incubating', 'cupola', 'plodding', 'heal', 'general_electric', 'kpa', 'intermingled', 'decimated'] +['prohibit', 'nameplate', 'incubating', 'intermingled', 'general_electric', 'plodding', 'panama_city', 'kpa', 'heal', 'accruing'] +['prohibit', 'incubating', 'nameplate', 'general_electric', 'intermingled', 'kpa', 'cupola', 'panama_city', 'plodding', 'ito'] +['nameplate', 'prohibit', 'intermingled', 'incubating', 'decimated', 'kpa', 'plodding', 'general_electric', 'cupola', 'evelyn_waugh'] +['nameplate', 'intermingled', 'prohibit', 'incubating', 'kpa', 'ito', 'heal', 'plodding', 'cupola', 'general_electric'] +['prohibit', 'incubating', 'nameplate', 'plodding', 'general_electric', 'accruing', 'kpa', 'panama_city', 'intermingled', 'cupola'] +['prohibit', 'nameplate', 'incubating', 'intermingled', 'plodding', 'cupola', 'kpa', 'general_electric', 'decimated', 'evelyn_waugh'] +['prohibit', 'incubating', 'nameplate', 'intermingled', 'cupola', 'plodding', 'general_electric', 'kpa', 'heal', 'panama_city'] +['prohibit', 'incubating', 'nameplate', 'intermingled', 'plodding', 'general_electric', 'ito', 'cupola', 'panama_city', 'heal'] +['nameplate', 'prohibit', 'incubating', 'intermingled', 'general_electric', 'panama_city', 'plodding', 'decimated', 'kpa', 'cupola'] +['prohibit', 'intermingled', 'incubating', 'nameplate', 'cupola', 'general_electric', 'plodding', 'heal', 'kpa', 'ito'] +['nameplate', 'prohibit', 'general_electric', 'incubating', 'plodding', 'bristle', 'panama_city', 'intermingled', 'decimated', 'cupola'] +['nameplate', 'prohibit', 'incubating', 'plodding', 'general_electric', 'heal', 'intermingled', 'kpa', 'panama_city', 'evelyn_waugh'] +['prohibit', 'nameplate', 'incubating', 'general_electric', 'intermingled', 'plodding', 'kpa', 'cupola', 'accruing', 'ito'] +['nameplate', 'prohibit', 'kpa', 'intermingled', 'incubating', 'decimated', 'cupola', 'general_electric', 'plodding', 'panama_city'] +['nameplate', 'prohibit', 'incubating', 'general_electric', 'heal', 'intermingled', 'kpa', 'cupola', 'panama_city', 'plodding'] +['prohibit', 'nameplate', 'incubating', 'general_electric', 'kpa', 'intermingled', 'cupola', 'plodding', 'ito', 'panama_city'] +['nameplate', 'prohibit', 'incubating', 'plodding', 'intermingled', 'general_electric', 'decimated', 'heal', 'ito', 'cupola'] +['incubating', 'prohibit', 'nameplate', 'general_electric', 'kpa', 'intermingled', 'plodding', 'cupola', 'accruing', 'panama_city'] +['prohibit', 'nameplate', 'incubating', 'intermingled', 'cupola', 'bristle', 'plodding', 'general_electric', 'panama_city', 'heal'] +['prohibit', 'nameplate', 'incubating', 'general_electric', 'intermingled', 'plodding', 'kpa', 'ito', 'heal', 'cupola'] +['prohibit', 'incubating', 'nameplate', 'cupola', 'general_electric', 'panama_city', 'plodding', 'intermingled', 'heal', 'kpa'] +['prohibit', 'incubating', 'nameplate', 'kpa', 'intermingled', 'general_electric', 'cupola', 'plodding', 'panama_city', 'heal'] diff --git a/results/readable-format/topics_mallet-20ng.txt b/results/readable-format/topics_mallet-20ng.txt new file mode 100644 index 0000000000000000000000000000000000000000..5fc2ff1ca53a6896f29123bb43f55846c2c11534 --- /dev/null +++ b/results/readable-format/topics_mallet-20ng.txt @@ -0,0 +1,50 @@ +['mr', 'president', 'navy', 'mil', 'ms', 'myers', 'package', 'press', 'groups', 'stratus'] +['gov', 'nasa', 'writes', 'article', 'posting', 'nntp', 'harvard', 'host', 'ncr', 'mark'] +['key', 'keys', 'chip', 'des', 'bit', 'encryption', 'security', 'number', 'public', 'pgp'] +['article', 'toronto', 'henry', 'writes', 'water', 'purdue', 'oil', 'alaska', 'ecn', 'air'] +['db', 'fi', 'uchicago', 'mit', 'se', 'cs', 'midway', 'university', 'ericsson', 'host'] +['netcom', 'writes', 'article', 'services', 'nec', 'guest', 'newsreader', 'chris', 'tin', 'line'] +['sale', 'university', 'host', 'posting', 'distribution', 'nntp', 'price', 'state', 'mail', 'offer'] +['cc', 'columbia', 'insurance', 'gary', 'writes', 'car', 'gld', 'cunixb', 'article', 'health'] +['team', 'hockey', 'game', 'season', 'nhl', 'games', 'play', 'players', 'year', 'teams'] +['cwru', 'cleveland', 'freenet', 'msg', 'ti', 'reserve', 'host', 'ins', 'nntp', 'university'] +['law', 'state', 'rights', 'government', 'laws', 'court', 'states', 'amendment', 'people', 'constitution'] +['encryption', 'clipper', 'chip', 'government', 'key', 'escrow', 'technology', 'privacy', 'law', 'keys'] +['jesus', 'god', 'bible', 'sandvik', 'christian', 'john', 'christ', 'people', 'kent', 'apple'] +['power', 'high', 'audio', 'output', 'good', 'input', 'low', 'data', 'tv', 'circuit'] +['fbi', 'writes', 'batf', 'fire', 'koresh', 'article', 'waco', 'stratus', 'atf', 'udel'] +['turkish', 'armenian', 'armenians', 'armenia', 'turkey', 'turks', 'people', 'serdar', 'argic', 'greek'] +['window', 'motif', 'server', 'mit', 'widget', 'application', 'display', 'set', 'xterm', 'problem'] +['posting', 'nntp', 'host', 'berkeley', 'san', 'university', 'california', 'article', 'writes', 'ca'] +['cx', 'ah', 'lk', 'mv', 'uw', 'hz', 'ck', 'mw', 'pl', 'mc'] +['card', 'video', 'monitor', 'drivers', 'windows', 'vga', 'pc', 'driver', 'mouse', 'port'] +['image', 'graphics', 'software', 'ftp', 'version', 'files', 'file', 'pub', 'program', 'data'] +['university', 'book', 'information', 'center', 'research', 'books', 'conference', 'dr', 'points', 'art'] +['file', 'entry', 'output', 'program', 'char', 'int', 'printf', 'entries', 'section', 'null'] +['drive', 'scsi', 'disk', 'drives', 'hard', 'ide', 'controller', 'floppy', 'bus', 'system'] +['article', 'writes', 'cramer', 'people', 'gay', 'sex', 'virginia', 'optilink', 'state', 'men'] +['hp', 'indiana', 'posting', 'host', 'nntp', 'article', 'hewlett', 'packard', 'writes', 'newsreader'] +['uiuc', 'cso', 'writes', 'article', 'university', 'illinois', 'frank', 'urbana', 'news', 'duke'] +['pitt', 'gordon', 'medical', 'banks', 'health', 'geb', 'disease', 'article', 'cs', 'patients'] +['god', 'christians', 'christian', 'church', 'faith', 'jesus', 'bible', 'religion', 'people', 'hell'] +['ibm', 'article', 'car', 'writes', 'org', 'austin', 'cars', 'posting', 'gatech', 'nntp'] +['people', 'time', 'make', 'good', 'point', 'things', 'question', 'find', 'problem', 'thing'] +['windows', 'dos', 'file', 'files', 'ms', 'program', 'os', 'system', 'run', 'win'] +['israel', 'israeli', 'writes', 'jews', 'article', 'arab', 'war', 'lebanese', 'arabs', 'peace'] +['space', 'nasa', 'launch', 'orbit', 'earth', 'shuttle', 'satellite', 'moon', 'lunar', 'data'] +['gun', 'guns', 'control', 'firearms', 'crime', 'weapons', 'police', 'article', 'handgun', 'criminals'] +['cs', 'university', 'nntp', 'posting', 'host', 'au', 'cmu', 'writes', 'andrew', 'article'] +['uk', 'ac', 'de', 'university', 'uni', 'writes', 'host', 'posting', 'nntp', 'informatik'] +['ca', 'ground', 'wire', 'radar', 'bnr', 'wiring', 'cable', 'canada', 'neutral', 'detector'] +['max', 'pl', 'giz', 'bhj', 'wm', 'bxn', 'sl', 'gk', 'qq', 'ax'] +['jews', 'israeli', 'israel', 'anti', 'nazi', 'jewish', 'arab', 'jake', 'cpr', 'center'] +['government', 'money', 'year', 'program', 'people', 'national', 'years', 'american', 'tax', 'states'] +['mail', 'list', 'information', 'internet', 'send', 'email', 'faq', 'anonymous', 'address', 'ftp'] +['la', 'period', 'pts', 'ohio', 'van', 'acs', 'upenn', 'magnus', 'bos', 'chi'] +['bike', 'dod', 'car', 'article', 'writes', 'sun', 'ride', 'good', 'road', 'speed'] +['ca', 'university', 'article', 'writes', 'cs', 'colorado', 'posting', 'host', 'stanford', 'nntp'] +['isc', 'islam', 'writes', 'article', 'islamic', 'br', 'bu', 'rit', 'muslim', 'absolute'] +['people', 'time', 'back', 'left', 'told', 'home', 'day', 'started', 'night', 'years'] +['year', 'baseball', 'game', 'team', 'article', 'writes', 'good', 'games', 'runs', 'players'] +['apple', 'mac', 'simms', 'bit', 'quadra', 'writes', 'article', 'memory', 'speed', 'keyboard'] +['access', 'caltech', 'keith', 'writes', 'digex', 'sgi', 'posting', 'host', 'nntp', 'system'] diff --git a/results/readable-format/topics_mallet-wiki.txt b/results/readable-format/topics_mallet-wiki.txt new file mode 100644 index 0000000000000000000000000000000000000000..18428dc7aa654b4ef311f9f6212f67489ba3f34d --- /dev/null +++ b/results/readable-format/topics_mallet-wiki.txt @@ -0,0 +1,50 @@ +['film', 'films', 'production', 'role', 'million', 'released', 'release', 'movie', 'director', 'best'] +['season', 'game', 'games', 'home', 'baseball', 'team', 'league', 'runs', 'series', 'hit'] +['disease', 'people', 'risk', 'blood', 'treatment', 'use', 'symptoms', 'medical', 'cause', 'children'] +['game', 'season', 'team', 'points', 'games', 'yards', 'yard', 'second', 'football', 'coach'] +['character', 'story', 'said', 'like', 'love', 'characters', 'father', 'later', 'life', 'man'] +['said', 'police', 'people', 'later', 'found', 'death', 'prison', 'trial', 'day', 'case'] +['season', 'club', 'team', 'cup', 'league', 'match', 'goal', 'goals', 'scored', 'final'] +['music', 'musical', 'opera', 'works', 'composer', 'play', 'performance', 'theatre', 'work', 'piano'] +['world', 'games', 'won', 'team', 'canada', 'olympics', 'time', 'tournament', 'olympic', 'event'] +['song', 'album', 'number', 'music', 'video', 'single', 'chart', 'released', 'track', 'songs'] +['route', 'highway', 'road', 'state', 'north', 'east', 'south', 'west', 'street', 'interchange'] +['water', 'area', 'river', 'miles', 'park', 'land', 'years', 'lake', 'feet', 'near'] +['law', 'court', 'act', 'case', 'state', 'rights', 'government', 'legal', 'right', 'public'] +['ship', 'british', 'ships', 'french', 'captain', 'island', 'fleet', 'crew', 'expedition', 'sea'] +['war', 'german', 'government', 'military', 'soviet', 'polish', 'french', 'russian', 'germany', 'political'] +['game', 'player', 'games', 'released', 'series', 'players', 'video', 'characters', 'release', 'version'] +['storm', 'tropical', 'hurricane', 'winds', 'damage', 'mph', 'cyclone', 'depression', 'september', 'system'] +['king', 'england', 'royal', 'english', 'scotland', 'henry', 'son', 'queen', 'death', 'duke'] +['design', 'system', 'use', 'engine', 'power', 'new', 'speed', 'production', 'model', 'development'] +['chinese', 'government', 'china', 'century', 'spanish', 'country', 'people', 'population', 'british', 'world'] +['century', 'built', 'castle', 'church', 'building', 'house', 'site', 'town', 'stone', 'south'] +['formula', 'nuclear', 'number', 'element', 'known', 'metal', 'form', 'elements', 'energy', 'high'] +['aircraft', 'air', 'japanese', 'war', 'flight', 'squadron', 'training', 'wing', 'flying', 'operations'] +['station', 'line', 'railway', 'bridge', 'service', 'new', 'trains', 'london', 'built', 'opened'] +['building', 'park', 'city', 'new', 'construction', 'street', 'feet', 'built', 'site', 'opened'] +['forces', 'battle', 'troops', 'division', 'attack', 'north', 'war', 'battalion', 'men', 'army'] +['later', 'years', 'life', 'family', 'time', 'year', 'father', 'born', 'work', 'death'] +['episode', 'season', 'series', 'homer', 'television', 'episodes', 'said', 'simpsons', 'bart', 'time'] +['race', 'second', 'stage', 'lap', 'team', 'time', 'car', 'lead', 'points', 'won'] +['million', 'company', 'new', 'year', 'market', 'business', 'billion', 'sold', 'announced', 'years'] +['cells', 'cell', 'dna', 'protein', 'proteins', 'called', 'acid', 'species', 'organisms', 'different'] +['american', 'war', 'united_states', 'washington', 'virginia', 'new_york', 'men', 'general', 'governor', 'americans'] +['madonna', 'carey', 'oxford', 'race', 'cambridge', 'gaga', 'christmas', 'boat', 'time', 'crew'] +['book', 'work', 'published', 'wrote', 'writing', 'world', 'works', 'written', 'history', 'life'] +['ship', 'ships', 'guns', 'war', 'class', 'german', 'tons', 'gun', 'long', 'fleet'] +['god', 'india', 'temple', 'church', 'christian', 'indian', 'religious', 'century', 'jesus', 'form'] +['species', 'found', 'known', 'long', 'shark', 'large', 'like', 'small', 'fish', 'teeth'] +['women', 'children', 'black', 'american', 'people', 'white', 'sex', 'gay', 'african', 'men'] +['art', 'painting', 'work', 'flag', 'century', 'white', 'works', 'red', 'paintings', 'artist'] +['army', 'emperor', 'city', 'battle', 'greek', 'roman', 'war', 'byzantine', 'empire', 'military'] +['city', 'school', 'students', 'university', 'schools', 'college', 'town', 'education', 'population', 'year'] +['earth', 'star', 'planet', 'sun', 'stars', 'mass', 'surface', 'solar', 'years', 'system'] +['species', 'birds', 'breeding', 'white', 'horses', 'bird', 'breed', 'black', 'males', 'male'] +['species', 'fruit', 'cap', 'brown', 'found', 'known', 'plants', 'plant', 'white', 'genus'] +['episode', 'series', 'season', 'episodes', 'television', 'doctor', 'mulder', 'time', 'broadcast', 'character'] +['series', 'book', 'story', 'bond', 'novel', 'published', 'stories', 'issue', 'comic', 'batman'] +['match', 'championship', 'team', 'event', 'title', 'wrestling', 'defeated', 'ring', 'episode', 'wwe'] +['president', 'election', 'state', 'government', 'party', 'political', 'campaign', 'governor', 'elected', 'vote'] +['australia', 'test', 'match', 'england', 'australian', 'runs', 'innings', 'team', 'cricket', 'scored'] +['album', 'band', 'music', 'songs', 'released', 'song', 'rock', 'guitar', 'record', 'recording']