2012-10-28 18:22:09 +00:00 
										
									 
								 
							 
							
								
									
										 
								
							 
							
								 
							
							
								/*
								Reference Python (NLTK) session. The leading `...` on the second
								line of the string is the interactive interpreter's continuation
								prompt, not part of the sentence.

								import nltk
								sentence = """At eight o'clock on Thursday morning
								... Arthur didn't feel very good."""
								tokens = nltk.word_tokenize(sentence)
								tagged = nltk.pos_tag(tokens)
								tagged[0:6]
								entities = nltk.chunk.ne_chunk(tagged)
								entities
								*/
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
									
										
										
										
											2012-10-26 00:24:07 +01:00 
										
									 
								 
							 
							
								
							 
							
								 
							
							
								% Libraries needed by the predicates below: the Python bridge
								% (presumably the provider of the `:=` and `$` operators used to
								% call into nltk -- confirm against library(python)) and maplist/3.
								:- use_module(library(python)).
								:- use_module(library(maplist)).
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								% main/0: demo driver.
								%
								% Clause 1 is a failure-driven loop: it prints every solution of
								% main/3 (sentence, tokens, POS tags) and then fails, so execution
								% always continues with clause 2.
								%
								% Clause 2 tokenizes an Italian sentence with nltk, stems each
								% token through process/2 and prints the token and stem lists.
								main :-
									main(Text, Toks, Tags),
									writeln(Text),
									writeln(tokens=Toks),
									writeln(tagged=Tags),
									fail.
								main :-
									Text = 'Debutta a New York il nuovo sistema operativo (cronaca in diretta). E c\'è il tablet Surface. Svolta radicale che strizza l\'occhio al mondo touch per l\'azienda che controlla il 92% dei pc dal nostro inviato M. Serafini',
									% Python equivalent: c = nltk.stem.snowball.ItalianStemmer("italian")
									% The stemmer is stored under `$c`, which process/2 reads back.
									$c := nltk:stem:snowball:'ItalianStemmer'(italian),
									Words := nltk:word_tokenize(Text),
									writeln(tokens=Words),
									% Python equivalent, per token: o = c.stem('voglio')
									maplist(process, Words, Stemmed),
									writeln(stems=Stemmed).
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								% process(+In, -Out)
								%
								% Stem the token In with the stemmer stored under `$c` (set up by
								% main/0) and print the pair In:=Out. If the Python call does not
								% succeed, the token is passed through unchanged (Out = In).
								%
								% The fallback used to be a separate catch-all clause, which left a
								% choicepoint behind every successful stem: on backtracking,
								% maplist/3 would then produce extra answers in which tokens were
								% left unstemmed. The if-then-else commits to the stemmed result as
								% soon as stemming succeeds, so each token has exactly one answer.
								process(In, Out) :-
									(   Out := $c:stem(In)
									->  writeln(In:=Out)
									;   Out = In
									).
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								    
							 
						 
					
						
							
								
							 
							
								
							 
							
								 
							
							
								% main(-Sentence, -Tokens, -Tagged)
								%
								% Prolog rendering of the NLTK tokenize / POS-tag example:
								% Sentence is the example text, Tokens the result of
								% nltk.word_tokenize(Sentence), Tagged the result of
								% nltk.pos_tag(Tokens).
								%
								% The sentence atom previously contained literal double quotes and
								% the `...` continuation prompt copied from the Python session, so
								% the tokens could not match the expected output shown below. It now
								% holds only the sentence text, with `\n` standing for the line
								% break inside the Python triple-quoted string.
								main(Sentence, Tokens, Tagged) :-
									Sentence = 'At eight o\'clock on Thursday morning\nArthur didn\'t feel very good.',
									Tokens := nltk:word_tokenize(Sentence),
									% Expected Tokens (Python notation):
									% ['At', 'eight', "o'clock", 'on', 'Thursday', 'morning',
									%  'Arthur', 'did', "n't", 'feel', 'very', 'good', '.']
									Tagged := nltk:pos_tag(Tokens).
								% Expected start of Tagged (Python notation):
								% >>> tagged[0:6]
								% [('At', 'IN'), ('eight', 'CD'), ("o'clock", 'JJ'), ('on', 'IN'),
								%  ('Thursday', 'NNP'), ('morning', 'NN')]