122 lines
		
	
	
		
			3.9 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			122 lines
		
	
	
		
			3.9 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| 
								 | 
							
								%%%%
							 | 
						||
| 
								 | 
							
								%%%%  Probabilistic DCG for Charniak's example --- pdcg_c.psm
							 | 
						||
| 
								 | 
							
								%%%%
							 | 
						||
| 
								 | 
							
								%%%%  Copyright (C) 2007,2008
							 | 
						||
| 
								 | 
							
								%%%%    Sato Laboratory, Dept. of Computer Science,
							 | 
						||
| 
								 | 
							
								%%%%    Tokyo Institute of Technology
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%% As described in the comments in pdcg.psm, PCFGs (probabilistic context-
							 | 
						||
| 
								 | 
							
								%% free grammars) are a stochastic extension of CFG grammar such that in a
							 | 
						||
| 
								 | 
							
								%% (leftmost) derivation, each production rule is selected probabilistically
							 | 
						||
| 
								 | 
							
								%% and applied. This program presents an implementation of an example from
							 | 
						||
| 
								 | 
							
								%% Charniak's textbook (Statistical Language Learning, The MIT Press, 1993):
							 | 
						||
| 
								 | 
							
								%%
							 | 
						||
| 
								 | 
							
								%%    s --> np vp        (0.8)  |  verb --> swat  (0.2)
							 | 
						||
| 
								 | 
							
								%%    s --> vp           (0.2)  |  verb --> flies (0.4)
							 | 
						||
| 
								 | 
							
								%%   np --> noun         (0.4)  |  verb --> like  (0.4)
							 | 
						||
| 
								 | 
							
								%%   np --> noun pp      (0.4)  |  noun --> swat  (0.05) 
							 | 
						||
| 
								 | 
							
								%%   np --> noun np      (0.2)  |  noun --> flies (0.45) 
							 | 
						||
| 
								 | 
							
								%%   vp --> verb         (0.3)  |  noun --> ants  (0.5)
							 | 
						||
| 
								 | 
							
								%%   vp --> verb np      (0.3)  |  prep --> like  (1.0)
							 | 
						||
| 
								 | 
							
								%%   vp --> verb pp      (0.2)  |
							 | 
						||
| 
								 | 
							
								%%   vp --> verb np pp   (0.2)  |
							 | 
						||
| 
								 | 
							
								%%   pp --> prep np      (1.0)  |
							 | 
						||
| 
								 | 
							
								%%   (`s' is the start symbol)
							 | 
						||
| 
								 | 
							
								%%
							 | 
						||
| 
								 | 
							
								%% This program has a grammar-independent part (pcfg/1-2 and proj/2),
							 | 
						||
| 
								 | 
							
								%% which can work with any underlying CFG which has no epsilon rules
							 | 
						||
| 
								 | 
							
								%% and produces no unit cycles.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%----------------------------------
							 | 
						||
| 
								 | 
							
								%%  Quick start:
							 | 
						||
| 
								 | 
							
								%%  
							 | 
						||
| 
								 | 
							
								%%  ?- prism(pdcg_c).
							 | 
						||
| 
								 | 
							
								%%
							 | 
						||
| 
								 | 
							
								%%  ?- prob(pcfg([swat,flies,like,ants])).
							 | 
						||
| 
								 | 
							
								%%         % get the generative probability of a sentence
							 | 
						||
| 
								 | 
							
								%%         % "swat flies like ants"
							 | 
						||
| 
								 | 
							
								%%
							 | 
						||
| 
								 | 
							
								%%  ?- sample(pcfg(_X)),viterbif(pcfg(_X)).
							 | 
						||
| 
								 | 
							
								%%         % parse a sampled sentence
							 | 
						||
| 
								 | 
							
								%%
							 | 
						||
| 
								 | 
							
								%%  ?- get_samples(50,pcfg(X),_Gs),learn(_Gs),show_sw. 
							 | 
						||
| 
								 | 
							
								%%         % conduct an artificial learning experiment
							 | 
						||
| 
								 | 
							
								%%
							 | 
						||
| 
								 | 
							
								%%  ?- viterbif(pcfg([swat,flies,like,ants])).
							 | 
						||
| 
								 | 
							
								%%         % get the most probable parse for "swat flies like ants"
							 | 
						||
| 
								 | 
							
								%% 
							 | 
						||
| 
								 | 
							
								%%  ?- n_viterbif(3,pcfg([swat,flies,like,ants])).
							 | 
						||
| 
								 | 
							
								%%         % get top 3 ranked parses for "swat flies like ants"
							 | 
						||
| 
								 | 
							
								%% 
							 | 
						||
| 
								 | 
							
								%%  ?- viterbit(pcfg([swat,flies,like,ants])).
							 | 
						||
| 
								 | 
							
								%%         % print the most probable parse for "swat flies like ants" in
							 | 
						||
| 
								 | 
							
								%%         % a tree form.
							 | 
						||
| 
								 | 
							
								%%
							 | 
						||
| 
								 | 
							
								%%  ?- viterbit(pcfg([swat,flies,like,ants]),P,E), build_tree(E,T).
							 | 
						||
| 
								 | 
							
								%%         % get the most probable parse for "swat flies like ants" in a
							 | 
						||
| 
								 | 
							
								%%         % tree form, and convert it to a more readable Prolog term.
							 | 
						||
| 
								 | 
							
								%%
							 | 
						||
| 
								 | 
							
								%%  ?- probfi(pcfg([swat,flies,like,ants])).
							 | 
						||
| 
								 | 
							
								%%         % print the parse forest with inside probabilities
							 | 
						||
| 
								 | 
							
								%%
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%----------------------------------
							 | 
						||
| 
								 | 
							
								%%  Declarations:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% Outcome spaces of the random switches, one switch per nonterminal.
								% pcfg/2 draws a right-hand side with msw(LHS,RHS), so every outcome
								% listed here is the symbol list of one production for that nonterminal.
								% The order of the outcomes matters: the probability lists given to
								% set_sw/2 are matched against it position by position.
								values(s,    [ [np,vp],
								               [vp] ]).
								values(np,   [ [noun],
								               [noun,pp],
								               [noun,np] ]).
								values(vp,   [ [verb],
								               [verb,np],
								               [verb,pp],
								               [verb,np,pp] ]).
								values(pp,   [ [prep,np] ]).
								values(verb, [ [swat], [flies], [like] ]).
								values(noun, [ [swat], [flies], [ants] ]).
								values(prep, [ [like] ]).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% proj/2 is deliberately left untabled.  This declaration exists only
								% to keep the results of probabilistic inference simple and readable;
								% build_tree/2 below accordingly expects only pcfg and msw goals in
								% the explanations it is given.
								:- p_not_table proj/2.
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%----------------------------------
							 | 
						||
| 
								 | 
							
								%%  Modeling part:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% pcfg(Words):
								%    Words is a sentence (a list of terminal symbols) derived from the
								%    start symbol s.
								pcfg(Words) :-
								  pcfg(s, Words-[]).

								% pcfg(Sym, Before-After):
								%    Sym spans the difference list Before-After.  A nonterminal selects
								%    one of its right-hand sides through the switch that carries its
								%    name and expands it with proj/2; any other symbol is treated as a
								%    terminal and must be the first element of Before.
								pcfg(Sym, Before-After) :-
								  (  nonterminal(Sym)
								  -> msw(Sym, Body),
								     proj(Body, Before-After)
								  ;  Before = [Sym|After]
								  ).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% proj(Symbols, Before-After):
								%    The symbols of a right-hand side, taken left to right, together
								%    span the difference list Before-After.  Each symbol is derived by
								%    pcfg/2 over its own sub-span; an empty symbol list spans nothing.
								proj([], Rest-Rest).
								proj([Sym|Syms], Before-After) :-
								  pcfg(Sym, Before-Mid),
								  proj(Syms, Mid-After).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% nonterminal(Sym):
								%    Sym is a nonterminal of the grammar.  pcfg/2 treats every symbol
								%    not listed here as a terminal.
								% Phrasal categories:
								nonterminal(s).     nonterminal(np).    nonterminal(vp).    nonterminal(pp).
								% Lexical categories:
								nonterminal(verb).  nonterminal(noun).  nonterminal(prep).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								%%----------------------------------
							 | 
						||
| 
								 | 
							
								%%  Utility part:
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% Rule probabilities.  Each list is positional: its i-th entry is the
								% probability of the i-th outcome declared for that switch by values/2.
								% The trailing comments name the right-hand sides in that same order.
								:- set_sw(s,    [0.8, 0.2]).             % np vp | vp
								:- set_sw(np,   [0.4, 0.4, 0.2]).        % noun | noun pp | noun np
								:- set_sw(vp,   [0.3, 0.3, 0.2, 0.2]).   % verb | verb np | verb pp | verb np pp
								:- set_sw(pp,   [1.0]).                  % prep np
								:- set_sw(verb, [0.2, 0.4, 0.4]).        % swat | flies | like
								:- set_sw(noun, [0.05, 0.45, 0.5]).      % swat | flies | ants
								:- set_sw(prep, [1.0]).                  % like
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								% build_tree(E,T):-
								%    Build a parse tree T from a tree-formed explanation E, e.g. the E
								%    obtained from viterbit(pcfg(Sentence),P,E).  T is a Prolog term whose
								%    functor is a grammar symbol and whose arguments are the trees of its
								%    children; a symbol without children is represented by the bare atom.

								build_tree([], []).
								% Two-element wrapper [pcfg(Sentence), SubTree] produced for the pcfg/1
								% goal: the tree is that of the single sub-explanation.
								build_tree([pcfg(_), Sub], Tree) :-
								  build_tree(Sub, Tree).
								% Node headed by a pcfg/2 goal: its symbol becomes the functor and the
								% children gathered from the remaining elements become the arguments.
								build_tree([pcfg(Sym,_)|Rest], Tree) :-
								  build_tree1(Rest, Kids),
								  Tree =.. [Sym|Kids].

								% build_tree1(Elements,Children):-
								%    Translate the elements that follow the head goal of an explanation
								%    node into the list of child trees.  Clause order and the cuts make
								%    the three element kinds mutually exclusive.
								build_tree1([], []).
								% A bare pcfg/2 goal (not wrapped in a list) contributes its symbol as a leaf.
								build_tree1([pcfg(Sym,_)|Rest], [Sym|Kids]) :- !,
								  build_tree1(Rest, Kids).
								% Switch instances carry no tree structure and are skipped.
								build_tree1([msw(_,_)|Rest], Kids) :- !,
								  build_tree1(Rest, Kids).
								% Anything else is taken to be a nested sub-explanation.
								build_tree1([Node|Rest], [Sub|Kids]) :-
								  build_tree(Node, Sub), !,
								  build_tree1(Rest, Kids).
							 |