208 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
		
		
			
		
	
	
			208 lines
		
	
	
		
			4.4 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
|   | 
 | ||
|  | 
 | ||
|  | 
 | ||
|  | :- ensure_loaded(library(clpbn)). | ||
|  | 
 | ||
|  | :- ensure_loaded(library('clpbn/hmm')). | ||
|  | 
 | ||
|  | :- hmm_state((m/3, i/3, d/3, t/2, b/2, n/2, j/2, e/2, s/2, c/2)). | ||
|  | 
 | ||
|  | /* | ||
|  | 
 | ||
|  | We represent a plan7 HMMer as a recursive program. There are two parameters: | ||
|  |  i represents position on a string | ||
|  |  j slice in the HMMer: probability distributions are different for each slice. | ||
|  | 
 | ||
|  | An HMM has 10 states (M, I, D are the core states): | ||
|  | 	S -> begin | ||
|  | 	N -> before match | ||
|  | 	B -> begin a match | ||
|  | 	M -> match state | ||
|  | 	I -> insertion | ||
|  | 	D -> deletion | ||
|  | 	E -> end of match | ||
|  | 	C -> continuation after matches done | ||
|  | 	T -> end of sequence | ||
|  | 	J -> go back to match start.  | ||
|  | 
 | ||
|  | S, B, E, and T do not emit. | ||
|  | 
 | ||
|  | Each state will be represented as a binary random variable. | ||
|  | 
 | ||
|  | Also, you'll see terms of the form | ||
|  | 
 | ||
|  | 	{ S = m(I) with p([t,f], trans([MMCPT,IMCPT,DMCPT]), [M0,I0,D0]) }. | ||
|  | 
 | ||
|  | the sum function is as examplified: | ||
|  | 
 | ||
|  | 	P(S=t) = P(MMCPT|M0)P(M0=t)+P(IMCPT|M0)P(I0=t)+P(IDCPT|M0)P(D0=t) | ||
|  | 	P(S=f) = 1-P(S=t) | ||
|  | 
 | ||
|  | With sum a single element may be true so if | ||
|  | 	k1\=k2, P(A_k1=t,A_k2=t) = 0. | ||
|  | 
 | ||
|  | */ | ||
|  | 
 | ||
|  | % now this is our nice  DBN: notice that CPTs depend on slide, | ||
|  | % so this is really an "irregular"  DBN. | ||
|  | % first, the emission probabilities (they are easier ;-). | ||
|  | 
 | ||
|  | % we look at the core first: m, and i emissions | ||
|  | 
 | ||
|  | % next, go to inner states (state transitions). | ||
|  | 
 | ||
|  | % the first m-state | ||
|  | 
 | ||
|  | m(I,J,M) :- | ||
|  | 	slices(J), !, | ||
|  | 	I1 is I+1, | ||
|  | 	e(I1,E), | ||
|  | 	{ M = m(I,J) with p(bool, trans([0]),[E]) }, | ||
|  | 	emitting(M). | ||
|  | % standard m-state | ||
|  | m(I,J,M) :- | ||
|  | 	I1 is I+1, | ||
|  | 	J1 is J+1, | ||
|  | 	i(I1,J,NI), | ||
|  | 	m(I1,J1,NM), | ||
|  | 	d(I1,J1,ND), | ||
|  | 	e(I1,NE), | ||
|  | 	m_i_cpt(J,MICPT), | ||
|  | 	m_m_cpt(J,MMCPT), | ||
|  | 	m_d_cpt(J,MDCPT), | ||
|  | 	m_e_cpt(J,MECPT), | ||
|  | 	{ M = m(I,J) with p(bool, trans([MICPT,MMCPT,MDCPT,MECPT]),[NI,NM,ND,NE]) }, | ||
|  | 	emitting(M). | ||
|  | 
 | ||
|  | i(I,J,S) :- | ||
|  | 	I1 is I+1, | ||
|  | 	J1 is J+1, | ||
|  | 	m(I1,J1,M), | ||
|  | 	i(I1,J,IS), | ||
|  | 	i_i_cpt(J,IICPT), | ||
|  | 	i_m_cpt(J,IMCPT), | ||
|  | 	{ S = i(I,J) with p(bool, trans([IMCPT,IICPT]), [M,IS]) }, | ||
|  | 	emitting(S). | ||
|  | 
 | ||
|  | d(I,J,D) :- | ||
|  | 	slices(J), !, | ||
|  | 	e(I,E), | ||
|  | 	{ D = d(I,J) with p(bool, trans([0]), [E]) }. | ||
|  | d(I,J,S) :- | ||
|  | 	J1 is J+1, | ||
|  | 	m(I,J1,M), | ||
|  | 	d(I,J1,ND), | ||
|  | 	m_d_cpt(J,MDCPT), | ||
|  | 	d_d_cpt(J,DDCPT), | ||
|  | 	{ S = d(I,J) with p(bool, trans([MDCPT,DDCPT]), [M,ND]) }. | ||
|  | 
 | ||
|  | e_evidence([],_). | ||
|  | e_evidence([Emission|Es],Emission) :- | ||
|  | 	e_evidence(Es,Emission). | ||
|  | 
 | ||
|  | % | ||
|  | % N, C, and J states can also emit. | ||
|  | % | ||
|  | % and they have transitions. | ||
|  | 
 | ||
|  | % initial state | ||
|  | s(0,S) :- | ||
|  | 	n(0,N), | ||
|  | 	{ S = s(0) with p(bool, trans([0]),[N]) }. | ||
|  | 
 | ||
|  | n(I,S) :- | ||
|  | 	I1 is I+1, | ||
|  | 	b(I1, B0), | ||
|  | 	n(I1, N0), | ||
|  | 	n_n_cpt(NNCPT), | ||
|  | 	n_b_cpt(NBCPT), | ||
|  | 	{ S = n(I) with p(bool, trans([NBCPT,NNCPT]), [B0,N0]) }, | ||
|  | 	emitting(S). | ||
|  | 
 | ||
|  | b(I,S) :- | ||
|  | 	slices(Ss), | ||
|  | 	b_m_transitions(0,Ss,I,Ms,MCPTs), | ||
|  | 	d(I,1, D), | ||
|  | 	b_d_cpt(BMCPT), | ||
|  | 	{ S = b(I) with p(bool, trans([BMCPT|MCPTs]), [D|Ms]) }. | ||
|  | 
 | ||
|  | b_m_transitions(Ss,Ss,_,[],[]) :- !. | ||
|  | b_m_transitions(J0,Ss,I,[M|Ms],[CPT|MCPTs]) :- | ||
|  | 	J is J0+1, | ||
|  | 	m(I,J,M), | ||
|  | 	b_m_cpt(J,CPT), | ||
|  | 	b_m_transitions(J,Ss,I,Ms,MCPTs). | ||
|  | 
 | ||
|  | j(I,S) :- | ||
|  | 	I1 is I+1, | ||
|  | 	b(I1, NB), | ||
|  | 	j(I1, NJ), | ||
|  | 	j_b_cpt(JBCPT), | ||
|  | 	j_j_cpt(JJCPT), | ||
|  | 	{ S = j(I) with p(bool, trans([JBCPT,JJCPT]), [NB,NJ]) }, | ||
|  | 	emitting(S). | ||
|  | 
 | ||
|  | e(I,S) :- | ||
|  | 	c(I, NC), | ||
|  | 	j(I, NJ), | ||
|  | 	e_c_cpt(ECCPT), | ||
|  | 	e_j_cpt(EJCPT), | ||
|  | 	{ S = e(I) with p(bool, trans([ECCPT,EJCPT]), [NC,NJ]) }. | ||
|  | 
 | ||
|  | c(I,S) :- | ||
|  | 	I1 is I+1, | ||
|  | 	t(I1, T), | ||
|  | 	c(I1, NC), | ||
|  | 	c_t_cpt(CTCPT), | ||
|  | 	c_c_cpt(CCCPT), | ||
|  | 	{ S = c(I) with p(bool, trans([CCCPT,CTCPT]),[NC,T]) }, | ||
|  | 	emitting(S). | ||
|  | 
 | ||
|  | 	 | ||
|  | t(I,S) :- | ||
|  | 	% I < IMax | ||
|  | 	{ S = t(I) with p(bool, trans([]), []) }. | ||
|  | 
 | ||
|  | 
 | ||
|  | % the item I at slice J is a random variable P. | ||
|  | emitting(M) :- | ||
|  | 	emission(M). | ||
|  | 
 | ||
|  | emission_cpt(Key, CPT) :- | ||
|  | 	Key=..[A,_,Slice], !, | ||
|  | 	emission_cpt(A, Slice, CPT). | ||
|  | emission_cpt(_, CPT) :- | ||
|  | 	nule_cpt(CPT). | ||
|  | 
 | ||
|  | emission_cpt(m,J,CPT) :- !, me_cpt(J,CPT). | ||
|  | emission_cpt(i,J,CPT) :- !, ie_cpt(J,CPT). | ||
|  | emission_cpt(_,_,CPT) :- nule_cpt(CPT). | ||
|  | 
 | ||
|  | 
 | ||
|  | ie_cpt(I,Logs) :- ie_cpt(I,Logs,_,_). | ||
|  | me_cpt(I,Logs) :- me_cpt(I,Logs,_,_). | ||
|  | nule_cpt(Logs) :- nule_cpt(Logs,_,_). | ||
|  | b_m_cpt(I,Log) :- b_m_cpt(I,Log,_,_). | ||
|  | b_d_cpt(Log) :- b_d_cpt(Log,_,_). | ||
|  | c_c_cpt(Log) :- c_c_cpt(Log,_,_). | ||
|  | c_t_cpt(Log) :- c_t_cpt(Log,_,_). | ||
|  | d_d_cpt(I,Log) :- d_d_cpt(I,Log,_,_). | ||
|  | d_m_cpt(I,Log) :- d_m_cpt(I,Log,_,_). | ||
|  | e_c_cpt(Log) :- e_c_cpt(Log,_,_). | ||
|  | e_j_cpt(Log) :- e_j_cpt(Log,_,_). | ||
|  | i_i_cpt(I,Log) :- i_i_cpt(I,Log,_,_). | ||
|  | i_m_cpt(I,Log) :- i_m_cpt(I,Log,_,_). | ||
|  | j_b_cpt(Log) :- j_b_cpt(Log,_,_). | ||
|  | j_j_cpt(Log) :- j_j_cpt(Log,_,_). | ||
|  | m_d_cpt(I,Log) :- m_d_cpt(I,Log,_,_). | ||
|  | m_e_cpt(I,Log) :- m_e_cpt(I,Log,_,_). | ||
|  | m_i_cpt(I,Log) :- m_i_cpt(I,Log,_,_). | ||
|  | m_m_cpt(I,Log) :- m_m_cpt(I,Log,_,_). | ||
|  | n_b_cpt(Log) :- n_b_cpt(Log,_,_). | ||
|  | n_n_cpt(Log) :- n_n_cpt(Log,_,_). | ||
|  | 
 | ||
|  | %hmm_domain([a,  c,  d,  e,  f,  g,  h,  i,  k,  l,  m,  n,  p,  q,  r,  s,  t,  v,  w,  y]). | ||
|  | 
 | ||
|  | hmm_domain(aminoacids). | ||
|  | 
 |