330 lines
		
	
	
		
			9.4 KiB
		
	
	
	
		
			Prolog
		
	
	
	
	
	
			
		
		
	
	
			330 lines
		
	
	
		
			9.4 KiB
		
	
	
	
		
			Prolog
		
	
	
	
	
	
| %
 | |
| % Convert HMMer model to CLP(BN) program
 | |
| %
 | |
% main
%
% Entry point: reads the HMMER model in 'hmmer_b.1.18.1.hmm' and writes
% the generated program to 'hmmer_b.1.18.1.yap'.
%
% parse_model/2 never returns normally: it either throws done(Info) once
% the model body has been read (or end of file is hit), or it fails on a
% line it cannot parse.  Both streams are closed on every path (success,
% failure, unexpected exception) before the original outcome is
% reproduced, so a malformed input no longer leaks open streams.
main :-
	open('hmmer_b.1.18.1.hmm', read, S),
	open('hmmer_b.1.18.1.yap', write, W),
%	open('globin.hmm', read, S),
	catch(
	    (   catch(parse_model(S,_),done(Info),stop(S,W,Info))
	    ->  Outcome = true
	    ;   Outcome = false
	    ),
	    Unexpected,
	    Outcome = error(Unexpected)
	),
	close(S),
	close(W),
	% Re-raise / re-fail only after the streams are safely closed.
	(   Outcome = error(E) -> throw(E)
	;   Outcome == true
	).
 | |
| 
 | |
% stop(+In, +Out, +Info)
%
% Recovery goal run when parsing ends with done(Info).  If Info is fully
% instantiated the program is generated on Out; otherwise (or when
% generation fails) "Bad HMM" is reported on user_error.  In is unused.
stop(_In, Out, Info) :-
	(   ground(Info),
	    gen_program(Out, Info)
	;   format(user_error,"Bad HMM~n", [])
	).
 | |
| 
 | |
% parse_model(+Stream, ?Info)
%
% Header loop: read one line, let match_field//2 consume all of it while
% filling in Info, then recurse.  The loop is only left through the
% done(Info) exception or through failure on an unparsable line.
parse_model(Stream, Info) :-
	get_line(Stream, Codes, Info),
%	format('~s~n',[Codes]),
	match_field(Info, Stream, Codes, []),
	parse_model(Stream, Info).
 | |
| 
 | |
% get_line(+Stream, -Codes, +Info)
%
% Reads character codes from Stream up to (and excluding) the next
% newline (code 10).  Reaching end of file (code -1) throws done(Info).
get_line(Stream, Codes, Info) :-
	get0(Stream, Code),
	(   Code == -1 ->
	    throw(done(Info))
	;   Code == 10 ->
	    Codes = []
	;   Codes = [Code|More],
	    get_line(Stream, More, Info)
	).
 | |
| 
 | |
| 
 | |
% match_field(?Info, +Stream)//
%
% Parses one header line (a code list) by its leading tag and records
% the relevant data in
%   Info = hmmer(Version,Name,NOfStates,Alphabet,Special,NULE,Model,MAP).
% The caller requires the whole line to be consumed, so every clause for
% an ignored tag must end in scanner_skip.  Clause order matters where
% one tag is a prefix of another ("HMMER" must be tried before "HMM").
% The "HMM" clause reads the model body straight from Stream and then
% throws done(Info) to terminate the header loop.
match_field(hmmer(2,_,_,_,_,_,_,_), _) --> "HMMER", !,
	scanner_skip.			% mandatory field, should be ground
match_field(hmmer(_,Name,_,_,_,_,_,_),_) --> "NAME", !, 	% mandatory field
	scanner_skip_blanks,
	get_name(String), {atom_codes(Name,String) }.
% accession id, used to track DB accesses; the value is ignored, but the
% rest of the line must still be consumed or the line fails to parse.
match_field(_,_) --> "ACC", !, scanner_skip.
match_field(_,_) --> "DESC", !, scanner_skip. % optional description line
match_field(hmmer(_,_,NOfStates,_,_,_,_,_),_) --> "LENG", !, % number of states
	scanner_skip_blanks,
	get_number(NOfStates, 0).
match_field(hmmer(_,_,_,Alph,_,_,_,_),_) --> "ALPH", !, % aminos or bases
	scanner_skip_blanks,
	check_alphabet(Alph).
match_field(_,_) --> "RF", !, scanner_skip.
match_field(_,_) --> "CS", !, scanner_skip.
match_field(hmmer(_,_,_,_,_,_,_,MAP),_) --> "MAP", !,
	scanner_skip_blanks,
	to_lower(Codes),
	{ map_code(Codes,MAP) }.
match_field(_,_) --> "COM", !, scanner_skip.
match_field(_,_) --> "CKSUM", !, scanner_skip.
match_field(_,_) --> "GA", !, scanner_skip.
match_field(_,_) --> "TC", !, scanner_skip.
match_field(_,_) --> "NC", !, scanner_skip.
match_field(_,_) --> "NSEQ", !, scanner_skip.
match_field(_,_) --> "DATE", !, scanner_skip.
% XT: eight special-state transition scores, in this order.
match_field(hmmer(_,_,_,_,special(NB,NN,EC,EJ,CT,CC,JB,JJ,_,_),_,_,_),_) --> "XT", !,
	scan_transition(NB),
	scan_transition(NN),
	scan_transition(EC),
	scan_transition(EJ),
	scan_transition(CT),
	scan_transition(CC),
	scan_transition(JB),
	scan_transition(JJ).
% NULT: the two null-model transition scores.
match_field(hmmer(_,_,_,_,special(_,_,_,_,_,_,_,_,GG,GF),_,_,_),_) --> "NULT", !,
	scan_transition(GG),
	scan_transition(GF).
% NULE: one null emission score per alphabet symbol, stored as null(...).
match_field(hmmer(_,_,_,Alph,_,NULE,_,_),_) --> "NULE", !,
	{ nof_symbols(Alph,N) },
	scan_transitions(N,Transitions),
	{ NULE =.. [null|Transitions] }.
match_field(_,_) --> "EVD", !,
	scanner_skip.			% optional, but should do later.
match_field(Info,S) --> "HMM", !,
	scanner_skip,
	{
	  % one more line is read and discarded before the model body
	  get_line(S,_,Info),
	  Info = hmmer(_,_,NOfStates,Alph,_,_,model(BD,NBD,Transitions),MAP),
	  nof_symbols(Alph,N),
	  scan_model(S,NOfStates,N,BD,NBD,Transitions,MAP,Info),
	  throw(done(Info))
	}.
 | |
| 
 | |
% scan_model(+Stream, +NOfStates, +NSymbols, -BD, -NBD, -States, ?Map, +Info)
%
% Reads the model body: first the line holding the BD / NBD scores, then
% NOfStates state records (see scan_states/6).
scan_model(Stream, NOfStates, NSymbols, BD, NBD, States, Map, Info) :-
	get_line(Stream, FirstLine, Info),
	scan_bd(BD, NBD, FirstLine, []),
	scan_states(NOfStates, NSymbols, Stream, Map, States, Info).
 | |
| 
 | |
% scan_states(+Remaining, +NSymbols, +Stream, ?Map, -States, +Info)
%
% Reads Remaining state records into a list of t(Emissions,Inserts,Trans)
% terms, threading the MAP accumulator through; when no states remain
% the map is closed with close_map/1.
scan_states(0, _, _, Map, [], _) :- !,
	close_map(Map).
scan_states(Remaining, NSymbols, Stream, Map0, [t(Emit,Ins,Trans)|Rest], Info) :-
	Left is Remaining-1,
	scan_state(Stream, Emit, Ins, Map0, Trans, NSymbols, Map1, Info),
	scan_states(Left, NSymbols, Stream, Map1, Rest, Info).
 | |
| 
 | |
% scan_state(+Stream, -E, -I, ?MAP, -s(...), +N, -NMAP, +Info)
%
% Reads the three lines of one state record:
%   1. match emission scores (N of them), optionally followed by a MAP id;
%   2. insert emission scores (N of them);
%   3. the nine state transition scores.
% The first field of each line is skipped with scanner_skip_field//0.
scan_state(Stream, E, I, MAP, s(MM,MI,MD,IM,II,DM,DD,BM,ME), N, NMAP, Info) :-
	get_line(Stream, MatchLine, Info),
	get_line(Stream, InsertLine, Info),
	get_line(Stream, TransLine, Info),
%	format('~s~n~s~n~s~n',[MatchLine,InsertLine,TransLine]),
	% match emissions, then the optional map column
	scanner_skip_field(MatchLine, MatchRest),
	scan_transitions(N, E, MatchRest, MapRest),
	scan_map(MAP, NMAP, MapRest, []),
	% insert emissions
	scanner_skip_field(InsertLine, InsertRest),
	scan_transitions(N, I, InsertRest, []),
	% state transitions
	scanner_skip_field(TransLine, TransRest),
	scan_transitions(MM, MI, MD, IM, II, DM, DD, BM, ME, TransRest, []).
 | |
| 
 | |
% scan_transitions(-MM,-MI,-MD,-IM,-II,-DM,-DD,-BM,-ME)//
%
% Reads the nine per-state transition scores, in this order, by
% delegating to the counted list version scan_transitions//2.
scan_transitions(MM, MI, MD, IM, II, DM, DD, BM, ME) -->
	scan_transitions(9, [MM,MI,MD,IM,II,DM,DD,BM,ME]).
 | |
| 
 | |
| 
 | |
% scan_transitions(+Count, -Scores)//
%
% Reads exactly Count scores with scan_transition//1.
scan_transitions(0, []) --> !.
scan_transitions(Count, [Score|Scores]) -->
	scan_transition(Score),
	{ Left is Count-1 },
	scan_transitions(Left, Scores).
 | |
| 
 | |
% scan_transition(-Score)//
%
% Skips leading blanks, then reads one score (see get_transition//1).
scan_transition(Score) -->
	scanner_skip_blanks,
	get_transition(Score).
 | |
| 
 | |
% get_transition(-Score)//
%
% Reads one score token: either '*' (followed by optional blanks), an
% unsigned integer, or '-' followed by digits (yielding a negative
% integer).
get_transition('*') -->
	"*", !,
	scanner_skip_blanks.
get_transition(Value) -->
	[Digit],
	{ 0'0 =< Digit, Digit =< 0'9 }, !,
	{ First is Digit-0'0 },
	get_number(Value, First).
get_transition(Value) -->
	"-",
	get_number(Magnitude, 0),
	{ Value is -Magnitude }.
 | |
| 
 | |
% get_number(-Value, +Acc)//
%
% Accumulates decimal digits onto Acc.  The first non-digit code ends
% the number and is itself consumed; end of input also ends the number.
get_number(Value, Acc) -->
	[Code], !,
	(   { Code >= 0'0, Code =< 0'9 }
	->  { Acc1 is Acc*10+(Code-0'0) },
	    get_number(Value, Acc1)
	;   { Value = Acc }
	).
get_number(Value, Value) -->
	[].
 | |
| 
 | |
% scanner_skip_blanks//
%
% Consumes any run (possibly empty) of spaces and tabs.
scanner_skip_blanks -->
	( " " ; "\t" ), !,
	scanner_skip_blanks.
scanner_skip_blanks -->
	[].
 | |
| 
 | |
| 
 | |
% scanner_skip_field//
%
% Skips one blank-delimited score field, discarding its value.
scanner_skip_field -->
	scanner_skip_blanks,
	get_transition(_).
 | |
| 
 | |
% scan_bd(-BD, -NBD)//
%
% Reads three score fields from the line: the first is BD, the second
% is skipped, the third is NBD.
scan_bd(First, Third) -->
	scan_transition(First),
	scanner_skip_field,
	scan_transition(Third).
 | |
| 
 | |
% check_alphabet(-Alph)//
%
% Lower-cases the rest of the line and maps it to an alphabet atom via
% get_alph/2; fails for an unknown alphabet name.
check_alphabet(Alphabet) -->
	to_lower(NameCodes),
	{ get_alph(NameCodes, Alphabet) }.
 | |
| 
 | |
% to_lower(-Lowered)//
%
% Consumes all remaining codes, returning them with ASCII upper-case
% letters converted to lower case.
to_lower([Out|Outs]) -->
	[In], !,
	{   In >= 0'A, In =< 0'Z
	->  Out is In+(0'a-0'A)
	;   Out = In
	},
	to_lower(Outs).
to_lower([]) -->
	[].
 | |
| 
 | |
% get_alph(+LowerCaseName, -Alphabet)
%
% Supported alphabet names (already lower-cased) and their atoms.
get_alph("amino",   amino).
get_alph("nucleic", nucleic).
 | |
| 
 | |
% map_code(+LowerCaseFlag, -Map)
%
% Translates the MAP header value: "yes" yields yes(OpenList), whose
% list is filled in while states are read; "no" yields the atom no.
map_code("yes", yes(_)).
map_code("no",  no).
 | |
| 
 | |
% nof_symbols(?Alphabet, ?Size)
%
% Number of symbols in each supported alphabet.
nof_symbols(amino,   20).
nof_symbols(nucleic,  4).
 | |
| 
 | |
% scanner_skip//  (written directly as scanner_skip/2)
%
% Accepts any input and leaves the remainder unconstrained, so it
% "consumes" whatever is left on the line.
scanner_skip(_Input, _Remainder).
 | |
| 
 | |
% get_name(-Codes)//  (written directly as get_name/3)
%
% Returns every remaining code on the line as the name and leaves
% nothing unconsumed.
get_name(Codes, Codes, []).
 | |
| 
 | |
% scan_map(?Map0, -Map)//
%
% With a map (yes/1): reads one id from the line and appends it to the
% open-ended list, returning the new tail.  Without one (no): consumes
% nothing.
scan_map(yes([Id|Tail]), yes(Tail)) -->
	scan_transition(Id).
scan_map(no, no) -->
	[].
 | |
| 
 | |
% close_map(?Map)
%
% Terminates the open-ended list inside yes/1 with [], or accepts no.
close_map(Map) :-
	(   Map = yes([])
	->  true
	;   Map = no
	).
 | |
| 
 | |
| 
 | |
% gen_program(+Out, +Info)
%
% Writes the generated program to Out: a comment header (version,
% alphabet, name, size), a slices/1 fact, then the special-state CPTs,
% the null emission CPT, the per-state CPTs and finally the map.
gen_program(Out, hmmer(Version,ModelName,Size,Alph,Specials,NullEmissions,Model,Map)) :-
	format(Out, '~n% HMMer Version ~d (Plan 7) using ~a~n',[Version,Alph]),
	format(Out, '~n% Name: ~a~n',[ModelName]),
	format(Out, '~n% Size: ~d~n',[Size]),
	format(Out, 'slices(~d).~n',[Size]),
	gen_specials(Out, Specials),
	gen_nule(Out, NullEmissions, Alph, NullProbs),
	gen_model(Out, Model, NullProbs),
	gen_map(Out, Map).
 | |
| 
 | |
| 
 | |
| %
 | |
| % special nodes in graph.
 | |
| %
 | |
% gen_specials(+Out, +special(NB,NN,EC,EJ,CT,CC,JB,JJ,GG,GF))
%
% Emits one <from>_<to>_cpt(Score,1.0,Prob) fact per special transition,
% grouped under comment headers by destination state.  Prob is the
% score converted by normalize/3 with a null factor of 1.0.
gen_specials(Out, special(NB,NN,EC,EJ,CT,CC,JB,JJ,GG,GF)) :-
	% reaching state N
	normalize(NN,1.0,ProbNN),
	format(Out, '~n%Reaching state N.~n',[]),
	format(Out, 'n_n_cpt(~w,1.0,~w).~n',[NN,ProbNN]),
	% reaching state B
	normalize(JB,1.0,ProbJB),
	format(Out, '~n%Reaching state B.~n',[]),
	format(Out, 'j_b_cpt(~w,1.0,~w).~n',[JB,ProbJB]),
	normalize(NB,1.0,ProbNB),
	format(Out, 'n_b_cpt(~w,1.0,~w).~n',[NB,ProbNB]),
	% reaching state J
	format(Out, '~n%Reaching state J.~n',[]),
	normalize(EJ,1.0,ProbEJ),
	format(Out, 'e_j_cpt(~w,1.0,~w).~n',[EJ,ProbEJ]),
	normalize(JJ,1.0,ProbJJ),
	format(Out, 'j_j_cpt(~w,1.0,~w).~n',[JJ,ProbJJ]),
	% reaching state C
	format(Out, '~n%Reaching state C.~n',[]),
	normalize(EC,1.0,ProbEC),
	format(Out, 'e_c_cpt(~w,1.0,~w).~n',[EC,ProbEC]),
	normalize(CC,1.0,ProbCC),
	format(Out, 'c_c_cpt(~w,1.0,~w).~n',[CC,ProbCC]),
	% reaching state T
	format(Out, '~n%Reaching state T.~n',[]),
	normalize(CT,1.0,ProbCT),
	format(Out, 'c_t_cpt(~w,1.0,~w).~n',[CT,ProbCT]),
	% null model
	format(Out, '~n%Reaching state G (Null Model).~n',[]),
	normalize(GG,1.0,ProbGG),
	format(Out, 'g_g_cpt(~w,1.0,~w).~n',[GG,ProbGG]),
	format(Out, '~n%Reaching state F (Null Model).~n',[]),
	normalize(GF,1.0,ProbGF),
	format(Out, 'g_f_cpt(~w,1.0,~w).~n',[GF,ProbGF]).
 | |
| 
 | |
| 
 | |
% normalize(+Score, +Null, -Prob)
%
% Converts a score to a probability: Prob = Null * 2^(Score/1000).
% The special score '*' maps to probability 0.0.
normalize('*', _, 0.0) :- !.
normalize(Score, Null, Prob) :-
	Exponent = (Score/1000),
	Prob is Null * exp(2.0, Exponent).
 | |
| 
 | |
% normalizel(+Scores, +Null, -Probs)
%
% Applies normalize/3 to every score using the same null factor.
normalizel([], _, []).
normalizel([S|Ss], Null, [P|Ps]) :-
	normalize(S, Null, P),
	normalizel(Ss, Null, Ps).
 | |
| 
 | |
% normalizell(+Scores, +Nulls, -Probs)
%
% Applies normalize/3 element-wise, pairing each score with its own
% null factor; the lists must have equal length.
normalizell([], [], []).
normalizell([S|Ss], [Null|Nulls], [P|Ps]) :-
	normalize(S, Null, P),
	normalizell(Ss, Nulls, Ps).
 | |
| 
 | |
% null emission CPT
%
% gen_nule(+Out, +NULE, +Alph, -Probs)
%
% Converts the null emission scores (arguments of NULE) to probabilities
% using a uniform factor of 1/alphabet-size, and writes a nule_cpt/3
% fact holding the raw scores, that factor and the probabilities.
gen_nule(Out, NULE, Alph, Probs) :-
	NULE =.. [_|Scores],
	nof_symbols(Alph, AlphSize),
	Uniform is 1/AlphSize,
	normalizel(Scores, Uniform, Probs),
	RawTerm =.. [e|Scores],
	ProbTerm =.. [e|Probs],
	format(Out, '~n%Null state emission CPT.~n',[]),
	format(Out, 'nule_cpt(~n     ~w,~n     ~w,~n     ~w).~n',[RawTerm,Uniform,ProbTerm]).
 | |
| 
 | |
% gen_model(+Out, +model(BD,NBD,States), +NullProbs)
%
% Writes the b_d_cpt/3 fact and then the CPTs of every state, numbering
% states from 1.
gen_model(Out, model(BD,NBD,States), NullProbs) :-
	normalize_bd(BD, NBD, BDProb),
	format(Out, '~n%Reaching first D.~n',[]),
	format(Out, 'b_d_cpt(~w,~w,~w).~n',[BD,NBD,BDProb]),
	gen_states(Out, States, 1, NullProbs).
 | |
| 
 | |
% gen_states(+Out, +States, +FirstNo, +NullProbs)
%
% Emits the CPTs of each state in turn, numbering them consecutively
% from FirstNo.
gen_states(_, [], _, _).
gen_states(Out, [State|Rest], No, NullProbs) :-
	gen_state(Out, State, No, NullProbs),
	Next is No+1,
	gen_states(Out, Rest, Next, NullProbs).
 | |
| 
 | |
% gen_state(+W, +t(E,I,s(MM,MI,MD,IM,II,DM,DD,BM,ME)), +StateNo, +PsCPT)
%
% Writes every fact for state StateNo:
%   - consensus/2 (index of the most probable match emission);
%   - me_cpt/4 and ie_cpt/4: raw scores, null probabilities and the
%     resulting probabilities for match and insert emissions, with the
%     probability total appended as a trailing comment;
%   - one <from>_<to>_cpt/4 fact per state transition (null factor 1.0).
% E and I are the match / insert emission score lists; PsCPT holds the
% null emission probabilities used to scale them.
gen_state(W, t(E,I,s(MM,MI,MD,IM,II,DM,DD,BM,ME)),StateNo,PsCPT) :-
	format(W, '~n%State ~d.~n',[StateNo]),
	Norm =.. [e|PsCPT],
	% match emissions
	normalizell(E,PsCPT,ECPT),
	E0s =.. [e|E],
	Es =.. [e|ECPT],
	find_consensus(W,StateNo,ECPT),
	sum(ECPT,0,TE),
	format(W, 'me_cpt(~d,~n    ~w,~n    ~w,~n    ~w). % ~w~n',[StateNo,E0s,Norm,Es,TE]),
	% insert emissions: the first term carries the RAW scores (I), just
	% like E0s above; it used to repeat the probabilities by mistake.
	normalizell(I,PsCPT,ICPT),
	I0s =.. [e|I],
	Is =.. [e|ICPT],
	sum(ICPT,0,TI),
	format(W, 'ie_cpt(~d,~n    ~w,~n    ~w,~n    ~w). %~w~n',[StateNo,I0s,Norm,Is,TI]),
	% state transitions
	normalize(MM,1.0,MMCPT),
	format(W, 'm_m_cpt(~d,~w,~w,~w).~n',[StateNo,MM,1.0,MMCPT]),
	normalize(MI,1.0,MICPT),
	format(W, 'm_i_cpt(~d,~w,~w,~w).~n',[StateNo,MI,1.0,MICPT]),
	normalize(MD,1.0,MDCPT),
	format(W, 'm_d_cpt(~d,~w,~w,~w).~n',[StateNo,MD,1.0,MDCPT]),
	normalize(II,1.0,IICPT),
	format(W, 'i_i_cpt(~d,~w,~w,~w).~n',[StateNo,II,1.0,IICPT]),
	normalize(IM,1.0,IMCPT),
	format(W, 'i_m_cpt(~d,~w,~w,~w).~n',[StateNo,IM,1.0,IMCPT]),
	normalize(DM,1.0,DMCPT),
	format(W, 'd_m_cpt(~d,~w,~w,~w).~n',[StateNo,DM,1.0,DMCPT]),
	normalize(DD,1.0,DDCPT),
	format(W, 'd_d_cpt(~d,~w,~w,~w).~n',[StateNo,DD,1.0,DDCPT]),
	normalize(BM,1.0,BMCPT),
	format(W, 'b_m_cpt(~d,~w,~w,~w).~n',[StateNo,BM,1.0,BMCPT]),
	normalize(ME,1.0,MECPT),
	format(W, 'm_e_cpt(~d,~w,~w,~w).~n',[StateNo,ME,1.0,MECPT]).
 | |
| 
 | |
% gen_map(+Out, +Map)
%
% Placeholder: currently writes nothing for the map.
gen_map(_Out, _Map).
 | |
| 
 | |
% normalize_bd(+BD, +NBD, -Result)
%
% Currently the identity on BD: the result is the BD score unchanged and
% NBD is ignored.
normalize_bd(BD, _NBD, BD).
 | |
| 
 | |
% sum(+Numbers, +Acc, -Total)
%
% Total is Acc plus the sum of Numbers (accumulator-style fold).
sum([], Total, Total).
sum([X|Xs], Acc, Total) :-
	Acc1 is Acc+X,
	sum(Xs, Acc1, Total).
 | |
| 
 | |
% find_consensus(+Out, +StateNo, +Probs)
%
% Writes consensus(StateNo, Index), where Index is the 1-based position
% of the largest value in Probs (or -1 if no value exceeds 0).
find_consensus(Out, StateNo, Probs) :-
	max_index(Probs, 1, 0, -1, _Best, BestIndex),
	format(Out, 'consensus(~d,~d).~n',[StateNo,BestIndex]).
 | |
| 
 | |
% max_index(+List, +Pos, +Max0, +Index0, -Max, -Index)
%
% Scans List, whose first element has position Pos, for values strictly
% greater than the running maximum Max0 (found at Index0).  Returns the
% final maximum and its position; on ties the earliest position wins.
max_index([], _, Max, Index, Max, Index).
max_index([X|Xs], Pos, Max0, Index0, Max, Index) :-
	(   X > Max0
	->  Next is Pos+1,
	    max_index(Xs, Next, X, Pos, Max, Index)
	;   Next is Pos+1,
	    max_index(Xs, Next, Max0, Index0, Max, Index)
	).
 | |
| 
 |