50 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
		
		
			
		
	
	
			50 lines
		
	
	
		
			1.6 KiB
		
	
	
	
		
			Perl
		
	
	
	
	
	
| 
								 | 
							
								:- module(check_links, [main/1,check_links/2],[]).
							 | 
						||
| 
								 | 
							
								:- use_package(pillow).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								main([URL]) :- !,
							 | 
						||
| 
								 | 
							
								        check_links(URL,BadLinks),
							 | 
						||
| 
								 | 
							
								        report_bad_links(BadLinks).
							 | 
						||
| 
								 | 
							
								main(_) :-
							 | 
						||
| 
								 | 
							
								        error(['Correct usage: check_links <URL>']).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								check_links(URL,BadLinks) :-
							 | 
						||
| 
								 | 
							
								        url_info(URL,URLInfo),
							 | 
						||
| 
								 | 
							
								        fetch_url(URLInfo,[],Response),
							 | 
						||
| 
								 | 
							
								        member(content_type(text,html,_),Response),
							 | 
						||
| 
								 | 
							
								        member(content(Content),Response),
							 | 
						||
| 
								 | 
							
								        html2terms(Content,Terms),
							 | 
						||
| 
								 | 
							
								        check_source_links(Terms,URLInfo,[],BadLinks).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								check_source_links([],_,BL,BL).
							 | 
						||
| 
								 | 
							
								check_source_links([E|Es],BaseURL,BL0,BL) :-
							 | 
						||
| 
								 | 
							
								        check_source_links1(E,BaseURL,BL0,BL1),
							 | 
						||
| 
								 | 
							
								        check_source_links(Es,BaseURL,BL1,BL).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								check_source_links1(env(a,AnchorAtts,_),BaseURL,BL0,BL) :-
							 | 
						||
| 
								 | 
							
								        member((href=URL),AnchorAtts), !,
							 | 
						||
| 
								 | 
							
								        check_link(URL,BaseURL,BL0,BL).
							 | 
						||
| 
								 | 
							
								check_source_links1(env(_Name,_Atts,Env_html),BaseURL,BL0,BL) :- !,
							 | 
						||
| 
								 | 
							
								        check_source_links(Env_html,BaseURL,BL0,BL).
							 | 
						||
| 
								 | 
							
								check_source_links1(_,_,BL,BL).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								check_link(URL,BaseURL,BL0,BL) :-
							 | 
						||
| 
								 | 
							
								        url_info_relative(URL,BaseURL,URLInfo), !,
							 | 
						||
| 
								 | 
							
								        fetch_url_status(URLInfo,Status,Phrase),
							 | 
						||
| 
								 | 
							
								        ( Status \== success ->
							 | 
						||
| 
								 | 
							
								          atom_codes(P,Phrase),
							 | 
						||
| 
								 | 
							
								          atom_codes(U,URL),
							 | 
						||
| 
								 | 
							
								          BL = [badlink(U,P)|BL0]
							 | 
						||
| 
								 | 
							
								        ; BL = BL0
							 | 
						||
| 
								 | 
							
								        ).
							 | 
						||
| 
								 | 
							
								check_link(_,_,BL,BL).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								fetch_url_status(URL,Status,Phrase) :-
							 | 
						||
| 
								 | 
							
								        fetch_url(URL,[head,timeout(20)],Response), !,
							 | 
						||
| 
								 | 
							
								        member(status(Status,_,Phrase),Response).
							 | 
						||
| 
								 | 
							
								fetch_url_status(_,timeout,timeout).
							 | 
						||
| 
								 | 
							
								
							 | 
						||
| 
								 | 
							
								report_bad_links([]).
							 | 
						||
| 
								 | 
							
								report_bad_links([badlink(U,P)|BLs]) :-
							 | 
						||
| 
								 | 
							
								        message(['URL ',U,' : ',P]),
							 | 
						||
| 
								 | 
							
								        report_bad_links(BLs).
							 |