e5f4633c39
which included commits to RCS files with non-trunk default branches. git-svn-id: https://yap.svn.sf.net/svnroot/yap/trunk@5 b08c6af1-5177-4d33-ba66-4b1c6b8b522a
50 lines
1.6 KiB
Prolog
50 lines
1.6 KiB
Prolog
:- module(check_links, [main/1,check_links/2],[]).
|
|
:- use_package(pillow).
|
|
|
|
main([URL]) :- !,
|
|
check_links(URL,BadLinks),
|
|
report_bad_links(BadLinks).
|
|
main(_) :-
|
|
error(['Correct usage: check_links <URL>']).
|
|
|
|
check_links(URL,BadLinks) :-
|
|
url_info(URL,URLInfo),
|
|
fetch_url(URLInfo,[],Response),
|
|
member(content_type(text,html,_),Response),
|
|
member(content(Content),Response),
|
|
html2terms(Content,Terms),
|
|
check_source_links(Terms,URLInfo,[],BadLinks).
|
|
|
|
check_source_links([],_,BL,BL).
|
|
check_source_links([E|Es],BaseURL,BL0,BL) :-
|
|
check_source_links1(E,BaseURL,BL0,BL1),
|
|
check_source_links(Es,BaseURL,BL1,BL).
|
|
|
|
check_source_links1(env(a,AnchorAtts,_),BaseURL,BL0,BL) :-
|
|
member((href=URL),AnchorAtts), !,
|
|
check_link(URL,BaseURL,BL0,BL).
|
|
check_source_links1(env(_Name,_Atts,Env_html),BaseURL,BL0,BL) :- !,
|
|
check_source_links(Env_html,BaseURL,BL0,BL).
|
|
check_source_links1(_,_,BL,BL).
|
|
|
|
check_link(URL,BaseURL,BL0,BL) :-
|
|
url_info_relative(URL,BaseURL,URLInfo), !,
|
|
fetch_url_status(URLInfo,Status,Phrase),
|
|
( Status \== success ->
|
|
atom_codes(P,Phrase),
|
|
atom_codes(U,URL),
|
|
BL = [badlink(U,P)|BL0]
|
|
; BL = BL0
|
|
).
|
|
check_link(_,_,BL,BL).
|
|
|
|
fetch_url_status(URL,Status,Phrase) :-
|
|
fetch_url(URL,[head,timeout(20)],Response), !,
|
|
member(status(Status,_,Phrase),Response).
|
|
fetch_url_status(_,timeout,timeout).
|
|
|
|
report_bad_links([]).
|
|
report_bad_links([badlink(U,P)|BLs]) :-
|
|
message(['URL ',U,' : ',P]),
|
|
report_bad_links(BLs).
|