From: Adam Dickmeiss Date: Sun, 17 Feb 2002 09:29:18 +0000 (+0000) Subject: Robot honour robots meta tag X-Git-Tag: ZMBOT.0.1~6 X-Git-Url: http://lists.indexdata.dk/?a=commitdiff_plain;h=94e3de1af23033b0b4687020d451af4e61111c8e;p=tclrobot.git Robot honour robots meta tag --- diff --git a/robot.tcl b/robot.tcl index b6466c3..ffbfce4 100755 --- a/robot.tcl +++ b/robot.tcl @@ -1,5 +1,5 @@ #!/usr/bin/tclsh -# $Id: robot.tcl,v 1.29 2001/11/14 09:15:23 adam Exp $ +# $Id: robot.tcl,v 1.30 2002/02/17 09:29:18 adam Exp $ # proc RobotFileNext1 {area lead} { # puts "RobotFileNext1 area=$area lead=$lead" @@ -502,6 +502,13 @@ proc link {url out href body distance} { proc RobotTextHtml {url out} { global URL maxdistance + # set title so we can emit it for the body + set title {} + # if true, nothing will be indexed + set noindex 0 + # if true, nothing will be followed + set nofollow 0 + set distance 0 set fdistance 0 if {$maxdistance < 1000 && [info exists URL($url,dist)]} { @@ -510,36 +517,67 @@ proc RobotTextHtml {url out} { } htmlSwitch $URL($url,buf) \ title { - puts $out "$body" + set title $body } -nonest meta { + # collect metadata and save NAME= CONTENT=.. + set metaname {} + set metacontent {} puts -nonewline $out "" + # go through robots directives (if any) + if {![string compare $metaname robots]} { + set direcs [split [string tolower $metacontent] ,] + if {[lsearch $direcs noindex] >= 0} { + set noindex 1 + } + if {[lsearch $direcs nofollow] >= 0} { + set nofollow 1 + } } - puts $out {>} } body { - regsub -all {