From: Adam Dickmeiss
Date: Thu, 7 Jun 2001 08:17:00 +0000 (+0000)
Subject: Fixes for robots.txt handling (bug introduced by previous commit).
X-Git-Tag: ZMBOT.0.1~20
X-Git-Url: http://lists.indexdata.dk/?a=commitdiff_plain;h=9b7fbb227055fc0ad4eeb10d482fd9e8ada7ce4e;p=tclrobot.git

Fixes for robots.txt handling (bug introduced by previous commit).
---

diff --git a/robot.tcl b/robot.tcl
index fa3c595..b8db6c4 100755
--- a/robot.tcl
+++ b/robot.tcl
@@ -1,5 +1,5 @@
 #!/usr/bin/tclsh
-# $Id: robot.tcl,v 1.17 2001/06/07 08:10:10 adam Exp $
+# $Id: robot.tcl,v 1.18 2001/06/07 08:17:00 adam Exp $
 #
 proc RobotFileNext1 {area lead} {
     # puts "RobotFileNext1 area=$area lead=$lead"
@@ -550,7 +550,7 @@ proc RobotsTxt0 {v buf} {
     global URL agent
     set section 0
     foreach l [split $buf \n] {
-        if {[regexp {([-A-Za-z]+):[ \t]*([^\#\t ]+)} $l match cmd arg]} {
+        if {[regexp {([-A-Za-z]+):[ ]*([^\# ]+)} $l match cmd arg]} {
             puts "cmd=$cmd arg=$arg"
             switch -- [string tolower $cmd] {
                 user-agent {
@@ -579,7 +579,8 @@ proc RobotTextPlain {url out} {
     global URL
 
     puts $out ""
-    puts $out $URL($url,buf)
+    regsub -all {<} $URL($url,buf) {\<} content
+    puts $out $content
     puts $out ""
 
     if {![string compare $URL($url,path) /robots.txt]} {
@@ -616,7 +617,7 @@ proc Robot200 {url} {
             }
         }
         text/plain {
-            RobotTextPlain $url $out $outr
+            RobotTextPlain $url $out
         }
         application/pdf {
             set pdff [open test.pdf w]