projects
/
tclrobot.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
73e2094
)
Fixes for robots.txt handling (bug introduced by previous commit).
author
Adam Dickmeiss
<adam@indexdata.dk>
Thu, 7 Jun 2001 08:17:00 +0000
(08:17 +0000)
committer
Adam Dickmeiss
<adam@indexdata.dk>
Thu, 7 Jun 2001 08:17:00 +0000
(08:17 +0000)
robot.tcl
patch
|
blob
|
history
diff --git
a/robot.tcl
b/robot.tcl
index
fa3c595
..
b8db6c4
100755
(executable)
--- a/
robot.tcl
+++ b/
robot.tcl
@@
-1,5
+1,5
@@
#!/usr/bin/tclsh
#!/usr/bin/tclsh
-# $Id: robot.tcl,v 1.17 2001/06/07 08:10:10 adam Exp $
+# $Id: robot.tcl,v 1.18 2001/06/07 08:17:00 adam Exp $
#
proc RobotFileNext1 {area lead} {
# puts "RobotFileNext1 area=$area lead=$lead"
#
proc RobotFileNext1 {area lead} {
# puts "RobotFileNext1 area=$area lead=$lead"
@@
-550,7
+550,7
@@
proc RobotsTxt0 {v buf} {
global URL agent
set section 0
foreach l [split $buf \n] {
global URL agent
set section 0
foreach l [split $buf \n] {
- if {[regexp {([-A-Za-z]+):[ \t]*([^\#\t ]+)} $l match cmd arg]} {
+ if {[regexp {([-A-Za-z]+):[ ]*([^\# ]+)} $l match cmd arg]} {
puts "cmd=$cmd arg=$arg"
switch -- [string tolower $cmd] {
user-agent {
puts "cmd=$cmd arg=$arg"
switch -- [string tolower $cmd] {
user-agent {
@@
-579,7
+579,8
@@
proc RobotTextPlain {url out} {
global URL
puts $out "<documentcontent>"
global URL
puts $out "<documentcontent>"
- puts $out $URL($url,buf)
+ regsub -all {<} $URL($url,buf) {\&lt;} content
+ puts $out $content
puts $out "</documentcontent>"
if {![string compare $URL($url,path) /robots.txt]} {
puts $out "</documentcontent>"
if {![string compare $URL($url,path) /robots.txt]} {
@@
-616,7
+617,7
@@
proc Robot200 {url} {
}
}
text/plain {
}
}
text/plain {
- RobotTextPlain $url $out $outr
+ RobotTextPlain $url $out
}
application/pdf {
set pdff [open test.pdf w]
}
application/pdf {
set pdff [open test.pdf w]