projects
/
tclrobot.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
d3b424b
)
*** empty log message ***
author
Adam Dickmeiss
<adam@indexdata.dk>
Mon, 25 Mar 2002 16:11:08 +0000
(16:11 +0000)
committer
Adam Dickmeiss
<adam@indexdata.dk>
Mon, 25 Mar 2002 16:11:08 +0000
(16:11 +0000)
robot.tcl
patch
|
blob
|
history
diff --git
a/robot.tcl
b/robot.tcl
index
56e33a4
..
bfe875f
100755
(executable)
--- a/
robot.tcl
+++ b/
robot.tcl
@@
-1,5
+1,5
@@
#!/usr/bin/tclsh
#!/usr/bin/tclsh
-# $Id: robot.tcl,v 1.31 2002/02/28 14:04:11 adam Exp $
+# $Id: robot.tcl,v 1.32 2002/03/25 16:11:08 adam Exp $
#
proc RobotFileNext1 {area lead} {
# puts "RobotFileNext1 area=$area lead=$lead"
#
proc RobotFileNext1 {area lead} {
# puts "RobotFileNext1 area=$area lead=$lead"
@@
-288,12
+288,12
@@
proc RobotHref {url hrefx hostx pathx} {
if {[string length $href] > 256} {
return 0
}
if {[string length $href] > 256} {
return 0
}
- if {[string first {?} $href] >= 0} {
- return 0
- }
- if {[string first {?} $url] >= 0 && [string first {?} $href] >= 0} {
- return 0
- }
+# if {[string first {?} $href] >= 0} {
+# return 0
+# }
+# if {[string first {?} $url] >= 0 && [string first {?} $href] >= 0} {
+# return 0
+# }
# get method (if any)
if {![regexp {^([^/:]+):(.*)} $href x method hpath]} {
set hpath $href
# get method (if any)
if {![regexp {^([^/:]+):(.*)} $href x method hpath]} {
set hpath $href
@@
-548,7
+548,7
@@
proc RobotTextHtml {url out} {
# don't print title of document content if noindex is used
if {!$noindex} {
puts $out "<title>$title</title>"
# don't print title of document content if noindex is used
if {!$noindex} {
puts $out "<title>$title</title>"
- regsub -all {<!--[^-]*->} $body { } abody
+ regsub -all {<!--[^-]*-->} $body { } abody
regsub -all -nocase {<script[^<]*</script>} $abody {} bbody
regsub -all {<[^\>]+>} $bbody {} nbody
puts $out "<documentcontent>"
regsub -all -nocase {<script[^<]*</script>} $abody {} bbody
regsub -all {<[^\>]+>} $bbody {} nbody
puts $out "<documentcontent>"