projects
/
tclrobot.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
6374990
)
Minor changes.
author
Adam Dickmeiss
<adam@indexdata.dk>
Thu, 15 Oct 1998 13:27:19 +0000
(13:27 +0000)
committer
Adam Dickmeiss
<adam@indexdata.dk>
Thu, 15 Oct 1998 13:27:19 +0000
(13:27 +0000)
robot.tcl
patch
|
blob
|
history
diff --git
a/robot.tcl
b/robot.tcl
index
c942418
..
8c46a63
100755
(executable)
--- a/
robot.tcl
+++ b/
robot.tcl
@@
-1,5
+1,5
@@
#!/usr/bin/tclsh
#!/usr/bin/tclsh
-# $Id: robot.tcl,v 1.2 1998/10/15 12:31:03 adam Exp $
+# $Id: robot.tcl,v 1.3 1998/10/15 13:27:19 adam Exp $
#
proc RobotFileNext {area} {
if {[catch {set ns [glob ${area}/*]}]} {
#
proc RobotFileNext {area} {
if {[catch {set ns [glob ${area}/*]}]} {
@@
-118,6
+118,7
@@
proc headSave {url out title} {
proc RobotSave {url} {
global URL
proc RobotSave {url} {
global URL
+ global domains
set out [RobotFileOpen visited $URL($url,host) $URL($url,path)]
set ti 0
set out [RobotFileOpen visited $URL($url,host) $URL($url,path)]
set ti 0
@@
-149,15
+150,20
@@
proc RobotSave {url} {
if {[regexp {^\#} $parm(href)]} {
continue
} elseif {[regexp {^([^:]+):([^#]+)} $parm(href) x method hpath]} {
if {[regexp {^\#} $parm(href)]} {
continue
} elseif {[regexp {^([^:]+):([^#]+)} $parm(href) x method hpath]} {
+ set ok 0
if {![string compare $method http]} {
if {![regexp {^//([^/]+)(.*)} $hpath x host path]} {
set host $URL($url,host)
set path $hpath
}
if {![string compare $method http]} {
if {![regexp {^//([^/]+)(.*)} $hpath x host path]} {
set host $URL($url,host)
set path $hpath
}
- if {![regexp {\.indexdata\.dk$} $host]} continue
- } else {
- continue
+ foreach domain $domains {
+ if {[string match $domain $host]} {
+ set ok 1
+ break
+ }
+ }
}
}
+ if {!$ok} continue
} elseif {[regexp {^([/~][^#]*)} $parm(href) x path]} {
set host $URL($url,host)
set method http
} elseif {[regexp {^([/~][^#]*)} $parm(href) x path]} {
set host $URL($url,host)
set method http
@@
-278,7
+284,7
@@
proc RobotGetUrl {url phost} {
set port 80
puts "---------"
puts $url
set port 80
puts "---------"
puts $url
- if {[regexp {([^:]+)://([^/]+)([^ ?]*)} $url x method host path]} {
+ if {[regexp {([^:]+)://([^/]+)([^ ]*)} $url x method host path]} {
puts "method=$method host=$host path=$path"
} else {
return -1
puts "method=$method host=$host path=$path"
} else {
return -1
@@
-304,14
+310,17
@@
if {![llength [info commands htmlSwitch]]} {
}
}
}
}
-if {![llength $argv]} {
- puts "Tclrobot: specify one or more sites."
+if {[llength $argv] < 2} {
+ puts "Tclrobot: usage <domain> <start>"
exit 1
}
exit 1
}
-foreach site $argv {
+set domains [lindex $argv 0]
+set site [lindex $argv 1]
+if {[string length $site]} {
set x [RobotFileOpen unvisited $site /]
close $x
}
set x [RobotFileOpen unvisited $site /]
close $x
}
+
RobotRestart
vwait forever
RobotRestart
vwait forever