projects
/
tclrobot.git
/ commitdiff
commit
grep
author
committer
pickaxe
?
search:
re
summary
|
shortlog
|
log
|
commit
| commitdiff |
tree
raw
|
patch
|
inline
| side by side (parent:
4e9e333
)
Changed tags for the output.
author
Per M. Hansen
<perhans@indexdata.dk>
Thu, 4 Feb 1999 20:37:25 +0000
(20:37 +0000)
committer
Per M. Hansen
<perhans@indexdata.dk>
Thu, 4 Feb 1999 20:37:25 +0000
(20:37 +0000)
robot.tcl
patch
|
blob
|
history
diff --git
a/robot.tcl
b/robot.tcl
index
8c46a63
..
c539a04
100755
(executable)
--- a/
robot.tcl
+++ b/
robot.tcl
@@
-1,5
+1,5
@@
#!/usr/bin/tclsh
#!/usr/bin/tclsh
-# $Id: robot.tcl,v 1.3 1998/10/15 13:27:19 adam Exp $
+# $Id: robot.tcl,v 1.4 1999/02/04 20:37:25 perhans Exp $
#
proc RobotFileNext {area} {
if {[catch {set ns [glob ${area}/*]}]} {
#
proc RobotFileNext {area} {
if {[catch {set ns [glob ${area}/*]}]} {
@@
-92,33
+92,32
@@
proc RobotRestart {} {
proc headSave {url out title} {
global URL
proc headSave {url out title} {
global URL
- puts $out {<nwi>}
- puts $out "<ti> $title"
- if {[info exists URL($url,head,Last-modified)]} {
- puts $out "<dm> $URL($url,head,Last-modified)"
+ puts $out {<meta>}
+ puts $out "<title>$title</title>"
+ if {[info exists URL($url,head,last-modified)]} {
+ puts $out "<lastmodified>$URL($url,head,last-modified)</lastmodified>"
}
puts $out {<si>}
}
puts $out {<si>}
- if {[info exists URL($url,head,Date)]} {
- puts $out " <lc> $URL($url,head,Date)"
+ if {[info exists URL($url,head,date)]} {
+ puts $out " <date>$URL($url,head,date)</date>"
}
}
- if {[info exists URL($url,head,Content-length)]} {
- puts $out " <by> $URL($url,head,Content-length)"
+ if {[info exists URL($url,head,content-length)]} {
+ puts $out " <by>$URL($url,head,content-length)</by>"
}
}
- if {[info exists URL($url,head,Server)]} {
- puts $out " <srvr> $URL($url,head,Server)"
+ if {[info exists URL($url,head,server)]} {
+ puts $out " <format>$URL($url,head,server)</format>"
}
puts $out {</si>}
}
puts $out {</si>}
- puts $out {<av>}
- puts $out " <avli> $url"
- if {[info exists URL($url,head,Content-type)]} {
- puts $out " <ty> $URL($url,head,Content-type)"
+ puts $out {<publisher>}
+ puts $out " <identifier>$url</identifier>"
+ if {[info exists URL($url,head,content-type)]} {
+ puts $out " <type>$URL($url,head,content-type)</type>"
}
}
- puts $out {</av>}
+ puts $out {</publisher>}
}
proc RobotSave {url} {
}
proc RobotSave {url} {
- global URL
- global domains
+ global URL domains
set out [RobotFileOpen visited $URL($url,host) $URL($url,path)]
set ti 0
set out [RobotFileOpen visited $URL($url,host) $URL($url,path)]
set ti 0
@@
-134,9
+133,9
@@
proc RobotSave {url} {
} body {
regsub -all -nocase {<script.*</script>} $body {} abody
regsub -all {<[^\>]+>} $abody {} nbody
} body {
regsub -all -nocase {<script.*</script>} $body {} abody
regsub -all {<[^\>]+>} $abody {} nbody
- puts $out "<body>"
+ puts $out "<documentcontent>"
puts $out $nbody
puts $out $nbody
- puts $out "</body>"
+ puts $out "</documentcontent>"
} a {
if {![info exists parm(href)]} {
puts "no href"
} a {
if {![info exists parm(href)]} {
puts "no href"
@@
-191,24
+190,24
@@
proc RobotSave {url} {
set path [lindex $c $i]
incr i -1
while {$i >= 0} {
set path [lindex $c $i]
incr i -1
while {$i >= 0} {
- switch -- [lindex $c $i] {
- .. {
- incr i -2
- }
- . {
- incr i -1
- }
- default {
- set path [lindex $c $i]/$path
- incr i -1
- }
- }
- }
- set href "$method://$host$path"
+ switch -- [lindex $c $i] {
+ .. {
+ incr i -2
+ }
+ . {
+ incr i -1
+ }
+ default {
+ set path [lindex $c $i]/$path
+ incr i -1
+ }
+ }
+ }
+ set href "$method://$host$path"
- puts $out "<cr>"
- puts $out "<li> $href"
- puts $out "<cp> $body"
+ puts $out "<cr>"
+ puts $out "<identifier>$href</identifier>"
+ puts $out "<description>$body</description>"
puts $out "</cr>"
if {![regexp {/.*bin/} $href)]} {
puts $out "</cr>"
if {![regexp {/.*bin/} $href)]} {
@@
-223,7
+222,7
@@
proc RobotSave {url} {
headSave $url $out "untitled"
set ti 1
}
headSave $url $out "untitled"
set ti 1
}
- puts $out "</nwi>"
+ puts $out "</meta>"
close $out
RobotFileUnlink unvisited $URL($url,host) $URL($url,path)
}
close $out
RobotFileUnlink unvisited $URL($url,host) $URL($url,path)
}
@@
-243,12
+242,11
@@
proc RobotRead {url sock} {
head {
puts "head: $line"
if {[regexp {([^:]+):[ ]+(.*)} $line x name value]} {
head {
puts "head: $line"
if {[regexp {([^:]+):[ ]+(.*)} $line x name value]} {
- set URL($url,head,$name) $value
+ set URL($url,head,[string tolower $name]) $value
}
}
html {
lappend URL($url,line) $line
}
}
html {
lappend URL($url,line) $line
-# puts "body: $line"
}
skip {
close $sock
}
skip {
close $sock
@@
-258,8
+256,8
@@
proc RobotRead {url sock} {
}
} else {
set URL($url,state) html
}
} else {
set URL($url,state) html
- if {[info exists URL($url,head,Content-type)]} {
- if {![string compare $URL($url,head,Content-type) text/html]} {
+ if {[info exists URL($url,head,content-type)]} {
+ if {![string compare $URL($url,head,content-type) text/html]} {
set URL($url,state) html
}
}
set URL($url,state) html
}
}
@@
-306,12
+304,13
@@
proc RobotGetUrl {url phost} {
if {![llength [info commands htmlSwitch]]} {
set e [info sharedlibextension]
if {[catch {load ./tclrobot$e}]} {
if {![llength [info commands htmlSwitch]]} {
set e [info sharedlibextension]
if {[catch {load ./tclrobot$e}]} {
- load tclrobot$e
+ load tclrobot$e
}
}
if {[llength $argv] < 2} {
puts "Tclrobot: usage <domain> <start>"
}
}
if {[llength $argv] < 2} {
puts "Tclrobot: usage <domain> <start>"
+ puts " Example: '*.dk' www.indexdata.dk"
exit 1
}
set domains [lindex $argv 0]
exit 1
}
set domains [lindex $argv 0]
@@
-323,4
+322,3
@@
if {[string length $site]} {
RobotRestart
vwait forever
RobotRestart
vwait forever
-