2 # $Id: robot.tcl,v 1.1.1.1 1996/08/06 14:04:23 adam Exp $
# RobotFileNext area
#   Searches the directory tree rooted at $area and returns an http:// URL
#   derived from a file found there. Directories are searched recursively.
#   NOTE(review): this view omits several lines (loop header, closing braces,
#   the fall-through return value) - confirm against the full file.
4 proc RobotFileNext {area} {
# glob raises an error when nothing matches, so the catch arm is the
# "empty or unreadable directory" case (its body is not visible here).
5 if {[catch {set ns [glob $area/*]}]} {
# Index of the first "/" in $area; used below to cut the leading
# directory component off the file name when building the URL.
8 set off [string first / $area]
# $n is presumably the loop variable over $ns - loop header not visible.
11 if {[file isfile $n]} {
# A file name containing ":.html" maps back to the URL of the directory
# itself ("$area/") rather than to the file name.
12 if {[string first :.html $n] > 0} {
13 return http://[string range $area/ $off end]
# Any other regular file maps to a URL built from its own name.
15 return http://[string range $n $off end]
# Sub-directory: recurse, and act on a non-empty result.
17 if {[file isdirectory $n]} {
18 set sb [RobotFileNext $n]
19 if {[string length $sb]} {
# RobotFileExist area host path
#   Returns the result of [file exists] for the file that represents
#   $host$path underneath directory $area.
27 proc RobotFileExist {area host path} {
# Break "$area/$host$path" into its "/"-separated components.
28 set comp [split $area/$host$path /]
# $l is assigned on a line not visible here (presumably the index of the
# last component - TODO confirm). An empty component at that index means
# the path ended in "/", in which case ":.html" is appended as file name.
31 if {![string length [lindex $comp $l]]} {
32 set comp [split $area/$host$path:.html /]
34 return [file exists [join $comp /]]
# RobotFileUnlink area host path
#   Removes the file that represents $host$path underneath $area, then walks
#   back up through the parent directories.
#   NOTE(review): what happens to a parent directory found empty is on lines
#   not visible here (presumably it is removed) - confirm.
37 proc RobotFileUnlink {area host path} {
38 set comp [split $area/$host$path /]
# $l is assigned on a line not visible here. An empty component at that
# index is treated as "path ends in /" and ":.html" is used as file name.
41 if {![string length [lindex $comp $l]]} {
42 set comp [split $area/$host$path:.html /]
# Deletion shells out to rm; if rm fails the proc returns silently.
# NOTE(review): the built-in [file delete] would avoid the external process.
44 if {[catch {exec rm [join $comp /]}]} return
# Walk from index $l towards the root, stopping before index 0.
46 for {set i $l} {$i > 0} {incr i -1} {
# NOTE(review): this overwrites the proc's own "path" parameter.
47 set path [join [lrange $comp 0 $i] /]
# A successful glob means $path still has entries, so stop here.
48 if {![catch {glob $path/*}]} return
# RobotFileOpen area host path
#   Opens, for writing, the file that represents $host$path underneath $area
#   by changing directory one path component at a time.
#   NOTE(review): the handling of a failed cd, the open call for a non-empty
#   file name, any restoring of the working directory and the return value
#   are on lines not visible here - confirm against the full file.
53 proc RobotFileOpen {area host path} {
56 set comp [split $area/$host$path /]
57 set len [llength $comp]
# Descend through the components with cd; this changes the working
# directory of the whole process.
59 for {set i 0} {$i < $len} {incr i} {
60 set d [lindex $comp $i]
# The catch arm is entered when the directory cannot be entered.
61 if {[catch {cd ./$d}]} {
# NOTE(review): with len == [llength $comp] this index would be past the
# end of the list; presumably len is adjusted on an elided line - confirm.
66 set d [lindex $comp $len]
# Non-empty final component: a named file (open call not visible here).
67 if {[string length $d]} {
# Otherwise the path ended in "/", and the file ":.html" stands for it.
70 set out [open :.html w]
# RobotRestart
#   Fetches URLs that are recorded under the "unvisited" directory.
#   NOTE(review): the enclosing loop, the access to the URL array and the
#   test on $r are on lines not visible here.
76 proc RobotRestart {} {
# Pick a pending URL; an empty string means nothing is left.
80 set url [RobotFileNext unvisited]
81 if {![string length $url]} break
# Start fetching it; the second argument is passed as an empty string.
82 set r [RobotGetUrl $url {}]
# Drop the entry for this URL from the unvisited tree.
86 RobotFileUnlink unvisited $URL($url,host) $URL($url,path)
# headSave url out title
#   Writes a tagged header record for $url to channel $out. Fields taken
#   from HTTP response headers are written only when the matching
#   URL($url,head,...) element exists.
#     url   - key into the URL array
#     out   - output channel
#     title - text written after the <ti> tag
92 proc headSave {url out title} {
# Title.
96 puts $out "<ti> $title"
# Last-modified header.
97 if {[info exists URL($url,head,Last-modified)]} {
98 puts $out "<dm> $URL($url,head,Last-modified)"
# Date header.
101 if {[info exists URL($url,head,Date)]} {
102 puts $out " <lc> $URL($url,head,Date)"
# Content-length header.
104 if {[info exists URL($url,head,Content-length)]} {
105 puts $out " <by> $URL($url,head,Content-length)"
# Server header.
107 if {[info exists URL($url,head,Server)]} {
108 puts $out " <srvr> $URL($url,head,Server)"
# The URL itself.
112 puts $out " <avli> $url"
# Content-type header.
113 if {[info exists URL($url,head,Content-type)]} {
114 puts $out " <ty> $URL($url,head,Content-type)"
# RobotSave url
#   Writes the record for a fetched URL under "visited", resolves the links
#   found in the document, records links not yet visited under "unvisited",
#   and finally removes $url from "unvisited".
#   NOTE(review): many lines are missing from this view (the htmlSwitch
#   handler arguments, else arms, loop headers, closing braces); the comments
#   below describe the visible statements only.
119 proc RobotSave {url} {
122 set out [RobotFileOpen visited $URL($url,host) $URL($url,path)]
# Body lines exist only if some were collected for this URL.
124 if {[info exists URL($url,line)]} {
# join with no separator argument joins the lines with single spaces.
125 set htmlContent [join $URL($url,line)]
# htmlSwitch is defined elsewhere; its handler arguments are not visible.
127 htmlSwitch $htmlContent \
# Header record with $body as title (presumably the title handler - confirm).
130 headSave $url $out $body
# Skip when the tag carries no href attribute.
134 if {![info exists parm(href)]} continue
136 headSave $url $out "untitled"
# Case 1: href starts with "#" (fragment only).
140 if {[regexp {^\#} $parm(href)]} {
# Case 2: href has the form "method:rest"; the fragment is excluded.
142 } elseif {[regexp {^([^:]+):([^#]+)} $parm(href) x method hpath]} {
# string compare returns 0 on equality, so this tests method == "http".
143 if {![string compare $method http]} {
# No "//host" part: fall back to the host of the current document.
144 if {![regexp {^//([^/]+)(.*)} $hpath x host path]} {
145 set host $URL($url,host)
# Hosts whose name does not end in ".dk" are skipped.
148 if {![regexp {\.dk$} $host]} continue
# Case 3: href starts with "/" or "~": path on the current host.
152 } elseif {[regexp {^([/~][^#]*)} $parm(href) x path]} {
153 set host $URL($url,host)
# Remaining case (presumably the else arm): href relative to the document.
156 puts " href=$parm(href)"
# Base directory: the dirname of the current path when it has a file
# extension, otherwise the current path itself.
157 set ext [file extension $URL($url,path)]
158 if {[string compare $ext {}]} {
159 set dpart [file dirname $URL($url,path)]
161 set dpart $URL($url,path)
# Strip any fragment from the href, then append it to the base directory.
163 regexp {^([^#]+)} $parm(href) x path
164 set host $URL($url,host)
165 set path [string trimright $dpart /]/$path
# No extension: force exactly one trailing "/".
168 set ext [file extension $path]
169 if {![string length $ext]} {
170 set path [string trimright $path /]/
# Presumably the else arm: strip trailing slashes instead - confirm.
172 set path [string trimright $path /]
# The path is split and rebuilt component by component; the switch arms
# are not visible (presumably "." and ".." handling - confirm).
174 set c [split $path /]
177 set path [lindex $c $i]
180 switch -- [lindex $c $i] {
188 set path [lindex $c $i]/$path
# NOTE(review): $method is visibly assigned only in case 2 above; confirm
# that it is set on the other paths that reach this line.
193 set href "$method://$host$path"
# Link record: resolved URL, then $body.
196 puts $out "<li> $href"
197 puts $out "<cp> $body"
# NOTE(review): stray ")" after $href - the regexp is applied to the URL
# with ")" appended. Harmless for this pattern, but looks like a typo.
200 if {![regexp {/.*bin/} $href)]} {
# Links without a file under "visited" get a file under "unvisited".
201 if {![RobotFileExist visited $host $path]} {
202 set outf [RobotFileOpen unvisited $host $path]
209 headSave $url $out "untitled"
# This URL has been handled: remove it from the unvisited tree.
214 RobotFileUnlink unvisited $URL($url,host) $URL($url,path)
# RobotRead url sock
#   Reads one line from $sock and processes it according to
#   URL($url,state). RobotConnect registers this proc as the readable
#   handler for the socket.
#   NOTE(review): the switch arm labels and the body of the readCount < 0
#   branch are on lines not visible here.
217 proc RobotRead {url sock} {
220 set readCount [gets $sock line]
# gets returned a negative count: no line was obtained.
221 if {$readCount < 0} {
# Non-empty line: dispatch on the current state.
227 } elseif {$readCount > 0} {
228 switch $URL($url,state) {
# A "Name: value" header line is stored as URL($url,head,Name).
231 if {[regexp {([^:]+):[ ]+(.*)} $line x name value]} {
232 set URL($url,head,$name) $value
# A body line is appended to URL($url,line).
236 lappend URL($url,line) $line
# Presumably the empty-line case (end of headers) - confirm. The state
# becomes "skip" unless the Content-type is exactly "text/html".
# NOTE(review): the comparison is exact, so a value carrying parameters
# such as "text/html; charset=..." leaves the state at "skip".
246 set URL($url,state) skip
247 if {[info exists URL($url,head,Content-type)]} {
248 if {![string compare $URL($url,head,Content-type) text/html]} {
249 set URL($url,state) html
# RobotConnect url sock
#   Registers RobotRead as the readable handler for $sock and sends the
#   HTTP/1.0 GET request line for URL($url,path).
#   NOTE(review): the rest of the request (further header lines, the
#   terminating blank line, any flush) is on lines not visible here.
255 proc RobotConnect {url sock} {
# [list ...] keeps $url and $sock as single arguments of the callback.
258 fileevent $sock readable [list RobotRead $url $sock]
259 puts $sock "GET $URL($url,path) HTTP/1.0"
# RobotGetUrl url phost
#   Parses $url, records its parts in the URL array, opens an asynchronous
#   socket to the host and hands the socket to RobotConnect.
#   NOTE(review): the assignment of $port, the handling of a URL that does
#   not match, the socket-failure arm and the return value are on lines not
#   visible here. phost is not used on any visible line.
268 proc RobotGetUrl {url phost} {
# Split into method, host and path. The path stops at the first space or
# "?", so a query string is not part of it.
273 if {[regexp {([^:]+)://([^/]+)([^ ?]*)} $url x method host path]} {
274 puts "method=$method host=$host path=$path"
# Per-URL state, keyed by the full URL string.
278 set URL($url,method) $method
279 set URL($url,host) $host
280 set URL($url,port) $port
281 set URL($url,path) $path
# Initial state for the reader.
282 set URL($url,state) head
# -async makes socket return without waiting for the connection to
# complete; errors raised by the call itself are caught.
283 if [catch {set sock [socket -async $host $port]}] {
# Input end-of-line translation is automatic; output lines end in CRLF.
286 fconfigure $sock -translation {auto crlf}
287 RobotConnect $url $sock
292 #RobotGetUrl http://www.dtv.dk/ {}