2 # $Id: dcdot.tcl,v 1.2 2000/12/08 08:55:35 adam Exp $
# RobotTextHtml: collect every <meta ...> tag found in the text held in
# variable b and store the tags as a list in URL($url,meta).
# Only part of the proc is visible in this listing; where b is loaded
# is on lines that are not shown.
11 proc RobotTextHtml {url} {
# Case-insensitive pattern matching one complete <meta ...> tag.
15 set e {<[mM][eE][tT][aA][^>]*>}
# Discard any list left from an earlier pass. unset takes the variable
# NAME, so there is no leading $ here; catch absorbs the error raised
# when the element does not exist yet.
16 catch {unset URL($url,meta)}
17 while {[regexp -indices $e $b i]} {
# i holds the first and last character index of the match within b.
18 set meta [string range $b [lindex $i 0] [lindex $i 1]]
19 lappend URL($url,meta) $meta
# Continue scanning from the closing '>' of the tag just recorded.
20 set b [string range $b [lindex $i 1] end]
# NOTE(review): lines 21-27 are not shown, so this switch may belong to
# a different proc. It dispatches on the stored content-type value.
28 switch $URL($url,head,content-type) {
# RobotReadContent: read the next chunk of data from sock and append it
# to URL($url,buf). Only part of the proc is visible in this listing;
# the body of the "nothing read" branch is not shown.
36 proc RobotReadContent {url sock} {
# Read at most 16384 characters per call.
39 set buffer [read $sock 16384]
40 set readCount [string length $buffer]
# Nothing was read; what happens inside this branch is not shown here.
42 if {$readCount <= 0} {
47 # puts "Got $readCount bytes"
# Append the new chunk to what has been collected so far for this URL.
48 set URL($url,buf) $URL($url,buf)$buffer
# RobotReadHeader: append data read from sock to URL($url,buf), search
# the buffer for two consecutive newlines, parse the status line and the
# "Name: value" header lines into URL($url,head,<lowercased name>), and
# install RobotReadContent as the readable handler for sock.
# Several lines of the proc (loop header, conditionals, switch patterns)
# are not shown in this listing.
52 proc RobotReadHeader {url sock} {
# Read at most 2148 characters per call.
55 set buffer [read $sock 2148]
56 set readCount [string length $buffer]
# Nothing was read; what happens inside this branch is not shown here.
58 if {$readCount <= 0} {
62 # puts "Got $readCount bytes"
63 set URL($url,buf) $URL($url,buf)$buffer
# Index of the first pair of consecutive newlines, or -1 if none.
65 set n [string first \n\n $URL($url,buf)]
# Characters 0..n: everything up to and including the first newline of
# the pair.
69 set headbuf [string range $URL($url,buf) 0 $n]
# Keep everything from index n onward in the buffer.
72 set URL($url,buf) [string range $URL($url,buf) $n end]
# Extract the HTTP version and the numeric status code.
# NOTE(review): the regexp result is not checked on this line; version
# and code stay unset if the pattern does not match - confirm later use.
74 regexp {^HTTP/([0-9.]+)[ ]+([0-9]+)} $headbuf x version code
75 set lines [split $headbuf \n]
# A "Name: value" line is stored under the lowercased header name.
77 if {[regexp {^([^:]+):[ ]+(.*)} $line x name value]} {
78 set URL($url,head,[string tolower $name]) $value
81 set URL($url,state) skip
# Default to an empty content type when no such header was stored.
84 if {![info exists URL($url,head,content-type)]} {
85 set URL($url,head,content-type) {}
# Both visible arms of this switch install RobotReadContent as the
# readable handler for sock; the arm patterns are not shown here.
87 switch $URL($url,head,content-type) {
89 fileevent $sock readable [list RobotReadContent $url $sock]
92 fileevent $sock readable [list RobotReadContent $url $sock]
# RobotConnect: configure sock as non-blocking, register RobotReadHeader
# as its readable handler and write the HTTP/1.0 request lines for url.
# Only part of the proc is visible in this listing.
111 proc RobotConnect {url sock} {
# Input line endings are auto-detected; output lines end in CR LF.
114 fconfigure $sock -translation {auto crlf} -blocking 0
115 fileevent $sock readable [list RobotReadHeader $url $sock]
116 puts $sock "GET $URL($url,path) HTTP/1.0"
117 puts $sock "Host: $URL($url,host)"
# agent is assigned at file level; the declaration that makes it visible
# inside this proc is presumably on a line not shown here - verify.
118 puts $sock "User-Agent: $agent"
# RobotGetUrl: split url into method, host, port and path, record them
# in the URL array with state "head", open an asynchronous socket to
# host:port and pass it to RobotConnect.
# phost is not used on any line shown here. The error branches and the
# return values are on lines missing from this listing.
123 proc RobotGetUrl {url phost} {
# method://hostport followed by the path (everything up to a space).
125 if {![regexp {([^:]+)://([^/]+)([^ ]*)} $url x method hostport path]} {
# Split host:port; the handling of a hostport without a port is inside
# this branch and is not shown.
128 if {![regexp {([^:]+):([0-9]+)} $hostport x host port]} {
132 set URL($url,method) $method
133 set URL($url,host) $host
134 set URL($url,port) $port
135 set URL($url,path) $path
136 set URL($url,state) head
# With -async, socket returns without waiting for the connection to
# be established.
138 if [catch {set sock [socket -async $host $port]}] {
141 RobotConnect $url $sock
# User-Agent string sent with each request. The output of "uname -s -r"
# is appended in parentheses when that command runs without error.
146 set agent "dcdot.tcl/0.0"
147 if {![catch {set os [exec uname -s -r]}]} {
148 set agent "$agent ($os)"
# RobotGetDCDOT: start fetching url with RobotGetUrl, then loop while the
# global robotMoreWork is true. The loop body and the branch taken when
# RobotGetUrl returns true are not shown in this listing.
151 proc RobotGetDCDOT {url} {
# NOTE(review): besides robotMoreWork this declares a global variable
# literally named "1"; looks unintended - confirm before removing.
152 global robotMoreWork 1
155 if [RobotGetUrl $url {}] {
159 while {$robotMoreWork} {
# Script tail: the URL to process is the first command-line argument.
165 set url [lindex $argv 0]
# Iterate over the entries of URL($url,meta) when that element exists;
# the loop body is not shown in this listing.
168 if {[info exist URL($url,meta)]} {
169 foreach m $URL($url,meta) {