2 # $Id: dcdot.tcl,v 1.3 2000/12/08 22:46:53 adam Exp $
# RobotTextHtml -- collect the META tags found in the page text for url.
#   url  key into the URL array; results are stored in URL($url,meta)
# The text being scanned is held in the local variable b, which is set up
# in lines not visible in this excerpt.
11 proc RobotTextHtml {url} {
# Matches one META tag, in any letter case, up to its first closing angle bracket.
15 set e {<[mM][eE][tT][aA][^>]*>}
# Drop any list left over from an earlier pass. unset takes the variable
# NAME; the previous form passed the list value, so the failure was hidden
# by catch and old tags were never cleared. catch stays because the entry
# may not exist yet.
16 catch {unset URL($url,meta)}
# Repeatedly locate the next tag, record it, and continue after it.
17 while {[regexp -indices $e $b i]} {
18 set meta [string range $b [lindex $i 0] [lindex $i 1]]
19 lappend URL($url,meta) $meta
# Keep scanning from the last character of the match onward.
20 set b [string range $b [lindex $i 1] end]
28 switch $URL($url,head,content-type) {
# RobotReadContent -- readable-event handler that accumulates response data.
#   url   key into the URL array; data is gathered in URL($url,buf)
#   sock  channel to read from
36 proc RobotReadContent {url sock} {
# Read at most 16384 characters per invocation.
39 set buffer [read $sock 16384]
40 set readCount [string length $buffer]
# Nothing was read on this call; the handling itself is in lines not shown here.
42 if {$readCount <= 0} {
47 # puts "Got $readCount bytes"
# Append the new chunk to what is already buffered for this url.
48 set URL($url,buf) $URL($url,buf)$buffer
# RobotReadHeader -- readable-event handler that gathers and parses the
# HTTP response header, then hands the socket over to RobotReadContent.
#   url   key into the URL array; header fields land in URL($url,head,NAME)
#   sock  channel to read from
52 proc RobotReadHeader {url sock} {
# Read at most 2148 characters per invocation.
55 set buffer [read $sock 2148]
56 set readCount [string length $buffer]
# Nothing was read on this call; the handling itself is in lines not shown here.
58 if {$readCount <= 0} {
62 # puts "Got $readCount bytes"
63 set URL($url,buf) $URL($url,buf)$buffer
# Look for the blank line that ends the header section. This relies on line
# endings already being plain newlines -- presumably done by the channel
# translation set in RobotConnect; confirm.
65 set n [string first \n\n $URL($url,buf)]
# Header text runs from the start through the first newline of the pair.
69 set headbuf [string range $URL($url,buf) 0 $n]
# NOTE(review): the range starts at n, so the retained buffer still begins
# with the two separator newlines -- confirm this is intended.
72 set URL($url,buf) [string range $URL($url,buf) $n end]
# Extract protocol version and numeric status code from the status line.
74 regexp {^HTTP/([0-9.]+)[ ]+([0-9]+)} $headbuf x version code
75 set lines [split $headbuf \n]
# Store each "Name: value" header under its lower-cased name.
77 if {[regexp {^([^:]+):[ ]+(.*)} $line x name value]} {
78 set URL($url,head,[string tolower $name]) $value
81 set URL($url,state) skip
# Use an empty content type when the response did not supply one.
84 if {![info exists URL($url,head,content-type)]} {
85 set URL($url,head,content-type) {}
87 switch $URL($url,head,content-type) {
89 fileevent $sock readable [list RobotReadContent $url $sock]
92 fileevent $sock readable [list RobotReadContent $url $sock]
# RobotConnect -- configure the socket and send the HTTP request for url.
#   url   key into the URL array; path and host are read from it
#   sock  connected channel to write the request to
110 proc RobotConnect {url sock} {
# Non-blocking channel; outgoing lines are terminated with CR LF.
113 fconfigure $sock -translation {auto crlf} -blocking 0
# Incoming data is first handled by RobotReadHeader.
114 fileevent $sock readable [list RobotReadHeader $url $sock]
# HTTP/1.0 request line followed by the Host and User-Agent headers.
115 puts $sock "GET $URL($url,path) HTTP/1.0"
116 puts $sock "Host: $URL($url,host)"
117 puts $sock "User-Agent: $agent"
# RobotGetUrl -- split url into its parts, record them in the URL array,
# and open a connection for it.
#   url    URL of the form method://host, optional :port, then path
#   phost  not referenced in the lines visible in this excerpt
# RobotGetDCDOT tests the result as a boolean; the return statements
# themselves are not visible here.
122 proc RobotGetUrl {url phost} {
# Split into method, host-with-optional-port, and path; the path stops at
# the first space.
124 if {![regexp {([^:]+)://([^/]+)([^ ]*)} $url x method hostport path]} {
# Separate an explicit numeric port from the host name when one is given.
127 if {![regexp {([^:]+):([0-9]+)} $hostport x host port]} {
# Record the parsed parts and mark the transfer as being in the head state.
131 set URL($url,method) $method
132 set URL($url,host) $host
133 set URL($url,port) $port
134 set URL($url,path) $path
135 set URL($url,state) head
# Open the connection asynchronously; catch traps an immediate socket error.
137 if [catch {set sock [socket -async $host $port]}] {
140 RobotConnect $url $sock
# Default User-Agent string. When the uname command runs successfully, the
# operating system name and release are appended in parentheses.
145 set agent "dcdot.tcl/0.0"
146 if {![catch {set os [exec uname -s -r]}]} {
147 set agent "$agent ($os)"
# RobotGetDCDOT -- start retrieval of url and loop until the global flag
# robotMoreWork becomes false.
#   url  URL to fetch; passed to RobotGetUrl with an empty second argument
150 proc RobotGetDCDOT {url} {
# Only robotMoreWork is needed here; the stray extra argument that linked a
# variable named 1 has been removed.
151 global robotMoreWork
# The branch taken on a true result is in lines not visible in this excerpt.
154 if [RobotGetUrl $url {}] {
# Loop body is not visible here -- presumably it waits on the event loop; confirm.
158 while {$robotMoreWork} {
# The URL to process is the first command-line argument.
164 set url [lindex $argv 0]
# Walk the META tags stored for the URL, if any were collected.
167 if {[info exist URL($url,meta)]} {
168 foreach m $URL($url,meta) {
# Walk every response header stored for the URL.
172 foreach v [array names URL $url,head,*] {