2 # $Id: dcdot.tcl,v 1.4 2000/12/11 17:11:03 adam Exp $
# RobotTextHtml: scan the text held in b for META tags and append each tag,
# verbatim, to the list stored in URL($url,meta).
# NOTE(review): this listing omits lines; where b is initialised and how the
# URL array is brought into scope are not visible here.
11 proc RobotTextHtml {url} {
# Case-insensitive pattern for one META tag: from "<meta" up to the next ">".
15 set e {<[mM][eE][tT][aA][^>]*>}
# Presumably meant to discard a list left by an earlier call.
# NOTE(review): the dollar sign makes Tcl pass the element's VALUE to unset
# rather than the element name URL($url,meta); catch hides the resulting
# error, so an existing list would not be cleared - confirm intent.
16 catch {unset $URL($url,meta)}
# Repeat while the pattern still matches; i receives the inclusive
# start and end indices of the match within b.
17 while {[regexp -indices $e $b i]} {
# Extract the complete tag text, angle brackets included.
18 set meta [string range $b [lindex $i 0] [lindex $i 1]]
19 lappend URL($url,meta) $meta
# Drop everything before the closing ">" of the match; the remaining text
# starts at that ">" character, which cannot begin a new match.
20 set b [string range $b [lindex $i 1] end]
28 switch $URL($url,head,content-type) {
# RobotReadContent: readable-event handler for the response body. It is
# registered with fileevent by RobotReadHeader and appends whatever is read
# from sock to URL($url,buf).
#   url  - key prefix into the URL array for this request
#   sock - channel to read from
36 proc RobotReadContent {url sock} {
# Read at most 16384 characters per event.
39 set buffer [read $sock 16384]
40 set readCount [string length $buffer]
# Nothing was read; the body of this branch is not visible in this listing
# (presumably end-of-transfer handling - TODO confirm).
42 if {$readCount <= 0} {
47 # puts "Got $readCount bytes"
# Accumulate the new data at the end of the per-URL buffer.
48 set URL($url,buf) $URL($url,buf)$buffer
# RobotReadHeader: readable-event handler for the response header. It
# accumulates data in URL($url,buf), looks for the blank line that ends the
# header, stores each header field as URL($url,head,<lowercased name>) and
# then hands the socket over to RobotReadContent.
#   url  - key prefix into the URL array for this request
#   sock - channel to read from
# NOTE(review): this listing omits lines, so the guards around several of
# the statements below are not visible.
52 proc RobotReadHeader {url sock} {
# Read at most 2148 characters per event.
# NOTE(review): 2148 is an unusual size; possibly 2048 was meant - confirm.
55 set buffer [read $sock 2148]
56 set readCount [string length $buffer]
# Nothing was read; the body of this branch is not visible in this listing.
58 if {$readCount <= 0} {
62 # puts "Got $readCount bytes"
63 set URL($url,buf) $URL($url,buf)$buffer
# Index of the first CR LF CR LF sequence (end of header), or -1 if absent.
65 set n [string first \r\n\r\n $URL($url,buf)]
67 puts "string first match n = $n"
# Header text is characters 0..n inclusive, i.e. it ends with the first CR
# of the separator.
70 set headbuf [string range $URL($url,buf) 0 $n]
# Keep the rest as body data.
# NOTE(review): the range starts AT n, so the retained buffer still begins
# with the four separator characters; confirm whether n+4 was intended.
72 set URL($url,buf) [string range $URL($url,buf) $n end]
# Pull the HTTP version and numeric status code out of the status line.
74 regexp {^HTTP/([0-9.]+)[ ]+([0-9]+)} $headbuf x version code
# Splitting on LF leaves a trailing CR on each line; string trim below
# removes it from the stored values.
75 set lines [split $headbuf \n]
# A "Name: value" line becomes URL($url,head,name) with the name lowercased.
77 if {[regexp {^([^:]+):[ ]+(.*)} $line x name value]} {
78 set URL($url,head,[string tolower $name]) [string trim $value]
# The condition guarding this state change is not visible in this listing.
81 set URL($url,state) skip
# Default a missing Content-Type to the empty string so the switch below
# never reads an unset array element.
84 if {![info exists URL($url,head,content-type)]} {
85 set URL($url,head,content-type) {}
# Dispatch on content type; the arm patterns are not visible here, but each
# visible arm installs RobotReadContent as the readable handler.
87 switch $URL($url,head,content-type) {
89 fileevent $sock readable [list RobotReadContent $url $sock]
92 fileevent $sock readable [list RobotReadContent $url $sock]
# Debug trace printed before installing the content reader in this arm.
95 puts "ok preceeed with this thingy"
96 fileevent $sock readable [list RobotReadContent $url $sock]
# RobotConnect: configure a freshly opened socket and send the HTTP/1.0 GET
# request for url.
#   url  - key prefix into the URL array; path and host are read from it
#   sock - connected (or connecting) socket channel
114 proc RobotConnect {url sock} {
# Input translation lf leaves incoming line endings untouched; output
# translation crlf turns each newline written by puts into CR LF.
# The channel is also switched to non-blocking mode.
117 fconfigure $sock -translation {lf crlf} -blocking 0
# Header parsing is driven by readable events on the socket.
118 fileevent $sock readable [list RobotReadHeader $url $sock]
119 puts $sock "GET $URL($url,path) HTTP/1.0"
120 puts $sock "Host: $URL($url,host)"
# agent is presumably the script-level variable set near the end of the
# file; the global declaration is not visible in this listing.
121 puts $sock "User-Agent: $agent"
# RobotGetUrl: split url into its parts, record them in the URL array and
# start an asynchronous connection to the server.
#   url   - absolute URL of the form method://host[:port]path
#   phost - not referenced in any line visible in this listing
# The caller RobotGetDCDOT treats a true result as failure; the return
# statements themselves are not visible here.
126 proc RobotGetUrl {url phost} {
# method = text before "://", hostport = text up to the next "/",
# path = the remainder up to the first space. The failure branch is not
# visible in this listing.
128 if {![regexp {([^:]+)://([^/]+)([^ ]*)} $url x method hostport path]} {
# Split "host:port"; the branch taken when no numeric port is present is
# not visible (presumably it supplies defaults - TODO confirm).
131 if {![regexp {([^:]+):([0-9]+)} $hostport x host port]} {
135 set URL($url,method) $method
136 set URL($url,host) $host
137 set URL($url,port) $port
138 set URL($url,path) $path
# Initial state of the request: still waiting for the response header.
139 set URL($url,state) head
# -async makes socket return before the connection is established; catch
# yields non-zero if socket raises an error.
141 if [catch {set sock [socket -async $host $port]}] {
144 RobotConnect $url $sock
# Build the User-Agent string: a fixed program name, followed by the output
# of "uname -s -r" in parentheses when that command can be executed.
149 set agent "dcdot.tcl/0.0"
# catch returns 0 on success, so the suffix is added only if exec worked.
150 if {![catch {set os [exec uname -s -r]}]} {
151 set agent "$agent ($os)"
# RobotGetDCDOT: start fetching url and loop for as long as robotMoreWork
# is true.
#   url - absolute URL to fetch
154 proc RobotGetDCDOT {url} {
# NOTE(review): global takes a list of variable names, so the trailing 1
# also links a global variable literally named "1"; it looks unintended -
# confirm.
155 global robotMoreWork 1
# Empty string is passed for phost. A true result from RobotGetUrl enters
# this branch; its body is not visible in this listing.
158 if [RobotGetUrl $url {}] {
# Loop body is not visible here (presumably it services the event loop so
# the fileevent handlers can run - TODO confirm).
162 while {$robotMoreWork} {
# Script driver: take the URL from the command line, then report what was
# collected for it and dump the raw buffer to a file.
# NOTE(review): lines are missing from this listing, including the call
# that performs the fetch and the bodies of the loops below.
168 set url [lindex $argv 0]
# "info exist" is an abbreviation of "info exists"; the element is present
# only if at least one META tag was appended for this URL.
171 if {[info exist URL($url,meta)]} {
172 foreach m $URL($url,meta) {
# Visit every stored response header field for this URL.
176 foreach v [array names URL $url,head,*] {
179 puts "Buffer length is [string length $URL($url,buf)]"
# Write the remaining buffer to out.pdf without a trailing newline.
# NOTE(review): no fconfigure or close for this channel is visible here;
# if the buffer holds binary data, confirm the channel is put into binary
# mode and closed in the lines not shown.
180 set f [open out.pdf w]
181 puts -nonewline $f $URL($url,buf)