#!/usr/bin/tclsh
-# $Id: dcdot.tcl,v 1.1 2000/12/07 20:16:11 adam Exp $
+# $Id: dcdot.tcl,v 1.4 2000/12/11 17:11:03 adam Exp $
#
proc RobotRestart {} {
+# RobotTextHtml url
+#   Scans the document body held in URL($url,buf) for <meta ...> tags
+#   (matched case-insensitively) and stores each complete tag, in document
+#   order, as one element of the list URL($url,meta).
proc RobotTextHtml {url} {
global URL
- set head 0
- htmlSwitch $URL($url,buf) \
- title {
- set URL($url,title) $body
- } -nonest meta {
- set scheme {}
- if {[info exist parm(scheme)]} {
- set scheme $parm(scheme)
- unset parm(scheme)
- }
- if {[info exist parm(name)]} {
- if {[info exist parm(content)]} {
- set URL($url,meta,$parm(name),$scheme) $parm(content)
- unset parm(content)
- }
- unset parm(name)
- }
- } a {
- if {[info exists parm(href)]} {
- lappend URL($url,links) $parm(href)
- }
- }
+ set b $URL($url,buf)
+ set e {<[mM][eE][tT][aA][^>]*>}
+ # Discard tags left over from an earlier call for the same URL. unset
+ # takes the variable NAME (no leading $); catch covers the first call,
+ # when the element does not exist yet.
+ catch {unset URL($url,meta)}
+ while {[regexp -indices $e $b i]} {
+ set meta [string range $b [lindex $i 0] [lindex $i 1]]
+ lappend URL($url,meta) $meta
+ # Continue scanning just past the closing '>' of the tag found.
+ set b [string range $b [expr {[lindex $i 1] + 1}] end]
+ }
}
proc Robot200 {url} {
# puts "Got $readCount bytes"
set URL($url,buf) $URL($url,buf)$buffer
- set n [string first \n\n $URL($url,buf)]
+ set n [string first \r\n\r\n $URL($url,buf)]
if {$n > 1} {
+ puts "string first match n = $n"
set code 0
set version {}
set headbuf [string range $URL($url,buf) 0 $n]
- incr n
- incr n
+ incr n 4
set URL($url,buf) [string range $URL($url,buf) $n end]
regexp {^HTTP/([0-9.]+)[ ]+([0-9]+)} $headbuf x version code
set lines [split $headbuf \n]
foreach line $lines {
if {[regexp {^([^:]+):[ ]+(.*)} $line x name value]} {
- set URL($url,head,[string tolower $name]) $value
+ set URL($url,head,[string tolower $name]) [string trim $value]
}
}
set URL($url,state) skip
text/plain {
fileevent $sock readable [list RobotReadContent $url $sock]
}
+ application/pdf {
+ puts "ok preceeed with this thingy"
+ fileevent $sock readable [list RobotReadContent $url $sock]
+ }
default {
close $sock
Robot200 $url
}
}
default {
- Robot404 $url
close $sock
RobotRestart
}
proc RobotConnect {url sock} {
global URL agent
- fconfigure $sock -translation {auto crlf} -blocking 0
+ fconfigure $sock -translation {lf crlf} -blocking 0
fileevent $sock readable [list RobotReadHeader $url $sock]
puts $sock "GET $URL($url,path) HTTP/1.0"
puts $sock "Host: $URL($url,host)"
return 0
}
-if {![llength [info commands htmlSwitch]]} {
- set e [info sharedlibextension]
- if {[catch {load ./tclrobot$e}]} {
- load tclrobot$e
- }
-}
-
-set agent "zmbot/0.0"
+set agent "dcdot.tcl/0.0"
+# Append the OS name and release reported by "uname -s -r" to the agent
+# string; if uname cannot be run the bare agent string is kept.
if {![catch {set os [exec uname -s -r]}]} {
set agent "$agent ($os)"
}
+# Command-line driver: with exactly one argument, fetch that URL and report
+# the <meta> tags found, the parsed response headers and the body size.
if {$argc == 1} {
set url [lindex $argv 0]
RobotGetDCDOT $url
- set mask {,meta,[Dd][Cc]\.*}
- foreach a [array names URL $url$mask] {
- puts "URL($a) = $URL($a)"
+ # Print every <meta ...> tag collected for the URL, if any were stored.
+ if {[info exist URL($url,meta)]} {
+ foreach m $URL($url,meta) {
+ puts $m
+ }
+ }
+ # Dump the response headers stored under URL($url,head,<name>).
+ foreach v [array names URL $url,head,*] {
+ puts "$v = $URL($v)"
}
-}
\ No newline at end of file
+ # Report and save the body only when one was actually stored, so a
+ # failed fetch does not abort with "no such element in array".
+ if {[info exists URL($url,buf)]} {
+ puts "Buffer length is [string length $URL($url,buf)]"
+ # NOTE(review): the body is written with the channel's default
+ # translation/encoding; binary payloads may need
+ # "fconfigure -translation binary" on both socket and file - confirm.
+ set f [open out.pdf w]
+ puts -nonewline $f $URL($url,buf)
+ close $f
+ }
+}