From: Adam Dickmeiss Date: Mon, 13 Jan 2003 13:59:07 +0000 (+0000) Subject: Fix check for content-type X-Git-Tag: ZMBOT.0.1 X-Git-Url: http://lists.indexdata.dk/?a=commitdiff_plain;h=9469b1906003ce135067f87adb00647760a4de19;p=tclrobot.git Fix check for content-type --- diff --git a/dcdot.tcl b/dcdot.tcl index 3b2edcb..8e15fea 100755 --- a/dcdot.tcl +++ b/dcdot.tcl @@ -1,5 +1,5 @@ #!/usr/bin/tclsh -# $Id: dcdot.tcl,v 1.4 2000/12/11 17:11:03 adam Exp $ +# $Id: dcdot.tcl,v 1.5 2003/01/13 13:59:07 adam Exp $ # proc RobotRestart {} { @@ -10,15 +10,23 @@ proc RobotRestart {} { proc RobotTextHtml {url} { global URL - + set b $URL($url,buf) - set e {<[mM][eE][tT][aA][^>]*>} + set e {<meta[^>]*>} catch {unset $URL($url,meta)} - while {[regexp -indices $e $b i]} { + while {[regexp -nocase -indices $e $b i]} { set meta [string range $b [lindex $i 0] [lindex $i 1]] lappend URL($url,meta) $meta set b [string range $b [lindex $i 1] end] } + set b $URL($url,buf) + set e {<title>[^>]*>} + catch {unset $URL($url,meta)} + while {[regexp -nocase -indices $e $b i]} { + set title [string range $b [lindex $i 0] [lindex $i 1]] + lappend URL($url,title) $title + set b [string range $b [lindex $i 1] end] + } } proc Robot200 {url} { @@ -74,11 +82,12 @@ proc RobotReadHeader {url sock} { regexp {^HTTP/([0-9.]+)[ ]+([0-9]+)} $headbuf x version code set lines [split $headbuf \n] foreach line $lines { - if {[regexp {^([^:]+):[ ]+(.*)} $line x name value]} { + if {[regexp {^([^:]+):[ ]+([^;]*)} $line x name value]} { set URL($url,head,[string tolower $name]) [string trim $value] } } set URL($url,state) skip + puts "code=$code" switch $code { 200 { if {![info exists URL($url,head,content-type)]} { @@ -173,6 +182,11 @@ if {$argc == 1} { puts $m } } + if {[info exist URL($url,title)]} { + foreach m $URL($url,title) { + puts $m + } + } foreach v [array names URL $url,head,*] { puts "$v = $URL($v)" }