#!/usr/bin/tclsh
-# $Id: robot.tcl,v 1.28 2001/11/13 11:17:26 adam Exp $
+# $Id: robot.tcl,v 1.29 2001/11/14 09:15:23 adam Exp $
#
proc RobotFileNext1 {area lead} {
# puts "RobotFileNext1 area=$area lead=$lead"
if {![string length $n]} {
set robotSeq -1
flush stdout
- puts "Round robin un,ba,vi=$status(unvisited),$status(bad),$status(visited)"
+ set statusfile [open status w]
+ puts $statusfile "$status(unvisited) $status(bad) $status(visited)"
+ close $statusfile
return wait
}
incr robotSeq
+# RobotFileExist --
+#   Report whether the file that represents $path on $host exists below
+#   the directory $area.
+#
+#   The path is split on "/". The last component becomes the file name
+#   with an "f" prefix; the components before it are joined with "/d",
+#   so each directory level gets a "d" prefix. For example the path
+#   /a/b/c.html is looked up as $area/$host/da/db/fc.html.
+#
+#   Returns the result of [file exists] for that name. When the global
+#   debuglevel is greater than 3 the arguments and the computed name
+#   are printed to stdout.
proc RobotFileExist {area host path} {
- # puts "RobotFileExist begin area=$area host=$host path=$path"
+ global debuglevel
+
+ if {$debuglevel > 3} {
+ puts "RobotFileExist begin area=$area host=$host path=$path"
+ }
set lpath [split $path /]
set l [llength $lpath]
incr l -1
set t [lindex $lpath $l]
incr l -1
set npath $area/$host[join [lrange $lpath 0 $l] /d]/f$t
- # puts "RobotFileExist end npath=$npath"
+ if {$debuglevel > 3} {
+ puts "RobotFileExist end npath=$npath"
+ }
return [file exists $npath]
}
set orgPwd [pwd]
global workdir
global status
+ global debuglevel
if {![info exists workdir]} {
return stdout
}
- #puts "RobotFileOpen orgPwd=$orgPwd area=$area host=$host path=$path mode=$mode"
+ if {$debuglevel > 3} {
+ puts "RobotFileOpen orgPwd=$orgPwd area=$area host=$host path=$path mode=$mode"
+ }
if {[string compare $orgPwd $workdir]} {
puts "ooops. RobotFileOpen failed"
puts "workdir = $workdir"
}
set d [lindex $comp $len]
if {[string length $d]} {
- if {[file isdirectory $d]} {
- set out [open $d/f $mode]
- } else {
- set out [open f$d $mode]
- }
+ set out [open f$d $mode]
+ if {0} {
+ if {[file isfile $d/f]} {
+ set out [open $d/f $mode]
+ } else {
+ set out [open f$d $mode]
+ }
+ }
} else {
set out [open f $mode]
}
}
if {[string first / $surl]} {
# relative path
- regexp {^([^\#?]*)} $URL($url,path) x dpart
+ set curpath $URL($url,path)
+ if {[info exists URL($url,bpath)]} {
+ set curpath $URL($url,bpath)
+ }
+ regexp {^([^\#?]*)} $curpath x dpart
set l [string last / $dpart]
if {[expr $l >= 0]} {
set surl [string range $dpart 0 $l]$surl
}
}
+# link --
+#   Write a cross-reference record for one link found in the document
+#   at $url and, when the target is worth a (re)visit, queue it in the
+#   "unvisited" area.
+#
+#   url      - URL of the document that contains the link
+#   out      - channel that receives the <cr> record
+#   href     - link target as taken from the tag attribute
+#   body     - text written as the record's <description>
+#   distance - distance to store for the link target
+#
+#   Returns nothing. Does nothing when distance is greater than the
+#   global maxdistance or when RobotHref returns false for the link.
+proc link {url out href body distance} {
+    global URL maxdistance
+
+    # Targets farther away than maxdistance are ignored entirely.
+    if {$distance > $maxdistance} return
+
+    # RobotHref is given the variable NAMES href/host/path; host and path
+    # are read below, so it presumably fills them in via upvar (and may
+    # rewrite href) -- confirm against its definition.
+    if {![RobotHref $url href host path]} return
+
+    puts $out "<cr>"
+    puts $out "<identifier>$href</identifier>"
+    puts $out "<description>$body</description>"
+    puts $out "</cr>"
+
+    if {![RobotFileExist visited $host $path]} {
+        # Not visited yet. 1000 stands in for "no distance on record".
+        set olddistance 1000
+        if {![RobotFileExist bad $host $path]} {
+            if {[RobotFileExist unvisited $host $path]} {
+                set inf [RobotFileOpen unvisited $host $path r]
+                RobotReadRecord $inf oldurl olddistance
+                RobotFileClose $inf
+            }
+        } else {
+            # Target is listed as bad: a recorded distance of 0 cannot be
+            # undercut by a non-negative distance, so it is not queued.
+            set olddistance 0
+        }
+        if {[string length $olddistance] == 0} {
+            set olddistance 1000
+        }
+        # Braced condition: olddistance comes from a record file and must
+        # be compared as a value, not re-parsed as part of an expression.
+        if {$distance < $olddistance} {
+            set outf [RobotFileOpen unvisited $host $path]
+            RobotWriteRecord $outf $url $distance
+            RobotFileClose $outf
+        }
+    } elseif {[string compare $href $url]} {
+        # Already visited, and the link does not point at the document
+        # itself: queue it again only if this route is shorter.
+        set inf [RobotFileOpen visited $host $path r]
+        RobotReadRecord $inf xurl olddistance
+        # Use RobotFileClose like every other branch; RobotFileOpen can
+        # hand back stdout, which a raw close would shut.
+        RobotFileClose $inf
+        if {[string length $olddistance] == 0} {
+            set olddistance 1000
+        }
+        if {$distance < $olddistance} {
+            puts "OK remarking url=$url href=$href"
+            puts "olddistance = $olddistance"
+            puts "newdistance = $distance"
+            set outf [RobotFileOpen unvisited $host $path]
+            RobotWriteRecord $outf $url $distance
+            RobotFileClose $outf
+        }
+    }
+}
+
proc RobotTextHtml {url out} {
global URL maxdistance
set distance 0
+ set fdistance 0
if {$maxdistance < 1000 && [info exists URL($url,dist)]} {
- set distance [expr $URL($url,dist) + 1]
+ set fdistance $URL($url,dist)
+ set distance [expr $fdistance + 1]
}
htmlSwitch $URL($url,buf) \
title {
puts $out "<documentcontent>"
puts $out $nbody
puts $out "</documentcontent>"
+ } -nonest base {
+ if {![info exists parm(href)]} {
+ continue
+ }
+ set href [string trim $parm(href)]
+ if {![RobotHref $url href host path]} continue
+ set URL($url,bpath) $path
} -nonest a {
if {![info exists parm(href)]} {
- puts "no href"
continue
}
- if {[expr $distance <= $maxdistance]} {
- set href [string trim $parm(href)]
- if {![RobotHref $url href host path]} continue
-
- puts $out "<cr>"
- puts $out "<identifier>$href</identifier>"
- puts $out "<description>$body</description>"
- puts $out "</cr>"
-
- if {![RobotFileExist visited $host $path]} {
- set olddistance 1000
- if {![RobotFileExist bad $host $path]} {
- if {[RobotFileExist unvisited $host $path]} {
- set inf [RobotFileOpen unvisited $host $path r]
- RobotReadRecord $inf oldurl olddistance
- RobotFileClose $inf
- }
- } else {
- set olddistance 0
- }
- if {[string length $olddistance] == 0} {
- set olddistance 1000
- }
- if {[expr $distance < $olddistance]} {
- set outf [RobotFileOpen unvisited $host $path]
- RobotWriteRecord $outf $url $distance
- RobotFileClose $outf
- }
- } elseif {[string compare $href $url]} {
- set inf [RobotFileOpen visited $host $path r]
- RobotReadRecord $inf xurl olddistance
- close $inf
- if {[string length $olddistance] == 0} {
- set olddistance 1000
- }
- if {[expr $distance < $olddistance]} {
- puts "OK remarking url=$url href=$href"
- puts "olddistance = $olddistance"
- puts "newdistance = $distance"
- set outf [RobotFileOpen unvisited $host $path]
- RobotWriteRecord $outf $url $distance
- RobotFileClose $outf
- }
- }
- }
+ link $url $out [string trim $parm(href)] $body $distance
} -nonest area {
if {![info exists parm(href)]} {
- puts "no href"
continue
}
- if {[expr $distance <= $maxdistance]} {
- set href [string trim $parm(href)]
- if {![RobotHref $url href host path]} continue
-
- puts $out "<cr>"
- puts $out "<identifier>$href</identifier>"
- puts $out "<description></description>"
- puts $out "</cr>"
-
- if {![RobotFileExist visited $host $path]} {
- set olddistance 1000
- if {![RobotFileExist bad $host $path]} {
- if {[RobotFileExist unvisited $host $path]} {
- set inf [RobotFileOpen unvisited $host $path r]
- RobotReadRecord $inf oldurl olddistance
- RobotFileClose $inf
- }
- } else {
- set olddistance 0
- }
- if {[string length $olddistance] == 0} {
- set olddistance 1000
- }
- if {[expr $distance < $olddistance]} {
- set outf [RobotFileOpen unvisited $host $path]
- RobotWriteRecord $outf $url $distance
- RobotFileClose $outf
- }
- } elseif {[string compare $href $url]} {
- set inf [RobotFileOpen visited $host $path r]
- RobotReadRecord $inf xurl olddistance
- close $inf
- if {[string length $olddistance] == 0} {
- set olddistance 1000
- }
- if {[expr $distance < $olddistance]} {
- puts "OK remarking url=$url href=$href"
- puts "olddistance = $olddistance"
- puts "newdistance = $distance"
- set outf [RobotFileOpen unvisited $host $path]
- RobotWriteRecord $outf $url $distance
- RobotFileClose $outf
- }
- }
- }
+ link $url $out [string trim $parm(href)] $body $distance
} -nonest frame {
if {![info exists parm(src)]} {
- puts "no src"
continue
}
- if {[expr $distance <= $maxdistance]} {
- set href [string trim $parm(src)]
- if {![RobotHref $url href host path]} continue
-
- puts $out "<cr>"
- puts $out "<identifier>$href</identifier>"
- puts $out "<description></description>"
- puts $out "</cr>"
-
- if {![RobotFileExist visited $host $path]} {
- set olddistance 1000
- if {![RobotFileExist bad $host $path]} {
- if {[RobotFileExist unvisited $host $path]} {
- set inf [RobotFileOpen unvisited $host $path r]
- RobotReadRecord $inf oldurl olddistance
- RobotFileClose $inf
- }
- } else {
- set olddistance 0
- }
- if {[string length $olddistance] == 0} {
- set olddistance 1000
- }
- if {[expr $distance < $olddistance]} {
- set outf [RobotFileOpen unvisited $host $path]
- RobotWriteRecord $outf $url $distance
- RobotFileClose $outf
- }
- } elseif {[string compare $href $url]} {
- set inf [RobotFileOpen visited $host $path r]
- RobotReadRecord $inf xurl olddistance
- close $inf
- if {[string length $olddistance] == 0} {
- set olddistance 1000
- }
- if {[expr $distance < $olddistance]} {
- puts "OK remarking url=$url href=$href"
- puts "olddistance = $olddistance"
- puts "newdistance = $distance"
- set outf [RobotFileOpen unvisited $host $path]
- RobotWriteRecord $outf $url $distance
- RobotFileClose $outf
- }
- }
- }
+ link $url $out [string trim $parm(src)] $body $fdistance
}
}
vwait robotsRunning
}
-puts "End un,ba,vi=$status(unvisited),$status(bad),$status(visited)"
+set statusfile [open status w]
+puts $statusfile "$status(unvisited) $status(bad) $status(visited)"
+close $statusfile
+