4 puts {charconv.tcl: [-p prefix] [-s split] [-o ofile] file ... }
8 proc preamble_trie {ofilehandle ifiles ofile} {
11 set totype {unsigned }
13 puts $f "/** \\file $ofile"
14 puts $f " \\brief Character conversion, generated from [lindex $ifiles 0]"
16 puts $f " Generated automatically by charconv.tcl"
18 puts $f "\#include <string.h>"
20 struct yaz_iconv_trie_flat {
22 unsigned combining : 1;
25 struct yaz_iconv_trie_dir {
27 unsigned combining : 1;
31 struct yaz_iconv_trie {
32 struct yaz_iconv_trie_flat *flat;
33 struct yaz_iconv_trie_dir *dir;
37 static unsigned long lookup(struct yaz_iconv_trie **ptrs, int ptr, unsigned char *inp,
38 size_t inbytesleft, size_t *no_read, int *combining, unsigned mask, int boffset)
40 struct yaz_iconv_trie *t = ptrs[ptr-1];
45 size_t ch = (inp[0] & mask) + boffset;
49 code = lookup(ptrs, t->dir[ch].ptr, inp+1, inbytesleft-1, no_read, combining, mask, boffset);
59 *combining = t->dir[ch].combining;
66 struct yaz_iconv_trie_flat *flat = t->flat;
69 size_t len = strlen(flat->from);
70 if (len <= inbytesleft)
73 for (i = 0; i < len; i++)
75 if (((unsigned char *) flat->from)[i] != (inp[i] & mask) + boffset)
81 *combining = flat->combining;
96 foreach x [array names trie] {
107 proc ins_trie {from to combining codename} {
109 if {![info exists trie(no)]} {
114 if {$trie(max) < $to} {
118 ins_trie_r [split $from] $to $combining $codename 0
121 proc split_trie {this} {
123 set trie($this,type) d
124 foreach e $trie($this,content) {
125 set from [lindex $e 0]
127 set combining [lindex $e 2]
128 set codename [lindex $e 3]
130 set ch [lindex $from 0]
131 set rest [lrange $from 1 end]
133 if {[llength $rest]} {
134 if {![info exist trie($this,ptr,$ch)]} {
135 set trie($this,ptr,$ch) $trie(no)
138 ins_trie_r $rest $to $combining $codename $trie($this,ptr,$ch)
140 set trie($this,to,$ch) $to
141 set trie($this,combining,$ch) $combining
142 set trie($this,codename,$ch) $codename
145 set trie($this,content) missing
148 proc ins_trie_r {from to combining codename this} {
151 if {![info exist trie($this,type)]} {
152 set trie($this,type) f
154 if {$trie($this,type) == "f"} {
156 if {[info exists trie($this,content)]} {
157 foreach e $trie($this,content) {
158 set efrom [lindex $e 0]
159 if { $efrom == $from } {
165 lappend trie($this,content) [list $from $to $combining $codename]
169 if {[llength $trie($this,content)] > $trie(split)} {
171 return [ins_trie_r $from $to $combining $codename $this]
174 set ch [lindex $from 0]
175 set rest [lrange $from 1 end]
177 if {[llength $rest]} {
178 if {![info exist trie($this,ptr,$ch)]} {
179 set trie($this,ptr,$ch) $trie(no)
182 ins_trie_r $rest $to $combining $codename $trie($this,ptr,$ch)
184 if {![info exist trie($this,to,$ch)]} {
185 set trie($this,to,$ch) $to
186 set trie($this,combining,$ch) $combining
187 set trie($this,codename,$ch) $codename
193 proc dump_trie {ofilehandle} {
198 puts $f "/* TRIE: size $trie(size) */"
201 while { [incr this -1] >= 0 } {
202 puts $f "/* PAGE $this */"
203 if {$trie($this,type) == "f"} {
204 puts $f "struct yaz_iconv_trie_flat $trie(prefix)page${this}_flat\[\] = \{"
205 foreach m $trie($this,content) {
206 puts -nonewline $f " \{\""
207 foreach d [lindex $m 0] {
208 puts -nonewline $f "\\x$d"
210 puts -nonewline $f "\", [lindex $m 2], 0x[lindex $m 1]"
212 puts $f "\}, /* $v */"
214 puts $f " \{\"\", 0\}"
216 puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"
217 puts $f " $trie(prefix)page${this}_flat, 0"
220 puts $f "struct yaz_iconv_trie_dir $trie(prefix)page${this}_dir\[256\] = \{"
221 for {set i 0} {$i < 256} {incr i} {
222 puts -nonewline $f " \{"
223 set ch [format %02X $i]
225 if {[info exist trie($this,ptr,$ch)]} {
226 puts -nonewline $f "[expr $trie($this,ptr,$ch)+1], "
229 puts -nonewline $f "0, "
231 if {[info exist trie($this,combining,$ch)]} {
232 puts -nonewline $f "$trie($this,combining,$ch), "
234 puts -nonewline $f "0, "
236 if {[info exist trie($this,to,$ch)]} {
237 puts -nonewline $f "0x$trie($this,to,$ch)\}"
240 puts -nonewline $f "0\}"
242 if {[info exist trie($this,codename,$ch)]} {
243 set v $trie($this,codename,$ch)
244 puts -nonewline $f " /* $v */"
253 puts $f "struct yaz_iconv_trie $trie(prefix)page${this} = \{"
254 puts $f " 0, $trie(prefix)page${this}_dir"
259 puts $f "struct yaz_iconv_trie *$trie(prefix)ptrs \[\] = {"
260 for {set this 0} {$this < $trie(no)} {incr this} {
261 puts $f " &$trie(prefix)page$this,"
266 puts $f "unsigned long yaz_$trie(prefix)_conv
267 (unsigned char *inp, size_t inbytesleft, size_t *no_read, int *combining, unsigned mask, int boffset)
271 code = lookup($trie(prefix)ptrs, 1, inp, inbytesleft, no_read, combining, mask, boffset);
281 proc readfile {fname ofilehandle prefix omits reverse} {
290 set f [open $fname r]
297 set cnt [gets $f line]
301 if {[regexp {</characterSet>} $line s]} {
302 dump_trie $ofilehandle
303 } elseif {[regexp {<characterSet .*ISOcode="([0-9A-Fa-f]+)"} $line s tablenumber]} {
305 set trie(prefix) "${prefix}_$tablenumber"
307 } elseif {[regexp {</code>} $line s]} {
308 if {[string length $ucs]} {
310 for {set i 0} {$i < [string length $utf]} {incr i 2} {
311 lappend hex [string range $utf $i [expr $i+1]]
313 # puts "ins_trie $hex $marc
314 ins_trie $hex $marc $combining $codename
318 for {set i 0} {$i < [string length $marc]} {incr i 2} {
319 lappend hex [string range $marc $i [expr $i+1]]
321 # puts "ins_trie $hex $ucs"
322 ins_trie $hex $ucs $combining $codename
326 if {$reverse && [string length $marc]} {
327 for {set i 0} {$i < [string length $altutf]} {incr i 2} {
328 lappend hex [string range $altutf $i [expr $i+1]]
330 if {[info exists hex]} {
331 ins_trie $hex $marc $combining $codename
340 } elseif {[regexp {<marc>([0-9A-Fa-f]*)</marc>} $line s marc]} {
342 } elseif {[regexp {<name>(.*)</name>} $line s codename]} {
344 } elseif {[regexp {<name>(.*)} $line s codename]} {
347 set cnt [gets $f line]
351 if {[regexp {(.*)</name>} $line s codename_ex]} {
352 set codename "${codename} ${codename_ex}"
354 } elseif {[regexp {<isCombining>true</isCombining>} $line s]} {
356 } elseif {[regexp {<ucs>([0-9A-Fa-f]*)</ucs>} $line s ucs]} {
358 } elseif {[regexp {<utf-8>([0-9A-Fa-f]*)</utf-8>} $line s utf]} {
360 } elseif {[regexp {<altutf-8>([0-9A-Fa-f]*)</altutf-8>} $line s altutf]} {
373 set l [llength $argv]
377 set arg [lindex $argv $i]
378 switch -glob -- $arg {
383 if {[string length $arg]} {
384 set arg [lindex $argv [incr i]]
389 if {[string length $arg]} {
390 set arg [lindex $argv [incr i]]
395 if {[string length $arg]} {
396 set arg [lindex $argv [incr i]]
401 if {[string length $arg]} {
402 set arg [lindex $argv [incr i]]
415 if {![info exists ifiles]} {
416 puts "charconv.tcl: missing input file(s)"
420 set ofilehandle [open ${ofile}.tmp w]
421 preamble_trie $ofilehandle $ifiles $ofile
423 foreach ifile $ifiles {
424 readfile $ifile $ofilehandle $prefix $omits $reverse_map
428 file rename -force ${ofile}.tmp ${ofile}