4 # Copyright (c) 2002, DecisionSoft Limited All rights reserved.
6 # http://software.decisionsoft.com/license.html
7 # for more information.
12 # xmlpp: XML pretty printing
15 # For custom attribute sorting, create an attributeOrdering.txt file that
16 # lists the attributes, one per line, in the order you would like them
17 # to be sorted. Then use the -s option.
23 use vars qw($opt_h $opt_H $opt_s $opt_z $opt_t $opt_e $opt_S $opt_c $opt_n $opt_l);
32 if (!getopts('nzhHsteScl:') or $opt_h) {
36 my $indentSize = $opt_l || 2;  # spaces per indent level (-l <num>); falls back to 2 when -l is unset or 0
40 # Expect to find the attributeOrdering.txt file in the same directory
41 # that xmlpp is being run from.
44 if ($scriptDir =~ m#/#){
45 $scriptDir =~ s#/[^/]+$##;
51 # get attribute ordering from external file
52 if (open(SORTLIST, "<$scriptDir/attributeOrdering.txt")) {
53 @sortlist = <SORTLIST>;
56 @specialSort = grep(/^\w+/, @sortlist);
59 print STDERR "Could not open $scriptDir/attributeOrdering.txt: $!\nWARNING attribute sorting will only be alphabetic\n\n";
64 # set line separator to ">" speeding up parsing of XML files
70 my $sortAttributes = $opt_s;  # -s: sort attributes
71 my $newLineComments = $opt_c;  # -c: place comments on a new line
72 my $splitAttributes = $opt_t;  # -t: split attributes, one per line
73 my $schemaHackMode = $opt_S;  # -S: schema hack mode
74 my $normaliseWhiteSpace = $opt_n;  # -n: normalise whitespace in text nodes
76 my $filename = $ARGV[0];
77 if ($opt_z && (!$filename or $filename eq '-')) {
78 print STDERR "Error: I can't edit STDIN in place.\n";
82 if (!$opt_z && scalar(@ARGV) > 1) {
83 print STDERR "Warning: Multiple files specified without -z option\n";
90 if (!$filename or $filename eq '-') {
94 $fh = open_next_file() or exit(1);
103 my $re_name = "(?:[A-Za-z0-9_:][A-Za-z0-9_:.-]*)";  # regex fragment for a tag/attribute name; '.' and '-' are not allowed as the first character
104 my $re_attr = "(?:'[^']*'|\"[^\"]*\")";  # regex fragment for an attribute value: a single- or double-quoted string, quotes included
107 while ($input .= <$fh>) {
109 if ($input =~ s/^<($re_name)((?:\s+$re_name\s*=\s*$re_attr)*\s*)(\/?)>(.*)$/$4/s ) {
111 my ($name,$attr,$selfclose) = ($1,$2,$3);
112 while ($attr =~ m/($re_name)\s*=\s*($re_attr)/gs) {
113 my ($name,$value) = ($1,$2);
114 $value =~ s/^["'](.*)["']$/$1/s;
115 $attr{$name} = $value;
118 parseStart($name, 0, %attr);
119 if ($selfclose) { parseEnd($name) }
121 parseStart($name, $selfclose, %attr);
123 } elsif ($input =~ s/^<\/($re_name)\s*>(.*)$/$2/s) {
125 } elsif ($input =~ s/^<!--(.*?)-->(.*)$/$2/s) {
127 } elsif ($input =~ s/^([^<]+)(.*)$/$2/s) {
129 } elsif ($input =~ s/^(<\?[^>]*\?>)(.*)$/$2/s) {
131 } elsif ($input =~ s/^(<\!DOCTYPE[^\[>]*(\[[^\]]*\])?[^>]*>)(.*)$/$3/s) {
144 $input =~ m/([^\n]+)/gs;
145 print STDERR "WARNING: junk remaining on input: $1\n";
153 print html_escape($output)."\n"
157 print STDERR "Not overwriting file\n";
159 open FOUT,"> $filename" or die "Cannot overwrite file: $!";
161 print FOUT "$output\n"
163 print FOUT html_escape($output)."\n"
169 !$stdin && $opt_z && ($fh = open_next_file(\$filename))
176 my $selfclose = shift;
179 $textContent =~ s/\s+$//;
180 printContent($textContent);
186 if($schemaHackMode and $s =~ m/(^|:)annotation$/) {
192 if (length($output)) {
196 $output .= " " x ($indent * $indentSize);
200 if ($sortAttributes && (scalar(@k) > 1) ){
207 # sort attributes alphabetically (default ordering)
208 @alphaSorted = sort @k;
210 # read through sorted list, if attribute doesn't have specified
211 # sort order, push it onto the end of the final array (this maintains
212 # alphabetic order). Else create a list that has attributes needing
214 foreach $attribute (@alphaSorted){
216 foreach $sortAttrib (@specialSort){
217 if ($attribute eq $sortAttrib){
218 push @needSpecialSort, $attribute;
223 push @final, $attribute;
227 # now read through the specialSort list backwards looking for
228 # any match in the needSpecialSort list. Unshift this onto the
229 # front of the final array to maintain proper order.
230 foreach my $attribute (reverse @specialSort){
231 foreach (@needSpecialSort){
232 if ($attribute eq $_){
233 unshift @final, $attribute;
241 foreach my $attr (@k) {
243 # Remove (min|max)Occurs = 1 if schemaHackMode
245 if ($schemaHackMode and $attr =~ m/^(minOccurs|maxOccurs)$/ and $attr{$attr} eq "1") {
249 if ($splitAttributes) {
250 $output .= "\n"." " x ($indent * $indentSize) ." ";
252 if ($attr{$attr} =~ /'/) {
253 $output .= " $attr=\"$attr{$attr}\"";
255 $output .= " $attr='$attr{$attr}'";
258 if ($splitAttributes and @k) {
259 $output .= "\n"." " x ($indent * $indentSize);
276 if($s =~ m/(^|:)annotation$/) {
282 if($normaliseWhiteSpace) {
283 $textContent =~ s/^\s*(.*?)\s*$/$1/;
286 printContent($textContent);
289 $output .= " " x ($indent * $indentSize);
298 if($inAnnotation) { return }
299 $textContent .= "$s";
309 if ($s =~ /^([^\[]*\[)([^\]]*)(\].*)$/ms) {
313 $DTD =~ s/\</\n \</msg;
314 $output .= "$start$DTD\n$finish\n";
322 if($inAnnotation) { return }
323 printContent($textContent,1);
324 if ($s =~ /([^\<]*)(<.*>)(.*)/ms) {
328 $xml =~ s/\</\n\</msg;
329 $xml =~ s/(\n\s*\n?)+/\n/msg;
332 $s = "$start\n$xml\n$finish";
334 $s =~ s/\n\s*$/\n /msg;
335 if ($newLineComments) {
336 $output .= "\n<!--$s-->\n";
338 $output .= "<!--$s-->";
346 my ($LF,$ret) = ("","");
348 if ($s =~ m/\n\s*$/) {
351 if ($s =~ m/^[\s\n]*$/) {
372 my $filename = shift;
373 $$filename = shift @ARGV;
374 while ($$filename and ! -f $$filename) {
375 print STDERR "WARNING: Could not find file: $$filename\n";
376 $$filename = shift @ARGV;
381 my $fh = new FileHandle;
382 $fh->open("< $$filename") or die "Can't open $$filename: $!";
388 usage: $0 [ options ] [ file.xml ... ]
391 -h display this help message
392 -H escape characters (useful for further processing)
393 -t split attributes, one per line (useful for diff)
394 -s sort attributes (useful for diff)
395 -z in place edit (zap)
396 -e expand self closing tags (useful for diff)
397 -S schema hack mode (used by xmldiff)
398 -c place comments on new line.
399 -n normalise whitespace (remove leading and trailing whitespace from nodes
401 -l <num> Indent each level by <num> spaces [default: 2]