4 # Copyright (c) 2002, DecisionSoft Limited All rights reserved.
6 # http://software.decisionsoft.com/license.html
7 # for more information.
12 # xmlpp: XML pretty printing
15 # For custom attribute sorting, create an attributeOrdering.txt file that
16 # lists the attributes, one per line, in the order in which you would like
17 # them to be sorted. Then use the -s option.
23 use vars qw($opt_h $opt_H $opt_s $opt_z $opt_t $opt_e $opt_S $opt_c $opt_n);
32 if (!getopts('nzhHsteSc') or $opt_h) {
38 # expect to find the attributeOrdering.txt file in the same directory
39 # that xmlpp is being run from
42 if ($scriptDir =~ m#/#){
43 $scriptDir =~ s#/[^/]+$##;
49 # get attribute ordering from external file
50 if (open(SORTLIST, "<$scriptDir/attributeOrdering.txt")) {
51 @sortlist = <SORTLIST>;
54 @specialSort = grep(/^\w+/, @sortlist);
57 print STDERR "Could not open $scriptDir/attributeOrdering.txt: $!\nWARNING attribute sorting will only be alphabetic\n\n";
62 # set line separator to ">" speeding up parsing of XML files
68 my $sortAttributes = $opt_s;
69 my $newLineComments = $opt_c;
70 my $splitAttributes = $opt_t;
71 my $schemaHackMode = $opt_S;
72 my $normaliseWhiteSpace = $opt_n;
74 my $filename = $ARGV[0];
75 if ($opt_z && (!$filename or $filename eq '-')) {
76 print STDERR "Error: I can't edit STDIN in place.\n";
80 if (!$opt_z && scalar(@ARGV) > 1) {
81 print STDERR "Warning: Multiple files specified without -z option\n";
88 if (!$filename or $filename eq '-') {
92 $fh = open_next_file() or exit(1);
101 my $re_name = "(?:[A-Za-z0-9_:][A-Za-z0-9_:.-]*)";
102 my $re_attr = "(?:'[^']*'|\"[^\"]*\")";
105 while ($input .= <$fh>) {
107 if ($input =~ s/^<($re_name)((?:\s+$re_name\s*=\s*$re_attr)*\s*)(\/?)>(.*)$/$4/s ) {
109 my ($name,$attr,$selfclose) = ($1,$2,$3);
110 while ($attr =~ m/($re_name)\s*=\s*($re_attr)/gs) {
111 my ($name,$value) = ($1,$2);
112 $value =~ s/^["'](.*)["']$/$1/s;
113 $attr{$name} = $value;
116 parseStart($name, 0, %attr);
117 if ($selfclose) { parseEnd($name) }
119 parseStart($name, $selfclose, %attr);
121 } elsif ($input =~ s/^<\/($re_name)\s*>(.*)$/$2/s) {
123 } elsif ($input =~ s/^<!--(.*?)-->(.*)$/$2/s) {
125 } elsif ($input =~ s/^([^<]+)(.*)$/$2/s) {
127 } elsif ($input =~ s/^(<\?[^>]*\?>)(.*)$/$2/s) {
129 } elsif ($input =~ s/^(<\!DOCTYPE[^\[>]*(\[[^\]]*\])?[^>]*>)(.*)$/$3/s) {
142 $input =~ m/([^\n]+)/gs;
143 print STDERR "WARNING: junk remaining on input: $1\n";
151 print html_escape($output)."\n"
155 print STDERR "Not overwriting file\n";
157 open FOUT,"> $filename" or die "Cannot overwrite file: $!";
159 print FOUT "$output\n"
161 print FOUT html_escape($output)."\n"
167 !$stdin && $opt_z && ($fh = open_next_file(\$filename))
174 my $selfclose = shift;
177 $textContent =~ s/\s+$//;
178 printContent($textContent);
184 if($schemaHackMode and $s =~ m/(^|:)annotation$/) {
190 if (length($output)) {
194 $output .= " " x $indent;
198 if ($sortAttributes && (scalar(@k) > 1) ){
205 # sort attributes alphabetically (default ordering)
206 @alphaSorted = sort @k;
208 # read through sorted list, if attribute doesn't have specified
209 # sort order, push it onto the end of the final array (this maintains
210 # alphabetic order). Else create a list that has attributes needing
212 foreach $attribute (@alphaSorted){
214 foreach $sortAttrib (@specialSort){
215 if ($attribute eq $sortAttrib){
216 push @needSpecialSort, $attribute;
221 push @final, $attribute;
225 # now read through the specialSort list backwards looking for
226 # any match in the needSpecialSort list. Unshift this onto the
227 # front of the final array to maintain proper order.
228 foreach my $attribute (reverse @specialSort){
229 foreach (@needSpecialSort){
230 if ($attribute eq $_){
231 unshift @final, $attribute;
239 foreach my $attr (@k) {
241 # Remove (min|max)Occurs = 1 if schemaHackMode
243 if ($schemaHackMode and $attr =~ m/^(minOccurs|maxOccurs)$/ and $attr{$attr} eq "1") {
247 if ($splitAttributes) {
248 $output .= "\n"." " x $indent." ";
250 if ($attr{$attr} =~ /'/) {
251 $output .= " $attr=\"$attr{$attr}\"";
253 $output .= " $attr='$attr{$attr}'";
256 if ($splitAttributes and @k) {
257 $output .= "\n"." " x $indent;
274 if($s =~ m/(^|:)annotation$/) {
280 if($normaliseWhiteSpace) {
281 $textContent =~ s/^\s*(.*?)\s*$/$1/;
284 printContent($textContent);
287 $output .= " " x $indent;
296 if($inAnnotation) { return }
297 $textContent .= "$s";
307 if ($s =~ /^([^\[]*\[)([^\]]*)(\].*)$/ms) {
311 $DTD =~ s/\</\n \</msg;
312 $output .= "$start$DTD\n$finish\n";
320 if($inAnnotation) { return }
321 printContent($textContent,1);
322 if ($s =~ /([^\<]*)(<.*>)(.*)/ms) {
326 $xml =~ s/\</\n\</msg;
327 $xml =~ s/(\n\s*\n?)+/\n/msg;
330 $s = "$start\n$xml\n$finish";
332 $s =~ s/\n\s*$/\n /msg;
333 if ($newLineComments) {
334 $output .= "\n<!--$s-->\n";
336 $output .= "<!--$s-->";
344 my ($LF,$ret) = ("","");
346 if ($s =~ m/\n\s*$/) {
349 if ($s =~ m/^[\s\n]*$/) {
370 my $filename = shift;
371 $$filename = shift @ARGV;
372 while ($$filename and ! -f $$filename) {
373 print STDERR "WARNING: Could not find file: $$filename\n";
374 $$filename = shift @ARGV;
379 my $fh = new FileHandle;
380 $fh->open("< $$filename") or die "Can't open $$filename: $!";
386 usage: $0 [ options ] [ file.xml ... ]
389 -h display this help message
390 -H escape characters (useful for further processing)
391 -t split attributes, one per line (useful for diff)
392 -s sort attributes (useful for diff)
393 -z in place edit (zap)
394 -e expand self closing tags (useful for diff)
395 -S schema hack mode (used by xmldiff)
396 -c place comments on new line.
397 -n normalise whitespace (remove leading and trailing whitespace from nodes