Make any XML readable

XML files are often delivered without any formatting, with everything on a single line. That is no problem for an XML parser, but it is very inconvenient for humans. This script converts any XML file to a more readable format, with indentation and newlines where you want them. The script does not use the template environment I use for applications, so that it can easily be used in any environment.

#!/usr/bin/perl

# ReadableXML.pl converts any well-formed XML file into a file that is
# relatively easy for humans to read: tags start on a new line and
# everything is indented according to its nesting depth.
# The only intended change is that empty (self-closing) tags are replaced by
# an opening and a closing tag.
#
# Usage: ReadableXML.pl <xml-file>
#   The result is written to "<xml-file>read".
#
# Limitations:
#   Only Start, Char and End handlers are registered, so only elements,
#   their attributes and character data are written to the output file.
#   Other events (XML declaration, DOCTYPE, comments, processing
#   instructions) have no handler here and are therefore not copied.

use strict;
use warnings;
use XML::Parser;

if ( @ARGV != 1 ) {
    print "Usage: ReadableXML.pl <xml-file>\n";
    print "  Output is <xml-file>read\n";
    exit;
}

# State shared with the handler subs further down in this file.
#   $outline   - the piece of output currently being assembled
#   $char      - character data collected since the last tag was written
#   $filler    - current indentation (one space per open element)
#   $lastwrite - kind of the last thing written: "none", "start" or "char"
#   $start     - prefix (newline and/or indentation) for the next tag
my ( $outline, $char, $filler, $lastwrite, $start );
$char      = "";        # initialised so the handlers never compare undef
$filler    = " ";
$lastwrite = "none";
$start     = "";

# Remember which elements are open. Declared before parsing starts because
# the handlers push to / pop from it while parsefile() is running.
my @element_stack;

my $document = $ARGV[0];
my $target   = $document . "read";

# Three-argument open keeps characters in the file name from being read as
# an open mode. XML::Parser hands decoded characters to the handlers, so the
# output handle needs an explicit encoding layer.
open( OUT, '>:encoding(UTF-8)', $target )
    or die("Open failed for '$target': $!");

# Initialize the parser. The handlers must be passed as code references
# (\&name); a bare &name would call the sub right here instead.
my $parser = XML::Parser->new(
    Handlers => {
        Start => \&handle_start,
        Char  => \&handle_char,
        End   => \&handle_end,
    }
);

my $starttime = scalar localtime();
our $count = 0;

$parser->parsefile($document);

# Buffered write errors only surface when the handle is closed.
close(OUT) or die("Close failed for '$target': $!");

# Report when the conversion started and when it finished.
print "$starttime\n";
print scalar( localtime() ), "\n";
# Process a start-of-element event: write the opening tag, including its
# attributes, on a new line at the current indentation and then increase the
# indentation for the element's content.
#
# Parameters (as passed by XML::Parser):
#   $expat   - the parser object
#   $element - name of the element being opened
#   @attrs   - attribute names and values as a flat list, in document order
#
# Reads and updates the file-level state ($char, $start, $lastwrite,
# $outline, $filler, @element_stack) and writes to the OUT handle.
sub handle_start {
    my ( $expat, $element, @attrs ) = @_;

    # Remember this element and the line it starts on by pushing a little
    # hash onto the element stack.
    push( @element_stack,
        { element => $element, line => $expat->current_line } );

    # Text that precedes a child element belongs to the parent. Write it now;
    # otherwise it stays buffered and ends up inside the child.
    if ( defined $char && $char ne "" ) {
        print OUT $char;
        $char = "";
    }

    # Every tag except the very first one starts on a new line.
    if ( $lastwrite ne "none" ) {
        $start = "\n";
    }
    $lastwrite = "start";

    $outline = "$start$filler<$element";

    # Walk the attributes pairwise to keep their original order. The parser
    # delivers decoded values, so escape them again and quote them to keep
    # the output well-formed.
    while ( my ( $key, $value ) = splice( @attrs, 0, 2 ) ) {
        $value = "" unless defined $value;
        $value =~ s/&/&amp;/g;
        $value =~ s/</&lt;/g;
        $value =~ s/>/&gt;/g;
        $value =~ s/"/&quot;/g;
        $outline .= qq{ $key="$value"};
    }

    $outline .= ">";
    print OUT $outline;

    # Content of this element is indented one step deeper.
    $filler .= " ";
}
 
# Process an end-of-element event: write any pending character data and the
# closing tag, and decrease the indentation again.
#
# Parameters (as passed by XML::Parser):
#   $expat   - the parser object (not used here)
#   $element - name of the element being closed
#
# Reads and updates the file-level state ($char, $start, $lastwrite,
# $outline, $filler, @element_stack) and writes to the OUT handle.
sub handle_end {
    my ( $expat, $element ) = @_;

    # Just pop from the element stack, trusting that it is the matching
    # element: XML::Parser itself aborts on any well-formedness error.
    pop(@element_stack);

    # Leave the indentation level of the element's content.
    chop $filler;

    # Guard against $char never having been assigned.
    my $pending = defined $char ? $char : "";

    if ( $pending eq "" && $lastwrite ne "start" ) {

        # Nothing to write and the previous output was not this element's
        # own opening tag: the closing tag gets its own indented line.
        $start = "\n$filler";
    }
    else {

        # Either there is text to write, or the element is empty: keep the
        # closing tag on the same line, directly after the text/opening tag.
        print OUT $pending;
        $char      = "";
        $start     = "";
        $lastwrite = "char";
    }

    $outline = "$start</$element>";
    print OUT $outline;
}

# Process a character-data event: collect the text in $char so that it can be
# written together with the next tag.
#
# Parameters (as passed by XML::Parser):
#   $expat  - the parser object (not used here)
#   $string - a piece of character data; one run of text may arrive in
#             several calls
#
# Whitespace-only pieces are dropped when no text is pending, which removes
# the layout whitespace between tags. Inside a run of text they are kept,
# so that words separated only by a line break are not glued together.
sub handle_char {
    my ( $expat, $string ) = @_;

    my $have_text = defined $char && $char ne "";
    return unless $string =~ /\S/ || $have_text;

    # The parser delivers decoded text (e.g. "&" for "&amp;"); escape the
    # markup characters again so the output stays well-formed.
    $string =~ s/&/&amp;/g;
    $string =~ s/</&lt;/g;
    $string =~ s/>/&gt;/g;

    $char .= $string;
}

 Readable_XML

Leave a Comment

This site uses Akismet to reduce spam. Learn how your comment data is processed.