简体   繁体   中英

How to extract and transform XML into a Perl data structure

I have XML files in a folder, and I need to extract some information from them and store it in a hash. My XML files look like this:

<?xml version="1.0" encoding="UTF-8"?>
<Servicemodule xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
<Service Id="478" Name="Pump Motor">
<Description>It delivers actual pump speed</Description>
<ServiceCustomers>
   <SW Service="SKRM" Path="/work/hr_service.xml"/>
</ServiceCustomers>
<ServiceSuppliers>
   <HW Type="s" Nr="12" Service="1" Path="/work/hardware.xml"/>
   <HW Type="v" Nr="2" Service="1" Path="/work/hardware.xml"/> 
   <HW Type="mt" Nr="1" Service="1" Path="/work/hardware.xml"/>
 </ServiceSuppliers>
 </Service>
 </Servicemodule>

I want to save this information in a hash, with the Service Id as the key and the remaining information as an array of hashes stored under that key. The attributes of the SW and HW elements in both the ServiceCustomers and ServiceSuppliers elements should become entries in the array for that hash key (the Service Id). It is an easy task for experts, but I am a new learner, so this problem is giving me trouble. I tried it like this:

use strict;
use warnings;
use feature ':5.10';
use XML::Twig;
use File::Find;

# Walk $dir with File::Find and collect per-service data into %combeh.
# NOTE(review): see the notes inside wanted() -- as written, the XML::Twig
# parser is set up inside the handler closure, and that closure is never
# called, so no file is ever parsed.
my $num=0;    # not used anywhere in the lines shown here
my %combeh;
my $dir="V:/Main/work";
find(\&wanted, $dir);
# File::Find callback: called for each entry found, with the entry name in $_.
 sub wanted() {
    if ( -f and /(_service\.xml)$/) {# only regular files whose name ends in "_service.xml"
 # Intended as the XML::Twig handler for each <Service> element.
 # NOTE(review): the closing brace of this anonymous sub is the `}` that
 # follows parsefile() below, not the `};` after `if ($ser1)`.
 my $tweak_server =sub{
                my @bhi;
    my ($twig, $root) =@_;
    # NOTE(review): in the sample XML, Id is an attribute of <Service>, not a
    # child element called 'Service Id' -- presumably $root->att('Id') was
    # intended; confirm against the XML::Twig documentation.
    my $code=$root->first_child_text('Service Id');
    my $ser=$root->first_child('ServiceCustomers');
    # NOTE(review): in the sample XML, SW is nested inside ServiceCustomers
    # rather than directly under Service; $ser_cnt is never read afterwards.
    my $ser_cnt=$root->first_child_text('SW');
     my $ser1=$root->first_child('ServiceSuppliers');
    # NOTE(review): likewise HW is nested inside ServiceSuppliers; $ser1_cnt
    # is never read afterwards.
    my $ser1_cnt=$root->first_child_text('HW');
    if ($ser){
    # Store the serialized <ServiceCustomers> element and the current file path.
    push (@bhi, $ser->toString,$File::Find::name);
       $combeh{$code}=[@bhi];
         }
       if ($ser1){
    # Append the serialized <ServiceSuppliers> element and the file path, then
    # overwrite the entry with the combined list.
    push (@bhi, $ser1->toString,$File::Find::name);
       $combeh{$code}=[@bhi];
            };    # closes `if ($ser1)` only; the anonymous sub is still open
        my $roots = { Service => 1 };
  # NOTE(review): $tweak_server is referenced inside its own initializer,
  # where the lexical being declared is not yet in scope.
  my $handlers = { 'Servicemodule/Service' => $tweak_server,
                       };
       # NOTE(review): this `my $twig` redeclares the handler's first
       # parameter in the same scope.
       my $twig = new XML::Twig(TwigRoots => $roots,
                             TwigHandlers => $handlers,
                               pretty_print  => 'indented'
                               );
               $twig->parsefile($_);
                             }    # end of the anonymous sub assigned to $tweak_server
                       }    # end of `if ( -f and ... )`
               # NOTE(review): presumably File::Find discards the callback's
               # return value -- confirm; %combeh is file-scoped and can be
               # read directly after find() returns.
               return (%combeh) ;
                }

I am not able to create the hash I want using the above script. Please help me with a script that gets the attribute values and stores them in a hash. The output needs to look like this:

 '478' => [
          {
            Description => 'It delivers actual pump speed',
          },
          {
            Service => 'SKRM',
            Path    => '/work/hr_service.xml',
          },
          {
            Type    => 's',
            Nr      => '12',
            Service => '1',
            Path    => '/work/hardware.xml',
          },

          {
            Type    => 'v',
            Nr      => '2',
            Service => '1',
            Path    => '/work/hardware.xml',
          },
          {
            Type    => 'mt',
            Nr      => '1',
            Service => '1',
            Path    => '/work/hardware.xml',
          },
         ...
          ...
          ....

Please help me with this problem.

Thanks in advance.

After your suggestion, I tried it like this:

#!/usr/bin/perl
 use warnings;
 use strict;
 use XML::Simple;
 use Carp;
 use File::Find;
 use File::Spec::Functions qw( canonpath );     
 use Data::Dumper;

 # Entry point: walk the start directory and pass every "*_service.xml" file
 # to extract_information().
 # NOTE(review): `my @ARGV` appears to declare a new lexical array that hides
 # the real command-line @ARGV; because it is assigned one path right here,
 # the `die ... unless @ARGV` guard can never fire.
 my @ARGV ="C:/Main/work";die "Need directories\n" unless @ARGV;
 find(
  sub {
     # Ignore everything except regular files whose name ends in "_service.xml".
     return unless ( /(_service\.xml)$/ and -f );
     extract_information();
    return;
  },
 @ARGV
  );

# Parse the file named by $_ (set by the File::Find callback that calls this)
# with XML::Simple and print a hash of its services keyed by service Id.
sub extract_information {
         my $path= $_;

my $xml=XMLin($path);
   # NOTE(review): with XMLin's default options, a file holding exactly one
   # <Service> presumably yields a hash reference here rather than an array
   # reference, which would explain the "Not an ARRAY reference" failure at
   # the loop below -- confirm against the XML::Simple ForceArray docs.
   my $xml_services = $xml->{Service};  
   my %services;
   for my $xml_service (@$xml_services) {

    # Basic fields copied from the <Service> element.
    my %service = (
        description     => $xml_service->{Description},
        name            => $xml_service->{Name},
        id              => $xml_service->{Id},
    );

     # SW entries listed under <ServiceCustomers>.
     $service{sw} = _maybe_list( $xml_service->{ServiceCustomers}{SW} );
    # HW entries listed under <ServiceSuppliers>.
    $service{hw} = _maybe_list( $xml_service->{ServiceSuppliers}{HW} );
    # NOTE(review): this assignment replaces the sw list built from
    # <ServiceCustomers> two statements earlier instead of adding to it.
    $service{sw} = _maybe_list( $xml_service->{ServiceSuppliers}{SW} );
     # Index the service by its Id.
     $services{ $service{id} } = \%service;
 }

 print Dumper \%services;

  }
 # Return the argument unchanged when it is already an array reference;
 # otherwise wrap it in a new one-element array reference.
 sub _maybe_list {
     my ($candidate) = @_;

     if ( ref($candidate) eq 'ARRAY' ) {
         return $candidate;
     }

     return [$candidate];
 }

Thanks for your reply. I am new to XML::Simple; I studied the module and I understand your script. But when I run your code I get an error like "Not an ARRAY reference" at the for-loop line. I tried different ways to overcome this, but I still get the same error. Also, sometimes I have both SW and HW elements inside ServiceSuppliers, so I added one more line in the same format as yours. I have one question: you said "If there's a single element in the XML it won't be wrapped in an array", but sometimes ServiceCustomers contains only one element with some attributes, as shown in my XML file. Is that OK, or what should I do? Can you help me with these problems?

Please, can anyone help me with this error?

If the XML file isn't too big, you can transform it much easier with XML::Simple .

The advantage to XML::Simple is it is far more convenient to manipulate Perl data structures than XML.

The downsides are that it will consume more memory, since it must load the whole XML file into memory. It is also sensitive to the casing in the XML.

use strict;
use warnings;

use XML::Simple;
use Data::Dumper;

# Script entry point: the first command-line argument is handed to
# process_service_xml() as the XML source.
process_service_xml(shift);

# Parse one service XML file with XML::Simple and index its <Service>
# elements by their Id attribute.
#
# Arguments:
#   the XML source, passed straight through to XMLin().
#
# Side effects:
#   prints Data::Dumper dumps of the parsed input and of the transformed
#   hash to STDOUT; warns about (and skips) a <Service> without an Id.
#
# Returns:
#   a reference to the transformed hash:
#     { $id => { id, name, description, sw => [ ... ], hw => [ ... ] } }
sub process_service_xml {
    # KeyAttr => [] switches off XML::Simple's default folding of repeated
    # elements on their 'name'/'key'/'id' attributes, so every element is
    # delivered as a plain hash of its attributes.
    my $xml = XMLin( shift, KeyAttr => [] );

    # Illustrating what you've got after XML::Simple processes it.
    print "******* XML::Simple input ********\n";
    print Dumper $xml;
    print "**********************************\n";

    # Iterate through each Service to transform them.  XML::Simple only
    # produces an array when an element occurs more than once, so a file
    # with a single <Service> gives a hash ref here; normalise it to a list
    # before looping.
    my %services;
    for my $xml_service ( @{ _maybe_list( $xml->{Service} ) } ) {

        # A file without any <Service> leaves nothing to transform.
        next unless ref $xml_service eq 'HASH';

        # Pull out the basic information
        my %service = (
            description     => $xml_service->{Description},
            name            => $xml_service->{Name},

            # Redundant with the key, but useful to keep all the data about the
            # service in one place.
            id              => $xml_service->{Id},
        );

        # The Id is the hash key, so a service without one cannot be stored.
        if ( !defined $service{id} ) {
            warn "Skipping <Service> without an Id attribute\n";
            next;
        }

        # Get SW and HW as their own attributes, always as (possibly empty)
        # lists regardless of how many elements the XML contained.
        $service{sw} = _child_elements( $xml_service->{ServiceCustomers}, 'SW' );
        $service{hw} = _child_elements( $xml_service->{ServiceSuppliers}, 'HW' );

        # Store the service in the larger hash, keyed by the ID.
        $services{ $service{id} } = \%service;
    }

    # And here's what the information has been transformed into.
    print "******* Services ********\n";
    print Dumper \%services;
    print "*************************\n";

    return \%services;
}

# Return an array ref of the $tag children of $parent: empty when $parent is
# not a hash or has no such child, one element when the child occurs once.
sub _child_elements {
    my ( $parent, $tag ) = @_;

    return [] unless ref $parent eq 'HASH';
    return [ grep { defined } @{ _maybe_list( $parent->{$tag} ) } ];
}

# Normalise a value that XML::Simple may deliver either as a single item or
# as an array of items.
#
# Arguments:
#   $maybe - an array reference, any other single value, or undef.
#
# Returns:
#   the same array reference when one was passed in; an empty array
#   reference for undef (the element was absent, so there is nothing to
#   list); otherwise a new one-element array reference holding the value.
sub _maybe_list {
    my $maybe = shift;

    return []     unless defined $maybe;
    return $maybe if ref($maybe) eq 'ARRAY';
    return [$maybe];
}

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM