Tuesday, September 26, 2017

XML Parsing - DOM / SAX / StAX


Reference links:
https://examples.javacodegeeks.com/core-java/xml/java-xml-parser-tutorial/
https://www.mkyong.com/java/how-to-read-xml-file-in-java-dom-parser/


The code : https://github.com/dianaplazar10/JAVAproject.git

An XML document consists of elements. Each element has a start tag, its content and an end tag. Also, an XML document must have exactly one root element.

In java, there are 3 ways of parsing an XML:

1) DOM parser : 

The DOM (Document Object Model) parser parses the entire XML document and loads the XML content into a tree structure. Using the Node and NodeList classes, we can retrieve and modify the content of an XML file.

Inside the main method, we 

  • create a DocumentBuilder from the DocumentBuilderFactory and then, 
  • parse and store the XML file in an instance of the Document class. 
  • Then, we parse that document and when we find a node of type Node.ELEMENT_NODE
  • we retrieve all its information and store them in an instance of the Employee class. 
  • Finally, we print the information of all stored employees.
  • APIs used: DocumentBuilder, Document, NodeList, Node, Element


2) SAX parser : 

SAX (Simple API for XML) is an event-based, sequential-access parser API. A SAX parser only needs to report each parsing event as it happens, so the minimum memory it requires is proportional to the maximum depth of the XML file rather than to its total size.



  • APIs used: SAXParserFactory, SAXParser, DefaultHandler
  • The handler class extends the DefaultHandler class, in order to provide the following callbacks:



  • startElement: this event is triggered when a start tag is encountered.


  • endElement: this event is triggered when an end tag is encountered.


  • characters: this event is triggered when some text data is encountered.


3) StAX parser : 

    The StAX (Streaming API for XML) parser is able to process tree-like structured data as the data gets streamed in. StAX was designed as a middle ground between the DOM and SAX parsers. In this style of parsing, the entry point is a cursor that represents a point within the XML document. The application moves the cursor forward in order to pull information from the parser. In contrast, a SAX parser pushes data to the application instead of letting the application pull it.

    Examples:
    1) DOM parser 
    ___________________
    package com.soap.xmlParse;

    import java.io.File;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    import javax.xml.parsers.DocumentBuilder;
    import javax.xml.parsers.DocumentBuilderFactory;
    import javax.xml.parsers.ParserConfigurationException;

    import org.w3c.dom.Document;
    import org.w3c.dom.Element;
    import org.w3c.dom.Node;
    import org.w3c.dom.NodeList;
    import org.xml.sax.SAXException;

    public class DomParserExample {

         public static void main(String[] args) throws ParserConfigurationException,
              SAXException, IOException {

              if(args.length != 1)
                   throw new RuntimeException("The name of the XML file is required!");

              DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
              DocumentBuilder builder = factory.newDocumentBuilder();

              // Load the input XML document, parse it and return an instance of the
              // Document class.
              Document document = builder.parse(new File(args[0]));

              List<Employee> employees = new ArrayList<Employee>();
              NodeList nodeList = document.getDocumentElement().getChildNodes();
              for (int i = 0; i < nodeList.getLength(); i++) {
                   Node node = nodeList.item(i);
                   if (node.getNodeType() == Node.ELEMENT_NODE) {
                        Element elem = (Element) node;
                        String ID = node.getAttributes().getNamedItem("ID").getNodeValue(); // Get the value of the ID attribute.

                        // Get the value of all sub-elements.
                        String firstname = elem.getElementsByTagName("Firstname").item(0).getChildNodes().item(0).getNodeValue();
                        String lastname = elem.getElementsByTagName("Lastname").item(0).getChildNodes().item(0).getNodeValue();

                        Integer age = Integer.parseInt(elem.getElementsByTagName("Age").item(0).getChildNodes().item(0).getNodeValue());
                        Double salary = Double.parseDouble(elem.getElementsByTagName("Salary").item(0).getChildNodes().item(0).getNodeValue());

                        employees.add(new Employee(ID, firstname, lastname, age, salary));
                   }
              }
              // Print all employees.
              for (Employee empl : employees)
                   System.out.println(empl.toString());
         }

    }
    ___________________

    2) SAX parser
    ___________________
    package com.soap.xmlParse;
    import java.io.File;
    import java.io.IOException;
    import java.util.ArrayList;
    import java.util.List;

    import javax.xml.parsers.ParserConfigurationException;
    import javax.xml.parsers.SAXParser;
    import javax.xml.parsers.SAXParserFactory;

    import org.xml.sax.Attributes;
    import org.xml.sax.SAXException;
    import org.xml.sax.helpers.DefaultHandler;

    public class SAXParserExample extends DefaultHandler {

         private static List<Employee> employees = new ArrayList<Employee>();
         private static Employee empl = null;
         private static String text = null;

         @Override
         // A start tag is encountered.
         public void startElement(String uri, String localName, String qName, Attributes attributes)
              throws SAXException {

              switch (qName) {
                   // Create a new Employee.
                   case "Employee": {
                    empl = new Employee();//attributes.getValue("ID"), "Firstname", "Lastname", 33, 4000);
                    empl.setID(attributes.getValue("ID"));
                        break;
                   }
              }
         }

         @Override
         public void endElement(String uri, String localName, String qName) throws SAXException {
              switch (qName) {
                   case "Employee": {
                        // The end tag of an employee was encountered, so add the employee to the list.
                        employees.add(empl);
                        break;
                   }
                   case "Firstname": {
                        empl.setFirstname(text);
                        break;
                   }
                   case "Lastname": {
                        empl.setLastname(text);
                        break;
                   }
                   case "Age": {
                        empl.setAge(Integer.parseInt(text));
                        break;
                   }
                   case "Salary": {
                        empl.setSalary(Double.parseDouble(text));
                        break;
                   }
              }
         }

         @Override
         public void characters(char[] ch, int start, int length) throws SAXException {
              text = String.copyValueOf(ch, start, length).trim();
              System.out.println(text + ":" + ch.toString() + ":" + start + ":" + length);
         }

         public static void main(String[] args) throws ParserConfigurationException,
              SAXException, IOException {

              if (args.length != 1)
                   throw new RuntimeException("The name of the XML file is required!");

              SAXParserFactory parserFactor = SAXParserFactory.newInstance();
              SAXParser parser = parserFactor.newSAXParser();
              SAXParserExample handler = new SAXParserExample();

              parser.parse(new File(args[0]), handler);

              // Print all employees.
              for (Employee empl : employees)
                   System.out.println(empl.toString());
         }

    }
    ___________________

    3) StAX parser
    ___________________
    package com.soap.xmlParse;

    import java.io.File;
    import java.io.FileInputStream;
    import java.io.FileNotFoundException;
    import java.util.ArrayList;
    import java.util.List;
    import javax.xml.stream.XMLInputFactory;
    import javax.xml.stream.XMLStreamConstants;
    import javax.xml.stream.XMLStreamException;
    import javax.xml.stream.XMLStreamReader;

    public class StaxParserExample {

         public static void main(String[] args) throws FileNotFoundException,
              XMLStreamException {

              if (args.length != 1)
                   throw new RuntimeException("The name of the XML file is required!");

              List<Employee> employees = null;
              Employee empl = null;
              String text = null;

              XMLInputFactory factory = XMLInputFactory.newInstance();
              XMLStreamReader reader = factory.createXMLStreamReader(new FileInputStream(
                                                      new File(args[0])));

              while (reader.hasNext()) {
                   int Event = reader.next();

                   switch (Event) {
                        case XMLStreamConstants.START_ELEMENT: {
                             if ("Employee".equals(reader.getLocalName())) {
                                  empl = new Employee();
                                  empl.setID(reader.getAttributeValue(0));
                             }
                             if ("Employees".equals(reader.getLocalName()))
                                  employees = new ArrayList<>();

                             break;
                        }
                        case XMLStreamConstants.CHARACTERS: {
                             text = reader.getText().trim();
                             break;
                        }
                        case XMLStreamConstants.END_ELEMENT: {
                             switch (reader.getLocalName()) {
                                  case "Employee": {
                                       employees.add(empl);
                                       break;
                                  }
                                  case "Firstname": {
                                       empl.setFirstname(text);
                                       break;
                                  }
                                  case "Lastname": {
                                       empl.setLastname(text);
                                       break;
                                  }
                                  case "Age": {
                                       empl.setAge(Integer.parseInt(text));
                                       break;
                                  }
                                  case "Salary": {
                                       empl.setSalary(Double.parseDouble(text));
                                       break;
                                  }
                             }
                             break;
                        }
                   }
              }

              // Print all employees.
              for (Employee employee : employees)
                   System.out.println(employee.toString());
         }

    }

    ___________________