001 import java.io.IOException;
002 import org.apache.xerces.parsers.DOMParser;
003 import org.w3c.dom.*;
004 import org.xml.sax.SAXException;
005
006 /**
007 * This class provides static methods to analyse and display a DOM
008 * structure of an XML Document
009 * @author Udo Altmann
010 * @version 1.0
011 *
012 */
013
014 public class analyse {
015
016 /**
017 * @param args File to be analysed
018 * @throws IOException
019 * @throws SAXException
020 */
021 public static void main(String args[]) throws IOException, SAXException {
022 DOMParser dp = new DOMParser();
023 //
024 // Es gibt keine DOM-Methode zum Anlegen von Documenten
025 //
026 dp.parse(args[0]);
027 //
028 // Die Verarbeitung startet mit dem Interface Document
029 //
030 Document doc = dp.getDocument();
031 //
032 // Dokumente sind auch Knoten
033 //
034 process(doc);
035 }
036
037 /**
038 * @param nd Node to be analysed. Child nodes are processed recursively
039 */
040 public static void process(Node nd) {
041 System.out.println(
042 "Node type:\t"
043 + nd.getNodeType()
044 + " - "
045 + nodeTypeToString(nd.getNodeType()));
046 System.out.println("\tname:\t" + nd.getNodeName());
047 System.out.println("\tvalue:\t\"" + nd.getNodeValue() + "\"");
048 if (nd.getParentNode() != null) {
049 System.out.println(
050 "\tparent:\t"
051 + nodeTypeToString(nd.getParentNode().getNodeType())
052 + "\t"
053 + nd.getParentNode().getNodeName());
054 } else {
055 System.out.println("\tno parent\t");
056 }
057 System.out.println("\tspecific details:\t");
058 //nd.
059 switch (nd.getNodeType()) {
060 case Node.DOCUMENT_NODE :
061 Document doc = (Document) nd;
062 System.out.println(
063 "\t\tDocumentElement: "
064 + doc.getDocumentElement().getNodeName());
065 break;
066 case Node.ELEMENT_NODE :
067 Element el = (Element) nd;
068 //
069 // hasAttributes ist nicht in DOM-Level 1!
070 //
071 if (el.hasAttributes()) {
072 System.out.println("\t\tgetTagName: " + el.getTagName());
073 System.out.println("\t\tAttribute:");
074 NamedNodeMap nnm = el.getAttributes();
075 int i;
076 for (i = 0; i < nnm.getLength(); i++) {
077 process(nnm.item(i));
078 }
079 } else {
080 System.out.println("\t\tno attributes!");
081 }
082 break;
083 case Node.ATTRIBUTE_NODE :
084 Attr at = (Attr) nd;
085 System.out.println("\t\tgetName:" + at.getName());
086 System.out.println("\t\tspecified: " + at.getSpecified());
087 System.out.println("\t\tgetValue: " + at.getValue());
088 break;
089 case Node.COMMENT_NODE :
090 Comment cm = (Comment) nd;
091 System.out.println("\t\tgetData: " + cm.getData());
092 System.out.println("\t\tgetLength: " + cm.getLength());
093 break;
094 case Node.TEXT_NODE :
095 Text tn = (Text) nd;
096 System.out.println("\t\tgetData: " + tn.getData());
097 System.out.println("\t\tgetLength: " + tn.getLength());
098 break;
099 case Node.CDATA_SECTION_NODE :
100 Text cs = (Text) nd;
101 System.out.println("\t\tgetData: " + cs.getData());
102 System.out.println("\t\tgetLength: " + cs.getLength());
103 break;
104 case Node.DOCUMENT_TYPE_NODE :
105 int i;
106 DocumentType dt = (DocumentType) nd;
107 System.out.println("\t\tgetName: " + dt.getName());
108 System.out.println("\t\tgetPublicId: " + dt.getPublicId());
109 System.out.println("\t\tgetSytemId: " + dt.getSystemId());
110 //
111 // DOM 2 und parserabhängig
112 //
113 System.out.println(
114 "\t\tgetInternalSubset: " + dt.getInternalSubset());
115 NamedNodeMap nnm = dt.getEntities();
116 if (nnm.getLength() <= 0) {
117 System.out.println("\t\tno Entities");
118 } else {
119 for (i = 0; i < nnm.getLength(); i++) {
120 process(nnm.item(i));
121 }
122 }
123 nnm = dt.getNotations();
124 if (nnm.getLength() <= 0) {
125 System.out.println("\t\tno Notations");
126 } else {
127 for (i = 0; i < nnm.getLength(); i++) {
128 process(nnm.item(i));
129 }
130 }
131 break;
132 case Node.NOTATION_NODE :
133 Notation no = (Notation) nd;
134 System.out.println("\t\tgetPublicId: " + no.getPublicId());
135 System.out.println("\t\tgetSytemId: " + no.getSystemId());
136 break;
137 case Node.ENTITY_NODE :
138 Entity en = (Entity) nd;
139 System.out.println(
140 "\t\tgetNotationName: " + en.getNotationName());
141 System.out.println("\t\tgetPublicId: " + en.getPublicId());
142 System.out.println("\t\tgetSytemId: " + en.getSystemId());
143 break;
144 case Node.PROCESSING_INSTRUCTION_NODE :
145 ProcessingInstruction pe = (ProcessingInstruction) nd;
146 System.out.println("\t\tgetTarget: " + pe.getTarget());
147 System.out.println("\t\tgetData: " + pe.getData());
148 break;
149 //
150 // Für die folgenden Knotentyp gibt es keine speziellen Attribute/Methoden
151 //
152 case Node.ENTITY_REFERENCE_NODE :
153 //
154 // EntityRefences werden evtl. nicht erkannt, wenn sie beim Parsen ersetzt werden
155 //
156 // EntityReference er = (EntityReference) nd;
157 // break;
158 case Node.DOCUMENT_FRAGMENT_NODE :
159 // DocumentFragment df = (DocumentFragment) nd;
160 // break;
161 default :
162 System.out.println("\t\tno details!");
163 }
164 if (nd.hasChildNodes()) {
165 NodeList nl = nd.getChildNodes();
166 int i;
167 for (i = 0; i < nl.getLength(); i++) {
168 process(nl.item(i));
169 }
170 }
171 }
172
173 /**
174 * Static method to convert Nodetypes to Strings
175 * @param s Type as short
176 * @return Type as String
177 */
178 public static String nodeTypeToString(short s) {
179
180 switch (s) {
181 case Node.ATTRIBUTE_NODE :
182 return "ATTRIBUTE_NODE";
183 case Node.CDATA_SECTION_NODE :
184 return "CDATA_SECTION_NODE";
185 case Node.COMMENT_NODE :
186 return "COMMENT_NODE";
187 case Node.DOCUMENT_FRAGMENT_NODE :
188 return "DOCUMENT_FRAGMENT_NODE";
189 case Node.DOCUMENT_NODE :
190 return "DOCUMENT_NODE";
191 case Node.DOCUMENT_TYPE_NODE :
192 return "DOCUMENT_TYPE_NODE";
193 case Node.ELEMENT_NODE :
194 return "ELEMENT_NODE";
195 case Node.ENTITY_NODE :
196 return "ENTITY_NODE";
197 case Node.ENTITY_REFERENCE_NODE :
198 return "ENTITY_REFERENCE_NODE";
199 case Node.NOTATION_NODE :
200 return "NOTATION_NODE";
201 case Node.PROCESSING_INSTRUCTION_NODE :
202 return "PROCESSING_INSTRUCTION_NODE";
203 case Node.TEXT_NODE :
204 return "TEXT_NODE";
205 default :
206
207 return null;
208 }
209 }
210
211
212 }