380 likes | 511 Views
J2EE —— 第 5 章 Simple API for XML(SAX). 何时使用 SAX. SAX :状态无关 (StAX :状态相关 ) 事件驱动模型,不能倒退到文档前面部分 SAX 与 DOM :相同的错误处理、处理验证错误 何时使用 SAX 把现有数据转换成 XML 速度快,效率高 何时使用 DOM 修改 XML 结构,交互修改 jdom, dom4j. 回显具有 SAX 解析器的 XML 文件. public class Echo { public static void main(String argv[]) { } }.
E N D
何时使用SAX • SAX:状态无关(StAX:状态相关) • 事件驱动模型,不能倒退到文档前面部分 • SAX与DOM:相同的错误处理、处理验证错误 • 何时使用SAX • 把现有数据转换成XML • 速度快,效率高 • 何时使用DOM • 修改XML结构,交互修改 • jdom, dom4j
回显具有SAX解析器的XML文件 public class Echo { public static void main(String argv[]) { } }
导入类 import java.io.*; import org.xml.sax.*; import org.xml.sax.helpers.DefaultHandler; import javax.xml.parsers.SAXParserFactory; import javax.xml.parsers.ParserConfigurationException; import javax.xml.parsers.SAXParser;
针对I/O进行设置 public static void main(String argv[]) { if (argv.length != 1) { System.err.println("Usage: cmd filename"); System.exit(1); } try { // Set up output stream out = new OutputStreamWriter(System.out, "UTF8"); } catch (Throwable t) { t.printStackTrace(); } System.exit(0); } static private Writer out;
实现ContentHandler接口 public class Echo extends DefaultHandler { ... } • startDocument • endDocument • startElement • endElement • characters • SAXException
设置解析器 DefaultHandler handler = new Echo(); SAXParserFactory factory = SAXParserFactory.newInstance(); try { out = new OutputStreamWriter(System.out, "UTF8"); SAXParser saxParser = factory.newSAXParser(); saxParser.parse( new File(argv[0]), handler ); }
编写输出 private void emit(String s) throws SAXException { try { out.write(s); out.flush(); } catch (IOException e) { throw new SAXException("I/O error", e); } }
隔开输出 private void nl() throws SAXException { String lineEnd = System.getProperty("line.separator"); try { out.write(lineEnd); } catch (IOException e) { throw new SAXException("I/O error", e); } }
文档事件 public void startDocument() throws SAXException { emit("<?xml version='1.0' encoding='UTF-8'?>"); nl(); } public void endDocument() throws SAXException { try { nl(); out.flush(); } catch (IOException e) { throw new SAXException("I/O error", e); } }
元素开始事件 public void startElement(String namespaceURI, String sName, // simple name String qName, // qualified name Attributes attrs) throws SAXException { String eName = sName; // element name if ("".equals(eName)) eName = qName; // not namespace-aware emit("<"+eName); if (attrs != null) { for (int i = 0; i < attrs.getLength(); i++) { String aName = attrs.getLocalName(i); // Attr name if ("".equals(aName)) aName = attrs.getQName(i); emit(" "); emit(aName+"=\""+attrs.getValue(i)+"\""); } } emit(">"); }
元素结束事件 public void endElement(String namespaceURI, String sName, // simple name String qName // qualified name ) throws SAXException { String eName = sName; // element name if ("".equals(eName)) eName = qName; // not namespace-aware emit("</"+eName+">"); }
字符事件 StringBuffer textBuffer; public void characters(char buf[], int offset, int len) throws SAXException { String s = new String(buf, offset, len); if (textBuffer == null) { textBuffer = new StringBuffer(s); } else { textBuffer.append(s); } }
回显字符 private void echoText() throws SAXException { if (textBuffer == null) return; String s = ""+textBuffer; emit(s); textBuffer = null; }
调用回显字符函数 public void startElement(...) throws SAXException { echoText(); String eName = sName; // element name ... } public void endElement(...) throws SAXException { echoText(); String eName = sName; // element name ... }
编译和运行程序 • javac Echo.java • java Echo slideSample.xml • C:\dvlp\Sun\AppServer\jdk\jre\lib\rt.jar • 程序输出 • 没有注释 • 元素属性在一行中 • <item/>和<item></item>是一样的
确定引起空行的事件 private void echoText() throws SAXException { if (textBuffer == null) return; nl(); emit("CHARS: |"); String s = ""+textBuffer; emit(s); emit("|"); textBuffer = null; }
确定引起空行的事件:结果 CHARS: | | • 空行是由characters事件引起的 • 压缩输出 public void echoText() throws SAXException { nl(); emit("CHARS: "); String s = ""+textBuffer; if (!s.trim().equals("")) emit(s); }
回显解释器发送的每个字符集 public void characters(char buf[], int offset, int len) throws SAXException { if (textBuffer != null) { echoText(); textBuffer = null; } String s = new String(buf, offset, len); ... }
管理缩进 private String indentString = " "; private int indentLevel = 0; public void startElement(...) throws SAXException { indentLevel++; ... } public void endElement(...) throws SAXException { nl(); emit("END_ELM: "); emit("</"+sName+">"); indentLevel--; }
输出缩进 private void nl() throws SAXException { ... try { out.write(lineEnd); for (int i=0; i < indentLevel; i++) out.write(indentString); } catch (IOException e) { ... } }
检查输出 ELEMENT: <slideshow ... > CHARS: CHARS: ELEMENT: <slide ... END_ELM: </slide> CHARS: CHARS:
确定文档位置 public void setDocumentLocator(Locator l) { try { out.write("LOCATOR"); out.write("SYS ID: " + l.getSystemId() ); out.flush(); } catch (IOException e) { // Ignore errors } } LOCATOR SYS ID: file:<path>/../samples/slideSample01.xml
显示处理指令 public void processingInstruction(String target, String data) throws SAXException { nl(); emit("PROCESS: "); emit("<?"+target+" "+data+"?>"); } ELEMENT: <slideshow ... > PROCESS: <?my.presentation.Program QUERY="exec, tech, all"?> CHARS: ...
处理SAXParseException } catch (SAXParseException spe) { // Error generated by the parser System.out.println("\n** Parsing error" + ", line " + spe.getLineNumber() + ", uri " + spe.getSystemId()); System.out.println(" " + spe.getMessage() ); } ** Parsing error, line 22, uri file:<path>/ slideSampleBad1.xml The element type "item" must be terminated by the matching end-tag "</item>"
处理SAXException catch (SAXException sxe) { // Error generated by this application // (or a parser-initialization error) Exception x = sxe; if (sxe.getException() != null) x = sxe.getException(); x.printStackTrace(); }
其它异常 catch (ParserConfigurationException pce) { // Parser with specified options can't be built pce.printStackTrace(); } catch (IOException ioe) { // I/O error ioe.printStackTrace(); }
处理非致命错误和警告 // treat validation errors as fatal public void error(SAXParseException e) throws SAXParseException { throw e; } // dump warnings too public void warning(SAXParseException err) throws SAXParseException { System.out.println("** Warning" + ", line " + err.getLineNumber() + ", uri " + err.getSystemId()); System.out.println(" " + err.getMessage()); }
显示特殊字符和CDATA • 实体引用: Market Size &lt; predicted • 输出: CHARS: Market Size < predicted • CDATA段:不需要替换特殊字符
用DTD解析 • <!ELEMENT slideshow (slide+)> • 解析器不再传递以前正确回显的围绕slide元素的空白字符 • 可忽略空白 public void ignorableWhitespace (char buf[], int offset, int Len) throws SAXException { nl(); emit("IGNORABLE"); } • 回显实体引用(内部实体,外部实体)
使用验证解析器 // Use the validating parser SAXParserFactory factory = SAXParserFactory.newInstance(); factory.setValidating(true); • 用XML Schema验证 static final String JAXP_SCHEMA_LANGUAGE = "http://java.sun.com/xml/jaxp/properties/schemaLanguage"; static final String W3C_XML_SCHEMA = "http://www.w3.org/2001/XMLSchema"; factory.setNamespaceAware(true); saxParser.setProperty(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA);
建立合适的错误处理程序 try { saxParser.setProperty(JAXP_SCHEMA_LANGUAGE, W3C_XML_SCHEMA); } catch (SAXNotRecognizedException x) { // Happens if the parser does not support JAXP 1.2 ... } • 关联文档和Schema <documentRoot xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation='YourSchemaDefinition.xsd' >
试验验证错误 • 没有DTD • XML和DTD不匹配 • <!ELEMENT slide (image?, title, item*)> • 元素没有在DTD中定义 • <em>
解析参数化DTD • slideshow2.dtd引用xhtml.dtd • title定义冲突,换名或使用名称空间解决 • DTD警告 • 非验证:重复声明 • DTD中引用未声明的元素 • 为没有声明的元素类型声明属性 • XML的SAX解析器警告 • 验证时没有<!DOCTYPE …> • 非验证:引用一个未定义的参数实体 • 字符编码声明不正确
处理词法事件 • org.xml.sax.ext.LexicalHandler • 识别注释、CDATA段和对已解析实体的引用 • LexicalHandler如何工作 • comment(String comment) • startCDATA(), endCDATA() • startEntity(String name), endEntity(String name) • startDTD(String name, String publicId, String systemId), endDTD()
使用LexicalHandler import org.xml.sax.ext.LexicalHandler; public class Echo extends DefaultHandler implements LexicalHandler { public static void main(String argv[]) { ... // Use an instance of ourselves as the SAX event handler // 将 DefaultHandler handler = new Echo(); 改为: Echo handler = new Echo();
设置词法处理程序 SAXParser saxParser = factory.newSAXParser(); XMLReader xmlReader = saxParser.getXMLReader(); xmlReader.setProperty( "http://xml.org/sax/properties/lexical-handler", handler ); • 回显注释 public void comment(char[] ch, int start, int length) throws SAXException { String text = new String(ch, start, length); nl(); emit("COMMENT: "+text); }
使用DTDHandler和EntityResolver • 未解析的实体或符号声明:DTDHandler <!ENTITY myEntity SYSTEM "..URL.." NDATA gif> <!NOTATION gif SYSTEM "..URL.."> notationDecl(String name, String publicId, String systemId) unparsedEntityDecl(String name, String publicId, String systemId, String notationName) • 公有ID(URN)转换成系统ID(URL) resolveEntity(String publicId, String systemId)