`
Tristan_S
  • 浏览: 363546 次
  • 性别: Icon_minigender_1
  • 来自: 上海
社区版块
存档分类
最新评论

DOM4J vs SAX

    博客分类:
  • XML
阅读更多
网上都说 DOM4J 加载大型 XML 文件时内存开销很大，这里做一个实验来验证一下。


package xml.dom4j;

/**
 * Small experiment that loads an XML file with DOM4J and prints JVM heap figures
 * before and after the load, to show roughly how much memory the in-memory
 * document costs.
 */
public class Test {

	/** File parsed when no path is given on the command line. */
	private static final String DEFAULT_FILE = "C:\\WKP_HP1\\TestCaseHp1\\src\\operlog2.xml";

	/**
	 * Runs the experiment.
	 *
	 * @param args optional; {@code args[0]} is the XML file to load, otherwise
	 *             {@link #DEFAULT_FILE} is used
	 */
	public static void main(String[] args) {
		// Figures reported by the original author:
		// operlog.xml  - 100,000 lines, 4MB - consumed 11M of JVM memory
		// operlog2.xml - 100 lines, 5k      - consumed 0
		Test.loadLogXML(args.length > 0 ? args[0] : DEFAULT_FILE);
	}

	/**
	 * Loads {@code file} with DOM4J, walks every {@code po} element and its
	 * {@code field} children, and prints the heap in use before and after the
	 * document is built.
	 *
	 * <p>Failures are reported on standard output; nothing is thrown to the caller.
	 *
	 * @param file path of the XML file to load
	 */
	public static void loadLogXML(String file) {
		Runtime runtime = Runtime.getRuntime();
		System.out.println("当前虚拟机最大可用内存为: " + runtime.maxMemory() / 1024 / 1024 + "M");
		System.out.println("当前虚拟机已占用内存: " + usedMegabytes(runtime) + "M");

		try {
			Document doc;
			try {
				doc = new SAXReader().read(new File(file));
			} catch (DocumentException e) {
				// Include the cause so a missing or malformed file can be diagnosed.
				System.out.println("读取配置文件错误: " + e);
				return;
			}
			System.out.println("当前虚拟机已占用内存: " + usedMegabytes(runtime) + "M");

			Element root = doc.getRootElement();
			// Iterating as Object keeps this compiling whether elements() returns a
			// raw List or a List<Element>.
			for (Object poNode : root.elements("po")) {
				Element po = (Element) poNode;

				// The values are read and discarded on purpose: the experiment only
				// needs to touch every node of the loaded document.
				po.attributeValue("name");
				po.attributeValue("moduleName");
				po.attributeValue("sysId");

				for (Object fieldNode : po.elements("field")) {
					Element field = (Element) fieldNode;
					field.attributeValue("name");
					field.getText();
				}
			}
		} catch (Exception e) {
			// Top-level boundary of the experiment: report and stop rather than crash.
			System.out.println("加载日志配置文件时,碰到运行时异常: " + e.toString());
		}
	}

	/**
	 * Returns the heap currently in use, in whole megabytes.
	 *
	 * <p>{@code totalMemory()} alone is the heap reserved by the JVM, which does not
	 * change just because objects were allocated inside it; subtracting
	 * {@code freeMemory()} gives the part actually occupied. The figure is still
	 * approximate because it includes garbage that has not been collected yet.
	 */
	private static long usedMegabytes(Runtime runtime) {
		return (runtime.totalMemory() - runtime.freeMemory()) / 1024 / 1024;
	}
}


最后发现，加载 10 万行数据（约 4MB 的 XML 文件）时，JVM 也只多用了约 11M 内存，完全可以接受。


附xml文件

<doc>
<po name="cia.common.db.orm.po.TblBkeMchntUsrInf" moduleName="MMG" sysId="2">
	<field name="usrId">用户ID</field>
	<field name="mchntCd">所属机构号</field>
</po>
<po name="cia.common.db.orm.po.TblBkeMchntUsrpwdInf" moduleName="MMG" sysId="2">
	<field name="mchntCd">商户号</field>
	<field name="usrId">用户ID</field>
	<field name="usrPwd">用户密码</field>
</po>

<!-- PPP机构信息管理 -->
<po name="cia.common.db.orm.po.TblBkePppInsInfo"  moduleName="CIM" sysId="2">
	<field name="insTp">机构类型</field>
	<field name="pppInsCd">机构号</field>
	<field name="insNm">机构名称</field>
</po>
<!-- 全卡bin管理  -->
<po name="cia.common.db.orm.po.TblBkeBin" moduleName="BIM" sysId="2">
	<field name="cardBin">卡BIN</field>
	<field name="panLen">卡号长度</field>
	<field name="issInsCd">发卡机构代码</field>
	<field name="issInsCnNm">发卡机构名称</field>
	<field name="cardAttr">卡性质</field>
	<field name="cardBrand">卡品牌</field>
	<field name="cardCata">卡类别</field>

</po>
</doc>



DOM4J 用来加载系统的配置文件还是 OK 的，但用来接收外部系统的数据就不太合适了：
1, 数据量的问题：外部系统发来的数据可能很大，而 DOM4J 会把整个文档一次性读入内存
2, DOM4J 适合解析标签名（tag name）固定的 XML，处理下面这种标签名不固定的 XML 就很麻烦了。
<?xml version="1.0" encoding="utf-8"?>

<!--XML file generated by Excel Input Tool-->
<ConfigMessage xmlns="http://schemas.hp.com/CfM/ucmdb/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://schemas.hp.com/CfM/ucmdb/ ConfigMessage.xsd">
  <ConfigMessageHeader>
    <cmdbContext>cis</cmdbContext>
    <dataSource>CIS DERIVED VALUE - will be a valid MDM Company Code</dataSource>
    <dataSourceContact/>
    <dataSourceContactEmail>CPLUS001@cplus.com</dataSourceContactEmail>
    <simulate>false</simulate>
	</ConfigMessageHeader>

	<CIsAndRelations>
		<CIsForUpdate>
			<CI>
			  <netdevice>
			    <data_adminstate>0</data_adminstate>
			    <data_externalid>CPLUS_consoleswitch_002</data_externalid>
			    <data_note>ConsoleSwitch_Note</data_note>
			    <data_operationstate>0</data_operationstate>
			    <data_origin>ESL</data_origin>
			    <description>Description_ConsoleSwitch</description>
			    <document_list>DocumentsDocumentsDocumentsDocumentsDocumentsDocum</document_list>
			    <name>ConsoleSwitch_Name_Judy_002</name>
			    <TenantOwner>ACT</TenantOwner>
			    <TenantsUses>CPLUS</TenantsUses>
			    <user_label>ConsoleSwitch_UserLable_Judy_002</user_label>
			    <related_ciexternalid></related_ciexternalid>
			    <related_cit></related_cit>
			    <hp_envrmt_type_nm>Pending</hp_envrmt_type_nm>
			    <hp_user_defined_attribute1_txt>Attribute1</hp_user_defined_attribute1_txt>
			    <hp_user_defined_attribute2_txt>Attribute2</hp_user_defined_attribute2_txt>
			    <hp_user_defined_attribute3_txt>Attribute3</hp_user_defined_attribute3_txt>
			    <hp_user_defined_attribute4_txt>Attribute4</hp_user_defined_attribute4_txt>
			    <hp_user_defined_attribute5_txt>Attribute5</hp_user_defined_attribute5_txt>
			    <hp_regulatory_status_flg>true</hp_regulatory_status_flg>
			    <sacm_lifecycle_status>Pending</sacm_lifecycle_status>
			    <hp_inventory_id>InventoryId123</hp_inventory_id>
			    <codepage>CodePage</codepage>
			    <hp_cust_req_excpn_flg>true</hp_cust_req_excpn_flg>
			    <hp_location_txt>VIA|3F</hp_location_txt>
			    <hp_ci_alias_nm>AliasName123</hp_ci_alias_nm>
			    <bios_asset_tag>BiosAssetTag01</bios_asset_tag>
			    <bios_serial_number>N123456</bios_serial_number>
			    <bios_uuid>BiosUuid001</bios_uuid>
			    <calculated_location>CalculatedLocation001</calculated_location>
			    <domain_name>HP.com</domain_name>
			    <host_iscomplete>true</host_iscomplete>
			    <host_isdesktop>true</host_isdesktop>
			    <host_isroute>true</host_isroute>
			    <host_isvirtual>true</host_isvirtual>
			    <host_key>HostKey1234</host_key>
			    <host_nnm_uid>123456</host_nnm_uid>
			    <host_osaccuracy>HostOSA</host_osaccuracy>
			    <host_osrelease>HostOSRelease123</host_osrelease>
			    <host_servertype>HostServerType123</host_servertype>
			    <memory_size>1024</memory_size>
			    <net_bios_name>NetBiosName1213</net_bios_name>
			    <node_family>NodeFamily123</node_family>
			    <node_model>AIX 6.1</node_model>
			    <node_role>ConsoleSwitch</node_role>
			    <os_family>baremetal_hypervisor</os_family>
			    <primary_dns_name>PrimaryDnsName123</primary_dns_name>
			    <serial_number>SerialNo123</serial_number>
			    <swap_memory_size>100</swap_memory_size>
			    <sys_object_id>SysObjectId123</sys_object_id>
			    <vendor>IBM</vendor>
			    <hp_product_code_txt>ProductNo123</hp_product_code_txt>
			    <hp_approval_group_txt>StringVal456</hp_approval_group_txt>
			    <hp_approval_sequence_nbr>59</hp_approval_sequence_nbr>
			  </netdevice>
			</CI>
		</CIsForUpdate>
		<relationsForUpdate>
		</relationsForUpdate>
	</CIsAndRelations>
</ConfigMessage>


这时候适合用SAX来解析

package com.hp.ucmdb.util;

import java.io.FileInputStream;
import java.io.IOException;

import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import org.xml.sax.SAXParseException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.DefaultHandler;
import org.xml.sax.helpers.XMLReaderFactory;

/**
 * SAX content handler that prints the local name of every element as it opens,
 * followed by each non-blank run of character data it encounters.
 *
 * <p>An instance holds parsing state in {@link #sb}, so one instance must not be
 * used for two parses at the same time.
 */
public class ParseFileUtils extends DefaultHandler {

	/** File parsed when no path is given on the command line. */
	private static final String DEFAULT_FILE = "c:\\222.xml";

	/**
	 * Parses one file and prints its element names and text to standard output.
	 *
	 * @param args optional; {@code args[0]} is the XML file to parse, otherwise
	 *             {@link #DEFAULT_FILE} is used
	 * @throws SAXException if the document is not well-formed
	 * @throws IOException  if the file cannot be opened or read
	 */
	public static void main(String[] args) throws SAXException, IOException {
		ParseFileUtils instance = new ParseFileUtils();
		instance.doParsing(args.length > 0 ? args[0] : DEFAULT_FILE);
	}

	/**
	 * Parses {@code fileName} with this object as the content handler.
	 *
	 * @param fileName path of the XML file to parse
	 * @throws SAXException if the document is not well-formed or the parser
	 *                      rejects a requested feature
	 * @throws IOException  if the file cannot be opened or read
	 */
	public void doParsing(String fileName) throws SAXException,
			IOException {
		// NOTE(review): XMLReaderFactory is deprecated since Java 9 in favour of
		// SAXParserFactory; kept here so the file's imports stay unchanged.
		XMLReader parser = XMLReaderFactory.createXMLReader();
		// The input may come from another system, so never let the document pull
		// in external entities (XXE).
		parser.setFeature("http://xml.org/sax/features/external-general-entities", false);
		parser.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
		parser.setContentHandler(this);

		FileInputStream input = new FileInputStream(fileName);
		try {
			parser.parse(new InputSource(input));
		} finally {
			try {
				input.close();
			} catch (IOException ignored) {
				// Best effort: a failure to close must not mask the parse result.
			}
		}
	}

	// Unused by this class; retained because the field is package-visible.
	String v = "";
	// Text collected since the last element boundary. Never null.
	StringBuffer sb = new StringBuffer();

	/** Discards text left over from a previous, possibly aborted, parse. */
	@Override
	public void startDocument() throws SAXException {
		sb.setLength(0);
	}

	/**
	 * Prints the element's local name. Text collected before this element opened
	 * (mixed content such as {@code <a>hello<b/></a>}) is printed first instead of
	 * being thrown away.
	 */
	@Override
	public void startElement(String namespace, String localName,
			String qualifiedName, Attributes atts) throws SAXException {
		flushText();
		System.out.println(localName);
	}

	/** Collects character data; the parser may deliver one text run in several calls. */
	@Override
	public void characters(char[] ch, int start, int length)
			throws SAXException {
		sb.append(ch, start, length);
	}

	/** Prints the text collected since the last element boundary, if any. */
	@Override
	public void endElement(String namespaceURI, String localName, String qName)
			throws SAXException {
		flushText();
	}

	/**
	 * Prints the buffered text unless it is empty or whitespace-only, then clears
	 * the buffer. The text is printed untrimmed, exactly as collected.
	 */
	private void flushText() {
		String strValue = sb.toString();
		if (!strValue.trim().isEmpty()) {
			System.out.println(" v: " + strValue);
		}
		sb.setLength(0);
	}

}



另外, DOM4J 和 SAX 都可以进行schema验证



分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics