Java解析xml或rss的方法有好幾種
這邊記錄一下透過 javax.xml 和 org.w3c.dom 將 XML 轉換成 DOM tree 的方法。
範例用xml:
<?xml version="1.0" encoding="UTF-8"?> <channel> <title>佛祖球球</title> <link>http://google.com</link> <description>佛祖球球焦點新聞</description> <generator>Codeigniter</generator> <item> <title><![CDATA[佛祖球球出名了]]></title> <link>http://google.com/article/517.html</link> <creator>行政部</creator> <pubDate>Tue, 03 Jul 2012 00:00:01 +0800</pubDate> <description><![CDATA[內文內文]]></description> <category></category> </item> </channel>
Java:
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;
/**
 * Small demo that parses an XML/RSS document into a DOM tree and prints the
 * title, link and creator of every {@code <item>} element it contains.
 *
 * <p>The document location is taken from the first command-line argument when
 * one is given; otherwise a built-in default URL is used.
 */
public class ParseXML {
	/** Location parsed when no argument is supplied on the command line. */
	private static final String DEFAULT_SOURCE = "http://test.com/test/test.xml";

	/**
	 * Parses the document and prints one block of lines per {@code <item>}.
	 *
	 * @param argv optional; {@code argv[0]} is the URI of the XML document to parse
	 */
	public static void main(String[] argv) {
		String source = (argv != null && argv.length > 0) ? argv[0] : DEFAULT_SOURCE;
		try {
			// Create the factory for the XML DOM parser.
			DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();
			// The document comes from the network, so refuse to resolve external
			// entities or external DTDs (XXE hardening). Documents that merely
			// contain a DOCTYPE declaration are still accepted.
			dbFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
			dbFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
			dbFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);
			dbFactory.setXIncludeAware(false);
			dbFactory.setExpandEntityReferences(false);

			// Build the DOM document from the XML source.
			DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();
			Document doc = dBuilder.parse(source);

			// Merge adjacent text nodes so element content is not split arbitrarily.
			doc.getDocumentElement().normalize();

			// Root node of the XML document.
			System.out.println("Root Element :" + doc.getDocumentElement().getNodeName());

			// Collect every <item> element in the document.
			NodeList nList = doc.getElementsByTagName("item");
			for (int i = 0; i < nList.getLength(); i++) {
				Node nNode = nList.item(i);
				if (nNode.getNodeType() == Node.ELEMENT_NODE) {
					Element eElement = (Element) nNode;
					System.out.println("Title : " + getTagValue("title", eElement));
					System.out.println("Link : " + getTagValue("link", eElement));
					System.out.println("Creator : " + getTagValue("creator", eElement));
				}
			}
		} catch (Exception e) {
			// Top-level boundary of a command-line demo: report the failure and exit.
			e.printStackTrace();
		}
	}

	/**
	 * Returns the text of the first descendant of {@code eElement} named
	 * {@code sTag}, with CDATA section markers stripped.
	 *
	 * @param sTag     tag name to look up
	 * @param eElement element whose descendants are searched
	 * @return the element's text content, or an empty string when the tag is
	 *         absent or has no content (e.g. {@code <category></category>})
	 */
	private static String getTagValue(String sTag, Element eElement) {
		// item(0) is null when no matching descendant exists.
		Node match = eElement.getElementsByTagName(sTag).item(0);
		if (match == null) {
			return "";
		}
		// getTextContent() concatenates every text/CDATA child, so an empty
		// element yields "" instead of dereferencing a missing first child.
		String text = match.getTextContent();
		return text == null ? "" : text;
	}
}