Java解析xml或rss的方法有好幾種
這邊記錄一下如何透過 javax.xml.parsers 和 org.w3c.dom 將 XML 解析成 DOM tree
範例用xml:
<?xml version="1.0" encoding="UTF-8"?> <channel> <title>佛祖球球</title> <link>http://google.com</link> <description>佛祖球球焦點新聞</description> <generator>Codeigniter</generator> <item> <title><![CDATA[佛祖球球出名了]]></title> <link>http://google.com/article/517.html</link> <creator>行政部</creator> <pubDate>Tue, 03 Jul 2012 00:00:01 +0800</pubDate> <description><![CDATA[內文內文]]></description> <category></category> </item> </channel>
Java:
import javax.xml.XMLConstants;
import javax.xml.parsers.DocumentBuilderFactory;
import javax.xml.parsers.DocumentBuilder;
import org.w3c.dom.Document;
import org.w3c.dom.NodeList;
import org.w3c.dom.Node;
import org.w3c.dom.Element;

/**
 * Minimal example of reading an RSS-like XML document into a DOM tree with
 * JAXP ({@code javax.xml.parsers}) and printing a few fields of every
 * {@code <item>} element.
 */
public class ParseXML {

    /**
     * Downloads and parses the feed, prints the root element name, then prints
     * the title, link and creator of each {@code <item>}.
     *
     * @param argv command-line arguments (unused)
     */
    public static void main(String argv[]) {
        try {
            // Create the factory that produces the DOM parser.
            DocumentBuilderFactory dbFactory = DocumentBuilderFactory.newInstance();

            // The document comes from a remote URL, so treat it as untrusted:
            // enable secure processing and never fetch external entities or
            // external DTDs (XXE protection). DOCTYPE declarations themselves
            // are still accepted so existing feeds keep parsing.
            dbFactory.setFeature(XMLConstants.FEATURE_SECURE_PROCESSING, true);
            dbFactory.setFeature("http://xml.org/sax/features/external-general-entities", false);
            dbFactory.setFeature("http://xml.org/sax/features/external-parameter-entities", false);
            dbFactory.setFeature("http://apache.org/xml/features/nonvalidating/load-external-dtd", false);

            // Create the parser itself.
            DocumentBuilder dBuilder = dbFactory.newDocumentBuilder();

            // Load and parse the XML.
            Document doc = dBuilder.parse("http://test.com/test/test.xml");

            // Normalize the tree (merges adjacent text nodes, drops empty ones).
            doc.getDocumentElement().normalize();

            // Root node of the XML document.
            System.out.println("Root Element :" + doc.getDocumentElement().getNodeName());

            // Collect every <item> element in the document.
            NodeList nList = doc.getElementsByTagName("item");

            for (int temp = 0; temp < nList.getLength(); temp++) {
                // Current <item> node.
                Node nNode = nList.item(temp);

                if (nNode.getNodeType() == Node.ELEMENT_NODE) {
                    Element eElement = (Element) nNode;

                    System.out.println("Title : " + getTagValue("title", eElement));
                    System.out.println("Link : " + getTagValue("link", eElement));
                    System.out.println("Creator : " + getTagValue("creator", eElement));
                }
            }
        } catch (Exception e) {
            // Top-level boundary of a demo program: report and exit.
            e.printStackTrace();
        }
    }

    /**
     * Returns the text of the first descendant element named {@code sTag}.
     *
     * <p>The whole text content of the element is used, so plain text and
     * CDATA sections are both returned without their markup, and surrounding
     * whitespace (e.g. from pretty-printed XML) is trimmed.
     *
     * @param sTag     tag name to look up below {@code eElement}
     * @param eElement element whose descendants are searched
     * @return the trimmed text, or an empty string when the tag is missing or
     *         has no content
     */
    private static String getTagValue(String sTag, Element eElement) {
        NodeList matches = eElement.getElementsByTagName(sTag);
        if (matches.getLength() == 0) {
            // Tag not present in this item: report "no value" instead of failing.
            return "";
        }

        String text = matches.item(0).getTextContent();
        return text == null ? "" : text.trim();
    }
}