`
macken
  • 浏览: 341477 次
  • 性别: Icon_minigender_1
  • 来自: 北京
社区版块
存档分类
最新评论

dom4j读取http xml文件

    博客分类:
  • Java
 
阅读更多

使用dom4j读取http xml文件,结合XPATH提取数据

需要用到 HttpClient 和 dom4j 两个包(dom4j 的 XPath 功能依赖 jaxen)

maven依赖

 

<dependency>
			<groupId>org.apache.httpcomponents</groupId>
			<artifactId>httpclient</artifactId>
			<version>4.0.1</version>
		</dependency>
		<dependency>
			<groupId>dom4j</groupId>
			<artifactId>dom4j</artifactId>
			<version>1.6</version>
		</dependency>
		<dependency>
			<groupId>jaxen</groupId>
			<artifactId>jaxen</artifactId>
			<version>1.1.1</version>
			<exclusions>
				<exclusion>
					<groupId>com.ibm.icu</groupId>
					<artifactId>icu4j</artifactId>
				</exclusion>
			</exclusions>
		</dependency>
 

示例代码:

 

import java.io.InputStream;
import java.util.Iterator;
import java.util.List;

import org.apache.http.HttpEntity;
import org.apache.http.HttpResponse;
import org.apache.http.client.HttpClient;
import org.apache.http.client.methods.HttpGet;
import org.apache.http.impl.client.DefaultHttpClient;
import org.dom4j.Document;
import org.dom4j.Element;
import org.dom4j.io.SAXReader;

/**
 * Demo that downloads an RSS feed over HTTP, parses it with dom4j and prints
 * the text of every element matched by the XPath {@code //item/title}.
 */
public class HttpXML {
	/**
	 * Entry point; simply runs {@link #test()}.
	 *
	 * @param args ignored
	 * @throws Exception if the download or the XML parsing fails
	 */
	public static void main(String[] args) throws Exception {
		test();
	}

	/**
	 * Fetches the feed, parses the response body as XML and prints one line
	 * per {@code //item/title} match to standard output.
	 *
	 * <p>Prints nothing when the response carries no entity. The response
	 * stream and the client's connection manager are always released, even
	 * when parsing fails.
	 *
	 * @throws Exception if the request fails, the server answers with a
	 *         status other than 200, or the body is not well-formed XML
	 */
	public static void test() throws Exception {
		SAXReader reader = new SAXReader();
		// The document comes from a remote server, so do not let it pull in
		// external entities (XXE). These are the standard SAX2 feature names.
		reader.setFeature("http://xml.org/sax/features/external-general-entities", false);
		reader.setFeature("http://xml.org/sax/features/external-parameter-entities", false);

		String url = "http://bbs.btwuji.com/rss.php";
		HttpClient client = new DefaultHttpClient();
		try {
			HttpResponse response = client.execute(new HttpGet(url));

			// Fail early with a clear message instead of handing an error
			// page to the XML parser.
			int status = response.getStatusLine().getStatusCode();
			if (status != 200) {
				throw new java.io.IOException("GET " + url + " returned HTTP " + status);
			}

			HttpEntity entity = response.getEntity();
			if (entity == null) {
				return;
			}

			InputStream is = entity.getContent();
			try {
				Document doc = reader.read(is);
				List<?> titles = doc.selectNodes("//item/title");
				for (Object node : titles) {
					System.out.println(((Element) node).getText());
				}
			} finally {
				// Closing the content stream hands the connection back.
				is.close();
			}
		} finally {
			// Shut the connection manager down on every exit path, including
			// the early return and the non-200 failure above.
			client.getConnectionManager().shutdown();
		}
	}
}
 

 

 

参考资料:

XPATH:http://www.w3school.com.cn/xpath/

dom4j:http://www.ibm.com/developerworks/cn/xml/x-dom4j.html

分享到:
评论

相关推荐

Global site tag (gtag.js) - Google Analytics