HtmlParser学习笔记(三)-
使用NodeVisitor方式访问html结点,代码如下:package com.javaeye.suo.htmlparser.samples;import org.htmlparser.Parser;import org.htmlparser.Remark;import org.htmlparser.Tag;import org.htmlparser.Text;import org.htmlparser.visitors.NodeVisitor;import com.javaeye.suo.htmlparser.HtmlParserUtils;public class VisitorDemo extends NodeVisitor{//记录Remark Node数量private int remark_node_count;//记录Text Node数量private int tag_node_count;//记录Tag Node数量private int text_node_count;public void visitRemarkNode(Remark remark) {System.out.println("正在访问第 "+(++remark_node_count)+" 个Remark Node ");}public void visitStringNode(Text text) {System.out.println("正在访问第 "+(++tag_node_count)+" 个Text Node ");}public void visitTag(Tag tag) {System.out.println("正在访问第 "+(++text_node_count)+" 个Tag Node ");}public static void main(String[] args) {try{ //方式一:String urlStr = "http://localhost:8080/HtmlParser/htmlparser.html";Parser parser = HtmlParserUtils.getParserWithUrlConn(urlStr, "utf-8");NodeVisitor visitor = new VisitorDemo (); parser.visitAllNodesWith (visitor); System.out.println("========================================="); //方式二(常用): parser.reset(); NodeVisitor visitor2 = new NodeVisitor() { public void visitTag(Tag tag) { System.out.println("正在访问的tag:" + tag.getTagName() + "||Class is :"+ tag.getClass()); } }; parser.visitAllNodesWith(visitor2);}catch(Exception e){e.printStackTrace();}}} 补充:
一、 Visitor方式访问Html:
页:
[1]