org.htmlparser.Parser クラスの main メソッドを改変したもの。
import java.io.Serializable; import java.net.HttpURLConnection; import java.net.URLConnection; import org.htmlparser.Parser; import org.htmlparser.NodeFilter; import org.htmlparser.filters.TagNameFilter; import org.htmlparser.filters.NodeClassFilter; import org.htmlparser.http.ConnectionManager; import org.htmlparser.http.ConnectionMonitor; import org.htmlparser.http.HttpHeader; import org.htmlparser.lexer.Lexer; import org.htmlparser.lexer.Page; import org.htmlparser.util.DefaultParserFeedback; import org.htmlparser.util.IteratorImpl; import org.htmlparser.util.NodeIterator; import org.htmlparser.util.NodeList; import org.htmlparser.util.ParserException; import org.htmlparser.util.ParserFeedback; import org.htmlparser.util.EncodingChangeException; import org.htmlparser.visitors.NodeVisitor; public class SimpleParser { public static void main (String [] args) { Parser parser = null; NodeFilter filter = null; if (args.length < 1 || args[0].equals ("-help")) { System.out.println ("HTML Parser v" + Parser.getVersion () + "\n"); System.out.println (); System.out.println ("Syntax : java -jar htmlparser.jar" + " <file/page> [type]"); System.out.println (" <file/page> the URL or file to be parsed"); System.out.println (" type the node type, for example:"); System.out.println (" A - Show only the link tags"); System.out.println (" IMG - Show only the image tags"); System.out.println (" TITLE - Show only the title tag"); System.out.println (); System.out.println ("Example : java -jar htmlparser.jar" + " http://www.yahoo.com"); System.out.println (); } else try { parser = new Parser (); if (1 < args.length) filter = new TagNameFilter (args[1]); else { filter = null; // for a simple dump, use more verbose settings parser.setFeedback (Parser.STDOUT); Parser.getConnectionManager ().setMonitor (parser); } Parser.getConnectionManager ().setRedirectionProcessingEnabled (true); Parser.getConnectionManager ().setCookieProcessingEnabled (true); parser.setResource (args[0]); 
System.out.println (parser.parse (filter)); } catch (EncodingChangeException ece) { try { parser.reset (); System.out.println (parser.parse (filter)); } catch (ParserException e) { e.printStackTrace (); } } catch (ParserException e) { e.printStackTrace (); } } }