import java.util.ArrayList; import java.util.List; import org.htmlparser.NodeFilter; import org.htmlparser.Parser; import org.htmlparser.filters.NodeClassFilter; import org.htmlparser.tags.TableColumn; import org.htmlparser.tags.TableRow; import org.htmlparser.tags.TableTag; import org.htmlparser.util.NodeList; public class ParsePage { public static String filter(String character) { character = character.replaceAll("[^(0-9)]", ""); return character; } public static String parseFromString(String content) throws Exception { Parser parser1 = new Parser(content); parser1.setEncoding("utf-8"); NodeFilter filter1 = new NodeClassFilter(TableTag.class); NodeList nodeList11 = parser1.parse(filter1); List<String> lsit= new ArrayList<String>(); for(int i = 0; i < nodeList11.size(); ++i){ if(nodeList11.elementAt(i) instanceof TableTag){ TableTag tag = (TableTag) nodeList11.elementAt(i); TableRow[] rows = tag.getRows(); for (int j = 0; j < rows.length; ++j) { TableRow row = (TableRow) rows[j]; TableColumn[] columns = row.getColumns(); for (int k = 0; k < columns.length; ++k) { String info = filter(columns[k].toPlainTextString().trim()); // System.out.println(info); lsit.add(info); } } } } System.out.println("第"+lsit.get(3)+"期获取号码:"+lsit.get(5)); return lsit.get(5); } }