package com.daimac.worm.thread; import com.daimac.worm.entity.PPTEntity; import com.daimac.worm.entity.TagEntity; import lombok.Data; import lombok.EqualsAndHashCode; import org.jsoup.Jsoup; import org.jsoup.nodes.Document; import org.jsoup.nodes.Element; import org.jsoup.select.Elements; import java.io.IOException; import java.util.ArrayList; import java.util.List; import java.util.concurrent.atomic.AtomicInteger; @EqualsAndHashCode(callSuper = true) @Data public class PPTThread extends Thread { /** * 页码开始 */ private int numBegin; /** * 页码结束 */ private int numEnd; /** * 当前工作页码 */ private int currWork; /** * 错误的页码 */ private List<String> errorPage = new ArrayList<>(); /** * 错误的索引页 */ private List<String> errorIndex = new ArrayList<>(); /** * 容器指针 */ private List<PPTEntity> ppts; /** * 重试次数 */ private int retryCount = 3; /** * 完成 */ private boolean complete = false; public PPTThread(List<PPTEntity> ppts) { this.ppts = ppts; } @Override public void run() { for (int j = this.numBegin + 1; j <= this.numEnd; j++) { this.currWork = j; String numUrl = "http://www.1ppt.com/moban/ppt_moban_" + j + ".html"; Document doc = findDocument(numUrl); if(doc == null){continue;} Elements select = doc.select(".tplist li"); AtomicInteger pnum = new AtomicInteger(1); select.forEach(v -> { Element a = v.selectFirst("a"); String href = a.attr("href"); String indexUrl = "http://www.1ppt.com/" + href; PPTEntity pptEntity = new PPTEntity(); pptEntity.setName(a.selectFirst("img").attr("alt")); Document document = findDocument(indexUrl); if(document == null){return;} // 获取ppt详情 pptEntity.setPic(v.selectFirst("img").attr("src")); String pptName = document.selectFirst(".ppt_info h1").text(); Elements infoUl = document.select(".info_left ul"); pptEntity.setSort(infoUl.select("li:eq(0) a").text()); pptEntity.setUpdateTime(infoUl.select("li:eq(1)").text().replace("更新时间:", "")); pptEntity.setPptVersion(infoUl.select("li:eq(2)").text().replace("素材版本:", "")); pptEntity.setFileSize(infoUl.select("li:eq(4)").text().replace("文件大小:", "")); pptEntity.setFileType(infoUl.select("li:eq(6)").text().replace("附件类型:", "")); Elements taga = infoUl.select("li:eq(8) a"); List<TagEntity> tagList = new ArrayList<>(); pptEntity.setTags(tagList); taga.forEach(vv -> { TagEntity tag = new TagEntity(); tag.setName(vv.text()); tagList.add(tag); }); pptEntity.setDownUrl(document.selectFirst(".downurllist a").attr("href")); this.ppts.add(pptEntity); pnum.getAndIncrement(); }); } complete = true; } /** * 获取页面文档(获取失败,自动重试${retryCount}次) * @param url 地址 * @return 文档 */ private Document findDocument(String url){ Document document = null; for (int r = 0; r < this.retryCount; r++) { try { document = Jsoup.connect(url).get(); break; } catch (IOException e) { e.printStackTrace(); } } return document; } }