package cn.edu.hfut.dmic.webcollector.example;

import cn.edu.hfut.dmic.webcollector.model.CrawlDatums;
import cn.edu.hfut.dmic.webcollector.model.Page;
import cn.edu.hfut.dmic.webcollector.plugin.berkeley.BreadthCrawler;

import java.util.Optional;

/**
 * Example crawler that starts from the CSDN blog front page, follows links
 * matching the article-detail URL pattern, and prints the title and author
 * block text of every article page it visits.
 */
public class TutorialCrawler extends BreadthCrawler {
    /** Seed URL the crawl starts from. */
    private static final String SEED_URL = "http://blog.csdn.net/";

    /** Regex used both to register links to follow and to recognise article pages in {@link #visit}. */
    private static final String ARTICLE_URL_REGEX = "http://blog.csdn.net/.*/article/details/.*";

    /** CSS selector for the element holding the article title. */
    private static final String TITLE_SELECTOR = "div[class=article_title]";

    /** CSS selector for the element holding the author information. */
    private static final String AUTHOR_SELECTOR = "div[id=blog_userface]";

    /**
     * Creates the crawler, registers the seed and the article URL regex, and
     * configures the execute interval and thread count.
     *
     * @param crawlPath passed straight to the {@code BreadthCrawler} constructor
     *                  (presumably the directory for crawl state; confirm against the library docs)
     * @param autoParse passed straight to the {@code BreadthCrawler} constructor
     *                  (presumably enables automatic link extraction; confirm against the library docs)
     */
    public TutorialCrawler(String crawlPath, boolean autoParse) {
        super(crawlPath, autoParse);
        addSeed(SEED_URL);
        addRegex(ARTICLE_URL_REGEX);
        // NOTE(review): the interval is presumably in milliseconds; verify against the Configuration docs.
        getConf().setExecuteInterval(1000);
        setThreads(30);
    }

    /**
     * Prints the title and author text of article pages; pages whose URL does
     * not match the article pattern are ignored.
     *
     * <p>A missing title or author element is printed as an empty string
     * instead of raising a {@link NullPointerException}.
     *
     * @param page        the fetched page
     * @param crawlDatums follow-up tasks container; not used here
     */
    @Override
    public void visit(Page page, CrawlDatums crawlDatums) {
        if (!page.matchUrl(ARTICLE_URL_REGEX)) {
            return;
        }
        String title = firstText(page, TITLE_SELECTOR);
        String author = firstText(page, AUTHOR_SELECTOR);
        System.out.println("title:" + title + "\tauthor:" + author);
    }

    /**
     * Returns the text of the first element matching {@code cssSelector}, or an
     * empty string when nothing matches.
     */
    private static String firstText(Page page, String cssSelector) {
        // first() yields null for an empty selection, so wrap it before dereferencing.
        return Optional.ofNullable(page.select(cssSelector).first())
                .map(element -> element.text())
                .orElse("");
    }

    /**
     * Runs the crawler with crawl path {@code "crawl"} and auto-parse enabled.
     *
     * @param args command-line arguments; ignored
     * @throws Exception if starting the crawler fails
     */
    public static void main(String[] args) throws Exception {
        TutorialCrawler crawler = new TutorialCrawler("crawl", true);
        // NOTE(review): the argument is presumably the crawl depth; confirm against the library docs.
        crawler.start(2);
    }
}
