package cn.edu.hfut.dmic.webcollector.model;

import cn.edu.hfut.dmic.webcollector.util.CharsetDetector;
import cn.edu.hfut.dmic.webcollector.util.GsonUtils;
import cn.edu.hfut.dmic.webcollector.util.ListUtils;
import cn.edu.hfut.dmic.webcollector.util.RegexRule;
import com.google.gson.JsonArray;
import com.google.gson.JsonObject;
import java.io.UnsupportedEncodingException;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.jsoup.Jsoup;
import org.jsoup.nodes.Document;
import org.jsoup.nodes.Element;
import org.jsoup.select.Elements;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/* loaded from: input_file:cn/edu/hfut/dmic/webcollector/model/Page.class */
/**
 * A fetched page: the raw response bytes together with lazily decoded text,
 * a lazily parsed jsoup {@link Document}, and the {@link CrawlDatum} that
 * produced it.
 *
 * <p>Decoded text ({@link #html()}), the parsed document ({@link #doc()})
 * and the charset ({@link #charset()}) are computed on first use and cached
 * in plain fields; the class performs no synchronization.
 */
public class Page implements MetaGetter, MetaSetter<Page> {
    public static final Logger LOG = LoggerFactory.getLogger(Page.class);

    private CrawlDatum crawlDatum;
    private String contentType;
    private Integer code;
    private byte[] content;
    private Exception exception = null;
    private String html = null;
    private Document doc = null;
    private String charset = null;
    private Object obj = null;

    /**
     * Creates a page from a fetch result.
     *
     * @param crawlDatum the crawl task this page belongs to
     * @param num        the response status code; stored as given, may be null
     * @param str        the response content type; may be null
     * @param bArr       the raw response body; may be null
     */
    public Page(CrawlDatum crawlDatum, Integer num, String str, byte[] bArr) {
        this.crawlDatum = crawlDatum;
        this.code = num;
        this.contentType = str;
        this.content = bArr;
    }

    // ------------------------------------------------------------------
    // Matching helpers
    // ------------------------------------------------------------------

    /**
     * Tests whether the whole page URL matches a regular expression.
     *
     * @param str the regular expression
     * @return true if {@link #url()} matches {@code str} in its entirety
     */
    public boolean matchUrl(String str) {
        return Pattern.matches(str, url());
    }

    /**
     * Delegates to {@code CrawlDatum.matchType}.
     *
     * @param str the type to compare against
     * @return whatever the underlying crawl datum reports
     */
    public boolean matchType(String str) {
        return this.crawlDatum.matchType(str);
    }

    /**
     * Tests whether the content type matches a regular expression.
     *
     * <p>A null regex matches only a page without a content type. A non-null
     * regex never matches a page without a content type (previously this
     * combination raised a {@link NullPointerException}).
     *
     * @param str the regular expression, or null to test for "no content type"
     * @return true on a match as described above
     */
    public boolean matchContentType(String str) {
        String actual = contentType();
        if (str == null) {
            return actual == null;
        }
        if (actual == null) {
            return false;
        }
        return Pattern.matches(str, actual);
    }

    // ------------------------------------------------------------------
    // JSON helpers
    // ------------------------------------------------------------------

    /**
     * Parses the decoded page text as a JSON object.
     *
     * @return the page text parsed via {@code GsonUtils.parse}, as an object
     */
    public JsonObject jsonObject() {
        return GsonUtils.parse(html()).getAsJsonObject();
    }

    /**
     * Parses the decoded page text as a JSON array.
     *
     * @return the page text parsed via {@code GsonUtils.parse}, as an array
     */
    public JsonArray jsonArray() {
        return GsonUtils.parse(html()).getAsJsonArray();
    }

    /**
     * Extracts the first regex match (group 0) and parses it as a JSON object.
     *
     * @param str the regular expression
     * @return the parsed JSON object
     * @throws IllegalStateException if the expression does not match
     */
    public JsonObject regexJSONObject(String str) {
        return GsonUtils.parse(regex(str)).getAsJsonObject();
    }

    /**
     * Extracts a group of the first regex match and parses it as a JSON object.
     *
     * @param str the regular expression
     * @param i   the capturing group to parse
     * @return the parsed JSON object
     * @throws IllegalStateException if the expression does not match
     */
    public JsonObject regexJSONObject(String str, int i) {
        return GsonUtils.parse(regex(str, i)).getAsJsonObject();
    }

    /**
     * Extracts the first regex match (group 0) and parses it as a JSON array.
     *
     * @param str the regular expression
     * @return the parsed JSON array
     * @throws IllegalStateException if the expression does not match
     */
    public JsonArray regexJSONArray(String str) {
        return GsonUtils.parse(regex(str)).getAsJsonArray();
    }

    /**
     * Extracts a group of the first regex match and parses it as a JSON array.
     *
     * @param str the regular expression
     * @param i   the capturing group to parse
     * @return the parsed JSON array
     * @throws IllegalStateException if the expression does not match
     */
    public JsonArray regexJSONArray(String str, int i) {
        return GsonUtils.parse(regex(str, i)).getAsJsonArray();
    }

    // ------------------------------------------------------------------
    // Attribute and link extraction
    // ------------------------------------------------------------------

    /**
     * Collects the value of an attribute from every selected element that
     * carries it; elements without the attribute are skipped.
     *
     * @param str  the CSS selector
     * @param str2 the attribute name
     * @return the attribute values in document order; never null
     */
    public ArrayList<String> attrs(String str, String str2) {
        ArrayList<String> values = new ArrayList<>();
        for (Element element : select(str)) {
            if (element.hasAttr(str2)) {
                values.add(element.attr(str2));
            }
        }
        return values;
    }

    /**
     * Reads an attribute from the selection via jsoup's {@code Elements.attr}.
     *
     * @param str  the CSS selector
     * @param str2 the attribute name
     * @return the value reported by {@code Elements.attr}
     */
    public String attr(String str, String str2) {
        return select(str).attr(str2);
    }

    /**
     * Extracts links from the whole document via {@code Links.addFromElement}.
     *
     * @param z flag forwarded unchanged to {@code Links.addFromElement}
     * @return a new {@code Links} instance
     */
    public Links links(boolean z) {
        return new Links().addFromElement(doc(), z);
    }

    /**
     * Same as {@link #links(boolean)} with the flag set to {@code false}.
     *
     * @return a new {@code Links} instance
     */
    public Links links() {
        return links(false);
    }

    /**
     * Extracts links via {@code Links.addBySelector}.
     *
     * @param str the CSS selector
     * @param z   flag forwarded unchanged to {@code Links.addBySelector}
     * @return a new {@code Links} instance
     */
    public Links links(String str, boolean z) {
        return new Links().addBySelector(doc(), str, z);
    }

    /**
     * Same as {@link #links(String, boolean)} with the flag set to {@code false}.
     *
     * @param str the CSS selector
     * @return a new {@code Links} instance
     */
    public Links links(String str) {
        return links(str, false);
    }

    /**
     * Extracts links from the document via {@code Links.addByRegex} using a
     * {@link RegexRule}.
     *
     * @param regexRule the rule forwarded to {@code Links.addByRegex}
     * @param z         flag forwarded unchanged to {@code Links.addByRegex}
     * @return a new {@code Links} instance
     */
    public Links regexLinks(RegexRule regexRule, boolean z) {
        return new Links().addByRegex((Element) doc(), regexRule, z);
    }

    /**
     * Extracts links from the document via {@code Links.addByRegex} using a
     * regular expression string.
     *
     * @param str the regular expression forwarded to {@code Links.addByRegex}
     * @param z   flag forwarded unchanged to {@code Links.addByRegex}
     * @return a new {@code Links} instance
     */
    public Links regexLinks(String str, boolean z) {
        return new Links().addByRegex((Element) doc(), str, z);
    }

    /**
     * Same as {@link #regexLinks(RegexRule, boolean)} with the flag set to
     * {@code false}.
     *
     * @param regexRule the rule forwarded to {@code Links.addByRegex}
     * @return a new {@code Links} instance
     */
    public Links regexLinks(RegexRule regexRule) {
        return regexLinks(regexRule, false);
    }

    /**
     * Same as {@link #regexLinks(String, boolean)} with the flag set to
     * {@code false}.
     *
     * @param str the regular expression forwarded to {@code Links.addByRegex}
     * @return a new {@code Links} instance
     */
    public Links regexLinks(String str) {
        return regexLinks(str, false);
    }

    // ------------------------------------------------------------------
    // Text / number selection
    // ------------------------------------------------------------------

    /**
     * Returns the text of every element matching a selector.
     *
     * @param str the CSS selector
     * @return one entry per matching element, in document order; never null
     */
    public ArrayList<String> selectTextList(String str) {
        ArrayList<String> texts = new ArrayList<>();
        for (Element element : select(str)) {
            texts.add(element.text());
        }
        return texts;
    }

    /**
     * Returns the text of one matching element, looked up with
     * {@code ListUtils.getByIndex}.
     *
     * @param str the CSS selector
     * @param i   the index handed to {@code ListUtils.getByIndex}
     * @return the text at that index
     */
    public String selectText(String str, int i) {
        return (String) ListUtils.getByIndex(selectTextList(str), i);
    }

    /**
     * Returns the text of the first matching element.
     *
     * <p>NOTE(review): this dereferences {@code Elements.first()} directly,
     * which jsoup documents as null for an empty selection, so a selector
     * that matches nothing results in a {@link NullPointerException}.
     *
     * @param str the CSS selector
     * @return the text of the first match
     */
    public String selectText(String str) {
        return select(str).first().text();
    }

    /**
     * Parses the trimmed text of every matching element as an int.
     *
     * @param str the CSS selector
     * @return the parsed values in document order; never null
     * @throws NumberFormatException if any element's text is not an int
     */
    public ArrayList<Integer> selectIntList(String str) {
        ArrayList<Integer> values = new ArrayList<>();
        for (String text : selectTextList(str)) {
            values.add(Integer.valueOf(text.trim()));
        }
        return values;
    }

    /**
     * Parses the trimmed text of one matching element as an int.
     *
     * @param str the CSS selector
     * @param i   the index, as in {@link #selectText(String, int)}
     * @return the parsed value
     * @throws NumberFormatException if the text is not an int
     */
    public int selectInt(String str, int i) {
        return Integer.parseInt(selectText(str, i).trim());
    }

    /**
     * Same as {@link #selectInt(String, int)} with index 0.
     *
     * @param str the CSS selector
     * @return the parsed value
     */
    public int selectInt(String str) {
        return selectInt(str, 0);
    }

    /**
     * Parses the trimmed text of every matching element as a double.
     *
     * @param str the CSS selector
     * @return the parsed values in document order; never null
     * @throws NumberFormatException if any element's text is not a double
     */
    public ArrayList<Double> selectDoubleList(String str) {
        ArrayList<Double> values = new ArrayList<>();
        for (String text : selectTextList(str)) {
            values.add(Double.valueOf(text.trim()));
        }
        return values;
    }

    /**
     * Parses the trimmed text of one matching element as a double.
     *
     * @param str the CSS selector
     * @param i   the index, as in {@link #selectText(String, int)}
     * @return the parsed value
     * @throws NumberFormatException if the text is not a double
     */
    public double selectDouble(String str, int i) {
        return Double.parseDouble(selectText(str, i).trim());
    }

    /**
     * Same as {@link #selectDouble(String, int)} with index 0.
     *
     * @param str the CSS selector
     * @return the parsed value
     */
    public double selectDouble(String str) {
        return selectDouble(str, 0);
    }

    /**
     * Parses the trimmed text of every matching element as a long.
     *
     * @param str the CSS selector
     * @return the parsed values in document order; never null
     * @throws NumberFormatException if any element's text is not a long
     */
    public ArrayList<Long> selectLongList(String str) {
        ArrayList<Long> values = new ArrayList<>();
        for (String text : selectTextList(str)) {
            values.add(Long.valueOf(text.trim()));
        }
        return values;
    }

    /**
     * Parses the trimmed text of one matching element as a long.
     *
     * @param str the CSS selector
     * @param i   the index, as in {@link #selectText(String, int)}
     * @return the parsed value
     * @throws NumberFormatException if the text is not a long
     */
    public long selectLong(String str, int i) {
        return Long.parseLong(selectText(str, i).trim());
    }

    /**
     * Same as {@link #selectLong(String, int)} with index 0.
     *
     * @param str the CSS selector
     * @return the parsed value
     */
    public long selectLong(String str) {
        return selectLong(str, 0);
    }

    /**
     * Runs a CSS selector against the parsed document.
     *
     * <p>NOTE(review): {@link #doc()} returns null when parsing failed, in
     * which case this call throws a {@link NullPointerException}.
     *
     * @param str the CSS selector
     * @return the matching elements
     */
    public Elements select(String str) {
        return doc().select(str);
    }

    /**
     * Returns one element of a selection; a negative index counts from the
     * end ({@code -1} is the last match).
     *
     * @param str the CSS selector
     * @param i   zero-based index, or negative to count from the end
     * @return the element at the resolved index
     * @throws IndexOutOfBoundsException if the resolved index is out of range
     */
    public Element select(String str, int i) {
        Elements matches = select(str);
        int index = (i < 0) ? matches.size() + i : i;
        return (Element) matches.get(index);
    }

    // ------------------------------------------------------------------
    // Regex extraction on the decoded text
    // ------------------------------------------------------------------

    /**
     * Returns a group of the first regex match, or a default when the
     * expression does not match.
     *
     * @param str  the regular expression
     * @param i    the capturing group to return
     * @param str2 the value returned when nothing matches
     * @return the requested group of the first match, or {@code str2}
     */
    public String regex(String str, int i, String str2) {
        Matcher matcher = findFirst(str);
        return matcher.hitEnd() && !hasMatch(matcher) ? str2
                : (hasMatch(matcher) ? matcher.group(i) : str2);
    }

    /**
     * Returns a group of the first regex match.
     *
     * @param str the regular expression
     * @param i   the capturing group to return
     * @return the requested group of the first match
     * @throws IllegalStateException if the expression does not match
     */
    public String regex(String str, int i) {
        // group() raises IllegalStateException itself when find() failed.
        return findFirst(str).group(i);
    }

    /**
     * Finds the first regex match and feeds its capturing groups
     * (1..groupCount) as arguments to {@link String#format(String, Object...)}.
     *
     * @param str  the regular expression
     * @param str2 the format string
     * @return the formatted result
     * @throws IllegalStateException if the expression has groups but does not match
     */
    public String regexAndFormat(String str, String str2) {
        Matcher matcher = findFirst(str);
        int groupCount = matcher.groupCount();
        String[] groups = new String[groupCount];
        for (int g = 0; g < groupCount; g++) {
            groups[g] = matcher.group(g + 1);
        }
        // Explicit cast: pass the groups as the varargs array itself.
        return String.format(str2, (Object[]) groups);
    }

    /**
     * Same as {@link #regex(String, int, String)} for group 0.
     *
     * @param str  the regular expression
     * @param str2 the value returned when nothing matches
     * @return the first match, or {@code str2}
     */
    public String regex(String str, String str2) {
        return regex(str, 0, str2);
    }

    /**
     * Same as {@link #regex(String, int)} for group 0.
     *
     * @param str the regular expression
     * @return the first match
     * @throws IllegalStateException if the expression does not match
     */
    public String regex(String str) {
        return regex(str, 0);
    }

    /** Compiles the expression and advances a matcher over {@link #html()} to the first match. */
    private Matcher findFirst(String regex) {
        Matcher matcher = Pattern.compile(regex).matcher(html());
        matcher.find();
        return matcher;
    }

    /** Reports whether the last find() on the matcher succeeded. */
    private static boolean hasMatch(Matcher matcher) {
        try {
            matcher.start();
            return true;
        } catch (IllegalStateException noMatch) {
            // start() throws exactly when no match is available.
            return false;
        }
    }

    // ------------------------------------------------------------------
    // Raw content, decoded text and parsed document
    // ------------------------------------------------------------------

    /**
     * @return the raw response bytes; may be null
     */
    public byte[] content() {
        return this.content;
    }

    /**
     * Replaces the raw response bytes. Already cached text or document
     * values are not reset by this call.
     *
     * @param bArr the new raw bytes
     */
    public void content(byte[] bArr) {
        this.content = bArr;
    }

    /**
     * @return the URL reported by the underlying crawl datum
     */
    public String url() {
        return this.crawlDatum.url();
    }

    /**
     * Returns the page body decoded to text, decoding on first use.
     *
     * <p>The charset is the one set through {@link #charset(String)} or,
     * failing that, the one guessed by {@link #charset()}.
     *
     * @return the decoded text; null if there is no content or the charset
     *         name is not supported (the latter is logged)
     */
    public String html() {
        if (this.html != null) {
            return this.html;
        }
        if (this.content == null) {
            return null;
        }
        try {
            this.html = new String(this.content, charset());
            return this.html;
        } catch (UnsupportedEncodingException e) {
            LOG.info("Exception when decoding " + key(), e);
            return null;
        }
    }

    /**
     * Overrides the decoded text. The raw content and any cached document
     * are left untouched.
     *
     * @param str the text to return from {@link #html()}
     */
    public void html(String str) {
        this.html = str;
    }

    /**
     * @return the content type given at construction; may be null
     */
    public String contentType() {
        return this.contentType;
    }

    /**
     * Returns the page parsed by jsoup, parsing on first use with
     * {@link #url()} as the base URI.
     *
     * @return the parsed document, or null if parsing threw (the exception
     *         is logged)
     */
    public Document doc() {
        if (this.doc != null) {
            return this.doc;
        }
        try {
            this.doc = Jsoup.parse(html(), url());
            return this.doc;
        } catch (Exception e) {
            LOG.info("Exception", e);
            return null;
        }
    }

    /**
     * Overrides the parsed document.
     *
     * @param document the document to return from {@link #doc()}
     */
    public void doc(Document document) {
        this.doc = document;
    }

    /**
     * @return the exception previously stored with {@link #setException}; may be null
     */
    public Exception getException() {
        return this.exception;
    }

    /**
     * Stores an exception on this page.
     *
     * @param exc the exception to store
     */
    public void setException(Exception exc) {
        this.exception = exc;
    }

    /**
     * @return the crawl datum backing this page
     */
    public CrawlDatum crawlDatum() {
        return this.crawlDatum;
    }

    /**
     * Replaces the crawl datum backing this page.
     *
     * @param crawlDatum the new crawl datum
     */
    public void crawlDatum(CrawlDatum crawlDatum) {
        this.crawlDatum = crawlDatum;
    }

    /**
     * Returns the charset name used for decoding, guessing it from the raw
     * content via {@code CharsetDetector.guessEncoding} on first use.
     *
     * @return the explicit or guessed charset name
     */
    public String charset() {
        if (this.charset == null) {
            this.charset = CharsetDetector.guessEncoding(content());
        }
        return this.charset;
    }

    /**
     * Sets the charset name to use for decoding. Text that has already been
     * decoded and cached is not re-decoded.
     *
     * @param str the charset name
     */
    public void charset(String str) {
        this.charset = str;
    }

    /**
     * @return the key reported by the underlying crawl datum
     */
    public String key() {
        return this.crawlDatum.key();
    }

    /**
     * Sets the response status code.
     *
     * @param i the status code
     */
    public void code(int i) {
        this.code = Integer.valueOf(i);
    }

    /**
     * Returns the response status code.
     *
     * <p>NOTE(review): the constructor accepts a null {@code Integer}; in
     * that case this unboxing throws a {@link NullPointerException}.
     *
     * @return the status code
     */
    public int code() {
        return this.code.intValue();
    }

    /**
     * Returns the user object attached with {@link #obj(Object)}, cast to
     * the type the caller expects.
     *
     * @param <T> the type the caller expects
     * @return the attached object, or null if none was set
     */
    @SuppressWarnings("unchecked") // caller-chosen type; a mismatch surfaces as ClassCastException at the call site
    public <T> T obj() {
        return (T) this.obj;
    }

    /**
     * Attaches an arbitrary user object to this page.
     *
     * @param obj the object to attach
     */
    public void obj(Object obj) {
        this.obj = obj;
    }

    // ------------------------------------------------------------------
    // MetaGetter: reads are delegated to the crawl datum
    // ------------------------------------------------------------------

    /** Delegates to {@code CrawlDatum.meta()}. */
    @Override
    public JsonObject meta() {
        return this.crawlDatum.meta();
    }

    /** Delegates to {@code CrawlDatum.meta(String)}. */
    @Override
    public String meta(String str) {
        return this.crawlDatum.meta(str);
    }

    /** Delegates to {@code CrawlDatum.metaAsInt(String)}. */
    @Override
    public int metaAsInt(String str) {
        return this.crawlDatum.metaAsInt(str);
    }

    /** Delegates to {@code CrawlDatum.metaAsBoolean(String)}. */
    @Override
    public boolean metaAsBoolean(String str) {
        return this.crawlDatum.metaAsBoolean(str);
    }

    /** Delegates to {@code CrawlDatum.metaAsDouble(String)}. */
    @Override
    public double metaAsDouble(String str) {
        return this.crawlDatum.metaAsDouble(str);
    }

    /** Delegates to {@code CrawlDatum.metaAsLong(String)}. */
    @Override
    public long metaAsLong(String str) {
        return this.crawlDatum.metaAsLong(str);
    }

    // ------------------------------------------------------------------
    // MetaSetter: writes are delegated to the crawl datum; returns this
    // ------------------------------------------------------------------

    /** Forwards to {@code CrawlDatum.meta(JsonObject)} and returns this page for chaining. */
    @Override
    public Page meta(JsonObject jsonObject) {
        this.crawlDatum.meta(jsonObject);
        return this;
    }

    /** Forwards a string value to the crawl datum and returns this page for chaining. */
    @Override
    public Page meta(String str, String str2) {
        this.crawlDatum.meta(str, str2);
        return this;
    }

    /** Forwards an int value to the crawl datum and returns this page for chaining. */
    @Override
    public Page meta(String str, int i) {
        this.crawlDatum.meta(str, i);
        return this;
    }

    /** Forwards a boolean value to the crawl datum and returns this page for chaining. */
    @Override
    public Page meta(String str, boolean z) {
        this.crawlDatum.meta(str, z);
        return this;
    }

    /** Forwards a double value to the crawl datum and returns this page for chaining. */
    @Override
    public Page meta(String str, double d) {
        this.crawlDatum.meta(str, d);
        return this;
    }

    /** Forwards a long value to the crawl datum and returns this page for chaining. */
    @Override
    public Page meta(String str, long j) {
        this.crawlDatum.meta(str, j);
        return this;
    }
}
