package cn.edu.hfut.dmic.webcollector.generator;

import cn.edu.hfut.dmic.webcollector.fetcher.FSSegmentWriter;
import cn.edu.hfut.dmic.webcollector.fetcher.SegmentUtils;
import cn.edu.hfut.dmic.webcollector.fetcher.SegmentWriter;
import cn.edu.hfut.dmic.webcollector.model.CrawlDatum;
import cn.edu.hfut.dmic.webcollector.model.Link;
import cn.edu.hfut.dmic.webcollector.parser.ParseData;
import cn.edu.hfut.dmic.webcollector.util.Config;
import cn.edu.hfut.dmic.webcollector.util.FileUtils;
import cn.edu.hfut.dmic.webcollector.util.LogUtils;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;

/* loaded from: input_file:cn/edu/hfut/dmic/webcollector/generator/FSDbUpdater.class */
/**
 * File-system backed {@link DbUpdater}.
 *
 * <p>All state lives under a single crawl directory ({@code crawlPath}): the
 * current crawl database at {@code Config.current_info_path}, its backup at
 * {@code Config.old_info_path}, a lock file at {@code Config.lock_path}, and
 * one sub-directory per segment under {@code segments/}.
 */
public class FSDbUpdater implements DbUpdater {
    private SegmentWriter segmentWriter = null;
    private String crawlPath;
    private String segmentName;

    /**
     * Creates an updater rooted at the given crawl directory.
     *
     * @param str path of the crawl directory
     */
    public FSDbUpdater(String str) {
        this.crawlPath = str;
    }

    /**
     * Finds the segment directory whose name is the largest positive number.
     *
     * <p>Entries under {@code segments/} whose names are not valid decimal
     * {@code long} values are skipped instead of aborting the scan with a
     * {@link NumberFormatException}.
     *
     * <p>Decompiler note: this method was originally declared
     * {@code protected}.
     *
     * @return the name of the numerically greatest segment, or {@code null}
     *         when the {@code segments} directory cannot be listed or holds no
     *         entry with a numeric name greater than zero
     */
    public String getLastSegmentName() {
        String[] names = new File(this.crawlPath, "segments").list();
        if (names == null) {
            return null;
        }
        String latestName = null;
        long latestValue = 0;
        for (String name : names) {
            long value;
            try {
                value = Long.parseLong(name);
            } catch (NumberFormatException ignored) {
                // Not a segment directory (stray file, temp dir, ...): skip it.
                continue;
            }
            if (value > latestValue) {
                latestValue = value;
                latestName = name;
            }
        }
        return latestName;
    }

    /**
     * Copies the current crawl database to the backup location.
     *
     * @throws IOException if the copy fails
     */
    public void backup() throws IOException {
        LogUtils.getLogger().info("backup " + getCrawlPath());
        File current = new File(this.crawlPath, Config.current_info_path);
        File old = new File(this.crawlPath, Config.old_info_path);
        FileUtils.copy(current, old);
    }

    /**
     * Reports whether the crawl directory is marked as locked.
     *
     * @return {@code true} only if the lock file exists and its content is
     *         exactly {@code "1"}
     * @throws IOException if the lock file cannot be read
     */
    @Override // cn.edu.hfut.dmic.webcollector.generator.DbUpdater
    public boolean isLocked() throws IOException {
        File lockFile = new File(lockFilePath());
        if (!lockFile.exists()) {
            return false;
        }
        return new String(FileUtils.readFile(lockFile), "utf-8").equals("1");
    }

    /**
     * Marks the crawl directory as locked by writing {@code "1"} to the lock file.
     *
     * @throws IOException if the lock file cannot be written
     */
    @Override // cn.edu.hfut.dmic.webcollector.generator.DbUpdater
    public void lock() throws IOException {
        FileUtils.writeFile(lockFilePath(), "1".getBytes("utf-8"));
    }

    /**
     * Marks the crawl directory as unlocked by writing {@code "0"} to the lock file.
     *
     * @throws IOException if the lock file cannot be written
     */
    @Override // cn.edu.hfut.dmic.webcollector.generator.DbUpdater
    public void unlock() throws IOException {
        FileUtils.writeFile(lockFilePath(), "0".getBytes("utf-8"));
    }

    /** Builds the path of the lock file inside the crawl directory. */
    private String lockFilePath() {
        return this.crawlPath + "/" + Config.lock_path;
    }

    /**
     * Rewrites the current crawl database with the given records, creating
     * the parent directory first when it is missing.
     *
     * @param arrayList records to write, in order
     * @throws IOException if the database cannot be written
     */
    private void updateAll(ArrayList<CrawlDatum> arrayList) throws IOException {
        File target = new File(this.crawlPath, Config.current_info_path);
        File parent = target.getParentFile();
        if (!parent.exists()) {
            parent.mkdirs();
        }
        DbWriter writer = new DbWriter(CrawlDatum.class, target);
        try {
            for (CrawlDatum datum : arrayList) {
                writer.write(datum);
            }
        } finally {
            // Release the file even when a write fails part-way through.
            writer.close();
        }
    }

    /**
     * Closes the segment writer, if one has been created.
     *
     * @throws Exception if closing the segment writer fails
     */
    @Override // cn.edu.hfut.dmic.webcollector.generator.DbUpdater
    public void close() throws Exception {
        if (this.segmentWriter != null) {
            this.segmentWriter.close();
        }
    }

    /**
     * Merges the results of a segment into the current crawl database.
     *
     * <p>Uses the configured segment name, falling back to
     * {@link #getLastSegmentName()}; returns without doing anything when no
     * segment is available. A backup is attempted first, and a backup failure
     * is logged but does not stop the merge. If the segment has no
     * {@code fetch/info.avro} file, nothing is merged or written.
     *
     * <p>Merge rules, in order:
     * <ol>
     *   <li>every record of the current database is kept;</li>
     *   <li>a fetched record with an unknown URL is appended, and one with a
     *       known URL replaces the stored record unless its status is
     *       {@code 1};</li>
     *   <li>every link found in {@code parse_data/info.avro} (when present)
     *       whose URL is still unknown is appended with status {@code 1}.</li>
     * </ol>
     * NOTE(review): status {@code 1} presumably means "not fetched yet";
     * confirm against the {@code CrawlDatum} status constants.
     *
     * @throws IOException if reading the inputs or writing the merged database fails
     */
    @Override // cn.edu.hfut.dmic.webcollector.generator.DbUpdater
    public void merge() throws IOException {
        if (this.segmentName == null) {
            this.segmentName = getLastSegmentName();
        }
        if (this.segmentName == null) {
            return;
        }
        try {
            backup();
        } catch (IOException e) {
            // Best effort: a failed backup must not prevent the merge.
            LogUtils.getLogger().info("Exception", e);
        }
        LogUtils.getLogger().info("merge " + getSegmentPath());
        File fetchInfo = new File(getSegmentPath(), "fetch/info.avro");
        if (!fetchInfo.exists()) {
            return;
        }

        ArrayList<CrawlDatum> datums = new ArrayList<>();
        // URL -> position of the record for that URL inside 'datums'.
        HashMap<String, Integer> indexByUrl = new HashMap<>();

        loadCurrentDatums(datums, indexByUrl);
        mergeFetchedDatums(fetchInfo, datums, indexByUrl);
        File parseInfo = new File(getSegmentPath(), "parse_data/info.avro");
        if (parseInfo.exists()) {
            mergeParsedLinks(parseInfo, datums, indexByUrl);
        }
        // Every reader is closed by now, so the database file can be rewritten.
        updateAll(datums);
    }

    /** Loads every record of the current crawl database into the list and indexes it by URL. */
    private void loadCurrentDatums(ArrayList<CrawlDatum> datums, HashMap<String, Integer> indexByUrl)
            throws IOException {
        DbReader reader = new DbReader(CrawlDatum.class, new File(this.crawlPath, Config.current_info_path));
        try {
            while (reader.hasNext()) {
                CrawlDatum datum = (CrawlDatum) reader.readNext();
                datums.add(datum);
                indexByUrl.put(datum.getUrl(), datums.size() - 1);
            }
        } finally {
            reader.close();
        }
    }

    /** Applies fetched records: appends unknown URLs, replaces known ones unless the fetched status is 1. */
    private void mergeFetchedDatums(File fetchInfo, ArrayList<CrawlDatum> datums,
            HashMap<String, Integer> indexByUrl) throws IOException {
        DbReader reader = new DbReader(CrawlDatum.class, fetchInfo);
        try {
            while (reader.hasNext()) {
                CrawlDatum fetched = (CrawlDatum) reader.readNext();
                String url = fetched.getUrl();
                // Stored indexes are never null, so null means "URL not seen yet".
                Integer index = indexByUrl.get(url);
                if (index == null) {
                    datums.add(fetched);
                    indexByUrl.put(url, datums.size() - 1);
                } else if (fetched.getStatus() != 1) {
                    datums.set(index.intValue(), fetched);
                }
            }
        } finally {
            reader.close();
        }
    }

    /** Appends a status-1 record for every parsed link whose URL is not in the index yet. */
    private void mergeParsedLinks(File parseInfo, ArrayList<CrawlDatum> datums,
            HashMap<String, Integer> indexByUrl) throws IOException {
        DbReader reader = new DbReader(ParseData.class, parseInfo);
        try {
            while (reader.hasNext()) {
                Iterator<Link> links = ((ParseData) reader.readNext()).getLinks().iterator();
                while (links.hasNext()) {
                    Link link = links.next();
                    CrawlDatum discovered = new CrawlDatum();
                    discovered.setUrl(link.getUrl());
                    discovered.setStatus(1);
                    if (!indexByUrl.containsKey(discovered.getUrl())) {
                        datums.add(discovered);
                        indexByUrl.put(discovered.getUrl(), datums.size() - 1);
                    }
                }
            }
        } finally {
            reader.close();
        }
    }

    /**
     * Returns the segment writer.
     *
     * @return the current segment writer, or {@code null} if
     *         {@link #initSegmentWriter()} has not been called
     */
    @Override // cn.edu.hfut.dmic.webcollector.generator.DbUpdater
    public SegmentWriter getSegmentWriter() {
        return this.segmentWriter;
    }

    /**
     * Returns the path of the current segment directory.
     *
     * @return {@code crawlPath + "/segments/" + segmentName}; the segment
     *         name is not checked for {@code null}
     */
    public String getSegmentPath() {
        return this.crawlPath + "/segments/" + this.segmentName;
    }

    /**
     * Returns the crawl directory path.
     *
     * @return the crawl directory path
     */
    public String getCrawlPath() {
        return this.crawlPath;
    }

    /**
     * Sets the crawl directory path.
     *
     * @param str new crawl directory path
     */
    public void setCrawlPath(String str) {
        this.crawlPath = str;
    }

    /**
     * Returns the current segment name.
     *
     * @return the current segment name, possibly {@code null}
     */
    public String getSegmentName() {
        return this.segmentName;
    }

    /**
     * Sets the segment name.
     *
     * @param str segment name used by {@link #merge()} and {@link #getSegmentPath()}
     */
    public void setSegmentName(String str) {
        this.segmentName = str;
    }

    /** Deletes the whole crawl directory, if it exists. */
    @Override // cn.edu.hfut.dmic.webcollector.generator.DbUpdater
    public void clearHistory() {
        File crawlDir = new File(this.crawlPath);
        LogUtils.getLogger().info("clear " + crawlDir.getAbsolutePath());
        if (crawlDir.exists()) {
            FileUtils.deleteDir(crawlDir);
        }
    }

    /**
     * Starts a new segment: obtains a fresh segment name and creates a
     * segment writer for it.
     *
     * @throws Exception if the segment writer cannot be created
     */
    @Override // cn.edu.hfut.dmic.webcollector.generator.DbUpdater
    public void initSegmentWriter() throws Exception {
        this.segmentName = SegmentUtils.createSegmengName();
        this.segmentWriter = new FSSegmentWriter(this.crawlPath, getSegmentPath());
    }
}
