package cn.edu.hfut.dmic.webcollector.fetcher;

import cn.edu.hfut.dmic.webcollector.generator.DbUpdater;
import cn.edu.hfut.dmic.webcollector.generator.Generator;
import cn.edu.hfut.dmic.webcollector.handler.Handler;
import cn.edu.hfut.dmic.webcollector.handler.Message;
import cn.edu.hfut.dmic.webcollector.model.Content;
import cn.edu.hfut.dmic.webcollector.model.CrawlDatum;
import cn.edu.hfut.dmic.webcollector.model.Page;
import cn.edu.hfut.dmic.webcollector.net.Request;
import cn.edu.hfut.dmic.webcollector.net.RequestFactory;
import cn.edu.hfut.dmic.webcollector.net.Response;
import cn.edu.hfut.dmic.webcollector.parser.Parser;
import cn.edu.hfut.dmic.webcollector.parser.ParserFactory;
import cn.edu.hfut.dmic.webcollector.util.Config;
import cn.edu.hfut.dmic.webcollector.util.HandlerUtils;
import cn.edu.hfut.dmic.webcollector.util.LogUtils;
import java.util.Collections;
import java.util.LinkedList;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
import java.util.concurrent.atomic.AtomicLong;

/* loaded from: input_file:cn/edu/hfut/dmic/webcollector/fetcher/Fetcher.class */
public class Fetcher {
    /** Number of fetcher threads currently executing {@code run()}; re-created by each {@link #fetchAll} call. */
    private AtomicInteger activeThreads;
    /** Number of fetcher threads currently sleeping because the queue was empty. */
    private AtomicInteger spinWaiting;
    /** Wall-clock time (ms) at which the most recent item was taken from the queue. */
    private AtomicLong lastRequestStart;
    /** Thread that moves crawl records from the generator into {@link #fetchQueue}. */
    private QueueFeeder feeder;
    /** Work queue shared between the feeder and the fetcher threads. */
    private FetchQueue fetchQueue;
    /** Message code sent to the handler for a page that was fetched. */
    public static final int FETCH_SUCCESS = 1;
    /** Message code sent to the handler for a page that could not be fetched. */
    public static final int FETCH_FAILED = 2;
    /**
     * Keeps the monitoring loop of {@link #fetchAll} alive; cleared by {@link #stop()}.
     * Declared {@code volatile} because {@code stop()} is meant to be called from a thread
     * other than the one blocked in {@code fetchAll}, and the write must become visible there.
     */
    volatile boolean running;
    /** Persists fetch results; only used when {@link #needUpdateDb} is {@code true}. */
    public DbUpdater dbUpdater = null;
    /** Receives a message for every fetched or failed page. */
    public Handler handler = null;
    /** Creates the request object for each URL. */
    public RequestFactory requestFactory = null;
    /** Creates the parser for each fetched page; only used when {@link #parsing} is {@code true}. */
    public ParserFactory parserFactory = null;
    /** Number of additional attempts after the first failed request (total attempts = retry + 1). */
    private int retry = 3;
    /** Whether fetch results are written through {@link #dbUpdater}. */
    private boolean needUpdateDb = true;
    /** Number of fetcher threads started by {@link #fetchAll}. */
    private int threads = 10;
    /** Whether the raw response content is written in addition to the fetch record. */
    private boolean isContentStored = true;
    /** Whether fetched pages are run through a parser. */
    private boolean parsing = true;

    /* loaded from: input_file:cn/edu/hfut/dmic/webcollector/fetcher/Fetcher$FetchItem.class */
    /** Queue entry wrapping the crawl record that is waiting to be fetched. */
    public static class FetchItem {
        /** The crawl record this entry refers to. */
        public CrawlDatum datum;

        /**
         * Wraps the given crawl record.
         *
         * @param crawlDatum record to fetch; stored as-is, without copying or null checking
         */
        public FetchItem(CrawlDatum crawlDatum) {
            datum = crawlDatum;
        }
    }

    /* loaded from: input_file:cn/edu/hfut/dmic/webcollector/fetcher/Fetcher$FetchQueue.class */
    /**
     * FIFO work queue of {@link FetchItem}s shared by one producer and several consumers.
     *
     * <p>Additions rely on the synchronized list wrapper only; removal, clearing and dumping
     * additionally hold this object's monitor so that they never interleave with each other.
     */
    public static class FetchQueue {
        /** Count of items ever added; it is never decremented by this class. */
        public AtomicInteger totalSize = new AtomicInteger(0);
        /** Backing list; element 0 is the next item to be fetched. */
        public List<FetchItem> queue = Collections.synchronizedList(new LinkedList<FetchItem>());

        /** Removes every pending item. */
        public synchronized void clear() {
            this.queue.clear();
        }

        /**
         * @return number of items currently waiting in the queue
         */
        public int getSize() {
            return this.queue.size();
        }

        /**
         * Appends an item to the tail of the queue and bumps {@link #totalSize}.
         *
         * @param fetchItem item to enqueue; {@code null} is silently ignored
         */
        public void addFetchItem(FetchItem fetchItem) {
            if (fetchItem == null) {
                return;
            }
            this.queue.add(fetchItem);
            this.totalSize.incrementAndGet();
        }

        /**
         * Removes and returns the head of the queue.
         *
         * @return the oldest pending item, or {@code null} when the queue is empty
         */
        public synchronized FetchItem getFetchItem() {
            // Only removals can shrink the list and they all hold this monitor, so the
            // emptiness check cannot be invalidated before remove(0) runs.
            if (this.queue.isEmpty()) {
                return null;
            }
            return this.queue.remove(0);
        }

        /** Logs the URL of every pending item, one line per item, prefixed with its position. */
        public synchronized void dump() {
            // toArray on the synchronized wrapper yields an atomic snapshot in one pass,
            // avoiding repeated positional get(i) calls on a linked list.
            FetchItem[] snapshot = this.queue.toArray(new FetchItem[0]);
            for (int i = 0; i < snapshot.length; i++) {
                LogUtils.getLogger().info("  " + i + ". " + snapshot[i].datum.getUrl());
            }
        }
    }

    /* JADX INFO: Access modifiers changed from: private */
    /* loaded from: input_file:cn/edu/hfut/dmic/webcollector/fetcher/Fetcher$FetcherThread.class */
    /**
     * Worker thread that repeatedly takes an item from the fetch queue, downloads it,
     * optionally parses and persists the result, and notifies the handler.
     *
     * <p>The thread registers itself in {@code activeThreads} when it starts and unregisters
     * exactly once when {@link #run()} exits, because {@code fetchAll} uses that counter to
     * decide when the whole fetch is finished.
     */
    public class FetcherThread extends Thread {
        private FetcherThread() {
        }

        /**
         * Processes queue items until the feeder thread has died and the queue is empty.
         * An exception raised while handling a single item is logged and the loop goes on
         * with the next item.
         */
        @Override // java.lang.Thread, java.lang.Runnable
        public void run() {
            Fetcher.this.activeThreads.incrementAndGet();
            try {
                while (true) {
                    try {
                        FetchItem fetchItem = Fetcher.this.fetchQueue.getFetchItem();
                        if (fetchItem != null) {
                            process(fetchItem);
                        } else {
                            if (!Fetcher.this.feeder.isAlive() && Fetcher.this.fetchQueue.getSize() <= 0) {
                                // Nothing left and nothing more will arrive: this worker is done.
                                return;
                            }
                            idle();
                        }
                    } catch (Exception e) {
                        LogUtils.getLogger().info("Exception", e);
                    }
                }
            } finally {
                // Must run once per thread, not once per iteration; otherwise the counter
                // reaches zero while workers are still busy.
                Fetcher.this.activeThreads.decrementAndGet();
            }
        }

        /** Pauses for half a second while counting this thread as spin-waiting. */
        private void idle() {
            Fetcher.this.spinWaiting.incrementAndGet();
            try {
                Thread.sleep(500L);
            } catch (InterruptedException ignored) {
                // Best-effort pause: an interrupt merely shortens the wait.
            } finally {
                Fetcher.this.spinWaiting.decrementAndGet();
            }
        }

        /** Downloads one item, then parses, persists and reports it as configured. */
        private void process(FetchItem fetchItem) throws Exception {
            Fetcher.this.lastRequestStart.set(System.currentTimeMillis());
            String url = fetchItem.datum.getUrl();
            CrawlDatum crawlDatum = new CrawlDatum();
            crawlDatum.setUrl(url);
            Request request = Fetcher.this.requestFactory.createRequest(url);
            Response response = fetchWithRetry(request, crawlDatum, url);

            // NOTE(review): 2 is presumably the "fetched" status constant of CrawlDatum - confirm.
            crawlDatum.setStatus(2);
            crawlDatum.setFetchTime(System.currentTimeMillis());
            Page page = new Page();
            page.setUrl(url);
            page.setFetchTime(crawlDatum.getFetchTime());

            if (response == null) {
                LogUtils.getLogger().info("failed " + url);
                HandlerUtils.sendMessage(Fetcher.this.handler, new Message(Fetcher.FETCH_FAILED, page), true);
                return;
            }

            page.setResponse(response);
            LogUtils.getLogger().info("fetch " + url);
            String contentType = response.getContentType();
            if (Fetcher.this.parsing) {
                parse(page, url, contentType);
            }
            if (Fetcher.this.needUpdateDb) {
                persist(crawlDatum, page, response, url, contentType);
            }
            HandlerUtils.sendMessage(Fetcher.this.handler, new Message(Fetcher.FETCH_SUCCESS, page), true);
        }

        /**
         * Issues the request up to {@code retry + 1} times.
         *
         * @return the first response obtained without an exception (which may itself be
         *         {@code null}), or {@code null} when every attempt threw
         */
        private Response fetchWithRetry(Request request, CrawlDatum crawlDatum, String url) {
            for (int attempt = 0; attempt <= Fetcher.this.retry; attempt++) {
                if (attempt > 0) {
                    LogUtils.getLogger().info("retry " + attempt + "th " + url);
                }
                try {
                    return request.getResponse(crawlDatum);
                } catch (Exception e) {
                    // Record why the attempt failed instead of dropping the cause silently.
                    LogUtils.getLogger().info("Exception", e);
                }
            }
            return null;
        }

        /** Parses the page when a parser exists for its URL and content type; failures are only logged. */
        private void parse(Page page, String url, String contentType) {
            try {
                Parser parser = Fetcher.this.parserFactory.createParser(url, contentType);
                if (parser != null) {
                    page.setParseResult(parser.getParse(page));
                }
            } catch (Exception e) {
                LogUtils.getLogger().info("Exception", e);
            }
        }

        /** Writes the fetch record and, as configured, the content and parse result; failures are only logged. */
        private void persist(CrawlDatum crawlDatum, Page page, Response response, String url, String contentType) {
            try {
                Fetcher.this.dbUpdater.getSegmentWriter().wrtieFetch(crawlDatum);
                if (Fetcher.this.isContentStored) {
                    Content content = new Content();
                    content.setUrl(url);
                    if (response.getContent() != null) {
                        content.setContent(response.getContent());
                    } else {
                        // Store an empty body rather than null.
                        content.setContent(new byte[0]);
                    }
                    content.setContentType(contentType);
                    Fetcher.this.dbUpdater.getSegmentWriter().wrtieContent(content);
                }
                if (Fetcher.this.parsing && page.getParseResult() != null) {
                    Fetcher.this.dbUpdater.getSegmentWriter().wrtieParse(page.getParseResult());
                }
            } catch (Exception e) {
                LogUtils.getLogger().info("Exception", e);
            }
        }
    }

    /* loaded from: input_file:cn/edu/hfut/dmic/webcollector/fetcher/Fetcher$QueueFeeder.class */
    /**
     * Producer thread that keeps a {@link FetchQueue} topped up with records pulled from a
     * {@link Generator}. It terminates when the generator returns {@code null} or when the
     * thread is interrupted.
     */
    public static class QueueFeeder extends Thread {
        /** Queue that receives the generated items. */
        public FetchQueue queue;
        /** Source of crawl records; a {@code null} result from {@code next()} ends the feeder. */
        public Generator generator;
        /** Target number of items to keep in the queue. */
        public int size;

        /**
         * @param fetchQueue queue to fill
         * @param generator  source of crawl records
         * @param i          number of items the feeder tries to keep queued
         */
        public QueueFeeder(FetchQueue fetchQueue, Generator generator, int i) {
            this.queue = fetchQueue;
            this.generator = generator;
            this.size = i;
        }

        /**
         * Refills the queue up to {@link #size} items whenever it has room, and sleeps for
         * one second whenever it is full.
         */
        @Override // java.lang.Thread, java.lang.Runnable
        public void run() {
            boolean hasMore = true;
            while (hasMore && !isInterrupted()) {
                int free = this.size - this.queue.getSize();
                if (free <= 0) {
                    try {
                        Thread.sleep(1000L);
                    } catch (InterruptedException e) {
                        // Interruption is a shutdown request: keep the flag set and stop feeding.
                        Thread.currentThread().interrupt();
                        return;
                    }
                    continue;
                }
                while (free > 0 && hasMore && !isInterrupted()) {
                    CrawlDatum next = this.generator.next();
                    hasMore = next != null;
                    if (hasMore) {
                        this.queue.addFetchItem(new FetchItem(next));
                        free--;
                    }
                }
            }
        }
    }

    /**
     * Prepares a fetch run. When database updates are enabled, a lock left over from an
     * earlier run is first resolved by merging and unlocking; then the segment writer is
     * initialised and the database is locked. Finally the running flag is raised.
     *
     * @throws Exception if initialising the segment writer or taking the lock fails
     */
    private void before() throws Exception {
        if (this.needUpdateDb) {
            try {
                if (this.dbUpdater.isLocked()) {
                    this.dbUpdater.merge();
                    this.dbUpdater.unlock();
                }
            } catch (Exception e) {
                // Recovery is best effort; report through the logger used by the rest of
                // this class instead of printing to stderr, then carry on.
                LogUtils.getLogger().info("Exception", e);
            }
            this.dbUpdater.initSegmentWriter();
            this.dbUpdater.lock();
        }
        this.running = true;
    }

    /**
     * Fetches every record produced by the generator using {@code threads} worker threads,
     * blocking until the work is finished, {@link #stop()} is called, or no request has been
     * started for longer than {@code Config.requestMaxInterval} milliseconds.
     *
     * <p>In the timeout case the method returns immediately without the usual clean-up
     * (queue clearing and {@code after()}), because worker threads may still be running.
     *
     * @param generator source of the crawl records to fetch
     * @throws Exception if preparing or finalising the database update fails
     */
    public void fetchAll(Generator generator) throws Exception {
        before();
        this.lastRequestStart = new AtomicLong(System.currentTimeMillis());
        this.activeThreads = new AtomicInteger(0);
        this.spinWaiting = new AtomicInteger(0);
        this.fetchQueue = new FetchQueue();
        this.feeder = new QueueFeeder(this.fetchQueue, generator, 1000);
        this.feeder.start();
        for (int i = 0; i < this.threads; i++) {
            new FetcherThread().start();
        }
        do {
            try {
                Thread.sleep(1000L);
            } catch (InterruptedException ignored) {
                // An interrupt only shortens this polling interval; the loop keeps monitoring.
            }
            LogUtils.getLogger().info("-activeThreads=" + this.activeThreads.get() + ", spinWaiting=" + this.spinWaiting.get() + ", fetchQueue.size=" + this.fetchQueue.getSize());
            if (!this.feeder.isAlive() && this.fetchQueue.getSize() < 5) {
                this.fetchQueue.dump();
            }
            if (System.currentTimeMillis() - this.lastRequestStart.get() > Config.requestMaxInterval) {
                LogUtils.getLogger().info("Aborting with " + this.activeThreads + " hung threads.");
                return;
            }
            if (this.activeThreads.get() <= 0) {
                break;
            }
        } while (this.running);
        if (this.feeder.isAlive()) {
            // Ask the feeder to finish cooperatively first. Thread.stop() is deprecated and
            // throws UnsupportedOperationException on recent JVMs; it is kept only as a last
            // resort where it still works, since the feeder loop is not guaranteed to react
            // to interruption. Its failure must not prevent the clean-up below.
            this.feeder.interrupt();
            try {
                this.feeder.stop();
            } catch (UnsupportedOperationException e) {
                LogUtils.getLogger().info("Thread.stop() is not supported on this JVM; feeder was interrupted instead");
            }
        }
        this.fetchQueue.clear();
        after();
    }

    /**
     * Requests that a running {@link #fetchAll} leave its monitoring loop at the next check
     * by lowering the running flag.
     */
    public void stop() {
        running = false;
    }

    /**
     * Finalises a fetch run: when database updates are enabled, closes the updater, merges
     * the results and releases the lock, in that order. Does nothing otherwise.
     *
     * @throws Exception if closing, merging or unlocking fails
     */
    private void after() throws Exception {
        if (!needUpdateDb) {
            return;
        }
        dbUpdater.close();
        dbUpdater.merge();
        dbUpdater.unlock();
    }

    /**
     * @return number of fetcher threads that {@link #fetchAll} starts
     */
    public int getThreads() {
        return threads;
    }

    /**
     * @param i number of fetcher threads that {@link #fetchAll} should start
     */
    public void setThreads(int i) {
        threads = i;
    }

    /**
     * @return handler that is sent a message for every fetched or failed page
     */
    public Handler getHandler() {
        return handler;
    }

    /**
     * @param handler handler that should be sent a message for every fetched or failed page
     */
    public void setHandler(Handler handler) {
        this.handler = handler;
    }

    /**
     * @return {@code true} when fetch results are written through the database updater
     */
    public boolean getNeedUpdateDb() {
        return needUpdateDb;
    }

    /**
     * @param z {@code true} to write fetch results through the database updater
     */
    public void setNeedUpdateDb(boolean z) {
        needUpdateDb = z;
    }

    /**
     * @return number of additional attempts made after a request has failed
     */
    public int getRetry() {
        return retry;
    }

    /**
     * @param i number of additional attempts to make after a request has failed
     */
    public void setRetry(int i) {
        retry = i;
    }

    /**
     * @return {@code true} when the response content is written along with the fetch record
     */
    public boolean isIsContentStored() {
        return isContentStored;
    }

    /**
     * @param z {@code true} to write the response content along with the fetch record
     */
    public void setIsContentStored(boolean z) {
        isContentStored = z;
    }

    /**
     * @return {@code true} when fetched pages are handed to a parser
     */
    public boolean isParsing() {
        return parsing;
    }

    /**
     * @param z {@code true} to hand fetched pages to a parser
     */
    public void setParsing(boolean z) {
        parsing = z;
    }

    /**
     * @return updater used to lock, write and merge fetch results
     */
    public DbUpdater getDbUpdater() {
        return dbUpdater;
    }

    /**
     * @param dbUpdater updater to use for locking, writing and merging fetch results
     */
    public void setDbUpdater(DbUpdater dbUpdater) {
        this.dbUpdater = dbUpdater;
    }

    /**
     * @return factory that creates the request for each URL to fetch
     */
    public RequestFactory getRequestFactory() {
        return requestFactory;
    }

    /**
     * @param requestFactory factory that should create the request for each URL to fetch
     */
    public void setRequestFactory(RequestFactory requestFactory) {
        this.requestFactory = requestFactory;
    }

    /**
     * @return factory that creates the parser for a fetched page's URL and content type
     */
    public ParserFactory getParserFactory() {
        return parserFactory;
    }

    /**
     * @param parserFactory factory that should create the parser for a fetched page's URL and content type
     */
    public void setParserFactory(ParserFactory parserFactory) {
        this.parserFactory = parserFactory;
    }
}
