package dev.langchain4j.data.document.splitter;

import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentSplitter;
import dev.langchain4j.data.segment.TextSegment;
import dev.langchain4j.internal.Utils;
import dev.langchain4j.internal.ValidationUtils;
import dev.langchain4j.model.Tokenizer;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;

/* loaded from: input_file:dev/langchain4j/data/document/splitter/HierarchicalDocumentSplitter.class */
/**
 * Base class for splitters that cut a document's text into parts and then pack as many
 * consecutive parts as fit into each {@link TextSegment}.
 *
 * <p>Subclasses decide how the text is cut ({@link #split(String)}) and which delimiter is
 * handed to the segment builder ({@link #joinDelimiter()}). A part that is too large for a
 * segment is delegated to a sub-splitter.
 *
 * <p>Sizes are measured in tokens when a {@link Tokenizer} is supplied and in characters
 * ({@link String#length()}) otherwise.
 */
public abstract class HierarchicalDocumentSplitter implements DocumentSplitter {
    /** Metadata key under which each produced segment's sequential position is stored. */
    private static final String INDEX = "index";

    /** Upper bound for the size of a single segment (characters or tokens, see {@link #tokenizer}). */
    protected final int maxSegmentSize;

    /** Upper bound for the size of the text carried over from one segment into the next. */
    protected final int maxOverlapSize;

    /** Tokenizer used to measure sizes; {@code null} means sizes are measured in characters. */
    protected final Tokenizer tokenizer;

    /** Splitter applied to text that does not fit into a single segment; may be {@code null}. */
    protected final DocumentSplitter subSplitter;

    /**
     * Creates a splitter that measures sizes in characters and uses {@link #defaultSubSplitter()}.
     *
     * @param maxSegmentSize maximum segment size
     * @param maxOverlapSize maximum overlap size
     */
    protected HierarchicalDocumentSplitter(int maxSegmentSize, int maxOverlapSize) {
        this(maxSegmentSize, maxOverlapSize, null, null);
    }

    /**
     * Creates a splitter that measures sizes in characters and uses the given sub-splitter.
     *
     * @param maxSegmentSize maximum segment size
     * @param maxOverlapSize maximum overlap size
     * @param subSplitter    splitter for oversized text; when {@code null},
     *                       {@link #defaultSubSplitter()} is used instead
     */
    protected HierarchicalDocumentSplitter(int maxSegmentSize, int maxOverlapSize, HierarchicalDocumentSplitter subSplitter) {
        this(maxSegmentSize, maxOverlapSize, null, subSplitter);
    }

    /**
     * Creates a splitter that measures sizes with the given tokenizer and uses
     * {@link #defaultSubSplitter()}.
     *
     * @param maxSegmentSize maximum segment size
     * @param maxOverlapSize maximum overlap size
     * @param tokenizer      tokenizer used to measure sizes; {@code null} means characters
     */
    protected HierarchicalDocumentSplitter(int maxSegmentSize, int maxOverlapSize, Tokenizer tokenizer) {
        this(maxSegmentSize, maxOverlapSize, tokenizer, null);
    }

    /**
     * Primary constructor; all other constructors delegate here.
     *
     * <p>Both sizes are checked through {@code ValidationUtils} (the exact failure behavior is
     * defined there and is not visible in this file). Note that {@link #defaultSubSplitter()}
     * is invoked from this constructor, i.e. before any subclass fields have been initialized,
     * so implementations must not depend on subclass instance state.
     *
     * @param maxSegmentSize maximum segment size; validated as greater than zero
     * @param maxOverlapSize maximum overlap size; validated against the range 0..maxSegmentSize
     * @param tokenizer      tokenizer used to measure sizes; {@code null} means characters
     * @param subSplitter    splitter for oversized text; when {@code null},
     *                       {@link #defaultSubSplitter()} is used instead
     */
    public HierarchicalDocumentSplitter(int maxSegmentSize, int maxOverlapSize, Tokenizer tokenizer, DocumentSplitter subSplitter) {
        this.maxSegmentSize = ValidationUtils.ensureGreaterThanZero(maxSegmentSize, "maxSegmentSize");
        // Validate against the already-checked field value, not the raw argument.
        this.maxOverlapSize = ValidationUtils.ensureBetween(maxOverlapSize, 0, this.maxSegmentSize, "maxOverlapSize");
        this.tokenizer = tokenizer;
        this.subSplitter = subSplitter == null ? defaultSubSplitter() : subSplitter;
    }

    /**
     * Cuts the given text into the parts that {@link #split(Document)} packs into segments,
     * in document order.
     *
     * @param str text to cut
     * @return the parts, in order
     */
    protected abstract String[] split(String str);

    /**
     * Returns the delimiter handed to the segment builder.
     * NOTE(review): presumably inserted between parts when they are joined into one segment;
     * confirm against {@code SegmentBuilder}.
     *
     * @return the delimiter
     */
    protected abstract String joinDelimiter();

    /**
     * Supplies the sub-splitter used when none was passed to the constructor.
     *
     * @return the fallback sub-splitter; if {@code null}, a part that does not fit into a
     *         segment makes {@link #split(Document)} throw
     */
    protected abstract DocumentSplitter defaultSubSplitter();

    /**
     * Splits the document into segments of at most {@link #maxSegmentSize}, each carrying a copy
     * of the document's metadata plus its sequential position under the {@code "index"} key.
     *
     * <p>Parts are appended to the current segment while they fit. When a part does not fit,
     * the current segment is emitted and the next one is seeded with an overlap taken from the
     * end of the emitted text. A part that still does not fit is passed, together with the
     * overlap, to the sub-splitter, and every segment it returns is emitted.
     *
     * @param document the document to split
     * @return the segments in document order
     * @throws RuntimeException if a part does not fit into a segment and no sub-splitter exists
     */
    public List<TextSegment> split(Document document) {
        ValidationUtils.ensureNotNull(document, "document");

        List<TextSegment> segments = new ArrayList<>();
        SegmentBuilder segmentBuilder = new SegmentBuilder(this.maxSegmentSize, this::sizeOf, joinDelimiter());
        int index = 0;
        // Overlap most recently seeded into the builder; a builder holding nothing but this
        // text has no new content and must not be emitted as a segment of its own.
        String overlap = null;

        for (String part : split(document.text())) {
            if (segmentBuilder.hasSpaceFor(part)) {
                segmentBuilder.append(part);
                continue;
            }

            // The part does not fit: flush what has been collected (if it is more than just the
            // overlap) and start the next segment with the overlap of the flushed text.
            if (segmentBuilder.isNotEmpty() && !segmentBuilder.build().equals(overlap)) {
                String segmentText = segmentBuilder.build();
                segments.add(createSegment(segmentText, document, index++));
                segmentBuilder.reset();
                overlap = overlapFrom(segmentText);
                segmentBuilder.append(overlap);
            }

            if (segmentBuilder.hasSpaceFor(part)) {
                segmentBuilder.append(part);
                continue;
            }

            // Even a fresh segment cannot hold this part: it has to be split further.
            if (this.subSplitter == null) {
                String unit = this.tokenizer == null ? "characters" : "tokens";
                throw new RuntimeException(String.format(
                        "The text \"%s...\" (%s %s long) doesn't fit into the maximum segment size (%s %s), and there is no subSplitter defined to split it further.",
                        Utils.firstChars(part, 30), sizeOf(part), unit, this.maxSegmentSize, unit));
            }

            segmentBuilder.append(part);
            for (TextSegment subSegment : this.subSplitter.split(Document.from(segmentBuilder.build()))) {
                segments.add(createSegment(subSegment.text(), document, index++));
            }
            segmentBuilder.reset();

            // Seed the next segment with the overlap of the last emitted segment.
            TextSegment lastSegment = segments.get(segments.size() - 1);
            overlap = overlapFrom(lastSegment.text());
            segmentBuilder.append(overlap);
        }

        // Emit the remainder unless it consists of nothing but the carried-over overlap.
        if (segmentBuilder.isNotEmpty() && !segmentBuilder.build().equals(overlap)) {
            segments.add(createSegment(segmentBuilder.build(), document, index++));
        }
        return segments;
    }

    /**
     * Builds the overlap for the next segment from the trailing sentences of the given text.
     *
     * <p>Sentences are taken from the end and prepended while they fit into
     * {@link #maxOverlapSize}; the collected text is returned as soon as a sentence no longer
     * fits. When overlap is disabled, or when every sentence fits (the loop never hits the
     * limit), an empty string is returned.
     * NOTE(review): the latter presumably avoids repeating the entire segment; confirm intent.
     *
     * @param segmentText text of the segment that was just emitted
     * @return the overlap text, possibly empty
     */
    private String overlapFrom(String segmentText) {
        if (this.maxOverlapSize == 0) {
            return "";
        }

        SegmentBuilder overlapBuilder = new SegmentBuilder(this.maxOverlapSize, this::sizeOf, joinDelimiter());
        String[] sentences = new DocumentBySentenceSplitter(1, 0, null, null).split(segmentText);
        for (int i = sentences.length - 1; i >= 0; i--) {
            String sentence = sentences[i];
            if (!overlapBuilder.hasSpaceFor(sentence)) {
                return overlapBuilder.build();
            }
            overlapBuilder.prepend(sentence);
        }
        return "";
    }

    /**
     * Measures the given text: an estimated token count when a tokenizer is configured,
     * the character count otherwise.
     *
     * @param text text to measure
     * @return the size of the text in the unit used by this splitter
     */
    private int sizeOf(String text) {
        if (this.tokenizer == null) {
            return text.length();
        }
        return this.tokenizer.estimateTokenCountInText(text);
    }

    /**
     * Creates a segment holding the given text and a copy of the document's metadata extended
     * with the segment's position under the {@code "index"} key.
     *
     * @param text     segment text
     * @param document source document whose metadata is copied
     * @param index    position of the segment within the output
     * @return the new segment
     */
    private static TextSegment createSegment(String text, Document document, int index) {
        return TextSegment.from(text, document.metadata().copy().add(INDEX, index));
    }
}
