package org.apache.spark.ml.feature;

import java.io.IOException;
import org.apache.spark.annotation.Experimental;
import org.apache.spark.ml.Estimator;
import org.apache.spark.ml.Pipeline;
import org.apache.spark.ml.PipelineStage;
import org.apache.spark.ml.feature.RFormulaBase;
import org.apache.spark.ml.linalg.VectorUDT;
import org.apache.spark.ml.param.BooleanParam;
import org.apache.spark.ml.param.Param;
import org.apache.spark.ml.param.ParamMap;
import org.apache.spark.ml.param.ParamValidators$;
import org.apache.spark.ml.param.shared.HasFeaturesCol;
import org.apache.spark.ml.param.shared.HasHandleInvalid;
import org.apache.spark.ml.param.shared.HasLabelCol;
import org.apache.spark.ml.util.DefaultParamsWritable;
import org.apache.spark.ml.util.Identifiable$;
import org.apache.spark.ml.util.MLReader;
import org.apache.spark.ml.util.MLWritable;
import org.apache.spark.ml.util.MLWriter;
import org.apache.spark.sql.Dataset;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.DoubleType$;
import org.apache.spark.sql.types.StringType$;
import org.apache.spark.sql.types.StructField;
import org.apache.spark.sql.types.StructField$;
import org.apache.spark.sql.types.StructType;
import scala.MatchError;
import scala.Predef$;
import scala.StringContext;
import scala.Tuple2;
import scala.collection.Seq;
import scala.collection.Seq$;
import scala.collection.TraversableLike;
import scala.collection.TraversableOnce;
import scala.collection.immutable.Nil$;
import scala.collection.mutable.ArrayBuffer;
import scala.collection.mutable.ArrayBuffer$;
import scala.collection.mutable.Map;
import scala.collection.mutable.Map$;
import scala.reflect.ClassTag$;
import scala.reflect.ScalaSignature;
import scala.runtime.BooleanRef;
import scala.runtime.BoxedUnit;
import scala.runtime.BoxesRunTime;

/* compiled from: RFormula.scala */
@ScalaSignature(bytes = "\u0006\u0001\u0005Ue\u0001B\u0001\u0003\u00015\u0011\u0001B\u0015$pe6,H.\u0019\u0006\u0003\u0007\u0011\tqAZ3biV\u0014XM\u0003\u0002\u0006\r\u0005\u0011Q\u000e\u001c\u0006\u0003\u000f!\tQa\u001d9be.T!!\u0003\u0006\u0002\r\u0005\u0004\u0018m\u00195f\u0015\u0005Y\u0011aA8sO\u000e\u00011\u0003\u0002\u0001\u000f-e\u00012a\u0004\t\u0013\u001b\u0005!\u0011BA\t\u0005\u0005%)5\u000f^5nCR|'\u000f\u0005\u0002\u0014)5\t!!\u0003\u0002\u0016\u0005\ti!KR8s[Vd\u0017-T8eK2\u0004\"aE\f\n\u0005a\u0011!\u0001\u0004*G_JlW\u000f\\1CCN,\u0007C\u0001\u000e\u001e\u001b\u0005Y\"B\u0001\u000f\u0005\u0003\u0011)H/\u001b7\n\u0005yY\"!\u0006#fM\u0006,H\u000e\u001e)be\u0006l7o\u0016:ji\u0006\u0014G.\u001a\u0005\tA\u0001\u0011)\u0019!C!C\u0005\u0019Q/\u001b3\u0016\u0003\t\u0002\"aI\u0015\u000f\u0005\u0011:S\"A\u0013\u000b\u0003\u0019\nQa]2bY\u0006L!\u0001K\u0013\u0002\rA\u0013X\rZ3g\u0013\tQ3F\u0001\u0004TiJLgn\u001a\u0006\u0003Q\u0015B3aH\u00174!\tq\u0013'D\u00010\u0015\t\u0001d!\u0001\u0006b]:|G/\u0019;j_:L!AM\u0018\u0003\u000bMKgnY3\"\u0003Q\nQ!\r\u00186]AB\u0001B\u000e\u0001\u0003\u0002\u0003\u0006IAI\u0001\u0005k&$\u0007\u0005K\u00026[MBQ!\u000f\u0001\u0005\u0002i\na\u0001P5oSRtDCA\u001e=!\t\u0019\u0002\u0001C\u0003!q\u0001\u0007!\u0005K\u0002=[MB3\u0001O\u00174\u0011\u0015I\u0004\u0001\"\u0001A)\u0005Y\u0004fA 
.g!)1\t\u0001C\u0001\t\u0006Q1/\u001a;G_JlW\u000f\\1\u0015\u0005\u00153U\"\u0001\u0001\t\u000b\u001d\u0013\u0005\u0019\u0001\u0012\u0002\u000bY\fG.^3)\u0007\tk3\u0007C\u0003K\u0001\u0011\u00051*\u0001\ttKRD\u0015M\u001c3mK&sg/\u00197jIR\u0011Q\t\u0014\u0005\u0006\u000f&\u0003\rA\t\u0015\u0004\u00136r\u0015%A(\u0002\u000bIr3G\f\u0019\t\u000bE\u0003A\u0011\u0001*\u0002\u001dM,GOR3biV\u0014Xm]\"pYR\u0011Qi\u0015\u0005\u0006\u000fB\u0003\rA\t\u0015\u0004!6\u001a\u0004\"\u0002,\u0001\t\u00039\u0016aC:fi2\u000b'-\u001a7D_2$\"!\u0012-\t\u000b\u001d+\u0006\u0019\u0001\u0012)\u0007Uk3\u0007C\u0003\\\u0001\u0011\u0005A,\u0001\ntKR4uN]2f\u0013:$W\r\u001f'bE\u0016dGCA#^\u0011\u00159%\f1\u0001_!\t!s,\u0003\u0002aK\t9!i\\8mK\u0006t\u0007f\u0001..E\u0006\n1-A\u00033]Er\u0003\u0007C\u0003f\u0001\u0011\u0005a-A\rtKR\u001cFO]5oO&sG-\u001a=fe>\u0013H-\u001a:UsB,GCA#h\u0011\u00159E\r1\u0001#Q\r!WF\u0014\u0005\u0007U\u0002!\t\u0001B6\u0002\u0019!\f7/\u00138uKJ\u001cW\r\u001d;\u0016\u0003yCQ!\u001c\u0001\u0005B9\f1AZ5u)\t\u0011r\u000eC\u0003qY\u0002\u0007\u0011/A\u0004eCR\f7/\u001a;1\u0005IT\bcA:wq6\tAO\u0003\u0002v\r\u0005\u00191/\u001d7\n\u0005]$(a\u0002#bi\u0006\u001cX\r\u001e\t\u0003sjd\u0001\u0001B\u0005|_\u0006\u0005\t\u0011!B\u0001y\n\u0019q\fJ\u0019\u0012\u0007u\f\t\u0001\u0005\u0002%}&\u0011q0\n\u0002\b\u001d>$\b.\u001b8h!\r!\u00131A\u0005\u0004\u0003\u000b)#aA!os\"\"A.LA\u0005C\t\tY!A\u00033]Ar\u0003\u0007C\u0004\u0002\u0010\u0001!\t%!\u0005\u0002\u001fQ\u0014\u0018M\\:g_Jl7k\u00195f[\u0006$B!a\u0005\u0002 
A!\u0011QCA\u000e\u001b\t\t9BC\u0002\u0002\u001aQ\fQ\u0001^=qKNLA!!\b\u0002\u0018\tQ1\u000b\u001e:vGR$\u0016\u0010]3\t\u0011\u0005\u0005\u0012Q\u0002a\u0001\u0003'\taa]2iK6\f\u0007\u0006BA\u0007[MBq!a\n\u0001\t\u0003\nI#\u0001\u0003d_BLHcA\u001e\u0002,!A\u0011QFA\u0013\u0001\u0004\ty#A\u0003fqR\u0014\u0018\r\u0005\u0003\u00022\u0005]RBAA\u001a\u0015\r\t)\u0004B\u0001\u0006a\u0006\u0014\u0018-\\\u0005\u0005\u0003s\t\u0019D\u0001\u0005QCJ\fW.T1qQ\u0011\t)#L\u001a\t\u000f\u0005}\u0002\u0001\"\u0011\u0002B\u0005AAo\\*ue&tw\rF\u0001#Q\u0015\ti$LA\u0005Q\r\u0001Qf\r\u0015\u0004\u0001\u0005%\u0003c\u0001\u0018\u0002L%\u0019\u0011QJ\u0018\u0003\u0019\u0015C\b/\u001a:j[\u0016tG/\u00197\b\u000f\u0005E#\u0001#\u0001\u0002T\u0005A!KR8s[Vd\u0017\rE\u0002\u0014\u0003+2a!\u0001\u0002\t\u0002\u0005]3\u0003CA+\u00033\ny&!\u001a\u0011\u0007\u0011\nY&C\u0002\u0002^\u0015\u0012a!\u00118z%\u00164\u0007\u0003\u0002\u000e\u0002bmJ1!a\u0019\u001c\u0005U!UMZ1vYR\u0004\u0016M]1ngJ+\u0017\rZ1cY\u0016\u00042\u0001JA4\u0013\r\tI'\n\u0002\r'\u0016\u0014\u0018.\u00197ju\u0006\u0014G.\u001a\u0005\bs\u0005UC\u0011AA7)\t\t\u0019\u0006\u0003\u0005\u0002r\u0005UC\u0011IA:\u0003\u0011aw.\u00193\u0015\u0007m\n)\bC\u0004\u0002x\u0005=\u0004\u0019\u0001\u0012\u0002\tA\fG\u000f\u001b\u0015\u0006\u0003_j\u0013\u0011\u0002\u0005\u000b\u0003{\n)&!A\u0005\n\u0005}\u0014a\u0003:fC\u0012\u0014Vm]8mm\u0016$\"!!!\u0011\t\u0005\r\u0015QR\u0007\u0003\u0003\u000bSA!a\"\u0002\n\u0006!A.\u00198h\u0015\t\tY)\u0001\u0003kCZ\f\u0017\u0002BAH\u0003\u000b\u0013aa\u00142kK\u000e$\b&BA+[\u0005%\u0001&BA([\u0005%\u0001")
@Experimental
/* loaded from: input_file:org/apache/spark/ml/feature/RFormula.class */
public class RFormula extends Estimator<RFormulaModel> implements RFormulaBase, DefaultParamsWritable {
    /*
     * Estimator that fits an RFormulaModel from the formula held in the `formula` param.
     *
     * This is the Java view of a Scala class: trait members are implemented by
     * delegating to the static `Cclass` helpers, and each trait-declared param is
     * stored in a field that the trait initialiser fills in through a synthetic
     * `..._setter_$..._$eq` method.
     */

    /** Unique identifier of this stage; fixed at construction. */
    private final String uid;

    // The fields below are assigned by the synthetic trait setters further down
    // (presumably invoked from the trait `$init$` calls in the constructor), so
    // they cannot be declared final in Java source.
    private Param<String> formula;
    private BooleanParam forceIndexLabel;
    private Param<String> handleInvalid;
    private Param<String> stringIndexerOrderType;
    private Param<String> labelCol;
    private Param<String> featuresCol;

    /** Returns the reader provided by the companion object. */
    public static MLReader<RFormula> read() {
        return RFormula$.MODULE$.read();
    }

    /** Loads an instance from {@code str} via the companion object. */
    public static RFormula load(String str) {
        return RFormula$.MODULE$.load(str);
    }

    /** Delegates to the {@code DefaultParamsWritable} trait implementation. */
    @Override // org.apache.spark.ml.util.DefaultParamsWritable, org.apache.spark.ml.util.MLWritable
    public MLWriter write() {
        return DefaultParamsWritable.Cclass.write(this);
    }

    /** Delegates to the {@code MLWritable} trait implementation. */
    @Override // org.apache.spark.ml.util.MLWritable
    public void save(String str) throws IOException {
        MLWritable.Cclass.save(this, str);
    }

    @Override // org.apache.spark.ml.feature.RFormulaBase
    public Param<String> formula() {
        return this.formula;
    }

    @Override // org.apache.spark.ml.feature.RFormulaBase
    public BooleanParam forceIndexLabel() {
        return this.forceIndexLabel;
    }

    @Override // org.apache.spark.ml.feature.RFormulaBase, org.apache.spark.ml.param.shared.HasHandleInvalid
    public final Param<String> handleInvalid() {
        return this.handleInvalid;
    }

    @Override // org.apache.spark.ml.feature.RFormulaBase
    public final Param<String> stringIndexerOrderType() {
        return this.stringIndexerOrderType;
    }

    // --- Synthetic setters used by the RFormulaBase trait initialiser. ---

    @Override // org.apache.spark.ml.feature.RFormulaBase
    public void org$apache$spark$ml$feature$RFormulaBase$_setter_$formula_$eq(Param param) {
        this.formula = param;
    }

    @Override // org.apache.spark.ml.feature.RFormulaBase
    public void org$apache$spark$ml$feature$RFormulaBase$_setter_$forceIndexLabel_$eq(BooleanParam booleanParam) {
        this.forceIndexLabel = booleanParam;
    }

    @Override // org.apache.spark.ml.feature.RFormulaBase
    public final void org$apache$spark$ml$feature$RFormulaBase$_setter_$handleInvalid_$eq(Param param) {
        this.handleInvalid = param;
    }

    @Override // org.apache.spark.ml.feature.RFormulaBase
    public final void org$apache$spark$ml$feature$RFormulaBase$_setter_$stringIndexerOrderType_$eq(Param param) {
        this.stringIndexerOrderType = param;
    }

    // --- Getters implemented by the RFormulaBase trait. ---

    @Override // org.apache.spark.ml.feature.RFormulaBase
    public String getFormula() {
        return RFormulaBase.Cclass.getFormula(this);
    }

    @Override // org.apache.spark.ml.feature.RFormulaBase
    public boolean getForceIndexLabel() {
        return RFormulaBase.Cclass.getForceIndexLabel(this);
    }

    @Override // org.apache.spark.ml.feature.RFormulaBase
    public String getStringIndexerOrderType() {
        return RFormulaBase.Cclass.getStringIndexerOrderType(this);
    }

    @Override // org.apache.spark.ml.feature.RFormulaBase
    public boolean hasLabelCol(StructType structType) {
        return RFormulaBase.Cclass.hasLabelCol(this, structType);
    }

    /**
     * No-op: the value supplied for the {@code HasHandleInvalid} trait is discarded.
     * The {@code handleInvalid} field is only ever assigned through the
     * {@code RFormulaBase} setter (presumably because that trait overrides the param).
     */
    @Override // org.apache.spark.ml.param.shared.HasHandleInvalid
    public void org$apache$spark$ml$param$shared$HasHandleInvalid$_setter_$handleInvalid_$eq(Param param) {
    }

    @Override // org.apache.spark.ml.param.shared.HasHandleInvalid
    public final String getHandleInvalid() {
        return HasHandleInvalid.Cclass.getHandleInvalid(this);
    }

    @Override // org.apache.spark.ml.param.shared.HasLabelCol
    public final Param<String> labelCol() {
        return this.labelCol;
    }

    @Override // org.apache.spark.ml.param.shared.HasLabelCol
    public final void org$apache$spark$ml$param$shared$HasLabelCol$_setter_$labelCol_$eq(Param param) {
        this.labelCol = param;
    }

    @Override // org.apache.spark.ml.param.shared.HasLabelCol
    public final String getLabelCol() {
        return HasLabelCol.Cclass.getLabelCol(this);
    }

    @Override // org.apache.spark.ml.param.shared.HasFeaturesCol
    public final Param<String> featuresCol() {
        return this.featuresCol;
    }

    @Override // org.apache.spark.ml.param.shared.HasFeaturesCol
    public final void org$apache$spark$ml$param$shared$HasFeaturesCol$_setter_$featuresCol_$eq(Param param) {
        this.featuresCol = param;
    }

    @Override // org.apache.spark.ml.param.shared.HasFeaturesCol
    public final String getFeaturesCol() {
        return HasFeaturesCol.Cclass.getFeaturesCol(this);
    }

    @Override // org.apache.spark.ml.util.Identifiable
    public String uid() {
        return this.uid;
    }

    // --- Fluent param setters. Each stores the value via set(...) and returns this stage. ---

    /** Sets the {@code formula} param. */
    public RFormula setFormula(String str) {
        return (RFormula) set(formula(), str);
    }

    /** Sets the {@code handleInvalid} param. */
    public RFormula setHandleInvalid(String str) {
        return (RFormula) set(handleInvalid(), str);
    }

    /** Sets the {@code featuresCol} param. */
    public RFormula setFeaturesCol(String str) {
        return (RFormula) set(featuresCol(), str);
    }

    /** Sets the {@code labelCol} param. */
    public RFormula setLabelCol(String str) {
        return (RFormula) set(labelCol(), str);
    }

    /** Sets the {@code forceIndexLabel} param (boxed, as the param stores an object value). */
    public RFormula setForceIndexLabel(boolean z) {
        return (RFormula) set(forceIndexLabel(), BoxesRunTime.boxToBoolean(z));
    }

    /** Sets the {@code stringIndexerOrderType} param. */
    public RFormula setStringIndexerOrderType(String str) {
        return (RFormula) set(stringIndexerOrderType(), str);
    }

    /**
     * Reports whether the parsed formula has an intercept.
     * Fails the {@code require} check if the {@code formula} param is not defined.
     */
    public boolean hasIntercept() {
        Predef$.MODULE$.require(isDefined(formula()), new RFormula$$anonfun$hasIntercept$1(this));
        return RFormulaParser$.MODULE$.parse((String) $(formula())).hasIntercept();
    }

    /**
     * Fits an {@link RFormulaModel} on {@code dataset}.
     *
     * <p>Validates the schema, resolves the parsed formula against it, assembles a
     * list of pipeline stages (optional one-hot encoder, vector assembler, attribute
     * rewriter, column pruner, optional label indexer), fits them as a
     * {@link Pipeline} and wraps the result in an {@code RFormulaModel}.
     *
     * <p>The label is string-indexed only when the label column exists with
     * {@code StringType}, or when {@code forceIndexLabel} is set.
     *
     * <p>Note: no explicit raw-typed {@code fit(Dataset)} bridge is declared. It
     * would have the same erasure as this method, and the Java compiler generates
     * the bridge for the covariant return type on its own.
     *
     * @param dataset input data; its schema is used to resolve the formula terms
     * @return the fitted model, with this estimator as parent and params copied over
     */
    @Override // org.apache.spark.ml.Estimator
    public RFormulaModel fit(Dataset<?> dataset) {
        transformSchema(dataset.schema(), true);
        Predef$.MODULE$.require(isDefined(formula()), new RFormula$$anonfun$fit$1(this));
        ResolvedRFormula resolved = RFormulaParser$.MODULE$.parse((String) $(formula())).resolve(dataset.schema());

        // Mutable state shared with the two closures below; they fill these as a side effect.
        ArrayBuffer stages = ArrayBuffer$.MODULE$.apply(Nil$.MODULE$);
        ArrayBuffer oneHotColumnPairs = ArrayBuffer$.MODULE$.apply(Nil$.MODULE$);
        Map attributeRewrites = Map$.MODULE$.apply(Nil$.MODULE$);
        ArrayBuffer columnsToPrune = ArrayBuffer$.MODULE$.apply(Nil$.MODULE$);

        // First pass over the distinct individual terms. The closure body lives in
        // RFormula$$anonfun$1 and is not visible here (presumably it indexes string columns).
        scala.collection.immutable.Map perTermColumns =
                ((TraversableOnce) ((TraversableLike) resolved.terms().flatten(Predef$.MODULE$.$conforms()).distinct())
                        .map(new RFormula$$anonfun$1(this, dataset, stages, attributeRewrites, columnsToPrune),
                                Seq$.MODULE$.canBuildFrom()))
                        .toMap(Predef$.MODULE$.$conforms());

        // Second pass maps every term to the name of the column fed to the assembler.
        Seq assembledColumns = (Seq) resolved.terms().map(
                new RFormula$$anonfun$2(this, dataset, stages, oneHotColumnPairs, attributeRewrites, columnsToPrune,
                        perTermColumns, BooleanRef.create(false)),
                Seq$.MODULE$.canBuildFrom());

        if (oneHotColumnPairs.nonEmpty()) {
            Tuple2 unzipped = Predef$.MODULE$.refArrayOps((Object[]) oneHotColumnPairs.toArray(ClassTag$.MODULE$.apply(Tuple2.class)))
                    .unzip(Predef$.MODULE$.$conforms(), ClassTag$.MODULE$.apply(String.class), ClassTag$.MODULE$.apply(String.class));
            if (unzipped == null) {
                throw new MatchError(unzipped);
            }
            String[] inputCols = (String[]) unzipped._1();
            String[] outputCols = (String[]) unzipped._2();
            stages.$plus$eq(new OneHotEncoderEstimator(uid()).setInputCols(inputCols).setOutputCols(outputCols).setDropLast(true));
        }

        stages.$plus$eq(new VectorAssembler(uid())
                .setInputCols((String[]) assembledColumns.toArray(ClassTag$.MODULE$.apply(String.class)))
                .setOutputCol((String) $(featuresCol())));
        stages.$plus$eq(new VectorAttributeRewriter((String) $(featuresCol()), attributeRewrites.toMap(Predef$.MODULE$.$conforms())));
        stages.$plus$eq(new ColumnPruner(columnsToPrune.toSet()));

        // Index the label only for a string-typed label column, or when explicitly forced.
        boolean labelIsString = false;
        if (Predef$.MODULE$.refArrayOps(dataset.schema().fieldNames()).contains(resolved.label())) {
            DataType labelType = dataset.schema().apply(resolved.label()).dataType();
            labelIsString = labelType != null && labelType.equals(StringType$.MODULE$);
        }
        if (labelIsString || BoxesRunTime.unboxToBoolean($(forceIndexLabel()))) {
            stages.$plus$eq(new StringIndexer()
                    .setInputCol(resolved.label())
                    .setOutputCol((String) $(labelCol()))
                    .setHandleInvalid((String) $(handleInvalid())));
        }

        return (RFormulaModel) copyValues(
                new RFormulaModel(uid(), resolved,
                        new Pipeline(uid())
                                .setStages((PipelineStage[]) stages.toArray(ClassTag$.MODULE$.apply(PipelineStage.class)))
                                .fit(dataset))
                        .setParent(this),
                copyValues$default$2());
    }

    /**
     * Derives the output schema: the input fields plus a nullable vector column
     * named by {@code featuresCol}. When the input has no label column, a nullable
     * double column named by {@code labelCol} is appended as well.
     *
     * <p>Fails the {@code require} check when the label column already exists and
     * {@code forceIndexLabel} is true.
     */
    @Override // org.apache.spark.ml.PipelineStage
    public StructType transformSchema(StructType structType) {
        boolean labelPresent = hasLabelCol(structType);
        Predef$.MODULE$.require(!(labelPresent && BoxesRunTime.unboxToBoolean($(forceIndexLabel()))),
                new RFormula$$anonfun$transformSchema$1(this));

        StructField featuresField = new StructField((String) $(featuresCol()), new VectorUDT(), true, StructField$.MODULE$.apply$default$4());
        StructField[] withFeatures = (StructField[]) Predef$.MODULE$.refArrayOps(structType.fields())
                .$colon$plus(featuresField, ClassTag$.MODULE$.apply(StructField.class));
        if (labelPresent) {
            return new StructType(withFeatures);
        }

        StructField labelField = new StructField((String) $(labelCol()), DoubleType$.MODULE$, true, StructField$.MODULE$.apply$default$4());
        return new StructType((StructField[]) Predef$.MODULE$.refArrayOps(withFeatures)
                .$colon$plus(labelField, ClassTag$.MODULE$.apply(StructField.class)));
    }

    /** Copies this estimator via {@code defaultCopy}, applying the extra params. */
    @Override // org.apache.spark.ml.Estimator, org.apache.spark.ml.PipelineStage, org.apache.spark.ml.param.Params
    public RFormula copy(ParamMap paramMap) {
        return (RFormula) defaultCopy(paramMap);
    }

    /**
     * Renders as {@code RFormula(<formula>) (uid=<uid>)}; when the formula param is
     * unset, the fallback value comes from {@code RFormula$$anonfun$toString$1}.
     */
    @Override // org.apache.spark.ml.PipelineStage, org.apache.spark.ml.util.Identifiable
    public String toString() {
        Object formulaText = get(formula()).getOrElse(new RFormula$$anonfun$toString$1(this));
        return new StringContext(Predef$.MODULE$.wrapRefArray(new String[]{"RFormula(", ") (uid=", ")"}))
                .s(Predef$.MODULE$.genericWrapArray(new Object[]{formulaText, uid()}));
    }

    /**
     * Helper for the closures used in {@link #fit}: generates a random UID with
     * prefix {@code str}, records it in {@code arrayBuffer} and returns it.
     */
    public final String org$apache$spark$ml$feature$RFormula$$tmpColumn$1(String str, ArrayBuffer arrayBuffer) {
        String randomUID = Identifiable$.MODULE$.randomUID(str);
        arrayBuffer.$plus$eq(randomUID);
        return randomUID;
    }

    /**
     * Creates an estimator with the given uid and runs the trait initialisers in
     * their original order. The {@code Param} built for {@code HasHandleInvalid} is
     * passed to a no-op setter and is therefore not retained.
     */
    public RFormula(String str) {
        this.uid = str;
        HasFeaturesCol.Cclass.$init$(this);
        HasLabelCol.Cclass.$init$(this);
        org$apache$spark$ml$param$shared$HasHandleInvalid$_setter_$handleInvalid_$eq(new Param(this, "handleInvalid", "how to handle invalid entries. Options are skip (which will filter out rows with bad values), or error (which will throw an error). More options may be added later", ParamValidators$.MODULE$.inArray(new String[]{"skip", "error"})));
        RFormulaBase.Cclass.$init$(this);
        MLWritable.Cclass.$init$(this);
        DefaultParamsWritable.Cclass.$init$(this);
    }

    /** Creates an estimator with a random uid prefixed {@code "rFormula"}. */
    public RFormula() {
        this(Identifiable$.MODULE$.randomUID("rFormula"));
    }
}
