package com.mayabot.nlp.segment.plugins.pos;

import com.mayabot.nlp.common.FastStringBuilder;
import com.mayabot.nlp.perceptron.DATFeatureSetBuilder;
import com.mayabot.nlp.perceptron.FeatureSet;
import com.mayabot.nlp.perceptron.Perceptron;
import com.mayabot.nlp.perceptron.PerceptronTrainer;
import com.mayabot.nlp.perceptron.TrainSample;
import com.mayabot.nlp.segment.Nature;
import com.mayabot.nlp.segment.common.PerceptronUtilsKt;
import com.mayabot.nlp.segment.common.PkuWord;
import com.mayabot.nlp.utils.CharNormUtils;
import com.mayabot.t.google.common.base.Ascii;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.PrintStream;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import kotlin.Metadata;
import kotlin.Pair;
import kotlin.TypeCastException;
import kotlin.Unit;
import kotlin.collections.CollectionsKt;
import kotlin.io.CloseableKt;
import kotlin.io.TextStreamsKt;
import kotlin.jvm.functions.Function1;
import kotlin.jvm.internal.Intrinsics;
import kotlin.ranges.RangesKt;
import kotlin.text.Charsets;
import kotlin.text.StringsKt;
import org.jetbrains.annotations.NotNull;

/* compiled from: POSPerceptron.kt */
@Metadata(mv = {1, 1, Ascii.CR}, bv = {1, 0, 3}, k = 1, d1 = {"��D\n\u0002\u0018\u0002\n\u0002\u0010��\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0005\n\u0002\u0010$\n\u0002\u0010\u000e\n\u0002\u0010\b\n\u0002\b\u0003\n\u0002\u0010 \n\u0002\u0018\u0002\n��\n\u0002\u0018\u0002\n��\n\u0002\u0010\u0002\n\u0002\b\u0002\n\u0002\u0018\u0002\n\u0002\b\u0005\u0018��2\u00020\u0001B\u0005¢\u0006\u0002\u0010\u0002J\u001a\u0010\u000f\u001a\b\u0012\u0004\u0012\u00020\u00110\u00102\f\u0010\u0012\u001a\b\u0012\u0004\u0012\u00020\u00130\u0010J\u0016\u0010\u0014\u001a\u00020\u00152\f\u0010\u0016\u001a\b\u0012\u0004\u0012\u00020\u00130\u0010H\u0002J&\u0010\u0017\u001a\u00020\u00182\u0006\u0010\u0019\u001a\u00020\u00132\u0006\u0010\u001a\u001a\u00020\u00132\u0006\u0010\u001b\u001a\u00020\f2\u0006\u0010\u001c\u001a\u00020\fR\u001a\u0010\u0003\u001a\u00020\u0004X\u0086.¢\u0006\u000e\n��\u001a\u0004\b\u0005\u0010\u0006\"\u0004\b\u0007\u0010\bR\u001d\u0010\t\u001a\u000e\u0012\u0004\u0012\u00020\u000b\u0012\u0004\u0012\u00020\f0\n¢\u0006\b\n��\u001a\u0004\b\r\u0010\u000e¨\u0006\u001d"}, d2 = {"Lcom/mayabot/nlp/segment/plugins/pos/POSPerceptronTrainer;", "", "()V", "featureSet", "Lcom/mayabot/nlp/perceptron/FeatureSet;", "getFeatureSet", "()Lcom/mayabot/nlp/perceptron/FeatureSet;", "setFeatureSet", "(Lcom/mayabot/nlp/perceptron/FeatureSet;)V", "labelMap", "", "", "", "getLabelMap", "()Ljava/util/Map;", "loadSamples", "", "Lcom/mayabot/nlp/perceptron/TrainSample;", "files", "Ljava/io/File;", "prepareFeatureSet", "", "corposFiles", "train", "Lcom/mayabot/nlp/segment/plugins/pos/POSPerceptron;", "trainFile", "evaluate", "maxIter", "threadNumber", "mynlp-segment"})
/* loaded from: input_file:com/mayabot/nlp/segment/plugins/pos/POSPerceptronTrainer.class */
public final class POSPerceptronTrainer {

    @NotNull
    public FeatureSet featureSet;

    @NotNull
    private final Map<String, Integer> labelMap;

    @NotNull
    public final FeatureSet getFeatureSet() {
        FeatureSet featureSet = this.featureSet;
        if (featureSet == null) {
            Intrinsics.throwUninitializedPropertyAccessException("featureSet");
        }
        return featureSet;
    }

    public final void setFeatureSet(@NotNull FeatureSet featureSet) {
        Intrinsics.checkParameterIsNotNull(featureSet, "<set-?>");
        this.featureSet = featureSet;
    }

    @NotNull
    public final Map<String, Integer> getLabelMap() {
        return this.labelMap;
    }

    @NotNull
    public final POSPerceptron train(@NotNull File file, @NotNull File file2, int i, int i2) {
        Intrinsics.checkParameterIsNotNull(file, "trainFile");
        Intrinsics.checkParameterIsNotNull(file2, "evaluate");
        List<File> allFiles = PerceptronUtilsKt.allFiles(file);
        prepareFeatureSet(allFiles);
        StringBuilder append = new StringBuilder().append("Feature Set Size ");
        FeatureSet featureSet = this.featureSet;
        if (featureSet == null) {
            Intrinsics.throwUninitializedPropertyAccessException("featureSet");
        }
        System.out.println((Object) append.append(featureSet.size()).toString());
        List<TrainSample> loadSamples = loadSamples(allFiles);
        List<TrainSample> loadSamples2 = Intrinsics.areEqual(file2, file) ? loadSamples : loadSamples(PerceptronUtilsKt.allFiles(file2));
        System.out.println((Object) "Start Train ... ");
        FeatureSet featureSet2 = this.featureSet;
        if (featureSet2 == null) {
            Intrinsics.throwUninitializedPropertyAccessException("featureSet");
        }
        Perceptron train = new PerceptronTrainer(featureSet2, this.labelMap.size(), loadSamples, new POSEvaluateRunner(loadSamples2), i, true).train(i2);
        System.out.println((Object) "--------------------");
        new POSEvaluateRunner(loadSamples2).run(0, train);
        List sorted = CollectionsKt.sorted(this.labelMap.keySet());
        if (sorted == null) {
            throw new TypeCastException("null cannot be cast to non-null type java.util.Collection<T>");
        }
        Object[] array = sorted.toArray(new String[0]);
        if (array == null) {
            throw new TypeCastException("null cannot be cast to non-null type kotlin.Array<T>");
        }
        return new POSPerceptron(train, (String[]) array);
    }

    /* JADX WARN: Type inference failed for: r0v1, types: [com.mayabot.nlp.segment.plugins.pos.POSPerceptronTrainer$loadSamples$1] */
    @NotNull
    public final List<TrainSample> loadSamples(@NotNull List<? extends File> list) {
        BufferedReader bufferedReader;
        Intrinsics.checkParameterIsNotNull(list, "files");
        ?? r0 = new Function1<List<? extends PkuWord>, TrainSample>() { // from class: com.mayabot.nlp.segment.plugins.pos.POSPerceptronTrainer$loadSamples$1
            @NotNull
            public final TrainSample invoke(@NotNull List<PkuWord> list2) {
                Intrinsics.checkParameterIsNotNull(list2, "line");
                FastStringBuilder fastStringBuilder = new FastStringBuilder(100);
                List<PkuWord> list3 = list2;
                ArrayList arrayList = new ArrayList(CollectionsKt.collectionSizeOrDefault(list3, 10));
                Iterator<T> it = list3.iterator();
                while (it.hasNext()) {
                    arrayList.add(((PkuWord) it.next()).getWord());
                }
                ArrayList arrayList2 = arrayList;
                List<PkuWord> list4 = list2;
                ArrayList arrayList3 = new ArrayList(CollectionsKt.collectionSizeOrDefault(list4, 10));
                Iterator<T> it2 = list4.iterator();
                while (it2.hasNext()) {
                    Integer num = POSPerceptronTrainer.this.getLabelMap().get(((PkuWord) it2.next()).getPos());
                    if (num == null) {
                        Intrinsics.throwNpe();
                    }
                    arrayList3.add(Integer.valueOf(num.intValue()));
                }
                int[] intArray = CollectionsKt.toIntArray(arrayList3);
                ArrayList arrayList4 = new ArrayList(arrayList2.size());
                int size = arrayList2.size();
                for (int i = 0; i < size; i++) {
                    arrayList4.add(POSPerceptronFeature.extractFeatureVector(arrayList2, arrayList2.size(), i, POSPerceptronTrainer.this.getFeatureSet(), fastStringBuilder));
                }
                return new TrainSample(arrayList4, intArray);
            }

            /* JADX INFO: Access modifiers changed from: package-private */
            {
                super(1);
            }
        };
        int i = 0;
        Iterator<T> it = list.iterator();
        while (it.hasNext()) {
            Reader inputStreamReader = new InputStreamReader(new FileInputStream((File) it.next()), Charsets.UTF_8);
            bufferedReader = inputStreamReader instanceof BufferedReader ? (BufferedReader) inputStreamReader : new BufferedReader(inputStreamReader, 8192);
            Throwable th = (Throwable) null;
            try {
                try {
                    Iterator it2 = TextStreamsKt.lineSequence(bufferedReader).iterator();
                    while (it2.hasNext()) {
                        if (!StringsKt.isBlank((String) it2.next())) {
                            i++;
                        }
                    }
                    Unit unit = Unit.INSTANCE;
                    CloseableKt.closeFinally(bufferedReader, th);
                } finally {
                }
            } finally {
            }
        }
        System.out.println((Object) ("Will load " + i + " Sample"));
        ArrayList arrayList = new ArrayList(i + 10);
        int i2 = 0;
        System.out.print("Load 0%");
        Iterator<T> it3 = list.iterator();
        while (it3.hasNext()) {
            Reader inputStreamReader2 = new InputStreamReader(new FileInputStream((File) it3.next()), Charsets.UTF_8);
            bufferedReader = inputStreamReader2 instanceof BufferedReader ? (BufferedReader) inputStreamReader2 : new BufferedReader(inputStreamReader2, 8192);
            Throwable th2 = (Throwable) null;
            try {
                try {
                    Iterator it4 = TextStreamsKt.lineSequence(bufferedReader).iterator();
                    while (it4.hasNext()) {
                        List<PkuWord> parseToFlatWords = PerceptronUtilsKt.parseToFlatWords((String) it4.next());
                        ArrayList arrayList2 = new ArrayList();
                        for (Object obj : parseToFlatWords) {
                            PkuWord pkuWord = (PkuWord) obj;
                            if ((pkuWord.getWord().length() > 0) && (Intrinsics.areEqual(pkuWord.getPos(), "") ^ true)) {
                                arrayList2.add(obj);
                            }
                        }
                        ArrayList<PkuWord> arrayList3 = arrayList2;
                        for (PkuWord pkuWord2 : arrayList3) {
                            String convert = CharNormUtils.convert(pkuWord2.getWord());
                            Intrinsics.checkExpressionValueIsNotNull(convert, "CharNormUtils.convert(it.word)");
                            pkuWord2.setWord(convert);
                        }
                        arrayList.add(r0.invoke(arrayList3));
                        i2++;
                        if (i2 % 100 == 0) {
                            PrintStream printStream = System.out;
                            StringBuilder append = new StringBuilder().append("\rLoad ");
                            Object[] objArr = {Double.valueOf((i2 * 100.0d) / arrayList.size())};
                            String format = String.format("%.2f", Arrays.copyOf(objArr, objArr.length));
                            Intrinsics.checkExpressionValueIsNotNull(format, "java.lang.String.format(this, *args)");
                            printStream.print(append.append(format).append('%').toString());
                        }
                    }
                    Unit unit2 = Unit.INSTANCE;
                    CloseableKt.closeFinally(bufferedReader, th2);
                } finally {
                }
            } finally {
            }
        }
        System.out.print("\r");
        return arrayList;
    }

    private final void prepareFeatureSet(List<? extends File> list) {
        System.out.println((Object) "开始构建POS FeatureSet");
        long currentTimeMillis = System.currentTimeMillis();
        final DATFeatureSetBuilder dATFeatureSetBuilder = new DATFeatureSetBuilder(this.labelMap.size());
        Consumer<String> consumer = new Consumer<String>() { // from class: com.mayabot.nlp.segment.plugins.pos.POSPerceptronTrainer$prepareFeatureSet$fit$1
            @Override // java.util.function.Consumer
            public final void accept(@NotNull String str) {
                Intrinsics.checkParameterIsNotNull(str, "f");
                DATFeatureSetBuilder.this.put(str);
            }
        };
        for (File file : list) {
            System.out.println((Object) file.getAbsolutePath());
            Reader inputStreamReader = new InputStreamReader(new FileInputStream(file), Charsets.UTF_8);
            BufferedReader bufferedReader = inputStreamReader instanceof BufferedReader ? (BufferedReader) inputStreamReader : new BufferedReader(inputStreamReader, 8192);
            Throwable th = (Throwable) null;
            try {
                try {
                    Iterator it = TextStreamsKt.lineSequence(bufferedReader).iterator();
                    while (it.hasNext()) {
                        List<PkuWord> parseToFlatWords = PerceptronUtilsKt.parseToFlatWords((String) it.next());
                        ArrayList arrayList = new ArrayList(CollectionsKt.collectionSizeOrDefault(parseToFlatWords, 10));
                        Iterator<T> it2 = parseToFlatWords.iterator();
                        while (it2.hasNext()) {
                            arrayList.add(CharNormUtils.convert(((PkuWord) it2.next()).getWord()));
                        }
                        ArrayList arrayList2 = arrayList;
                        ArrayList arrayList3 = new ArrayList();
                        for (Object obj : arrayList2) {
                            String str = (String) obj;
                            Intrinsics.checkExpressionValueIsNotNull(str, "it");
                            if (str.length() > 0) {
                                arrayList3.add(obj);
                            }
                        }
                        ArrayList arrayList4 = arrayList3;
                        int size = arrayList4.size();
                        for (int i = 0; i < size; i++) {
                            POSPerceptronFeature.extractFeature(arrayList4, arrayList4.size(), i, consumer);
                        }
                    }
                    Unit unit = Unit.INSTANCE;
                    CloseableKt.closeFinally(bufferedReader, th);
                } finally {
                }
            } catch (Throwable th2) {
                CloseableKt.closeFinally(bufferedReader, th);
                throw th2;
            }
        }
        System.out.println((Object) "Start build featureSet ...");
        this.featureSet = dATFeatureSetBuilder.build();
        System.out.println((Object) ("FeatureSet构建完成,用时" + (System.currentTimeMillis() - currentTimeMillis) + "ms"));
    }

    public POSPerceptronTrainer() {
        Nature[] values = Nature.values();
        ArrayList arrayList = new ArrayList();
        int length = values.length;
        for (int i = 0; i < length; i++) {
            Nature nature = values[i];
            if ((nature == Nature.newWord || nature == Nature.begin || nature == Nature.end) ? false : true) {
                arrayList.add(nature);
            }
        }
        ArrayList arrayList2 = arrayList;
        ArrayList arrayList3 = new ArrayList(CollectionsKt.collectionSizeOrDefault(arrayList2, 10));
        Iterator it = arrayList2.iterator();
        while (it.hasNext()) {
            arrayList3.add(((Nature) it.next()).name());
        }
        List sorted = CollectionsKt.sorted(arrayList3);
        HashMap hashMap = new HashMap(Nature.values().length * 3);
        for (Pair pair : CollectionsKt.zip(sorted, RangesKt.until(0, sorted.size()))) {
            hashMap.put(pair.getFirst(), pair.getSecond());
        }
        this.labelMap = hashMap;
    }
}
