package org.carrot2.text.clustering;

import com.google.common.base.Function;
import com.google.common.base.Predicate;
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableListMultimap;
import com.google.common.collect.Iterators;
import com.google.common.collect.Lists;
import com.google.common.collect.Maps;
import com.google.common.collect.Multimaps;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;
import org.carrot2.core.Cluster;
import org.carrot2.core.Document;
import org.carrot2.core.LanguageCode;
import org.carrot2.core.attribute.Processing;
import org.carrot2.util.attribute.Attribute;
import org.carrot2.util.attribute.Bindable;
import org.carrot2.util.attribute.Input;
import org.carrot2.util.attribute.Required;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

/**
 * Clusters a mixed-language list of documents by running a monolingual clustering
 * algorithm once per language and then combining the per-language results according
 * to {@link #languageAggregationStrategy}.
 */
@Bindable(prefix = "MultilingualClustering")
public class MultilingualClustering {
    private static final Logger logger = LoggerFactory.getLogger(MultilingualClustering.class);

    /**
     * Multimap key used for documents whose language is not set. A string key is used
     * for grouping so that "no language" can be represented without a {@code null} key.
     */
    private static final String UNKNOWN_LANGUAGE_KEY = "";

    /** Decides how the per-language clusters are combined by {@link #process}. */
    @Processing
    @Required
    @Input
    @Attribute
    public LanguageAggregationStrategy languageAggregationStrategy = LanguageAggregationStrategy.FLATTEN_MAJOR_LANGUAGE;

    /**
     * Language handed to the monolingual algorithm for documents whose own language
     * is not set.
     */
    @Processing
    @Required
    @Input
    @Attribute
    public LanguageCode defaultLanguage = LanguageCode.ENGLISH;

    /** Ways of combining the clusters produced for the individual languages. */
    public enum LanguageAggregationStrategy {
        /** Subclusters of every language are returned together in one flat list. */
        FLATTEN_ALL("Flatten clusters from all languages"),

        /**
         * Subclusters of the first language cluster (in sorted order) that has any
         * subclusters are returned at the top level; the remaining language clusters
         * are placed under a single "Other Languages" cluster.
         */
        FLATTEN_MAJOR_LANGUAGE("Flatten clusters from the majority language"),

        /** One parent cluster per language is returned. */
        FLATTEN_NONE("Dedicated parent cluster for each language");

        /** Human-readable description returned by {@link #toString()}. */
        private final String label;

        LanguageAggregationStrategy(String str) {
            this.label = str;
        }

        /** Returns the human-readable label rather than the constant name. */
        @Override
        public String toString() {
            return this.label;
        }
    }

    /**
     * Clusters the documents language by language and aggregates the results.
     *
     * <p>If the strategy is {@link LanguageAggregationStrategy#FLATTEN_ALL}, or only one
     * language group exists, the subclusters of all language clusters are flattened into
     * one list. Otherwise the language clusters are sorted with the reverse of
     * {@code Cluster.BY_SIZE_COMPARATOR} and either returned as they are or, for
     * {@link LanguageAggregationStrategy#FLATTEN_MAJOR_LANGUAGE}, partially flattened.
     *
     * @param list documents to cluster
     * @param iMonolingualClusteringAlgorithm algorithm invoked once per language group
     * @return the aggregated clusters; a new empty list when {@code list} is empty
     */
    public List<Cluster> process(List<Document> list, IMonolingualClusteringAlgorithm iMonolingualClusteringAlgorithm) {
        if (list.isEmpty()) {
            return Lists.newArrayList();
        }

        Map<LanguageCode, Cluster> clustersByLanguage = clusterByLanguage(list, iMonolingualClusteringAlgorithm);
        List<Cluster> languageClusters = Lists.newArrayList(clustersByLanguage.values());

        // With a single language there is nothing to aggregate, so flatten regardless of strategy.
        if (LanguageAggregationStrategy.FLATTEN_ALL.equals(this.languageAggregationStrategy)
                || clustersByLanguage.size() == 1) {
            return flattenAll(list, languageClusters);
        }

        Collections.sort(languageClusters, Collections.reverseOrder(Cluster.BY_SIZE_COMPARATOR));
        if (!LanguageAggregationStrategy.FLATTEN_MAJOR_LANGUAGE.equals(this.languageAggregationStrategy)) {
            return languageClusters;
        }
        return flattenMajorLanguage(languageClusters);
    }

    /**
     * Collects the subclusters of every language cluster, skipping those flagged as
     * "other topics", then lets {@code Cluster.appendOtherTopics} complete the list.
     */
    private static List<Cluster> flattenAll(List<Document> documents, List<Cluster> languageClusters) {
        List<Cluster> flattened = Lists.newArrayList();
        for (Cluster languageCluster : languageClusters) {
            for (Cluster subcluster : languageCluster.getSubclusters()) {
                if (!subcluster.isOtherTopics()) {
                    flattened.add(subcluster);
                }
            }
        }
        Cluster.appendOtherTopics(documents, flattened);
        return flattened;
    }

    /**
     * Promotes the subclusters of the first language cluster that has any and groups
     * the remaining language clusters under an "Other Languages" cluster. When no
     * language cluster has subclusters the input list is returned unchanged.
     *
     * @param languageClusters sorted, mutable list of per-language clusters; the
     *        promoted cluster is removed from it
     */
    private static List<Cluster> flattenMajorLanguage(List<Cluster> languageClusters) {
        Cluster majorLanguageCluster = null;
        // Explicit iteration lets us both detect "no match" without relying on an
        // exception and remove the match through the same iterator.
        for (Iterator<Cluster> it = languageClusters.iterator(); it.hasNext();) {
            Cluster candidate = it.next();
            if (!candidate.getSubclusters().isEmpty()) {
                majorLanguageCluster = candidate;
                it.remove();
                break;
            }
        }
        if (majorLanguageCluster == null) {
            return languageClusters;
        }

        List<Cluster> result = Lists.newArrayList();
        result.addAll(majorLanguageCluster.getSubclusters());
        Cluster otherLanguages = new Cluster("Other Languages", new Document[0]);
        otherLanguages.addSubclusters(languageClusters);
        result.add(otherLanguages);
        return result;
    }

    /**
     * Groups the documents by their language and clusters each group separately.
     *
     * <p>Each group becomes one cluster labelled with the language (or
     * "Unknown Language" when the documents carry none). If the algorithm yields no
     * clusters, or only a single "other topics" cluster, the group's documents are
     * attached directly to the language cluster; otherwise the algorithm's clusters
     * become its subclusters.
     *
     * @return language clusters keyed by language, in the order the languages are first
     *         encountered in {@code list}; the key is {@code null} for documents
     *         without a language
     */
    private Map<LanguageCode, Cluster> clusterByLanguage(List<Document> list, IMonolingualClusteringAlgorithm iMonolingualClusteringAlgorithm) {
        ImmutableListMultimap<String, Document> documentsByLanguage = Multimaps.index(list, new Function<Document, String>() {
            @Override
            public String apply(Document document) {
                LanguageCode language = document.getLanguage();
                return language != null ? language.name() : UNKNOWN_LANGUAGE_KEY;
            }
        });

        // Insertion-ordered map: keeps the result order stable instead of depending on
        // hash iteration order. It still accepts the null key used for "unknown language".
        Map<LanguageCode, Cluster> clustersByLanguage = Maps.newLinkedHashMap();
        for (String languageName : documentsByLanguage.keySet()) {
            List<Document> languageDocuments = documentsByLanguage.get(languageName);
            LanguageCode language = languageName.isEmpty() ? null : LanguageCode.valueOf(languageName);
            Cluster languageCluster = new Cluster(language != null ? language.toString() : "Unknown Language", new Document[0]);

            LanguageCode clusteringLanguage = language != null ? language : this.defaultLanguage;
            logger.debug("Performing monolingual clustering in: {}", clusteringLanguage);
            List<Cluster> clusters = iMonolingualClusteringAlgorithm.process(languageDocuments, clusteringLanguage);

            boolean nothingUseful = clusters.isEmpty()
                    || (clusters.size() == 1 && clusters.get(0).isOtherTopics());
            if (nothingUseful) {
                languageCluster.addDocuments(languageDocuments);
            } else {
                languageCluster.addSubclusters(clusters);
            }
            clustersByLanguage.put(language, languageCluster);
        }
        return clustersByLanguage;
    }
}
