/*
 * Decompiled with CFR 0.152.
 */
package org.ansj.app.phrase;

import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.ansj.app.phrase.Occurrence;
import org.ansj.domain.Term;
import org.ansj.library.StopLibrary;
import org.ansj.recognition.impl.StopRecognition;
import org.ansj.splitWord.Analysis;
import org.ansj.splitWord.analysis.NlpAnalysis;
import org.nlpcn.commons.lang.util.CollectionUtil;
import org.nlpcn.commons.lang.util.MapCount;
import org.nlpcn.commons.lang.util.StringUtil;

/**
 * Collects candidate phrases from segmented text and ranks them.
 *
 * <p>Text fed through {@link #fromText(String)} is split into sentences, segmented with the
 * configured {@link Analysis}, and every run of consecutive terms whose concatenated text is at
 * most {@code length} characters is recorded as an {@link Occurrence} together with the terms
 * seen immediately to its left and right. {@link #nbest(int)} then scores the candidates by
 * combining normalized mutual information, left/right neighbor entropy and a per-term
 * degree/frequency ratio, and returns the highest scoring ones.
 *
 * <p>The class keeps mutable state and performs no synchronization.
 */
public class PhraseExtractor {
    /** Frequency assumed for a term that is not (or no longer) present in the term map. */
    public static double DEFAULT_TERM_FREQUENCY = 10.24;
    /** Number of terms kept when the term map is pruned; also its initial capacity. */
    public static int TERM_MAP_CAPACITY = 100000;
    /** Number of candidates kept when the occurrence map is pruned; also its initial capacity. */
    public static int OCCURRENCE_MAP_CAPACITY = 100000;
    /** Slack factor: a map is pruned once it holds {@code capacity * (1 + FACTOR)} entries. */
    public static float FACTOR = 0.2f;
    /** {@link #nbest(int)} only de-duplicates when fewer candidates than this remain. */
    public static int DEDUP_THRESHOLD = 2000;

    /** Orders candidates from highest to lowest score. */
    private static final Comparator<Map.Entry<String, Occurrence>> BY_SCORE_DESCENDING =
            new Comparator<Map.Entry<String, Occurrence>>() {
                @Override
                public int compare(Map.Entry<String, Occurrence> o1, Map.Entry<String, Occurrence> o2) {
                    return Double.compare(o2.getValue().getScore(), o1.getValue().getScore());
                }
            };

    private Analysis analysis = new NlpAnalysis();
    private StopRecognition sr;
    /** Maximum number of characters in a candidate phrase. */
    private int length = 10;
    /** Number of terms seen so far, each sentence position counted once. */
    private int totalTerm;
    private final Map<String, Integer> termMap = new HashMap<String, Integer>(TERM_MAP_CAPACITY);
    private final Map<String, Occurrence> occurrenceMap = new HashMap<String, Occurrence>(OCCURRENCE_MAP_CAPACITY);

    /**
     * Creates an extractor whose stop filter drops terms with a blank, {@code "null"} or
     * {@code "w"} nature, plus anything rejected by the recognition returned from
     * {@code StopLibrary.get()} when that is non-null.
     */
    public PhraseExtractor() {
        final StopRecognition defaultSR = StopLibrary.get();
        this.sr = new StopRecognition() {

            @Override
            public boolean filter(Term term) {
                String nature = term.getNatureStr();
                if (StringUtil.isBlank((CharSequence) nature) || "null".equals(nature) || "w".equals(nature)) {
                    return true;
                }
                return defaultSR != null && defaultSR.filter(term);
            }
        };
    }

    /**
     * Replaces the segmenter used to split sentences into terms.
     *
     * @param analysis the segmenter to use
     * @return this extractor, for chaining
     */
    public PhraseExtractor setAnalysis(Analysis analysis) {
        this.analysis = analysis;
        return this;
    }

    /**
     * Replaces the stop filter applied to every segmented sentence.
     *
     * @param sr the stop recognition to use
     * @return this extractor, for chaining
     */
    public PhraseExtractor setStopRecognition(StopRecognition sr) {
        this.sr = sr;
        return this;
    }

    /**
     * Sets the maximum character length of a candidate phrase.
     *
     * @param length maximum number of characters
     * @return this extractor, for chaining
     */
    public PhraseExtractor setLength(int length) {
        this.length = length;
        return this;
    }

    /**
     * Feeds a piece of text into the extractor, updating term and candidate statistics.
     *
     * @param text the text to analyse; {@code null} or empty text is ignored
     */
    public void fromText(String text) {
        if (text == null || text.isEmpty()) {
            return;
        }
        List<Term> window = new ArrayList<Term>();
        StringBuilder phrase = new StringBuilder();
        for (List<Term> sentence : this.seg2sentence(text)) {
            int len = sentence.size();
            this.totalTerm += len;
            for (int i = 0; i < len; ++i) {
                for (int j = i; j < len; ++j) {
                    Term current = sentence.get(j);
                    String name = current.getRealName();
                    // NOTE(review): the counter is bumped on every (i, j) visit, so a term is
                    // counted once per window that reaches it rather than once per occurrence,
                    // while totalTerm counts each position once. Kept as-is because existing
                    // scores depend on it - confirm whether this is intended.
                    Integer seen = this.termMap.get(name);
                    if (seen != null) {
                        this.termMap.put(name, seen + 1);
                    } else {
                        this.addTerm(name);
                    }
                    phrase.append(name);
                    if (this.length < phrase.length()) {
                        // Longer windows starting at i can only get longer; move on to i + 1.
                        break;
                    }
                    window.add(current);
                    String key = phrase.toString();
                    Occurrence occ = this.occurrenceMap.get(key);
                    if (occ == null) {
                        occ = new Occurrence(new ArrayList<Term>(window));
                    }
                    if (0 < i) {
                        occ.addLeftTerm(sentence.get(i - 1).getRealName());
                    }
                    if (j < len - 1) {
                        occ.addRightTerm(sentence.get(j + 1).getRealName());
                    }
                    occ.increaseFrequency();
                    // Only register the candidate the first time it is seen.
                    if (occ.getFrequency() == 1) {
                        this.addOccurrence(key, occ);
                    }
                }
                window.clear();
                phrase.setLength(0);
            }
        }
    }

    /**
     * Registers a new term with frequency 1, first pruning the term map down to the
     * {@link #TERM_MAP_CAPACITY} most frequent terms when it has grown past its slack limit.
     */
    private void addTerm(String t) {
        int capacity = (int) ((float) TERM_MAP_CAPACITY * (1.0f + FACTOR));
        if (capacity <= this.termMap.size()) {
            List<Map.Entry<String, Integer>> items = CollectionUtil.sortMapByValue(this.termMap, 1);
            // Clamp so that unusual values of the public tuning fields cannot make subList throw.
            int keep = Math.max(0, Math.min(TERM_MAP_CAPACITY, items.size()));
            for (Map.Entry<String, Integer> item : items.subList(keep, items.size())) {
                this.termMap.remove(item.getKey());
            }
        }
        this.termMap.put(t, 1);
    }

    /**
     * Registers a new candidate, first re-scoring and pruning the occurrence map down to the
     * {@link #OCCURRENCE_MAP_CAPACITY} best candidates when it has grown past its slack limit.
     */
    private void addOccurrence(String k, Occurrence occurrence) {
        int capacity = (int) ((float) OCCURRENCE_MAP_CAPACITY * (1.0f + FACTOR));
        if (capacity <= this.occurrenceMap.size()) {
            this.calculateScore();
            List<Map.Entry<String, Occurrence>> ordered =
                    new ArrayList<Map.Entry<String, Occurrence>>(this.occurrenceMap.entrySet());
            Collections.sort(ordered, BY_SCORE_DESCENDING);
            // Clamp so that unusual values of the public tuning fields cannot make subList throw.
            int keep = Math.max(0, Math.min(OCCURRENCE_MAP_CAPACITY, ordered.size()));
            for (Map.Entry<String, Occurrence> item : ordered.subList(keep, ordered.size())) {
                this.occurrenceMap.remove(item.getKey());
            }
        }
        this.occurrenceMap.put(k, occurrence);
    }

    /**
     * Scores all candidates and returns the best ones, highest score first.
     *
     * <p>This call is destructive: candidates that fail the filter (and, when fewer than
     * {@link #DEDUP_THRESHOLD} remain, candidates judged redundant) are removed from the
     * extractor's internal state.
     *
     * @param size maximum number of phrases to return; values of zero or less yield an empty list
     * @return at most {@code size} entries mapping phrase text to its statistics
     */
    public List<Map.Entry<String, Occurrence>> nbest(int size) {
        this.calculateScore();

        Iterator<Map.Entry<String, Occurrence>> it = this.occurrenceMap.entrySet().iterator();
        while (it.hasNext()) {
            Map.Entry<String, Occurrence> entry = it.next();
            if (!isCandidate(entry.getKey(), entry.getValue())) {
                it.remove();
            }
        }

        List<Map.Entry<String, Occurrence>> entryList =
                new ArrayList<Map.Entry<String, Occurrence>>(this.occurrenceMap.entrySet());
        if (this.occurrenceMap.size() < DEDUP_THRESHOLD) {
            Set<String> redundant = new HashSet<String>();
            this.dedup(entryList, redundant);
            this.occurrenceMap.keySet().removeAll(redundant);
            entryList = new ArrayList<Map.Entry<String, Occurrence>>(this.occurrenceMap.entrySet());
        }
        Collections.sort(entryList, BY_SCORE_DESCENDING);

        int limit = Math.min(Math.max(size, 0), entryList.size());
        return new ArrayList<Map.Entry<String, Occurrence>>(entryList.subList(0, limit));
    }

    /**
     * Tells whether a candidate survives the {@link #nbest(int)} filter: it must be at least two
     * characters long, be either multi-term, a new word, or at least five characters long, and
     * have a positive left or right entropy.
     */
    private static boolean isCandidate(String key, Occurrence occ) {
        List<Term> terms = occ.getTerms();
        boolean substantial = terms.size() >= 2 || terms.get(0).isNewWord() || key.length() >= 5;
        return substantial
                && key.length() >= 2
                && Double.compare(Math.max(occ.getLeftEntropy(), occ.getRightEntropy()), 0.0) > 0;
    }

    /**
     * Recomputes PMI, left/right entropy and the combined score of every candidate.
     *
     * <p>The combined score is the sum of four components, each divided by its total over all
     * candidates: PMI, left entropy, right entropy, and the sum over the candidate's terms of
     * {@code (degree + frequency) / frequency}, where a term's frequency is the number of
     * candidates containing it (counted per position) and its degree accumulates
     * {@code candidateTermCount - 1} for each of them.
     */
    private void calculateScore() {
        Set<Map.Entry<String, Occurrence>> entries = this.occurrenceMap.entrySet();
        double totalPMI = 0.0;
        double totalLeftEntropy = 0.0;
        double totalRightEntropy = 0.0;
        MapCount<String> frequencyMC = new MapCount<String>();
        MapCount<String> degreeMC = new MapCount<String>();
        for (Map.Entry<String, Occurrence> entry : entries) {
            Occurrence occ = entry.getValue();
            List<Term> terms = occ.getTerms();
            occ.setPmi(this.calculateMutualInformation(entry.getKey(), terms));
            totalPMI += occ.getPmi();
            occ.setLeftEntropy(this.calculateEntropy(occ.getLeftTerms()));
            totalLeftEntropy += occ.getLeftEntropy();
            occ.setRightEntropy(this.calculateEntropy(occ.getRightTerms()));
            totalRightEntropy += occ.getRightEntropy();
            double degree = terms.size() - 1;
            for (Term term : terms) {
                frequencyMC.add(term.getRealName());
                degreeMC.add(term.getRealName(), degree);
            }
        }

        Map<String, Double> degrees = degreeMC.get();
        Map<String, Double> wordScores = new HashMap<String, Double>(frequencyMC.size());
        for (Map.Entry<String, Double> entry : frequencyMC.get().entrySet()) {
            double frequency = entry.getValue();
            double degree = degrees.get(entry.getKey());
            wordScores.put(entry.getKey(), (degree + frequency) / frequency);
        }

        // First pass: stash the raw word-score sum in the score field and total it up.
        double totalWordScore = 0.0;
        for (Map.Entry<String, Occurrence> entry : entries) {
            Occurrence occ = entry.getValue();
            double sum = 0.0;
            for (Term term : occ.getTerms()) {
                sum += wordScores.get(term.getRealName());
            }
            occ.setScore(sum);
            totalWordScore += sum;
        }

        // Second pass: combine the normalized components. A zero total contributes nothing
        // instead of turning every score into NaN.
        for (Map.Entry<String, Occurrence> entry : entries) {
            Occurrence occ = entry.getValue();
            occ.setScore(ratio(occ.getPmi(), totalPMI)
                    + ratio(occ.getLeftEntropy(), totalLeftEntropy)
                    + ratio(occ.getRightEntropy(), totalRightEntropy)
                    + ratio(occ.getScore(), totalWordScore));
            this.calculateScore2(occ);
        }
    }

    /** Returns {@code value / total}, or 0 when {@code total} is zero. */
    private static double ratio(double value, double total) {
        return total == 0.0 ? 0.0 : value / total;
    }

    /**
     * Multiplies the score by 0.001 when the first term has a penalized nature, and again when
     * the last term does (so a single penalized term is penalized twice).
     */
    private void calculateScore2(Occurrence occ) {
        List<Term> terms = occ.getTerms();
        if (isPenalizedNature(terms.get(0).getNatureStr())) {
            occ.setScore(occ.getScore() * 0.001);
        }
        if (isPenalizedNature(terms.get(terms.size() - 1).getNatureStr())) {
            occ.setScore(occ.getScore() * 0.001);
        }
    }

    /**
     * Tells whether a nature tag is {@code "c"}, {@code "p"} or starts with {@code 'u'}
     * (presumably conjunction / preposition / auxiliary tags - confirm against the tag set).
     * Null and empty tags are not penalized.
     */
    private static boolean isPenalizedNature(String nature) {
        if (nature == null || nature.isEmpty()) {
            return false;
        }
        return "c".equals(nature) || "p".equals(nature) || nature.charAt(0) == 'u';
    }

    /**
     * Computes the Shannon entropy (natural log) of the distribution given by the counts.
     *
     * @param mc neighbor-term counts
     * @return the entropy, or 0 when there are no counts
     */
    private double calculateEntropy(MapCount<String> mc) {
        Map<String, Double> counts = mc.get();
        double totalFrequency = 0.0;
        for (Double count : counts.values()) {
            totalFrequency += count;
        }
        double entropy = 0.0;
        for (Double count : counts.values()) {
            double p = count / totalFrequency;
            entropy += -p * Math.log(p);
        }
        return entropy;
    }

    /**
     * Computes the mutual information of a candidate.
     *
     * <p>For a single term this is {@code -ln(freq / totalTerm)}; otherwise it is
     * {@code ln(phraseFreq * totalTerm^(n-1)) - ln(product of term frequencies)}.
     *
     * @param phrase candidate key in the occurrence map
     * @param terms the candidate's terms
     */
    private double calculateMutualInformation(String phrase, List<Term> terms) {
        int size = terms.size();
        if (size == 1) {
            return -Math.log(this.getFrequency(terms.get(0).getRealName()) / (double) this.totalTerm);
        }
        double product = 1.0;
        for (Term term : terms) {
            product *= this.getFrequency(term.getRealName());
        }
        double phraseFrequency = (double) this.occurrenceMap.get(phrase).getFrequency();
        return Math.log(phraseFrequency * Math.pow(this.totalTerm, size - 1)) - Math.log(product);
    }

    /** Returns the recorded frequency of a term, or {@link #DEFAULT_TERM_FREQUENCY} if unknown. */
    private double getFrequency(String t) {
        Integer frequency = this.termMap.get(t);
        return frequency != null ? (double) frequency.intValue() : DEFAULT_TERM_FREQUENCY;
    }

    /**
     * Marks redundant candidates for removal by pairwise comparison.
     *
     * <p>For each pair, if one phrase contains the other, the contained/containing partner is
     * dropped when the other one is at least as good on both PMI and mean entropy. Otherwise, if
     * the term-count cosine similarity exceeds 0.75, the phrase that is no longer and no better
     * on both measures is dropped. Once the outer phrase itself is dropped, its remaining
     * comparisons are skipped.
     *
     * @param entryList candidates to compare
     * @param toRemove receives the keys of candidates to discard
     */
    private void dedup(List<Map.Entry<String, Occurrence>> entryList, Set<String> toRemove) {
        int size = entryList.size();
        outer:
        for (int i = 0; i < size; ++i) {
            Map.Entry<String, Occurrence> e1 = entryList.get(i);
            String key1 = e1.getKey();
            Occurrence occ1 = e1.getValue();
            double pmi1 = occ1.getPmi();
            double entropy1 = (occ1.getLeftEntropy() + occ1.getRightEntropy()) / 2.0;
            for (int j = i + 1; j < size; ++j) {
                Map.Entry<String, Occurrence> e2 = entryList.get(j);
                String key2 = e2.getKey();
                Occurrence occ2 = e2.getValue();
                double pmi2 = occ2.getPmi();
                double entropy2 = (occ2.getLeftEntropy() + occ2.getRightEntropy()) / 2.0;

                if (key1.contains(key2)) {
                    if (atLeastAsGood(pmi1, entropy1, pmi2, entropy2)) {
                        toRemove.add(key2);
                    }
                    continue;
                }
                if (key2.contains(key1)) {
                    if (atLeastAsGood(pmi2, entropy2, pmi1, entropy1)) {
                        toRemove.add(key1);
                        continue outer;
                    }
                    continue;
                }
                if (!(0.75 < this.calculateCosineSimilarity(occ1.getTerms(), occ2.getTerms()))) {
                    continue;
                }
                if (key2.length() <= key1.length() && atLeastAsGood(pmi1, entropy1, pmi2, entropy2)) {
                    toRemove.add(key2);
                    continue;
                }
                if (key1.length() <= key2.length() && atLeastAsGood(pmi2, entropy2, pmi1, entropy1)) {
                    toRemove.add(key1);
                    continue outer;
                }
            }
        }
    }

    /** Tells whether candidate A is no worse than candidate B on both PMI and entropy. */
    private static boolean atLeastAsGood(double pmiA, double entropyA, double pmiB, double entropyB) {
        return Double.compare(pmiA, pmiB) >= 0 && Double.compare(entropyA, entropyB) >= 0;
    }

    /**
     * Computes the cosine similarity between two term lists, treating each as a bag of
     * real-name counts.
     *
     * @return the similarity, or 0 when either list is empty
     */
    private double calculateCosineSimilarity(List<Term> left, List<Term> right) {
        Map<String, Double> leftCounts = countRealNames(left);
        double leftNorm = sumOfSquares(leftCounts);
        if (leftNorm <= 0.0) {
            return 0.0;
        }
        Map<String, Double> rightCounts = countRealNames(right);
        double rightNorm = sumOfSquares(rightCounts);
        if (rightNorm <= 0.0) {
            return 0.0;
        }
        double dotProduct = 0.0;
        for (Map.Entry<String, Double> entry : leftCounts.entrySet()) {
            Double other = rightCounts.get(entry.getKey());
            if (other != null) {
                dotProduct += entry.getValue() * other;
            }
        }
        return dotProduct / (Math.sqrt(leftNorm) * Math.sqrt(rightNorm));
    }

    /** Counts how often each real name appears in the given terms. */
    private static Map<String, Double> countRealNames(List<Term> terms) {
        MapCount<String> counter = new MapCount<String>();
        for (Term term : terms) {
            counter.add(term.getRealName());
        }
        return counter.get();
    }

    /** Returns the sum of the squared values of the map. */
    private static double sumOfSquares(Map<String, Double> counts) {
        double sum = 0.0;
        for (Double value : counts.values()) {
            sum += Math.pow(value, 2.0);
        }
        return sum;
    }

    /** Splits the text into sentences and segments each one, applying the stop filter. */
    private List<List<Term>> seg2sentence(String text) {
        List<String> sentenceList = this.toSentenceList(text);
        List<List<Term>> resultList = new ArrayList<List<Term>>(sentenceList.size());
        for (String sentence : sentenceList) {
            resultList.add(this.analysis.parseStr(sentence).recognition(this.sr).getTerms());
        }
        return resultList;
    }

    /**
     * Splits text into sentence fragments at whitespace and ASCII/full-width punctuation.
     *
     * <p>The delimiter stays attached to the end of its fragment. A {@code '.'} only ends a
     * fragment when the following character is above U+0080 (so ASCII text such as decimals is
     * not split), and an ellipsis character only when it is doubled.
     */
    private List<String> toSentenceList(String content) {
        StringBuilder sb = new StringBuilder();
        List<String> sentences = new LinkedList<String>();
        int len = content.length();
        for (int i = 0; i < len; ++i) {
            char ch = content.charAt(i);
            // Skip whitespace at the start of a fragment.
            if (sb.length() == 0 && Character.isWhitespace(ch)) {
                continue;
            }
            sb.append(ch);
            boolean boundary;
            switch (ch) {
                case '.':
                    boundary = i < len - 1 && content.charAt(i + 1) > '\u0080';
                    break;
                case '\u2026':
                    boundary = i < len - 1 && content.charAt(i + 1) == '\u2026';
                    if (boundary) {
                        // Consume the second half of the doubled ellipsis.
                        sb.append('\u2026');
                        ++i;
                    }
                    break;
                case '\t':
                case '\n':
                case '\r':
                case ' ':
                case '!':
                case ',':
                case ';':
                case '?':
                case '\u00a0':
                case '\u3002':
                case '\uff01':
                case '\uff0c':
                case '\uff1b':
                case '\uff1f':
                    boundary = true;
                    break;
                default:
                    boundary = false;
                    break;
            }
            if (boundary) {
                this.insertIntoList(sb, sentences);
                sb.setLength(0);
            }
        }
        if (sb.length() > 0) {
            this.insertIntoList(sb, sentences);
        }
        return sentences;
    }

    /** Appends the trimmed buffer content to the list unless it is empty. */
    private void insertIntoList(StringBuilder sb, List<String> sentences) {
        String content = sb.toString().trim();
        if (content.length() > 0) {
            sentences.add(content);
        }
    }
}

