package marytts.language.de.preprocess;

import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.regex.Pattern;
import marytts.datatypes.MaryXML;
import marytts.util.MaryUtils;
import marytts.util.dom.MaryDomUtils;
import org.apache.log4j.Level;
import org.apache.log4j.Logger;
import org.w3c.dom.Document;
import org.w3c.dom.Element;

/* loaded from: input_file:lib/marytts-lang-de-5.1-SNAPSHOT.jar:marytts/language/de/preprocess/MultiWordEP.class */
/**
 * Expansion pattern that replaces known multi-word expressions with the
 * expansion listed for them in the {@code multiword.dat} class-path resource.
 *
 * <p>The dictionary is loaded once, when the class is initialised. Each entry
 * maps a whitespace-normalised multi-word expression to its expansion; every
 * single word occurring in any expression is additionally remembered so that
 * {@link #isCandidate(Element)} can cheaply pre-filter tokens.
 */
public class MultiWordEP extends ExpansionPattern {
    /** Name of the dictionary resource, resolved relative to this class. */
    private static final String DICT_RESOURCE = "multiword.dat";
    /** Matches any run of whitespace; compiled once instead of per dictionary line. */
    private static final Pattern WHITESPACE_RUN = Pattern.compile("\\s+");

    private final String[] _knownTypes = {"multiword"};
    private final List<String> knownTypes = Arrays.asList(this._knownTypes);
    /** This pattern has no character-level matcher; {@link #reMatchingChars()} returns null. */
    private final Pattern reMatchingChars = null;
    /** Multi-word expression (single-space separated) mapped to its expansion. */
    private static final Map<String, String> multiWordDict = new HashMap<String, String>();
    /** Every individual word that is part of at least one dictionary key. */
    private static final Set<String> constituentWordSet = new HashSet<String>();
    private static final Logger logger = MaryUtils.getLogger("MultiWordEP");

    // NOTE: this initialiser must stay below the static fields it uses
    // (static initialisation runs in textual order).
    static {
        try {
            loadMultiWordDict();
        } catch (IOException e) {
            // FileNotFoundException is an IOException, so one handler covers both.
            // A missing dictionary is not fatal: the pattern simply never matches.
            logger.warn("Could not load multiword dictionary file", e);
        }
    }

    /**
     * @return the type names handled by this pattern (only {@code "multiword"})
     */
    @Override // marytts.language.de.preprocess.ExpansionPattern
    public List<String> knownTypes() {
        return this.knownTypes;
    }

    /**
     * @return always {@code null}; this pattern defines no matching-characters regex
     */
    @Override // marytts.language.de.preprocess.ExpansionPattern
    public Pattern reMatchingChars() {
        return this.reMatchingChars;
    }

    /**
     * Tells whether the token's text is a word occurring in some dictionary entry.
     *
     * @param element the token element to inspect
     * @return {@code true} if the token text is a constituent of a known multi-word
     */
    @Override // marytts.language.de.preprocess.ExpansionPattern
    protected boolean isCandidate(Element element) {
        return constituentWordSet.contains(MaryDomUtils.tokenText(element));
    }

    /**
     * Delegates to {@link #match(String, int)}.
     *
     * @param str the text to test
     * @param i the type index to report on success
     * @return {@code i} if {@code str} is non-empty, {@code -1} otherwise
     */
    @Override // marytts.language.de.preprocess.ExpansionPattern
    protected int canDealWith(String str, int i) {
        return match(str, i);
    }

    /**
     * Accepts any non-empty string; the real dictionary lookup happens in
     * {@link #expand(List, String, int)}.
     *
     * @param str the text to test
     * @param i the type index to report on success
     * @return {@code i} if {@code str} is non-empty, {@code -1} otherwise
     */
    @Override // marytts.language.de.preprocess.ExpansionPattern
    protected int match(String str, int i) {
        return str.length() > 0 ? i : -1;
    }

    /**
     * Looks for the longest prefix of {@code list} whose space-joined token
     * texts form a dictionary key and, if one is found, replaces those tokens
     * with the expansion.
     *
     * @param list the candidate tokens, in document order
     * @param str not used by this implementation
     * @param i not used by this implementation
     * @return the newly created elements, or an empty list if no prefix matched
     * @throws NullPointerException if {@code list} is null
     * @throws IllegalArgumentException if {@code list} is empty
     */
    @Override // marytts.language.de.preprocess.ExpansionPattern
    protected List<Element> expand(List<Element> list, String str, int i) {
        if (list == null) {
            throw new NullPointerException("Received null argument");
        }
        if (list.isEmpty()) {
            throw new IllegalArgumentException("Received empty list");
        }
        List<Element> expanded = new ArrayList<Element>();
        // Working copy: shrunk from the end until its joined text is a dictionary key.
        List<Element> matched = new ArrayList<Element>(list);
        StringBuilder joined = new StringBuilder();
        String foundKey = null;
        while (!matched.isEmpty()) {
            joined.setLength(0);
            for (Element token : matched) {
                joined.append(MaryDomUtils.tokenText(token));
                joined.append(" ");
            }
            String candidate = joined.toString().trim();
            logger.debug("Looking up multiword in dictionary: `" + candidate + "'");
            if (multiWordDict.containsKey(candidate)) {
                foundKey = candidate;
                break;
            }
            // No match: drop the last token and retry with a shorter prefix.
            matched.remove(matched.size() - 1);
        }
        if (foundKey != null) {
            expanded.addAll(dictionaryExpandMultiWord(matched, foundKey));
            logger.debug("Have found multiword in dictionary: `" + foundKey + "'");
        }
        // isDebugEnabled() also holds for finer levels (TRACE/ALL), unlike an
        // equality test against Level.DEBUG.
        if (logger.isDebugEnabled()) {
            StringBuilder summary = new StringBuilder();
            for (Element element : expanded) {
                if (element.getTagName().equals(MaryXML.TOKEN)) {
                    summary.append(MaryDomUtils.tokenText(element));
                } else {
                    summary.append(element.getTagName());
                }
                summary.append(" ");
            }
            logger.debug("Expanded multiword: " + summary.toString());
        }
        if (!expanded.isEmpty()) {
            replaceTokens(matched, expanded);
        }
        return expanded;
    }

    /**
     * Builds the replacement elements for a multi-word found in the dictionary.
     *
     * @param list the matched tokens; the first one supplies the owner document
     * @param str the dictionary key that matched
     * @return the elements created by {@code makeNewTokens} for the expansion
     */
    private List<Element> dictionaryExpandMultiWord(List<Element> list, String str) {
        Document ownerDocument = list.get(0).getOwnerDocument();
        return new ArrayList<Element>(makeNewTokens(ownerDocument, multiWordDict.get(str), true, str));
    }

    /**
     * Reads {@code multiword.dat} (UTF-8) and fills {@code multiWordDict} and
     * {@code constituentWordSet}.
     *
     * <p>Lines starting with {@code #} and lines matching
     * {@code REPattern.emptyLine} are skipped. Every other line is split on
     * {@code /}; the first field is the multi-word expression and the second
     * its expansion. Lines with fewer than two fields are logged and ignored
     * instead of aborting class initialisation.
     *
     * @throws FileNotFoundException if the resource is not on the class path
     * @throws IOException if reading the resource fails
     */
    private static void loadMultiWordDict() throws FileNotFoundException, IOException {
        java.io.InputStream stream = MultiWordEP.class.getResourceAsStream(DICT_RESOURCE);
        if (stream == null) {
            // getResourceAsStream signals a missing resource with null, not an exception.
            throw new FileNotFoundException("Resource not found: " + DICT_RESOURCE);
        }
        // try-with-resources closes the reader (and the underlying stream) on every path.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(stream, "UTF-8"))) {
            String line;
            while ((line = reader.readLine()) != null) {
                if (line.startsWith("#") || REPattern.emptyLine.matcher(line).find()) {
                    continue;
                }
                StringTokenizer fields = new StringTokenizer(line, "/");
                if (fields.countTokens() < 2) {
                    logger.warn("Ignoring malformed line in " + DICT_RESOURCE + ": `" + line + "'");
                    continue;
                }
                String key = WHITESPACE_RUN.matcher(fields.nextToken().trim()).replaceAll(" ");
                String expansion = WHITESPACE_RUN.matcher(fields.nextToken().trim()).replaceAll(" ");
                multiWordDict.put(key, expansion);
                constituentWordSet.addAll(Arrays.asList(key.split(" ")));
            }
        }
    }
}
