package marytts.language.en;

import com.sun.speech.freetts.Token;
import com.sun.speech.freetts.Utterance;
import com.sun.speech.freetts.en.TokenizerImpl;
import com.sun.speech.freetts.en.us.USEnglish;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import marytts.datatypes.MaryData;
import marytts.datatypes.MaryDataType;
import marytts.language.en_US.datatypes.USEnglishDataTypes;
import marytts.modules.InternalModule;
import marytts.modules.synthesis.FreeTTSVoices;
import marytts.modules.synthesis.Voice;

/* loaded from: input_file:lib/marytts-lang-en-5.1-SNAPSHOT.jar:marytts/language/en/FreeTTSTextToTokens.class */
/**
 * MARY module that runs the FreeTTS tokenizer over plain text and groups the
 * resulting tokens into FreeTTS {@link Utterance} objects.
 *
 * <p>A new utterance is started whenever the tokenizer reports a break, an
 * empty-word token is encountered, or the current utterance has grown past
 * {@link #UTTERANCE_TOKEN_THRESHOLD} tokens.
 */
public class FreeTTSTextToTokens extends InternalModule {
    /** Characters the tokenizer treats as whitespace. */
    private static final String WHITESPACE_SYMBOLS = " \t\n\r";

    /** Characters the tokenizer treats as punctuation preceding a word. */
    private static final String PREPUNCTUATION_SYMBOLS = "\"'`({[";

    /** Characters the tokenizer treats as punctuation following a word. */
    private static final String POSTPUNCTUATION_SYMBOLS = "\"'`.,:;!?(){}[]";

    /**
     * An utterance that already holds more than this many tokens is closed
     * before the next token is added.
     */
    private static final int UTTERANCE_TOKEN_THRESHOLD = 500;

    /**
     * Creates the module, handing the name "TextToTokens", {@link MaryDataType#TEXT},
     * {@link USEnglishDataTypes#FREETTS_TOKENS} and {@link Locale#ENGLISH} to the
     * superclass (presumably module name, input type, output type and locale —
     * confirm against {@code InternalModule}).
     */
    public FreeTTSTextToTokens() {
        super("TextToTokens", MaryDataType.TEXT, USEnglishDataTypes.FREETTS_TOKENS, Locale.ENGLISH);
    }

    /**
     * Runs the superclass startup and then loads the FreeTTS voices.
     *
     * @throws Exception if the superclass startup or voice loading fails
     */
    @Override // marytts.modules.InternalModule, marytts.modules.MaryModule
    public void startup() throws Exception {
        super.startup();
        FreeTTSVoices.load();
    }

    /**
     * Tokenizes the plain text of {@code maryData} and packs the tokens into
     * utterances.
     *
     * <p>A token that triggers a split is held back and becomes the first token
     * of the following utterance. Unlike a loop driven only by
     * {@code hasMoreTokens()}, this also emits a final utterance for a held-back
     * token when the tokenizer is already exhausted, so the last word of the
     * input is not silently lost. A held-back token with an empty word at the
     * very end of the input is discarded.
     *
     * @param maryData input data whose plain text is tokenized
     * @return new data of this module's output type, carrying the utterances
     * @throws Exception propagated from tokenization or voice lookup
     */
    @Override // marytts.modules.InternalModule, marytts.modules.MaryModule
    public MaryData process(MaryData maryData) throws Exception {
        TokenizerImpl tokenizer = new TokenizerImpl();
        tokenizer.setWhitespaceSymbols(WHITESPACE_SYMBOLS);
        tokenizer.setSingleCharSymbols(USEnglish.SINGLE_CHAR_SYMBOLS);
        tokenizer.setPrepunctuationSymbols(PREPUNCTUATION_SYMBOLS);
        tokenizer.setPostpunctuationSymbols(POSTPUNCTUATION_SYMBOLS);
        tokenizer.setInputText(maryData.getPlainText());

        List<Utterance> utterances = new ArrayList<Utterance>();
        Token pending = null;
        boolean first = true;
        // Keep going while a held-back token still needs a home, even if the
        // tokenizer itself has nothing more to deliver.
        while (pending != null || tokenizer.hasMoreTokens()) {
            List<Token> tokens = new ArrayList<Token>();
            if (pending != null) {
                tokens.add(pending);
                pending = null;
            }
            while (tokenizer.hasMoreTokens()) {
                Token next = tokenizer.getNextToken();
                if (next.getWord().length() == 0
                        || tokens.size() > UTTERANCE_TOKEN_THRESHOLD
                        || tokenizer.isBreak()) {
                    pending = next;
                    break;
                }
                tokens.add(next);
            }
            // A trailing token without a word does not warrant an utterance of
            // its own; drop it so the current utterance is correctly the last.
            if (pending != null && !tokenizer.hasMoreTokens() && pending.getWord().length() == 0) {
                pending = null;
            }

            Utterance utterance = new Utterance(FreeTTSVoices.getFreeTTSVoice(selectUsVoice(maryData)), tokens);
            utterance.setFirst(first);
            first = false;
            utterance.setLast(pending == null && !tokenizer.hasMoreTokens());
            utterances.add(utterance);
        }

        MaryData result = new MaryData(outputType(), maryData.getLocale());
        result.setUtterances(utterances);
        return result;
    }

    /**
     * Returns the default voice of {@code maryData} if its locale is
     * {@link Locale#US}; otherwise falls back to the default voice for
     * {@link Locale#US}.
     */
    private static Voice selectUsVoice(MaryData maryData) {
        Voice voice = maryData.getDefaultVoice();
        if (voice == null || !voice.getLocale().equals(Locale.US)) {
            voice = Voice.getDefaultVoice(Locale.US);
        }
        return voice;
    }
}
