package org.pageseeder.diffx.load.text;

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
import org.pageseeder.diffx.config.WhiteSpaceProcessing;
import org.pageseeder.diffx.token.TextToken;
import org.pageseeder.diffx.token.XMLToken;
import org.pageseeder.diffx.token.impl.CharactersTokenBase;
import org.pageseeder.diffx.token.impl.IgnorableSpaceToken;
import org.pageseeder.diffx.token.impl.SpaceToken;

/* loaded from: classes.dex */
/**
 * Text tokenizer that splits a character sequence into word tokens and
 * whitespace tokens.
 *
 * <p>Words are located with a regular expression; whatever text lies between
 * two consecutive matches (or after the last match) is emitted as a whitespace
 * token. Tokens are cached by their text in {@link #f12633a}, so the same text
 * yields the same token instance for the lifetime of this tokenizer.
 *
 * <p>The token cache is a plain {@link HashMap} with no synchronization in this
 * class.
 */
public final class TokenizerBySpaceWord implements TextTokenizer {

    /**
     * Matches a single "word". Alternatives, tried in order:
     * <ol>
     *   <li>an optional leading space, any run of letters, marks, digits or
     *       {@code _'@/$.-}, ending with a letter, mark, digit, {@code _} or {@code %};</li>
     *   <li>any single non-whitespace character;</li>
     *   <li>an optional leading space followed by a quoted or parenthesised run
     *       containing no whitespace, quotes or parentheses.</li>
     * </ol>
     * Compiled once: {@code Pattern} instances are immutable and safe to share,
     * so there is no need to recompile the expression on every call.
     */
    private static final Pattern WORD_PATTERN = Pattern.compile("( ?[\\p{L}\\p{M}0-9_'@/$.-]*[\\p{L}\\p{M}0-9_%])|(\\S)|( ?[\"(][^ \\t\\r\\n\\f'\"()]+[\")])");

    /** Cache of tokens keyed by their text; shared by word and whitespace tokens. */
    public final HashMap f12633a = new HashMap();

    /** Whitespace processing mode supplied at construction time. */
    public final WhiteSpaceProcessing f12634b;

    /**
     * Creates a tokenizer using the given whitespace processing mode.
     *
     * @param whiteSpaceProcessing the mode controlling which whitespace tokens are produced
     */
    public TokenizerBySpaceWord(WhiteSpaceProcessing whiteSpaceProcessing) {
        this.f12634b = whiteSpaceProcessing;
    }

    /**
     * Tokenizes the given character sequence.
     *
     * @param charSequence the text to tokenize
     * @return the tokens in document order; an immutable empty list when the input is empty
     * @throws NullPointerException if {@code charSequence} is {@code null}
     */
    @Override // org.pageseeder.diffx.load.text.TextTokenizer
    public final List a(CharSequence charSequence) {
        if (charSequence == null) {
            throw new NullPointerException("Character sequence is null");
        }
        if (charSequence.length() == 0) {
            return Collections.emptyList();
        }
        // Rough initial capacity: one token per four characters.
        ArrayList arrayList = new ArrayList(charSequence.length() / 4);
        Matcher matcher = WORD_PATTERN.matcher(charSequence);
        // End offset of the previous match, i.e. where the next gap would start.
        int i = 0;
        while (matcher.find()) {
            // Text skipped between the previous match and this one becomes a
            // whitespace token, unless the mode is WhiteSpaceProcessing.g.
            // NOTE(review): constant names are obfuscated; `g` presumably means
            // "ignore whitespace" - confirm against WhiteSpaceProcessing.
            if (i != matcher.start() && this.f12634b != WhiteSpaceProcessing.g) {
                arrayList.add(b(charSequence.subSequence(i, matcher.start()).toString()));
            }
            String charSequence2 = charSequence.subSequence(matcher.start(), matcher.end()).toString();
            HashMap hashMap = this.f12633a;
            // Reuse the cached token for this word if one exists.
            XMLToken xMLToken = (TextToken) hashMap.get(charSequence2);
            if (xMLToken == null) {
                xMLToken = new CharactersTokenBase(charSequence2);
                hashMap.put(charSequence2, xMLToken);
            }
            arrayList.add(xMLToken);
            i = matcher.end();
        }
        // Trailing text after the last match is always emitted, regardless of mode.
        // NOTE(review): this differs from the in-loop handling, which skips gaps
        // when the mode is WhiteSpaceProcessing.g - confirm this is intended.
        if (i != charSequence.length()) {
            arrayList.add(b(charSequence.subSequence(i, charSequence.length()).toString()));
        }
        return arrayList;
    }

    /**
     * Returns the whitespace token for the given text, creating and caching it
     * on first use.
     *
     * <p>When the mode is {@code WhiteSpaceProcessing.h} an
     * {@link IgnorableSpaceToken} is created; otherwise the token comes from
     * {@code SpaceToken.d(str)}.
     *
     * @param str the whitespace text
     * @return the cached or newly created token for {@code str}
     */
    public final TextToken b(String str) {
        HashMap hashMap = this.f12633a;
        TextToken textToken = (TextToken) hashMap.get(str);
        if (textToken == null) {
            textToken = this.f12634b == WhiteSpaceProcessing.h ? new IgnorableSpaceToken(str) : SpaceToken.d(str);
            hashMap.put(str, textToken);
        }
        return textToken;
    }
}
