/*
 * Decompiled with CFR 0.152.
 */
package org.htmlcleaner;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.Reader;
import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
import org.htmlcleaner.BaseToken;
import org.htmlcleaner.CleanerProperties;
import org.htmlcleaner.CommentToken;
import org.htmlcleaner.ContentToken;
import org.htmlcleaner.DoctypeToken;
import org.htmlcleaner.EndTagToken;
import org.htmlcleaner.HtmlCleaner;
import org.htmlcleaner.ITagInfoProvider;
import org.htmlcleaner.TagInfo;
import org.htmlcleaner.TagNode;
import org.htmlcleaner.TagToken;

/**
 * Splits character input into HTML tokens (tags, end tags, comments, doctype
 * and plain content).
 *
 * <p>Input is consumed through a fixed-size working buffer that is refilled on
 * demand. Every token that is produced is appended to the token list and the
 * owning {@link HtmlCleaner} is immediately asked to continue building the
 * tree from that token (see {@code addToken}).
 *
 * <p>{@link #start()} closes the underlying reader when it finishes, so an
 * instance can tokenize its input only once.
 */
public class HtmlTokenizer {
    /** Capacity of the working buffer, in characters. */
    private static final int WORKING_BUFFER_SIZE = 1024;

    private BufferedReader _reader;
    private char[] _working = new char[WORKING_BUFFER_SIZE];
    /** Index of the current character inside {@code _working}. */
    private transient int _pos = 0;
    /** Number of valid characters in {@code _working} once end of input was hit; -1 until then. */
    private transient int _len = -1;
    /** Raw source text collected for the token currently being recognised. */
    private transient StringBuilder _saved = new StringBuilder(512);
    private transient boolean _isLateForDoctype = false;
    private transient DoctypeToken _docType = null;
    private transient TagToken _currentTagToken = null;
    private transient List _tokenList = new ArrayList();
    private transient Set _namespacePrefixes = new HashSet();
    /** Cleared by {@code identifier()} when the input does not start an identifier. */
    private boolean _asExpected = true;
    /** True while positioned between an opening and a closing script tag. */
    private boolean _isScriptContext = false;
    private HtmlCleaner cleaner;
    private CleanerProperties props;

    /**
     * Creates a tokenizer reading from the given reader.
     *
     * @param htmlCleaner cleaner that supplies the properties and tag info and receives the tokens
     * @param reader source of characters; it is wrapped in a {@link BufferedReader}
     * @throws IOException declared for callers; nothing is read during construction
     */
    public HtmlTokenizer(HtmlCleaner htmlCleaner, Reader reader) throws IOException {
        this._reader = new BufferedReader(reader);
        this.cleaner = htmlCleaner;
        this.props = htmlCleaner.getProperties();
    }

    /**
     * Appends a token to the token list and lets the cleaner process the list
     * starting at that newly added token.
     */
    private void addToken(BaseToken baseToken) {
        this._tokenList.add(baseToken);
        this.cleaner.makeTree(this._tokenList, this._tokenList.listIterator(this._tokenList.size() - 1));
    }

    /**
     * Refills the working buffer when fewer than {@code n + 1} characters remain
     * before its end and the end of input has not been reached yet.
     *
     * <p>The unread tail is moved to the front of the buffer, the rest is filled
     * from the reader, and {@code _len} is set once the reader is exhausted.
     * Characters in the range 1..32 other than line feed and carriage return are
     * replaced by a space.
     */
    private void readIfNeeded(int n) throws IOException {
        if (this._len != -1 || this._pos + n < WORKING_BUFFER_SIZE) {
            return;
        }

        // Keep what has not been consumed yet and continue reading after it.
        int kept = WORKING_BUFFER_SIZE - this._pos;
        System.arraycopy(this._working, this._pos, this._working, 0, kept);
        this._pos = 0;

        int offset = kept;
        int remaining = WORKING_BUFFER_SIZE - kept;
        int totalRead = 0;
        while (remaining > 0) {
            int count = this._reader.read(this._working, offset, remaining);
            if (count < 0) {
                break;
            }
            totalRead += count;
            offset += count;
            remaining -= count;
        }

        // The buffer could not be filled completely: the reader is exhausted.
        if (remaining > 0) {
            this._len = totalRead + kept;
        }

        int limit = this._len >= 0 ? this._len : WORKING_BUFFER_SIZE;
        for (int i = 0; i < limit; ++i) {
            char c = this._working[i];
            if (c >= '\u0001' && c <= ' ' && c != '\n' && c != '\r') {
                this._working[i] = ' ';
            }
        }
    }

    /** Returns the live list of tokens produced so far. */
    List getTokenList() {
        return this._tokenList;
    }

    /** Returns the live set of lower-cased namespace prefixes collected by {@code identifier()}. */
    Set getNamespacePrefixes() {
        return this._namespacePrefixes;
    }

    /** Advances by one character, refilling the buffer if required. */
    private void go() throws IOException {
        ++this._pos;
        this.readIfNeeded(0);
    }

    /** Advances by {@code n} characters, refilling the buffer if required. */
    private void go(int n) throws IOException {
        this._pos += n;
        this.readIfNeeded(n - 1);
    }

    /**
     * Tells whether the input at the current position starts with the given
     * text, compared case-insensitively.
     */
    private boolean startsWith(String string) throws IOException {
        int length = string.length();
        this.readIfNeeded(length);
        if (this._len >= 0 && this._pos + length > this._len) {
            return false;
        }
        for (int i = 0; i < length; ++i) {
            char expected = Character.toLowerCase(string.charAt(i));
            char actual = Character.toLowerCase(this._working[this._pos + i]);
            if (expected != actual) {
                return false;
            }
        }
        return true;
    }

    /** Tells whether buffer index {@code n} holds a whitespace character; false past the end of input. */
    private boolean isWhitespace(int n) {
        if (this._len >= 0 && n >= this._len) {
            return false;
        }
        return Character.isWhitespace(this._working[n]);
    }

    /** Tells whether the current character is whitespace. */
    private boolean isWhitespace() {
        return this.isWhitespace(this._pos);
    }

    /** Case-insensitively compares the character at buffer index {@code n} with {@code c}; false past the end of input. */
    private boolean isChar(int n, char c) {
        if (this._len >= 0 && n >= this._len) {
            return false;
        }
        return Character.toLowerCase(c) == Character.toLowerCase(this._working[n]);
    }

    /** Case-insensitively compares the current character with {@code c}. */
    private boolean isChar(char c) {
        return this.isChar(this._pos, c);
    }

    /** Tells whether buffer index {@code n} holds a Unicode identifier start character; false past the end of input. */
    private boolean isIdentifierStartChar(int n) {
        if (this._len >= 0 && n >= this._len) {
            return false;
        }
        return Character.isUnicodeIdentifierStart(this._working[n]);
    }

    /** Tells whether the current character can start an identifier. */
    private boolean isIdentifierStartChar() {
        return this.isIdentifierStartChar(this._pos);
    }

    /** Tells whether {@code c} is one of the punctuation characters allowed inside an identifier. */
    private boolean isIdentifierHelperChar(char c) {
        return ':' == c || '.' == c || '-' == c || '_' == c;
    }

    /** Tells whether the current character may appear inside an identifier. */
    private boolean isIdentifierChar() {
        if (this._len >= 0 && this._pos >= this._len) {
            return false;
        }
        char c = this._working[this._pos];
        return Character.isUnicodeIdentifierStart(c) || Character.isDigit(c) || this.isIdentifierHelperChar(c);
    }

    /** Tells whether the end of input was reached and every buffered character was consumed. */
    private boolean isAllRead() {
        return this._len >= 0 && this._pos >= this._len;
    }

    /** Appends a character to the saved source text. */
    private void save(char c) {
        this._saved.append(c);
    }

    /** Saves the current character unless the input is exhausted. */
    private void saveCurrent() {
        if (!this.isAllRead()) {
            this.save(this._working[this._pos]);
        }
    }

    /**
     * Saves up to {@code n} characters starting at the current position without
     * advancing. Fewer characters are saved when the input ends earlier.
     */
    private void saveCurrent(int n) throws IOException {
        this.readIfNeeded(n);
        // Bound the copy by the valid part of the buffer so that stale
        // characters located after the end of input are never saved.
        int limit = this._len >= 0 ? this._len : WORKING_BUFFER_SIZE;
        for (int i = this._pos; i < limit && n > 0; ++i, --n) {
            this.save(this._working[i]);
        }
    }

    /** Consumes consecutive whitespace, keeping it in the saved source text. */
    private void skipWhitespaces() throws IOException {
        while (!this.isAllRead() && this.isWhitespace()) {
            this.saveCurrent();
            this.go();
        }
    }

    /**
     * Emits the saved source text as a content token and clears it.
     *
     * @return true if a token was added, false if nothing was saved
     */
    private boolean addSavedAsContent() {
        if (this._saved.length() == 0) {
            return false;
        }
        this.addToken(new ContentToken(this._saved.toString()));
        this._saved.setLength(0);
        return true;
    }

    /**
     * Resets the tokenizer state, tokenizes the whole input and closes the
     * reader.
     *
     * <p>The reader is closed even when tokenizing fails; in that case the
     * original failure is the one reported to the caller.
     *
     * @throws IOException if reading from or closing the reader fails
     */
    void start() throws IOException {
        this._currentTagToken = null;
        this._tokenList.clear();
        this._asExpected = true;
        this._isScriptContext = false;
        this._isLateForDoctype = false;
        this._namespacePrefixes.clear();

        boolean finished = false;
        try {
            // Placing the position at the end of the buffer forces the first fill.
            this._pos = WORKING_BUFFER_SIZE;
            this.readIfNeeded(0);
            this.tokenize();
            finished = true;
        } finally {
            if (finished) {
                this._reader.close();
            } else {
                try {
                    this._reader.close();
                } catch (IOException ignored) {
                    // Tokenizing already failed; let that exception propagate instead.
                }
            }
        }
    }

    /** Main loop: recognises one construct per iteration until the input is exhausted. */
    private void tokenize() throws IOException {
        // While true, a "<!--" inside a script element is still read as a comment.
        // It turns false once non-blank script content has been emitted.
        boolean scriptEmpty = true;
        while (!this.isAllRead()) {
            this._saved.setLength(0);
            this._currentTagToken = null;
            this._asExpected = true;
            this.readIfNeeded(10);

            if (this._isScriptContext) {
                if (this.startsWith("</script") && (this.isWhitespace(this._pos + 8) || this.isChar(this._pos + 8, '>'))) {
                    this.tagEnd();
                } else if (scriptEmpty && this.startsWith("<!--")) {
                    this.comment();
                } else {
                    boolean tokenAdded = this.content();
                    if (scriptEmpty && tokenAdded) {
                        BaseToken lastToken = (BaseToken) this._tokenList.get(this._tokenList.size() - 1);
                        if (lastToken != null) {
                            String text = lastToken.toString();
                            if (text != null && text.trim().length() > 0) {
                                scriptEmpty = false;
                            }
                        }
                    }
                }
                if (!this._isScriptContext) {
                    scriptEmpty = true;
                }
            } else if (this.startsWith("<!doctype")) {
                if (!this._isLateForDoctype) {
                    this.doctype();
                    this._isLateForDoctype = true;
                } else {
                    // A doctype after the first tag or doctype is dropped.
                    this.ignoreUntil('<');
                }
            } else if (this.startsWith("</") && this.isIdentifierStartChar(this._pos + 2)) {
                this._isLateForDoctype = true;
                this.tagEnd();
            } else if (this.startsWith("<!--")) {
                this.comment();
            } else if (this.startsWith("<") && this.isIdentifierStartChar(this._pos + 1)) {
                this._isLateForDoctype = true;
                this.tagStart();
            } else if (this.props.isIgnoreQuestAndExclam() && (this.startsWith("<!") || this.startsWith("<?"))) {
                this.ignoreUntil('>');
                if (this.isChar('>')) {
                    this.go();
                }
            } else {
                this.content();
            }
        }
    }

    /** Tells whether the name is html, head or body (case-insensitive). */
    private boolean isReservedTag(String string) {
        return "html".equalsIgnoreCase(string) || "head".equalsIgnoreCase(string) || "body".equalsIgnoreCase(string);
    }

    /**
     * Decides from the cleaner properties whether a tag with the given name is
     * emitted as plain content instead of a tag token: either an unknown,
     * non-reserved tag or a deprecated tag, when the matching "treat as content"
     * property is set and the matching "omit" property is not.
     */
    private boolean isTreatedAsContent(String tagName) {
        ITagInfoProvider tagInfoProvider = this.cleaner.getTagInfoProvider();
        TagInfo tagInfo = tagInfoProvider.getTagInfo(tagName);
        if (tagInfo == null) {
            return !this.props.isOmitUnknownTags()
                    && this.props.isTreatUnknownTagsAsContent()
                    && !this.isReservedTag(tagName);
        }
        return tagInfo.isDeprecated()
                && !this.props.isOmitDeprecatedTags()
                && this.props.isTreatDeprecatedTagsAsContent();
    }

    /**
     * Parses an opening tag starting at the current '&lt;' and adds a
     * {@link TagNode} for it, or emits the text as content when the tag is to be
     * treated as content or is malformed. Entering a script tag switches the
     * tokenizer to script context.
     */
    private void tagStart() throws IOException {
        this.saveCurrent();
        this.go();
        if (this.isAllRead()) {
            return;
        }
        String tagName = this.identifier();
        if (this.isTreatedAsContent(tagName)) {
            this.content();
            return;
        }
        this._currentTagToken = new TagNode(tagName, this.cleaner);
        if (!this._asExpected) {
            this.addSavedAsContent();
            return;
        }
        this.skipWhitespaces();
        this.tagAttributes();
        String originalSource = this._saved.toString();
        this.addToken(this._currentTagToken);
        if (this.isChar('>')) {
            this.go();
            if ("script".equalsIgnoreCase(tagName)) {
                this._isScriptContext = true;
            }
            originalSource = originalSource + ">";
        } else if (this.startsWith("/>")) {
            this.go(2);
            originalSource = originalSource + "/>";
        }
        this._currentTagToken.setOriginalSource(originalSource);
        this._currentTagToken = null;
    }

    /**
     * Parses a closing tag starting at the current "&lt;/" and adds an
     * {@link EndTagToken} for it, or emits the text as content when the tag is
     * to be treated as content or is malformed. A closing script tag leaves
     * script context.
     */
    private void tagEnd() throws IOException {
        this.saveCurrent(2);
        this.go(2);
        if (this.isAllRead()) {
            return;
        }
        String tagName = this.identifier();
        if (this.isTreatedAsContent(tagName)) {
            this.content();
            return;
        }
        this._currentTagToken = new EndTagToken(tagName);
        if (!this._asExpected) {
            this.addSavedAsContent();
            return;
        }
        this.skipWhitespaces();
        this.tagAttributes();
        String originalSource = this._saved.toString();
        this.addToken(this._currentTagToken);
        if (this.isChar('>')) {
            this.go();
            originalSource = originalSource + ">";
        }
        if ("script".equalsIgnoreCase(tagName)) {
            this._isScriptContext = false;
        }
        this._currentTagToken.setOriginalSource(originalSource);
        this._currentTagToken = null;
    }

    /**
     * Reads an identifier (tag or attribute name) at the current position.
     *
     * <p>Trailing helper characters are dropped. When the name contains colons,
     * only the first two segments are considered: with namespace awareness the
     * result is {@code prefix:name} and the lower-cased prefix is recorded
     * (unless it is {@code xmlns}); otherwise only the second segment is
     * returned.
     *
     * @return the identifier, or null (with {@code _asExpected} cleared) when
     *         the current character cannot start one
     */
    private String identifier() throws IOException {
        this._asExpected = true;
        if (!this.isIdentifierStartChar()) {
            this._asExpected = false;
            return null;
        }
        StringBuilder raw = new StringBuilder();
        while (!this.isAllRead() && this.isIdentifierChar()) {
            this.saveCurrent();
            raw.append(this._working[this._pos]);
            this.go();
        }
        while (raw.length() > 0 && this.isIdentifierHelperChar(raw.charAt(raw.length() - 1))) {
            raw.deleteCharAt(raw.length() - 1);
        }
        if (raw.length() == 0) {
            return null;
        }
        String name = raw.toString();
        int colon = name.indexOf(':');
        if (colon < 0) {
            return name;
        }
        String prefix = name.substring(0, colon);
        String localName = name.substring(colon + 1);
        int secondColon = localName.indexOf(':');
        if (secondColon >= 0) {
            localName = localName.substring(0, secondColon);
        }
        if (!this.props.isNamespacesAware()) {
            return localName;
        }
        if (!"xmlns".equalsIgnoreCase(prefix)) {
            this._namespacePrefixes.add(prefix.toLowerCase());
        }
        return prefix + ":" + localName;
    }

    /**
     * Reads attributes into the current tag token until the tag is closed by
     * '&gt;' or "/&gt;", another '&lt;' appears where a name is expected, or the
     * input ends. An attribute without a value gets "", "true" or its own name
     * depending on {@code props.booleanAttributeValues}.
     */
    private void tagAttributes() throws IOException {
        while (!this.isAllRead() && this._asExpected && !this.isChar('>') && !this.startsWith("/>")) {
            this.skipWhitespaces();
            String attName = this.identifier();
            if (!this._asExpected) {
                // Not an attribute name: skip one junk character unless it delimits the tag.
                if (!(this.isChar('<') || this.isChar('>') || this.startsWith("/>"))) {
                    this.saveCurrent();
                    this.go();
                }
                // A '<' leaves _asExpected false, which terminates the loop.
                if (!this.isChar('<')) {
                    this._asExpected = true;
                }
                continue;
            }
            this.skipWhitespaces();
            String attValue;
            if (this.isChar('=')) {
                this.saveCurrent();
                this.go();
                attValue = this.attributeValue();
            } else if ("empty".equals(this.props.booleanAttributeValues)) {
                attValue = "";
            } else if ("true".equals(this.props.booleanAttributeValues)) {
                attValue = "true";
            } else {
                attValue = attName;
            }
            if (this._asExpected) {
                this._currentTagToken.addAttribute(attName, attValue);
            }
        }
    }

    /**
     * Reads an attribute value, which may be single-quoted, double-quoted or
     * unquoted. Quotes are consumed but not included in the result.
     *
     * @return the value, or "" when a tag delimiter follows immediately
     */
    private String attributeValue() throws IOException {
        this.skipWhitespaces();
        if (this.isChar('<') || this.isChar('>') || this.startsWith("/>")) {
            return "";
        }
        boolean quoted = false;
        char quote = '\"';
        if (this.isChar('\'')) {
            quoted = true;
            quote = '\'';
            this.saveCurrent();
            this.go();
        } else if (this.isChar('\"')) {
            quoted = true;
            this.saveCurrent();
            this.go();
        }
        boolean multiWord = this.props.isAllowMultiWordAttributes();
        boolean htmlInside = this.props.isAllowHtmlInsideAttributes();
        StringBuilder value = new StringBuilder();
        while (!this.isAttributeValueEnd(quoted, quote, multiWord, htmlInside)) {
            value.append(this._working[this._pos]);
            this.saveCurrent();
            this.go();
        }
        if (quoted && this.isChar(quote)) {
            this.saveCurrent();
            this.go();
        }
        return value.toString();
    }

    /**
     * Tells whether the current position ends the attribute value being read.
     *
     * <p>A quoted value ends at its closing quote, at '&lt;' or '&gt;' unless
     * HTML is allowed inside attributes, and at whitespace unless multi-word
     * attributes are allowed. An unquoted value ends at whitespace, '&lt;' or
     * '&gt;'. The end of input ends any value.
     */
    private boolean isAttributeValueEnd(boolean quoted, char quote, boolean multiWord, boolean htmlInside) {
        if (this.isAllRead()) {
            return true;
        }
        if (quoted) {
            return this.isChar(quote)
                    || !htmlInside && (this.isChar('>') || this.isChar('<'))
                    || !multiWord && this.isWhitespace();
        }
        return this.isWhitespace() || this.isChar('>') || this.isChar('<');
    }

    /**
     * Consumes characters up to (not including) the next '&lt;' or the end of
     * input and emits everything saved so far as a content token. At least one
     * character is consumed when input is available.
     *
     * @return true if a content token was added
     */
    private boolean content() throws IOException {
        while (!this.isAllRead()) {
            this.saveCurrent();
            this.go();
            if (this.isChar('<')) {
                break;
            }
        }
        return this.addSavedAsContent();
    }

    /**
     * Skips characters without saving them until the current character is
     * {@code c} or the input ends. At least one character is skipped when input
     * is available.
     */
    private void ignoreUntil(char c) throws IOException {
        while (!this.isAllRead()) {
            this.go();
            if (this.isChar(c)) {
                break;
            }
        }
    }

    /**
     * Reads a comment starting at the current "&lt;!--" up to the closing
     * "--&gt;" or the end of input. Unless comments are omitted, a non-empty
     * comment is added as a {@link CommentToken} in which every "--", a leading
     * '-' and a trailing '-' are replaced using the configured hyphen
     * replacement.
     */
    private void comment() throws IOException {
        this.go(4);
        while (!this.isAllRead() && !this.startsWith("-->")) {
            this.saveCurrent();
            this.go();
        }
        if (this.startsWith("-->")) {
            this.go(3);
        }
        if (this._saved.length() == 0) {
            return;
        }
        if (!this.props.isOmitComments()) {
            String hyphenReplacement = this.props.getHyphenReplacementInComment();
            // Literal replacement: the configured text must not be interpreted as a
            // regex replacement pattern, where '$' and '\' have special meaning.
            String text = this._saved.toString().replace("--", hyphenReplacement + hyphenReplacement);
            if (text.length() > 0 && text.charAt(0) == '-') {
                text = hyphenReplacement + text.substring(1);
            }
            int length = text.length();
            if (length > 0 && text.charAt(length - 1) == '-') {
                text = text.substring(0, length - 1) + hyphenReplacement;
            }
            this.addToken(new CommentToken(text));
        }
        this._saved.setLength(0);
    }

    /**
     * Reads a doctype declaration starting at the current "&lt;!doctype": two
     * identifiers followed by two attribute-style values. The rest of the input
     * up to the next '&lt;' is skipped and the result is stored for
     * {@link #getDocType()}.
     */
    private void doctype() throws IOException {
        this.go(9);
        this.skipWhitespaces();
        String part1 = this.identifier();
        this.skipWhitespaces();
        String part2 = this.identifier();
        this.skipWhitespaces();
        String part3 = this.attributeValue();
        this.skipWhitespaces();
        String part4 = this.attributeValue();
        this.ignoreUntil('<');
        this._docType = new DoctypeToken(part1, part2, part3, part4);
    }

    /**
     * Returns the doctype token recognised during tokenizing.
     *
     * @return the doctype, or null if none has been read
     */
    public DoctypeToken getDocType() {
        return this._docType;
    }
}

