package edu.harvard.hul.ois.jhove.module;

import edu.harvard.hul.ois.jhove.Agent;
import edu.harvard.hul.ois.jhove.AgentType;
import edu.harvard.hul.ois.jhove.Checksum;
import edu.harvard.hul.ois.jhove.ChecksumInputStream;
import edu.harvard.hul.ois.jhove.ChecksumType;
import edu.harvard.hul.ois.jhove.Checksummer;
import edu.harvard.hul.ois.jhove.Document;
import edu.harvard.hul.ois.jhove.DocumentType;
import edu.harvard.hul.ois.jhove.ErrorMessage;
import edu.harvard.hul.ois.jhove.ExternalSignature;
import edu.harvard.hul.ois.jhove.Identifier;
import edu.harvard.hul.ois.jhove.IdentifierType;
import edu.harvard.hul.ois.jhove.InfoMessage;
import edu.harvard.hul.ois.jhove.ModuleBase;
import edu.harvard.hul.ois.jhove.Property;
import edu.harvard.hul.ois.jhove.RepInfo;
import edu.harvard.hul.ois.jhove.SignatureType;
import edu.harvard.hul.ois.jhove.SignatureUseType;
import edu.harvard.hul.ois.jhove.TextMDMetadata;
import edu.harvard.hul.ois.jhove.module.html.Html3_2DocDesc;
import edu.harvard.hul.ois.jhove.module.html.Html4_01FrameDocDesc;
import edu.harvard.hul.ois.jhove.module.html.Html4_01StrictDocDesc;
import edu.harvard.hul.ois.jhove.module.html.Html4_01TransDocDesc;
import edu.harvard.hul.ois.jhove.module.html.Html4_0FrameDocDesc;
import edu.harvard.hul.ois.jhove.module.html.Html4_0StrictDocDesc;
import edu.harvard.hul.ois.jhove.module.html.Html4_0TransDocDesc;
import edu.harvard.hul.ois.jhove.module.html.HtmlCharStream;
import edu.harvard.hul.ois.jhove.module.html.HtmlDocDesc;
import edu.harvard.hul.ois.jhove.module.html.HtmlMetadata;
import edu.harvard.hul.ois.jhove.module.html.JHDoctype;
import edu.harvard.hul.ois.jhove.module.html.JHElement;
import edu.harvard.hul.ois.jhove.module.html.JHOpenTag;
import edu.harvard.hul.ois.jhove.module.html.JHXmlDecl;
import edu.harvard.hul.ois.jhove.module.html.ParseException;
import edu.harvard.hul.ois.jhove.module.html.ParseHtml;
import edu.harvard.hul.ois.jhove.module.html.Token;
import edu.harvard.hul.ois.jhove.module.html.TokenMgrError;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.io.UnsupportedEncodingException;
import java.util.Iterator;
import java.util.List;
import java.util.Locale;

/* loaded from: input_file:edu/harvard/hul/ois/jhove/module/HtmlModule.class */
public class HtmlModule extends ModuleBase {
    /** Module name passed to the {@code ModuleBase} constructor. */
    private static final String NAME = "HTML-hul";

    /** Module release string passed to the {@code ModuleBase} constructor. */
    private static final String RELEASE = "1.3";

    /** Human-readable list of the HTML/XHTML variants this module covers. */
    private static final String COVERAGE = "HTML 3.2, HTML 4.0 Strict, HTML 4.0 Transitional, HTML 4.0 Frameset, HTML 4.01 Strict, HTML 4.01 Transitional, HTML 4.01 Frameset, XHTML 1.0 Strict, XHTML 1.0 Transitional, XHTML 1.0 Frameset, XHTML 1.1";

    /** Human-readable statement of the well-formedness criteria. */
    private static final String WELLFORMED = "An HTML file is well-formed if it meets the criteria defined in the HTML 3.2 specification (W3C Recommendation, 14-Jan-1997), the HTML 4.0 specification (W3C Recommendation, 24-Apr-1998), the HTML 4.01 specification (W3C Recommendation, 24-Dec-1999), the XHTML 1.0 specification (W3C Recommendation, 26-Jan-2000, revised 1-Aug-2002), or the XHTML 1.1 specification (W3C Recommendation, 31-May-2001)";

    /** Human-readable statement of the validity criteria. */
    private static final String VALIDITY = "An HTML file is valid if it is well-formed and has a valid DOCTYPE declaration.";

    /** Human-readable summary of the representation information reported. */
    private static final String REPINFO = "Languages, title, META tags, frames, links, scripts, images, citations, defined terms, abbreviations, entities, Unicode entity blocks";

    /** Free-form note passed to the {@code ModuleBase} constructor (empty). */
    private static final String NOTE = "";

    /** Copyright and licence statement passed to the {@code ModuleBase} constructor. */
    private static final String RIGHTS = "Copyright 2004-2007 by JSTOR and the President and Fellows of Harvard College. Released under the GNU Lesser General Public License.";

    /** Checksumming wrapper around the input; only assigned by {@code parse} when checksums are computed. */
    protected ChecksumInputStream _cstream;

    /** Buffered data stream that {@code parse} hands to the HTML tokenizer. */
    protected DataInputStream _dstream;

    /** Upper-cased, unquoted public identifier of the DOCTYPE, as recorded by {@code checkDoctype}. */
    protected String _doctype;

    // Document-type codes returned by checkDoctype and used to index
    // profileNames and versionNames. Index 0 means "unknown / not recognised".
    public static final int HTML_3_2 = 1;
    public static final int HTML_4_0_STRICT = 2;
    public static final int HTML_4_0_FRAMESET = 3;
    public static final int HTML_4_0_TRANSITIONAL = 4;
    public static final int HTML_4_01_STRICT = 5;
    public static final int HTML_4_01_FRAMESET = 6;
    public static final int HTML_4_01_TRANSITIONAL = 7;
    public static final int XHTML_1_0_STRICT = 8;
    public static final int XHTML_1_0_TRANSITIONAL = 9;
    public static final int XHTML_1_0_FRAMESET = 10;
    public static final int XHTML_1_1 = 11;

    /** Set by {@code parse} when the default parameter {@code withtextmd=true} is present. */
    protected boolean _withTextMD;

    /** textMD metadata collected during the current parse. */
    protected TextMDMetadata _textMD;

    /** Date components passed to the {@code ModuleBase} constructor (presumably year, month, day of the release). */
    private static final int[] DATE = {2006, 9, 5};
    private static final String[] FORMAT = {"HTML"};
    private static final String[] MIMETYPE = {"text/html"};

    private static final String STRICT = "Strict";
    private static final String FRAMESET = "Frameset";
    private static final String TRANSITIONAL = "Transitional";

    /**
     * Profile name for each document-type code; {@code null} where no profile
     * applies. The order must follow the constants above: note that
     * XHTML_1_0_TRANSITIONAL (9) precedes XHTML_1_0_FRAMESET (10).
     */
    private static final String[] profileNames = {
        null,          // 0: unknown
        null,          // HTML_3_2
        STRICT,        // HTML_4_0_STRICT
        FRAMESET,      // HTML_4_0_FRAMESET
        TRANSITIONAL,  // HTML_4_0_TRANSITIONAL
        STRICT,        // HTML_4_01_STRICT
        FRAMESET,      // HTML_4_01_FRAMESET
        TRANSITIONAL,  // HTML_4_01_TRANSITIONAL
        STRICT,        // XHTML_1_0_STRICT
        TRANSITIONAL,  // XHTML_1_0_TRANSITIONAL
        FRAMESET,      // XHTML_1_0_FRAMESET
        null           // XHTML_1_1
    };

    private static final String HTML_4_0 = "HTML 4.0";
    private static final String HTML_4_01 = "HTML 4.01";
    private static final String XHTML_1_0 = "XHTML 1.0";

    /** Version name for each document-type code, indexed like {@link #profileNames}. */
    private static final String[] versionNames = {null, "HTML 3.2", HTML_4_0, HTML_4_0, HTML_4_0, HTML_4_01, HTML_4_01, HTML_4_01, XHTML_1_0, XHTML_1_0, XHTML_1_0, "XHTML 1.1"};

    /**
     * Creates the module and registers its descriptive metadata: the vendor,
     * the five W3C specification documents it validates against, and the
     * {@code .html} / {@code .htm} file-extension signatures.
     */
    public HtmlModule() {
        super(NAME, RELEASE, DATE, FORMAT, COVERAGE, MIMETYPE, WELLFORMED, VALIDITY, REPINFO, NOTE, RIGHTS, false);
        this._withTextMD = false;
        this._vendor = Agent.harvardInstance();

        // Agents shared by several of the specification documents below.
        Agent w3c = Agent.newW3CInstance();
        Agent raggett = new Agent.Builder("Dave Raggett", AgentType.OTHER).build();
        Agent leHors = new Agent.Builder("Arnaud Le Hors", AgentType.OTHER).build();
        Agent jacobs = new Agent.Builder("Ian Jacobs", AgentType.OTHER).build();

        Document html32 = new Document("HTML 3.2 Reference Specification", DocumentType.REPORT);
        html32.setPublisher(w3c);
        html32.setAuthor(raggett);
        html32.setDate("1997-01-14");
        html32.setIdentifier(new Identifier("http://www.w3c.org/TR/REC-html32-19970114", IdentifierType.URL));
        this._specification.add(html32);

        Document html40 = new Document("HTML 4.0 Specification", DocumentType.REPORT);
        html40.setPublisher(w3c);
        html40.setAuthor(raggett);
        html40.setAuthor(leHors);
        html40.setAuthor(jacobs);
        html40.setDate("1998-04-24");
        html40.setIdentifier(new Identifier("http://www.w3.org/TR/1998/REC-html40-19980424/", IdentifierType.URL));
        this._specification.add(html40);

        Document html401 = new Document("HTML 4.01 Specification", DocumentType.REPORT);
        html401.setPublisher(w3c);
        html401.setAuthor(raggett);
        html401.setAuthor(leHors);
        html401.setAuthor(jacobs);
        html401.setDate("1999-12-24");
        html401.setIdentifier(new Identifier("http://www.w3.org/TR/1999/REC-html401-19991224/", IdentifierType.URL));
        this._specification.add(html401);

        Document xhtml10 = new Document("XHTML(TM) 1.0 The Extensible HyperText Markup Language (Second Edition)", DocumentType.REPORT);
        xhtml10.setPublisher(w3c);
        // Dates are written year-month-day, like the other specification entries.
        xhtml10.setDate("2002-08-01");
        xhtml10.setIdentifier(new Identifier("http://www.w3.org/TR/xhtml1/", IdentifierType.URL));
        this._specification.add(xhtml10);

        Document xhtml11 = new Document("XHTML(TM) 1.1 - Module-based XHTML", DocumentType.REPORT);
        xhtml11.setPublisher(w3c);
        xhtml11.setDate("2001-05-31");
        xhtml11.setIdentifier(new Identifier("http://www.w3.org/TR/2001/REC-xhtml11-20010531/", IdentifierType.URL));
        this._specification.add(xhtml11);

        this._signature.add(new ExternalSignature(".html", SignatureType.EXTENSION, SignatureUseType.OPTIONAL));
        this._signature.add(new ExternalSignature(".htm", SignatureType.EXTENSION, SignatureUseType.OPTIONAL));
    }

    /**
     * Parses an HTML document and records the results in {@code repInfo}.
     *
     * <p>When {@code i} is non-zero the work is delegated to a new
     * {@link XmlModule} (a value of 100 is passed on as 0), because an earlier
     * pass decided the content is XHTML. When {@code i} is zero the stream is
     * tokenized as HTML, the DOCTYPE is classified, the matching document
     * description validates the element list, and version, profile, metadata
     * and (if enabled) checksums are stored in {@code repInfo}.
     *
     * @param inputStream the document content
     * @param repInfo receives format, messages, well-formedness, validity,
     *        profile, version, properties and checksums
     * @param i parse index; 0 for the HTML pass, non-zero to delegate to the
     *        XML module
     * @return 100 when the document looks like XHTML and must be handled by
     *         the XML module (presumably the caller then calls {@code parse}
     *         again with that value); otherwise 0, or whatever the XML module
     *         returns when delegating
     * @throws IOException if reading the stream fails
     */
    @Override // edu.harvard.hul.ois.jhove.ModuleBase, edu.harvard.hul.ois.jhove.Module
    public int parse(InputStream inputStream, RepInfo repInfo, int i) throws IOException {
        if (i != 0) {
            if (!isXmlAvailable()) {
                repInfo.setMessage(new ErrorMessage("XML-HUL module required to validate XHTML documents"));
                repInfo.setWellFormed(false);
                return 0;
            }
            XmlModule xmlModule = new XmlModule();
            if (i == 100) {
                // 100 is this module's own "re-parse as XHTML" marker; the XML
                // module starts from its first pass.
                i = 0;
            }
            xmlModule.setApp(this._app);
            xmlModule.setBase(this._je);
            xmlModule.setDefaultParams(this._defaultParams);
            try {
                xmlModule.applyDefaultParams();
            } catch (Exception ignored) {
                // Deliberately best effort: a failure to apply the default
                // parameters must not prevent the XHTML parse from running.
            }
            xmlModule.setXhtmlDoctype(this._doctype);
            return xmlModule.parse(inputStream, repInfo, i);
        }

        this._doctype = null;
        if (this._defaultParams != null) {
            Iterator<String> params = this._defaultParams.iterator();
            while (params.hasNext()) {
                if ("withtextmd=true".equalsIgnoreCase(params.next())) {
                    this._withTextMD = true;
                }
            }
        }
        initParse();
        repInfo.setFormat(this._format[0]);
        repInfo.setMimeType(this._mimeType[0]);
        repInfo.setModule(this);
        // i is always 0 on this path, so every HTML pass starts with fresh textMD.
        this._textMD = new TextMDMetadata();

        Checksummer checksummer = null;
        int bufferSize = this._je != null ? this._je.getBufferSize() : 0;
        if (this._je != null && this._je.getChecksumFlag() && repInfo.getChecksum().isEmpty()) {
            checksummer = new Checksummer();
            this._cstream = new ChecksumInputStream(inputStream, checksummer);
            this._dstream = getBufferedDataStream(this._cstream, bufferSize);
        } else {
            this._dstream = getBufferedDataStream(inputStream, bufferSize);
        }

        try {
            HtmlCharStream htmlCharStream = new HtmlCharStream(this._dstream, TextMDMetadata.CHARSET_ISO8859_1);

            List elements;
            try {
                elements = new ParseHtml(htmlCharStream).HtmlDoc();
            } catch (ParseException e) {
                Token token = e.currentToken;
                if (token != null) {
                    repInfo.setMessage(new ErrorMessage("Parse error", "Line = " + token.beginLine + ", column = " + token.beginColumn));
                } else {
                    repInfo.setMessage(new ErrorMessage("Parse error"));
                }
                repInfo.setWellFormed(false);
                // Nothing was parsed, so there is nothing further to validate.
                return 0;
            } catch (TokenMgrError e) {
                repInfo.setMessage(new ErrorMessage("TokenMgrError: " + e.getLocalizedMessage()));
                repInfo.setWellFormed(false);
                return 0;
            }

            if (elements.isEmpty()) {
                repInfo.setWellFormed(false);
                repInfo.setMessage(new ErrorMessage("Document is empty"));
                return 0;
            }
            int doctypeIndex = checkDoctype(elements);
            if (doctypeIndex < 0) {
                repInfo.setWellFormed(false);
                repInfo.setMessage(new ErrorMessage("DOCTYPE is not HTML"));
                return 0;
            }

            // Require at least one of the basic structural tags before
            // treating the content as HTML at all.
            boolean hasStructuralTag = false;
            Iterator elementIter = elements.iterator();
            while (!hasStructuralTag && elementIter.hasNext()) {
                Object element = elementIter.next();
                if (element instanceof JHOpenTag) {
                    String name = ((JHOpenTag) element).getName();
                    hasStructuralTag = "html".equals(name) || "head".equals(name)
                            || "body".equals(name) || "title".equals(name);
                }
            }
            if (!hasStructuralTag) {
                repInfo.setMessage(new ErrorMessage("Document contains no html, head, body or title tags"));
                repInfo.setWellFormed(false);
                return 0;
            }

            String kindOfLineEnd = htmlCharStream.getKindOfLineEnd();
            if (kindOfLineEnd == null) {
                repInfo.setMessage(new InfoMessage("Not able to determine type of end of line"));
                this._textMD.setLinebreak(-1);
            } else if ("CR".equalsIgnoreCase(kindOfLineEnd)) {
                this._textMD.setLinebreak(0);
            } else if ("LF".equalsIgnoreCase(kindOfLineEnd)) {
                this._textMD.setLinebreak(1);
            } else if ("CRLF".equalsIgnoreCase(kindOfLineEnd)) {
                this._textMD.setLinebreak(2);
            }

            if (doctypeIndex == 0) {
                // No recognised DOCTYPE: decide between XHTML, plain XML and
                // a fallback to HTML 3.2.
                int xhtmlGuess = seemsToBeXHTML(elements);
                if (xhtmlGuess == 1) {
                    repInfo.setMessage(new ErrorMessage("Document has XML declaration but no DOCTYPE; probably XML rather than HTML"));
                    repInfo.setWellFormed(false);
                    return 0;
                }
                if (xhtmlGuess == 2) {
                    return 100;
                }
                repInfo.setMessage(new ErrorMessage("Unrecognized or missing DOCTYPE declaration; validation continuing as HTML 3.2"));
                repInfo.setValid(false);
            }

            HtmlDocDesc docDesc;
            String markupVersion;
            switch (doctypeIndex) {
                case HTML_4_0_STRICT:
                    docDesc = new Html4_0StrictDocDesc();
                    markupVersion = "4.0";
                    break;
                case HTML_4_0_FRAMESET:
                    docDesc = new Html4_0FrameDocDesc();
                    markupVersion = "4.0";
                    break;
                case HTML_4_0_TRANSITIONAL:
                    docDesc = new Html4_0TransDocDesc();
                    markupVersion = "4.0";
                    break;
                case HTML_4_01_STRICT:
                    docDesc = new Html4_01StrictDocDesc();
                    markupVersion = "4.01";
                    break;
                case HTML_4_01_FRAMESET:
                    docDesc = new Html4_01FrameDocDesc();
                    markupVersion = "4.01";
                    break;
                case HTML_4_01_TRANSITIONAL:
                    docDesc = new Html4_01TransDocDesc();
                    markupVersion = "4.01";
                    break;
                case XHTML_1_0_STRICT:
                case XHTML_1_0_TRANSITIONAL:
                case XHTML_1_0_FRAMESET:
                case XHTML_1_1:
                    // XHTML is validated by the XML module on a second pass.
                    return 100;
                case HTML_3_2:
                default:
                    docDesc = new Html3_2DocDesc();
                    markupVersion = "3.2";
                    break;
            }
            this._textMD.setMarkup_basis("HTML");
            this._textMD.setMarkup_basis_version(markupVersion);
            this._textMD.setMarkup_language(this._doctype);

            docDesc.validate(elements, repInfo);
            HtmlMetadata htmlMetadata = docDesc.getMetadata();

            String declaredCharset = htmlMetadata != null ? htmlMetadata.getCharset() : null;
            if (declaredCharset != null) {
                this._textMD.setCharset(declaredCharset);
            } else {
                this._textMD.setCharset(TextMDMetadata.CHARSET_ISO8859_1);
            }
            boolean isUtf = this._textMD.getCharset().indexOf("UTF") != -1;
            this._textMD.setByte_order(this._bigEndian ? 0 : 1);
            this._textMD.setByte_size("8");
            this._textMD.setCharacter_size(isUtf ? "variable" : "1");

            if (repInfo.getWellFormed() == 0) {
                return 0;
            }
            if (doctypeIndex != 0) {
                if (profileNames[doctypeIndex] != null) {
                    repInfo.setProfile(profileNames[doctypeIndex]);
                }
                repInfo.setVersion(versionNames[doctypeIndex]);
            }
            if (htmlMetadata != null) {
                Property property = htmlMetadata.toProperty(this._withTextMD ? this._textMD : null);
                if (property != null) {
                    repInfo.setProperty(property);
                }
            }
            if (checksummer != null) {
                repInfo.setSize(this._cstream.getNBytes());
                repInfo.setChecksum(new Checksum(checksummer.getCRC32(), ChecksumType.CRC32));
                String md5 = checksummer.getMD5();
                if (md5 != null) {
                    repInfo.setChecksum(new Checksum(md5, ChecksumType.MD5));
                }
                String sha1 = checksummer.getSHA1();
                if (sha1 != null) {
                    repInfo.setChecksum(new Checksum(sha1, ChecksumType.SHA1));
                }
            }
            return 0;
        } catch (UnsupportedEncodingException e) {
            repInfo.setMessage(new ErrorMessage("Internal error: " + e.getMessage()));
            repInfo.setWellFormed(false);
            return 0;
        }
    }

    /**
     * Scans the start of the stream for an HTML signature and records a
     * signature match in {@code repInfo} when one is found.
     *
     * <p>Up to {@code getBase().getSigBytes()} bytes are read. Each byte is
     * upper-cased, whitespace is skipped, and the remaining characters are
     * matched against {@code <!DOCTYPE HTML}, {@code <HTML} and
     * {@code <TITLE}. If none matches before the byte limit or end of file,
     * the document is marked as not well-formed.
     *
     * @param file the file being examined (not used by this implementation)
     * @param inputStream the document content
     * @param repInfo receives format, MIME type, module and the match result
     * @throws IOException if reading fails for a reason other than end of file
     */
    @Override // edu.harvard.hul.ois.jhove.ModuleBase, edu.harvard.hul.ois.jhove.Module
    public void checkSignatures(File file, InputStream inputStream, RepInfo repInfo) throws IOException {
        repInfo.setFormat(this._format[0]);
        repInfo.setMimeType(this._mimeType[0]);
        repInfo.setModule(this);

        // Whitespace in the input is skipped before matching, so the
        // signatures must be whitespace-free too; otherwise the DOCTYPE
        // pattern could never match.
        char[][] signatures = {
            "<!DOCTYPE HTML".replace(" ", "").toCharArray(),
            "<HTML".toCharArray(),
            "<TITLE".toCharArray()
        };
        // matched[s] = number of leading characters of signatures[s] seen so far.
        int[] matched = new int[signatures.length];

        int sigBytes = getBase().getSigBytes();
        DataInputStream dataInputStream = new DataInputStream(inputStream);
        int bytesRead = 0;
        try {
            while (bytesRead < sigBytes) {
                char ch = Character.toUpperCase((char) readUnsignedByte(dataInputStream, this));
                bytesRead++;
                if (Character.isWhitespace(ch)) {
                    continue;
                }
                for (int s = 0; s < signatures.length; s++) {
                    char[] signature = signatures[s];
                    if (ch == signature[matched[s]]) {
                        matched[s]++;
                        if (matched[s] == signature.length) {
                            repInfo.setSigMatch(this._name);
                            return;
                        }
                    } else {
                        // Restart, but let the current character begin a new
                        // match so that input such as "<<HTML" is still found.
                        // This is sufficient because '<' occurs only at
                        // position 0 of every signature.
                        matched[s] = (ch == signature[0]) ? 1 : 0;
                    }
                }
            }
        } catch (EOFException endOfInput) {
            // Ran out of data before any signature matched.
        }
        repInfo.setWellFormed(false);
    }

    /**
     * Classifies the document by its DOCTYPE declaration.
     *
     * <p>The DOCTYPE is expected as the first element, or as the second when
     * the first is an XML declaration. When the declaration is
     * {@code HTML PUBLIC "..."}, the upper-cased, unquoted public identifier
     * is stored in {@code _doctype} and compared with the known HTML 3.2,
     * 4.0 and 4.01 identifiers.
     *
     * @param list the parsed document elements
     * @return one of the {@code HTML_*} constants for a recognised DOCTYPE;
     *         {@code -1} when the DOCTYPE names a root other than HTML;
     *         {@code 0} when the DOCTYPE is missing, incomplete, not PUBLIC,
     *         or not recognised
     */
    protected int checkDoctype(List list) {
        if (list == null || list.isEmpty()) {
            return 0;
        }
        Object candidate = list.get(0);
        if ((candidate instanceof JHXmlDecl) && list.size() >= 2) {
            candidate = list.get(1);
        }
        if (!(candidate instanceof JHDoctype)) {
            return 0;
        }
        List doctypeElements = ((JHDoctype) candidate).getDoctypeElements();
        if (doctypeElements.size() < 3) {
            return 0;
        }
        try {
            // Locale.ROOT keeps the comparison independent of the default
            // locale (e.g. Turkish would upper-case "public" to "PUBLİC").
            String rootName = ((String) doctypeElements.get(0)).toUpperCase(Locale.ROOT);
            if (!"HTML".equals(rootName)) {
                return -1;
            }
            String keyword = ((String) doctypeElements.get(1)).toUpperCase(Locale.ROOT);
            if (!"PUBLIC".equals(keyword)) {
                return 0;
            }
            String publicId = stripQuotes(((String) doctypeElements.get(2)).toUpperCase(Locale.ROOT));
            this._doctype = publicId;
            if ("-//W3C//DTD HTML 3.2 FINAL//EN".equals(publicId) || "-//W3C//DTD HTML 3.2//EN".equals(publicId)) {
                return HTML_3_2;
            }
            if ("-//W3C//DTD HTML 4.0//EN".equals(publicId)) {
                return HTML_4_0_STRICT;
            }
            if ("-//W3C//DTD HTML 4.0 TRANSITIONAL//EN".equals(publicId)) {
                return HTML_4_0_TRANSITIONAL;
            }
            if ("-//W3C//DTD HTML 4.0 FRAMESET//EN".equals(publicId)) {
                return HTML_4_0_FRAMESET;
            }
            if ("-//W3C//DTD HTML 4.01//EN".equals(publicId)) {
                return HTML_4_01_STRICT;
            }
            if ("-//W3C//DTD HTML 4.01 TRANSITIONAL//EN".equals(publicId)) {
                return HTML_4_01_TRANSITIONAL;
            }
            if ("-//W3C//DTD HTML 4.01 FRAMESET//EN".equals(publicId)) {
                return HTML_4_01_FRAMESET;
            }
            return 0;
        } catch (RuntimeException malformedDoctype) {
            // Unexpected element types or empty tokens are treated the same
            // as an unrecognised DOCTYPE.
            return 0;
        }
    }

    /**
     * Guesses whether a document without a recognised DOCTYPE is XHTML.
     *
     * @param list the parsed document elements
     * @return {@code 0} when the first element is not an XML declaration (or
     *         the list cannot be inspected); {@code 2} when an XML declaration
     *         is present and the first open tag is {@code html}; {@code 1}
     *         when an XML declaration is present but the first open tag is
     *         something else or there is no open tag
     */
    protected int seemsToBeXHTML(List list) {
        try {
            JHElement first = (JHElement) list.get(0);
            if (first instanceof JHXmlDecl) {
                for (Object item : list) {
                    JHElement element = (JHElement) item;
                    if (!(element instanceof JHOpenTag)) {
                        continue;
                    }
                    // Only the first open tag decides the outcome.
                    if ("html".equals(((JHOpenTag) element).getName())) {
                        return 2;
                    }
                    return 1;
                }
                return 1;
            }
            return 0;
        } catch (Exception unreadable) {
            return 0;
        }
    }

    /**
     * Removes one pair of enclosing double quotes from a string.
     *
     * @param str the string to unquote; may be {@code null}
     * @return {@code str} without its first and last character when both are
     *         double quotes; otherwise {@code str} unchanged (including
     *         {@code null}, the empty string and a lone quote character)
     */
    protected String stripQuotes(String str) {
        if (str == null) {
            return null;
        }
        int length = str.length();
        // At least two characters are needed for an opening and a closing
        // quote; a lone '"' must not be treated as both.
        if (length >= 2 && str.charAt(0) == '"' && str.charAt(length - 1) == '"') {
            return str.substring(1, length - 1);
        }
        return str;
    }

    /**
     * Reports whether the XML module class can be loaded.
     *
     * @return {@code true} when
     *         {@code edu.harvard.hul.ois.jhove.module.XmlModule} loads via
     *         {@link Class#forName(String)}; {@code false} when loading throws
     *         an exception
     */
    protected static boolean isXmlAvailable() {
        boolean available;
        try {
            Class.forName("edu.harvard.hul.ois.jhove.module.XmlModule");
            available = true;
        } catch (Exception notLoadable) {
            available = false;
        }
        return available;
    }
}
