package edu.harvard.hul.ois.jhove.module;

import edu.harvard.hul.ois.jhove.Agent;
import edu.harvard.hul.ois.jhove.AgentType;
import edu.harvard.hul.ois.jhove.Checksum;
import edu.harvard.hul.ois.jhove.ChecksumInputStream;
import edu.harvard.hul.ois.jhove.ChecksumType;
import edu.harvard.hul.ois.jhove.Checksummer;
import edu.harvard.hul.ois.jhove.Document;
import edu.harvard.hul.ois.jhove.DocumentType;
import edu.harvard.hul.ois.jhove.ErrorMessage;
import edu.harvard.hul.ois.jhove.Identifier;
import edu.harvard.hul.ois.jhove.IdentifierType;
import edu.harvard.hul.ois.jhove.InfoMessage;
import edu.harvard.hul.ois.jhove.ModuleBase;
import edu.harvard.hul.ois.jhove.Property;
import edu.harvard.hul.ois.jhove.PropertyArity;
import edu.harvard.hul.ois.jhove.PropertyType;
import edu.harvard.hul.ois.jhove.RepInfo;
import edu.harvard.hul.ois.jhove.TextMDMetadata;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.Map;

/* loaded from: input_file:edu/harvard/hul/ois/jhove/module/Utf8Module.class */
/**
 * JHOVE module that checks whether a byte stream is well-formed UTF-8 and
 * reports representation information about it: the number of characters, the
 * Unicode blocks used, the line-ending conventions found, and any control
 * characters present.
 *
 * <p>The well-formedness test performed here is structural: every character
 * must start with a valid lead byte and be followed by the right number of
 * continuation bytes (0x80-0xBF). Overlong forms, surrogates and code points
 * above U+10FFFF are not rejected by this class.
 */
public class Utf8Module extends ModuleBase {

    /* ---- Registration data handed to the ModuleBase constructor ---- */
    private static final String NAME = "UTF8-hul";
    private static final String RELEASE = "1.6";
    private static final int[] DATE = {2014, 7, 18};
    private static final String[] FORMAT = {TextMDMetadata.CHARSET_UTF8};
    private static final String COVERAGE = "Unicode 7.0.0";
    private static final String[] MIMETYPE = {"text/plain; charset=UTF-8"};
    private static final String WELLFORMED = "An UTF-8 object is well-formed if each character is correctly encoded as a one-to-four byte sequence, as defined in the specifications";
    private static final String VALIDITY = null;
    private static final String REPINFO = "Additional representation information includes: number of characters and Unicode 7.0.0 code blocks";
    private static final String NOTE = null;
    private static final String RIGHTS = "Copyright 2003-2011 by JSTOR and the President and Fellows of Harvard College. Released under the GNU Lesser General Public License.";

    /* ---- Character constants ---- */
    private static final int CR = 13;
    private static final int LF = 10;
    /** First code point that is not a C0 control character. */
    private static final int FIRST_NON_CONTROL = 32;
    private static final int DEL = 127;

    /**
     * Value compared against {@code RepInfo.getWellFormed()} to detect that a
     * helper has already flagged the content as not well-formed.
     */
    private static final int WELL_FORMED_FALSE = 0;

    /** Display names of the C0 control characters, indexed by code point (0-31). */
    private static final String[] controlCharMnemonics = {"NUL (0x00)", "SOH (0x01)", "STX (0x02)", "ETX (0x03)", "EOT (0x04)", "ENQ (0x05)", "ACK (0x06)", "BEL (0x07)", "BS (0x08)", "TAB (0x09)", "LF (0x0A)", "VT (0x0B)", "FF (0x0C)", "CR (0x0D)", "SO (0x0E)", "SI (0x0F)", "DLE (0x10)", "DC1 (0x11)", "DC2 (0x12)", "DC3 (0x13)", "DC4 (0x14)", "NAK (0x15)", "SYN (0x16)", "ETB (0x17)", "CAN (0x18)", "EM (0x19)", "SUB (0x1A)", "ESC (0x1B)", "FS (0x1C)", "GS (0x1D)", "RS (0x1E)", "US (0x1F)"};

    /* ---- Per-parse state ---- */

    /** Checksumming wrapper around the input; only set when checksums are computed. */
    protected ChecksumInputStream _cstream;
    /** Stream the parser actually reads bytes from. */
    protected DataInputStream _dstream;
    /** True once a bare CR line ending has been seen. */
    protected boolean _lineEndCR;
    /** True once a bare LF line ending has been seen. */
    protected boolean _lineEndLF;
    /** True once a CR LF line ending has been seen. */
    protected boolean _lineEndCRLF;
    /** Code point of the previously decoded character (0 at start of parse). */
    protected int _prevChar;
    /** Control characters encountered, keyed by code point, mapped to display name. */
    protected Map<Integer, String> _controlCharMap;
    /** The first bytes of the stream, kept for byte-order-mark detection. */
    protected int[] initialBytes;
    /** Accumulates which Unicode blocks occur in the content. */
    protected Utf8BlockMarker blockMarker;
    /** Whether a TextMDMetadata property is added to the parse result. */
    protected boolean _withTextMD;
    /** textMD metadata populated during parse. */
    protected TextMDMetadata _textMD;

    /**
     * Creates the module and registers the specification documents it is
     * based on (the Unicode Standard, ISO/IEC 10646-1 Amendment 2, RFC 2279).
     */
    public Utf8Module() {
        super(NAME, RELEASE, DATE, FORMAT, COVERAGE, MIMETYPE, WELLFORMED, VALIDITY, REPINFO, NOTE, RIGHTS, false);
        this._withTextMD = false;
        this._vendor = Agent.harvardInstance();

        Document unicodeStandard = new Document("The Unicode Standard, Version 6.0", DocumentType.BOOK);
        Agent unicodeConsortium = new Agent.Builder("The Unicode Consortium", AgentType.NONPROFIT)
                .web("http://www.unicode.org/versions/Unicode7.0.0/")
                .address("Mountain View, California")
                .build();
        unicodeStandard.setAuthor(unicodeConsortium);
        Agent addisonWesley = new Agent.Builder("Addison-Wesley", AgentType.COMMERCIAL)
                .address("Boston, Massachusetts")
                .build();
        unicodeStandard.setPublisher(addisonWesley);
        unicodeStandard.setDate("2011");
        unicodeStandard.setIdentifier(new Identifier("978-1-936213-01-6", IdentifierType.ISBN));
        this._specification.add(unicodeStandard);

        Document isoStandard = new Document("Information technology -- Universal Multiple-Octet Coded Character Set (UCS) -- Part 1: Architecture and Basic Multilingual Plane. Appendix R, Amendment 2", DocumentType.STANDARD);
        isoStandard.setPublisher(Agent.newIsoInstance());
        isoStandard.setDate("1991");
        isoStandard.setIdentifier(new Identifier("ISO/IEC 10646-1 Amendment 2", IdentifierType.ISO));
        this._specification.add(isoStandard);

        Document rfc = new Document("UTF-8, a transformation format of ISO 10646", DocumentType.RFC);
        rfc.setAuthor(new Agent.Builder("F. Yergeau", AgentType.OTHER).build());
        rfc.setPublisher(new Agent.Builder("IETF", AgentType.NONPROFIT).web("http://www.ietf.org/").build());
        rfc.setDate("1998-01");
        rfc.setIdentifier(new Identifier("RFC 2279", IdentifierType.RFC));
        rfc.setIdentifier(new Identifier("http://www.ietf.org/rfc/rfc2279.txt", IdentifierType.URL));
        this._specification.add(rfc);
    }

    /**
     * Reads the whole stream, checks that it is well-formed UTF-8 and records
     * the findings in {@code repInfo}.
     *
     * <p>On the first malformed byte an {@link ErrorMessage} is recorded, the
     * content is flagged as not well-formed and the method returns at once.
     * This includes a multi-byte sequence that is cut short by the end of the
     * stream. A leading byte order mark (EF BB BF) is reported through an
     * info message and is not counted as a character of the content.
     *
     * @param inputStream the content to examine
     * @param repInfo     receives format, messages, checksums and properties
     * @param i           parse index supplied by the caller; not used here
     * @return always 0
     * @throws IOException if reading the stream fails for a reason other than
     *                     reaching its end
     */
    @Override
    public final int parse(InputStream inputStream, RepInfo repInfo, int i) throws IOException {
        applyDefaultParams();
        initParse();
        repInfo.setFormat(this._format[0]);
        repInfo.setMimeType(this._mimeType[0]);
        repInfo.setModule(this);

        this.initialBytes = new int[4];
        this._lineEndCR = false;
        this._lineEndLF = false;
        this._lineEndCRLF = false;
        this._prevChar = 0;
        this._controlCharMap = new HashMap<Integer, String>();
        this._textMD = new TextMDMetadata();
        repInfo.setNote("Additional representation information includes the line endings: CR, LF, or CRLF");
        this._nByte = 0L;

        // Only compute checksums when asked to and when none are present yet.
        Checksummer checksummer = null;
        if (this._je != null && this._je.getChecksumFlag() && repInfo.getChecksum().isEmpty()) {
            checksummer = new Checksummer();
            this._cstream = new ChecksumInputStream(inputStream, checksummer);
            this._dstream = getBufferedDataStream(this._cstream, this._je.getBufferSize());
        } else {
            this._dstream = getBufferedDataStream(inputStream, this._je != null ? this._je.getBufferSize() : 0);
        }
        this.blockMarker = new Utf8BlockMarker();

        long charCount = 0;
        boolean sawPrintable = false;
        boolean eof = false;
        while (!eof) {
            // Bytes of the current character consumed so far; lets the EOF
            // handler tell a clean end of stream from a truncated sequence.
            int bytesOfChar = 0;
            try {
                boolean isMark = false;
                int[] seq = new int[4];
                seq[0] = readUnsignedByte(this._dstream, this);
                bytesOfChar = 1;
                if (this._nByte < 4) {
                    isMark = checkMark(seq[0], repInfo);
                    if (repInfo.getWellFormed() == WELL_FORMED_FALSE) {
                        return 0;
                    }
                }

                int length = sequenceLength(seq[0]);
                if (length < 0) {
                    repInfo.setMessage(new ErrorMessage(Utf8MessageConstants.ERR_INVALID_FIRST_BYTE_ENCODING, describeByte(seq[0]), this._nByte));
                    repInfo.setWellFormed(false);
                    return 0;
                }

                for (int k = 1; k < length; k++) {
                    seq[k] = readUnsignedByte(this._dstream, this);
                    bytesOfChar++;
                    if (this._nByte < 4) {
                        isMark = checkMark(seq[k], repInfo);
                    }
                    if (repInfo.getWellFormed() == WELL_FORMED_FALSE) {
                        return 0;
                    }
                    if (!isContinuationByte(seq[k])) {
                        repInfo.setMessage(new ErrorMessage(continuationError(k), describeByte(seq[k]), this._nByte));
                        repInfo.setWellFormed(false);
                        return 0;
                    }
                }

                if (isMark) {
                    // The byte order mark is a signature, not content: it is
                    // excluded from the character count, the block statistics
                    // and the printable-character check.
                    charCount = 0;
                    continue;
                }

                int codePoint = decode(seq, length);
                this.blockMarker.markBlock(codePoint);

                if (codePoint < FIRST_NON_CONTROL && codePoint != CR && codePoint != LF) {
                    this._controlCharMap.put(Integer.valueOf(codePoint), controlCharMnemonics[codePoint]);
                } else if (codePoint == DEL) {
                    this._controlCharMap.put(Integer.valueOf(codePoint), "DEL (0x7F)");
                }
                if (codePoint >= FIRST_NON_CONTROL && codePoint != DEL) {
                    sawPrintable = true;
                }

                checkLineEnd(codePoint);
                this._prevChar = codePoint;
                charCount++;
            } catch (EOFException e) {
                if (bytesOfChar > 0) {
                    // The stream ended between the lead byte and the last
                    // continuation byte: the final character is incomplete.
                    repInfo.setMessage(new ErrorMessage(continuationError(bytesOfChar), "Unexpected end of file inside a multi-byte sequence", this._nByte));
                    repInfo.setWellFormed(false);
                    return 0;
                }
                eof = true;
                // Flush a CR that was the very last character of the stream.
                checkLineEnd(0);
            }
        }

        if (checksummer != null) {
            repInfo.setSize(this._cstream.getNBytes());
            repInfo.setChecksum(new Checksum(checksummer.getCRC32(), ChecksumType.CRC32));
            String md5 = checksummer.getMD5();
            if (md5 != null) {
                repInfo.setChecksum(new Checksum(md5, ChecksumType.MD5));
            }
            String sha1 = checksummer.getSHA1();
            if (sha1 != null) {
                repInfo.setChecksum(new Checksum(sha1, ChecksumType.SHA1));
            }
        }

        if (this._nByte == 0) {
            repInfo.setMessage(new ErrorMessage(Utf8MessageConstants.ERR_ZERO_LENGTH_FILE));
            repInfo.setWellFormed(0);
            return 0;
        }

        this._textMD.setCharset(TextMDMetadata.CHARSET_UTF8);
        this._textMD.setByte_order(this._bigEndian ? 0 : 1);
        this._textMD.setByte_size("8");
        this._textMD.setCharacter_size("variable");

        ArrayList<Property> metadata = new ArrayList<Property>(4);
        repInfo.setProperty(new Property("UTF8Metadata", PropertyType.PROPERTY, PropertyArity.LIST, metadata));
        metadata.add(new Property("Characters", PropertyType.LONG, Long.valueOf(charCount)));
        Property blocksUsed = this.blockMarker.getBlocksUsedProperty("UnicodeBlocks");
        if (blocksUsed != null) {
            metadata.add(blocksUsed);
        }
        Property lineEndings = lineEndingProperty();
        if (lineEndings != null) {
            metadata.add(lineEndings);
        }
        Property controlChars = controlCharacterProperty();
        if (controlChars != null) {
            metadata.add(controlChars);
        }
        if (this._withTextMD) {
            metadata.add(new Property("TextMDMetadata", PropertyType.TEXTMDMETADATA, PropertyArity.SCALAR, this._textMD));
        }

        if (!sawPrintable) {
            repInfo.setMessage(new InfoMessage(AsciiModule.INF_PRINT_CHAR_MISS));
        }
        return 0;
    }

    /**
     * Examines up to {@code getBase().getSigBytes()} characters from the start
     * of the stream and records a signature match on {@code repInfo} when they
     * are structurally valid UTF-8.
     *
     * <p>The content is flagged as not well-formed when a malformed byte is
     * met or when the stream yields no bytes at all. Reaching the end of the
     * stream, even inside a multi-byte sequence, simply stops the scan.
     *
     * @param file        the file being examined; not used here
     * @param inputStream the content to examine
     * @param repInfo     receives format information and the verdict
     * @throws IOException if reading the stream fails for a reason other than
     *                     reaching its end
     */
    @Override
    public void checkSignatures(File file, InputStream inputStream, RepInfo repInfo) throws IOException {
        repInfo.setFormat(this._format[0]);
        repInfo.setMimeType(this._mimeType[0]);
        repInfo.setModule(this);
        this.initialBytes = new int[4];
        int sigBytes = getBase().getSigBytes();
        this.blockMarker = new Utf8BlockMarker();
        this._nByte = 0L;
        DataInputStream in = new DataInputStream(inputStream);

        int charsStarted = 0;
        boolean eof = false;
        while (!eof && charsStarted < sigBytes) {
            try {
                int first = readUnsignedByte(in, this);
                charsStarted++;
                if (this._nByte < 4) {
                    checkMark(first, repInfo);
                    if (repInfo.getWellFormed() == WELL_FORMED_FALSE) {
                        return;
                    }
                }

                int length = sequenceLength(first);
                if (length < 0) {
                    repInfo.setWellFormed(false);
                    return;
                }

                for (int k = 1; k < length; k++) {
                    int next = readUnsignedByte(in, this);
                    if (this._nByte < 4) {
                        checkMark(next, repInfo);
                    }
                    if (repInfo.getWellFormed() == WELL_FORMED_FALSE) {
                        return;
                    }
                    if (!isContinuationByte(next)) {
                        repInfo.setWellFormed(false);
                        return;
                    }
                }
            } catch (EOFException e) {
                eof = true;
            }
        }

        if (charsStarted > 0) {
            repInfo.setSigMatch(this._name);
        } else {
            repInfo.setWellFormed(false);
        }
    }

    /**
     * Updates the line-ending flags from the current character and the
     * previously seen one ({@code _prevChar}).
     *
     * <p>An LF preceded by CR counts as CRLF, any other LF as LF, and a CR
     * followed by anything other than LF counts as a bare CR.
     *
     * @param i code point of the current character; callers pass 0 at end of
     *          stream so that a trailing CR is still recorded
     */
    protected void checkLineEnd(int i) {
        boolean afterCR = this._prevChar == CR;
        if (i == LF) {
            if (afterCR) {
                this._lineEndCRLF = true;
            } else {
                this._lineEndLF = true;
            }
        } else if (afterCR) {
            this._lineEndCR = true;
        }
    }

    /**
     * Records one of the first bytes of the stream and, once three bytes are
     * available, inspects them for a byte order mark.
     *
     * <p>A UTF-8 BOM (EF BB BF) produces an info message and resets the block
     * marker. A leading FF FE or FE FF produces an error message and flags the
     * content as not well-formed.
     *
     * <p>NOTE(review): {@code parse} and {@code checkSignatures} reject 0xFE
     * and 0xFF as invalid first bytes before three bytes have been collected,
     * so the UTF-16/UCS-4 branches look unreachable from those callers, and
     * {@code initialBytes[3]} is never filled in when the check runs. Confirm
     * before relying on these diagnostics.
     *
     * @param i       the byte just read (0-255)
     * @param repInfo receives the info or error message
     * @return {@code true} only when the first three bytes are a UTF-8 BOM
     */
    protected boolean checkMark(int i, RepInfo repInfo) {
        int slot = ((int) this._nByte) - 1;
        if (slot < 0 || slot >= this.initialBytes.length) {
            // Outside the window of remembered bytes; nothing to record.
            return false;
        }
        this.initialBytes[slot] = i;
        if (this._nByte != 3) {
            return false;
        }

        int b0 = this.initialBytes[0];
        int b1 = this.initialBytes[1];
        int b2 = this.initialBytes[2];

        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
            repInfo.setMessage(new InfoMessage(Utf8MessageConstants.INF_BOM_MARK_PRESENT, 0L));
            this.blockMarker.reset();
            return true;
        }
        if (b0 == 0xFF && b1 == 0xFE) {
            repInfo.setMessage((b2 == 0 && this.initialBytes[3] == 0) ? new ErrorMessage(Utf8MessageConstants.ERR_UCS4_NOT_UTF8) : new ErrorMessage(Utf8MessageConstants.ERR_UTF16LE_NOT_UTF8));
            repInfo.setWellFormed(false);
            return false;
        }
        if (b0 == 0xFE && b1 == 0xFF) {
            repInfo.setMessage(new ErrorMessage(Utf8MessageConstants.ERR_UTF16BE_NOT_UTF8));
            repInfo.setWellFormed(false);
        }
        return false;
    }

    /** Turns on textMD output when the default parameters contain "withtextmd=true". */
    private void applyDefaultParams() {
        if (this._defaultParams == null) {
            return;
        }
        Iterator<String> params = this._defaultParams.iterator();
        while (params.hasNext()) {
            if ("withtextmd=true".equalsIgnoreCase(params.next())) {
                this._withTextMD = true;
            }
        }
    }

    /**
     * Builds the "LineEndings" property and records the line break style in
     * the textMD metadata; returns {@code null} when no line ending was seen.
     */
    private Property lineEndingProperty() {
        if (!this._lineEndCR && !this._lineEndLF && !this._lineEndCRLF) {
            return null;
        }
        ArrayList<String> endings = new ArrayList<String>(3);
        if (this._lineEndCR) {
            endings.add("CR");
            this._textMD.setLinebreak(0);
        }
        if (this._lineEndLF) {
            endings.add("LF");
            this._textMD.setLinebreak(1);
        }
        if (this._lineEndCRLF) {
            endings.add("CRLF");
            this._textMD.setLinebreak(2);
        }
        return new Property("LineEndings", PropertyType.STRING, PropertyArity.LIST, endings);
    }

    /**
     * Builds the "ControlCharacters" property, listing the control characters
     * seen in code point order; returns {@code null} when there were none.
     */
    private Property controlCharacterProperty() {
        if (this._controlCharMap.isEmpty()) {
            return null;
        }
        LinkedList<String> names = new LinkedList<String>();
        for (int code = 0; code < FIRST_NON_CONTROL; code++) {
            String name = this._controlCharMap.get(Integer.valueOf(code));
            if (name != null) {
                names.add(name);
            }
        }
        String del = this._controlCharMap.get(Integer.valueOf(DEL));
        if (del != null) {
            names.add(del);
        }
        return new Property("ControlCharacters", PropertyType.STRING, PropertyArity.LIST, names);
    }

    /**
     * Returns the number of bytes (1-4) in the sequence introduced by the
     * given lead byte, or -1 when the byte cannot start a sequence
     * (0x80-0xBF and 0xF8-0xFF).
     */
    private static int sequenceLength(int firstByte) {
        if (0xC0 <= firstByte && firstByte <= 0xDF) {
            return 2;
        }
        if (0xE0 <= firstByte && firstByte <= 0xEF) {
            return 3;
        }
        if (0xF0 <= firstByte && firstByte <= 0xF7) {
            return 4;
        }
        if ((0x80 <= firstByte && firstByte <= 0xBF) || (0xF8 <= firstByte && firstByte <= 0xFF)) {
            return -1;
        }
        return 1;
    }

    /** Returns whether the byte lies in the continuation range 0x80-0xBF. */
    private static boolean isContinuationByte(int b) {
        return 0x80 <= b && b <= 0xBF;
    }

    /**
     * Returns the error message for a bad byte at the given zero-based
     * position within a sequence (1 = second byte, 2 = third, 3 = fourth).
     */
    private static String continuationError(int index) {
        switch (index) {
            case 1:
                return Utf8MessageConstants.ERR_INVALID_SECOND_BYTE_ENCODING;
            case 2:
                return Utf8MessageConstants.ERR_INVALID_THIRD_BYTE_ENCODING;
            case 3:
                return Utf8MessageConstants.ERR_INVALID_FOURTH_BYTE_ENCODING;
            default:
                return "";
        }
    }

    /** Formats a byte for use as the sub-message of an error. */
    private static String describeByte(int b) {
        return "Value = " + ((char) b) + " (0x" + Integer.toHexString(b) + ")";
    }

    /**
     * Combines the payload bits of a sequence of the given length into a code
     * point; returns -1 for a length outside 1-4.
     */
    private static int decode(int[] seq, int length) {
        switch (length) {
            case 1:
                return seq[0];
            case 2:
                return ((seq[0] & 0x1F) << 6) + (seq[1] & 0x3F);
            case 3:
                return ((seq[0] & 0x0F) << 12) + ((seq[1] & 0x3F) << 6) + (seq[2] & 0x3F);
            case 4:
                return ((seq[0] & 0x07) << 18) + ((seq[1] & 0x3F) << 12) + ((seq[2] & 0x3F) << 6) + (seq[3] & 0x3F);
            default:
                return -1;
        }
    }
}
