package edu.harvard.hul.ois.jhove.module;

import edu.harvard.hul.ois.jhove.Agent;
import edu.harvard.hul.ois.jhove.AgentType;
import edu.harvard.hul.ois.jhove.Document;
import edu.harvard.hul.ois.jhove.DocumentType;
import edu.harvard.hul.ois.jhove.ErrorMessage;
import edu.harvard.hul.ois.jhove.Identifier;
import edu.harvard.hul.ois.jhove.IdentifierType;
import edu.harvard.hul.ois.jhove.InfoMessage;
import edu.harvard.hul.ois.jhove.ModuleBase;
import edu.harvard.hul.ois.jhove.Property;
import edu.harvard.hul.ois.jhove.PropertyArity;
import edu.harvard.hul.ois.jhove.PropertyType;
import edu.harvard.hul.ois.jhove.RepInfo;
import edu.harvard.hul.ois.jhove.TextMDMetadata;
import edu.harvard.hul.ois.jhove.messages.JhoveMessage;
import edu.harvard.hul.ois.jhove.module.ascii.ControlChar;
import edu.harvard.hul.ois.jhove.module.ascii.LineEnding;
import edu.harvard.hul.ois.jhove.module.utf8.MessageConstants;
import edu.harvard.hul.ois.jhove.module.utf8.Utf8BlockMarker;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.Collection;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

/* loaded from: input_file:edu/harvard/hul/ois/jhove/module/Utf8Module.class */
/**
 * JHOVE module that checks whether a byte stream is structurally well-formed
 * UTF-8 and collects representation information about it: the number of
 * characters, the Unicode blocks in use, the line endings, the ASCII control
 * characters and, optionally, textMD metadata.
 *
 * <p>Validation is structural only: every lead byte must announce a sequence of
 * one to four bytes and every following byte must lie in {@code 0x80..0xBF}.
 * Overlong encodings, surrogate code points and values above U+10FFFF are not
 * rejected by this class.
 */
public class Utf8Module extends ModuleBase {
    /** Text of the informational message added when no printable character was seen. */
    public static final String INF_PRINT_CHAR_MISS = "No printable characters";

    private static final String NAME = "UTF8-hul";
    private static final String RELEASE = "1.7.2";
    private static final int[] DATE = {2022, 4, 22};
    private static final String[] FORMAT = {"UTF-8"};
    private static final String COVERAGE = "Unicode 7.0.0";
    private static final String[] MIMETYPE = {"text/plain; charset=UTF-8"};
    private static final String WELLFORMED = "An UTF-8 object is well-formed if each character is correctly encoded as a one-to-four byte sequence, as defined in the specifications";
    private static final String VALIDITY = null;
    private static final String REPINFO = "Additional representation information includes: number of characters and Unicode 7.0.0 code blocks";
    private static final String NOTE = null;
    private static final String RIGHTS = "Copyright 2003-2011 by JSTOR and the President and Fellows of Harvard College. Released under the GNU Lesser General Public License.";

    /** ASCII control characters (other than line-end characters) seen by the last parse. */
    protected Set<ControlChar> usedCtrlChars;
    /** Line endings seen by the last parse. */
    protected Set<LineEnding> usedLineEndings;
    /** The first bytes of the stream, recorded by {@link #checkMark} for byte-order-mark detection. */
    protected int[] initialBytes;
    /** Tracks which Unicode blocks the decoded code points fall into. */
    protected Utf8BlockMarker blockMarker;
    /** Whether the textMD metadata property is added to the parse result. */
    protected boolean _withTextMD;
    /** textMD metadata populated during {@link #parse}. */
    protected TextMDMetadata _textMD;

    /**
     * Creates the module and registers its vendor and the three specification
     * documents it refers to.
     */
    public Utf8Module() {
        super(NAME, RELEASE, DATE, FORMAT, COVERAGE, MIMETYPE, WELLFORMED, VALIDITY, REPINFO, NOTE, RIGHTS, false);
        this._withTextMD = false;
        this._vendor = Agent.harvardInstance();

        // NOTE(review): the title says "Version 6.0" while COVERAGE and the URL
        // below say 7.0.0 -- confirm which edition is intended.
        Document unicodeStandard = new Document("The Unicode Standard, Version 6.0", DocumentType.BOOK);
        unicodeStandard.setAuthor(new Agent.Builder("The Unicode Consortium", AgentType.NONPROFIT).web("http://www.unicode.org/versions/Unicode7.0.0/").address("Mountain View, California").build());
        unicodeStandard.setPublisher(new Agent.Builder("Addison-Wesley", AgentType.COMMERCIAL).address("Boston, Massachusetts").build());
        unicodeStandard.setDate("2011");
        unicodeStandard.setIdentifier(new Identifier("978-1-936213-01-6", IdentifierType.ISBN));
        this._specification.add(unicodeStandard);

        Document isoStandard = new Document("Information technology -- Universal Multiple-Octet Coded Character Set (UCS) -- Part 1: Architecture and Basic Multilingual Plane. Appendix R, Amendment 2", DocumentType.STANDARD);
        isoStandard.setPublisher(Agent.newIsoInstance());
        isoStandard.setDate("1991");
        isoStandard.setIdentifier(new Identifier("ISO/IEC 10646-1 Amendment 2", IdentifierType.ISO));
        this._specification.add(isoStandard);

        Document rfc = new Document("UTF-8, a transformation format of ISO 10646", DocumentType.RFC);
        rfc.setAuthor(new Agent.Builder("F. Yergeau", AgentType.OTHER).build());
        rfc.setPublisher(new Agent.Builder("IETF", AgentType.NONPROFIT).web("http://www.ietf.org/").build());
        rfc.setDate("1998-01");
        rfc.setIdentifier(new Identifier("RFC 2279", IdentifierType.RFC));
        rfc.setIdentifier(new Identifier("http://www.ietf.org/rfc/rfc2279.txt", IdentifierType.URL));
        this._specification.add(rfc);
    }

    /**
     * Reads the whole stream, checks that it is well-formed UTF-8 and stores
     * the collected properties in {@code repInfo}.
     *
     * <p>Parsing stops at the first malformed byte: an error message is added,
     * the object is marked as not well-formed and no properties are reported.
     * A stream that ends in the middle of a multi-byte sequence is treated the
     * same way. A leading UTF-8 byte order mark is reported through an
     * informational message and is counted neither as a character nor as
     * printable content.
     *
     * @param inputStream the stream to analyse
     * @param repInfo     receives messages, the well-formedness verdict and the properties
     * @param i           parse index; not used by this method
     * @return always 0
     * @throws IOException if reading the stream fails for a reason other than end of input
     */
    public final int parse(InputStream inputStream, RepInfo repInfo, int i) throws IOException {
        this._withTextMD = isParamInDefaults("withtextmd=true");
        initParse();
        initInfo(repInfo);
        this.initialBytes = new int[4];
        this.usedCtrlChars = new HashSet<>();
        this.usedLineEndings = new HashSet<>();
        this._textMD = new TextMDMetadata();
        repInfo.setNote("Additional representation information includes the line endings: CR, LF, or CRLF");
        this._nByte = 0L;
        setupDataStream(inputStream, repInfo);
        this.blockMarker = new Utf8BlockMarker();

        ControlChar previousCtrl = null;
        boolean sawPrintable = false;
        long charCount = 0;
        int[] sequence = new int[4];
        boolean endOfInput = false;
        while (!endOfInput) {
            // Bytes of the current sequence consumed so far. This lets the EOF
            // handler tell a clean end of input from a sequence cut short.
            int bytesRead = 0;
            try {
                boolean isBom = false;
                sequence[0] = readUnsignedByte(this._dstream, this);
                bytesRead = 1;
                if (this._nByte < 4) {
                    isBom = checkMark(sequence[0], repInfo);
                    if (repInfo.getWellFormed() == 0) {
                        return 0;
                    }
                }

                int length = sequenceLength(sequence[0]);
                if (length < 0) {
                    repInfo.setMessage(new ErrorMessage(MessageConstants.UTF8_HUL_2, describeByte(sequence[0]), this._nByte));
                    repInfo.setWellFormed(false);
                    return 0;
                }

                for (int index = 1; index < length; index++) {
                    sequence[index] = readUnsignedByte(this._dstream, this);
                    bytesRead++;
                    if (this._nByte < 4) {
                        isBom = checkMark(sequence[index], repInfo);
                    }
                    if (repInfo.getWellFormed() == 0) {
                        return 0;
                    }
                    if (!isContinuationByte(sequence[index])) {
                        repInfo.setMessage(new ErrorMessage(continuationByteMessage(index), describeByte(sequence[index]), this._nByte));
                        repInfo.setWellFormed(false);
                        return 0;
                    }
                }

                if (isBom) {
                    // The byte order mark is a signature, not text: keep it out
                    // of the block usage, the printable check and the count.
                    continue;
                }

                int codePoint = decodeCodePoint(sequence, length);
                this.blockMarker.markBlock(codePoint);
                if (!sawPrintable) {
                    sawPrintable = codePoint > 31 && codePoint != 127;
                }

                ControlChar currentCtrl = ControlChar.asciiFromInt(codePoint);
                if (ControlChar.isLineEndChar(currentCtrl)) {
                    LineEnding ending = LineEnding.fromControlChars(currentCtrl, previousCtrl);
                    if (ending != null) {
                        this.usedLineEndings.add(ending);
                    }
                } else if (currentCtrl != null) {
                    this.usedCtrlChars.add(currentCtrl);
                } else if (!sawPrintable) {
                    sawPrintable = 31 < codePoint;
                }

                // A CR that is not followed by LF still has to be resolved into a line ending.
                if (previousCtrl == ControlChar.CR && currentCtrl != ControlChar.LF) {
                    LineEnding pending = LineEnding.fromControlChars(currentCtrl, previousCtrl);
                    if (pending != null) {
                        this.usedLineEndings.add(pending);
                    }
                }
                previousCtrl = currentCtrl;
                charCount++;
            } catch (EOFException eof) {
                if (bytesRead > 0) {
                    // The input ended where a continuation byte was required.
                    repInfo.setMessage(new ErrorMessage(continuationByteMessage(bytesRead), "Unexpected end of file", this._nByte));
                    repInfo.setWellFormed(false);
                    return 0;
                }
                endOfInput = true;
                // Resolve a line-end character left pending at the end of the input.
                LineEnding last = LineEnding.fromControlChars(ControlChar.NUL, previousCtrl);
                if (last != null) {
                    this.usedLineEndings.add(last);
                }
            }
        }

        setChecksums(this._ckSummer, repInfo);
        if (this._nByte == 0) {
            // Nothing was read at all.
            repInfo.setMessage(new ErrorMessage(MessageConstants.UTF8_HUL_6));
            repInfo.setWellFormed(0);
            return 0;
        }

        this._textMD.setCharset("UTF-8");
        // NOTE(review): 0 / 1 are presumably the textMD byte-order codes for big / little endian -- confirm.
        this._textMD.setByte_order(this._bigEndian ? 0 : 1);
        this._textMD.setByte_size("8");
        this._textMD.setCharacter_size("variable");

        List<Property> metadata = new ArrayList<>(4);
        repInfo.setProperty(new Property("UTF8Metadata", PropertyType.PROPERTY, PropertyArity.LIST, metadata));
        metadata.add(new Property("Characters", PropertyType.LONG, Long.valueOf(charCount)));
        Property blocksUsed = this.blockMarker.getBlocksUsedProperty("UnicodeBlocks");
        if (blocksUsed != null) {
            metadata.add(blocksUsed);
        }
        List<String> lineEndings = reportLineEndings();
        if (!lineEndings.isEmpty()) {
            metadata.add(new Property("LineEndings", PropertyType.STRING, PropertyArity.LIST, lineEndings));
        }
        if (!this.usedCtrlChars.isEmpty()) {
            // Copy into an EnumSet so the mnemonics are listed in declaration order.
            List<String> mnemonics = new ArrayList<>();
            for (ControlChar used : EnumSet.copyOf(this.usedCtrlChars)) {
                mnemonics.add(used.mnemonic);
            }
            metadata.add(new Property("ControlCharacters", PropertyType.STRING, PropertyArity.LIST, mnemonics));
        }
        if (this._withTextMD) {
            metadata.add(new Property("TextMDMetadata", PropertyType.TEXTMDMETADATA, PropertyArity.SCALAR, this._textMD));
        }

        if (!sawPrintable) {
            repInfo.setMessage(new InfoMessage(INF_PRINT_CHAR_MISS));
        }
        return 0;
    }

    /**
     * Checks whether the beginning of the stream looks like UTF-8.
     *
     * <p>Characters are read until the signature byte budget obtained from
     * {@code getBase().getSigBytes()} is used up (counted in lead bytes) or
     * the input ends. If every sequence read is structurally valid and at
     * least one character was read, the module name is recorded as a
     * signature match; otherwise the object is marked as not well-formed. A
     * stream that ends in the middle of a multi-byte sequence is rejected.
     *
     * @param file        the file being examined; not used by this method
     * @param inputStream the stream to examine
     * @param repInfo     receives the format, MIME type, module and verdict
     * @throws IOException if reading the stream fails for a reason other than end of input
     */
    public void checkSignatures(File file, InputStream inputStream, RepInfo repInfo) throws IOException {
        repInfo.setFormat(this._format[0]);
        repInfo.setMimeType(this._mimeType[0]);
        repInfo.setModule(this);
        this.initialBytes = new int[4];
        int sigBytes = getBase().getSigBytes();
        this.blockMarker = new Utf8BlockMarker();
        this._nByte = 0L;
        DataInputStream stream = new DataInputStream(inputStream);

        int[] sequence = new int[4];
        int leadBytesSeen = 0;
        boolean endOfInput = false;
        while (!endOfInput && leadBytesSeen < sigBytes) {
            // Bytes of the current sequence consumed so far (see the EOF handler).
            int bytesRead = 0;
            try {
                sequence[0] = readUnsignedByte(stream, this);
                bytesRead = 1;
                leadBytesSeen++;
                if (this._nByte < 4) {
                    checkMark(sequence[0], repInfo);
                    if (repInfo.getWellFormed() == 0) {
                        return;
                    }
                }

                int length = sequenceLength(sequence[0]);
                if (length < 0) {
                    repInfo.setWellFormed(false);
                    return;
                }

                for (int index = 1; index < length; index++) {
                    sequence[index] = readUnsignedByte(stream, this);
                    bytesRead++;
                    if (this._nByte < 4) {
                        checkMark(sequence[index], repInfo);
                    }
                    if (repInfo.getWellFormed() == 0) {
                        return;
                    }
                    if (!isContinuationByte(sequence[index])) {
                        repInfo.setWellFormed(false);
                        return;
                    }
                }
            } catch (EOFException eof) {
                if (bytesRead > 0) {
                    // The input ended where a continuation byte was required.
                    repInfo.setWellFormed(false);
                    return;
                }
                endOfInput = true;
            }
        }

        if (leadBytesSeen > 0) {
            repInfo.setSigMatch(this._name);
        } else {
            repInfo.setWellFormed(false);
        }
    }

    /**
     * Records one of the initial bytes of the stream and, once the third byte
     * has been recorded, inspects the recorded bytes for a byte order mark.
     *
     * <p>The byte is stored at index {@code _nByte - 1}, so the byte counter
     * must already include it; the callers in this class only invoke the
     * method while {@code _nByte} is below 4.
     *
     * <p>A UTF-8 mark ({@code EF BB BF}) adds an informational message and
     * resets the block marker. Bytes starting with {@code FF FE} or
     * {@code FE FF} add an error message and mark the object as not
     * well-formed.
     *
     * <p>NOTE(review): {@code parse} and {@code checkSignatures} reject a
     * first byte of 0xFE or 0xFF as an invalid lead byte before a third byte
     * is read, so the two error branches do not fire when called from them.
     *
     * @param i       the byte just read
     * @param repInfo receives the message and, for a foreign mark, the verdict
     * @return {@code true} only when the first three bytes are the UTF-8 byte order mark
     */
    protected boolean checkMark(int i, RepInfo repInfo) {
        this.initialBytes[((int) this._nByte) - 1] = i;
        if (this._nByte != 3) {
            return false;
        }

        int first = this.initialBytes[0];
        int second = this.initialBytes[1];
        int third = this.initialBytes[2];

        if (first == 0xEF && second == 0xBB && third == 0xBF) {
            repInfo.setMessage(new InfoMessage(MessageConstants.UTF8_HUL_1, 0L));
            this.blockMarker.reset();
            return true;
        }

        JhoveMessage foreignMark = null;
        if (first == 0xFF && second == 0xFE) {
            // NOTE(review): initialBytes[3] has not been written at this point,
            // so this test effectively depends on the third byte only.
            boolean zeroTail = third == 0 && this.initialBytes[3] == 0;
            foreignMark = zeroTail ? MessageConstants.UTF8_HUL_7 : MessageConstants.UTF8_HUL_8;
        } else if (first == 0xFE && second == 0xFF) {
            foreignMark = MessageConstants.UTF8_HUL_9;
        }
        if (foreignMark != null) {
            repInfo.setMessage(new ErrorMessage(foreignMark));
            repInfo.setWellFormed(false);
        }
        return false;
    }

    /**
     * Lists the names of the line endings seen by the last parse, in enum
     * declaration order, and records them in the textMD metadata.
     *
     * <p>{@code setLinebreak} is called once per line ending, so when several
     * are present the last call decides the final value.
     *
     * @return the names of the line endings used; empty if none were seen
     */
    private List<String> reportLineEndings() {
        List<String> names = new ArrayList<>();
        if (this.usedLineEndings.isEmpty()) {
            // EnumSet.copyOf rejects an empty non-EnumSet collection.
            return names;
        }
        for (LineEnding ending : EnumSet.copyOf(this.usedLineEndings)) {
            names.add(ending.toString());
            this._textMD.setLinebreak(ending.textMdVal);
        }
        return names;
    }

    /**
     * Returns the total length in bytes of the sequence introduced by the
     * given lead byte, or -1 if the byte cannot start a sequence.
     */
    private static int sequenceLength(int leadByte) {
        if (0xC0 <= leadByte && leadByte <= 0xDF) {
            return 2;
        }
        if (0xE0 <= leadByte && leadByte <= 0xEF) {
            return 3;
        }
        if (0xF0 <= leadByte && leadByte <= 0xF7) {
            return 4;
        }
        if ((0x80 <= leadByte && leadByte <= 0xBF) || (0xF8 <= leadByte && leadByte <= 0xFF)) {
            return -1;
        }
        return 1;
    }

    /** Tells whether the byte lies in the continuation range {@code 0x80..0xBF}. */
    private static boolean isContinuationByte(int value) {
        return 0x80 <= value && value <= 0xBF;
    }

    /**
     * Assembles the code point from the first {@code length} entries of
     * {@code bytes}; returns -1 for a length outside 1..4.
     */
    private static int decodeCodePoint(int[] bytes, int length) {
        switch (length) {
            case 1:
                return bytes[0];
            case 2:
                return ((bytes[0] & 0x1F) << 6) + (bytes[1] & 0x3F);
            case 3:
                return ((bytes[0] & 0x0F) << 12) + ((bytes[1] & 0x3F) << 6) + (bytes[2] & 0x3F);
            case 4:
                return ((bytes[0] & 0x07) << 18) + ((bytes[1] & 0x3F) << 12) + ((bytes[2] & 0x3F) << 6) + (bytes[3] & 0x3F);
            default:
                return -1;
        }
    }

    /**
     * Selects the error message for a bad byte at the given zero-based
     * position within a sequence (1 = second byte, 2 = third, 3 = fourth);
     * returns {@code null} for any other position.
     */
    private static JhoveMessage continuationByteMessage(int index) {
        switch (index) {
            case 1:
                return MessageConstants.UTF8_HUL_3;
            case 2:
                return MessageConstants.UTF8_HUL_4;
            case 3:
                return MessageConstants.UTF8_HUL_5;
            default:
                return null;
        }
    }

    /** Formats a byte for an error sub-message, as a character and in hexadecimal. */
    private static String describeByte(int value) {
        return "Value = " + ((char) value) + " (0x" + Integer.toHexString(value) + ")";
    }
}
