/*
 * Decompiled with CFR 0.152.
 */
package edu.harvard.hul.ois.jhove.module;

import edu.harvard.hul.ois.jhove.Agent;
import edu.harvard.hul.ois.jhove.AgentType;
import edu.harvard.hul.ois.jhove.Checksummer;
import edu.harvard.hul.ois.jhove.Document;
import edu.harvard.hul.ois.jhove.DocumentType;
import edu.harvard.hul.ois.jhove.ErrorMessage;
import edu.harvard.hul.ois.jhove.Identifier;
import edu.harvard.hul.ois.jhove.IdentifierType;
import edu.harvard.hul.ois.jhove.InfoMessage;
import edu.harvard.hul.ois.jhove.JhoveBase;
import edu.harvard.hul.ois.jhove.Message;
import edu.harvard.hul.ois.jhove.Module;
import edu.harvard.hul.ois.jhove.ModuleBase;
import edu.harvard.hul.ois.jhove.Property;
import edu.harvard.hul.ois.jhove.PropertyArity;
import edu.harvard.hul.ois.jhove.PropertyType;
import edu.harvard.hul.ois.jhove.RepInfo;
import edu.harvard.hul.ois.jhove.TextMDMetadata;
import edu.harvard.hul.ois.jhove.messages.JhoveMessage;
import edu.harvard.hul.ois.jhove.module.ascii.ControlChar;
import edu.harvard.hul.ois.jhove.module.ascii.LineEnding;
import edu.harvard.hul.ois.jhove.module.utf8.MessageConstants;
import edu.harvard.hul.ois.jhove.module.utf8.Utf8BlockMarker;
import java.io.DataInputStream;
import java.io.EOFException;
import java.io.File;
import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.EnumSet;
import java.util.HashSet;
import java.util.LinkedList;
import java.util.List;
import java.util.Set;

/**
 * JHOVE module that checks whether a byte stream is well-formed UTF-8 and
 * collects descriptive metadata about it.
 *
 * <p>While scanning, the module counts decoded characters, records which
 * Unicode blocks were seen (via {@link Utf8BlockMarker}), which ASCII control
 * characters occurred and which line-ending conventions were used.
 *
 * <p>Well-formedness is checked structurally only: the lead byte must announce
 * a one-to-four byte sequence and every following byte must lie in
 * {@code 0x80..0xBF}. NOTE(review): overlong encodings, UTF-16 surrogate code
 * points and values above U+10FFFF are not rejected by this implementation.
 */
public class Utf8Module extends ModuleBase {
    /** Text of the informational message emitted when no printable character was found. */
    public static final String INF_PRINT_CHAR_MISS = "No printable characters";
    private static final String NAME = "UTF8-hul";
    private static final String RELEASE = "1.7.1";
    private static final int[] DATE = new int[]{2019, 4, 17};
    private static final String[] FORMAT = new String[]{"UTF-8"};
    private static final String COVERAGE = "Unicode 7.0.0";
    private static final String[] MIMETYPE = new String[]{"text/plain; charset=UTF-8"};
    private static final String WELLFORMED = "An UTF-8 object is well-formed if each character is correctly encoded as a one-to-four byte sequence, as defined in the specifications";
    private static final String VALIDITY = null;
    private static final String REPINFO = "Additional representation information includes: number of characters and Unicode 7.0.0 code blocks";
    private static final String NOTE = null;
    private static final String RIGHTS = "Copyright 2003-2011 by JSTOR and the President and Fellows of Harvard College. Released under the GNU Lesser General Public License.";

    /** Sub-message attached to the error raised when the input ends inside a multi-byte sequence. */
    private static final String TRUNCATED_SEQUENCE = "Unexpected end of file";

    /** ASCII control characters (other than line-end characters) seen by the current parse. */
    protected Set<ControlChar> usedCtrlChars;
    /** Line-ending conventions seen by the current parse. */
    protected Set<LineEnding> usedLineEndings;
    /** The first bytes of the stream, stored by {@link #checkMark(int, RepInfo)}. */
    protected int[] initialBytes;
    /** Tracks which Unicode blocks the decoded characters fall into. */
    protected Utf8BlockMarker blockMarker;
    /** Whether a {@code TextMDMetadata} property is added to the result ("withtextmd=true"). */
    protected boolean _withTextMD = false;
    /** textMD metadata populated during {@link #parse(InputStream, RepInfo, int)}. */
    protected TextMDMetadata _textMD;

    /**
     * Creates the module and registers the specification documents it refers to.
     */
    public Utf8Module() {
        super(NAME, RELEASE, DATE, FORMAT, COVERAGE, MIMETYPE, WELLFORMED, VALIDITY, REPINFO, NOTE, RIGHTS, false);
        this._vendor = Agent.harvardInstance();

        // The Unicode Standard (book).
        Document unicodeBook = new Document("The Unicode Standard, Version 6.0", DocumentType.BOOK);
        Agent consortium = new Agent.Builder("The Unicode Consortium", AgentType.NONPROFIT).web("http://www.unicode.org/versions/Unicode7.0.0/").address("Mountain View, California").build();
        unicodeBook.setAuthor(consortium);
        Agent bookPublisher = new Agent.Builder("Addison-Wesley", AgentType.COMMERCIAL).address("Boston, Massachusetts").build();
        unicodeBook.setPublisher(bookPublisher);
        unicodeBook.setDate("2011");
        unicodeBook.setIdentifier(new Identifier("978-1-936213-01-6", IdentifierType.ISBN));
        this._specification.add(unicodeBook);

        // ISO/IEC 10646-1 Amendment 2.
        Document isoStandard = new Document("Information technology -- Universal Multiple-Octet Coded Character Set (UCS) -- Part 1: Architecture and Basic Multilingual Plane. Appendix R, Amendment 2", DocumentType.STANDARD);
        isoStandard.setPublisher(Agent.newIsoInstance());
        isoStandard.setDate("1991");
        isoStandard.setIdentifier(new Identifier("ISO/IEC 10646-1 Amendment 2", IdentifierType.ISO));
        this._specification.add(isoStandard);

        // RFC 2279.
        Document rfc = new Document("UTF-8, a transformation format of ISO 10646", DocumentType.RFC);
        Agent rfcAuthor = new Agent.Builder("F. Yergeau", AgentType.OTHER).build();
        rfc.setAuthor(rfcAuthor);
        Agent ietf = new Agent.Builder("IETF", AgentType.NONPROFIT).web("http://www.ietf.org/").build();
        rfc.setPublisher(ietf);
        rfc.setDate("1998-01");
        rfc.setIdentifier(new Identifier("RFC 2279", IdentifierType.RFC));
        rfc.setIdentifier(new Identifier("http://www.ietf.org/rfc/rfc2279.txt", IdentifierType.URL));
        this._specification.add(rfc);
    }

    /**
     * Reads the whole stream, checks that it is structurally well-formed UTF-8
     * and records the collected metadata in {@code info}.
     *
     * <p>On the first malformed byte an error message is added to {@code info},
     * it is marked not well-formed and the method returns immediately. An input
     * that ends in the middle of a multi-byte sequence is treated the same way.
     * An empty stream is also reported as not well-formed.
     *
     * @param stream     the input to analyse
     * @param info       receives messages, the well-formedness flag and properties
     * @param parseIndex not used by this module
     * @return always {@code 0}
     * @throws IOException if reading the stream fails for a reason other than end of file
     */
    public final int parse(InputStream stream, RepInfo info, int parseIndex) throws IOException {
        this._withTextMD = this.isParamInDefaults("withtextmd=true");
        this.initParse();
        this.initInfo(info);
        this.initialBytes = new int[4];
        ControlChar prevChar = null;
        this.usedCtrlChars = new HashSet<ControlChar>();
        this.usedLineEndings = new HashSet<LineEnding>();
        this._textMD = new TextMDMetadata();
        boolean printableChars = false;
        info.setNote("Additional representation information includes the line endings: CR, LF, or CRLF");
        this._nByte = 0L;
        long nChar = 0L;
        this.setupDataStream(stream, info);
        this.blockMarker = new Utf8BlockMarker();

        boolean eof = false;
        while (!eof) {
            int[] b = new int[4];

            // End of input is only legitimate on a character boundary, i.e.
            // while waiting for the next lead byte.
            try {
                b[0] = readUnsignedByte(this._dstream, this);
            }
            catch (EOFException e) {
                eof = true;
                // Flush a line ending that is still pending from the last character.
                LineEnding pending = LineEnding.fromControlChars(ControlChar.NUL, prevChar);
                if (pending != null) {
                    this.usedLineEndings.add(pending);
                }
                continue;
            }

            boolean isMark = false;
            if (this._nByte < 4L) {
                isMark = this.checkMark(b[0], info);
                if (info.getWellFormed() == 0) {
                    return 0;
                }
                if (isMark) {
                    nChar = 0L;
                }
            }

            int nBytes = sequenceLength(b[0]);
            if (nBytes == 0) {
                ErrorMessage error = new ErrorMessage(MessageConstants.UTF8_HUL_2, describeByte(b[0]), this._nByte);
                info.setMessage(error);
                info.setWellFormed(false);
                return 0;
            }

            for (int i = 1; i < nBytes; ++i) {
                try {
                    b[i] = readUnsignedByte(this._dstream, this);
                }
                catch (EOFException e) {
                    // The input stops inside a multi-byte sequence: the last
                    // character is incomplete, so the object is not well-formed.
                    ErrorMessage error = new ErrorMessage(continuationByteError(i), TRUNCATED_SEQUENCE, this._nByte);
                    info.setMessage(error);
                    info.setWellFormed(false);
                    return 0;
                }
                if (this._nByte < 4L) {
                    isMark = this.checkMark(b[i], info);
                }
                if (info.getWellFormed() == 0) {
                    return 0;
                }
                if (!isContinuationByte(b[i])) {
                    ErrorMessage error = new ErrorMessage(continuationByteError(i), describeByte(b[i]), this._nByte);
                    info.setMessage(error);
                    info.setWellFormed(false);
                    return 0;
                }
            }

            int ch = decode(b, nBytes);
            // A byte order mark is not text, so it does not contribute to block usage.
            if (!isMark) {
                this.blockMarker.markBlock(ch);
            }
            if (!printableChars) {
                printableChars = ch > 31 && ch != 127;
            }

            ControlChar ctrlChar = ControlChar.asciiFromInt(ch);
            if (ControlChar.isLineEndChar(ctrlChar)) {
                LineEnding le = LineEnding.fromControlChars(ctrlChar, prevChar);
                if (le != null) {
                    this.usedLineEndings.add(le);
                }
            } else if (ctrlChar != null) {
                this.usedCtrlChars.add(ctrlChar);
            } else if (!printableChars) {
                printableChars = 31 < ch;
            }
            // A CR that is not followed by LF is resolved once the next character is known.
            if (prevChar == ControlChar.CR && ctrlChar != ControlChar.LF) {
                LineEnding le = LineEnding.fromControlChars(ctrlChar, prevChar);
                if (le != null) {
                    this.usedLineEndings.add(le);
                }
            }
            prevChar = ctrlChar;
            ++nChar;
        }

        setChecksums(this._ckSummer, info);
        if (this._nByte == 0L) {
            info.setMessage(new ErrorMessage(MessageConstants.UTF8_HUL_6));
            info.setWellFormed(0);
            return 0;
        }

        this._textMD.setCharset("UTF-8");
        this._textMD.setByte_order(this._bigEndian ? 0 : 1);
        this._textMD.setByte_size("8");
        this._textMD.setCharacter_size("variable");

        List<Property> metadataList = new ArrayList<Property>(4);
        info.setProperty(new Property("UTF8Metadata", PropertyType.PROPERTY, PropertyArity.LIST, metadataList));
        metadataList.add(new Property("Characters", PropertyType.LONG, Long.valueOf(nChar)));

        Property blocks = this.blockMarker.getBlocksUsedProperty("UnicodeBlocks");
        if (blocks != null) {
            metadataList.add(blocks);
        }

        // reportLineEndings() also stores the line break value in _textMD.
        List<String> lineEndings = this.reportLineEndings();
        if (!lineEndings.isEmpty()) {
            metadataList.add(new Property("LineEndings", PropertyType.STRING, PropertyArity.LIST, lineEndings));
        }

        if (!this.usedCtrlChars.isEmpty()) {
            List<String> mnemonics = new ArrayList<String>();
            // EnumSet iteration reports the characters in enum declaration order.
            for (ControlChar ctrlChar : EnumSet.copyOf(this.usedCtrlChars)) {
                mnemonics.add(ctrlChar.mnemonic);
            }
            metadataList.add(new Property("ControlCharacters", PropertyType.STRING, PropertyArity.LIST, mnemonics));
        }

        if (this._withTextMD) {
            metadataList.add(new Property("TextMDMetadata", PropertyType.TEXTMDMETADATA, PropertyArity.SCALAR, this._textMD));
        }
        if (!printableChars) {
            info.setMessage(new InfoMessage(INF_PRINT_CHAR_MISS));
        }
        return 0;
    }

    /**
     * Checks whether the beginning of the stream looks like UTF-8.
     *
     * <p>Up to the number of bytes returned by {@code JhoveBase.getSigBytes()}
     * is examined; a sequence that starts inside that budget is still read to
     * its end. The signature matches when at least one byte was read and no
     * malformed byte was found; otherwise {@code info} is marked not well-formed.
     *
     * @param file   not used by this module
     * @param stream the input to inspect
     * @param info   receives format, MIME type, module and the signature result
     * @throws IOException if reading the stream fails for a reason other than end of file
     */
    public void checkSignatures(File file, InputStream stream, RepInfo info) throws IOException {
        info.setFormat(this._format[0]);
        info.setMimeType(this._mimeType[0]);
        info.setModule(this);
        this.initialBytes = new int[4];
        JhoveBase jb = this.getBase();
        int sigBytes = jb.getSigBytes();
        int bytesRead = 0;
        this.blockMarker = new Utf8BlockMarker();
        boolean eof = false;
        this._nByte = 0L;
        DataInputStream dstream = new DataInputStream(stream);
        while (!eof && bytesRead < sigBytes) {
            int[] b = new int[4];
            try {
                b[0] = readUnsignedByte(dstream, this);
                ++bytesRead;
                if (this._nByte < 4L) {
                    this.checkMark(b[0], info);
                    if (info.getWellFormed() == 0) {
                        return;
                    }
                }
                int nBytes = sequenceLength(b[0]);
                if (nBytes == 0) {
                    info.setWellFormed(false);
                    return;
                }
                for (int i = 1; i < nBytes; ++i) {
                    b[i] = readUnsignedByte(dstream, this);
                    // Continuation bytes count against the signature budget as well.
                    ++bytesRead;
                    if (this._nByte < 4L) {
                        this.checkMark(b[i], info);
                    }
                    if (info.getWellFormed() == 0) {
                        return;
                    }
                    if (!isContinuationByte(b[i])) {
                        info.setWellFormed(false);
                        return;
                    }
                }
            }
            catch (EOFException e) {
                // A short file simply ends the signature scan.
                eof = true;
            }
        }
        if (bytesRead > 0) {
            info.setSigMatch(this._name);
        } else {
            info.setWellFormed(false);
        }
    }

    /**
     * Stores one of the first bytes of the stream and, once the third byte has
     * arrived, inspects the stored bytes for a byte order mark.
     *
     * <p>Callers invoke this right after reading a byte while {@code _nByte}
     * is below 4; {@code _nByte - 1} is used as the index into
     * {@link #initialBytes}.
     *
     * <p>NOTE(review): the {@code FF FE} / {@code FE FF} branches appear to be
     * unreachable from {@code parse} and {@code checkSignatures}, because both
     * reject a leading 0xFF or 0xFE as an invalid lead byte before a third byte
     * is read. Also, {@code initialBytes[3]} has not been stored yet when this
     * check runs at {@code _nByte == 3}, so it still holds its initial value.
     *
     * @param byt  the byte just read
     * @param info receives the informational or error message, if any
     * @return {@code true} only if the first three bytes are the UTF-8 byte
     *         order mark {@code EF BB BF}
     */
    protected boolean checkMark(int byt, RepInfo info) {
        this.initialBytes[(int)this._nByte - 1] = byt;
        if (this._nByte != 3L) {
            return false;
        }

        int first = this.initialBytes[0];
        int second = this.initialBytes[1];
        int third = this.initialBytes[2];

        // EF BB BF: UTF-8 byte order mark.
        if (first == 0xEF && second == 0xBB && third == 0xBF) {
            info.setMessage(new InfoMessage(MessageConstants.UTF8_HUL_1, 0L));
            // The mark is not text, so forget any block usage recorded so far.
            this.blockMarker.reset();
            return true;
        }

        // FF FE: little-endian BOM of another Unicode encoding form (FF FE 00 00
        // is the UTF-32 pattern, FF FE alone the UTF-16 one); presumably the two
        // message constants distinguish those cases - confirm in MessageConstants.
        if (first == 0xFF && second == 0xFE) {
            ErrorMessage msg = third == 0 && this.initialBytes[3] == 0
                    ? new ErrorMessage(MessageConstants.UTF8_HUL_7)
                    : new ErrorMessage(MessageConstants.UTF8_HUL_8);
            info.setMessage(msg);
            info.setWellFormed(false);
            return false;
        }

        // FE FF: big-endian UTF-16 BOM pattern.
        if (first == 0xFE && second == 0xFF) {
            info.setMessage(new ErrorMessage(MessageConstants.UTF8_HUL_9));
            info.setWellFormed(false);
            return false;
        }
        return false;
    }

    /**
     * Lists the line endings that were seen and records each one's textMD value
     * in {@link #_textMD}.
     *
     * @return the string form of every line ending used; empty if none was seen
     */
    private List<String> reportLineEndings() {
        List<String> retVal = new ArrayList<String>();
        // EnumSet.copyOf rejects an empty non-EnumSet collection, hence the guard.
        if (!this.usedLineEndings.isEmpty()) {
            for (LineEnding le : EnumSet.copyOf(this.usedLineEndings)) {
                retVal.add(le.toString());
                this._textMD.setLinebreak(le.textMdVal);
            }
        }
        return retVal;
    }

    /**
     * Returns the total length of the sequence announced by a lead byte.
     *
     * @param lead the first byte of a sequence, as an unsigned value
     * @return 1 to 4 for an acceptable lead byte, or 0 if the byte is a stray
     *         continuation byte ({@code 0x80..0xBF}) or lies in {@code 0xF8..0xFF}
     */
    private static int sequenceLength(int lead) {
        if (0xC0 <= lead && lead <= 0xDF) {
            return 2;
        }
        if (0xE0 <= lead && lead <= 0xEF) {
            return 3;
        }
        if (0xF0 <= lead && lead <= 0xF7) {
            return 4;
        }
        if (0x80 <= lead && lead <= 0xFF) {
            // What is left of the upper half: 0x80..0xBF and 0xF8..0xFF.
            return 0;
        }
        return 1;
    }

    /** Tells whether a byte lies in the continuation range {@code 0x80..0xBF}. */
    private static boolean isContinuationByte(int value) {
        return 0x80 <= value && value <= 0xBF;
    }

    /**
     * Combines the payload bits of a complete sequence into a code point.
     *
     * @param b      the bytes of the sequence
     * @param nBytes the sequence length, 1 to 4
     * @return the decoded value, or -1 for any other length
     */
    private static int decode(int[] b, int nBytes) {
        switch (nBytes) {
            case 1:
                return b[0];
            case 2:
                return ((b[0] & 0x1F) << 6) + (b[1] & 0x3F);
            case 3:
                return ((b[0] & 0xF) << 12) + ((b[1] & 0x3F) << 6) + (b[2] & 0x3F);
            case 4:
                return ((b[0] & 7) << 18) + ((b[1] & 0x3F) << 12) + ((b[2] & 0x3F) << 6) + (b[3] & 0x3F);
            default:
                return -1;
        }
    }

    /**
     * Selects the message used for a problem with the byte at the given
     * position within a sequence.
     *
     * @param index zero-based position within the sequence (1, 2 or 3)
     * @return the matching message constant, or {@code null} for any other index
     */
    private static JhoveMessage continuationByteError(int index) {
        switch (index) {
            case 1:
                return MessageConstants.UTF8_HUL_3;
            case 2:
                return MessageConstants.UTF8_HUL_4;
            case 3:
                return MessageConstants.UTF8_HUL_5;
            default:
                return null;
        }
    }

    /** Formats an offending byte as a character plus its hexadecimal value for error sub-messages. */
    private static String describeByte(int value) {
        return "Value = " + (char)value + " (0x" + Integer.toHexString(value) + ")";
    }
}

