/*
 * Decompiled with CFR 0.152.
 */
package jdk.graal.compiler.lir.amd64;

import java.util.EnumSet;
import jdk.graal.compiler.asm.Label;
import jdk.graal.compiler.asm.amd64.AMD64Address;
import jdk.graal.compiler.asm.amd64.AMD64Assembler;
import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler;
import jdk.graal.compiler.asm.amd64.AVXKind;
import jdk.graal.compiler.core.common.Stride;
import jdk.graal.compiler.debug.GraalError;
import jdk.graal.compiler.lir.LIRInstruction;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.Opcode;
import jdk.graal.compiler.lir.SyncPort;
import jdk.graal.compiler.lir.amd64.AMD64ComplexVectorOp;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.graal.compiler.lir.gen.LIRGeneratorTool;
import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.RegisterValue;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.AllocatableValue;
import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.Value;

/**
 * LIR instruction that emits AMD64 code comparing two memory regions and producing the index of
 * the first position at which they differ, or {@code -1} when no difference is found.
 *
 * <p>The emitted code first scales {@code length} to a byte count by shifting it left by the
 * stride held in {@code rcx}, locates the first mismatching byte, and finally shifts the byte
 * index right (arithmetically) by the same stride so the result is expressed in elements again.
 * The inputs are pinned to fixed registers by {@link #movParamsAndCreate}.
 */
@Opcode(value="VECTORIZED_MISMATCH")
@SyncPort(from="https://github.com/openjdk/jdk/blob/959fa4a1a35a1bb650ec5888efaf3d0fc8cfb025/src/hotspot/cpu/x86/macroAssembler_x86.cpp#L7495-L7713", sha1="72f9b7a60b75ecabf09fc10cb01a9504be97957a")
public final class AMD64VectorizedMismatchOp
extends AMD64ComplexVectorOp {
    public static final LIRInstructionClass<AMD64VectorizedMismatchOp> TYPE = LIRInstructionClass.create(AMD64VectorizedMismatchOp.class);

    // Fixed registers the operands are moved into before the instruction is created.
    private static final Register REG_ARRAY_A = AMD64.rsi;
    private static final Register REG_ARRAY_B = AMD64.rdi;
    private static final Register REG_LENGTH = AMD64.rdx;
    // The stride is used as a variable shift count, which the emitted shifts take from rcx.
    private static final Register REG_STRIDE = AMD64.rcx;

    // "All bytes equal" byte masks as produced by pmovmsk for a 16-byte and a 32-byte vector.
    private static final int ONES_16 = 0xFFFF;
    private static final int ONES_32 = 0xFFFFFFFF;

    @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG})
    private Value resultValue;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value arrayAValue;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value arrayBValue;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value lengthValue;
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    private Value strideValue;
    // The three inputs below are additionally registered as temps (same values as the uses above);
    // emitCode overwrites at least the length register.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value arrayAValueTemp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value arrayBValueTemp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value lengthValueTemp;
    // Two general-purpose scratch registers: [0] = tail length, [1] = scratch.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value[] temp;
    // Three vector scratch registers.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value[] vectorTemp;

    private AMD64VectorizedMismatchOp(LIRGeneratorTool tool, EnumSet<AMD64.CPUFeature> runtimeCheckedCPUFeatures, Value result, Value arrayA, Value arrayB, Value length, Value stride) {
        super(TYPE, tool, runtimeCheckedCPUFeatures, AVXKind.AVXSize.YMM);
        this.resultValue = result;
        this.arrayAValue = this.arrayAValueTemp = arrayA;
        this.arrayBValue = this.arrayBValueTemp = arrayB;
        this.lengthValue = this.lengthValueTemp = length;
        this.strideValue = stride;
        this.temp = this.allocateTempRegisters(tool, AMD64Kind.QWORD, 2);
        this.vectorTemp = this.allocateVectorRegisters(tool, JavaKind.Byte, 3);
    }

    /**
     * Moves the four inputs into their fixed registers (in the order arrayA, arrayB, length,
     * stride) and creates the instruction operating on those registers.
     *
     * @param tool generator used to emit the moves and to allocate temporaries
     * @param runtimeCheckedCPUFeatures CPU features forwarded to the super constructor
     * @param result value receiving the mismatch index
     * @param arrayA address of the first region
     * @param arrayB address of the second region
     * @param length number of elements to compare
     * @param stride shift amount applied to {@code length} to obtain a byte count
     * @return the new instruction; the caller is responsible for appending it
     */
    public static AMD64VectorizedMismatchOp movParamsAndCreate(LIRGeneratorTool tool, EnumSet<AMD64.CPUFeature> runtimeCheckedCPUFeatures, Value result, Value arrayA, Value arrayB, Value length, Value stride) {
        AllocatableValue regArrayA = moveToFixedRegister(tool, REG_ARRAY_A, arrayA);
        AllocatableValue regArrayB = moveToFixedRegister(tool, REG_ARRAY_B, arrayB);
        AllocatableValue regLength = moveToFixedRegister(tool, REG_LENGTH, length);
        // The stride register takes its kind from the stride itself, not from the length.
        AllocatableValue regStride = moveToFixedRegister(tool, REG_STRIDE, stride);
        return new AMD64VectorizedMismatchOp(tool, runtimeCheckedCPUFeatures, result, regArrayA, regArrayB, regLength, regStride);
    }

    /** Emits a move of {@code source} into {@code target}, typed with the source's own kind. */
    private static AllocatableValue moveToFixedRegister(LIRGeneratorTool tool, Register target, Value source) {
        RegisterValue fixed = target.asValue(source.getValueKind());
        tool.emitMove(fixed, source);
        return fixed;
    }

    /**
     * Emits the machine code. The layout is: a vector main loop plus one overlapping final vector
     * compare for inputs of at least one full vector, and a cascade of progressively narrower
     * compares (XMM pair, qword, dword, byte loop) for inputs shorter than one vector.
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler asm) {
        Register result = ValueUtil.asRegister(this.resultValue);
        Register arrayA = ValueUtil.asRegister(this.arrayAValue);
        Register arrayB = ValueUtil.asRegister(this.arrayBValue);
        Register length = ValueUtil.asRegister(this.lengthValue);
        Register tailLength = ValueUtil.asRegister(this.temp[0]);
        Register tmp = ValueUtil.asRegister(this.temp[1]);
        Label returnLabel = new Label();
        Label returnEqualLabel = new Label();
        Register vector1 = ValueUtil.asRegister(this.vectorTemp[0]);
        Register vector2 = ValueUtil.asRegister(this.vectorTemp[1]);
        Register vector3 = ValueUtil.asRegister(this.vectorTemp[2]);
        int bytesPerVector = this.vectorSize.getBytes();
        // All comparisons below work on bytes, independent of the element stride.
        Stride stride = Stride.S1;
        Label vectorLoop = new Label();
        Label diffFound = new Label();
        Label tail = new Label();
        Label qwordTail = new Label();
        Label qwordTail2 = new Label();
        Label dwordTail = new Label();
        Label dwordTail2 = new Label();
        Label scalarTail = new Label();
        Label scalarLoop = new Label();

        GraalError.guarantee(ValueUtil.asRegister(this.strideValue).equals(AMD64.rcx), "stride must be in rcx for shift op");

        // Convert the element count to a byte count (shift by the stride in rcx).
        asm.shlq(length);
        // result doubles as the running byte offset into both regions.
        asm.xorq(result, result);
        // tailLength = byte length modulo the vector size.
        asm.movq(tailLength, length);
        asm.andq(tailLength, bytesPerVector - 1);
        // length = byte length rounded down to whole vectors; zero means "shorter than one vector".
        asm.andqAndJcc(length, -bytesPerVector, AMD64Assembler.ConditionFlag.Zero, tail, false);

        if (this.supports(AMD64.CPUFeature.AVX)) {
            // Main loop: xor the two chunks and test the result for any set bit.
            asm.align(this.preferredLoopAlignment(crb));
            asm.bind(vectorLoop);
            asm.movdqu(this.vectorSize, vector1, new AMD64Address(arrayA, result, stride));
            asm.movdqu(this.vectorSize, vector2, new AMD64Address(arrayB, result, stride));
            asm.pxor(this.vectorSize, vector3, vector1, vector2);
            asm.ptest(this.vectorSize, vector3, vector3);
            asm.jccb(AMD64Assembler.ConditionFlag.NotZero, diffFound);
            asm.addq(result, bytesPerVector);
            asm.subqAndJcc(length, bytesPerVector, AMD64Assembler.ConditionFlag.NotZero, vectorLoop, true);

            // Final compare: step back so that one full vector ends exactly at the end of the input
            // (it overlaps bytes that were already compared).
            asm.leaq(result, new AMD64Address(result, tailLength, stride, -bytesPerVector));
            asm.movdqu(this.vectorSize, vector1, new AMD64Address(arrayA, result, stride));
            asm.movdqu(this.vectorSize, vector2, new AMD64Address(arrayB, result, stride));
            asm.pxor(this.vectorSize, vector3, vector1, vector2);
            asm.ptest(this.vectorSize, vector3, vector3);
            asm.jcc(AMD64Assembler.ConditionFlag.Zero, returnEqualLabel);

            // A chunk differs: build the per-byte equality mask, invert it, and add the position of
            // the lowest set bit (first differing byte) to the chunk offset.
            asm.align(this.preferredBranchTargetAlignment(crb));
            asm.bind(diffFound);
            AMD64Assembler.VexRVMOp.VPCMPEQB.emit(asm, this.vectorSize, vector3, vector1, vector2);
            asm.pmovmsk(this.vectorSize, tmp, vector3);
            asm.notq(tmp);
            this.bsfq(asm, tmp, tmp);
            asm.addq(result, tmp);
            asm.jmp(returnLabel);
        } else {
            // Main loop: byte-wise compare, extract the equality mask and flip it against the
            // all-ones mask so that set bits mark differing bytes.
            asm.align(this.preferredLoopAlignment(crb));
            asm.bind(vectorLoop);
            asm.movdqu(this.vectorSize, vector1, new AMD64Address(arrayA, result, stride));
            asm.movdqu(this.vectorSize, vector2, new AMD64Address(arrayB, result, stride));
            asm.pcmpeq(this.vectorSize, stride, vector1, vector2);
            asm.pmovmsk(this.vectorSize, tmp, vector1);
            asm.xorlAndJcc(tmp, this.vectorSize == AVXKind.AVXSize.XMM ? ONES_16 : ONES_32, AMD64Assembler.ConditionFlag.NotZero, diffFound, true);
            asm.addq(result, bytesPerVector);
            asm.subqAndJcc(length, bytesPerVector, AMD64Assembler.ConditionFlag.NotZero, vectorLoop, true);

            // Final compare on the last full vector of the input (overlapping earlier bytes).
            asm.leaq(result, new AMD64Address(result, tailLength, stride, -bytesPerVector));
            asm.movdqu(this.vectorSize, vector1, new AMD64Address(arrayA, result, stride));
            asm.movdqu(this.vectorSize, vector2, new AMD64Address(arrayB, result, stride));
            asm.pcmpeq(this.vectorSize, stride, vector1, vector2);
            asm.pmovmsk(this.vectorSize, tmp, vector1);
            asm.xorlAndJcc(tmp, this.vectorSize == AVXKind.AVXSize.XMM ? ONES_16 : ONES_32, AMD64Assembler.ConditionFlag.Zero, returnEqualLabel, false);

            // tmp already holds the inverted mask here; its lowest set bit is the first difference.
            asm.align(this.preferredBranchTargetAlignment(crb));
            asm.bind(diffFound);
            this.bsfq(asm, tmp, tmp);
            asm.addq(result, tmp);
            asm.jmp(returnLabel);
        }

        // Tail: the whole input is shorter than one vector; result is still zero here.
        asm.align(this.preferredBranchTargetAlignment(crb));
        asm.bind(tail);
        if (this.supportsAVX2AndYMM()) {
            // At least 16 bytes: compare the first and the last 16 bytes (possibly overlapping) and
            // merge both equality vectors into one YMM register to extract a single 32-bit mask.
            asm.cmpqAndJcc(tailLength, AVXKind.AVXSize.XMM.getBytes(), AMD64Assembler.ConditionFlag.Less, qwordTail, false);
            asm.movdqu(AVXKind.AVXSize.XMM, vector1, new AMD64Address(arrayA));
            asm.pcmpeq(AVXKind.AVXSize.XMM, stride, vector1, new AMD64Address(arrayB));
            asm.movdqu(AVXKind.AVXSize.XMM, vector2, new AMD64Address(arrayA, tailLength, stride, -AVXKind.AVXSize.XMM.getBytes()));
            asm.pcmpeq(AVXKind.AVXSize.XMM, stride, vector2, new AMD64Address(arrayB, tailLength, stride, -AVXKind.AVXSize.XMM.getBytes()));
            AMD64Assembler.VexRVMIOp.VPERM2I128.emit(asm, AVXKind.AVXSize.YMM, vector1, vector2, vector1, 2);
            asm.pmovmsk(AVXKind.AVXSize.YMM, result, vector1);
            asm.xorlAndJcc(result, ONES_32, AMD64Assembler.ConditionFlag.Zero, returnEqualLabel, false);
            this.bsfq(asm, result, result);
            // A bit index above 16 refers to the second (end-aligned) load; rebase it so that it is
            // relative to the start of the input: index + tailLength - 32.
            asm.leaq(tmp, new AMD64Address(result, tailLength, Stride.S1, -AVXKind.AVXSize.YMM.getBytes()));
            asm.cmpq(result, AVXKind.AVXSize.XMM.getBytes());
            asm.cmovq(AMD64Assembler.ConditionFlag.Greater, result, tmp);
            asm.jmp(returnLabel);
        }

        // At least 8 bytes: compare the first 8 bytes, then the last 8 bytes.
        asm.bind(qwordTail);
        asm.cmpqAndJcc(tailLength, AVXKind.AVXSize.QWORD.getBytes(), AMD64Assembler.ConditionFlag.Less, dwordTail, true);
        asm.movq(result, new AMD64Address(arrayA));
        asm.xorqAndJcc(result, new AMD64Address(arrayB), AMD64Assembler.ConditionFlag.Zero, qwordTail2, true);
        // Lowest differing bit, divided by 8, is the index of the first differing byte.
        this.bsfq(asm, result, result);
        asm.shrq(result, 3);
        asm.jmp(returnLabel);
        asm.bind(qwordTail2);
        asm.movq(result, new AMD64Address(arrayA, tailLength, stride, -AVXKind.AVXSize.QWORD.getBytes()));
        asm.xorqAndJcc(result, new AMD64Address(arrayB, tailLength, stride, -AVXKind.AVXSize.QWORD.getBytes()), AMD64Assembler.ConditionFlag.Zero, returnEqualLabel, true);
        this.bsfq(asm, result, result);
        asm.shrl(result, 3);
        // Rebase the byte index from the end-aligned load to the start of the input.
        asm.leaq(result, new AMD64Address(result, tailLength, Stride.S1, -AVXKind.AVXSize.QWORD.getBytes()));
        asm.jmpb(returnLabel);

        // At least 4 bytes: same scheme with 4-byte loads.
        asm.bind(dwordTail);
        asm.cmpqAndJcc(tailLength, AVXKind.AVXSize.DWORD.getBytes(), AMD64Assembler.ConditionFlag.Less, scalarTail, true);
        asm.movl(result, new AMD64Address(arrayA));
        asm.xorlAndJcc(result, new AMD64Address(arrayB), AMD64Assembler.ConditionFlag.Zero, dwordTail2, true);
        this.bsfq(asm, result, result);
        asm.shrl(result, 3);
        asm.jmpb(returnLabel);
        asm.bind(dwordTail2);
        asm.movl(result, new AMD64Address(arrayA, tailLength, stride, -AVXKind.AVXSize.DWORD.getBytes()));
        asm.xorlAndJcc(result, new AMD64Address(arrayB, tailLength, stride, -AVXKind.AVXSize.DWORD.getBytes()), AMD64Assembler.ConditionFlag.Zero, returnEqualLabel, true);
        this.bsfq(asm, result, result);
        asm.shrl(result, 3);
        asm.leaq(result, new AMD64Address(result, tailLength, Stride.S1, -AVXKind.AVXSize.DWORD.getBytes()));
        asm.jmpb(returnLabel);

        // Fewer than 4 bytes: byte-by-byte loop; an empty input counts as equal.
        asm.bind(scalarTail);
        asm.testqAndJcc(tailLength, tailLength, AMD64Assembler.ConditionFlag.Zero, returnEqualLabel, true);
        asm.bind(scalarLoop);
        asm.movzbl(tmp, new AMD64Address(arrayA, result, stride));
        // The length register is no longer needed on this path and is reused as scratch.
        asm.movzbl(length, new AMD64Address(arrayB, result, stride));
        asm.cmplAndJcc(tmp, length, AMD64Assembler.ConditionFlag.NotEqual, returnLabel, true);
        asm.incl(result);
        asm.decqAndJcc(tailLength, AMD64Assembler.ConditionFlag.NotZero, scalarLoop, true);
        // Loop exhausted without a difference: fall through into the "equal" exit.

        asm.align(this.preferredBranchTargetAlignment(crb));
        asm.bind(returnEqualLabel);
        asm.movq(result, -1L);

        // Common exit: convert the byte index back to an element index. The shift is arithmetic,
        // so the -1 "no mismatch" marker is preserved.
        asm.align(this.preferredBranchTargetAlignment(crb));
        asm.bind(returnLabel);
        asm.sarq(result);
    }
}

