/*
 * Decompiled with CFR 0.152.
 */
package jdk.graal.compiler.lir.amd64;

import java.util.EnumSet;
import jdk.graal.compiler.asm.Label;
import jdk.graal.compiler.asm.amd64.AMD64Address;
import jdk.graal.compiler.asm.amd64.AMD64Assembler;
import jdk.graal.compiler.asm.amd64.AMD64BaseAssembler;
import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler;
import jdk.graal.compiler.asm.amd64.AVXKind;
import jdk.graal.compiler.core.common.Stride;
import jdk.graal.compiler.debug.Assertions;
import jdk.graal.compiler.debug.GraalError;
import jdk.graal.compiler.lir.ConstantValue;
import jdk.graal.compiler.lir.LIRInstruction;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.Opcode;
import jdk.graal.compiler.lir.amd64.AMD64ComplexVectorOp;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.graal.compiler.lir.gen.LIRGeneratorTool;
import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.RegisterValue;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.AllocatableValue;
import jdk.vm.ci.meta.JavaConstant;
import jdk.vm.ci.meta.Value;

@Opcode(value="AMD64_ARRAY_INDEX_OF")
public final class AMD64ArrayIndexOfOp
extends AMD64ComplexVectorOp {
    public static final LIRInstructionClass<AMD64ArrayIndexOfOp> TYPE = LIRInstructionClass.create(AMD64ArrayIndexOfOp.class);
    // Fixed registers that movParamsAndCreate moves the incoming parameters into
    // before the op is constructed.
    private static final Register REG_ARRAY = AMD64.rsi;
    private static final Register REG_OFFSET = AMD64.rax;
    private static final Register REG_LENGTH = AMD64.rdx;
    private static final Register REG_FROM_INDEX = AMD64.rdi;
    private static final Register REG_SEARCH_VALUE_1 = AMD64.rcx;
    private static final Register REG_SEARCH_VALUE_2 = AMD64.r8;
    // Element stride of the searched array.
    private final Stride stride;
    // Number of search values supplied to movParamsAndCreate (searchValues.length).
    private final int nValues;
    private final LIRGeneratorTool.ArrayIndexOfVariant variant;
    // True for the FindTwoConsecutive and FindTwoConsecutiveWithMask variants.
    private final boolean findTwoConsecutive;
    // True for the WithMask and FindTwoConsecutiveWithMask variants.
    private final boolean withMask;
    // Result of getVectorKind(stride); its vector length / byte size drives the loop step in emitCode.
    private final AMD64Kind vectorKind;
    // Scale used when indexing the array in the element-wise loop; the constructor sets it to stride.
    private final Stride arrayIndexStride;
    // Constant array offset, or -1 when the offset is not a constant in [0, Integer.MAX_VALUE].
    private final int constOffset;
    // Register that receives the result index; emitCode also uses it as the running index.
    @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG})
    Value resultValue;
    // Inputs. Each of the following Use fields is paired with a Temp field below that the
    // constructor sets to the same value, because emitCode overwrites these registers.
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    Value arrayReg;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    Value offsetReg;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    Value lengthReg;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    Value fromIndexReg;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    Value searchValue1;
    // ILLEGAL for the Table variant (see constructor); otherwise the second search value.
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    Value searchValue2;
    // Temp aliases of the inputs above.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value arrayTmp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value offsetTmp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value lengthTmp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value fromIndexTmp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value searchValue1Tmp;
    // For the Table variant this is a pure scratch register used by the element-wise loop in emitCode.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    Value searchValue2Tmp;
    // Third and fourth search values; may live in a register or a stack slot, or be ILLEGAL when absent.
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK, LIRInstruction.OperandFlag.ILLEGAL})
    Value searchValue3;
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK, LIRInstruction.OperandFlag.ILLEGAL})
    Value searchValue4;
    // Vector registers holding the broadcast search values (or the lookup tables for the Table variant).
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value[] vectorCompareVal;
    // Vector registers that receive the array contents loaded for comparison.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value[] vectorArray;
    // Additional vector scratch registers; count comes from getNumberOfRequiredTempVectors.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    Value[] vectorTemp;

    private AMD64ArrayIndexOfOp(Stride stride, LIRGeneratorTool.ArrayIndexOfVariant variant, int constOffset, int nValues, LIRGeneratorTool tool, EnumSet<AMD64.CPUFeature> runtimeCheckedCPUFeatures, Value result, Value arrayPtr, Value arrayOffset, Value arrayLength, Value fromIndex, Value searchValue1, Value searchValue2, Value searchValue3, Value searchValue4) {
        super(TYPE, tool, runtimeCheckedCPUFeatures, AVXKind.AVXSize.YMM);
        this.stride = stride;
        this.arrayIndexStride = stride;
        this.variant = variant;
        this.findTwoConsecutive = variant == LIRGeneratorTool.ArrayIndexOfVariant.FindTwoConsecutive || variant == LIRGeneratorTool.ArrayIndexOfVariant.FindTwoConsecutiveWithMask;
        this.withMask = variant == LIRGeneratorTool.ArrayIndexOfVariant.WithMask || variant == LIRGeneratorTool.ArrayIndexOfVariant.FindTwoConsecutiveWithMask;
        this.constOffset = constOffset;
        this.nValues = nValues;
        GraalError.guarantee(AMD64ArrayIndexOfOp.supports(tool.target(), runtimeCheckedCPUFeatures, AMD64.CPUFeature.SSE2, new AMD64.CPUFeature[0]), "needs at least SSE2 support");
        this.resultValue = result;
        this.arrayTmp = this.arrayReg = arrayPtr;
        this.offsetTmp = this.offsetReg = arrayOffset;
        this.lengthTmp = this.lengthReg = arrayLength;
        this.fromIndexTmp = this.fromIndexReg = fromIndex;
        this.searchValue1Tmp = this.searchValue1 = searchValue1;
        if (variant == LIRGeneratorTool.ArrayIndexOfVariant.Table) {
            this.searchValue2Tmp = searchValue2;
            this.searchValue2 = Value.ILLEGAL;
        } else {
            this.searchValue2Tmp = this.searchValue2 = searchValue2;
        }
        this.searchValue3 = searchValue3;
        this.searchValue4 = searchValue4;
        this.vectorKind = this.getVectorKind(stride);
        this.vectorCompareVal = this.allocateVectorRegisters(tool, stride, variant == LIRGeneratorTool.ArrayIndexOfVariant.Table ? 2 : nValues);
        this.vectorArray = this.allocateVectorRegisters(tool, stride, variant == LIRGeneratorTool.ArrayIndexOfVariant.Table ? stride.value : 4);
        this.vectorTemp = this.allocateVectorRegisters(tool, stride, AMD64ArrayIndexOfOp.getNumberOfRequiredTempVectors(variant, nValues));
    }

    /**
     * Returns how many scratch vector registers the given variant needs:
     * {@code nValues / 2 + 1} for MatchRange, 4 for Table and none otherwise.
     */
    private static int getNumberOfRequiredTempVectors(LIRGeneratorTool.ArrayIndexOfVariant variant, int nValues) {
        int required;
        switch (variant) {
            case Table:
                required = 4;
                break;
            case MatchRange:
                required = nValues / 2 + 1;
                break;
            default:
                required = 0;
                break;
        }
        return required;
    }

    /**
     * Converts an array of register values into the corresponding array of registers,
     * preserving order.
     */
    private static Register[] asRegisters(Value[] values) {
        Register[] result = new Register[values.length];
        int slot = 0;
        for (Value value : values) {
            result[slot++] = ValueUtil.asRegister(value);
        }
        return result;
    }

    /**
     * Moves the operands into the fixed registers this op expects and creates the op.
     * <p>
     * The array pointer, offset, length, from-index and the first two search values go into
     * fixed registers; a third and fourth search value are only made allocatable. For the
     * Table variant with a single search value the second fixed register is still reserved.
     * If the array offset is a constant in {@code [0, Integer.MAX_VALUE]} it is passed on as a
     * constant offset, otherwise {@code -1} is passed.
     */
    public static AMD64ArrayIndexOfOp movParamsAndCreate(Stride stride, LIRGeneratorTool.ArrayIndexOfVariant variant, LIRGeneratorTool tool, EnumSet<AMD64.CPUFeature> runtimeCheckedCPUFeatures, Value result, Value arrayPtr, Value arrayOffset, Value arrayLength, Value fromIndex, Value ... searchValues) {
        final int nValues = searchValues.length;
        final boolean hasSecondValue = nValues > 1;

        RegisterValue fixedArray = REG_ARRAY.asValue(arrayPtr.getValueKind());
        RegisterValue fixedOffset = REG_OFFSET.asValue(arrayOffset.getValueKind());
        RegisterValue fixedLength = REG_LENGTH.asValue(arrayLength.getValueKind());
        RegisterValue fixedFromIndex = REG_FROM_INDEX.asValue(fromIndex.getValueKind());
        RegisterValue fixedValue1 = REG_SEARCH_VALUE_1.asValue(searchValues[0].getValueKind());

        AllocatableValue fixedValue2;
        if (hasSecondValue) {
            fixedValue2 = REG_SEARCH_VALUE_2.asValue(searchValues[1].getValueKind());
        } else if (variant == LIRGeneratorTool.ArrayIndexOfVariant.Table) {
            fixedValue2 = REG_SEARCH_VALUE_2.asValue();
        } else {
            fixedValue2 = Value.ILLEGAL;
        }

        AllocatableValue value3;
        if (nValues > 2) {
            value3 = tool.asAllocatable(searchValues[2]);
        } else {
            value3 = Value.ILLEGAL;
        }
        AllocatableValue value4;
        if (nValues > 3) {
            value4 = tool.asAllocatable(searchValues[3]);
        } else {
            value4 = Value.ILLEGAL;
        }

        tool.emitConvertNullToZero((AllocatableValue)fixedArray, arrayPtr);
        tool.emitMove((AllocatableValue)fixedOffset, arrayOffset);
        tool.emitMove((AllocatableValue)fixedLength, arrayLength);
        tool.emitMove((AllocatableValue)fixedFromIndex, fromIndex);
        tool.emitMove((AllocatableValue)fixedValue1, searchValues[0]);
        if (hasSecondValue) {
            tool.emitMove((AllocatableValue)((RegisterValue)fixedValue2), searchValues[1]);
        }

        // Only a constant offset that fits a non-negative int can be folded into the address.
        int constOffset = -1;
        if (AMD64ArrayIndexOfOp.isConstant(arrayOffset)) {
            long offsetValue = AMD64ArrayIndexOfOp.asConstant(arrayOffset).asLong();
            if (offsetValue >= 0L && offsetValue <= Integer.MAX_VALUE) {
                constOffset = (int)offsetValue;
            }
        }

        return new AMD64ArrayIndexOfOp(stride, variant, constOffset, nValues, tool, runtimeCheckedCPUFeatures, result, fixedArray, fixedOffset, fixedLength, fixedFromIndex, fixedValue1, fixedValue2, value3, value4);
    }

    /**
     * Tells whether a constant array offset was recorded; a negative {@code constOffset}
     * means the offset has to be read from its register.
     */
    private boolean useConstantOffset() {
        boolean offsetIsDynamic = this.constOffset < 0;
        return !offsetIsDynamic;
    }

    /**
     * Emits the machine code for the index-of search.
     * <p>
     * The result register doubles as the running index. It is initialised to
     * {@code fromIndex + vectorLength (+1 for the two-consecutive variants)}, i.e. it points
     * just past the first full vector load. If that fits within the array length the
     * vectorized path runs: one compare at the initial position, a realigned bulk loop and
     * tail compares. Otherwise the code falls back to smaller compares (XMM when AVX2/YMM is
     * in use, then QWORD for non-Table variants) and finally an element-wise loop.
     * On exit the result register holds the index of the match, or -1 if there is none.
     *
     * @param crb compilation result builder, used for stack-slot addresses, masks and loop alignment
     * @param asm assembler the instructions are emitted into
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler asm) {
        Register cmpResult;
        int bulkSize;
        int vectorLength;
        Label bsfAdd;
        Label skipBulkVectorLoop;
        Label elementWiseNotFound;
        Label elementWiseFound;
        Label elementWiseLoop;
        Label elementWise;
        Label qWordWise;
        Label runVectorized;
        Label[] vectorFound;
        Label singleVectorLoop;
        Label bulkVectorLoop;
        Label ret;
        Register[] vecTmp;
        Register[] vecArray;
        Register[] vecCmp;
        Value[] searchValue;
        Register index;
        Register arrayLength;
        Register arrayPtr;
        int nVectors;
        // block32/block33 are decompiler artifacts: leaving block32 falls into the broadcast
        // loop (non-Table variants); leaving block33 skips that loop (Table variant).
        block33: {
            block32: {
                nVectors = this.getNumberOfVectorsInBulkLoop();
                arrayPtr = ValueUtil.asRegister((Value)this.arrayReg);
                arrayLength = ValueUtil.asRegister((Value)this.lengthReg);
                index = ValueUtil.asRegister((Value)this.resultValue);
                // Entries beyond nValues are null.
                searchValue = new Value[]{this.nValues > 0 ? this.searchValue1 : null, this.nValues > 1 ? this.searchValue2 : null, this.nValues > 2 ? this.searchValue3 : null, this.nValues > 3 ? this.searchValue4 : null};
                vecCmp = AMD64ArrayIndexOfOp.asRegisters(this.vectorCompareVal);
                vecArray = AMD64ArrayIndexOfOp.asRegisters(this.vectorArray);
                vecTmp = AMD64ArrayIndexOfOp.asRegisters(this.vectorTemp);
                ret = new Label();
                bulkVectorLoop = new Label();
                singleVectorLoop = new Label();
                vectorFound = new Label[]{new Label(), new Label(), new Label(), new Label()};
                runVectorized = new Label();
                qWordWise = new Label();
                elementWise = new Label();
                elementWiseLoop = new Label();
                elementWiseFound = new Label();
                elementWiseNotFound = new Label();
                skipBulkVectorLoop = new Label();
                bsfAdd = new Label();
                // The Table variant steps by the vector's byte size, all others by its element count.
                vectorLength = this.variant == LIRGeneratorTool.ArrayIndexOfVariant.Table ? this.vectorKind.getSizeInBytes() : this.vectorKind.getVectorLength();
                bulkSize = vectorLength * nVectors;
                // arrayPtr += offset (constant or from register)
                if (this.useConstantOffset()) {
                    asm.leaq(arrayPtr, new AMD64Address(arrayPtr, this.constOffset));
                } else {
                    asm.leaq(arrayPtr, new AMD64Address(arrayPtr, ValueUtil.asRegister((Value)this.offsetReg), Stride.S1));
                }
                // index = fromIndex + vectorLength (+ 1 if findTwoConsecutive)
                asm.leaq(index, new AMD64Address(ValueUtil.asRegister((Value)this.fromIndexReg), vectorLength + (this.findTwoConsecutive ? 1 : 0)));
                // fromIndex has been consumed; its register is reused as the compare-result scratch.
                cmpResult = ValueUtil.asRegister((Value)this.fromIndexReg);
                if (this.variant != LIRGeneratorTool.ArrayIndexOfVariant.Table) break block32;
                // Table variant: searchValue[0] is used as an address; load two consecutive
                // XMM-sized blocks from it into vecCmp[0] and vecCmp[1].
                asm.movdqu(AVXKind.AVXSize.XMM, vecCmp[0], new AMD64Address(ValueUtil.asRegister((Value)searchValue[0])));
                asm.movdqu(AVXKind.AVXSize.XMM, vecCmp[1], new AMD64Address(ValueUtil.asRegister((Value)searchValue[0]), AVXKind.AVXSize.XMM.getBytes()));
                this.loadMask(crb, asm, Stride.S1, vecTmp[0], 15);
                if (this.vectorSize != AVXKind.AVXSize.YMM) break block33;
                // YMM only: VPERM2I128 with immediate 0 on both tables.
                AMD64Assembler.VexRVMIOp.VPERM2I128.emit((AMD64Assembler)asm, AVXKind.AVXSize.YMM, vecCmp[0], vecCmp[0], vecCmp[0], 0);
                AMD64Assembler.VexRVMIOp.VPERM2I128.emit((AMD64Assembler)asm, AVXKind.AVXSize.YMM, vecCmp[1], vecCmp[1], vecCmp[1], 0);
                if (this.stride != Stride.S4) break block33;
                this.loadMask(crb, asm, vecTmp[3], AMD64ArrayIndexOfOp.getAVX2IntToBytePackingUnscrambleMap());
                break block33;
            }
            // Non-Table variants: broadcast every search value into its own compare vector.
            for (int i = 0; i < this.nValues; ++i) {
                this.broadcastSearchValue(crb, asm, vecCmp[i], searchValue[i], cmpResult, vecArray[0]);
            }
        }
        // If a full vector load starting at fromIndex is in bounds, take the vectorized path.
        asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.LessEqual, runVectorized, false);
        if (this.supportsAVX2AndYMM()) {
            // Too short for the full vector: retry with XMM, i.e. half the step.
            Label[] xmmFound = new Label[]{new Label()};
            asm.subq(index, vectorLength / 2);
            asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.Greater, this.variant == LIRGeneratorTool.ArrayIndexOfVariant.Table ? elementWise : qWordWise, false);
            // One compare at the current index, then one with index set to arrayLength.
            this.emitVectorCompare(asm, AVXKind.AVXSize.XMM, 1, arrayPtr, index, vecCmp, vecArray, vecTmp, cmpResult, xmmFound, this.variant != LIRGeneratorTool.ArrayIndexOfVariant.Table);
            asm.movq(index, arrayLength);
            this.emitVectorCompare(asm, AVXKind.AVXSize.XMM, 1, arrayPtr, index, vecCmp, vecArray, vecTmp, cmpResult, xmmFound, true);
            asm.jmp(elementWiseNotFound);
            asm.bind(xmmFound[0]);
            // Undo the XMM step (and the +1 of the two-consecutive variants) before adding the bit offset.
            asm.subq(index, vectorLength / 2 + (this.findTwoConsecutive ? 1 : 0));
            asm.jmp(bsfAdd);
        }
        // Number of elements covered by one QWORD-sized compare.
        int vectorLengthQWord = AVXKind.AVXSize.QWORD.getBytes() / this.stride.value;
        if (this.variant != LIRGeneratorTool.ArrayIndexOfVariant.Table) {
            // Same two-compare scheme as above, with QWORD-sized loads.
            asm.bind(qWordWise);
            Label[] qWordFound = new Label[]{new Label()};
            asm.subq(index, vectorLengthQWord);
            asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.Greater, elementWise, false);
            this.emitVectorCompare(asm, AVXKind.AVXSize.QWORD, 1, arrayPtr, index, vecCmp, vecArray, vecTmp, cmpResult, qWordFound, true);
            asm.movq(index, arrayLength);
            this.emitVectorCompare(asm, AVXKind.AVXSize.QWORD, 1, arrayPtr, index, vecCmp, vecArray, vecTmp, cmpResult, qWordFound, true);
            asm.jmpb(elementWiseNotFound);
            asm.bind(qWordFound[0]);
            asm.subq(index, vectorLengthQWord + (this.findTwoConsecutive ? 1 : 0));
            asm.jmp(bsfAdd);
        }
        asm.bind(elementWise);
        // Remove whatever step is still included in index; afterwards
        // index = fromIndex (+ 1 if findTwoConsecutive).
        asm.subq(index, this.variant == LIRGeneratorTool.ArrayIndexOfVariant.Table ? (this.supportsAVX2AndYMM() ? vectorLength / 2 : vectorLength) : vectorLengthQWord);
        asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.GreaterEqual, elementWiseNotFound, true);
        AMD64BaseAssembler.OperandSize valueSize = AMD64ArrayIndexOfOp.getOpSize(this.stride);
        if (this.findTwoConsecutive) {
            // Pack both search values into searchValue[0]: (value1 << strideBits) | value0.
            asm.shlq(ValueUtil.asRegister((Value)searchValue[1]), this.stride.getBitCount());
            asm.orq(ValueUtil.asRegister((Value)searchValue[0]), ValueUtil.asRegister((Value)searchValue[1]));
            if (this.withMask) {
                // Pack searchValue[3] and searchValue[2] the same way into searchValue[1]'s register:
                // (value3 << strideBits) | value2. Either may live on the stack.
                if (ValueUtil.isStackSlot((Value)searchValue[3])) {
                    asm.movSZx(valueSize, AMD64MacroAssembler.ExtendMode.ZERO_EXTEND, ValueUtil.asRegister((Value)searchValue[1]), (AMD64Address)crb.asAddress(searchValue[3]));
                } else {
                    asm.movq(ValueUtil.asRegister((Value)searchValue[1]), ValueUtil.asRegister((Value)searchValue[3]));
                }
                asm.shlq(ValueUtil.asRegister((Value)searchValue[1]), this.stride.getBitCount());
                if (ValueUtil.isStackSlot((Value)searchValue[2])) {
                    asm.movSZx(valueSize, AMD64MacroAssembler.ExtendMode.ZERO_EXTEND, cmpResult, (AMD64Address)crb.asAddress(searchValue[2]));
                    asm.orq(ValueUtil.asRegister((Value)searchValue[1]), cmpResult);
                } else {
                    asm.orq(ValueUtil.asRegister((Value)searchValue[1]), ValueUtil.asRegister((Value)searchValue[2]));
                }
            }
        }
        asm.bind(elementWiseLoop);
        boolean valuesOnStack = this.searchValuesOnStack(searchValue);
        // Address of the current element; one element earlier for the two-consecutive variants.
        AMD64Address arrayAddr = new AMD64Address(arrayPtr, index, this.arrayIndexStride, this.findTwoConsecutive ? -this.stride.value : 0);
        switch (this.variant) {
            case MatchAny: {
                // Found if the element equals any of the search values.
                asm.movSZx(valueSize, AMD64MacroAssembler.ExtendMode.ZERO_EXTEND, cmpResult, arrayAddr);
                for (int i = 0; i < this.nValues; ++i) {
                    AMD64ArrayIndexOfOp.cmpqAndJcc(crb, asm, cmpResult, searchValue[i], elementWiseFound, AMD64Assembler.ConditionFlag.Equal, true);
                }
                break;
            }
            case MatchRange: {
                // Search values are consumed in pairs (lower, upper); found if
                // lower <= element <= upper (unsigned) for any pair.
                asm.movSZx(valueSize, AMD64MacroAssembler.ExtendMode.ZERO_EXTEND, cmpResult, arrayAddr);
                for (int i = 0; i < this.nValues; i += 2) {
                    Label noMatch = new Label();
                    AMD64ArrayIndexOfOp.cmpqAndJcc(crb, asm, cmpResult, searchValue[i], noMatch, AMD64Assembler.ConditionFlag.Below, true);
                    AMD64ArrayIndexOfOp.cmpqAndJcc(crb, asm, cmpResult, searchValue[i + 1], elementWiseFound, AMD64Assembler.ConditionFlag.BelowEqual, true);
                    asm.bind(noMatch);
                }
                break;
            }
            case WithMask: {
                // Found if (element | searchValue[1]) == searchValue[0].
                assert (!valuesOnStack);
                assert (this.nValues == 2) : this.nValues;
                asm.movSZx(valueSize, AMD64MacroAssembler.ExtendMode.ZERO_EXTEND, cmpResult, arrayAddr);
                asm.orq(cmpResult, ValueUtil.asRegister((Value)searchValue[1]));
                asm.cmpqAndJcc(cmpResult, ValueUtil.asRegister((Value)searchValue[0]), AMD64Assembler.ConditionFlag.Equal, elementWiseFound, true);
                break;
            }
            case FindTwoConsecutive: {
                // Compare two adjacent elements at once against the packed search value.
                asm.cmpAndJcc(AMD64ArrayIndexOfOp.getDoubleOpSize(this.stride), ValueUtil.asRegister((Value)searchValue[0]), arrayAddr, AMD64Assembler.ConditionFlag.Equal, elementWiseFound, true);
                break;
            }
            case FindTwoConsecutiveWithMask: {
                // As above, but OR the packed mask into the loaded pair first.
                asm.movSZx(AMD64ArrayIndexOfOp.getDoubleOpSize(this.stride), AMD64MacroAssembler.ExtendMode.ZERO_EXTEND, cmpResult, arrayAddr);
                asm.orq(cmpResult, ValueUtil.asRegister((Value)searchValue[1]));
                asm.cmpqAndJcc(cmpResult, ValueUtil.asRegister((Value)searchValue[0]), AMD64Assembler.ConditionFlag.Equal, elementWiseFound, true);
                break;
            }
            case Table: {
                Label greaterThan0xff = new Label();
                Register tmp = ValueUtil.asRegister((Value)this.searchValue2Tmp);
                asm.movSZx(valueSize, AMD64MacroAssembler.ExtendMode.ZERO_EXTEND, cmpResult, arrayAddr);
                // Elements above 0xff never match; only possible for strides wider than one byte.
                if (this.stride.value > 1) {
                    asm.cmpqAndJcc(cmpResult, 255, AMD64Assembler.ConditionFlag.Above, greaterThan0xff, true);
                }
                // Look up table[element >> 4] and table[16 + (element & 0xf)];
                // found if the two table bytes share a set bit.
                asm.movq(tmp, cmpResult);
                asm.sarq(cmpResult, 4);
                asm.andq(tmp, 15);
                asm.movzbq(cmpResult, new AMD64Address(ValueUtil.asRegister((Value)searchValue[0]), cmpResult, Stride.S1, 0));
                asm.movzbq(tmp, new AMD64Address(ValueUtil.asRegister((Value)searchValue[0]), tmp, Stride.S1, 16));
                asm.andqAndJcc(cmpResult, tmp, AMD64Assembler.ConditionFlag.NotZero, elementWiseFound, true);
                asm.bind(greaterThan0xff);
            }
        }
        // Advance to the next element and loop while index < arrayLength.
        asm.incrementq(index, 1);
        asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.Less, elementWiseLoop, true);
        asm.bind(elementWiseNotFound);
        asm.xorq(index, index);
        // Not found: index = 0 - 1 = -1. For the two-consecutive variants the found label sits
        // before the decrement, so a match also subtracts the +1 added to index at the start.
        if (this.findTwoConsecutive) {
            asm.bind(elementWiseFound);
            asm.decrementq(index, 1);
        } else {
            asm.decrementq(index, 1);
            asm.bind(elementWiseFound);
        }
        asm.jmp(ret);
        asm.bind(runVectorized);
        // First vector compare at the initial (possibly unaligned) position.
        this.emitVectorCompare(asm, this.vectorSize, 1, arrayPtr, index, vecCmp, vecArray, vecTmp, cmpResult, vectorFound, false);
        // Realign: cmpResult = low 32 bits of arrayPtr, scaled down to elements for strides > 1;
        // index = roundDown(index + cmpResult, vectorLength) - cmpResult + bulkSize
        // (rounding via the mask assumes vectorLength is a power of two).
        asm.movl(cmpResult, arrayPtr);
        if (this.stride.value > 1) {
            asm.shrl(cmpResult, this.stride.log2);
        }
        asm.addq(index, cmpResult);
        asm.andq(index, -vectorLength);
        asm.subq(index, cmpResult);
        asm.addq(index, bulkSize);
        // A short jump is used for the bulk loop back-edge unless the variant is
        // (MatchRange with 4 values, or Table) and the stride is wider than one byte.
        boolean bulkLoopShortJmp = (this.variant != LIRGeneratorTool.ArrayIndexOfVariant.MatchRange || this.nValues != 4) && this.variant != LIRGeneratorTool.ArrayIndexOfVariant.Table || this.stride.value <= 1;
        // Skip the bulk loop if not even one bulk iteration fits.
        asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.Greater, skipBulkVectorLoop, false);
        asm.align(this.preferredLoopAlignment(crb));
        asm.bind(bulkVectorLoop);
        // Bulk loop: compare nVectors vectors per iteration while index <= arrayLength.
        this.emitVectorCompare(asm, this.vectorSize, nVectors, arrayPtr, index, vecCmp, vecArray, vecTmp, cmpResult, vectorFound, false);
        asm.addq(index, bulkSize);
        asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.LessEqual, bulkVectorLoop, bulkLoopShortJmp);
        asm.bind(skipBulkVectorLoop);
        if (nVectors == 1) {
            // Single-vector bulk loop: one final compare with index set to arrayLength.
            asm.movq(index, arrayLength);
            this.emitVectorCompare(asm, this.vectorSize, 1, arrayPtr, index, vecCmp, vecArray, vecTmp, cmpResult, vectorFound, true);
        } else {
            // Multi-vector bulk loop: finish with single-vector steps, clamping index to
            // arrayLength so the last compare ends exactly at the array end.
            asm.subq(index, bulkSize);
            asm.align(this.preferredLoopAlignment(crb));
            asm.bind(singleVectorLoop);
            asm.addq(index, vectorLength);
            asm.cmpq(index, arrayLength);
            asm.cmovq(AMD64Assembler.ConditionFlag.Greater, index, arrayLength);
            this.emitVectorCompare(asm, this.vectorSize, 1, arrayPtr, index, vecCmp, vecArray, vecTmp, cmpResult, vectorFound, true);
            asm.cmpqAndJcc(index, arrayLength, AMD64Assembler.ConditionFlag.Less, singleVectorLoop, true);
        }
        // Nothing found in the vectorized path.
        asm.movl(index, -1);
        asm.jmpb(ret);
        // Per-vector match targets: rewind index to the start of the matching vector, then
        // join at bsfAdd (the last target falls through).
        for (int i = 0; i < nVectors; ++i) {
            asm.bind(vectorFound[i]);
            asm.subq(index, this.getResultIndexDelta(i));
            if (i >= nVectors - 1) continue;
            asm.jmpb(bsfAdd);
        }
        asm.bind(bsfAdd);
        if (this.variant == LIRGeneratorTool.ArrayIndexOfVariant.Table) {
            // Table variant: rebuild the bit mask as the inverted mask of the zero bytes in vecTmp[2].
            asm.pxor(this.vectorSize, vecTmp[1], vecTmp[1]);
            asm.pcmpeqb(this.vectorSize, vecTmp[2], vecTmp[1]);
            asm.pmovmsk(this.vectorSize, cmpResult, vecTmp[2]);
            asm.notq(cmpResult);
        }
        // Position of the lowest set bit in the compare mask.
        this.bsfq(asm, cmpResult, cmpResult);
        if (this.stride.value > 1 && this.variant != LIRGeneratorTool.ArrayIndexOfVariant.Table) {
            // Scale the bit position down by the stride to get an element offset.
            asm.shrq(cmpResult, this.stride.log2);
        }
        asm.addq(index, cmpResult);
        asm.bind(ret);
    }

    /**
     * Returns how many vectors one iteration of the bulk loop compares: for MatchAny 4, 2 or 1
     * depending on whether there are one, two or more search values; 2 for FindTwoConsecutive;
     * 1 for every other variant.
     */
    private int getNumberOfVectorsInBulkLoop() {
        switch (this.variant) {
            case FindTwoConsecutive:
                return 2;
            case MatchAny:
                if (this.nValues == 1) {
                    return 4;
                }
                if (this.nValues == 2) {
                    return 2;
                }
                return 1;
            default:
                return 1;
        }
    }

    /**
     * Emits a 64-bit compare-and-branch of {@code src1} against {@code src2}, where
     * {@code src2} is either a stack slot (compared through its address) or a register.
     */
    private static void cmpqAndJcc(CompilationResultBuilder crb, AMD64MacroAssembler asm, Register src1, Value src2, Label branchTarget, AMD64Assembler.ConditionFlag cc, boolean isShortJmp) {
        if (!ValueUtil.isStackSlot(src2)) {
            asm.cmpqAndJcc(src1, ValueUtil.asRegister(src2), cc, branchTarget, isShortJmp);
            return;
        }
        AMD64Address stackOperand = (AMD64Address)crb.asAddress(src2);
        asm.cmpqAndJcc(src1, stackOperand, cc, branchTarget, isShortJmp);
    }

    /**
     * Reports whether any of the first {@code nValues} search values lives in a stack slot.
     */
    private boolean searchValuesOnStack(Value[] searchValue) {
        int position = 0;
        while (position < this.nValues) {
            if (ValueUtil.isStackSlot(searchValue[position])) {
                return true;
            }
            position++;
        }
        return false;
    }

    /**
     * Returns the amount to subtract from the running index when a match is reported for the
     * {@code i}-th found label: the vector size in bytes for the Table variant, otherwise
     * {@code (i + 1)} vector lengths plus one extra element for the two-consecutive variants.
     */
    private int getResultIndexDelta(int i) {
        if (this.variant != LIRGeneratorTool.ArrayIndexOfVariant.Table) {
            int consecutiveAdjust = this.findTwoConsecutive ? 1 : 0;
            return (i + 1) * this.vectorKind.getVectorLength() + consecutiveAdjust;
        }
        return this.vectorSize.getBytes();
    }

    /**
     * Computes the (negative) byte displacement of a vector load: {@code (i + 1)} vector
     * sizes back, plus {@code (j ^ 1)} strides further back for the two-consecutive variants.
     */
    private int getVectorOffset(int i, int j, AVXKind.AVXSize targetVectorSize) {
        int distance = (i + 1) * targetVectorSize.getBytes();
        if (this.findTwoConsecutive) {
            distance += (j ^ 1) * this.stride.value;
        }
        return -distance;
    }

    /**
     * Fills every lane of {@code dst} with the search value {@code srcVal}. The value is first
     * brought into a general-purpose register (using {@code tmpReg} if it is not already in
     * one), moved into the vector register and then broadcast according to the op's stride.
     */
    private void broadcastSearchValue(CompilationResultBuilder crb, AMD64MacroAssembler asm, Register dst, Value srcVal, Register tmpReg, Register tmpVector) {
        Register scalar = AMD64ArrayIndexOfOp.asRegOrTmpReg(crb, asm, srcVal, tmpReg);
        asm.movdl(dst, scalar);
        AMD64ArrayIndexOfOp.emitBroadcast(asm, this.stride, dst, tmpVector, this.vectorSize);
    }

    /**
     * Tells whether {@code val} is a {@link ConstantValue}; with assertions enabled it also
     * checks that such a value wraps a Java constant.
     */
    private static boolean isConstant(Value val) {
        if (val instanceof ConstantValue) {
            assert ((ConstantValue)val).isJavaConstant();
            return true;
        }
        return false;
    }

    /**
     * Returns the Java constant wrapped by {@code val}, which must be a {@link ConstantValue}.
     */
    private static JavaConstant asConstant(Value val) {
        ConstantValue constant = (ConstantValue)val;
        return constant.getJavaConstant();
    }

    /**
     * Returns a register holding {@code val}. A register value is returned as is; a stack slot
     * or a constant is first loaded into {@code tmpReg} with a 32-bit move.
     */
    private static Register asRegOrTmpReg(CompilationResultBuilder crb, AMD64MacroAssembler asm, Value val, Register tmpReg) {
        if (ValueUtil.isRegister(val)) {
            return ValueUtil.asRegister(val);
        }
        if (ValueUtil.isStackSlot(val)) {
            asm.movl(tmpReg, (AMD64Address)crb.asAddress(val));
        } else {
            assert (AMD64ArrayIndexOfOp.isConstant(val));
            asm.movl(tmpReg, AMD64ArrayIndexOfOp.asConstant(val).asInt());
        }
        return tmpReg;
    }

    /**
     * Broadcasts the lowest element of {@code vecDst} to all of its lanes, choosing the
     * instruction sequence by element stride and by the best available CPU feature
     * (AVX2, then AVX, then SSSE3 for bytes, otherwise plain SSE shuffles).
     * {@code vecTmp} is only clobbered by the byte-wise shuffle sequences.
     *
     * @throws UnsupportedOperationException for strides other than S1, S2 and S4
     */
    private static void emitBroadcast(AMD64MacroAssembler asm, Stride stride, Register vecDst, Register vecTmp, AVXKind.AVXSize targetVectorSize) {
        switch (stride) {
            case S1: {
                if (asm.supports(AMD64.CPUFeature.AVX2)) {
                    AMD64Assembler.VexRMOp.VPBROADCASTB.emit((AMD64Assembler)asm, targetVectorSize, vecDst, vecDst);
                } else if (asm.supports(AMD64.CPUFeature.AVX)) {
                    // Shuffle with an all-zero control vector selects byte 0 everywhere.
                    AMD64Assembler.VexRVMOp.VPXOR.emit((AMD64Assembler)asm, targetVectorSize, vecTmp, vecTmp, vecTmp);
                    AMD64Assembler.VexRVMOp.VPSHUFB.emit((AMD64Assembler)asm, targetVectorSize, vecDst, vecDst, vecTmp);
                } else if (asm.supports(AMD64.CPUFeature.SSSE3)) {
                    asm.pxor(vecTmp, vecTmp);
                    asm.pshufb(vecDst, vecTmp);
                } else {
                    asm.punpcklbw(vecDst, vecDst);
                    asm.punpcklbw(vecDst, vecDst);
                    asm.pshufd(vecDst, vecDst, 0);
                }
                return;
            }
            case S2: {
                if (asm.supports(AMD64.CPUFeature.AVX2)) {
                    AMD64Assembler.VexRMOp.VPBROADCASTW.emit((AMD64Assembler)asm, targetVectorSize, vecDst, vecDst);
                } else if (asm.supports(AMD64.CPUFeature.AVX)) {
                    AMD64Assembler.VexRMIOp.VPSHUFLW.emit((AMD64Assembler)asm, targetVectorSize, vecDst, vecDst, 0);
                    AMD64Assembler.VexRMIOp.VPSHUFD.emit((AMD64Assembler)asm, targetVectorSize, vecDst, vecDst, 0);
                } else {
                    asm.pshuflw(vecDst, vecDst, 0);
                    asm.pshufd(vecDst, vecDst, 0);
                }
                return;
            }
            case S4: {
                if (asm.supports(AMD64.CPUFeature.AVX2)) {
                    AMD64Assembler.VexRMOp.VPBROADCASTD.emit((AMD64Assembler)asm, targetVectorSize, vecDst, vecDst);
                } else if (asm.supports(AMD64.CPUFeature.AVX)) {
                    AMD64Assembler.VexRMIOp.VPSHUFD.emit((AMD64Assembler)asm, targetVectorSize, vecDst, vecDst, 0);
                } else {
                    asm.pshufd(vecDst, vecDst, 0);
                }
                return;
            }
            default: {
                throw new UnsupportedOperationException();
            }
        }
    }

    private void emitVectorCompare(AMD64MacroAssembler asm, AVXKind.AVXSize vSize, int nVectors, Register arrayPtr, Register index, Register[] vecCmp, Register[] vecArray, Register[] vecTmp, Register cmpResult, Label[] vectorFound, boolean shortJmp) {
        int j;
        int base;
        int i;
        int nVectorLoads = this.variant == LIRGeneratorTool.ArrayIndexOfVariant.Table ? this.stride.value : nVectors;
        for (i = 0; i < nVectorLoads; ++i) {
            base = i * this.nValues;
            for (j = 0; j < (this.withMask || this.variant == LIRGeneratorTool.ArrayIndexOfVariant.MatchRange ? this.nValues / 2 : this.nValues); ++j) {
                this.emitArrayLoad(asm, vSize, vecArray[base + j], arrayPtr, index, this.getVectorOffset(nVectorLoads - (i + 1), j, vSize));
            }
        }
        switch (this.variant) {
            case MatchAny: {
                for (i = 0; i < nVectors; ++i) {
                    base = i * this.nValues;
                    for (j = 0; j < this.nValues; ++j) {
                        asm.pcmpeq(vSize, this.stride, vecArray[base + j], vecCmp[j]);
                        if ((j & 1) != 1) continue;
                        asm.por(vSize, vecArray[base + j - 1], vecArray[base + j]);
                    }
                    if (this.nValues > 2) {
                        asm.por(vSize, vecArray[base], vecArray[base + 2]);
                    }
                    asm.pmovmsk(vSize, cmpResult, vecArray[base]);
                    AMD64ArrayIndexOfOp.emitVectorCompareCheckVectorFound(asm, vSize, cmpResult, vectorFound[nVectors - (i + 1)], shortJmp);
                }
                break;
            }
            case MatchRange: {
                assert (nVectors == 1) : nVectors;
                if (this.nValues == 2) {
                    asm.pminu(vSize, this.stride, vecTmp[0], vecCmp[0], vecArray[0]);
                    asm.pminu(vSize, this.stride, vecTmp[1], vecArray[0], vecCmp[1]);
                    asm.pcmpeq(vSize, this.stride, vecTmp[0], vecCmp[0]);
                    asm.pcmpeq(vSize, this.stride, vecTmp[1], vecArray[0]);
                    asm.pand(vSize, vecTmp[0], vecTmp[1]);
                } else {
                    assert (this.nValues == 4) : this.nValues;
                    asm.pminu(vSize, this.stride, vecTmp[0], vecCmp[0], vecArray[0]);
                    asm.pminu(vSize, this.stride, vecTmp[1], vecArray[0], vecCmp[1]);
                    asm.pcmpeq(vSize, this.stride, vecTmp[0], vecCmp[0]);
                    asm.pcmpeq(vSize, this.stride, vecTmp[1], vecArray[0]);
                    asm.pand(vSize, vecTmp[0], vecTmp[1]);
                    asm.pminu(vSize, this.stride, vecTmp[1], vecCmp[2], vecArray[0]);
                    asm.pminu(vSize, this.stride, vecTmp[2], vecArray[0], vecCmp[3]);
                    asm.pcmpeq(vSize, this.stride, vecTmp[1], vecCmp[2]);
                    asm.pcmpeq(vSize, this.stride, vecTmp[2], vecArray[0]);
                    asm.pand(vSize, vecTmp[1], vecTmp[2]);
                    asm.por(vSize, vecTmp[0], vecTmp[1]);
                }
                asm.pmovmsk(vSize, cmpResult, vecTmp[0]);
                AMD64ArrayIndexOfOp.emitVectorCompareCheckVectorFound(asm, vSize, cmpResult, vectorFound[0], shortJmp);
                break;
            }
            case WithMask: {
                assert (this.nValues == 2 && nVectors == 1) : Assertions.errorMessage(this.nValues, nVectors);
                asm.por(vSize, vecArray[0], vecCmp[1]);
                asm.pcmpeq(vSize, this.stride, vecArray[0], vecCmp[0]);
                asm.pmovmsk(vSize, cmpResult, vecArray[0]);
                AMD64ArrayIndexOfOp.emitVectorCompareCheckVectorFound(asm, vSize, cmpResult, vectorFound[0], shortJmp);
                break;
            }
            case FindTwoConsecutive: {
                for (i = 0; i < nVectors << 1; i += 2) {
                    asm.pcmpeq(vSize, this.stride, vecArray[i], vecCmp[0]);
                    asm.pcmpeq(vSize, this.stride, vecArray[i + 1], vecCmp[1]);
                    asm.pand(vSize, vecArray[i], vecArray[i + 1]);
                    asm.pmovmsk(vSize, cmpResult, vecArray[i]);
                    AMD64ArrayIndexOfOp.emitVectorCompareCheckVectorFound(asm, vSize, cmpResult, vectorFound[nVectors - (i / 2 + 1)], shortJmp);
                }
                break;
            }
            case FindTwoConsecutiveWithMask: {
                for (i = 0; i < nVectors << 1; i += 2) {
                    asm.por(vSize, vecArray[i], vecCmp[2]);
                    asm.por(vSize, vecArray[i + 1], vecCmp[3]);
                    asm.pcmpeq(vSize, this.stride, vecArray[i], vecCmp[0]);
                    asm.pcmpeq(vSize, this.stride, vecArray[i + 1], vecCmp[1]);
                    asm.pand(vSize, vecArray[i], vecArray[i + 1]);
                    asm.pmovmsk(vSize, cmpResult, vecArray[i]);
                    AMD64ArrayIndexOfOp.emitVectorCompareCheckVectorFound(asm, vSize, cmpResult, vectorFound[nVectors - (i / 2 + 1)], shortJmp);
                }
                break;
            }
            case Table: {
                Register mask0xf = vecTmp[0];
                Register tableHi = vecCmp[0];
                Register tableLo = vecCmp[1];
                switch (this.stride) {
                    case S1: {
                        AMD64ArrayIndexOfOp.performTableLookup(asm, vSize, mask0xf, tableHi, tableLo, vecArray[0], vecTmp[1], vecTmp[2], vecTmp[3]);
                        break;
                    }
                    case S2: {
                        asm.packuswb(vSize, vecTmp[1], vecArray[0], vecArray[1]);
                        asm.psrlw(vSize, vecArray[0], vecArray[0], 8);
                        asm.psrlw(vSize, vecArray[1], vecArray[1], 8);
                        asm.pxor(vSize, vecTmp[2], vecTmp[2]);
                        asm.packuswb(vSize, vecArray[0], vecArray[1]);
                        asm.pcmpeqb(vSize, vecArray[0], vecTmp[2]);
                        AMD64ArrayIndexOfOp.performTableLookup(asm, vSize, mask0xf, tableHi, tableLo, vecTmp[1], vecArray[1], vecTmp[2], vecTmp[3]);
                        asm.pand(vSize, vecTmp[2], vecArray[0]);
                        if (vSize != AVXKind.AVXSize.YMM) break;
                        AMD64Assembler.VexRMIOp.VPERMQ.emit((AMD64Assembler)asm, vSize, vecTmp[2], vecTmp[2], 216);
                        break;
                    }
                    case S4: {
                        asm.packusdw(vSize, vecTmp[1], vecArray[0], vecArray[1]);
                        asm.packusdw(vSize, vecTmp[2], vecArray[2], vecArray[3]);
                        asm.psrld(vSize, vecArray[0], vecArray[0], 8);
                        asm.psrld(vSize, vecArray[1], vecArray[1], 8);
                        asm.psrld(vSize, vecArray[2], vecArray[2], 8);
                        asm.psrld(vSize, vecArray[3], vecArray[3], 8);
                        asm.packuswb(vSize, vecTmp[1], vecTmp[2]);
                        asm.packusdw(vSize, vecArray[0], vecArray[1]);
                        asm.packusdw(vSize, vecArray[2], vecArray[3]);
                        asm.pxor(vSize, vecTmp[2], vecTmp[2]);
                        asm.packuswb(vSize, vecArray[0], vecArray[2]);
                        asm.pcmpeqb(vSize, vecArray[0], vecTmp[2]);
                        AMD64ArrayIndexOfOp.performTableLookup(asm, vSize, mask0xf, tableHi, tableLo, vecTmp[1], vecArray[1], vecTmp[2], vecArray[2]);
                        asm.pand(vSize, vecTmp[2], vecArray[0]);
                        if (vSize != AVXKind.AVXSize.YMM) break;
                        AMD64Assembler.VexRVMOp.VPERMD.emit((AMD64Assembler)asm, vSize, vecTmp[2], vecTmp[3], vecTmp[2]);
                        break;
                    }
                    default: {
                        throw GraalError.shouldNotReachHereUnexpectedValue((Object)this.stride);
                    }
                }
                asm.ptest(vSize, vecTmp[2], vecTmp[2]);
                asm.jcc(AMD64Assembler.ConditionFlag.NotZero, vectorFound[0], shortJmp);
            }
        }
    }

    /**
     * Emits a two-table lookup keyed on {@code vArray} and leaves the combined result in
     * {@code vTmp2}.
     *
     * <p>The emitted sequence is: shift {@code vArray} right by 4 into {@code vTmp1}, AND both
     * {@code vArray} and {@code vTmp1} with {@code vMask0xf}, shuffle {@code vTableHi} by
     * {@code vTmp1} and {@code vTableLo} by {@code vArray}, then AND the two shuffle results.
     * Presumably this splits every byte into its high and low nibble and uses each nibble as an
     * index into a 16-entry table - confirm against the assembler's operand conventions.
     *
     * <p>{@code vArray}, {@code vTmp1}, {@code vTmp2} and {@code vTmp3} are all overwritten.
     * Callers in this file read the result from the register passed as {@code vTmp2}.
     *
     * @param asm assembler the instructions are emitted into
     * @param vSize vector size passed to every emitted instruction
     * @param vMask0xf mask register ANDed with both index vectors (name suggests 0x0f per byte)
     * @param vTableHi table shuffled by the shifted-and-masked input
     * @param vTableLo table shuffled by the masked, unshifted input
     * @param vArray input values; clobbered by the masking step
     * @param vTmp1 scratch register receiving the shifted input
     * @param vTmp2 receives the {@code vTableHi} shuffle and then the final ANDed result
     * @param vTmp3 scratch register receiving the {@code vTableLo} shuffle
     */
    private static void performTableLookup(AMD64MacroAssembler asm, AVXKind.AVXSize vSize, Register vMask0xf, Register vTableHi, Register vTableLo, Register vArray, Register vTmp1, Register vTmp2, Register vTmp3) {
        // vTmp1 = vArray >> 4 (word-granular shift; the bits that leak across bytes are masked off below).
        asm.psrlw(vSize, vTmp1, vArray, 4);
        // Keep only the bits selected by the mask in the unshifted input (overwrites vArray).
        asm.pand(vSize, vArray, vMask0xf);
        // Same masking for the shifted copy.
        asm.pand(vSize, vTmp1, vMask0xf);
        // Look up the shifted index in the "hi" table ...
        asm.pshufb(vSize, vTmp2, vTableHi, vTmp1);
        // ... and the unshifted index in the "lo" table.
        asm.pshufb(vSize, vTmp3, vTableLo, vArray);
        // Combine both lookups; the result stays in vTmp2.
        asm.pand(vSize, vTmp2, vTmp3);
    }

    /**
     * Emits a check of the compare mask in {@code cmpResult} followed by a conditional jump to
     * {@code branchTarget} that is taken when the checked bits are not all zero.
     *
     * <p>For {@code DWORD} and {@code QWORD} vectors the mask is first ANDed with {@code 0xf} and
     * {@code 0xff} respectively, so only the low 4 or 8 bits take part in the decision. For
     * {@code XMM} and {@code YMM} the register is tested against itself. {@code ZMM} is not
     * supported and raises a {@link GraalError}.
     *
     * @param asm assembler the instructions are emitted into
     * @param targetVectorSize vector size the compare mask was produced for
     * @param cmpResult general purpose register holding the compare mask
     * @param branchTarget label jumped to when a checked bit is set
     * @param shortJmp forwarded unchanged to the emitted jump
     */
    private static void emitVectorCompareCheckVectorFound(AMD64MacroAssembler asm, AVXKind.AVXSize targetVectorSize, Register cmpResult, Label branchTarget, boolean shortJmp) {
        final AMD64Assembler.ConditionFlag anyBitSet = AMD64Assembler.ConditionFlag.NotZero;
        switch (targetVectorSize) {
            case ZMM:
                throw GraalError.shouldNotReachHereUnexpectedValue(targetVectorSize);
            case XMM:
            case YMM:
                // Every mask bit is relevant for these sizes, so a plain self-test suffices.
                asm.testlAndJcc(cmpResult, cmpResult, anyBitSet, branchTarget, shortJmp);
                return;
            case QWORD:
                // Restrict the decision to the low 8 mask bits.
                asm.andlAndJcc(cmpResult, 0xff, anyBitSet, branchTarget, shortJmp);
                return;
            case DWORD:
                // Restrict the decision to the low 4 mask bits.
                asm.andlAndJcc(cmpResult, 0xf, anyBitSet, branchTarget, shortJmp);
                return;
        }
    }

    /**
     * Emits a vector load of {@code targetVectorSize} from the address
     * {@code array + index * arrayIndexStride + displacement} into {@code vecDst}.
     *
     * <p>When the assembler reports AVX support, VEX-encoded moves are used; {@code DWORD} and
     * {@code QWORD} loads are emitted with {@code XMM} as the encoding size. Without AVX the
     * legacy move instructions are used and {@code ZMM} raises a {@link GraalError}.
     *
     * <p>NOTE(review): the non-AVX path emits the same {@code movdqu} for {@code XMM} and
     * {@code YMM}; presumably {@code YMM} is never requested without AVX - confirm with callers.
     *
     * @param asm assembler the load is emitted into
     * @param targetVectorSize amount of data to load
     * @param vecDst destination vector register
     * @param array base register of the address
     * @param index index register of the address, scaled by {@code arrayIndexStride}
     * @param displacement constant displacement of the address
     */
    private void emitArrayLoad(AMD64MacroAssembler asm, AVXKind.AVXSize targetVectorSize, Register vecDst, Register array, Register index, int displacement) {
        final AMD64Address source = new AMD64Address(array, index, this.arrayIndexStride, displacement);

        if (!asm.supports(AMD64.CPUFeature.AVX)) {
            // Legacy (non-VEX) encodings.
            switch (targetVectorSize) {
                case DWORD:
                    asm.movdl(vecDst, source);
                    return;
                case QWORD:
                    asm.movdq(vecDst, source);
                    return;
                case XMM:
                case YMM:
                    asm.movdqu(vecDst, source);
                    return;
                case ZMM:
                    throw GraalError.shouldNotReachHereUnexpectedValue(targetVectorSize);
            }
            return;
        }

        // VEX encodings; the emit methods are addressed through the base assembler type.
        final AMD64Assembler encoder = (AMD64Assembler) asm;
        switch (targetVectorSize) {
            case DWORD:
                AMD64Assembler.VexMoveOp.VMOVD.emit(encoder, AVXKind.AVXSize.XMM, vecDst, source);
                return;
            case QWORD:
                AMD64Assembler.VexMoveOp.VMOVQ.emit(encoder, AVXKind.AVXSize.XMM, vecDst, source);
                return;
            case XMM:
            case YMM:
                AMD64Assembler.VexMoveOp.VMOVDQU32.emit(encoder, targetVectorSize, vecDst, source);
                return;
            case ZMM:
                AMD64Assembler.VexMoveOp.VMOVDQU64.emit(encoder, targetVectorSize, vecDst, source);
                return;
        }
    }

    /**
     * Maps an element stride to the operand size of the same width.
     *
     * @param stride element stride
     * @return {@code BYTE} for {@code S1}, {@code WORD} for {@code S2}, {@code DWORD} for
     *         {@code S4}, and {@code QWORD} for every other stride
     */
    private static AMD64BaseAssembler.OperandSize getOpSize(Stride stride) {
        final AMD64BaseAssembler.OperandSize size;
        switch (stride) {
            case S1:
                size = AMD64BaseAssembler.OperandSize.BYTE;
                break;
            case S2:
                size = AMD64BaseAssembler.OperandSize.WORD;
                break;
            case S4:
                size = AMD64BaseAssembler.OperandSize.DWORD;
                break;
            default:
                size = AMD64BaseAssembler.OperandSize.QWORD;
                break;
        }
        return size;
    }

    /**
     * Maps an element stride to the operand size that is twice as wide as one element.
     *
     * @param stride element stride; with assertions enabled anything other than {@code S1},
     *        {@code S2} or {@code S4} trips an assertion
     * @return {@code WORD} for {@code S1}, {@code DWORD} for {@code S2}, otherwise {@code QWORD}
     */
    private static AMD64BaseAssembler.OperandSize getDoubleOpSize(Stride stride) {
        final AMD64BaseAssembler.OperandSize doubled;
        switch (stride) {
            case S1:
                doubled = AMD64BaseAssembler.OperandSize.WORD;
                break;
            case S2:
                doubled = AMD64BaseAssembler.OperandSize.DWORD;
                break;
            default:
                // Only S4 is expected to reach this arm.
                assert (stride == Stride.S4) : stride;
                doubled = AMD64BaseAssembler.OperandSize.QWORD;
                break;
        }
        return doubled;
    }
}

