/*
 * Decompiled with CFR 0.152.
 */
package jdk.graal.compiler.lir.amd64.vector;

import jdk.graal.compiler.asm.amd64.AMD64Address;
import jdk.graal.compiler.asm.amd64.AMD64Assembler;
import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler;
import jdk.graal.compiler.asm.amd64.AVXKind;
import jdk.graal.compiler.core.common.LIRKind;
import jdk.graal.compiler.debug.Assertions;
import jdk.graal.compiler.debug.GraalError;
import jdk.graal.compiler.lir.LIRInstruction;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.amd64.AMD64LIRInstruction;
import jdk.graal.compiler.lir.amd64.vector.AVX512Support;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.graal.compiler.lir.gen.LIRGeneratorTool;
import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.AllocatableValue;
import jdk.vm.ci.meta.Value;

public class AMD64VectorShuffle {

    /**
     * LIR op that emits a single {@code EVSHUFI64X2} instruction, passing {@code vector} as both
     * source operands and {@code selector} as the immediate, and writing to {@code result}.
     */
    public static final class ShuffleIntegerLanesOp extends AMD64LIRInstruction {
        public static final LIRInstructionClass<ShuffleIntegerLanesOp> TYPE = LIRInstructionClass.create(ShuffleIntegerLanesOp.class);

        @LIRInstruction.Def({LIRInstruction.OperandFlag.REG})
        protected AllocatableValue result;
        @LIRInstruction.Use({LIRInstruction.OperandFlag.REG})
        protected AllocatableValue vector;
        private final int selector;

        /**
         * @param result register receiving the shuffled vector
         * @param vector register holding the vector to shuffle
         * @param selector immediate passed unchanged to the instruction
         */
        public ShuffleIntegerLanesOp(AllocatableValue result, AllocatableValue vector, int selector) {
            super(TYPE);
            this.result = result;
            this.vector = vector;
            this.selector = selector;
        }

        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            AVXKind.AVXSize registerSize = AVXKind.getRegisterSize(this.vector);
            assert registerSize.getBytes() >= AVXKind.AVXSize.YMM.getBytes() : "expected size is YMM or ZMM, got " + registerSize;
            // The same register is used for both source operands.
            AMD64Assembler.VexRVMIOp.EVSHUFI64X2.emit(
                            (AMD64Assembler) masm,
                            registerSize,
                            ValueUtil.asRegister(this.result),
                            ValueUtil.asRegister(this.vector),
                            ValueUtil.asRegister(this.vector),
                            this.selector);
        }
    }

    /**
     * LIR op that inserts {@code val} into {@code vec} at element index {@code offset} and writes
     * the combined vector to {@code result}.
     * <p>
     * The instruction is chosen from the kind and bit width of {@code val}:
     * <ul>
     * <li>scalar integer kinds: {@code VPINSRB}/{@code VPINSRW}/{@code VPINSRD}/{@code VPINSRQ};</li>
     * <li>128-bit values: {@code VINSERTI128} or {@code VINSERTF128};</li>
     * <li>256-bit values: {@code EVINSERTI64X4} or {@code EVINSERTF64X4};</li>
     * <li>remaining 32-/64-bit values: {@code VMOVSS}/{@code VINSERTPS} and
     * {@code VMOVSD}/{@code VMOVLHPS} for a register source, {@code VINSERTPS} and
     * {@code VMOVLPD}/{@code VMOVHPD} for a stack source.</li>
     * </ul>
     */
    public static final class InsertOp extends AMD64LIRInstruction {
        public static final LIRInstructionClass<InsertOp> TYPE = LIRInstructionClass.create(InsertOp.class);
        @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG})
        protected AllocatableValue result;
        @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
        protected AllocatableValue vec;
        @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK})
        protected AllocatableValue val;
        private final int offset;
        // NOTE(review): the sibling ops declare this field private final; visibility is left
        // unchanged here because other code in the package may read it.
        AMD64Assembler.AMD64SIMDInstructionEncoding encoding;

        /**
         * @param result register receiving the combined vector
         * @param vec register holding the vector to insert into
         * @param val value to insert; register or stack slot
         * @param offset element index (in units of {@code vec}'s scalar kind) to insert at
         * @param encoding SIMD encoding applied to the ops that support selecting one
         */
        public InsertOp(AllocatableValue result, AllocatableValue vec, AllocatableValue val, int offset, AMD64Assembler.AMD64SIMDInstructionEncoding encoding) {
            super((LIRInstructionClass<? extends AMD64LIRInstruction>)TYPE);
            this.result = result;
            this.vec = vec;
            this.val = val;
            this.offset = offset;
            this.encoding = encoding;
        }

        /**
         * Selects and emits the insert instruction.
         *
         * @throws GraalError if the element kinds do not match, the combination is not insertable,
         *             or the width of {@code val} has no matching instruction
         */
        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            AMD64Kind vecKind = (AMD64Kind)this.vec.getPlatformKind();
            AMD64Kind valKind = (AMD64Kind)this.val.getPlatformKind();
            GraalError.guarantee(vecKind.getScalar() == valKind.getScalar() || vecKind.getScalar().getSizeInBytes() < 4 && valKind == AMD64Kind.DWORD, "element types must match %s, %s", (Object)vecKind, (Object)valKind);
            AMD64Kind elementKind = vecKind.getScalar();
            AVXKind.AVXSize size = AVXKind.getRegisterSize((Value)this.vec);
            int bitOffset = this.offset * elementKind.getSizeInBytes() * 8;
            int valBits = elementKind.getSizeInBytes() * valKind.getVectorLength() * 8;
            GraalError.guarantee(vecKind.getSizeInBytes() <= 16 || valKind.getSizeInBytes() >= 16, "must be insertable");

            Register dst = ValueUtil.asRegister((Value)this.result);
            Register nds = ValueUtil.asRegister((Value)this.vec);

            if (valKind.isInteger()) {
                // Integer inserts take the element index directly as the immediate.
                AMD64Assembler.VexRVMIOp op = switch (valBits) {
                    case 8 -> AMD64Assembler.VexRVMIOp.VPINSRB;
                    case 16 -> AMD64Assembler.VexRVMIOp.VPINSRW;
                    case 32 -> AMD64Assembler.VexRVMIOp.VPINSRD;
                    case 64 -> AMD64Assembler.VexRVMIOp.VPINSRQ;
                    default -> throw GraalError.shouldNotReachHereUnexpectedValue(valKind);
                };
                InsertOp.emitHelper(crb, op.encoding(this.encoding), masm, AVXKind.AVXSize.XMM, dst, nds, this.val, this.offset);
                return;
            }
            if (valBits == 128) {
                // The integer form is only chosen when AVX2 is available.
                AMD64Assembler.VexRVMIOp op = vecKind.getScalar().isInteger() && masm.supports(AMD64.CPUFeature.AVX2) ? AMD64Assembler.VexRVMIOp.VINSERTI128 : AMD64Assembler.VexRVMIOp.VINSERTF128;
                InsertOp.emitHelper(crb, op.encoding(this.encoding), masm, size, dst, nds, this.val, bitOffset / 128);
                return;
            }
            if (valBits == 256) {
                AMD64Assembler.VexRVMIOp op = vecKind.getScalar().isInteger() ? AMD64Assembler.VexRVMIOp.EVINSERTI64X4 : AMD64Assembler.VexRVMIOp.EVINSERTF64X4;
                InsertOp.emitHelper(crb, op, masm, size, dst, nds, this.val, bitOffset / 256);
                return;
            }

            if (ValueUtil.isRegister((Value)this.val)) {
                Register src = ValueUtil.asRegister((Value)this.val);
                switch (valBits) {
                    case 32 -> {
                        if (bitOffset == 0) {
                            AMD64Assembler.VexRVMOp.VMOVSS.encoding(this.encoding).emit((AMD64Assembler)masm, AVXKind.AVXSize.XMM, dst, nds, src);
                        } else {
                            // For 4-byte elements bitOffset >>> 1 equals offset << 4; presumably
                            // this targets the destination-select field of the VINSERTPS
                            // immediate -- confirm against the instruction reference.
                            AMD64Assembler.VexRVMIOp.VINSERTPS.encoding(this.encoding).emit((AMD64Assembler)masm, AVXKind.AVXSize.XMM, dst, nds, src, bitOffset >>> 1);
                        }
                    }
                    case 64 -> {
                        AMD64Assembler.VexRVMOp op = bitOffset == 0 ? AMD64Assembler.VexRVMOp.VMOVSD : AMD64Assembler.VexRVMOp.VMOVLHPS;
                        op.encoding(this.encoding).emit((AMD64Assembler)masm, AVXKind.AVXSize.XMM, dst, nds, src);
                    }
                    default -> throw GraalError.shouldNotReachHereUnexpectedValue(valKind);
                }
                return;
            }

            AMD64Address addr = (AMD64Address)crb.asAddress((Value)this.val);
            switch (valBits) {
                case 32 -> AMD64Assembler.VexRVMIOp.VINSERTPS.encoding(this.encoding).emit((AMD64Assembler)masm, AVXKind.AVXSize.XMM, dst, nds, addr, bitOffset >>> 1);
                case 64 -> {
                    AMD64Assembler.VexRVMOp op = bitOffset == 0 ? AMD64Assembler.VexRVMOp.VMOVLPD : AMD64Assembler.VexRVMOp.VMOVHPD;
                    op.encoding(this.encoding).emit((AMD64Assembler)masm, AVXKind.AVXSize.XMM, dst, nds, addr);
                }
                // Fail loudly, like the register path, instead of silently emitting nothing.
                default -> throw GraalError.shouldNotReachHereUnexpectedValue(valKind);
            }
        }

        /**
         * Emits {@code op} with {@code src} as either a register or a memory operand, depending on
         * where {@code src} currently lives.
         */
        private static void emitHelper(CompilationResultBuilder crb, AMD64Assembler.VexRVMIOp op, AMD64MacroAssembler masm, AVXKind.AVXSize size, Register dst, Register nds, AllocatableValue src, int imm8) {
            if (ValueUtil.isRegister((Value)src)) {
                op.emit((AMD64Assembler)masm, size, dst, nds, ValueUtil.asRegister((Value)src), imm8);
            } else {
                op.emit((AMD64Assembler)masm, size, dst, nds, (AMD64Address)crb.asAddress((Value)src), imm8);
            }
        }
    }

    public static final class ExtractLongOp
    extends AMD64LIRInstruction {
        public static final LIRInstructionClass<ExtractLongOp> TYPE = LIRInstructionClass.create(ExtractLongOp.class);
        @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK})
        protected AllocatableValue result;
        @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
        protected AllocatableValue vector;
        private final int selector;
        private final AMD64Assembler.AMD64SIMDInstructionEncoding encoding;

        public ExtractLongOp(AllocatableValue result, AllocatableValue vector, int selector, AMD64Assembler.AMD64SIMDInstructionEncoding encoding) {
            super((LIRInstructionClass<? extends AMD64LIRInstruction>)TYPE);
            assert (result.getPlatformKind() == AMD64Kind.QWORD) : result;
            assert (((AMD64Kind)vector.getPlatformKind()).getScalar() == AMD64Kind.QWORD) : vector;
            this.result = result;
            this.vector = vector;
            this.selector = selector;
            this.encoding = encoding;
        }

        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            if (ValueUtil.isRegister((Value)this.result)) {
                if (this.selector == 0) {
                    AMD64Assembler.VexMoveOp.VMOVQ.encoding(this.encoding).emitReverse(masm, AVXKind.AVXSize.XMM, ValueUtil.asRegister((Value)this.result), ValueUtil.asRegister((Value)this.vector));
                } else {
                    AMD64Assembler.VexMRIOp.VPEXTRQ.encoding(this.encoding).emit((AMD64Assembler)masm, AVXKind.AVXSize.XMM, ValueUtil.asRegister((Value)this.result), ValueUtil.asRegister((Value)this.vector), this.selector);
                }
            } else {
                assert (ValueUtil.isStackSlot((Value)this.result));
                if (this.selector == 0) {
                    AMD64Assembler.VexMoveOp.VMOVQ.encoding(this.encoding).emit((AMD64Assembler)masm, AVXKind.AVXSize.XMM, (AMD64Address)crb.asAddress((Value)this.result), ValueUtil.asRegister((Value)this.vector));
                } else {
                    AMD64Assembler.VexMRIOp.VPEXTRQ.encoding(this.encoding).emit((AMD64Assembler)masm, AVXKind.AVXSize.XMM, (AMD64Address)crb.asAddress((Value)this.result), ValueUtil.asRegister((Value)this.vector), this.selector);
                }
            }
        }
    }

    public static final class ExtractIntOp
    extends AMD64LIRInstruction {
        public static final LIRInstructionClass<ExtractIntOp> TYPE = LIRInstructionClass.create(ExtractIntOp.class);
        @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK})
        protected AllocatableValue result;
        @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
        protected AllocatableValue vector;
        private final int selector;
        private final AMD64Assembler.AMD64SIMDInstructionEncoding encoding;

        public ExtractIntOp(AllocatableValue result, AllocatableValue vector, int selector, AMD64Assembler.AMD64SIMDInstructionEncoding encoding) {
            super((LIRInstructionClass<? extends AMD64LIRInstruction>)TYPE);
            assert (result.getPlatformKind() == AMD64Kind.DWORD) : result;
            assert (((AMD64Kind)vector.getPlatformKind()).getScalar() == AMD64Kind.DWORD) : vector;
            this.result = result;
            this.vector = vector;
            this.selector = selector;
            this.encoding = encoding;
        }

        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            if (ValueUtil.isRegister((Value)this.result)) {
                if (this.selector == 0) {
                    AMD64Assembler.VexMoveOp.VMOVD.encoding(this.encoding).emitReverse(masm, AVXKind.AVXSize.XMM, ValueUtil.asRegister((Value)this.result), ValueUtil.asRegister((Value)this.vector));
                } else {
                    AMD64Assembler.VexMRIOp.VPEXTRD.encoding(this.encoding).emit((AMD64Assembler)masm, AVXKind.AVXSize.XMM, ValueUtil.asRegister((Value)this.result), ValueUtil.asRegister((Value)this.vector), this.selector);
                }
            } else {
                assert (ValueUtil.isStackSlot((Value)this.result));
                if (this.selector == 0) {
                    AMD64Assembler.VexMoveOp.VMOVD.encoding(this.encoding).emit((AMD64Assembler)masm, AVXKind.AVXSize.XMM, (AMD64Address)crb.asAddress((Value)this.result), ValueUtil.asRegister((Value)this.vector));
                } else {
                    AMD64Assembler.VexMRIOp.VPEXTRD.encoding(this.encoding).emit((AMD64Assembler)masm, AVXKind.AVXSize.XMM, (AMD64Address)crb.asAddress((Value)this.result), ValueUtil.asRegister((Value)this.vector), this.selector);
                }
            }
        }
    }

    /**
     * LIR op that emits {@code VPEXTRW} to copy the WORD element selected by {@code selector}
     * from {@code vector} into the DWORD register {@code result}.
     */
    public static final class ExtractShortOp extends AMD64LIRInstruction {
        public static final LIRInstructionClass<ExtractShortOp> TYPE = LIRInstructionClass.create(ExtractShortOp.class);

        @LIRInstruction.Def({LIRInstruction.OperandFlag.REG})
        protected AllocatableValue result;
        @LIRInstruction.Use({LIRInstruction.OperandFlag.REG})
        protected AllocatableValue vector;
        private final int selector;
        private final AMD64Assembler.AMD64SIMDInstructionEncoding encoding;

        /**
         * @param result DWORD destination register
         * @param vector register holding a vector whose scalar kind is WORD
         * @param selector index of the element to extract
         * @param encoding SIMD encoding applied to the emitted instruction
         */
        public ExtractShortOp(AllocatableValue result, AllocatableValue vector, int selector, AMD64Assembler.AMD64SIMDInstructionEncoding encoding) {
            super(TYPE);
            assert result.getPlatformKind() == AMD64Kind.DWORD : result;
            assert ((AMD64Kind) vector.getPlatformKind()).getScalar() == AMD64Kind.WORD : vector;
            this.result = result;
            this.vector = vector;
            this.selector = selector;
            this.encoding = encoding;
        }

        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            var extract = AMD64Assembler.VexMRIOp.VPEXTRW.encoding(this.encoding);
            extract.emit(
                            (AMD64Assembler) masm,
                            AVXKind.AVXSize.XMM,
                            ValueUtil.asRegister(this.result),
                            ValueUtil.asRegister(this.vector),
                            this.selector);
        }
    }

    /**
     * LIR op that emits {@code VPEXTRB} to copy the BYTE element selected by {@code selector}
     * from {@code vector} into the DWORD register {@code result}.
     */
    public static final class ExtractByteOp extends AMD64LIRInstruction {
        public static final LIRInstructionClass<ExtractByteOp> TYPE = LIRInstructionClass.create(ExtractByteOp.class);

        @LIRInstruction.Def({LIRInstruction.OperandFlag.REG})
        protected AllocatableValue result;
        @LIRInstruction.Use({LIRInstruction.OperandFlag.REG})
        protected AllocatableValue vector;
        private final int selector;
        private final AMD64Assembler.AMD64SIMDInstructionEncoding encoding;

        /**
         * @param result DWORD destination register
         * @param vector register holding a vector whose scalar kind is BYTE
         * @param selector index of the element to extract
         * @param encoding SIMD encoding applied to the emitted instruction
         */
        public ExtractByteOp(AllocatableValue result, AllocatableValue vector, int selector, AMD64Assembler.AMD64SIMDInstructionEncoding encoding) {
            super(TYPE);
            assert result.getPlatformKind() == AMD64Kind.DWORD : result;
            assert ((AMD64Kind) vector.getPlatformKind()).getScalar() == AMD64Kind.BYTE : Assertions.errorMessage(vector);
            this.result = result;
            this.vector = vector;
            this.selector = selector;
            this.encoding = encoding;
        }

        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            var extract = AMD64Assembler.VexMRIOp.VPEXTRB.encoding(this.encoding);
            extract.emit(
                            (AMD64Assembler) masm,
                            AVXKind.AVXSize.XMM,
                            ValueUtil.asRegister(this.result),
                            ValueUtil.asRegister(this.vector),
                            this.selector);
        }
    }

    /**
     * LIR op that inserts the 256-bit value {@code source2} (register or stack slot) into
     * {@code source1} at the position given by {@code selector}, writing to {@code result}.
     * <p>
     * The instruction is picked from the scalar kind of {@code result}: {@code EVINSERTF64X4} for
     * DOUBLE, {@code EVINSERTI64X4} for QWORD, and the 32X8 forms for DWORD and all remaining
     * kinds when AVX512DQ is supported (falling back to the 64X4 forms otherwise).
     */
    public static final class Insert256Op extends AMD64LIRInstruction {
        public static final LIRInstructionClass<Insert256Op> TYPE = LIRInstructionClass.create(Insert256Op.class);
        @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG})
        protected AllocatableValue result;
        @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
        protected AllocatableValue source1;
        @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK})
        protected AllocatableValue source2;
        private final int selector;

        /**
         * @param result register receiving the combined vector
         * @param source1 register holding the vector to insert into
         * @param source2 256-bit value to insert; register or stack slot
         * @param selector immediate passed unchanged to the instruction
         */
        public Insert256Op(AllocatableValue result, AllocatableValue source1, AllocatableValue source2, int selector) {
            super((LIRInstructionClass<? extends AMD64LIRInstruction>)TYPE);
            this.result = result;
            this.source1 = source1;
            this.source2 = source2;
            this.selector = selector;
        }

        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            AMD64Kind kind = (AMD64Kind)this.result.getPlatformKind();
            AVXKind.AVXSize size = AVXKind.getRegisterSize(kind);
            assert size == AVXKind.AVXSize.ZMM : "Can only insert 256 bits into ZMM register";
            AMD64Assembler.VexRVMIOp op = switch (kind.getScalar()) {
                case DOUBLE -> AMD64Assembler.VexRVMIOp.EVINSERTF64X4;
                case DWORD -> masm.supports(AMD64.CPUFeature.AVX512DQ) ? AMD64Assembler.VexRVMIOp.EVINSERTI32X8 : AMD64Assembler.VexRVMIOp.EVINSERTI64X4;
                case QWORD -> AMD64Assembler.VexRVMIOp.EVINSERTI64X4;
                default -> masm.supports(AMD64.CPUFeature.AVX512DQ) ? AMD64Assembler.VexRVMIOp.EVINSERTF32X8 : AMD64Assembler.VexRVMIOp.EVINSERTF64X4;
            };
            if (ValueUtil.isRegister((Value)this.source2)) {
                op.emit((AMD64Assembler)masm, size, ValueUtil.asRegister((Value)this.result), ValueUtil.asRegister((Value)this.source1), ValueUtil.asRegister((Value)this.source2), this.selector);
            } else {
                assert (ValueUtil.isStackSlot((Value)this.source2));
                op.emit((AMD64Assembler)masm, size, ValueUtil.asRegister((Value)this.result), ValueUtil.asRegister((Value)this.source1), (AMD64Address)crb.asAddress((Value)this.source2), this.selector);
            }
        }
    }

    /**
     * LIR op that inserts the 128-bit value {@code source2} (register or stack slot) into
     * {@code source1} at the position given by {@code selector}, writing to {@code result}.
     * <p>
     * With EVEX encoding one of the {@code EVINSERT*} instructions is picked from the scalar kind
     * of {@code result}; the 64X2 forms are only used when AVX512DQ is supported. Otherwise
     * {@code VINSERTF128} is used for SINGLE and DOUBLE, and {@code VINSERTI128} for all other
     * kinds when AVX2 is supported (falling back to {@code VINSERTF128}).
     */
    public static final class Insert128Op extends AMD64LIRInstruction {
        public static final LIRInstructionClass<Insert128Op> TYPE = LIRInstructionClass.create(Insert128Op.class);
        @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG})
        protected AllocatableValue result;
        @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
        protected AllocatableValue source1;
        @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK})
        protected AllocatableValue source2;
        private final int selector;
        private final AMD64Assembler.AMD64SIMDInstructionEncoding encoding;

        /**
         * @param result register receiving the combined vector
         * @param source1 register holding the vector to insert into
         * @param source2 128-bit value to insert; register or stack slot
         * @param selector immediate passed unchanged to the instruction
         * @param encoding decides between the EVEX instruction family and the 128-bit VEX forms
         */
        public Insert128Op(AllocatableValue result, AllocatableValue source1, AllocatableValue source2, int selector, AMD64Assembler.AMD64SIMDInstructionEncoding encoding) {
            super((LIRInstructionClass<? extends AMD64LIRInstruction>)TYPE);
            this.result = result;
            this.source1 = source1;
            this.source2 = source2;
            this.selector = selector;
            this.encoding = encoding;
        }

        /**
         * Selects and emits the insert instruction.
         *
         * @throws GraalError if the register size of {@code result} is not valid for the encoding
         */
        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            AMD64Kind kind = (AMD64Kind)this.result.getPlatformKind();
            AVXKind.AVXSize size = AVXKind.getRegisterSize(kind);
            AMD64Assembler.VexRVMIOp op;
            if (this.encoding == AMD64Assembler.AMD64SIMDInstructionEncoding.EVEX) {
                GraalError.guarantee(size.getBytes() >= AVXKind.AVXSize.YMM.getBytes(), "Unexpected vector size %s for insert-128-bits op", (Object)size);
                op = switch (kind.getScalar()) {
                    case DOUBLE -> masm.supports(AMD64.CPUFeature.AVX512DQ) ? AMD64Assembler.VexRVMIOp.EVINSERTF64X2 : AMD64Assembler.VexRVMIOp.EVINSERTF32X4;
                    case DWORD -> AMD64Assembler.VexRVMIOp.EVINSERTI32X4;
                    case QWORD -> masm.supports(AMD64.CPUFeature.AVX512DQ) ? AMD64Assembler.VexRVMIOp.EVINSERTI64X2 : AMD64Assembler.VexRVMIOp.EVINSERTI32X4;
                    default -> AMD64Assembler.VexRVMIOp.EVINSERTF32X4;
                };
            } else {
                GraalError.guarantee(size.getBytes() == AVXKind.AVXSize.YMM.getBytes(), "Unexpected vector size %s for insert-128-bits op", (Object)size);
                // Every branch must assign op; previously the SINGLE/DOUBLE case assigned a
                // throw-away local and left op undefined.
                op = switch (kind.getScalar()) {
                    case SINGLE, DOUBLE -> AMD64Assembler.VexRVMIOp.VINSERTF128;
                    default -> masm.supports(AMD64.CPUFeature.AVX2) ? AMD64Assembler.VexRVMIOp.VINSERTI128 : AMD64Assembler.VexRVMIOp.VINSERTF128;
                };
            }
            if (ValueUtil.isRegister((Value)this.source2)) {
                op.emit((AMD64Assembler)masm, size, ValueUtil.asRegister((Value)this.result), ValueUtil.asRegister((Value)this.source1), ValueUtil.asRegister((Value)this.source2), this.selector);
            } else {
                assert (ValueUtil.isStackSlot((Value)this.source2));
                op.emit((AMD64Assembler)masm, size, ValueUtil.asRegister((Value)this.result), ValueUtil.asRegister((Value)this.source1), (AMD64Address)crb.asAddress((Value)this.source2), this.selector);
            }
        }
    }

    /**
     * LIR op that extracts the 256-bit part of {@code source} chosen by {@code selector} into
     * {@code result}, which may be a register or a stack slot.
     * <p>
     * The instruction is picked from the scalar kind of {@code source}: {@code EVEXTRACTF64X4}
     * for DOUBLE, {@code EVEXTRACTI64X4} for QWORD, and the 32X8 forms for DWORD and all
     * remaining kinds when AVX512DQ is supported (falling back to the 64X4 forms otherwise).
     */
    public static class Extract256Op extends AMD64LIRInstruction {
        public static final LIRInstructionClass<Extract256Op> TYPE = LIRInstructionClass.create(Extract256Op.class);

        @LIRInstruction.Def({LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK})
        protected AllocatableValue result;
        @LIRInstruction.Use({LIRInstruction.OperandFlag.REG})
        protected AllocatableValue source;
        private final int selector;

        /**
         * @param result destination of the extracted part (register or stack slot)
         * @param source register holding the vector to extract from
         * @param selector immediate passed unchanged to the instruction
         */
        public Extract256Op(AllocatableValue result, AllocatableValue source, int selector) {
            super(TYPE);
            this.result = result;
            this.source = source;
            this.selector = selector;
        }

        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            AMD64Kind sourceKind = (AMD64Kind) this.source.getPlatformKind();
            assert AVXKind.getRegisterSize(sourceKind) == AVXKind.AVXSize.ZMM : "Can only extract 256 bits from ZMM register";

            AMD64Assembler.VexMRIOp extract = switch (sourceKind.getScalar()) {
                case DOUBLE -> AMD64Assembler.VexMRIOp.EVEXTRACTF64X4;
                case DWORD -> masm.supports(AMD64.CPUFeature.AVX512DQ) ? AMD64Assembler.VexMRIOp.EVEXTRACTI32X8 : AMD64Assembler.VexMRIOp.EVEXTRACTI64X4;
                case QWORD -> AMD64Assembler.VexMRIOp.EVEXTRACTI64X4;
                default -> masm.supports(AMD64.CPUFeature.AVX512DQ) ? AMD64Assembler.VexMRIOp.EVEXTRACTF32X8 : AMD64Assembler.VexMRIOp.EVEXTRACTF64X4;
            };

            if (!ValueUtil.isRegister(this.result)) {
                // Destination lives on the stack: store straight to memory.
                assert ValueUtil.isStackSlot(this.result);
                extract.emit((AMD64Assembler) masm, AVXKind.getRegisterSize(sourceKind), (AMD64Address) crb.asAddress(this.result), ValueUtil.asRegister(this.source), this.selector);
                return;
            }
            extract.emit((AMD64Assembler) masm, AVXKind.getRegisterSize(sourceKind), ValueUtil.asRegister(this.result), ValueUtil.asRegister(this.source), this.selector);
        }
    }

    /**
     * LIR op that extracts the 128-bit part of {@code source} chosen by {@code selector} into
     * {@code result}, which may be a register or a stack slot.
     * <p>
     * {@code VEXTRACTF128} is used for SINGLE and DOUBLE vectors; all other kinds use
     * {@code VEXTRACTI128} when AVX2 is supported and fall back to {@code VEXTRACTF128}
     * otherwise. The configured encoding is applied to the selected op before emitting.
     */
    public static final class Extract128Op extends AMD64LIRInstruction {
        public static final LIRInstructionClass<Extract128Op> TYPE = LIRInstructionClass.create(Extract128Op.class);
        @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK})
        protected AllocatableValue result;
        @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
        protected AllocatableValue source;
        private final int selector;
        private final AMD64Assembler.AMD64SIMDInstructionEncoding encoding;

        /**
         * @param result destination of the extracted part (register or stack slot)
         * @param source register holding the vector to extract from
         * @param selector immediate passed unchanged to the instruction
         * @param encoding SIMD encoding applied to the emitted instruction
         */
        public Extract128Op(AllocatableValue result, AllocatableValue source, int selector, AMD64Assembler.AMD64SIMDInstructionEncoding encoding) {
            super((LIRInstructionClass<? extends AMD64LIRInstruction>)TYPE);
            this.result = result;
            this.source = source;
            this.selector = selector;
            this.encoding = encoding;
        }

        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            AMD64Kind kind = (AMD64Kind)this.source.getPlatformKind();
            AVXKind.AVXSize size = AVXKind.getRegisterSize(kind);
            // Every branch must yield an op; previously the SINGLE/DOUBLE case assigned a
            // throw-away local and left op undefined.
            AMD64Assembler.VexMRIOp op = switch (kind.getScalar()) {
                case SINGLE, DOUBLE -> AMD64Assembler.VexMRIOp.VEXTRACTF128;
                default -> masm.supports(AMD64.CPUFeature.AVX2) ? AMD64Assembler.VexMRIOp.VEXTRACTI128 : AMD64Assembler.VexMRIOp.VEXTRACTF128;
            };
            if (ValueUtil.isRegister((Value)this.result)) {
                op.encoding(this.encoding).emit((AMD64Assembler)masm, size, ValueUtil.asRegister((Value)this.result), ValueUtil.asRegister((Value)this.source), this.selector);
            } else {
                assert (ValueUtil.isStackSlot((Value)this.result));
                op.encoding(this.encoding).emit((AMD64Assembler)masm, size, (AMD64Address)crb.asAddress((Value)this.result), ValueUtil.asRegister((Value)this.source), this.selector);
            }
        }
    }

    /**
     * LIR op that emits {@code VSHUFPS} (SINGLE vectors) or {@code VSHUFPD} (DOUBLE vectors) on
     * {@code source1} and {@code source2}, with {@code selector} as the immediate. The second
     * source may be a register or a stack slot.
     */
    public static class ShuffleFloatOp extends AMD64LIRInstruction {
        public static final LIRInstructionClass<ShuffleFloatOp> TYPE = LIRInstructionClass.create(ShuffleFloatOp.class);

        @LIRInstruction.Def({LIRInstruction.OperandFlag.REG})
        protected AllocatableValue result;
        @LIRInstruction.Use({LIRInstruction.OperandFlag.REG})
        protected AllocatableValue source1;
        @LIRInstruction.Use({LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK})
        protected AllocatableValue source2;
        private final int selector;
        private final AMD64Assembler.AMD64SIMDInstructionEncoding encoding;

        /**
         * @param result register receiving the shuffled vector
         * @param source1 register holding the first source vector
         * @param source2 second source vector; register or stack slot
         * @param selector immediate passed unchanged to the instruction
         * @param encoding SIMD encoding applied to the emitted instruction
         */
        public ShuffleFloatOp(AllocatableValue result, AllocatableValue source1, AllocatableValue source2, int selector, AMD64Assembler.AMD64SIMDInstructionEncoding encoding) {
            super(TYPE);
            this.result = result;
            this.source1 = source1;
            this.source2 = source2;
            this.selector = selector;
            this.encoding = encoding;
        }

        /**
         * @throws GraalError if the scalar kind of {@code result} is neither SINGLE nor DOUBLE
         */
        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            AMD64Kind resultKind = (AMD64Kind) this.result.getPlatformKind();
            AMD64Kind scalar = resultKind.getScalar();
            AMD64Assembler.VexRVMIOp shuffle = switch (scalar) {
                case SINGLE -> AMD64Assembler.VexRVMIOp.VSHUFPS.encoding(this.encoding);
                case DOUBLE -> AMD64Assembler.VexRVMIOp.VSHUFPD.encoding(this.encoding);
                default -> throw GraalError.shouldNotReachHereUnexpectedValue(scalar);
            };

            if (!ValueUtil.isRegister(this.source2)) {
                // Second source lives on the stack: use it as a memory operand.
                assert ValueUtil.isStackSlot(this.source2);
                shuffle.emit((AMD64Assembler) masm, AVXKind.getRegisterSize(resultKind), ValueUtil.asRegister(this.result), ValueUtil.asRegister(this.source1), (AMD64Address) crb.asAddress(this.source2), this.selector);
                return;
            }
            shuffle.emit((AMD64Assembler) masm, AVXKind.getRegisterSize(resultKind), ValueUtil.asRegister(this.result), ValueUtil.asRegister(this.source1), ValueUtil.asRegister(this.source2), this.selector);
        }
    }

    /**
     * Variant of {@link ShuffleWordOp} that additionally passes an opmask register to the emitted
     * instruction and exposes it through {@link #getOpmask()}.
     */
    public static class ShuffleWordOpWithMask extends ShuffleWordOp implements AVX512Support {
        public static final LIRInstructionClass<ShuffleWordOpWithMask> TYPE = LIRInstructionClass.create(ShuffleWordOpWithMask.class);

        @LIRInstruction.Use({LIRInstruction.OperandFlag.REG})
        protected AllocatableValue mask;

        /**
         * @param op instruction to emit
         * @param result register receiving the shuffled vector
         * @param source vector to shuffle; register or stack slot
         * @param selector immediate passed unchanged to the instruction
         * @param mask register holding the opmask
         */
        public ShuffleWordOpWithMask(AMD64Assembler.VexRMIOp op, AllocatableValue result, AllocatableValue source, int selector, AllocatableValue mask) {
            super(TYPE, op, result, source, selector);
            this.mask = mask;
        }

        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            AMD64Kind sourceKind = (AMD64Kind) this.source.getPlatformKind();
            // NOTE(review): the trailing constants 1 and 0 are presumably the EVEX zeroing and
            // broadcast flags -- confirm against VexRMIOp.emit.
            if (!ValueUtil.isRegister(this.source)) {
                this.op.emit((AMD64Assembler) masm, AVXKind.getRegisterSize(sourceKind), ValueUtil.asRegister(this.result), (AMD64Address) crb.asAddress(this.source), this.selector, ValueUtil.asRegister(this.mask), 1, 0);
                return;
            }
            this.op.emit((AMD64Assembler) masm, AVXKind.getRegisterSize(sourceKind), ValueUtil.asRegister(this.result), ValueUtil.asRegister(this.source), this.selector, ValueUtil.asRegister(this.mask), 1, 0);
        }

        @Override
        public AllocatableValue getOpmask() {
            return this.mask;
        }
    }

    /**
     * LIR op that emits the supplied {@code VexRMIOp} on {@code source} (register or stack slot)
     * with {@code selector} as the immediate, writing to {@code result}. The operand size is
     * derived from the platform kind of {@code source}.
     */
    public static class ShuffleWordOp extends AMD64LIRInstruction {
        public static final LIRInstructionClass<ShuffleWordOp> TYPE = LIRInstructionClass.create(ShuffleWordOp.class);

        protected final AMD64Assembler.VexRMIOp op;
        @LIRInstruction.Def({LIRInstruction.OperandFlag.REG})
        protected AllocatableValue result;
        @LIRInstruction.Use({LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK})
        protected AllocatableValue source;
        protected final int selector;

        /**
         * @param op instruction to emit
         * @param result register receiving the shuffled vector
         * @param source vector to shuffle; register or stack slot
         * @param selector immediate passed unchanged to the instruction
         */
        public ShuffleWordOp(AMD64Assembler.VexRMIOp op, AllocatableValue result, AllocatableValue source, int selector) {
            this(TYPE, op, result, source, selector);
        }

        /**
         * Constructor for subclasses, which pass their own instruction class as {@code c}.
         */
        protected ShuffleWordOp(LIRInstructionClass<? extends AMD64LIRInstruction> c, AMD64Assembler.VexRMIOp op, AllocatableValue result, AllocatableValue source, int selector) {
            super(c);
            this.op = op;
            this.result = result;
            this.source = source;
            this.selector = selector;
        }

        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            AMD64Kind sourceKind = (AMD64Kind) this.source.getPlatformKind();
            if (!ValueUtil.isRegister(this.source)) {
                // Source lives on the stack: use it as a memory operand.
                this.op.emit((AMD64Assembler) masm, AVXKind.getRegisterSize(sourceKind), ValueUtil.asRegister(this.result), (AMD64Address) crb.asAddress(this.source), this.selector);
                return;
            }
            this.op.emit((AMD64Assembler) masm, AVXKind.getRegisterSize(sourceKind), ValueUtil.asRegister(this.result), ValueUtil.asRegister(this.source), this.selector);
        }
    }

    public static final class ConstShuffleBytesOpWithMask
    extends ConstShuffleBytesOp
    implements AVX512Support {
        public static final LIRInstructionClass<ConstShuffleBytesOpWithMask> TYPE = LIRInstructionClass.create(ConstShuffleBytesOpWithMask.class);
        @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
        protected AllocatableValue mask;

        public ConstShuffleBytesOpWithMask(AllocatableValue result, AllocatableValue source, AllocatableValue mask, byte ... selector) {
            super(TYPE, result, source, AMD64Assembler.AMD64SIMDInstructionEncoding.EVEX, selector);
            this.mask = mask;
        }

        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            AMD64Kind kind = (AMD64Kind)this.source.getPlatformKind();
            int alignment = crb.dataBuilder.ensureValidDataAlignment(this.selector.length);
            AMD64Address address = (AMD64Address)crb.recordDataReferenceInCode(this.selector, alignment);
            AMD64Assembler.VexRVMOp.EVPSHUFB.emit((AMD64Assembler)masm, AVXKind.getRegisterSize(kind), ValueUtil.asRegister((Value)this.result), ValueUtil.asRegister((Value)this.source), address, ValueUtil.asRegister((Value)this.mask));
        }

        @Override
        public AllocatableValue getOpmask() {
            return this.mask;
        }
    }

    public static class ConstShuffleBytesOp
    extends AMD64LIRInstruction {
        public static final LIRInstructionClass<ConstShuffleBytesOp> TYPE = LIRInstructionClass.create(ConstShuffleBytesOp.class);
        @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG})
        protected AllocatableValue result;
        @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
        protected AllocatableValue source;
        protected final byte[] selector;
        private final AMD64Assembler.AMD64SIMDInstructionEncoding encoding;

        public ConstShuffleBytesOp(AllocatableValue result, AllocatableValue source, AMD64Assembler.AMD64SIMDInstructionEncoding encoding, byte ... selector) {
            this(TYPE, result, source, encoding, selector);
        }

        public ConstShuffleBytesOp(LIRInstructionClass<? extends AMD64LIRInstruction> c, AllocatableValue result, AllocatableValue source, AMD64Assembler.AMD64SIMDInstructionEncoding encoding, byte ... selector) {
            super(c);
            assert (AVXKind.getRegisterSize((AMD64Kind)source.getPlatformKind()).getBytes() == selector.length) : " Register size=" + AVXKind.getRegisterSize((AMD64Kind)source.getPlatformKind()).getBytes() + " select length=" + selector.length;
            this.result = result;
            this.source = source;
            this.encoding = encoding;
            this.selector = selector;
        }

        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            AMD64Kind kind = (AMD64Kind)this.source.getPlatformKind();
            int alignment = crb.dataBuilder.ensureValidDataAlignment(this.selector.length);
            AMD64Address address = (AMD64Address)crb.recordDataReferenceInCode(this.selector, alignment);
            AMD64Assembler.VexRVMOp.VPSHUFB.encoding(this.encoding).emit((AMD64Assembler)masm, AVXKind.getRegisterSize(kind), ValueUtil.asRegister((Value)this.result), ValueUtil.asRegister((Value)this.source), address);
        }
    }

    public static final class ConstPermuteBytesUsingTableOp
    extends AMD64LIRInstruction
    implements AVX512Support {
        public static final LIRInstructionClass<ConstPermuteBytesUsingTableOp> TYPE = LIRInstructionClass.create(ConstPermuteBytesUsingTableOp.class);
        @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG})
        protected AllocatableValue result;
        @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK})
        protected AllocatableValue source;
        @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
        protected AllocatableValue mask;
        @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
        protected AllocatableValue selector;
        byte[] selectorData;

        public ConstPermuteBytesUsingTableOp(LIRGeneratorTool tool, AllocatableValue result, AllocatableValue source, byte[] selectorData, AllocatableValue mask) {
            super((LIRInstructionClass<? extends AMD64LIRInstruction>)TYPE);
            this.result = result;
            this.source = source;
            this.selectorData = selectorData;
            this.selector = tool.newVariable(LIRKind.value(source.getPlatformKind()));
            this.mask = mask;
        }

        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            AMD64Kind kind = (AMD64Kind)this.source.getPlatformKind();
            int alignment = crb.dataBuilder.ensureValidDataAlignment(this.selectorData.length);
            AMD64Address address = (AMD64Address)crb.recordDataReferenceInCode(this.selectorData, alignment);
            AMD64Assembler.VexMoveOp.EVMOVDQU64.emit((AMD64Assembler)masm, AVXKind.getRegisterSize(kind), ValueUtil.asRegister((Value)this.selector), address);
            AMD64Assembler.VexRVMOp.EVPXOR.emit((AMD64Assembler)masm, AVXKind.getRegisterSize(kind), ValueUtil.asRegister((Value)this.result), ValueUtil.asRegister((Value)this.result), ValueUtil.asRegister((Value)this.result));
            if (ValueUtil.isRegister((Value)this.source)) {
                AMD64Assembler.VexRVMOp.EVPERMT2B.emit((AMD64Assembler)masm, AVXKind.getRegisterSize(kind), ValueUtil.asRegister((Value)this.result), ValueUtil.asRegister((Value)this.selector), ValueUtil.asRegister((Value)this.source), this.mask != null ? ValueUtil.asRegister((Value)this.mask) : Register.None, this.mask != null ? 1 : 0, 0);
            } else {
                AMD64Assembler.VexRVMOp.EVPERMT2B.emit((AMD64Assembler)masm, AVXKind.getRegisterSize(kind), ValueUtil.asRegister((Value)this.result), ValueUtil.asRegister((Value)this.selector), (AMD64Address)crb.asAddress((Value)this.source), this.mask != null ? ValueUtil.asRegister((Value)this.mask) : Register.None, this.mask != null ? 1 : 0, 0);
            }
        }

        @Override
        public AllocatableValue getOpmask() {
            return this.mask;
        }
    }

    public static final class ShuffleBytesOp
    extends AMD64LIRInstruction {
        public static final LIRInstructionClass<ShuffleBytesOp> TYPE = LIRInstructionClass.create(ShuffleBytesOp.class);
        @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG})
        protected AllocatableValue result;
        @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
        protected AllocatableValue source;
        @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK})
        protected AllocatableValue selector;
        private final AMD64Assembler.AMD64SIMDInstructionEncoding encoding;

        public ShuffleBytesOp(AllocatableValue result, AllocatableValue source, AllocatableValue selector, AMD64Assembler.AMD64SIMDInstructionEncoding encoding) {
            super((LIRInstructionClass<? extends AMD64LIRInstruction>)TYPE);
            this.result = result;
            this.source = source;
            this.selector = selector;
            this.encoding = encoding;
        }

        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            AMD64Kind kind = (AMD64Kind)this.result.getPlatformKind();
            if (ValueUtil.isRegister((Value)this.selector)) {
                AMD64Assembler.VexRVMOp.VPSHUFB.encoding(this.encoding).emit((AMD64Assembler)masm, AVXKind.getRegisterSize(kind), ValueUtil.asRegister((Value)this.result), ValueUtil.asRegister((Value)this.source), ValueUtil.asRegister((Value)this.selector));
            } else {
                assert (ValueUtil.isStackSlot((Value)this.selector));
                AMD64Assembler.VexRVMOp.VPSHUFB.encoding(this.encoding).emit((AMD64Assembler)masm, AVXKind.getRegisterSize(kind), ValueUtil.asRegister((Value)this.result), ValueUtil.asRegister((Value)this.source), (AMD64Address)crb.asAddress((Value)this.selector));
            }
        }
    }

    /**
     * LIR instruction that moves a value from a register or stack slot into a vector register
     * with {@code VMOVQ} (in the requested encoding). The move is always emitted at XMM size,
     * regardless of the width of the result kind.
     */
    public static final class LongToVectorOp
    extends AMD64LIRInstruction {
        public static final LIRInstructionClass<LongToVectorOp> TYPE = LIRInstructionClass.create(LongToVectorOp.class);

        @LIRInstruction.Def({LIRInstruction.OperandFlag.REG})
        protected AllocatableValue result;
        @LIRInstruction.Use({LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK})
        protected AllocatableValue value;
        private final AMD64Assembler.AMD64SIMDInstructionEncoding encoding;

        /**
         * @param result destination vector register; with assertions enabled its kind must be
         *            {@code V128_QWORD}, {@code V256_QWORD} or {@code V512_QWORD}
         * @param value value to move (register or stack slot)
         * @param encoding SIMD encoding to select for {@code VMOVQ}
         */
        public LongToVectorOp(AllocatableValue result, AllocatableValue value, AMD64Assembler.AMD64SIMDInstructionEncoding encoding) {
            super(TYPE);
            assert result.getPlatformKind() == AMD64Kind.V128_QWORD
                            || result.getPlatformKind() == AMD64Kind.V256_QWORD
                            || result.getPlatformKind() == AMD64Kind.V512_QWORD : result;
            this.result = result;
            this.value = value;
            this.encoding = encoding;
        }

        /**
         * Emits the register-to-register form when {@code value} is in a register, otherwise the
         * load form from the stack slot's address.
         */
        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            if (ValueUtil.isRegister(value)) {
                AMD64Assembler.VexMoveOp.VMOVQ.encoding(encoding).emit(
                                masm, AVXKind.AVXSize.XMM, ValueUtil.asRegister(result), ValueUtil.asRegister(value));
                return;
            }
            assert ValueUtil.isStackSlot(value);
            AMD64Assembler.VexMoveOp.VMOVQ.encoding(encoding).emit(
                            masm, AVXKind.AVXSize.XMM, ValueUtil.asRegister(result), (AMD64Address) crb.asAddress(value));
        }
    }

    /**
     * LIR instruction that moves a value from a register or stack slot into a vector register
     * with {@code VMOVD} (in the requested encoding). The move is always emitted at XMM size,
     * regardless of the width of the result kind.
     */
    public static final class IntToVectorOp
    extends AMD64LIRInstruction {
        public static final LIRInstructionClass<IntToVectorOp> TYPE = LIRInstructionClass.create(IntToVectorOp.class);

        @LIRInstruction.Def({LIRInstruction.OperandFlag.REG})
        protected AllocatableValue result;
        @LIRInstruction.Use({LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.STACK})
        protected AllocatableValue value;
        private final AMD64Assembler.AMD64SIMDInstructionEncoding encoding;

        /**
         * @param result destination vector register; with assertions enabled the scalar element
         *            kind of its platform kind must be an integer kind
         * @param value value to move (register or stack slot)
         * @param encoding SIMD encoding to select for {@code VMOVD}
         */
        public IntToVectorOp(AllocatableValue result, AllocatableValue value, AMD64Assembler.AMD64SIMDInstructionEncoding encoding) {
            super(TYPE);
            assert ((AMD64Kind) result.getPlatformKind()).getScalar().isInteger() : result.getPlatformKind();
            this.result = result;
            this.value = value;
            this.encoding = encoding;
        }

        /**
         * Emits the register-to-register form when {@code value} is in a register, otherwise the
         * load form from the stack slot's address.
         */
        @Override
        public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
            if (ValueUtil.isRegister(value)) {
                AMD64Assembler.VexMoveOp.VMOVD.encoding(encoding).emit(
                                masm, AVXKind.AVXSize.XMM, ValueUtil.asRegister(result), ValueUtil.asRegister(value));
                return;
            }
            assert ValueUtil.isStackSlot(value);
            AMD64Assembler.VexMoveOp.VMOVD.encoding(encoding).emit(
                            masm, AVXKind.AVXSize.XMM, ValueUtil.asRegister(result), (AMD64Address) crb.asAddress(value));
        }
    }
}

