/*
 * Decompiled with CFR 0.152.
 */
package jdk.graal.compiler.lir.amd64;

import java.util.EnumSet;
import jdk.graal.compiler.asm.Label;
import jdk.graal.compiler.asm.amd64.AMD64Address;
import jdk.graal.compiler.asm.amd64.AMD64Assembler;
import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler;
import jdk.graal.compiler.asm.amd64.AVXKind;
import jdk.graal.compiler.core.common.LIRKind;
import jdk.graal.compiler.core.common.Stride;
import jdk.graal.compiler.debug.Assertions;
import jdk.graal.compiler.lir.LIRInstruction;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.Opcode;
import jdk.graal.compiler.lir.amd64.AMD64ComplexVectorOp;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.graal.compiler.lir.gen.LIRGeneratorTool;
import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.CodeUtil;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.PlatformKind;
import jdk.vm.ci.meta.Value;

/**
 * LIR instruction (opcode {@code AMD64_STRING_INFLATE}) that emits AMD64 code which reads
 * {@code len} bytes starting at {@code src}, zero-extends each byte to 16 bits and stores the
 * results consecutively starting at {@code dst} (source addressed with {@link Stride#S1},
 * destination with {@link Stride#S2}).
 *
 * <p>Depending on the CPU features available, the emitted code processes the bulk of the input
 * in groups of 32, 16 or 8 elements, then a possible group of 4, and finishes with a
 * one-element-at-a-time loop.
 */
@Opcode(value="AMD64_STRING_INFLATE")
public final class AMD64StringLatin1InflateOp
extends AMD64ComplexVectorOp {
    public static final LIRInstructionClass<AMD64StringLatin1InflateOp> TYPE = LIRInstructionClass.create(AMD64StringLatin1InflateOp.class);
    // Length threshold used to decide between the AVX-512 path and the narrower vector path.
    // The constructor asserts it is either 0 or a power of two.
    private final int useAVX3Threshold;
    // Inputs. The constructor asserts these live in rsi (src), rdi (dst) and rdx (len).
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value rsrc;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value rdst;
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value rlen;
    // The same three values are additionally declared as temps. The emitted code overwrites
    // the src/dst/len registers (see byteArrayInflate) - presumably this is what tells the
    // register allocator they are clobbered; confirm against LIRInstruction.Temp semantics.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value rsrcTemp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value rdstTemp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value rlenTemp;
    // Vector scratch register (asserted to be of the XMM register category when code is emitted).
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value vtmp1;
    // 32-bit (DWORD) general-purpose scratch register.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value rtmp2;
    // k2 when the AVX-512 variant is used, Value.ILLEGAL otherwise.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value maskRegister;

    /**
     * Creates the operation.
     *
     * @param tool generator used to query the target and to allocate the temporary variables
     * @param runtimeCheckedCPUFeatures CPU features forwarded to the superclass and to the
     *            feature checks
     * @param useAVX3Threshold threshold for selecting the AVX-512 path; must be 0 or a power
     *            of two (asserted)
     * @param src source address; asserted to be in {@code rsi}
     * @param dst destination address; asserted to be in {@code rdi}
     * @param len element count; asserted to be in {@code rdx}
     */
    public AMD64StringLatin1InflateOp(LIRGeneratorTool tool, EnumSet<AMD64.CPUFeature> runtimeCheckedCPUFeatures, int useAVX3Threshold, Value src, Value dst, Value len) {
        // Request ZMM as the vector size only when both AVX512VL/BW and BMI2 are supported;
        // otherwise request YMM.
        super(TYPE, tool, runtimeCheckedCPUFeatures, AMD64StringLatin1InflateOp.supportsAVX512VLBW(tool.target(), runtimeCheckedCPUFeatures) && AMD64StringLatin1InflateOp.supports(tool.target(), runtimeCheckedCPUFeatures, AMD64.CPUFeature.BMI2, new AMD64.CPUFeature[0]) ? AVXKind.AVXSize.ZMM : AVXKind.AVXSize.YMM);
        assert (useAVX3Threshold == 0 || CodeUtil.isPowerOf2((int)useAVX3Threshold)) : "AVX3Threshold must be 0 or a power of 2: " + useAVX3Threshold;
        this.useAVX3Threshold = useAVX3Threshold;
        // The operands must already be in these fixed registers.
        assert (ValueUtil.asRegister((Value)src).equals((Object)AMD64.rsi));
        assert (ValueUtil.asRegister((Value)dst).equals((Object)AMD64.rdi));
        assert (ValueUtil.asRegister((Value)len).equals((Object)AMD64.rdx));
        // Each operand is stored twice: once as a Use field and once as a Temp field.
        this.rsrcTemp = this.rsrc = src;
        this.rdstTemp = this.rdst = dst;
        this.rlenTemp = this.rlen = len;
        this.vtmp1 = tool.newVariable(LIRKind.value((PlatformKind)this.getVectorKind(JavaKind.Byte)));
        this.rtmp2 = tool.newVariable(LIRKind.value((PlatformKind)AMD64Kind.DWORD));
        // k2 is written by the AVX-512 tail handling, so it is only reserved when that path is emitted.
        this.maskRegister = this.canUseAVX512Variant() ? AMD64.k2.asValue() : Value.ILLEGAL;
    }

    /**
     * Resolves the operand and temporary values to registers and emits the inflate sequence.
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
        Register src = ValueUtil.asRegister((Value)this.rsrc);
        Register dst = ValueUtil.asRegister((Value)this.rdst);
        Register len = ValueUtil.asRegister((Value)this.rlen);
        Register tmp1 = ValueUtil.asRegister((Value)this.vtmp1);
        Register tmp2 = ValueUtil.asRegister((Value)this.rtmp2);
        this.byteArrayInflate(masm, src, dst, len, tmp1, tmp2);
    }

    /**
     * Returns whether the AVX-512 code path may be emitted: both
     * {@code supportsAVX512VLBWAndZMM()} and {@code supportsBMI2()} (inherited, not defined in
     * this class) must report support. That path emits {@code shlxl} and k-register masked moves.
     */
    private boolean canUseAVX512Variant() {
        return this.supportsAVX512VLBWAndZMM() && this.supportsBMI2();
    }

    /**
     * Emits the inflate code.
     *
     * <p>Every vector loop below uses the same pattern: the remainder count is kept in
     * {@code tmp2}, {@code len} is rounded down to a multiple of the group size, {@code src} and
     * {@code dst} are advanced to the end of the region covered by the loop, and {@code len} is
     * negated so that it serves as a negative index counting up to zero.
     *
     * @param masm assembler receiving the instructions
     * @param src register holding the source address; overwritten
     * @param dst register holding the destination address; overwritten
     * @param len register holding the element count; overwritten
     * @param tmp1 vector scratch register; must be of the XMM register category (asserted)
     * @param tmp2 general-purpose scratch register; must differ from src, dst and len (asserted)
     */
    private void byteArrayInflate(AMD64MacroAssembler masm, Register src, Register dst, Register len, Register tmp1, Register tmp2) {
        assert (tmp1.getRegisterCategory().equals((Object)AMD64.XMM));
        Label labelCopyCharsLoop = new Label();
        Label labelDone = new Label();
        Label labelBelowThreshold = new Label();
        Label labelAVX3Threshold = new Label();
        // src, dst, len and tmp2 must all be distinct registers.
        assert (src.number != dst.number && src.number != len.number && src.number != tmp2.number) : Assertions.errorMessageContext("src", src, "dst", dst, "len", len, "tmp1", tmp1, "tmp2", tmp2);
        assert (dst.number != len.number && dst.number != tmp2.number) : Assertions.errorMessageContext("src", src, "dst", dst, "len", len, "tmp1", tmp1, "tmp2", tmp2);
        assert (len.number != tmp2.number) : Assertions.errorMessageContext("src", src, "dst", dst, "len", len, "tmp1", tmp1, "tmp2", tmp2);
        // Keep a copy of the count in tmp2; it is masked down to the remainder count below.
        masm.movl(tmp2, len);
        if (this.canUseAVX512Variant()) {
            Label labelCopy32Loop = new Label();
            Label labelCopyTail = new Label();
            // len is reused as a scratch register once the 32-element loop has finished.
            Register tmp3Aliased = len;
            // (len & -16) == 0: no bit at or above 16 is set, so skip the AVX-512 path.
            masm.testlAndJcc(len, -16, AMD64Assembler.ConditionFlag.Zero, labelBelowThreshold, false);
            // (len & -threshold) == 0: for a power-of-two threshold and a non-negative len this
            // means len is below the threshold, so continue with the narrower vector path.
            // NOTE(review): useAVX3Threshold == 0 is allowed by the constructor assert; the mask
            // is then 0 and, if testlAndJcc is a plain test + jcc, this branch would always be
            // taken, so the AVX-512 loop would never run - confirm this is intended.
            masm.testlAndJcc(len, -1 * this.useAVX3Threshold, AMD64Assembler.ConditionFlag.Zero, labelAVX3Threshold, false);
            // tmp2 = remainder (len % 32); len = len rounded down to a multiple of 32.
            masm.andl(tmp2, 31);
            masm.andlAndJcc(len, -32, AMD64Assembler.ConditionFlag.Zero, labelCopyTail, true);
            // Point src/dst past the region handled by the loop and index it with negative len.
            masm.leaq(src, new AMD64Address(src, len, Stride.S1));
            masm.leaq(dst, new AMD64Address(dst, len, Stride.S2));
            masm.negq(len);
            // Main loop: 32 elements per iteration.
            masm.bind(labelCopy32Loop);
            masm.evpmovzxbw(tmp1, new AMD64Address(src, len, Stride.S1));
            masm.evmovdqu16(new AMD64Address(dst, len, Stride.S2), tmp1);
            masm.addqAndJcc(len, 32, AMD64Assembler.ConditionFlag.NotZero, labelCopy32Loop, false);
            masm.bind(labelCopyTail);
            // Nothing left if the remainder count is zero.
            masm.testlAndJcc(tmp2, tmp2, AMD64Assembler.ConditionFlag.Zero, labelDone, false);
            // Build ~(-1 << tmp2), i.e. a mask with the low tmp2 bits set, and use it via k2 to
            // load and store exactly the remaining elements.
            masm.movl(tmp3Aliased, -1);
            masm.shlxl(tmp3Aliased, tmp3Aliased, tmp2);
            masm.notl(tmp3Aliased);
            masm.kmovd(AMD64.k2, tmp3Aliased);
            masm.evpmovzxbw(tmp1, AMD64.k2, new AMD64Address(src));
            masm.evmovdqu16(new AMD64Address(dst), AMD64.k2, tmp1);
            masm.jmp(labelDone);
            // Inputs below the AVX3 threshold fall through to the code emitted below.
            masm.bind(labelAVX3Threshold);
        }
        if (masm.supports(AMD64.CPUFeature.SSE4_2)) {
            Label labelCopy16Loop = new Label();
            Label labelCopy8Loop = new Label();
            Label labelCopyBytes = new Label();
            Label labelCopyNewTail = new Label();
            Label labelCopyTail = new Label();
            if (masm.supports(AMD64.CPUFeature.AVX2)) {
                // tmp2 = len % 16; len = len rounded down to a multiple of 16.
                masm.andl(tmp2, 15);
                masm.andlAndJcc(len, -16, AMD64Assembler.ConditionFlag.Zero, labelCopyNewTail, true);
            } else {
                // tmp2 = len % 8; len = len rounded down to a multiple of 8.
                masm.andl(tmp2, 7);
                masm.andlAndJcc(len, -8, AMD64Assembler.ConditionFlag.Zero, labelCopyTail, true);
            }
            // Point src/dst past the region handled by the loop and index it with negative len.
            masm.leaq(src, new AMD64Address(src, len, Stride.S1));
            masm.leaq(dst, new AMD64Address(dst, len, Stride.S2));
            masm.negq(len);
            if (masm.supports(AMD64.CPUFeature.AVX2)) {
                // Main loop: 16 elements per iteration.
                masm.bind(labelCopy16Loop);
                masm.vpmovzxbw(tmp1, new AMD64Address(src, len, Stride.S1));
                masm.vmovdqu(new AMD64Address(dst, len, Stride.S2), tmp1);
                masm.addqAndJcc(len, 16, AMD64Assembler.ConditionFlag.NotZero, labelCopy16Loop, false);
                // Entry point for inputs that skipped the AVX-512 path because (len & -16) == 0;
                // tmp2 then still holds the full count.
                masm.bind(labelBelowThreshold);
                masm.bind(labelCopyNewTail);
                // Fewer than 16 elements remain: len = remaining count, tmp2 = remaining % 8.
                masm.movl(len, tmp2);
                masm.andl(tmp2, 7);
                masm.andlAndJcc(len, -8, AMD64Assembler.ConditionFlag.Zero, labelCopyTail, true);
                // Exactly one group of 8 remains; inflate it and advance the pointers.
                masm.pmovzxbw(tmp1, new AMD64Address(src));
                masm.movdqu(new AMD64Address(dst), tmp1);
                masm.addq(src, 8);
                masm.addq(dst, 16);
                masm.jmp(labelCopyTail);
            }
            // 8 elements per iteration. When the AVX2 block above is emitted it ends in an
            // unconditional jump, and nothing in this method jumps to labelCopy8Loop from
            // outside the loop, so this loop is only entered on the non-AVX2 path.
            masm.bind(labelCopy8Loop);
            masm.pmovzxbw(tmp1, new AMD64Address(src, len, Stride.S1));
            masm.movdqu(new AMD64Address(dst, len, Stride.S2), tmp1);
            masm.addqAndJcc(len, 8, AMD64Assembler.ConditionFlag.NotZero, labelCopy8Loop, false);
            masm.bind(labelCopyTail);
            // len = remaining count (masked to at most 7 on every path that reaches this label).
            masm.movl(len, tmp2);
            masm.cmplAndJcc(len, 4, AMD64Assembler.ConditionFlag.Less, labelCopyBytes, true);
            // At least 4 remain: inflate a group of 4 via a 32-bit load and a 64-bit store.
            masm.movdl(tmp1, new AMD64Address(src));
            masm.pmovzxbw(tmp1, tmp1);
            masm.movq(new AMD64Address(dst), tmp1);
            masm.subq(len, 4);
            masm.addq(src, 4);
            masm.addq(dst, 8);
            masm.bind(labelCopyBytes);
        } else {
            // No vector path was emitted here, so the below-threshold target is the scalar loop.
            masm.bind(labelBelowThreshold);
        }
        // Scalar loop for whatever is left in len; tmp2 is reused as the data scratch register.
        masm.testlAndJcc(len, len, AMD64Assembler.ConditionFlag.Zero, labelDone, true);
        masm.leaq(src, new AMD64Address(src, len, Stride.S1));
        masm.leaq(dst, new AMD64Address(dst, len, Stride.S2));
        masm.negq(len);
        masm.bind(labelCopyCharsLoop);
        // Load one byte zero-extended, store it as a 16-bit value, advance the index.
        masm.movzbl(tmp2, new AMD64Address(src, len, Stride.S1));
        masm.movw(new AMD64Address(dst, len, Stride.S2), tmp2);
        masm.incqAndJcc(len, AMD64Assembler.ConditionFlag.NotZero, labelCopyCharsLoop, false);
        masm.bind(labelDone);
    }
}

