/*
 * Decompiled with CFR 0.152.
 */
package jdk.graal.compiler.lir.amd64;

import java.util.EnumSet;
import jdk.graal.compiler.asm.Label;
import jdk.graal.compiler.asm.amd64.AMD64Address;
import jdk.graal.compiler.asm.amd64.AMD64Assembler;
import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler;
import jdk.graal.compiler.asm.amd64.AVXKind;
import jdk.graal.compiler.core.common.LIRKind;
import jdk.graal.compiler.core.common.Stride;
import jdk.graal.compiler.debug.Assertions;
import jdk.graal.compiler.lir.LIRInstruction;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.Opcode;
import jdk.graal.compiler.lir.SyncPort;
import jdk.graal.compiler.lir.amd64.AMD64ComplexVectorOp;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.graal.compiler.lir.gen.LIRGeneratorTool;
import jdk.graal.compiler.serviceprovider.JavaVersionUtil;
import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.amd64.AMD64Kind;
import jdk.vm.ci.code.CodeUtil;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.JavaKind;
import jdk.vm.ci.meta.PlatformKind;
import jdk.vm.ci.meta.Value;

/**
 * LIR operation that emits AMD64 code which narrows 16-bit chars read from {@code src} into
 * bytes written to {@code dst}, stopping at the first char whose upper byte is non-zero.
 *
 * <p>Two code shapes are emitted, selected in {@link #emitCode} by
 * {@code JavaVersionUtil.JAVA_SPEC}:
 * <ul>
 * <li>{@code charArrayCompress} (spec version 22 and later): the result register ends up holding
 * the original length plus the (non-positive) scalar loop index, i.e. the full length when every
 * char was narrowed, and a smaller value when the scalar loop stopped early.</li>
 * <li>{@code charArrayCompressLegacy} (earlier versions): the result register holds the original
 * length on success and 0 on failure; the length is kept on the machine stack while the code
 * runs.</li>
 * </ul>
 *
 * <p>Both shapes contain an optional AVX-512 section (see {@link #canUseAVX512Variant()}), an
 * optional SSE4.2 section, and a one-char-at-a-time scalar loop that handles tails and pins down
 * the exact failing position.
 *
 * <p>The instruction sequences are a port of the HotSpot routine referenced by the
 * {@code @SyncPort} annotation; keep statement order intact when editing.
 */
@Opcode(value="AMD64_STRING_COMPRESS")
@SyncPort(from="https://github.com/openjdk/jdk/blob/959fa4a1a35a1bb650ec5888efaf3d0fc8cfb025/src/hotspot/cpu/x86/macroAssembler_x86.cpp#L9289-L9497", sha1="3e365037f473204b3f742ab364bd9ad514e72161")
public final class AMD64StringUTF16CompressOp
extends AMD64ComplexVectorOp {
    public static final LIRInstructionClass<AMD64StringUTF16CompressOp> TYPE = LIRInstructionClass.create(AMD64StringUTF16CompressOp.class);
    // Threshold value handed in by the creator; the AVX-512 section is only emitted when it is 0.
    private final int useAVX3Threshold;
    // Result register (asserted to be rax in the constructor).
    @LIRInstruction.Def(value={LIRInstruction.OperandFlag.REG})
    private Value rres;
    // Source pointer (asserted to be rsi).
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value rsrc;
    // Destination pointer (asserted to be rdi).
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value rdst;
    // Length in chars (asserted to be rdx).
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value rlen;
    // The three *Temp fields alias the corresponding inputs. The emitted code overwrites
    // src, dst and len, so they are additionally declared as temps.
    // NOTE(review): presumably this is how the register allocator is told the inputs are
    // clobbered - confirm against the LIRInstruction operand annotation docs.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value rsrcTemp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value rdstTemp;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value rlenTemp;
    // Four vector scratch registers (asserted to be XMM-category registers by the emitters).
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value vtmp1;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value vtmp2;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value vtmp3;
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value vtmp4;
    // General-purpose scratch register, created with kind DWORD.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value rtmp5;
    // k2 and k3 when the AVX-512 section is emitted (it writes both), otherwise empty.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value[] maskRegisters;

    /**
     * Creates the operation.
     *
     * @param tool LIR generator used to query the target and to allocate the scratch variables
     * @param runtimeCheckedCPUFeatures CPU feature set forwarded to the superclass and to the
     *            feature queries
     * @param useAVX3Threshold must be 0 or a power of two (asserted); the AVX-512 section is
     *            emitted only for 0
     * @param res result value, must be in rax (asserted)
     * @param src source pointer, must be in rsi (asserted)
     * @param dst destination pointer, must be in rdi (asserted)
     * @param len length, must be in rdx (asserted)
     */
    public AMD64StringUTF16CompressOp(LIRGeneratorTool tool, EnumSet<AMD64.CPUFeature> runtimeCheckedCPUFeatures, int useAVX3Threshold, Value res, Value src, Value dst, Value len) {
        // Vector size passed to the superclass: ZMM when AVX512VL+BW and BMI2 are available,
        // XMM otherwise. Note this choice does not look at useAVX3Threshold.
        super(TYPE, tool, runtimeCheckedCPUFeatures, AMD64StringUTF16CompressOp.supportsAVX512VLBW(tool.target(), runtimeCheckedCPUFeatures) && AMD64StringUTF16CompressOp.supports(tool.target(), runtimeCheckedCPUFeatures, AMD64.CPUFeature.BMI2, new AMD64.CPUFeature[0]) ? AVXKind.AVXSize.ZMM : AVXKind.AVXSize.XMM);
        assert (useAVX3Threshold == 0 || CodeUtil.isPowerOf2((int)useAVX3Threshold)) : "AVX3Threshold must be 0 or a power of 2 :" + useAVX3Threshold;
        this.useAVX3Threshold = useAVX3Threshold;
        // The operands are expected in fixed registers.
        assert (ValueUtil.asRegister((Value)src).equals((Object)AMD64.rsi));
        assert (ValueUtil.asRegister((Value)dst).equals((Object)AMD64.rdi));
        assert (ValueUtil.asRegister((Value)len).equals((Object)AMD64.rdx));
        assert (ValueUtil.asRegister((Value)res).equals((Object)AMD64.rax));
        this.rres = res;
        // Each input is stored twice: once as a use and once as a temp.
        this.rsrcTemp = this.rsrc = src;
        this.rdstTemp = this.rdst = dst;
        this.rlenTemp = this.rlen = len;
        LIRKind vkind = LIRKind.value((PlatformKind)this.getVectorKind(JavaKind.Byte));
        this.vtmp1 = tool.newVariable(vkind);
        this.vtmp2 = tool.newVariable(vkind);
        this.vtmp3 = tool.newVariable(vkind);
        this.vtmp4 = tool.newVariable(vkind);
        this.rtmp5 = tool.newVariable(LIRKind.value((PlatformKind)AMD64Kind.DWORD));
        // Must run after useAVX3Threshold is assigned, because canUseAVX512Variant() reads it.
        this.maskRegisters = this.canUseAVX512Variant() ? new Value[]{AMD64.k2.asValue(), AMD64.k3.asValue()} : new Value[0];
    }

    /**
     * Resolves the operand values to registers and emits either the current or the legacy
     * instruction sequence, depending on {@code JavaVersionUtil.JAVA_SPEC}.
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
        Register res = ValueUtil.asRegister((Value)this.rres);
        Register src = ValueUtil.asRegister((Value)this.rsrc);
        Register dst = ValueUtil.asRegister((Value)this.rdst);
        Register len = ValueUtil.asRegister((Value)this.rlen);
        Register tmp1 = ValueUtil.asRegister((Value)this.vtmp1);
        Register tmp2 = ValueUtil.asRegister((Value)this.vtmp2);
        Register tmp3 = ValueUtil.asRegister((Value)this.vtmp3);
        Register tmp4 = ValueUtil.asRegister((Value)this.vtmp4);
        Register tmp5 = ValueUtil.asRegister((Value)this.rtmp5);
        if (JavaVersionUtil.JAVA_SPEC >= 22) {
            this.charArrayCompress(masm, src, dst, len, tmp1, tmp2, tmp3, tmp4, tmp5, res);
        } else {
            this.charArrayCompressLegacy(masm, src, dst, len, tmp1, tmp2, tmp3, tmp4, tmp5, res);
        }
    }

    /**
     * Returns whether the AVX-512 section is emitted: the threshold must be exactly 0 and the
     * AVX512VL/BW-with-ZMM and BMI2 feature queries must both succeed. BMI2 is needed because
     * the section uses {@code shlxl}.
     */
    private boolean canUseAVX512Variant() {
        return this.useAVX3Threshold == 0 && this.supportsAVX512VLBWAndZMM() && this.supportsBMI2();
    }

    /**
     * Emits the sequence used for spec version 22 and later.
     *
     * <p>{@code result} is first set to {@code len}. Any path that finishes without meeting a
     * char above 0xFF jumps to the end with {@code result} unchanged. When a vector check fails,
     * {@code src}, {@code dst} and {@code len} are rebased so that the scalar loop re-scans from
     * the failing chunk; the scalar loop then adds its (non-positive) index to {@code result}
     * when it exits.
     *
     * @param masm assembler receiving the instructions
     * @param src source pointer register (overwritten)
     * @param dst destination pointer register (overwritten)
     * @param len length register (overwritten); must differ from {@code result}
     * @param tmp1Reg vector scratch
     * @param tmp2Reg vector scratch
     * @param tmp3Reg vector scratch
     * @param tmp4Reg vector scratch
     * @param tmp5 general-purpose scratch
     * @param result register receiving the result
     */
    private void charArrayCompress(AMD64MacroAssembler masm, Register src, Register dst, Register len, Register tmp1Reg, Register tmp2Reg, Register tmp3Reg, Register tmp4Reg, Register tmp5, Register result) {
        Label labelCopy32Loop;
        assert (tmp1Reg.getRegisterCategory().equals((Object)AMD64.XMM));
        assert (tmp2Reg.getRegisterCategory().equals((Object)AMD64.XMM));
        assert (tmp3Reg.getRegisterCategory().equals((Object)AMD64.XMM));
        assert (tmp4Reg.getRegisterCategory().equals((Object)AMD64.XMM));
        Label labelCopyCharsLoop = new Label();
        Label labelResetSp = new Label();
        Label labelDone = new Label();
        Label labelCopyTail = new Label();
        assert (len.number != result.number) : Assertions.errorMessageContext("len", len, "result", result);
        // Keep the original length in result; it is both the return value on success and the
        // source from which len is restored later.
        masm.movl(result, len);
        if (this.canUseAVX512Variant()) {
            labelCopy32Loop = new Label();
            Label labelCopyLoopTail = new Label();
            Label labelBelowThreshold = new Label();
            Label labelPostAlignment = new Label();
            Label labelResetForCopyTail = new Label();
            // len & ~31 == 0, i.e. fewer than 32 chars: skip the AVX-512 section.
            masm.testlAndJcc(len, -32, AMD64Assembler.ConditionFlag.Zero, labelBelowThreshold, false);
            // Broadcast 0x00FF into every 16-bit lane of tmp2Reg; chars are compared against it.
            masm.movl(tmp5, 255);
            masm.evpbroadcastw(tmp2Reg, tmp5);
            // len & ~63 == 0, i.e. fewer than 64 chars: skip the destination alignment step.
            masm.testlAndJcc(len, -64, AMD64Assembler.ConditionFlag.Zero, labelPostAlignment, true);
            // tmp5 = (-(dst & 31)) & 31: bytes (= chars, one byte is written per char) needed
            // to bring dst to a 32-byte boundary.
            masm.movl(tmp5, dst);
            masm.andl(tmp5, 31);
            masm.negl(tmp5);
            masm.andl(tmp5, 31);
            // Already aligned: nothing to do.
            masm.testlAndJcc(tmp5, tmp5, AMD64Assembler.ConditionFlag.Zero, labelPostAlignment, true);
            // Build ~(~0 << tmp5), a mask with the low tmp5 bits set, using len as scratch.
            masm.movl(len, -1);
            masm.shlxl(len, len, tmp5);
            masm.notl(len);
            masm.kmovd(AMD64.k3, len);
            // Restore len from the saved copy.
            masm.movl(len, result);
            // Masked load of the alignment prefix, then a masked unsigned compare against 0x00FF.
            // NOTE(review): predicate 2 is taken to be "less than or equal" per the VPCMPUW
            // immediate encoding - confirm against the ISA reference.
            masm.evmovdqu16(tmp1Reg, AMD64.k3, new AMD64Address(src));
            masm.evpcmpuw(AMD64.k2, AMD64.k3, tmp1Reg, tmp2Reg, 2);
            // Per KTEST, carry is set only when every lane selected by k3 is also set in k2.
            // Carry clear therefore means some selected char failed the compare: fall back to
            // the scalar path, with src/dst/len still describing the whole input.
            masm.ktestd(AMD64.k2, AMD64.k3);
            masm.jcc(AMD64Assembler.ConditionFlag.CarryClear, labelCopyTail);
            // Masked store of the prefix through the word-to-byte down-convert instruction.
            masm.evpmovwb(new AMD64Address(dst), AMD64.k3, tmp1Reg);
            // Advance past the prefix: src by 2 * tmp5 bytes, dst by tmp5 bytes.
            masm.addq(src, tmp5);
            masm.addq(src, tmp5);
            masm.addq(dst, tmp5);
            masm.subl(len, tmp5);
            masm.bind(labelPostAlignment);
            // tmp5 = tail count (len & 31); len = count covered by full 32-char vectors.
            masm.movl(tmp5, len);
            masm.andl(tmp5, 31);
            masm.andlAndJcc(len, -32, AMD64Assembler.ConditionFlag.Zero, labelCopyLoopTail, true);
            // Point src/dst at the end of the vector region and count len up from -count to 0.
            masm.leaq(src, new AMD64Address(src, len, Stride.S2));
            masm.leaq(dst, new AMD64Address(dst, len, Stride.S1));
            masm.negq(len);
            masm.bind(labelCopy32Loop);
            masm.evmovdqu16(tmp1Reg, new AMD64Address(src, len, Stride.S2));
            masm.evpcmpuw(AMD64.k2, tmp1Reg, tmp2Reg, 2);
            // Per KORTEST, carry is set only when the OR of the operands is all ones, i.e. all
            // 32 lanes passed. Otherwise hand the chunk to the scalar loop.
            masm.kortestd(AMD64.k2, AMD64.k2);
            masm.jcc(AMD64Assembler.ConditionFlag.CarryClear, labelResetForCopyTail);
            masm.evpmovwb(new AMD64Address(dst, len, Stride.S1), tmp1Reg);
            masm.addqAndJcc(len, 32, AMD64Assembler.ConditionFlag.NotZero, labelCopy32Loop, true);
            masm.bind(labelCopyLoopTail);
            // No tail left: done, result still holds the original length.
            masm.testlAndJcc(tmp5, tmp5, AMD64Assembler.ConditionFlag.Zero, labelDone, false);
            // len = tail count; tmp5 is reused to build the mask ~(~0 << len).
            masm.movl(len, tmp5);
            masm.movl(tmp5, -1);
            masm.shlxl(tmp5, tmp5, len);
            masm.notl(tmp5);
            masm.kmovd(AMD64.k3, tmp5);
            // Masked load/compare/store of the tail, same scheme as the alignment prefix.
            masm.evmovdqu16(tmp1Reg, AMD64.k3, new AMD64Address(src));
            masm.evpcmpuw(AMD64.k2, AMD64.k3, tmp1Reg, tmp2Reg, 2);
            masm.ktestd(AMD64.k2, AMD64.k3);
            // On failure the scalar path processes the tail: src/dst point at its start and
            // len holds its length.
            masm.jcc(AMD64Assembler.ConditionFlag.CarryClear, labelCopyTail);
            masm.evpmovwb(new AMD64Address(dst), AMD64.k3, tmp1Reg);
            masm.jmp(labelDone);
            masm.bind(labelResetForCopyTail);
            // A full vector failed. Move src/dst past the tail as well (to the end of the
            // input) and extend the negative index by the tail count, so the scalar loop
            // re-scans from the failing chunk through the end.
            masm.leaq(src, new AMD64Address(src, tmp5, Stride.S2));
            masm.leaq(dst, new AMD64Address(dst, tmp5, Stride.S1));
            masm.subq(len, tmp5);
            masm.jmp(labelCopyCharsLoop);
            masm.bind(labelBelowThreshold);
        }
        if (masm.supports(AMD64.CPUFeature.SSE4_2)) {
            labelCopy32Loop = new Label();
            Label labelCopy16 = new Label();
            Label labelCopyTailSSE = new Label();
            Label labelResetForCopyTail = new Label();
            // len & ~7 == 0, i.e. fewer than 8 chars: go straight to the scalar path.
            masm.testlAndJcc(len, -8, AMD64Assembler.ConditionFlag.Zero, labelCopyTail, false);
            // -16711936 is 0xFF00FF00: the upper byte of each 16-bit char. Replicate it across
            // all four dwords of tmp1Reg.
            masm.movl(tmp5, -16711936);
            masm.movdl(tmp1Reg, tmp5);
            masm.pshufd(tmp1Reg, tmp1Reg, 0);
            // len &= ~15; when that is zero there is no 16-char group to process.
            masm.andlAndJcc(len, -16, AMD64Assembler.ConditionFlag.Zero, labelCopy16, true);
            // tmp4Reg accumulates (ORs) all chars loaded by the loop below.
            masm.pxor(tmp4Reg, tmp4Reg);
            // Point src/dst at the end of the 16-char-group region; len counts up to 0.
            masm.leaq(src, new AMD64Address(src, len, Stride.S2));
            masm.leaq(dst, new AMD64Address(dst, len, Stride.S1));
            masm.negq(len);
            masm.bind(labelCopy32Loop);
            // Load two 16-byte halves (8 chars each) and fold them into the accumulator.
            masm.movdqu(tmp2Reg, new AMD64Address(src, len, Stride.S2));
            masm.por(tmp4Reg, tmp2Reg);
            masm.movdqu(tmp3Reg, new AMD64Address(src, len, Stride.S2, 16));
            masm.por(tmp4Reg, tmp3Reg);
            // Non-zero (accumulator AND mask) means some char has upper-byte bits set.
            masm.ptest(tmp4Reg, tmp1Reg);
            masm.jccb(AMD64Assembler.ConditionFlag.NotZero, labelResetForCopyTail);
            // Pack the 16 chars into 16 bytes and store them.
            masm.packuswb(tmp2Reg, tmp3Reg);
            masm.movdqu(new AMD64Address(dst, len, Stride.S1), tmp2Reg);
            masm.addqAndJcc(len, 16, AMD64Assembler.ConditionFlag.NotZero, labelCopy32Loop, true);
            masm.bind(labelCopy16);
            // len is 0 on every path reaching this label. Bit 3 of the original length tells
            // whether one more group of 8 chars remains.
            masm.testlAndJcc(result, 8, AMD64Assembler.ConditionFlag.Zero, labelCopyTailSSE, true);
            masm.pxor(tmp3Reg, tmp3Reg);
            masm.movdqu(tmp2Reg, new AMD64Address(src));
            masm.ptest(tmp2Reg, tmp1Reg);
            masm.jccb(AMD64Assembler.ConditionFlag.NotZero, labelResetForCopyTail);
            // Pack against a zeroed register; only the low 8 bytes are stored.
            masm.packuswb(tmp2Reg, tmp3Reg);
            masm.movq(new AMD64Address(dst), tmp2Reg);
            masm.addq(src, 16);
            masm.addq(dst, 8);
            masm.jmpb(labelCopyTailSSE);
            masm.bind(labelResetForCopyTail);
            // A vector check failed. tmp5 = original length & 15 is the number of chars that
            // follow the 16-char-group region; rebase src/dst to the end of the input and
            // extend the index so the scalar loop re-scans from the failing chunk.
            masm.movl(tmp5, result);
            masm.andl(tmp5, 15);
            masm.leaq(src, new AMD64Address(src, tmp5, Stride.S2));
            masm.leaq(dst, new AMD64Address(dst, tmp5, Stride.S1));
            masm.subq(len, tmp5);
            masm.jmpb(labelCopyCharsLoop);
            masm.bind(labelCopyTailSSE);
            // Remaining tail: original length & 7.
            masm.movl(len, result);
            masm.andl(len, 7);
        }
        masm.bind(labelCopyTail);
        // Scalar path. Nothing left means success with result == original length.
        masm.testlAndJcc(len, len, AMD64Assembler.ConditionFlag.Zero, labelDone, true);
        masm.leaq(src, new AMD64Address(src, len, Stride.S2));
        masm.leaq(dst, new AMD64Address(dst, len, Stride.S1));
        masm.negq(len);
        masm.bind(labelCopyCharsLoop);
        // One char per iteration: load zero-extended, leave the loop if any of bits 8..15
        // (65280 == 0xFF00) is set, otherwise store the low byte.
        masm.movzwl(tmp5, new AMD64Address(src, len, Stride.S2));
        masm.testlAndJcc(tmp5, 65280, AMD64Assembler.ConditionFlag.NotZero, labelResetSp, true);
        masm.movb(new AMD64Address(dst, len, Stride.S1), tmp5);
        masm.incqAndJcc(len, AMD64Assembler.ConditionFlag.NotZero, labelCopyCharsLoop, true);
        masm.bind(labelResetSp);
        // len is 0 if the loop ran to completion and negative if it exited early, so this
        // leaves result at the original length or reduces it by the number of chars not
        // processed. Despite the label name, this variant does not touch the stack pointer.
        masm.addl(result, len);
        masm.bind(labelDone);
    }

    /**
     * Emits the sequence used for spec versions before 22.
     *
     * <p>The original length is pushed on the machine stack at the start. On success it is
     * popped into {@code result}; on the first char above 0xFF, {@code result} is zeroed and the
     * stack slot is discarded. Unlike {@link #charArrayCompress}, a failing vector check jumps
     * directly to the zero-return exit without re-scanning.
     *
     * @param masm assembler receiving the instructions
     * @param src source pointer register (overwritten)
     * @param dst destination pointer register (overwritten)
     * @param len length register (overwritten); must differ from {@code result}
     * @param tmp1Reg vector scratch
     * @param tmp2Reg vector scratch
     * @param tmp3Reg vector scratch
     * @param tmp4Reg vector scratch
     * @param tmp5 general-purpose scratch
     * @param result register receiving the result; also used as scratch while the code runs
     */
    private void charArrayCompressLegacy(AMD64MacroAssembler masm, Register src, Register dst, Register len, Register tmp1Reg, Register tmp2Reg, Register tmp3Reg, Register tmp4Reg, Register tmp5, Register result) {
        Label labelCopy32Loop;
        assert (tmp1Reg.getRegisterCategory().equals((Object)AMD64.XMM));
        assert (tmp2Reg.getRegisterCategory().equals((Object)AMD64.XMM));
        assert (tmp3Reg.getRegisterCategory().equals((Object)AMD64.XMM));
        assert (tmp4Reg.getRegisterCategory().equals((Object)AMD64.XMM));
        Label labelCopyCharsLoop = new Label();
        Label labelReturnLength = new Label();
        Label labelReturnZero = new Label();
        Label labelDone = new Label();
        assert (len.number != result.number) : Assertions.errorMessageContext("len", len, "result", result);
        // Save the length on the stack. Every exit below must either pop it or drop the slot.
        masm.push(len);
        if (this.canUseAVX512Variant()) {
            labelCopy32Loop = new Label();
            Label labelCopyLoopTail = new Label();
            Label labelBelowThreshold = new Label();
            Label labelPostAlignment = new Label();
            // Fewer than 32 chars: skip the AVX-512 section.
            masm.testlAndJcc(len, -32, AMD64Assembler.ConditionFlag.Zero, labelBelowThreshold, false);
            // Broadcast 0x00FF into every 16-bit lane of tmp2Reg (result is free as scratch
            // because the length lives on the stack).
            masm.movl(result, 255);
            masm.evpbroadcastw(tmp2Reg, result);
            // Fewer than 64 chars: skip the destination alignment step.
            masm.testlAndJcc(len, -64, AMD64Assembler.ConditionFlag.Zero, labelPostAlignment, false);
            // tmp5 = (-(dst & 31)) & 31: chars to process until dst is 32-byte aligned.
            masm.movl(tmp5, dst);
            masm.andl(tmp5, 31);
            masm.negl(tmp5);
            masm.andl(tmp5, 31);
            masm.testlAndJcc(tmp5, tmp5, AMD64Assembler.ConditionFlag.Zero, labelPostAlignment, false);
            // Mask with the low tmp5 bits set: ~(~0 << tmp5).
            masm.movl(result, -1);
            masm.shlxl(result, result, tmp5);
            masm.notl(result);
            masm.kmovd(AMD64.k3, result);
            // Masked load, masked unsigned compare against 0x00FF, fail if any selected lane
            // did not pass (carry clear after ktestd).
            // NOTE(review): predicate 2 is taken to be "less than or equal" - confirm against
            // the ISA reference.
            masm.evmovdqu16(tmp1Reg, AMD64.k3, new AMD64Address(src));
            masm.evpcmpuw(AMD64.k2, AMD64.k3, tmp1Reg, tmp2Reg, 2);
            masm.ktestd(AMD64.k2, AMD64.k3);
            masm.jcc(AMD64Assembler.ConditionFlag.CarryClear, labelReturnZero);
            masm.evpmovwb(new AMD64Address(dst), AMD64.k3, tmp1Reg);
            // Advance past the prefix: src by 2 * tmp5 bytes, dst by tmp5 bytes.
            masm.addq(src, tmp5);
            masm.addq(src, tmp5);
            masm.addq(dst, tmp5);
            masm.subl(len, tmp5);
            masm.bind(labelPostAlignment);
            // tmp5 = tail count (len & 31); len = count covered by full 32-char vectors.
            masm.movl(tmp5, len);
            masm.andl(tmp5, 31);
            masm.andlAndJcc(len, -32, AMD64Assembler.ConditionFlag.Zero, labelCopyLoopTail, false);
            // Point src/dst at the end of the vector region; len counts up from -count to 0.
            masm.leaq(src, new AMD64Address(src, len, Stride.S2));
            masm.leaq(dst, new AMD64Address(dst, len, Stride.S1));
            masm.negq(len);
            masm.bind(labelCopy32Loop);
            masm.evmovdqu16(tmp1Reg, new AMD64Address(src, len, Stride.S2));
            masm.evpcmpuw(AMD64.k2, tmp1Reg, tmp2Reg, 2);
            // Carry clear after kortestd: not all 32 lanes passed.
            masm.kortestd(AMD64.k2, AMD64.k2);
            masm.jcc(AMD64Assembler.ConditionFlag.CarryClear, labelReturnZero);
            masm.evpmovwb(new AMD64Address(dst, len, Stride.S1), tmp1Reg);
            masm.addqAndJcc(len, 32, AMD64Assembler.ConditionFlag.NotZero, labelCopy32Loop, false);
            masm.bind(labelCopyLoopTail);
            // No tail: success.
            masm.testlAndJcc(tmp5, tmp5, AMD64Assembler.ConditionFlag.Zero, labelReturnLength, false);
            // Masked load/compare/store of the tail using the mask ~(~0 << tail count).
            masm.movl(len, tmp5);
            masm.movl(result, -1);
            masm.shlxl(result, result, len);
            masm.notl(result);
            masm.kmovd(AMD64.k3, result);
            masm.evmovdqu16(tmp1Reg, AMD64.k3, new AMD64Address(src));
            masm.evpcmpuw(AMD64.k2, AMD64.k3, tmp1Reg, tmp2Reg, 2);
            masm.ktestd(AMD64.k2, AMD64.k3);
            masm.jcc(AMD64Assembler.ConditionFlag.CarryClear, labelReturnZero);
            masm.evpmovwb(new AMD64Address(dst), AMD64.k3, tmp1Reg);
            masm.jmp(labelReturnLength);
            masm.bind(labelBelowThreshold);
        }
        if (masm.supports(AMD64.CPUFeature.SSE4_2)) {
            labelCopy32Loop = new Label();
            Label labelCopy16 = new Label();
            Label labelCopyTail = new Label();
            // Split the length: len = len & ~15 (chars in 16-char groups), result = len & 15.
            masm.movl(result, len);
            // -16711936 is 0xFF00FF00: the upper byte of each 16-bit char. It stays in tmp5 so
            // the vector mask can be rebuilt after labelCopy16.
            masm.movl(tmp5, -16711936);
            masm.andl(len, -16);
            masm.andl(result, 15);
            masm.testlAndJcc(len, len, AMD64Assembler.ConditionFlag.Zero, labelCopy16, false);
            // Replicate the mask across tmp1Reg; tmp4Reg accumulates (ORs) the loaded chars.
            masm.movdl(tmp1Reg, tmp5);
            masm.pshufd(tmp1Reg, tmp1Reg, 0);
            masm.pxor(tmp4Reg, tmp4Reg);
            masm.leaq(src, new AMD64Address(src, len, Stride.S2));
            masm.leaq(dst, new AMD64Address(dst, len, Stride.S1));
            masm.negq(len);
            masm.bind(labelCopy32Loop);
            // Load two 16-byte halves (8 chars each), test the accumulated upper bytes, then
            // pack the 16 chars into 16 bytes and store them.
            masm.movdqu(tmp2Reg, new AMD64Address(src, len, Stride.S2));
            masm.por(tmp4Reg, tmp2Reg);
            masm.movdqu(tmp3Reg, new AMD64Address(src, len, Stride.S2, 16));
            masm.por(tmp4Reg, tmp3Reg);
            masm.ptest(tmp4Reg, tmp1Reg);
            masm.jcc(AMD64Assembler.ConditionFlag.NotZero, labelReturnZero);
            masm.packuswb(tmp2Reg, tmp3Reg);
            masm.movdqu(new AMD64Address(dst, len, Stride.S1), tmp2Reg);
            masm.addqAndJcc(len, 16, AMD64Assembler.ConditionFlag.NotZero, labelCopy32Loop, false);
            masm.bind(labelCopy16);
            // Split the remainder again: len = remainder & ~7 (0 or 8), result = remainder & 7.
            masm.movl(len, result);
            masm.andl(len, -8);
            masm.andl(result, 7);
            masm.testlAndJcc(len, len, AMD64Assembler.ConditionFlag.Zero, labelCopyTail, true);
            // Rebuild the mask here because the setup before the 16-char loop is skipped when
            // there were no 16-char groups.
            masm.movdl(tmp1Reg, tmp5);
            masm.pshufd(tmp1Reg, tmp1Reg, 0);
            masm.pxor(tmp3Reg, tmp3Reg);
            masm.movdqu(tmp2Reg, new AMD64Address(src));
            masm.ptest(tmp2Reg, tmp1Reg);
            masm.jccb(AMD64Assembler.ConditionFlag.NotZero, labelReturnZero);
            // Pack against a zeroed register; only the low 8 bytes are stored.
            masm.packuswb(tmp2Reg, tmp3Reg);
            masm.movq(new AMD64Address(dst), tmp2Reg);
            masm.addq(src, 16);
            masm.addq(dst, 8);
            masm.bind(labelCopyTail);
            // Remaining tail count (0..7).
            masm.movl(len, result);
        }
        // Scalar path; nothing left means success.
        masm.testlAndJcc(len, len, AMD64Assembler.ConditionFlag.Zero, labelReturnLength, true);
        masm.leaq(src, new AMD64Address(src, len, Stride.S2));
        masm.leaq(dst, new AMD64Address(dst, len, Stride.S1));
        masm.negq(len);
        masm.bind(labelCopyCharsLoop);
        // One char per iteration, using result as scratch: fail if any of bits 8..15
        // (65280 == 0xFF00) is set, otherwise store the low byte.
        masm.movzwl(result, new AMD64Address(src, len, Stride.S2));
        masm.testlAndJcc(result, 65280, AMD64Assembler.ConditionFlag.NotZero, labelReturnZero, true);
        masm.movb(new AMD64Address(dst, len, Stride.S1), result);
        masm.incqAndJcc(len, AMD64Assembler.ConditionFlag.NotZero, labelCopyCharsLoop, false);
        masm.bind(labelReturnLength);
        // Success: the saved length becomes the result.
        masm.pop(result);
        masm.jmpb(labelDone);
        masm.bind(labelReturnZero);
        // Failure: result = 0, and the saved length is discarded by bumping rsp by 8 bytes.
        masm.xorl(result, result);
        masm.addq(AMD64.rsp, 8);
        masm.bind(labelDone);
    }

    /**
     * Reports {@code true} exactly when {@link #emitCode} selects the legacy sequence
     * (spec version below 22), which is the one that pushes to and pops from the stack.
     */
    @Override
    public boolean modifiesStackPointer() {
        return JavaVersionUtil.JAVA_SPEC < 22;
    }
}

