/*
 * Decompiled with CFR 0.152.
 */
package jdk.graal.compiler.lir.aarch64;

import jdk.graal.compiler.asm.Label;
import jdk.graal.compiler.asm.aarch64.AArch64ASIMDAssembler;
import jdk.graal.compiler.asm.aarch64.AArch64Address;
import jdk.graal.compiler.asm.aarch64.AArch64Assembler;
import jdk.graal.compiler.asm.aarch64.AArch64MacroAssembler;
import jdk.graal.compiler.core.common.LIRKind;
import jdk.graal.compiler.lir.LIRInstruction;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.Opcode;
import jdk.graal.compiler.lir.aarch64.AArch64LIRInstruction;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.graal.compiler.lir.gen.LIRGeneratorTool;
import jdk.vm.ci.aarch64.AArch64;
import jdk.vm.ci.aarch64.AArch64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.AllocatableValue;
import jdk.vm.ci.meta.PlatformKind;
import jdk.vm.ci.meta.Value;

/**
 * LIR instruction that emits AArch64 code copying {@code len} bytes from {@code src} into
 * consecutive 16-bit elements at {@code dst}.
 *
 * <p>Inputs shorter than {@link #CHUNK_ELEMENT_COUNT} are handled by a byte-at-a-time loop;
 * everything else goes through a 128-bit SIMD loop that widens one chunk of bytes per iteration.
 * The input registers are copied into temporaries first, so the emitted code only ever modifies
 * the temporaries.
 */
@Opcode("AArch64_STRING_INFLATE")
public final class AArch64StringLatin1InflateOp extends AArch64LIRInstruction {
    public static final LIRInstructionClass<AArch64StringLatin1InflateOp> TYPE = LIRInstructionClass.create(AArch64StringLatin1InflateOp.class);

    /** Number of source bytes consumed by one iteration of the SIMD loop. */
    private static final int CHUNK_ELEMENT_COUNT = 16;

    /** Value handed to {@code masm.align} right before each loop head is bound. */
    private static final int LOOP_HEAD_ALIGNMENT = 16;

    @LIRInstruction.Use({LIRInstruction.OperandFlag.REG})
    protected AllocatableValue len;
    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    protected AllocatableValue src;
    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    protected AllocatableValue dst;
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    protected AllocatableValue temp1;
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    protected AllocatableValue temp2;
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    protected AllocatableValue temp3;
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    protected AllocatableValue vectorTemp1;
    @LIRInstruction.Temp({LIRInstruction.OperandFlag.REG})
    protected AllocatableValue vectorTemp2;

    /**
     * Creates the instruction and allocates its temporaries.
     *
     * @param tool generator used to allocate the three QWORD temporaries and the two vector
     *            temporaries
     * @param src source address; must have platform kind QWORD
     * @param dst destination address; must have platform kind QWORD
     * @param len number of elements to inflate; must have platform kind DWORD
     */
    public AArch64StringLatin1InflateOp(LIRGeneratorTool tool, AllocatableValue src, AllocatableValue dst, AllocatableValue len) {
        super(TYPE);
        assert len.getPlatformKind().equals(AArch64Kind.DWORD) : len;
        assert src.getPlatformKind().equals(AArch64Kind.QWORD) : src;
        assert dst.getPlatformKind().equals(AArch64Kind.QWORD) : dst;

        this.len = len;
        this.src = src;
        this.dst = dst;

        // General-purpose temporaries: working copies of length, source and destination.
        LIRKind wordKind = LIRKind.value(AArch64Kind.QWORD);
        this.temp1 = tool.newVariable(wordKind);
        this.temp2 = tool.newVariable(wordKind);
        this.temp3 = tool.newVariable(wordKind);

        // Vector temporaries use the largest kind storable in the SIMD register category.
        LIRKind simdKind = LIRKind.value(tool.target().arch.getLargestStorableKind(AArch64.SIMD));
        this.vectorTemp1 = tool.newVariable(simdKind);
        this.vectorTemp2 = tool.newVariable(simdKind);
    }

    /**
     * Emits the complete inflate sequence: a zero-length early exit, the copy of the inputs into
     * temporaries, and a dispatch between the scalar and the SIMD implementation.
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
        Label vectorPath = new Label();
        Label exit = new Label();

        Register remaining = ValueUtil.asRegister(temp1);
        Register srcCursor = ValueUtil.asRegister(temp2);
        Register dstCursor = ValueUtil.asRegister(temp3);
        Register lenReg = ValueUtil.asRegister(len);

        // Nothing to do for a zero length.
        masm.cbz(32, lenReg, exit);

        // Widen the 32-bit length to 64 bits so it can take part in address arithmetic.
        masm.sxt(64, 32, remaining, lenReg);

        // Work on copies so that the src/dst input registers stay untouched.
        masm.mov(64, srcCursor, ValueUtil.asRegister(src));
        masm.mov(64, dstCursor, ValueUtil.asRegister(dst));

        // At least one full chunk available -> take the SIMD path.
        masm.compare(64, remaining, CHUNK_ELEMENT_COUNT);
        masm.branchConditionally(AArch64Assembler.ConditionFlag.GE, vectorPath);

        emitScalar(masm, srcCursor, dstCursor, remaining);
        masm.jmp(exit);

        masm.bind(vectorPath);
        emitSIMD(masm, srcCursor, dstCursor, remaining);

        masm.bind(exit);
    }

    /**
     * Emits a loop that loads one byte, stores it as a 16-bit value and decrements {@code count}
     * until it is no longer positive. Both address registers are advanced by post-indexing.
     */
    private static void emitScalar(AArch64MacroAssembler masm, Register srcAddress, Register destAddress, Register count) {
        Label nextElement = new Label();
        try (AArch64MacroAssembler.ScratchRegister scratch = masm.getScratchRegister()) {
            Register element = scratch.getRegister();

            masm.align(LOOP_HEAD_ALIGNMENT);
            masm.bind(nextElement);

            // Read 8 bits, then step the source by one byte.
            AArch64Address byteLoad = AArch64Address.createImmediateAddress(8, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, srcAddress, 1);
            masm.ldr(8, element, byteLoad);

            // Write 16 bits, then step the destination by two bytes.
            AArch64Address halfwordStore = AArch64Address.createImmediateAddress(16, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, destAddress, 2);
            masm.str(16, element, halfwordStore);

            masm.subs(64, count, count, 1);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.GT, nextElement);
        }
    }

    /**
     * Emits the SIMD loop. Each iteration loads {@link #CHUNK_ELEMENT_COUNT} bytes, widens them
     * into two vector registers and stores the pair, i.e. twice as many bytes as were loaded.
     *
     * <p>When the length is not a multiple of the chunk size, the loop is re-entered one more
     * time with the source rewound to the final full chunk (which overlaps data that was already
     * processed) and the destination rewound by twice that distance.
     */
    private void emitSIMD(AArch64MacroAssembler masm, Register srcChunkAddress, Register destChunkAddress, Register length) {
        Register lowHalf = ValueUtil.asRegister(vectorTemp1);
        Register highHalf = ValueUtil.asRegister(vectorTemp2);

        Label chunkLoop = new Label();
        Label finished = new Label();

        try (AArch64MacroAssembler.ScratchRegister scratchA = masm.getScratchRegister();
                        AArch64MacroAssembler.ScratchRegister scratchB = masm.getScratchRegister()) {
            Register srcEnd = scratchA.getRegister();
            Register finalChunkStart = scratchB.getRegister();

            // srcEnd = src + length; finalChunkStart = srcEnd - chunk size.
            masm.add(64, srcEnd, srcChunkAddress, length);
            masm.sub(64, finalChunkStart, srcEnd, CHUNK_ELEMENT_COUNT);

            masm.align(LOOP_HEAD_ALIGNMENT);
            masm.bind(chunkLoop);

            // Load one chunk and advance the source past it.
            AArch64Address chunkLoad = AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, srcChunkAddress, CHUNK_ELEMENT_COUNT);
            masm.fldr(128, lowHalf, chunkLoad);

            // Widen the upper half first: the second widening overwrites its own source register.
            masm.neon.uxtl2VV(AArch64ASIMDAssembler.ElementSize.Byte, highHalf, lowHalf);
            masm.neon.uxtlVV(AArch64ASIMDAssembler.ElementSize.Byte, lowHalf, lowHalf);

            // Store both widened halves and advance the destination by twice the chunk size.
            AArch64Address pairStore = AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, destChunkAddress, 2 * CHUNK_ELEMENT_COUNT);
            masm.fstp(128, lowHalf, highHalf, pairStore);

            // Keep going while the source is still below the start of the final full chunk.
            masm.cmp(64, srcChunkAddress, finalChunkStart);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.LO, chunkLoop);

            // Source reached (or passed) the end: everything has been written.
            masm.cmp(64, srcChunkAddress, srcEnd);
            masm.branchConditionally(AArch64Assembler.ConditionFlag.HS, finished);

            // Tail: srcChunkAddress temporarily holds how far the source is past finalChunkStart.
            masm.sub(64, srcChunkAddress, srcChunkAddress, finalChunkStart);
            // Pull the destination back by twice that distance (shift left by one).
            masm.sub(64, destChunkAddress, destChunkAddress, srcChunkAddress, AArch64Assembler.ShiftType.LSL, 1);
            // Restart from the final full chunk and run the loop body once more.
            masm.mov(64, srcChunkAddress, finalChunkStart);
            masm.jmp(chunkLoop);

            masm.bind(finished);
        }
    }
}

