/*
 * Decompiled with CFR 0.152.
 */
package jdk.graal.compiler.lir.aarch64;

import java.util.Arrays;
import jdk.graal.compiler.asm.Label;
import jdk.graal.compiler.asm.aarch64.AArch64ASIMDAssembler;
import jdk.graal.compiler.asm.aarch64.AArch64Address;
import jdk.graal.compiler.asm.aarch64.AArch64Assembler;
import jdk.graal.compiler.asm.aarch64.AArch64MacroAssembler;
import jdk.graal.compiler.debug.GraalError;
import jdk.graal.compiler.lir.LIRInstruction;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.SyncPort;
import jdk.graal.compiler.lir.aarch64.AArch64AESEncryptOp;
import jdk.graal.compiler.lir.aarch64.AArch64LIRInstruction;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.vm.ci.aarch64.AArch64;
import jdk.vm.ci.aarch64.AArch64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.AllocatableValue;
import jdk.vm.ci.meta.Value;

/**
 * LIR instruction that emits AArch64 code for AES encryption in counter (CTR) mode.
 * <p>
 * According to the {@link SyncPort} annotation this class mirrors a region of HotSpot's
 * {@code stubGenerator_aarch64.cpp}. The emitted code follows this outline, where
 * {@code used} is the 32-bit value behind {@code usedPtrValue} and {@code encryptedCounter} is the
 * buffer behind {@code encryptedCounterValue}:
 *
 * <pre>
 * used = *usedPtr;
 * if (len == 0) goto DONE;
 * do {
 *     if (used >= 16) {
 *         if (len >= 4 * 16) {
 *             bulk path (emitCTRLargeBlock);
 *         }
 *         if (len == 0) goto DONE;
 *         v0 = counter; counter = counter + 1;
 *         for (;;) {
 *             v0 = encrypt(v0); encryptedCounter = v0;
 *             used = 0;
 *             if (len < 16) break;
 *             out[offset .. offset+15] = in[offset .. offset+15] ^ v0;
 *             used = 16; offset += 16; len -= 16;
 *             if (len == 0) goto DONE;
 *             v0 = counter; counter = counter + 1;
 *         }
 *     }
 *     out[offset] = in[offset] ^ encryptedCounter[used];
 *     offset++; used++; len--;
 * } while (len != 0);
 * DONE:
 * *usedPtr = used;
 * result = original len;
 * </pre>
 *
 * The general purpose registers r7, r10, r11 and r12 and every SIMD register are declared as
 * temporaries of this instruction.
 */
@SyncPort(from="https://github.com/openjdk/jdk/blob/1d117f65f06456ae571aecc146542c2f79d402cf/src/hotspot/cpu/aarch64/stubGenerator_aarch64.cpp#L2986-L3266", sha1="75a3a4dabdc42e5e23bbec0cb448d09fb0d7b129")
public final class AArch64CounterModeAESCryptOp
extends AArch64LIRInstruction {
    public static final LIRInstructionClass<AArch64CounterModeAESCryptOp> TYPE = LIRInstructionClass.create(AArch64CounterModeAESCryptOp.class);

    /** Displacement from the key pointer at which the 32-bit value kept in r11 is loaded. */
    private final int lengthOffset;

    /** Base address of the input bytes (QWORD). */
    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    private Value inValue;

    /** Base address of the output bytes (QWORD). */
    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    private Value outValue;

    /** Pointer handed to the AES key loading helper (QWORD). */
    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    private Value keyValue;

    /** Address of the 16-byte counter block, read and written back by the emitted code (QWORD). */
    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    private Value counterValue;

    /** Number of bytes to process (DWORD). */
    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    private Value lenValue;

    /** Address of the 16-byte buffer receiving the most recently encrypted counter (QWORD). */
    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    private Value encryptedCounterValue;

    /** Address of the 32-bit "used" index into the encrypted counter buffer (QWORD). */
    @LIRInstruction.Alive({LIRInstruction.OperandFlag.REG})
    private Value usedPtrValue;

    /** Receives a copy of {@link #lenValue} once the emitted code has run (DWORD). */
    @LIRInstruction.Def({LIRInstruction.OperandFlag.REG})
    protected Value resultValue;

    /** r7, r10, r11 and r12, which {@link #emitCode} uses as fixed working registers. */
    @LIRInstruction.Temp
    protected Value[] gpTemps;

    /** Every register listed in {@code AArch64.simdRegisters}. */
    @LIRInstruction.Temp
    protected Value[] simdTemps;

    /**
     * Creates the instruction.
     *
     * @param inValue address of the input data
     * @param outValue address of the output data
     * @param keyValue pointer passed to the key loading helper
     * @param counterValue address of the 16-byte counter block
     * @param lenValue number of bytes to process
     * @param encryptedCounterValue address of the 16-byte encrypted counter buffer
     * @param usedPtrValue address of the 32-bit "used" index
     * @param resultValue destination that receives the processed length
     * @param lengthOffset displacement from the key pointer of the 32-bit value loaded into r11
     */
    public AArch64CounterModeAESCryptOp(AllocatableValue inValue, AllocatableValue outValue, AllocatableValue keyValue, AllocatableValue counterValue, AllocatableValue lenValue, AllocatableValue encryptedCounterValue, AllocatableValue usedPtrValue, AllocatableValue resultValue, int lengthOffset) {
        super(TYPE);
        this.inValue = inValue;
        this.outValue = outValue;
        this.keyValue = keyValue;
        this.counterValue = counterValue;
        this.lenValue = lenValue;
        this.encryptedCounterValue = encryptedCounterValue;
        this.usedPtrValue = usedPtrValue;
        this.resultValue = resultValue;
        this.lengthOffset = lengthOffset;

        // Fixed working registers of emitCode.
        this.gpTemps = new Value[]{AArch64.r7.asValue(), AArch64.r10.asValue(), AArch64.r11.asValue(), AArch64.r12.asValue()};

        // Declare every SIMD register as a temporary.
        Register[] vectorRegisters = AArch64.simdRegisters.toArray();
        Value[] vectorTemps = new Value[vectorRegisters.length];
        for (int i = 0; i < vectorRegisters.length; i++) {
            vectorTemps[i] = vectorRegisters[i].asValue();
        }
        this.simdTemps = vectorTemps;
    }

    /**
     * Emits the complete CTR routine outlined in the class comment.
     *
     * @param crb compilation result builder (not used by this method)
     * @param masm assembler receiving the instructions
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
        guaranteeKind(inValue, AArch64Kind.QWORD, "Invalid inValue kind: %s");
        guaranteeKind(outValue, AArch64Kind.QWORD, "Invalid outValue kind: %s");
        guaranteeKind(keyValue, AArch64Kind.QWORD, "Invalid keyValue kind: %s");
        guaranteeKind(counterValue, AArch64Kind.QWORD, "Invalid counterValue kind: %s");
        guaranteeKind(lenValue, AArch64Kind.DWORD, "Invalid lenValue kind: %s");
        guaranteeKind(encryptedCounterValue, AArch64Kind.QWORD, "Invalid encryptedCounterValue kind: %s");
        guaranteeKind(usedPtrValue, AArch64Kind.QWORD, "Invalid usedPtrValue kind: %s");
        guaranteeKind(resultValue, AArch64Kind.DWORD, "Invalid resultValue kind: %s");

        final Register inReg = ValueUtil.asRegister(inValue);
        final Register outReg = ValueUtil.asRegister(outValue);
        final Register keyReg = ValueUtil.asRegister(keyValue);
        final Register counterReg = ValueUtil.asRegister(counterValue);
        final Register totalLen = ValueUtil.asRegister(lenValue);
        final Register keystreamBuf = ValueUtil.asRegister(encryptedCounterValue);
        final Register usedPtrReg = ValueUtil.asRegister(usedPtrValue);

        // Working registers; all of them are listed in gpTemps.
        final Register remaining = AArch64.r10;
        final Register usedReg = AArch64.r12;
        final Register cursor = AArch64.r7;
        final Register keyLen = AArch64.r11;

        final int blockBytes = 16;
        final int blocksPerBulkPass = 4;
        final AArch64Assembler.ConditionFlag unsignedLower = AArch64Assembler.ConditionFlag.LO;
        final AArch64ASIMDAssembler.ASIMDSize fullWidth = AArch64ASIMDAssembler.ASIMDSize.FullReg;

        final Label afterBulk = new Label();
        final Label finished = new Label();

        // used = *usedPtr; nothing else to do for an empty request.
        masm.ldr(32, usedReg, wordAt(usedPtrReg));
        masm.cbz(32, totalLen, finished);

        masm.mov(32, remaining, totalLen);
        masm.mov(cursor, 0);

        // Load the 32-bit value at key + lengthOffset and let the shared AES helper load the keys.
        masm.ldr(32, keyLen, AArch64Address.createImmediateAddress(32, AArch64Address.AddressingMode.IMMEDIATE_SIGNED_UNSCALED, keyReg, lengthOffset));
        AArch64AESEncryptOp.aesencLoadkeys(masm, keyReg, keyLen);

        final Label outerLoop = new Label();
        final Label singleByte = new Label();
        masm.bind(outerLoop);

        // While bytes of the previously encrypted counter are still unused, consume them one at a time.
        masm.compare(32, usedReg, blockBytes);
        masm.branchConditionally(unsignedLower, singleByte);

        // Take the bulk path only when at least one full bulk pass of data is left.
        masm.subs(32, AArch64.zr, remaining, blocksPerBulkPass * blockBytes);
        masm.branchConditionally(unsignedLower, afterBulk);
        emitCTRLargeBlock(masm, blocksPerBulkPass, inReg, outReg, counterReg, usedPtrReg, remaining, usedReg, cursor, keyLen);
        masm.bind(afterBulk);
        masm.cbz(32, remaining, finished);

        // v4 = {0, 1} as 64-bit lanes; v5 doubles as the temporary of beAdd128x64.
        emitCounterIncrementVector(masm, AArch64.v4, AArch64.v5);

        // v0 keeps the current counter while the incremented one is written back to memory.
        masm.fldr(128, AArch64.v0, quadAt(counterReg));
        emitAdvanceAndStoreCounter(masm, counterReg, AArch64.v0);

        final Label blockLoop = new Label();
        masm.bind(blockLoop);

        // Encrypt the counter in v0 and save the outcome for the byte-wise path.
        // NOTE(review): relies on aesecbEncrypt leaving the encrypted block in v0 - confirm in AArch64AESEncryptOp.
        AArch64AESEncryptOp.aesecbEncrypt(masm, Register.None, Register.None, keyLen, AArch64.v0, 1);
        masm.fstr(128, AArch64.v0, quadAt(keystreamBuf));

        // Is there still a whole block of input?
        masm.mov(usedReg, 0);
        masm.compare(32, remaining, blockBytes);
        masm.branchConditionally(unsignedLower, singleByte);

        // Yes: XOR 16 input bytes with the encrypted counter.
        masm.fldr(128, AArch64.v1, indexedAt(128, inReg, cursor));
        masm.neon.eorVVV(fullWidth, AArch64.v1, AArch64.v1, AArch64.v0);
        masm.fstr(128, AArch64.v1, indexedAt(128, outReg, cursor));
        masm.mov(usedReg, blockBytes);
        masm.add(64, cursor, cursor, blockBytes);
        masm.sub(32, remaining, remaining, blockBytes);
        masm.cbz(32, remaining, finished);

        // v16 holds the counter value most recently written to memory: it becomes the next block
        // to encrypt (copied to v0) and is then advanced and written back once more.
        masm.neon.orrVVV(fullWidth, AArch64.v0, AArch64.v16, AArch64.v16);
        emitAdvanceAndStoreCounter(masm, counterReg, AArch64.v16);
        masm.jmp(blockLoop);

        masm.bind(singleByte);
        // out[offset] = in[offset] ^ encryptedCounter[used], then advance by one byte.
        try (AArch64MacroAssembler.ScratchRegister firstScratch = masm.getScratchRegister();
             AArch64MacroAssembler.ScratchRegister secondScratch = masm.getScratchRegister()) {
            final Register dataByte = firstScratch.getRegister();
            final Register keystreamByte = secondScratch.getRegister();
            masm.ldr(8, dataByte, indexedAt(8, inReg, cursor));
            masm.ldr(8, keystreamByte, indexedAt(8, keystreamBuf, usedReg));
            masm.eor(64, dataByte, dataByte, keystreamByte);
            masm.str(8, dataByte, indexedAt(8, outReg, cursor));
            masm.add(64, cursor, cursor, 1);
            masm.add(64, usedReg, usedReg, 1);
            masm.sub(32, remaining, remaining, 1);
            masm.cbnz(32, remaining, outerLoop);
        }

        masm.bind(finished);
        // Publish the updated index and hand the original length back as the result.
        masm.str(32, usedReg, wordAt(usedPtrReg));
        masm.mov(32, ValueUtil.asRegister(resultValue), totalLen);
    }

    /**
     * Fails with {@code message} (formatted with {@code value}) unless the platform kind of
     * {@code value} equals {@code expected}.
     */
    private static void guaranteeKind(Value value, AArch64Kind expected, String message) {
        GraalError.guarantee(value.getPlatformKind().equals(expected), message, value);
    }

    /** Creates a base-register-only address for a 32-bit access at {@code base}. */
    private static AArch64Address wordAt(Register base) {
        return AArch64Address.createBaseRegisterOnlyAddress(32, base);
    }

    /** Creates a base-register-only address for a 128-bit access at {@code base}. */
    private static AArch64Address quadAt(Register base) {
        return AArch64Address.createBaseRegisterOnlyAddress(128, base);
    }

    /** Creates a register-offset address {@code base + index} (index not scaled) for a {@code bitSize}-bit access. */
    private static AArch64Address indexedAt(int bitSize, Register base, Register index) {
        return AArch64Address.createRegisterOffsetAddress(bitSize, base, index, false);
    }

    /**
     * Emits the set-up of the counter increment: {@code increment} is filled with zero words,
     * {@code one} with words of value 1, and then word element 2 of {@code one} is inserted into
     * word element 2 of {@code increment}. Viewed as two 64-bit lanes, {@code increment} then
     * holds {@code {0, 1}}.
     */
    private static void emitCounterIncrementVector(AArch64MacroAssembler masm, Register increment, Register one) {
        final AArch64ASIMDAssembler.ElementSize word = AArch64ASIMDAssembler.ElementSize.Word;
        masm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, word, increment, 0L);
        masm.neon.moveVI(AArch64ASIMDAssembler.ASIMDSize.FullReg, word, one, 1L);
        masm.neon.insXX(word, increment, 2, one, 2);
    }

    /**
     * Emits: byte-reverse each 64-bit lane of {@code source} into v16, add the increment held in v4
     * (v5 is the temporary), reverse v16 back and store it to the address in {@code counter}.
     * {@code source} itself is only overwritten when it is v16.
     */
    private static void emitAdvanceAndStoreCounter(AArch64MacroAssembler masm, Register counter, Register source) {
        final AArch64ASIMDAssembler.ASIMDSize fullWidth = AArch64ASIMDAssembler.ASIMDSize.FullReg;
        final AArch64ASIMDAssembler.ElementSize bytes = AArch64ASIMDAssembler.ElementSize.Byte;
        masm.neon.rev64VV(fullWidth, bytes, AArch64.v16, source);
        beAdd128x64(masm, AArch64.v16, AArch64.v16, AArch64.v4, AArch64.v5);
        masm.neon.rev64VV(fullWidth, bytes, AArch64.v16, AArch64.v16);
        masm.fstr(128, AArch64.v16, quadAt(counter));
    }

    /**
     * Emits a 128-bit addition on a value held as two 64-bit lanes, with the carry out of lane 1
     * propagated into lane 0.
     * <p>
     * Sequence: lane-wise add; an unsigned "higher" compare of {@code inc} against the sum, which
     * yields an all-ones lane where the addition wrapped; an {@code ext} by 8 bytes that swaps the
     * two lanes of that mask; and a lane-wise subtract of the swapped mask, i.e. subtracting -1
     * from the other lane when a wrap occurred.
     * <p>
     * {@code tmp} is overwritten before {@code result} is read for the last time, so callers must
     * pass different registers for {@code result} and {@code tmp}.
     *
     * @param result destination of the sum
     * @param in value to increment
     * @param inc increment, {@code {0, 1}} at both call sites in this class
     * @param tmp scratch vector register
     */
    private static void beAdd128x64(AArch64MacroAssembler masm, Register result, Register in, Register inc, Register tmp) {
        final AArch64ASIMDAssembler.ASIMDSize fullWidth = AArch64ASIMDAssembler.ASIMDSize.FullReg;
        final AArch64ASIMDAssembler.ElementSize lane64 = AArch64ASIMDAssembler.ElementSize.DoubleWord;
        masm.neon.addVVV(fullWidth, lane64, result, in, inc);
        // tmp lane = all ones where inc > result, i.e. where the addition overflowed.
        masm.neon.cmhiVVV(fullWidth, lane64, tmp, inc, result);
        // Move the overflow mask to the opposite lane.
        masm.neon.extVVV(fullWidth, tmp, tmp, tmp, 8);
        masm.neon.subVVV(fullWidth, lane64, result, result, tmp);
    }

    /** Emits {@code sp -= 64} followed by a four-register store of {@code a..d} at the new sp. */
    private static void pushFourVectors(AArch64MacroAssembler masm, Register a, Register b, Register c, Register d) {
        masm.sub(64, AArch64.sp, AArch64.sp, 64);
        masm.neon.st1MultipleVVVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, AArch64ASIMDAssembler.ElementSize.Byte, a, b, c, d, AArch64Address.createBaseRegisterOnlyAddress(128, AArch64.sp));
    }

    /** Emits a four-register load of {@code a..d} from {@code base} with a post-index immediate of 64. */
    private static void loadFourPostIndexed(AArch64MacroAssembler masm, Register base, Register a, Register b, Register c, Register d) {
        final AArch64ASIMDAssembler.ASIMDSize fullWidth = AArch64ASIMDAssembler.ASIMDSize.FullReg;
        final AArch64ASIMDAssembler.ElementSize bytes = AArch64ASIMDAssembler.ElementSize.Byte;
        masm.neon.ld1MultipleVVVV(fullWidth, bytes, a, b, c, d, AArch64Address.createStructureImmediatePostIndexAddress(AArch64ASIMDAssembler.ASIMDInstruction.LD1_MULTIPLE_4R, fullWidth, bytes, base, 64));
    }

    /** Emits a four-register store of {@code a..d} to {@code base} with a post-index immediate of 64. */
    private static void storeFourPostIndexed(AArch64MacroAssembler masm, Register base, Register a, Register b, Register c, Register d) {
        final AArch64ASIMDAssembler.ASIMDSize fullWidth = AArch64ASIMDAssembler.ASIMDSize.FullReg;
        final AArch64ASIMDAssembler.ElementSize bytes = AArch64ASIMDAssembler.ElementSize.Byte;
        masm.neon.st1MultipleVVVV(fullWidth, bytes, a, b, c, d, AArch64Address.createStructureImmediatePostIndexAddress(AArch64ASIMDAssembler.ASIMDInstruction.ST1_MULTIPLE_4R, fullWidth, bytes, base, 64));
    }

    /**
     * Emits the bulk path, which handles {@code bulkWidth} 16-byte blocks per loop iteration.
     * <p>
     * The emitted loop processes {@code len} rounded down to a multiple of
     * {@code 16 * bulkWidth} bytes and only terminates when the remaining count reaches exactly
     * zero, so it must be entered with at least one full pass of data; {@link #emitCode} branches
     * around it otherwise.
     * <p>
     * v8-v11 (and v12-v15 when {@code bulkWidth} is 8) are saved on the stack first and reloaded
     * afterwards because the loop loads input data into them. When the emitted code finishes:
     * {@code len} is reduced by the number of bytes processed, {@code offset} is increased by it,
     * {@code in} and {@code out} hold their entry values again, the counter has been written back
     * to the address in {@code counter}, and {@code used} is 16 and has been stored through
     * {@code usedPtr}.
     *
     * @param bulkWidth number of blocks per iteration; must be 4 or 8
     */
    private static void emitCTRLargeBlock(AArch64MacroAssembler masm, int bulkWidth, Register in, Register out, Register counter, Register usedPtr, Register len, Register used, Register offset, Register keylen) {
        GraalError.guarantee(bulkWidth == 4 || bulkWidth == 8, "bulk_width must be 4 or 8");

        final boolean eightWide = bulkWidth == 8;
        final int bytesPerPass = 16 * bulkWidth;
        final AArch64ASIMDAssembler.ASIMDSize fullWidth = AArch64ASIMDAssembler.ASIMDSize.FullReg;
        final AArch64ASIMDAssembler.ElementSize bytes = AArch64ASIMDAssembler.ElementSize.Byte;

        try (AArch64MacroAssembler.ScratchRegister scratch = masm.getScratchRegister()) {
            final Register lenOnEntry = scratch.getRegister();
            final Label passLoop = new Label();

            // Save the vector registers that the loop overwrites with input data.
            if (eightWide) {
                pushFourVectors(masm, AArch64.v12, AArch64.v13, AArch64.v14, AArch64.v15);
            }
            pushFourVectors(masm, AArch64.v8, AArch64.v9, AArch64.v10, AArch64.v11);

            // Remember the full length, then keep only whole passes in len.
            masm.mov(32, lenOnEntry, len);
            masm.and(32, len, len, -bytesPerPass);

            // Turn in/out into running pointers; this is undone after the loop.
            masm.add(64, in, in, offset);
            masm.add(64, out, out, offset);

            // v16 = counter with the bytes of each 64-bit lane reversed; it stays in that form
            // for the whole loop.
            masm.fldr(128, AArch64.v0, AArch64Address.createBaseRegisterOnlyAddress(128, counter));
            masm.neon.rev64VV(fullWidth, bytes, AArch64.v16, AArch64.v0);

            masm.bind(passLoop);

            // v8 = {0, 1}. It has to be rebuilt on every pass because v8/v9 receive input below.
            emitCounterIncrementVector(masm, AArch64.v8, AArch64.v9);

            // Materialise bulkWidth consecutive counter blocks in v0, v1, ...
            for (int block = 0; block < bulkWidth; block++) {
                Register counterBlock = AArch64AESEncryptOp.asFloatRegister(AArch64.v0, block);
                masm.neon.rev64VV(fullWidth, bytes, counterBlock, AArch64.v16);
                beAdd128x64(masm, AArch64.v16, AArch64.v16, AArch64.v8, AArch64.v9);
            }

            // Fetch the input, encrypt the counters, then XOR the two together.
            loadFourPostIndexed(masm, in, AArch64.v8, AArch64.v9, AArch64.v10, AArch64.v11);
            AArch64AESEncryptOp.aesecbEncrypt(masm, Register.None, Register.None, keylen, AArch64.v0, bulkWidth);
            if (eightWide) {
                loadFourPostIndexed(masm, in, AArch64.v12, AArch64.v13, AArch64.v14, AArch64.v15);
            }
            for (int block = 0; block < bulkWidth; block++) {
                Register keystream = AArch64AESEncryptOp.asFloatRegister(AArch64.v0, block);
                Register input = AArch64AESEncryptOp.asFloatRegister(AArch64.v8, block);
                masm.neon.eorVVV(fullWidth, keystream, keystream, input);
            }

            // Write the results.
            storeFourPostIndexed(masm, out, AArch64.v0, AArch64.v1, AArch64.v2, AArch64.v3);
            if (eightWide) {
                storeFourPostIndexed(masm, out, AArch64.v4, AArch64.v5, AArch64.v6, AArch64.v7);
            }

            masm.sub(32, len, len, bytesPerPass);
            masm.cbnz(32, len, passLoop);

            // Write the advanced counter back in its memory byte order.
            masm.neon.rev64VV(fullWidth, bytes, AArch64.v16, AArch64.v16);
            masm.fstr(128, AArch64.v16, AArch64Address.createBaseRegisterOnlyAddress(128, counter));

            // Reload the saved vector registers in reverse order of the pushes.
            loadFourPostIndexed(masm, AArch64.sp, AArch64.v8, AArch64.v9, AArch64.v10, AArch64.v11);
            if (eightWide) {
                loadFourPostIndexed(masm, AArch64.sp, AArch64.v12, AArch64.v13, AArch64.v14, AArch64.v15);
            }

            // lenOnEntry becomes the number of bytes processed. Subtracting the advanced offset
            // from the running pointers restores the original in/out base addresses.
            masm.mov(32, len, lenOnEntry);
            masm.and(32, lenOnEntry, lenOnEntry, -bytesPerPass);
            masm.add(64, offset, offset, lenOnEntry);
            masm.sub(64, in, in, offset);
            masm.sub(64, out, out, offset);
            masm.sub(32, len, len, lenOnEntry);

            // Mark the encrypted counter buffer as fully consumed.
            masm.mov(used, 16);
            masm.str(32, used, AArch64Address.createBaseRegisterOnlyAddress(32, usedPtr));
        }
    }
}

