/*
 * Decompiled with CFR 0.152.
 */
package jdk.graal.compiler.lir.amd64;

import jdk.graal.compiler.asm.Label;
import jdk.graal.compiler.asm.amd64.AMD64Address;
import jdk.graal.compiler.asm.amd64.AMD64Assembler;
import jdk.graal.compiler.asm.amd64.AMD64MacroAssembler;
import jdk.graal.compiler.asm.amd64.AVXKind;
import jdk.graal.compiler.debug.GraalError;
import jdk.graal.compiler.lir.LIRInstruction;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.SyncPort;
import jdk.graal.compiler.lir.SyncPorts;
import jdk.graal.compiler.lir.amd64.AMD64LIRHelper;
import jdk.graal.compiler.lir.amd64.AMD64LIRInstruction;
import jdk.graal.compiler.lir.asm.ArrayDataPointerConstant;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.vm.ci.amd64.AMD64;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.AllocatableValue;
import jdk.vm.ci.meta.Value;

@SyncPorts(value={@SyncPort(from="https://github.com/openjdk/jdk/blob/79345bbbae2564f9f523859d1227a1784293b20f/src/hotspot/cpu/x86/stubGenerator_x86_64.cpp#L1560-L1595", sha1="aa94accda4424ad1bafadcd9dd85af929f92e7bb"), @SyncPort(from="https://github.com/openjdk/jdk/blob/431d4f7e18369466eedd00926a5162a1461d0b25/src/hotspot/cpu/x86/macroAssembler_x86_sha.cpp#L1037-L1520", sha1="a13f01c5f15f95cbdb6acb082866aa3f14bc94b4")})
public final class AMD64SHA512Op
extends AMD64LIRInstruction {
    /** LIR instruction class descriptor for this op, created from the class literal. */
    public static final LIRInstructionClass<AMD64SHA512Op> TYPE = LIRInstructionClass.create(AMD64SHA512Op.class);
    // Input buffer operand; the five-argument constructor guarantees it is in rdi.
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value bufValue;
    // Hash state operand; the five-argument constructor guarantees it is in rsi.
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG})
    private Value stateValue;
    // Offset operand; Value.ILLEGAL for the single-block constructor, required in rdx when multiBlock is true.
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value ofsValue;
    // Limit operand; Value.ILLEGAL for the single-block constructor, required in rcx when multiBlock is true.
    @LIRInstruction.Use(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value limitValue;
    // Fixed general-purpose and vector registers declared as temporaries; filled in by the constructor.
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    private Value[] temps;
    // Selects the multi-block code paths in emitCode.
    private final boolean multiBlock;
    // Table of 80 64-bit constants. emitCode points regTBL at it and adds 32-byte rows of it to the
    // message vectors before each group of four rounds.
    // NOTE(review): presumably the SHA-512 round constants K[0..79] — confirm against FIPS 180-4.
    static ArrayDataPointerConstant k512W = AMD64LIRHelper.pointerConstant(16, new long[]{4794697086780616226L, 8158064640168781261L, -5349999486874862801L, -1606136188198331460L, 4131703408338449720L, 6480981068601479193L, -7908458776815382629L, -6116909921290321640L, -2880145864133508542L, 1334009975649890238L, 2608012711638119052L, 6128411473006802146L, 8268148722764581231L, -9160688886553864527L, -7215885187991268811L, -4495734319001033068L, -1973867731355612462L, -1171420211273849373L, 1135362057144423861L, 2597628984639134821L, 3308224258029322869L, 5365058923640841347L, 6679025012923562964L, 8573033837759648693L, -7476448914759557205L, -6327057829258317296L, -5763719355590565569L, -4658551843659510044L, -4116276920077217854L, -3051310485924567259L, 489312712824947311L, 1452737877330783856L, 2861767655752347644L, 3322285676063803686L, 5560940570517711597L, 5996557281743188959L, 7280758554555802590L, 8532644243296465576L, -9096487096722542874L, -7894198246740708037L, -6719396339535248540L, -6333637450476146687L, -4446306890439682159L, -4076793802049405392L, -3345356375505022440L, -2983346525034927856L, -860691631967231958L, 1182934255886127544L, 1847814050463011016L, 2177327727835720531L, 2830643537854262169L, 3796741975233480872L, 4115178125766777443L, 5681478168544905931L, 6601373596472566643L, 7507060721942968483L, 8399075790359081724L, 8693463985226723168L, -8878714635349349518L, -8302665154208450068L, -8016688836872298968L, -6606660893046293015L, -4685533653050689259L, -4147400797238176981L, -3880063495543823972L, -3348786107499101689L, -1523767162380948706L, -757361751448694408L, 500013540394364858L, 748580250866718886L, 1242879168328830382L, 1977374033974150939L, 2944078676154940804L, 3659926193048069267L, 4368137639120453308L, 4836135668995329356L, 5532061633213252278L, 6448918945643986474L, 6902733635092675308L, 7801388544844847127L});
    // vpshufb control mask; emitCode loads it into xmm9 and applies it to every 32-byte chunk of input.
    // NOTE(review): the values look like a per-quadword byte reversal (0x0001020304050607, ...) — confirm.
    static ArrayDataPointerConstant pshuffleByteFlipMaskSha512 = AMD64LIRHelper.pointerConstant(16, new long[]{283686952306183L, 579005069656919567L, 1157726452361532951L, 1736447835066146335L});
    // AND mask loaded into xmm10 by emitCode: first two quadwords are zero, last two are all ones.
    static ArrayDataPointerConstant ymmMask = AMD64LIRHelper.pointerConstant(16, new long[]{0L, 0L, -1L, -1L});
    // Sizes in bytes of the slots of the scratch stack frame set up by emitCode.
    private static final int XFER_SIZE = 32;
    private static final int SRND_SIZE = 8;
    private static final int INP_SIZE = 8;
    private static final int INP_END_SIZE = 8;
    private static final int RSP_SAVE_SIZE = 8;
    private static final int GPR_SAVE_SIZE = 48;
    // Byte offsets of those slots from the aligned rsp; each offset equals the previous offset plus
    // the previous slot's size (0, 32, 40, 48, 56, 64).
    // XFER: four quadwords stored from xmm0 and read back one per round.
    private static final int OFFSET_XFER = 0;
    // SRND: loop counter for the round loops.
    private static final int OFFSET_SRND = 32;
    // INP: saved input pointer for the current block.
    private static final int OFFSET_INP = 40;
    // INP_END: pointer one past the last byte to hash.
    private static final int OFFSET_INP_END = 48;
    // RSP: value of rsp before the frame was allocated and aligned.
    private static final int OFFSET_RSP = 56;
    // GPR: six saved registers (rbp, rbx, r12-r15), 8 bytes each.
    private static final int OFFSET_GPR = 64;
    // Total frame size: OFFSET_GPR + GPR_SAVE_SIZE.
    private static final int STACK_SIZE = 112;

    /**
     * Creates the single-block variant of the op by delegating to the five-argument constructor
     * with {@code Value.ILLEGAL} for the offset and limit operands and {@code multiBlock == false}.
     *
     * @param bufValue input buffer operand (must be in rdi, as checked by the delegate)
     * @param stateValue hash state operand (must be in rsi, as checked by the delegate)
     */
    public AMD64SHA512Op(AllocatableValue bufValue, AllocatableValue stateValue) {
        this(bufValue, stateValue, Value.ILLEGAL, Value.ILLEGAL, false);
    }

    /**
     * Creates the op with fixed operand registers.
     *
     * <p>The emitted code addresses its operands through hard-coded registers, so the constructor
     * guarantees the expected assignment up front: {@code bufValue} in rdi, {@code stateValue} in
     * rsi and, only when {@code multiBlock} is true, {@code ofsValue} in rdx and
     * {@code limitValue} in rcx. For the single-block case the offset and limit operands are not
     * inspected and may be {@code Value.ILLEGAL}.
     *
     * @param bufValue input buffer operand, must be in rdi
     * @param stateValue hash state operand, must be in rsi
     * @param ofsValue offset operand, must be in rdx when {@code multiBlock} is true
     * @param limitValue limit operand, must be in rcx when {@code multiBlock} is true
     * @param multiBlock whether the multi-block code paths are emitted
     */
    public AMD64SHA512Op(AllocatableValue bufValue, AllocatableValue stateValue, AllocatableValue ofsValue, AllocatableValue limitValue, boolean multiBlock) {
        super(TYPE);
        GraalError.guarantee(ValueUtil.asRegister(bufValue).equals(AMD64.rdi), "expect bufValue at rdi, but was %s", bufValue);
        GraalError.guarantee(ValueUtil.asRegister(stateValue).equals(AMD64.rsi), "expect stateValue at rsi, but was %s", stateValue);
        GraalError.guarantee(!multiBlock || ValueUtil.asRegister(ofsValue).equals(AMD64.rdx), "expect ofsValue at rdx, but was %s", ofsValue);
        // The check is against rcx, so the diagnostic must name rcx as well (it used to say rdx).
        GraalError.guarantee(!multiBlock || ValueUtil.asRegister(limitValue).equals(AMD64.rcx), "expect limitValue at rcx, but was %s", limitValue);
        this.bufValue = bufValue;
        this.stateValue = stateValue;
        this.ofsValue = ofsValue;
        this.limitValue = limitValue;
        this.multiBlock = multiBlock;
        // Registers written by the emitted code that are not saved in its scratch frame
        // (rbp, rbx and r12-r15 are saved and restored by emitCode itself).
        this.temps = new Value[]{
            AMD64.rax.asValue(), AMD64.rcx.asValue(), AMD64.rdx.asValue(), AMD64.rsi.asValue(), AMD64.rdi.asValue(),
            AMD64.r8.asValue(), AMD64.r9.asValue(), AMD64.r10.asValue(), AMD64.r11.asValue(),
            AMD64.xmm0.asValue(), AMD64.xmm1.asValue(), AMD64.xmm2.asValue(), AMD64.xmm3.asValue(),
            AMD64.xmm4.asValue(), AMD64.xmm5.asValue(), AMD64.xmm6.asValue(), AMD64.xmm7.asValue(),
            AMD64.xmm8.asValue(), AMD64.xmm9.asValue(), AMD64.xmm10.asValue()
        };
    }

    /**
     * Emits the hashing routine.
     *
     * <p>Layout of the emitted code:
     * <ol>
     * <li>Prologue: push rdx and rcx, carve a 32-byte aligned scratch frame of {@code STACK_SIZE}
     * bytes, remember the old rsp in it and save rbp, rbx and r12-r15.</li>
     * <li>Compute the number of bytes to process (128 for a single block; for multi-block, the
     * number of 128-byte steps needed for the offset to reach the limit) and store the resulting
     * end pointer in the frame.</li>
     * <li>Per 128-byte block: load and shuffle the input into xmm4-xmm7, run four passes of 16
     * "round and schedule" steps followed by two passes of 8 "round only" steps, then add the
     * working registers back into the state.</li>
     * <li>Epilogue: restore the saved registers and rsp, pop rcx and rdx and, for multi-block,
     * recompute in rax the first offset that is not below the limit.</li>
     * </ol>
     *
     * @param crb compilation result builder used to record the addresses of the constant tables
     * @param masm assembler receiving the instructions
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AMD64MacroAssembler masm) {
        final Label nextBlock = new Label();
        final Label scheduleRounds = new Label();
        final Label plainRounds = new Label();
        final Label epilogue = new Label();
        final Label sizeLoop = new Label();
        final Label sizeLoopEnd = new Label();
        final Label resultLoop = new Label();
        final Label resultLoopEnd = new Label();

        final AVXKind.AVXSize ymm = AVXKind.AVXSize.YMM;

        // Vector register roles.
        final Register xfer = AMD64.xmm0;
        final Register w0 = AMD64.xmm4;
        final Register w1 = AMD64.xmm5;
        final Register w2 = AMD64.xmm6;
        final Register w3 = AMD64.xmm7;
        final Register flipMask = AMD64.xmm9;
        final Register laneMask = AMD64.xmm10;

        // General-purpose register roles. Note the aliasing: byteCount, ofs and e are all rdx,
        // and limit and c are both rcx; the argument roles end before the working roles begin.
        final Register input = AMD64.rdi;
        final Register state = AMD64.rsi;
        final Register byteCount = AMD64.rdx;
        final Register ofs = AMD64.rdx;
        final Register limit = AMD64.rcx;
        final Register table = AMD64.rbp;
        final Register a = AMD64.rax;
        final Register b = AMD64.rbx;
        final Register c = AMD64.rcx;
        final Register d = AMD64.r8;
        final Register e = AMD64.rdx;
        final Register f = AMD64.r9;
        final Register g = AMD64.r10;
        final Register h = AMD64.r11;

        final Register[] message = {w0, w1, w2, w3};
        final Register[] working = {a, b, c, d, e, f, g, h};
        final Register[] savedRegisters = {AMD64.rbp, AMD64.rbx, AMD64.r12, AMD64.r13, AMD64.r14, AMD64.r15};

        // ---- Prologue ----
        // rdx and rcx are pushed so that their incoming values are available again after the
        // frame is torn down (both are overwritten while hashing).
        masm.push(AMD64.rdx);
        masm.push(AMD64.rcx);
        // rax carries the unaligned rsp into its frame slot.
        masm.movq(AMD64.rax, AMD64.rsp);
        masm.subq(AMD64.rsp, STACK_SIZE);
        masm.andq(AMD64.rsp, -32);
        masm.movq(new AMD64Address(AMD64.rsp, OFFSET_RSP), AMD64.rax);
        for (int i = 0; i < savedRegisters.length; i++) {
            masm.movq(new AMD64Address(AMD64.rsp, OFFSET_GPR + 8 * i), savedRegisters[i]);
        }

        masm.vpblendd(AMD64.xmm0, AMD64.xmm0, AMD64.xmm1, 0xF0, AVXKind.AVXSize.XMM);
        masm.vpblendd(AMD64.xmm0, AMD64.xmm0, AMD64.xmm1, 0xF0, ymm);

        // ---- Number of bytes to process ----
        if (multiBlock) {
            // Step ofs towards limit in 128-byte increments, accumulating the total in rax.
            masm.xorq(AMD64.rax, AMD64.rax);
            masm.bind(sizeLoop);
            masm.cmpqAndJcc(ofs, limit, AMD64Assembler.ConditionFlag.AboveEqual, sizeLoopEnd, true);
            masm.addq(ofs, 128);
            masm.addq(AMD64.rax, 128);
            masm.jmpb(sizeLoop);
            masm.bind(sizeLoopEnd);
            masm.movq(byteCount, AMD64.rax);
            // Nothing to hash: go straight to the epilogue.
            masm.cmpqAndJcc(byteCount, 0, AMD64Assembler.ConditionFlag.Equal, epilogue, false);
        } else {
            masm.xorq(byteCount, byteCount);
            masm.addq(byteCount, 128);
        }
        // Turn the byte count into an end pointer and park it in the frame.
        masm.addq(byteCount, input);
        masm.movq(new AMD64Address(AMD64.rsp, OFFSET_INP_END), byteCount);

        // ---- Load the eight state words (g last, after the mask loads) ----
        masm.movq(a, new AMD64Address(state, 8 * 0));
        masm.movq(b, new AMD64Address(state, 8 * 1));
        masm.movq(c, new AMD64Address(state, 8 * 2));
        masm.movq(d, new AMD64Address(state, 8 * 3));
        masm.movq(e, new AMD64Address(state, 8 * 4));
        masm.movq(f, new AMD64Address(state, 8 * 5));
        masm.movq(h, new AMD64Address(state, 8 * 7));
        masm.vmovdqu(flipMask, AMD64LIRHelper.recordExternalAddress(crb, pshuffleByteFlipMaskSha512));
        masm.vmovdqu(laneMask, AMD64LIRHelper.recordExternalAddress(crb, ymmMask));
        masm.movq(g, new AMD64Address(state, 8 * 6));

        // ---- One 128-byte block per iteration ----
        masm.bind(nextBlock);
        masm.leaq(table, AMD64LIRHelper.recordExternalAddress(crb, k512W));
        // Load the block in four 32-byte chunks and shuffle each with the flip mask.
        for (int i = 0; i < message.length; i++) {
            masm.vmovdqu(message[i], new AMD64Address(input, 32 * i));
            masm.vpshufb(message[i], message[i], flipMask, ymm);
        }
        // The input register (rdi) doubles as a scratch register inside the round helpers.
        masm.movq(new AMD64Address(AMD64.rsp, OFFSET_INP), input);

        // Four passes of sixteen rounds that also extend the message schedule.
        masm.movslq(new AMD64Address(AMD64.rsp, OFFSET_SRND), 4);
        masm.align(16);
        masm.bind(scheduleRounds);

        masm.vpaddq(xfer, w0, new AMD64Address(table, 0 * 32), ymm);
        masm.vmovdqu(new AMD64Address(AMD64.rsp, OFFSET_XFER), xfer);
        sha512AVX2OneRoundAndSchedule(masm, w0, w1, w2, w3, a, b, c, d, e, f, g, h, 0);
        sha512AVX2OneRoundAndSchedule(masm, w0, w1, w2, w3, h, a, b, c, d, e, f, g, 1);
        sha512AVX2OneRoundAndSchedule(masm, w0, w1, w2, w3, g, h, a, b, c, d, e, f, 2);
        sha512AVX2OneRoundAndSchedule(masm, w0, w1, w2, w3, f, g, h, a, b, c, d, e, 3);

        masm.vpaddq(xfer, w1, new AMD64Address(table, 1 * 32), ymm);
        masm.vmovdqu(new AMD64Address(AMD64.rsp, OFFSET_XFER), xfer);
        sha512AVX2OneRoundAndSchedule(masm, w1, w2, w3, w0, e, f, g, h, a, b, c, d, 0);
        sha512AVX2OneRoundAndSchedule(masm, w1, w2, w3, w0, d, e, f, g, h, a, b, c, 1);
        sha512AVX2OneRoundAndSchedule(masm, w1, w2, w3, w0, c, d, e, f, g, h, a, b, 2);
        sha512AVX2OneRoundAndSchedule(masm, w1, w2, w3, w0, b, c, d, e, f, g, h, a, 3);

        masm.vpaddq(xfer, w2, new AMD64Address(table, 2 * 32), ymm);
        masm.vmovdqu(new AMD64Address(AMD64.rsp, OFFSET_XFER), xfer);
        sha512AVX2OneRoundAndSchedule(masm, w2, w3, w0, w1, a, b, c, d, e, f, g, h, 0);
        sha512AVX2OneRoundAndSchedule(masm, w2, w3, w0, w1, h, a, b, c, d, e, f, g, 1);
        sha512AVX2OneRoundAndSchedule(masm, w2, w3, w0, w1, g, h, a, b, c, d, e, f, 2);
        sha512AVX2OneRoundAndSchedule(masm, w2, w3, w0, w1, f, g, h, a, b, c, d, e, 3);

        masm.vpaddq(xfer, w3, new AMD64Address(table, 3 * 32), ymm);
        masm.vmovdqu(new AMD64Address(AMD64.rsp, OFFSET_XFER), xfer);
        // Advance to the next four table rows for the following pass.
        masm.addq(table, 4 * 32);
        sha512AVX2OneRoundAndSchedule(masm, w3, w0, w1, w2, e, f, g, h, a, b, c, d, 0);
        sha512AVX2OneRoundAndSchedule(masm, w3, w0, w1, w2, d, e, f, g, h, a, b, c, 1);
        sha512AVX2OneRoundAndSchedule(masm, w3, w0, w1, w2, c, d, e, f, g, h, a, b, 2);
        sha512AVX2OneRoundAndSchedule(masm, w3, w0, w1, w2, b, c, d, e, f, g, h, a, 3);

        masm.subqAndJcc(new AMD64Address(AMD64.rsp, OFFSET_SRND), 1, AMD64Assembler.ConditionFlag.NotEqual, scheduleRounds, false);

        // Two passes of eight rounds without further scheduling.
        masm.movslq(new AMD64Address(AMD64.rsp, OFFSET_SRND), 2);
        masm.bind(plainRounds);

        masm.vpaddq(xfer, w0, new AMD64Address(table, 0 * 32), ymm);
        masm.vmovdqu(new AMD64Address(AMD64.rsp, OFFSET_XFER), xfer);
        sha512AVX2OneRoundCompute(masm, a, a, b, c, d, e, f, g, h, 0);
        sha512AVX2OneRoundCompute(masm, h, h, a, b, c, d, e, f, g, 1);
        sha512AVX2OneRoundCompute(masm, g, g, h, a, b, c, d, e, f, 2);
        sha512AVX2OneRoundCompute(masm, f, f, g, h, a, b, c, d, e, 3);

        masm.vpaddq(xfer, w1, new AMD64Address(table, 1 * 32), ymm);
        masm.vmovdqu(new AMD64Address(AMD64.rsp, OFFSET_XFER), xfer);
        masm.addq(table, 2 * 32);
        sha512AVX2OneRoundCompute(masm, e, e, f, g, h, a, b, c, d, 0);
        sha512AVX2OneRoundCompute(masm, d, d, e, f, g, h, a, b, c, 1);
        sha512AVX2OneRoundCompute(masm, c, c, d, e, f, g, h, a, b, 2);
        sha512AVX2OneRoundCompute(masm, b, b, c, d, e, f, g, h, a, 3);

        // The second pass consumes the remaining two message vectors.
        masm.vmovdqu(w0, w2);
        masm.vmovdqu(w1, w3);
        masm.subqAndJcc(new AMD64Address(AMD64.rsp, OFFSET_SRND), 1, AMD64Assembler.ConditionFlag.NotEqual, plainRounds, false);

        // Fold the working registers back into the state, word by word.
        for (int i = 0; i < working.length; i++) {
            addmq(masm, 8 * i, state, working[i]);
        }

        // Recover the input pointer, step to the next block and loop until the end pointer.
        masm.movq(input, new AMD64Address(AMD64.rsp, OFFSET_INP));
        masm.addq(input, 128);
        masm.cmpqAndJcc(input, new AMD64Address(AMD64.rsp, OFFSET_INP_END), AMD64Assembler.ConditionFlag.NotEqual, nextBlock, false);

        // ---- Epilogue ----
        masm.bind(epilogue);
        for (int i = 0; i < savedRegisters.length; i++) {
            masm.movq(savedRegisters[i], new AMD64Address(AMD64.rsp, OFFSET_GPR + 8 * i));
        }
        masm.movq(AMD64.rsp, new AMD64Address(AMD64.rsp, OFFSET_RSP));
        masm.pop(AMD64.rcx);
        masm.pop(AMD64.rdx);

        if (multiBlock) {
            // With the original offset (rdx) and limit (rcx) popped back, leave in rax the first
            // value of offset + k * 128 that is not below the limit.
            // NOTE(review): rax presumably carries this value to the consumer of the op — no
            // result operand is declared in this class, confirm with the LIR generator.
            final Register limitEnd = AMD64.rcx;
            final Register ofsEnd = AMD64.rdx;
            masm.movq(AMD64.rax, ofsEnd);
            masm.bind(resultLoop);
            masm.cmpqAndJcc(AMD64.rax, limitEnd, AMD64Assembler.ConditionFlag.AboveEqual, resultLoopEnd, true);
            masm.addq(AMD64.rax, 128);
            masm.jmpb(resultLoop);
            masm.bind(resultLoopEnd);
        }
    }

    /**
     * Emits "add memory into register, then store the register back": {@code value += [base + disp]}
     * followed by {@code [base + disp] = value}, so both the register and the memory word end up
     * holding the sum.
     *
     * @param masm assembler receiving the two instructions
     * @param disp byte displacement of the memory word from {@code base}
     * @param base register holding the base address
     * @param value register that is added to and then written back
     */
    private static void addmq(AMD64MacroAssembler masm, int disp, Register base, Register value) {
        final AMD64Address slot = new AMD64Address(base, disp);
        masm.addq(value, slot);
        masm.movq(slot, value);
    }

    /**
     * Emits one round of the compression function without any message scheduling.
     *
     * <p>The round reads its pre-added input quadword from {@code [rsp + 8 * iteration]} and uses
     * r12-r15 and rdi as scratch registers. The last two additions into {@code h} are deferred:
     * for iterations 1-3 of a group they are applied to {@code oldH} (the {@code h} of the
     * preceding round) at the start of this call, interleaved with the new round's instructions,
     * and for iteration 3 they are also applied to this round's {@code h} at the end. The
     * instruction order is significant because the deferred additions consume r15 and rdi
     * before the new round overwrites them.
     *
     * @param masm assembler receiving the instructions
     * @param oldH register that was {@code h} in the preceding round; only written when
     *            {@code iteration % 4 > 0}
     * @param a working register, read only
     * @param b working register, read only
     * @param c working register, read only
     * @param d working register, receives the additions of this round
     * @param e working register, read only
     * @param f working register, read only
     * @param g working register, read only
     * @param h working register, receives the additions of this round
     * @param iteration index of the round within its group of four; selects the stack slot
     */
    private static void sha512AVX2OneRoundCompute(AMD64MacroAssembler masm, Register oldH, Register a, Register b, Register c, Register d, Register e, Register f, Register g, Register h, int iteration) {
        final Register eRot = AMD64.r13;
        final Register rot = AMD64.r14;
        final Register choose = AMD64.r15;
        final Register majority = AMD64.rdi;
        final Register aux = AMD64.r12;

        final int step = iteration % 4;
        final boolean finishPreviousRound = step > 0;
        final AMD64Address roundInput = new AMD64Address(AMD64.rsp, OFFSET_XFER + 8 * iteration);

        if (finishPreviousRound) {
            // First deferred addition of the preceding round; must precede the overwrite of r15.
            masm.addq(oldH, choose);
        }
        // choose = ((f ^ g) & e) ^ g, eRot = ror(e, 41) ^ ror(e, 18) ^ ror(e, 14)
        masm.movq(choose, f);
        masm.rorxq(eRot, e, 41);
        masm.rorxq(rot, e, 18);
        masm.xorq(choose, g);
        masm.xorq(eRot, rot);
        masm.rorxq(rot, e, 14);
        masm.andq(choose, e);
        if (finishPreviousRound) {
            // Second deferred addition of the preceding round; must precede the overwrite of rdi.
            masm.addq(oldH, majority);
        }
        masm.xorq(eRot, rot);
        // rot = ror(a, 39) ^ ror(a, 34) ^ ror(a, 28), majority = ((a | c) & b) | (a & c)
        masm.rorxq(aux, a, 34);
        masm.xorq(choose, g);
        masm.rorxq(rot, a, 39);
        masm.movq(majority, a);
        masm.xorq(rot, aux);
        masm.rorxq(aux, a, 28);
        masm.addq(h, roundInput);
        masm.orq(majority, c);
        masm.xorq(rot, aux);
        masm.movq(aux, a);
        masm.andq(majority, b);
        masm.andq(aux, c);
        masm.addq(choose, eRot);
        masm.addq(d, h);
        masm.orq(majority, aux);
        masm.addq(h, rot);
        masm.addq(d, choose);
        if (step == 3) {
            // Last round of the group: nobody follows to apply the deferred additions.
            masm.addq(h, choose);
            masm.addq(h, majority);
        }
    }

    /**
     * Emits one quarter of the vectorised message-schedule update followed by one full round of
     * the compression function.
     *
     * <p>The vector part depends on {@code iteration % 4}; the four variants are meant to be
     * emitted in order 0, 1, 2, 3 because each consumes values the previous one left in
     * xmm0-xmm3 and xmm8, and the last one writes the final result into {@code vector4}. xmm10 is
     * read as an AND mask. The scalar part reads its pre-added input quadword from
     * {@code [rsp + 8 * iteration]} and uses r12-r15 and rdi as scratch registers.
     *
     * @param masm assembler receiving the instructions
     * @param vector4 message vector that is updated in place by variants 1-3
     * @param vector5 message vector, read by variant 0
     * @param vector6 message vector, read by variant 0
     * @param vector7 message vector, read by variants 0 and 1
     * @param a working register, read only
     * @param b working register, read only
     * @param c working register, read only
     * @param d working register, receives the additions of this round
     * @param e working register, read only
     * @param f working register, read only
     * @param g working register, read only
     * @param h working register, receives the additions of this round
     * @param iteration index of the round within its group of four
     */
    private static void sha512AVX2OneRoundAndSchedule(AMD64MacroAssembler masm, Register vector4, Register vector5, Register vector6, Register vector7, Register a, Register b, Register c, Register d, Register e, Register f, Register g, Register h, int iteration) {
        final AVXKind.AVXSize ymm = AVXKind.AVXSize.YMM;
        final Register v0 = AMD64.xmm0;
        final Register v1 = AMD64.xmm1;
        final Register v2 = AMD64.xmm2;
        final Register v3 = AMD64.xmm3;
        final Register v8 = AMD64.xmm8;
        final Register laneMask = AMD64.xmm10;

        final Register eRot = AMD64.r13;
        final Register rot = AMD64.r14;
        final Register choose = AMD64.r15;
        final Register majority = AMD64.rdi;
        final Register aux = AMD64.r12;

        switch (iteration % 4) {
            case 0:
                // v0 = (vector7:vector6 realigned) + vector4
                masm.vperm2f128(v0, vector7, vector6, 3);
                masm.vpalignr(v0, v0, vector6, 8, ymm);
                masm.vpaddq(v0, v0, vector4, ymm);
                // v1 = vector5:vector4 realigned; v3 = v1 rotated right by 1; v8 = v1 >>> 7
                masm.vperm2f128(v1, vector5, vector4, 3);
                masm.vpalignr(v1, v1, vector4, 8, ymm);
                masm.vpsrlq(v2, v1, 1, ymm);
                masm.vpsllq(v3, v1, 64 - 1, ymm);
                masm.vpor(v3, v3, v2, ymm);
                masm.vpsrlq(v8, v1, 7, ymm);
                break;
            case 1:
                // v1 = v1 rotated right by 8, then xor-ed with v3 ^ v8 and added into v0
                masm.vpsrlq(v2, v1, 8, ymm);
                masm.vpsllq(v1, v1, 64 - 8, ymm);
                masm.vpor(v1, v1, v2, ymm);
                masm.vpxor(v3, v3, v8, ymm);
                masm.vpxor(v1, v3, v1, ymm);
                masm.vpaddq(v0, v0, v1, ymm);
                masm.vperm2f128(vector4, v0, v0, 0);
                masm.vpand(v0, v0, laneMask, ymm);
                masm.vperm2f128(v2, vector7, vector7, 17);
                masm.vpsrlq(v8, v2, 6, ymm);
                break;
            case 2:
                // v8 = (v2 ror 19) ^ (v2 ror 61) ^ (v2 >>> 6), added into vector4
                masm.vpsrlq(v3, v2, 19, ymm);
                masm.vpsllq(v1, v2, 64 - 19, ymm);
                masm.vpor(v3, v3, v1, ymm);
                masm.vpxor(v8, v8, v3, ymm);
                masm.vpsrlq(v3, v2, 61, ymm);
                masm.vpsllq(v1, v2, 64 - 61, ymm);
                masm.vpor(v3, v3, v1, ymm);
                masm.vpxor(v8, v8, v3, ymm);
                masm.vpaddq(vector4, vector4, v8, ymm);
                masm.vpsrlq(v8, vector4, 6, ymm);
                break;
            case 3:
                // Same rotation pattern applied to vector4; the sum with v0 is blended into vector4.
                masm.vpsrlq(v3, vector4, 19, ymm);
                masm.vpsllq(v1, vector4, 64 - 19, ymm);
                masm.vpor(v3, v3, v1, ymm);
                masm.vpxor(v8, v8, v3, ymm);
                masm.vpsrlq(v3, vector4, 61, ymm);
                masm.vpsllq(v1, vector4, 64 - 61, ymm);
                masm.vpor(v3, v3, v1, ymm);
                masm.vpxor(v8, v8, v3, ymm);
                masm.vpaddq(v2, v0, v8, ymm);
                masm.vpblendd(vector4, vector4, v2, 0xF0, ymm);
                break;
            default:
                // Negative remainders select no vector work, as before.
                break;
        }

        // Scalar round:
        //   eRot     = ror(e, 41) ^ ror(e, 18) ^ ror(e, 14)
        //   choose   = ((f ^ g) & e) ^ g
        //   rot      = ror(a, 39) ^ ror(a, 34) ^ ror(a, 28)
        //   majority = ((a | c) & b) | (a & c)
        masm.movq(majority, a);
        masm.rorxq(eRot, e, 41);
        masm.rorxq(rot, e, 18);
        masm.addq(h, new AMD64Address(AMD64.rsp, OFFSET_XFER + iteration * 8));
        masm.orq(majority, c);
        masm.movq(choose, f);
        masm.xorq(choose, g);
        masm.rorxq(aux, a, 34);
        masm.xorq(eRot, rot);
        masm.rorxq(rot, e, 14);
        masm.andq(choose, e);
        masm.addq(d, h);
        masm.andq(majority, b);
        masm.xorq(eRot, rot);
        masm.rorxq(rot, a, 39);
        masm.xorq(rot, aux);
        masm.rorxq(aux, a, 28);
        masm.xorq(choose, g);
        masm.xorq(rot, aux);
        masm.movq(aux, a);
        masm.andq(aux, c);
        masm.addq(choose, eRot);
        masm.orq(majority, aux);
        masm.addq(h, rot);
        masm.addq(d, choose);
        masm.addq(h, choose);
        masm.addq(h, majority);
    }

    /**
     * Reports that the code emitted by this instruction changes rsp: emitCode pushes two
     * registers, subtracts the scratch-frame size, realigns rsp, and restores it before finishing.
     *
     * @return always {@code true}
     */
    @Override
    public boolean modifiesStackPointer() {
        return true;
    }
}

