/*
 * Decompiled with CFR 0.152.
 */
package jdk.graal.compiler.lir.aarch64;

import java.util.Arrays;
import jdk.graal.compiler.asm.Label;
import jdk.graal.compiler.asm.aarch64.AArch64ASIMDAssembler;
import jdk.graal.compiler.asm.aarch64.AArch64Address;
import jdk.graal.compiler.asm.aarch64.AArch64Assembler;
import jdk.graal.compiler.asm.aarch64.AArch64MacroAssembler;
import jdk.graal.compiler.core.common.Stride;
import jdk.graal.compiler.core.common.StrideUtil;
import jdk.graal.compiler.debug.GraalError;
import jdk.graal.compiler.lir.LIRInstruction;
import jdk.graal.compiler.lir.LIRInstructionClass;
import jdk.graal.compiler.lir.Opcode;
import jdk.graal.compiler.lir.aarch64.AArch64ComplexVectorOp;
import jdk.graal.compiler.lir.aarch64.AArch64ControlFlow;
import jdk.graal.compiler.lir.aarch64.AArch64LIRInstruction;
import jdk.graal.compiler.lir.asm.CompilationResultBuilder;
import jdk.graal.compiler.lir.gen.LIRGeneratorTool;
import jdk.vm.ci.aarch64.AArch64Kind;
import jdk.vm.ci.code.Register;
import jdk.vm.ci.code.ValueUtil;
import jdk.vm.ci.meta.Value;

/**
 * LIR instruction that emits AArch64 code copying {@code length} elements from a source array
 * region to a destination array region while converting between element widths (strides) of 1, 2
 * or 4 bytes. 8-byte strides are rejected by the constructor.
 *
 * <p>The source/destination strides are either fixed at construction time, or - when a
 * {@code dynamicStrides} value is supplied - selected at run time through a 9-entry jump table
 * whose slots are indexed by {@code StrideUtil.getDirectStubCallIndex(strideSrc, strideDst)}.
 *
 * <p>Widening is emitted with {@code uxtl}/{@code uxtl2}, narrowing with {@code uzp1}/{@code xtn}.
 * Data is moved in 64-byte blocks (measured on the side with the wider stride); shorter inputs are
 * handled by a cascade of tails that each copy two, possibly overlapping, halves.
 */
@Opcode(value="ARRAY_COPY_WITH_CONVERSIONS")
public final class AArch64ArrayCopyWithConversionsOp
extends AArch64ComplexVectorOp {
    public static final LIRInstructionClass<AArch64ArrayCopyWithConversionsOp> TYPE = LIRInstructionClass.create(AArch64ArrayCopyWithConversionsOp.class);
    // Compile-time strides; only used by emitCode when no dynamic strides value is present.
    private final Stride argStrideSrc;
    private final Stride argStrideDst;
    // Destination base pointer and offset; emitCode adds them to form the destination address.
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    protected Value arrayDstValue;
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    protected Value offsetDstValue;
    // Source base pointer and offset; emitCode adds them to form the source address.
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    protected Value arraySrcValue;
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    protected Value offsetSrcValue;
    // Number of elements to copy (32-bit value).
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG})
    protected Value lengthValue;
    // Run-time stride selector used as jump table index, or Value.ILLEGAL when strides are static.
    @LIRInstruction.Alive(value={LIRInstruction.OperandFlag.REG, LIRInstruction.OperandFlag.ILLEGAL})
    private Value dynamicStridesValue;
    // General purpose temporaries: [0] destination address, [1] source address,
    // [2] jump table index (only allocated with dynamic strides).
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    protected Value[] temp;
    // Four vector temporaries, accessed through v(int).
    @LIRInstruction.Temp(value={LIRInstruction.OperandFlag.REG})
    protected Value[] vectorTemp;

    /**
     * Creates the operation and allocates its temporary registers.
     *
     * @param tool generator used to allocate the temporary registers
     * @param strideSrc element width of the source array; must not be {@code Stride.S8}
     * @param strideDst element width of the destination array; must not be {@code Stride.S8}
     * @param arrayDst destination base pointer, must be of kind {@code QWORD}
     * @param offsetDst offset added to {@code arrayDst}, must be of kind {@code QWORD}
     * @param arraySrc source base pointer, must have the same kind as {@code arrayDst}
     * @param offsetSrc offset added to {@code arraySrc}, must be of kind {@code QWORD}
     * @param length number of elements to copy, must be of kind {@code DWORD}
     * @param dynamicStrides run-time stride selector, or {@code null} to use the static strides
     */
    public AArch64ArrayCopyWithConversionsOp(LIRGeneratorTool tool, Stride strideSrc, Stride strideDst, Value arrayDst, Value offsetDst, Value arraySrc, Value offsetSrc, Value length, Value dynamicStrides) {
        super((LIRInstructionClass<? extends AArch64LIRInstruction>)TYPE);
        this.argStrideSrc = strideSrc;
        this.argStrideDst = strideDst;
        GraalError.guarantee(arrayDst.getPlatformKind() == AArch64Kind.QWORD && arrayDst.getPlatformKind() == arraySrc.getPlatformKind(), "64 bit array pointers expected");
        GraalError.guarantee(offsetDst.getPlatformKind() == AArch64Kind.QWORD, "long value expected");
        GraalError.guarantee(offsetSrc.getPlatformKind() == AArch64Kind.QWORD, "long value expected");
        GraalError.guarantee(length.getPlatformKind() == AArch64Kind.DWORD, "int value expected");
        GraalError.guarantee(strideSrc != Stride.S8 && strideDst != Stride.S8, "8 byte stride is not supported");
        this.arrayDstValue = arrayDst;
        this.offsetDstValue = offsetDst;
        this.arraySrcValue = arraySrc;
        this.offsetSrcValue = offsetSrc;
        this.lengthValue = length;
        // A missing selector is normalized to ILLEGAL, which withDynamicStrides() tests for.
        this.dynamicStridesValue = dynamicStrides == null ? Value.ILLEGAL : dynamicStrides;
        // The dynamic variant needs one extra temporary for the jump table index.
        this.temp = AArch64ArrayCopyWithConversionsOp.allocateTempRegisters(tool, this.withDynamicStrides() ? 3 : 2);
        this.vectorTemp = AArch64ArrayCopyWithConversionsOp.allocateVectorRegisters(tool, 4);
    }

    /**
     * Emits the complete copy routine: address computation, optional jump table dispatch on the
     * dynamic strides value, and one copy body per reachable stride combination. All paths
     * converge on a common {@code end} label bound at the end of the emitted code.
     *
     * @param crb compilation result builder, passed on to the jump table emitter
     * @param masm assembler receiving the instructions
     */
    @Override
    public void emitCode(CompilationResultBuilder crb, AArch64MacroAssembler masm) {
        Register arrayDst = ValueUtil.asRegister((Value)this.temp[0]);
        Register arraySrc = ValueUtil.asRegister((Value)this.temp[1]);
        Label end = new Label();
        // Compute base + offset into temporaries; the copy code advances these pointers.
        masm.add(64, arrayDst, ValueUtil.asRegister((Value)this.arrayDstValue), ValueUtil.asRegister((Value)this.offsetDstValue));
        masm.add(64, arraySrc, ValueUtil.asRegister((Value)this.arraySrcValue), ValueUtil.asRegister((Value)this.offsetSrcValue));
        try (AArch64MacroAssembler.ScratchRegister sc1 = masm.getScratchRegister();
             AArch64MacroAssembler.ScratchRegister sc2 = masm.getScratchRegister();){
            Register tmp = sc1.getRegister();
            Register length = sc2.getRegister();
            // Work on a copy of the length; emitArrayCopy overwrites this register.
            masm.mov(32, length, ValueUtil.asRegister((Value)this.lengthValue));
            if (this.withDynamicStrides()) {
                // One label per jump table slot (indices 0..8).
                Label[] variants = new Label[9];
                for (int i = 0; i < variants.length; ++i) {
                    variants[i] = new Label();
                }
                Register tmp2 = ValueUtil.asRegister((Value)this.temp[2]);
                masm.mov(32, tmp2, ValueUtil.asRegister((Value)this.dynamicStridesValue));
                AArch64ControlFlow.RangeTableSwitchOp.emitJumpTable(crb, masm, tmp, tmp2, 0, 8, Arrays.stream(variants));
                // Same-stride copies share the S1->S1 body: the S4/S4 entry doubles the length and
                // falls through into the S2/S2 entry, which doubles it again and falls through into
                // the S1/S1 entry, so the length ends up expressed in bytes.
                masm.align(16);
                masm.bind(variants[StrideUtil.getDirectStubCallIndex(Stride.S4, Stride.S4)]);
                masm.lsl(64, length, length, 1L);
                masm.align(16);
                masm.bind(variants[StrideUtil.getDirectStubCallIndex(Stride.S2, Stride.S2)]);
                masm.lsl(64, length, length, 1L);
                masm.align(16);
                masm.bind(variants[StrideUtil.getDirectStubCallIndex(Stride.S1, Stride.S1)]);
                this.emitArrayCopy(masm, Stride.S1, Stride.S1, arrayDst, arraySrc, length, tmp, end);
                masm.jmp(end);
                // Emit a dedicated, 16-byte aligned body for each of the six mixed-stride combinations.
                for (Stride strideSrc : new Stride[]{Stride.S1, Stride.S2, Stride.S4}) {
                    for (Stride strideDst : new Stride[]{Stride.S1, Stride.S2, Stride.S4}) {
                        if (strideSrc == strideDst) continue;
                        masm.align(16);
                        masm.bind(variants[StrideUtil.getDirectStubCallIndex(strideSrc, strideDst)]);
                        this.emitArrayCopy(masm, strideDst, strideSrc, arrayDst, arraySrc, length, tmp, end);
                        masm.jmp(end);
                    }
                }
            } else {
                // Static strides: a single copy body suffices.
                this.emitArrayCopy(masm, this.argStrideDst, this.argStrideSrc, arrayDst, arraySrc, length, tmp, end);
            }
            masm.align(16);
            masm.bind(end);
        }
    }

    /**
     * Tells whether a run-time stride selector was supplied to the constructor.
     *
     * @return {@code true} if {@code dynamicStridesValue} is not the illegal value
     */
    private boolean withDynamicStrides() {
        return !ValueUtil.isIllegal((Value)this.dynamicStridesValue);
    }

    /**
     * Emits the copy body for one fixed stride combination: a 64-byte block loop for long inputs
     * followed by the tail handlers for progressively shorter inputs. Every emitted path ends with
     * a jump to {@code end}.
     *
     * <p>All sizes below refer to the array with the wider stride ("max stride array"); the other
     * array advances proportionally less per block.
     *
     * @param asm assembler receiving the instructions
     * @param strideDst destination element width
     * @param strideSrc source element width
     * @param arrayDst register holding the destination address; modified by the emitted code
     * @param arraySrc register holding the source address; modified by the emitted code
     * @param len register holding the element count; overwritten by the emitted code
     * @param tmp general purpose scratch register
     * @param end label to jump to once the copy is complete
     */
    private void emitArrayCopy(AArch64MacroAssembler asm, Stride strideDst, Stride strideSrc, Register arrayDst, Register arraySrc, Register len, Register tmp, Label end) {
        Label tailLessThan64 = new Label();
        Label tailLessThan32 = new Label();
        Label tailLessThan16 = new Label();
        Label tailLessThan8 = new Label();
        Label tailLessThan4 = new Label();
        Label tailLessThan2 = new Label();
        Label vectorLoop = new Label();
        Label tail = new Label();
        Register maxStrideArray = strideSrc.value < strideDst.value ? arrayDst : arraySrc;
        Register minStrideArray = strideSrc.value < strideDst.value ? arraySrc : arrayDst;
        Stride strideMax = Stride.max(strideSrc, strideDst);
        Stride strideMin = Stride.min(strideSrc, strideDst);
        // Subtract the number of elements in one 64-byte block; a negative result means the input
        // is shorter than one block and is handled by the tails below.
        asm.subs(64, len, len, 64 >> strideMax.log2);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, tailLessThan64);
        // From here on the len register holds the address of the last 64-byte block.
        Register refAddress = len;
        asm.add(64, refAddress, maxStrideArray, len, AArch64Assembler.ShiftType.LSL, strideMax.log2);
        // Copy the first block from the original pointers.
        this.simdCopy64(asm, strideDst, strideSrc, arrayDst, arraySrc);
        // If the pointer has already reached the last block, only the final block is left.
        asm.cmp(64, refAddress, maxStrideArray);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.LS, tail);
        // Round the wider pointer down to a 64-byte boundary and move the narrower pointer back by
        // the equivalent amount (the byte distance scaled down by the stride ratio).
        asm.and(64, tmp, maxStrideArray, 63L);
        asm.sub(64, minStrideArray, minStrideArray, tmp, AArch64Assembler.ShiftType.LSR, strideMax.log2 - strideMin.log2);
        asm.bic(64, maxStrideArray, maxStrideArray, 63L);
        asm.align(16);
        asm.bind(vectorLoop);
        // Main loop: one block per iteration until the pointer reaches the last block.
        this.simdCopy64(asm, strideDst, strideSrc, arrayDst, arraySrc);
        asm.cmp(64, maxStrideArray, refAddress);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.LO, vectorLoop);
        asm.bind(tail);
        // tmp = distance by which the pointer is past the start of the last block. Reset the wider
        // pointer to the last block, pull the narrower pointer back accordingly, and copy the last
        // block (it may overlap data that has already been copied).
        asm.sub(64, tmp, maxStrideArray, refAddress);
        asm.mov(64, maxStrideArray, refAddress);
        asm.sub(64, minStrideArray, minStrideArray, tmp, AArch64Assembler.ShiftType.LSR, strideMax.log2 - strideMin.log2);
        this.simdCopy64(asm, strideDst, strideSrc, arrayDst, arraySrc);
        asm.jmp(end);
        // Tail cascade for inputs shorter than 64 bytes; each handler either copies the input or
        // branches on to the handler for half its size.
        this.tail32(asm, strideDst, strideSrc, arrayDst, arraySrc, len, tmp, tailLessThan64, tailLessThan32, end);
        this.tailLessThan32(asm, strideDst, strideSrc, arrayDst, arraySrc, len, tailLessThan32, tailLessThan16, end, 16);
        this.tailLessThan32(asm, strideDst, strideSrc, arrayDst, arraySrc, len, tailLessThan16, tailLessThan8, end, 8);
        this.tailLessThan32(asm, strideDst, strideSrc, arrayDst, arraySrc, len, tailLessThan8, tailLessThan4, end, 4);
        this.tailLessThan32(asm, strideDst, strideSrc, arrayDst, arraySrc, len, tailLessThan4, tailLessThan2, end, 2);
        this.tailLessThan32(asm, strideDst, strideSrc, arrayDst, arraySrc, len, tailLessThan2, end, end, 1);
    }

    /**
     * Returns the vector temporary with the given index as a register.
     *
     * @param index index into {@code vectorTemp}
     * @return the register backing that temporary
     */
    private Register v(int index) {
        return ValueUtil.asRegister((Value)this.vectorTemp[index]);
    }

    /**
     * Emits the conversion and copy of one block that spans 64 bytes on the wider-stride side.
     * Both address registers are advanced past the data they touched via post-indexed addressing.
     *
     * @param asm assembler receiving the instructions
     * @param strideDst destination element width
     * @param strideSrc source element width
     * @param arrayDst register holding the destination address; post-incremented
     * @param arraySrc register holding the source address; post-incremented
     * @throws GraalError if the difference of the stride exponents is outside {@code [-2, 2]}
     */
    private void simdCopy64(AArch64MacroAssembler asm, Stride strideDst, Stride strideSrc, Register arrayDst, Register arraySrc) {
        AArch64ASIMDAssembler.ElementSize dstESize = AArch64ASIMDAssembler.ElementSize.fromStride(strideDst);
        AArch64ASIMDAssembler.ElementSize srcESize = AArch64ASIMDAssembler.ElementSize.fromStride(strideSrc);
        switch (strideDst.log2 - strideSrc.log2) {
            case -2: {
                // Narrow by a factor of 4: load 64 bytes, narrow in two uzp1 steps, store 16 bytes.
                asm.fldp(128, this.v(0), this.v(1), AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arraySrc, 32));
                asm.fldp(128, this.v(2), this.v(3), AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arraySrc, 32));
                asm.neon.uzp1VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, srcESize.narrow(), this.v(0), this.v(0), this.v(1));
                asm.neon.uzp1VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, srcESize.narrow(), this.v(2), this.v(2), this.v(3));
                asm.neon.uzp1VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, dstESize, this.v(0), this.v(0), this.v(2));
                asm.fstr(128, this.v(0), AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, arrayDst, 16));
                break;
            }
            case -1: {
                // Narrow by a factor of 2: load 64 bytes, one uzp1 step per pair, store 32 bytes.
                asm.fldp(128, this.v(0), this.v(1), AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arraySrc, 32));
                asm.fldp(128, this.v(2), this.v(3), AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arraySrc, 32));
                asm.neon.uzp1VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, dstESize, this.v(0), this.v(0), this.v(1));
                asm.neon.uzp1VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, dstESize, this.v(2), this.v(2), this.v(3));
                asm.fstp(128, this.v(0), this.v(2), AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arrayDst, 32));
                break;
            }
            case 0: {
                // Same stride: plain 64-byte copy.
                asm.fldp(128, this.v(0), this.v(1), AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arraySrc, 32));
                asm.fldp(128, this.v(2), this.v(3), AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arraySrc, 32));
                asm.fstp(128, this.v(0), this.v(1), AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arrayDst, 32));
                asm.fstp(128, this.v(2), this.v(3), AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arrayDst, 32));
                break;
            }
            case 1: {
                // Widen by a factor of 2: load 32 bytes, store 64 bytes. The upper halves are
                // extended into v1/v3 first because the lower-half extension overwrites v0/v2.
                asm.fldp(128, this.v(0), this.v(2), AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arraySrc, 32));
                asm.neon.uxtl2VV(srcESize, this.v(1), this.v(0));
                asm.neon.uxtl2VV(srcESize, this.v(3), this.v(2));
                asm.neon.uxtlVV(srcESize, this.v(0), this.v(0));
                asm.neon.uxtlVV(srcESize, this.v(2), this.v(2));
                asm.fstp(128, this.v(0), this.v(1), AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arrayDst, 32));
                asm.fstp(128, this.v(2), this.v(3), AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arrayDst, 32));
                break;
            }
            case 2: {
                // Widen by a factor of 4: load 16 bytes, extend in two steps, store 64 bytes.
                asm.fldr(128, this.v(0), AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_POST_INDEXED, arraySrc, 16));
                // First step: v0 <- lower half, v2 <- upper half, at twice the source width.
                asm.neon.uxtl2VV(srcESize, this.v(2), this.v(0));
                asm.neon.uxtlVV(srcESize, this.v(0), this.v(0));
                // Second step: spread the two intermediate registers over v0..v3.
                asm.neon.uxtl2VV(srcESize.expand(), this.v(1), this.v(0));
                asm.neon.uxtl2VV(srcESize.expand(), this.v(3), this.v(2));
                asm.neon.uxtlVV(srcESize.expand(), this.v(0), this.v(0));
                asm.neon.uxtlVV(srcESize.expand(), this.v(2), this.v(2));
                asm.fstp(128, this.v(0), this.v(1), AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arrayDst, 32));
                asm.fstp(128, this.v(2), this.v(3), AArch64Address.createImmediateAddress(128, AArch64Address.AddressingMode.IMMEDIATE_PAIR_POST_INDEXED, arrayDst, 32));
                break;
            }
            default: {
                throw GraalError.unimplemented("conversion from " + String.valueOf((Object)strideSrc) + " to " + String.valueOf((Object)strideDst) + " not implemented");
            }
        }
    }

    /**
     * Emits the tail handler for inputs spanning at least 32 but fewer than 64 bytes on the
     * wider-stride side. The input is copied as two 32-byte halves: one starting at the beginning
     * and one positioned {@code len} elements further on, so that it ends at the end of the input;
     * the halves may overlap.
     *
     * @param asm assembler receiving the instructions
     * @param strideDst destination element width
     * @param strideSrc source element width
     * @param arrayDst register holding the destination address; may be modified
     * @param arraySrc register holding the source address; may be modified
     * @param len register holding the element count minus the number of elements in 64 bytes
     * @param tmp general purpose scratch register
     * @param entry label bound at the start of this handler
     * @param nextTail label to branch to when fewer than 32 bytes remain
     * @param end label to jump to once the copy is complete
     * @throws GraalError if the difference of the stride exponents is outside {@code [-2, 2]}
     */
    private void tail32(AArch64MacroAssembler asm, Stride strideDst, Stride strideSrc, Register arrayDst, Register arraySrc, Register len, Register tmp, Label entry, Label nextTail, Label end) {
        Stride strideMax = Stride.max(strideSrc, strideDst);
        asm.bind(entry);
        // Add back the element count of 32 bytes; still negative means fewer than 32 bytes remain.
        asm.adds(64, len, len, 32 >> strideMax.log2);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, nextTail);
        AArch64ASIMDAssembler.ElementSize dstESize = AArch64ASIMDAssembler.ElementSize.fromStride(strideDst);
        AArch64ASIMDAssembler.ElementSize srcESize = AArch64ASIMDAssembler.ElementSize.fromStride(strideSrc);
        switch (strideDst.log2 - strideSrc.log2) {
            case -2: {
                // Narrow by a factor of 4: two 32-byte loads, two 8-byte stores.
                asm.fldp(128, this.v(0), this.v(1), AArch64Address.createPairBaseRegisterOnlyAddress(128, arraySrc));
                asm.add(64, arraySrc, arraySrc, len, AArch64Assembler.ShiftType.LSL, strideSrc.log2);
                asm.fldp(128, this.v(2), this.v(3), AArch64Address.createPairBaseRegisterOnlyAddress(128, arraySrc));
                asm.neon.uzp1VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, srcESize.narrow(), this.v(0), this.v(0), this.v(1));
                asm.neon.uzp1VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, srcESize.narrow(), this.v(2), this.v(2), this.v(3));
                asm.neon.xtnVV(dstESize, this.v(0), this.v(0));
                asm.neon.xtnVV(dstESize, this.v(2), this.v(2));
                // Destination stride is 1 here, so len is used directly as the store offset.
                assert (strideDst.value == 1) : strideDst;
                asm.fstr(64, this.v(0), AArch64Address.createBaseRegisterOnlyAddress(64, arrayDst));
                asm.fstr(64, this.v(2), AArch64Address.createRegisterOffsetAddress(64, arrayDst, len, false));
                break;
            }
            case -1: {
                // Narrow by a factor of 2: two 32-byte loads, two 16-byte stores.
                asm.fldp(128, this.v(0), this.v(1), AArch64Address.createPairBaseRegisterOnlyAddress(128, arraySrc));
                asm.add(64, arraySrc, arraySrc, len, AArch64Assembler.ShiftType.LSL, strideSrc.log2);
                asm.fldp(128, this.v(2), this.v(3), AArch64Address.createPairBaseRegisterOnlyAddress(128, arraySrc));
                // tmp = offset of the second half in the destination.
                asm.lsl(64, tmp, len, strideDst.log2);
                asm.neon.uzp1VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, dstESize, this.v(0), this.v(0), this.v(1));
                asm.neon.uzp1VVV(AArch64ASIMDAssembler.ASIMDSize.FullReg, dstESize, this.v(2), this.v(2), this.v(3));
                asm.fstr(128, this.v(0), AArch64Address.createBaseRegisterOnlyAddress(128, arrayDst));
                asm.fstr(128, this.v(2), AArch64Address.createRegisterOffsetAddress(128, arrayDst, tmp, false));
                break;
            }
            case 0: {
                // Same stride: two 32-byte loads, two 32-byte stores.
                asm.fldp(128, this.v(0), this.v(1), AArch64Address.createPairBaseRegisterOnlyAddress(128, arraySrc));
                asm.add(64, arraySrc, arraySrc, len, AArch64Assembler.ShiftType.LSL, strideSrc.log2);
                asm.fldp(128, this.v(2), this.v(3), AArch64Address.createPairBaseRegisterOnlyAddress(128, arraySrc));
                asm.fstp(128, this.v(0), this.v(1), AArch64Address.createPairBaseRegisterOnlyAddress(128, arrayDst));
                asm.add(64, arrayDst, arrayDst, len, AArch64Assembler.ShiftType.LSL, strideDst.log2);
                asm.fstp(128, this.v(2), this.v(3), AArch64Address.createPairBaseRegisterOnlyAddress(128, arrayDst));
                break;
            }
            case 1: {
                // Widen by a factor of 2: two 16-byte loads, two 32-byte stores.
                // tmp = offset of the second half in the source.
                asm.lsl(64, tmp, len, strideSrc.log2);
                asm.fldr(128, this.v(0), AArch64Address.createBaseRegisterOnlyAddress(128, arraySrc));
                asm.fldr(128, this.v(2), AArch64Address.createRegisterOffsetAddress(128, arraySrc, tmp, false));
                asm.neon.uxtl2VV(srcESize, this.v(1), this.v(0));
                asm.neon.uxtl2VV(srcESize, this.v(3), this.v(2));
                asm.neon.uxtlVV(srcESize, this.v(0), this.v(0));
                asm.neon.uxtlVV(srcESize, this.v(2), this.v(2));
                asm.fstp(128, this.v(0), this.v(1), AArch64Address.createPairBaseRegisterOnlyAddress(128, arrayDst));
                asm.add(64, arrayDst, arrayDst, len, AArch64Assembler.ShiftType.LSL, strideDst.log2);
                asm.fstp(128, this.v(2), this.v(3), AArch64Address.createPairBaseRegisterOnlyAddress(128, arrayDst));
                break;
            }
            case 2: {
                // Widen by a factor of 4: two 8-byte loads, two 32-byte stores.
                // Source stride is 1 here, so len is used directly as the load offset.
                assert (strideSrc.value == 1) : strideSrc;
                asm.fldr(64, this.v(0), AArch64Address.createBaseRegisterOnlyAddress(64, arraySrc));
                asm.fldr(64, this.v(2), AArch64Address.createRegisterOffsetAddress(64, arraySrc, len, false));
                asm.neon.uxtlVV(srcESize, this.v(0), this.v(0));
                asm.neon.uxtlVV(srcESize, this.v(2), this.v(2));
                asm.neon.uxtl2VV(srcESize.expand(), this.v(1), this.v(0));
                asm.neon.uxtl2VV(srcESize.expand(), this.v(3), this.v(2));
                asm.neon.uxtlVV(srcESize.expand(), this.v(0), this.v(0));
                asm.neon.uxtlVV(srcESize.expand(), this.v(2), this.v(2));
                asm.fstp(128, this.v(0), this.v(1), AArch64Address.createPairBaseRegisterOnlyAddress(128, arrayDst));
                asm.add(64, arrayDst, arrayDst, len, AArch64Assembler.ShiftType.LSL, strideDst.log2);
                asm.fstp(128, this.v(2), this.v(3), AArch64Address.createPairBaseRegisterOnlyAddress(128, arrayDst));
                break;
            }
            default: {
                throw GraalError.unimplemented("conversion from " + String.valueOf((Object)strideSrc) + " to " + String.valueOf((Object)strideDst) + " not implemented");
            }
        }
        asm.jmp(end);
    }

    /**
     * Emits the tail handler for inputs spanning at least {@code nBytes} but fewer than
     * {@code 2 * nBytes} bytes on the wider-stride side.
     *
     * <p>Nothing is emitted, and {@code entry} is left unbound, when the wider stride exceeds
     * {@code nBytes}. When the wider stride equals {@code nBytes}, the handler copies a single
     * element through the {@code len} register. Otherwise it copies two {@code nBytes}-sized
     * halves, the second one positioned {@code len} elements after the first; the halves may
     * overlap.
     *
     * @param asm assembler receiving the instructions
     * @param strideDst destination element width
     * @param strideSrc source element width
     * @param arrayDst register holding the destination address; may be modified
     * @param arraySrc register holding the source address; may be modified
     * @param len register holding the (negative) remainder left by the previous handler;
     *            overwritten by the emitted code
     * @param entry label bound at the start of this handler
     * @param nextTail label to branch to when fewer than {@code nBytes} bytes remain
     * @param end label to jump to once the copy is complete
     * @param nBytes size in bytes of each half on the wider-stride side
     * @throws GraalError if the difference of the stride exponents is outside {@code [-2, 2]}
     */
    private void tailLessThan32(AArch64MacroAssembler asm, Stride strideDst, Stride strideSrc, Register arrayDst, Register arraySrc, Register len, Label entry, Label nextTail, Label end, int nBytes) {
        Stride strideMax = Stride.max(strideSrc, strideDst);
        // A half smaller than one element cannot occur; the handler for nBytes == strideMax.value
        // branches straight to end instead of to this handler's entry label.
        if (strideMax.value > nBytes) {
            return;
        }
        asm.bind(entry);
        // Add back the element count of nBytes; still negative means fewer than nBytes remain.
        // If nBytes is a single element there is no smaller tail, so nothing is left to copy.
        asm.adds(64, len, len, nBytes >> strideMax.log2);
        asm.branchConditionally(AArch64Assembler.ConditionFlag.MI, strideMax.value == nBytes ? end : nextTail);
        AArch64ASIMDAssembler.ElementSize dstESize = AArch64ASIMDAssembler.ElementSize.fromStride(strideDst);
        AArch64ASIMDAssembler.ElementSize srcESize = AArch64ASIMDAssembler.ElementSize.fromStride(strideSrc);
        int op = strideDst.log2 - strideSrc.log2;
        // nBytes applies to the wider side; the narrower side accesses proportionally fewer bits.
        int bits = nBytes << 3;
        int loadBits = bits >> Math.max(0, op);
        int storeBits = bits >> Math.max(0, -op);
        if (strideMax.value == nBytes) {
            // Single element: move it through the len register, which is reused as data register.
            asm.ldr(loadBits, len, AArch64Address.createBaseRegisterOnlyAddress(loadBits, arraySrc));
            asm.str(storeBits, len, AArch64Address.createBaseRegisterOnlyAddress(storeBits, arrayDst));
            asm.jmp(end);
            return;
        }
        // Load the first half, advance the source by len elements and load the second half.
        asm.fldr(loadBits, this.v(0), AArch64Address.createBaseRegisterOnlyAddress(loadBits, arraySrc));
        asm.add(64, arraySrc, arraySrc, len, AArch64Assembler.ShiftType.LSL, strideSrc.log2);
        asm.fldr(loadBits, this.v(1), AArch64Address.createBaseRegisterOnlyAddress(loadBits, arraySrc));
        // Convert both halves in place.
        switch (op) {
            case -2: {
                // Narrow by a factor of 4 in two xtn steps.
                asm.neon.xtnVV(dstESize.expand(), this.v(0), this.v(0));
                asm.neon.xtnVV(dstESize.expand(), this.v(1), this.v(1));
                asm.neon.xtnVV(dstESize, this.v(0), this.v(0));
                asm.neon.xtnVV(dstESize, this.v(1), this.v(1));
                break;
            }
            case -1: {
                // Narrow by a factor of 2.
                asm.neon.xtnVV(dstESize, this.v(0), this.v(0));
                asm.neon.xtnVV(dstESize, this.v(1), this.v(1));
                break;
            }
            case 0: {
                // Same stride: no conversion needed.
                break;
            }
            case 1: {
                // Widen by a factor of 2.
                asm.neon.uxtlVV(srcESize, this.v(0), this.v(0));
                asm.neon.uxtlVV(srcESize, this.v(1), this.v(1));
                break;
            }
            case 2: {
                // Widen by a factor of 4 in two uxtl steps.
                asm.neon.uxtlVV(srcESize, this.v(0), this.v(0));
                asm.neon.uxtlVV(srcESize, this.v(1), this.v(1));
                asm.neon.uxtlVV(srcESize.expand(), this.v(0), this.v(0));
                asm.neon.uxtlVV(srcESize.expand(), this.v(1), this.v(1));
                break;
            }
            default: {
                throw GraalError.unimplemented("conversion from " + String.valueOf((Object)strideSrc) + " to " + String.valueOf((Object)strideDst) + " not implemented");
            }
        }
        // Store the first half, advance the destination by len elements and store the second half.
        asm.fstr(storeBits, this.v(0), AArch64Address.createBaseRegisterOnlyAddress(storeBits, arrayDst));
        asm.add(64, arrayDst, arrayDst, len, AArch64Assembler.ShiftType.LSL, strideDst.log2);
        asm.fstr(storeBits, this.v(1), AArch64Address.createBaseRegisterOnlyAddress(storeBits, arrayDst));
        asm.jmp(end);
    }
}

