@ blob: 482807780057a3c72cbc3817d1026f44f6f2b696 [file] [log] [blame]
@
@ Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
@
@ Use of this source code is governed by a BSD-style license
@ that can be found in the LICENSE file in the root of the source
@ tree. An additional intellectual property rights grant can be found
@ in the file PATENTS. All contributing project authors may
@ be found in the AUTHORS file in the root of the source tree.
@
@ This file contains the function WebRtcSpl_ComplexBitReverse(), optimized
@ for ARMv5 platforms.
@ Reference C code is in file complex_bit_reverse.c. Bit-exact.
.arch armv5
.arm                                  @ Body uses ARM-state-only encodings.

@ -----------------------------------------------------------------------------
@ WebRtcSpl_ComplexBitReverse
@
@ Reorders an array of complex values in place by swapping pairs of elements.
@ Every element is moved as a single 32-bit word (two consecutive 16-bit
@ values), so the array is addressed with word loads and stores.
@
@ In:
@   r0 = complex_data, base address of the array. It is accessed with ldr/str,
@        so it should be 4-byte aligned.
@   r1 = stages. The generic path processes n = 1 << stages elements.
@ Out:
@   Nothing is returned; the array is modified in place.
@ Clobbers:
@   r1-r3, r12 and the condition flags. r4-r7 are used as well, but they are
@   saved on entry and restored on exit (16 bytes of stack).
@
@ Paths:
@   stages == 7 or 8: the pairs to swap are read from index_7 / index_8, which
@                     hold byte offsets into complex_data.
@   anything else:    the partner index mr is computed for each m in [1, n - 1]
@                     and the two elements are swapped when mr > m.
@ -----------------------------------------------------------------------------
.global WebRtcSpl_ComplexBitReverse
.type WebRtcSpl_ComplexBitReverse, %function  @ Lets the linker interwork.
.align 2
WebRtcSpl_ComplexBitReverse:
.fnstart
  push {r4-r7}
  .save {r4-r7}                       @ Unwind description of the push above.

  cmp r1, #7
  adr r3, index_7                     @ r3 = table pointer.
  mov r4, #112                        @ r4 = table length in 16-bit entries.
  beq .Lpre_loop_stages_7_or_8

  cmp r1, #8
  adr r3, index_8                     @ r3 = table pointer.
  mov r4, #240                        @ r4 = table length in 16-bit entries.
  beq .Lpre_loop_stages_7_or_8

  @ Generic path. Register roles from here on:
  @   r1 = n, r6 = nn, r3 = m, r4 = mr, r5 = address of element m - 1,
  @   r2 = l (also swap scratch), r7 and r12 = scratch.
  mov r3, #1                          @ m = 1;
  mov r1, r3, asl r1                  @ n = 1 << stages;
  subs r6, r1, #1                     @ nn = n - 1;
  ble .Lend                           @ No element to visit.

  mov r5, r0                          @ r5 = &complex_data (element 0).
  mov r4, #0                          @ mr = 0;

.Lloop_generic:
  rsb r12, r4, r6                     @ r12 = nn - mr;
  mov r2, r1                          @ l = n;

.Lloop_shift:
  mov r2, r2, asr #1                  @ do { l >>= 1; }
  cmp r2, r12
  bgt .Lloop_shift                    @ while (l > nn - mr);  (signed compare)

  sub r12, r2, #1                     @ r12 = l - 1;
  and r4, r12, r4
  add r4, r4, r2                      @ mr = (mr & (l - 1)) + l;
  cmp r4, r3                          @ mr <= m ?
  ble .Lupdate_registers              @ Yes: no swap for this m.

  mov r12, r4, asl #2                 @ r12 = byte offset of element mr.
  ldr r7, [r5, #4]                    @ r7 = element m. The offset of 4 is
                                      @ needed because r5 trails m by one
                                      @ element (m starts at 1, r5 at r0).
  ldr r2, [r0, r12]                   @ r2 = element mr.
  str r7, [r0, r12]                   @ element mr = old element m.
  str r2, [r5, #4]                    @ element m = old element mr.

.Lupdate_registers:
  add r3, r3, #1                      @ ++m;
  add r5, r5, #4                      @ Advance the element pointer with m.
  cmp r3, r1
  bne .Lloop_generic                  @ Continue until m == n.

  b .Lend

.Lpre_loop_stages_7_or_8:
  add r4, r3, r4, asl #1              @ r4 = table end = table + 2 * length.

.Lloop_stages_7_or_8:
  ldrsh r2, [r3], #2                  @ r2 = first byte offset of the pair.
  ldrsh r5, [r3], #2                  @ r5 = second byte offset of the pair.
  ldr r1, [r0, r2]                    @ r1 = element at the first offset.
  ldr r12, [r0, r5]                   @ r12 = element at the second offset.
  cmp r3, r4                          @ End of table? The stores below leave
                                      @ the flags untouched for the bne.
  str r1, [r0, r5]
  str r12, [r0, r2]
  bne .Lloop_stages_7_or_8

.Lend:
  pop {r4-r7}
  bx lr
.fnend
.size WebRtcSpl_ComplexBitReverse, . - WebRtcSpl_ComplexBitReverse
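@ The index tables. Unlike the tables in the generic C code, the values stored
@ here are not element indexes: each one is double the corresponding index into
@ the array of 16-bit elements, i.e. the byte offset of the complex value, so it
@ can be added to the array base address directly.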
.align 2
@ Swap table for stages == 7: 112 halfword entries, consumed two at a time by
@ WebRtcSpl_ComplexBitReverse. Each entry is a byte offset into complex_data.
@ Every row below holds four (first, second) offset pairs. The entry count must
@ stay in step with the length constant loaded in the function.
index_7:
  .hword 4, 256, 8, 128, 12, 384, 16, 64
  .hword 20, 320, 24, 192, 28, 448, 36, 288
  .hword 40, 160, 44, 416, 48, 96, 52, 352
  .hword 56, 224, 60, 480, 68, 272, 72, 144
  .hword 76, 400, 84, 336, 88, 208, 92, 464
  .hword 100, 304, 104, 176, 108, 432, 116, 368
  .hword 120, 240, 124, 496, 132, 264, 140, 392
  .hword 148, 328, 152, 200, 156, 456, 164, 296
  .hword 172, 424, 180, 360, 184, 232, 188, 488
  .hword 196, 280, 204, 408, 212, 344, 220, 472
  .hword 228, 312, 236, 440, 244, 376, 252, 504
  .hword 268, 388, 276, 324, 284, 452, 300, 420
  .hword 308, 356, 316, 484, 332, 404, 348, 468
  .hword 364, 436, 380, 500, 412, 460, 444, 492
@ Swap table for stages == 8: 240 halfword entries, consumed two at a time by
@ WebRtcSpl_ComplexBitReverse. Each entry is a byte offset into complex_data.
@ Every row below holds four (first, second) offset pairs. The entry count must
@ stay in step with the length constant loaded in the function.
index_8:
  .hword 4, 512, 8, 256, 12, 768, 16, 128
  .hword 20, 640, 24, 384, 28, 896, 32, 64
  .hword 36, 576, 40, 320, 44, 832, 48, 192
  .hword 52, 704, 56, 448, 60, 960, 68, 544
  .hword 72, 288, 76, 800, 80, 160, 84, 672
  .hword 88, 416, 92, 928, 100, 608, 104, 352
  .hword 108, 864, 112, 224, 116, 736, 120, 480
  .hword 124, 992, 132, 528, 136, 272, 140, 784
  .hword 148, 656, 152, 400, 156, 912, 164, 592
  .hword 168, 336, 172, 848, 176, 208, 180, 720
  .hword 184, 464, 188, 976, 196, 560, 200, 304
  .hword 204, 816, 212, 688, 216, 432, 220, 944
  .hword 228, 624, 232, 368, 236, 880, 244, 752
  .hword 248, 496, 252, 1008, 260, 520, 268, 776
  .hword 276, 648, 280, 392, 284, 904, 292, 584
  .hword 296, 328, 300, 840, 308, 712, 312, 456
  .hword 316, 968, 324, 552, 332, 808, 340, 680
  .hword 344, 424, 348, 936, 356, 616, 364, 872
  .hword 372, 744, 376, 488, 380, 1000, 388, 536
  .hword 396, 792, 404, 664, 412, 920, 420, 600
  .hword 428, 856, 436, 728, 440, 472, 444, 984
  .hword 452, 568, 460, 824, 468, 696, 476, 952
  .hword 484, 632, 492, 888, 500, 760, 508, 1016
  .hword 524, 772, 532, 644, 540, 900, 548, 580
  .hword 556, 836, 564, 708, 572, 964, 588, 804
  .hword 596, 676, 604, 932, 620, 868, 628, 740
  .hword 636, 996, 652, 788, 668, 916, 684, 852
  .hword 692, 724, 700, 980, 716, 820, 732, 948
  .hword 748, 884, 764, 1012, 796, 908, 812, 844
  .hword 828, 972, 860, 940, 892, 1004, 956, 988