2 This file is part of PulseAudio.
4 Copyright 2004-2006 Lennart Poettering
5 Copyright 2009 Wim Taymans <wim.taymans@collabora.co.uk>
7 PulseAudio is free software; you can redistribute it and/or modify
8 it under the terms of the GNU Lesser General Public License as published
9 by the Free Software Foundation; either version 2.1 of the License,
10 or (at your option) any later version.
12 PulseAudio is distributed in the hope that it will be useful, but
13 WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 General Public License for more details.
17 You should have received a copy of the GNU Lesser General Public License
18 along with PulseAudio; if not, write to the Free Software
19 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
27 #include <pulsecore/random.h>
28 #include <pulsecore/macro.h>
29 #include <pulsecore/endianmacros.h>
33 #include "sample-util.h"
35 #if defined (__arm__) && defined (HAVE_ARMV6)
38 " subs r0, r6, %2 \n\t" \
40 " addcs r0, %1 \n\t" \
/* Applies per-channel volume factors to a buffer of 16-bit samples using
 * ARMv6 inline assembly (smulwb/smulwt, ssat, pkhbt).
 *
 *   samples  - sample buffer; results are stored back through this pointer
 *   volumes  - table of 32-bit volume factors; expected to carry padding
 *              after the real entries (see the over-read note below)
 *   channels - number of channels
 *   length   - size of the sample buffer in bytes (halved to a sample count
 *              at the start of the assembly)
 */
43 static void pa_volume_s16ne_arm(int16_t *samples
, int32_t *volumes
, unsigned channels
, unsigned length
) {
46 /* Channels must be at least 4, and always a multiple of the original number.
47 * This is also the max amount we overread the volume array, which should
48 * have enough padding. */
49 channels
= channels
== 3 ? 6 : PA_MAX (4U, channels
);
/* ve = first entry past the rounded-up channel count; handed to the
 * assembly as operand %2. */
50 ve
= volumes
+ channels
;
/* Operand mapping: %0 = samples, %1 = volumes, %2 = ve, %3 = length. */
52 __asm__
__volatile__ (
54 " mov %3, %3, LSR #1 \n\t" /* length /= sizeof (int16_t) */
55 " tst %3, #1 \n\t" /* check for odd samples */
/* Single-sample step: load one volume word through r6 (post-incremented
 * by 4). NOTE(review): r6 is presumably initialised from %1 (volumes) on a
 * line not shown here - confirm. */
59 " ldr r0, [r6], #4 \n\t" /* odd samples volumes */
/* smulwb: 32-bit r0 times the low halfword of r2, keeping bits [47:16]
 * of the product. ssat #16 then clamps to the signed 16-bit range. */
62 " smulwb r0, r0, r2 \n\t"
63 " ssat r0, #16, r0 \n\t"
/* Store the 16-bit result and advance the sample pointer by 2 bytes. */
65 " strh r0, [%0], #2 \n\t"
/* Halve the remaining count again and test whether a pair is left over. */
70 " mov %3, %3, LSR #1 \n\t"
71 " tst %3, #1 \n\t" /* check for odd samples */
75 " ldrd r2, [r6], #8 \n\t" /* 2 samples at a time */
/* smulwt / smulwb take the top / bottom halfword of r0 as the 16-bit
 * operand. */
78 " smulwt r2, r2, r0 \n\t"
79 " smulwb r3, r3, r0 \n\t"
81 " ssat r2, #16, r2 \n\t"
82 " ssat r3, #16, r3 \n\t"
/* pkhbt: low halfword from r3, high halfword from (r2 << 16), so two
 * results are written with a single word store. */
84 " pkhbt r0, r3, r2, LSL #16 \n\t"
85 " str r0, [%0], #4 \n\t"
/* movs: halve the count once more and set the flags from the result
 * (the number of 4-sample groups). */
90 " movs %3, %3, LSR #1 \n\t"
94 " ldrd r2, [r6], #8 \n\t" /* 4 samples at a time */
95 " ldrd r4, [r6], #8 \n\t"
/* Four multiplies: volumes in r2..r5, 16-bit operands taken from the
 * halfwords of r0 and r1. */
98 " smulwt r2, r2, r0 \n\t"
99 " smulwb r3, r3, r0 \n\t"
100 " smulwt r4, r4, r1 \n\t"
101 " smulwb r5, r5, r1 \n\t"
103 " ssat r2, #16, r2 \n\t"
104 " ssat r3, #16, r3 \n\t"
105 " ssat r4, #16, r4 \n\t"
106 " ssat r5, #16, r5 \n\t"
/* Pack the four saturated results into r0:r1 and store 8 bytes at once. */
108 " pkhbt r0, r3, r2, LSL #16 \n\t"
109 " pkhbt r1, r5, r4, LSL #16 \n\t"
110 " strd r0, [%0], #8 \n\t"
/* One 4-sample group done: decrement the group counter, updating flags. */
114 " subs %3, %3, #1 \n\t"
/* All four operands are read-write ("+r"). */
118 : "+r" (samples
), "+r" (volumes
), "+r" (ve
), "+r" (length
)
/* Clobbers: scratch registers r0-r6 and the condition flags. */
120 : "r6", "r5", "r4", "r3", "r2", "r1", "r0", "cc"
/* Self-test and micro-benchmark for pa_volume_s16ne_arm.
 *
 * Runs the ARM routine and the function returned by
 * pa_get_volume_func(PA_SAMPLE_S16NE) on identical random input, prints any
 * differing samples, then times both implementations and logs the results.
 * Finishes by asserting that both output buffers are byte-identical. */
133 static void run_test(void) {
/* samples: ARM output; samples_ref: reference output; samples_orig:
 * pristine input used to reset the buffers between runs. */
134 int16_t samples
[SAMPLES
];
135 int16_t samples_ref
[SAMPLES
];
136 int16_t samples_orig
[SAMPLES
];
/* PADDING extra entries follow the CHANNELS real volumes. */
137 int32_t volumes
[CHANNELS
+ PADDING
];
139 pa_do_volume_func_t func
;
140 pa_usec_t start
, stop
;
142 pa_usec_t min
= INT_MAX
, max
= 0;
/* s2 accumulates squared elapsed times. s1 presumably accumulates the
 * elapsed times themselves; its update is not visible here - confirm. */
143 double s1
= 0, s2
= 0;
/* Reference implementation: whatever is currently registered for S16NE. */
145 func
= pa_get_volume_func(PA_SAMPLE_S16NE
);
/* NOTE(review): sizeof yields size_t; %zu would be the matching
 * conversion specifier rather than %zd. */
147 printf("checking ARM %zd\n", sizeof(samples
));
/* Random input, duplicated into the reference and pristine buffers. */
149 pa_random(samples
, sizeof(samples
));
150 memcpy(samples_ref
, samples
, sizeof(samples
));
151 memcpy(samples_orig
, samples
, sizeof(samples
));
/* Random, clamped volume for each real channel. */
153 for (i
= 0; i
< CHANNELS
; i
++)
154 volumes
[i
] = PA_CLAMP_VOLUME(rand() >> 1);
/* Fill the padding by repeating the table from its start; i continues
 * from CHANNELS. */
155 for (padding
= 0; padding
< PADDING
; padding
++, i
++)
156 volumes
[i
] = volumes
[padding
];
/* Correctness pass: run both implementations once and report mismatches. */
158 func(samples_ref
, volumes
, CHANNELS
, sizeof(samples
));
159 pa_volume_s16ne_arm(samples
, volumes
, CHANNELS
, sizeof(samples
));
160 for (i
= 0; i
< SAMPLES
; i
++) {
161 if (samples
[i
] != samples_ref
[i
]) {
/* Prints: index, ARM result, reference result, original sample, volume. */
162 printf ("%d: %04x != %04x (%04x * %04x)\n", i
, samples
[i
], samples_ref
[i
],
163 samples_orig
[i
], volumes
[i
% CHANNELS
]);
/* Benchmark of the ARM routine: TIMES2 measurements, each timing TIMES
 * iterations of "restore input, apply volume". */
167 for (k
= 0; k
< TIMES2
; k
++) {
168 start
= pa_rtclock_now();
169 for (j
= 0; j
< TIMES
; j
++) {
170 memcpy(samples
, samples_orig
, sizeof(samples
));
171 pa_volume_s16ne_arm(samples
, volumes
, CHANNELS
, sizeof(samples
));
173 stop
= pa_rtclock_now();
/* Track the fastest and slowest measurement. */
175 if (min
> (stop
- start
)) min
= stop
- start
;
176 if (max
< (stop
- start
)) max
= stop
- start
;
/* Sum of squared elapsed times, used in the stddev expression below. */
178 s2
+= (stop
- start
) * (stop
- start
);
/* Logs s1, min, max and sqrt(TIMES2 * s2 - s1 * s1) / TIMES2. */
180 pa_log_info("ARM: %llu usec (min = %llu, max = %llu, stddev = %g).", (long long unsigned int)s1
,
181 (long long unsigned int)min
, (long long unsigned int)max
, sqrt(TIMES2
* s2
- s1
* s1
) / TIMES2
);
/* Reset the extremes before timing the reference implementation. */
183 min
= INT_MAX
; max
= 0;
/* Same benchmark for the reference function, writing into samples_ref. */
185 for (k
= 0; k
< TIMES2
; k
++) {
186 start
= pa_rtclock_now();
187 for (j
= 0; j
< TIMES
; j
++) {
188 memcpy(samples_ref
, samples_orig
, sizeof(samples
));
189 func(samples_ref
, volumes
, CHANNELS
, sizeof(samples
));
191 stop
= pa_rtclock_now();
193 if (min
> (stop
- start
)) min
= stop
- start
;
194 if (max
< (stop
- start
)) max
= stop
- start
;
196 s2
+= (stop
- start
) * (stop
- start
);
198 pa_log_info("ref: %llu usec (min = %llu, max = %llu, stddev = %g).", (long long unsigned int)s1
,
199 (long long unsigned int)min
, (long long unsigned int)max
, sqrt(TIMES2
* s2
- s1
* s1
) / TIMES2
);
/* Final check: both implementations must have produced identical output. */
201 pa_assert_se(memcmp(samples_ref
, samples
, sizeof(samples
)) == 0);
205 #endif /* defined (__arm__) && defined (HAVE_ARMV6) */
/* Installs the ARM-optimised volume routine.
 *
 * When built with both __arm__ and HAVE_ARMV6 defined, logs a message and
 * registers pa_volume_s16ne_arm as the volume function for PA_SAMPLE_S16NE.
 * The visible body lies entirely inside that #if. `flags` is not referenced
 * in the lines shown here. */
208 void pa_volume_func_init_arm(pa_cpu_arm_flag_t flags
) {
209 #if defined (__arm__) && defined (HAVE_ARMV6)
210 pa_log_info("Initialising ARM optimized functions.");
/* The cast adapts the routine's concrete signature to the generic
 * pa_do_volume_func_t type. */
216 pa_set_volume_func(PA_SAMPLE_S16NE
, (pa_do_volume_func_t
) pa_volume_s16ne_arm
);
217 #endif /* defined (__arm__) && defined (HAVE_ARMV6) */