/*
 * Copyright (c) 2012-2014 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Implementation of strcmp for ARMv7 when DSP instructions are
   available.  Use ldrd to support wider loads, provided the data
   is sufficiently aligned.  Use saturating arithmetic to optimize
   the compares.  */

/* Build Options:
   STRCMP_NO_PRECHECK: Don't run a quick pre-check of the first
   byte in the string.  If comparing completely random strings
   the pre-check will save time, since there is a very high
   probability of a mismatch in the first character: we save
   significant overhead if this is the common case.  However,
   if strings are likely to be identical (eg because we're
   verifying a hit in a hash table), then this check is largely
   redundant.  */

/* NOTE(review): the code below references def_fn, S2HI/S2LO, MSB/LSB
   and BYTE1/2/3_OFFSET, which are not defined in this chunk -- confirm
   they are provided by an include or an earlier part of the file.  */

/* This version uses Thumb-2 code.  */
	.thumb
	.syntax unified

/* Parameters and result.  */
#define src1		r0	/* First string (argument 0).  */
#define src2		r1	/* Second string (argument 1).  */
#define result		r0	/* Overlaps src1.  */

/* Internal variables.  */
#define tmp1		r4
#define tmp2		r5
#define const_m1	r12	/* Holds 0xFFFFFFFF throughout.  */

/* Additional internal variables for 64-bit aligned data.  */
#define data1a		r2
#define data1b		r3
#define data2a		r6
#define data2b		r7
#define syndrome_a	tmp1
#define syndrome_b	tmp2

/* Additional internal variables for 32-bit aligned data.  */
#define data1		r2
#define data2		r3
#define syndrome	tmp2

/* Macro to compute and return the result value for word-aligned
   cases.  On entry \synd holds the syndrome word (non-zero bits mark
   either a NUL byte in data1 or the first difference), \d1/\d2 hold
   the two data words, and \restore_r6 selects whether r6/r7 must be
   reloaded from the stack.  Pops the r4-r7 save area and returns.  */
	.macro strcmp_epilogue_aligned synd d1 d2 restore_r6
#ifdef __ARM_BIG_ENDIAN
	/* If data1 contains a zero byte, then syndrome will contain a 1 in
	   bit 7 of that byte.  Otherwise, the highest set bit in the
	   syndrome will highlight the first different bit.  It is therefore
	   sufficient to extract the eight bits starting with the syndrome
	   bit.  */
	clz	tmp1, \synd
	lsl	r1, \d2, tmp1
	.if \restore_r6
	ldrd	r6, r7, [sp, #8]
	.endif
	.cfi_restore 6
	.cfi_restore 7
	lsl	\d1, \d1, tmp1
	.cfi_remember_state
	lsr	result, \d1, #24
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	sub	result, result, r1, lsr #24
	bx	lr
#else
	/* To use the big-endian trick we'd have to reverse all three words.
	   that's slower than this approach.  */
	rev	\synd, \synd
	clz	tmp1, \synd
	bic	tmp1, tmp1, #7
	lsr	r1, \d2, tmp1
	.cfi_remember_state
	.if \restore_r6
	ldrd	r6, r7, [sp, #8]
	.endif
	.cfi_restore 6
	.cfi_restore 7
	lsr	\d1, \d1, tmp1
	and	result, \d1, #255
	and	r1, r1, #255
	ldrd	r4, r5, [sp], #16
	.cfi_restore 4
	.cfi_restore 5
	sub	result, result, r1

	bx	lr
#endif
	.endm

121 | .text |
---|
122 | .p2align 5 |
---|
123 | .Lstrcmp_start_addr: |
---|
124 | #ifndef STRCMP_NO_PRECHECK |
---|
125 | .Lfastpath_exit: |
---|
126 | sub r0, r2, r3 |
---|
127 | bx lr |
---|
128 | nop |
---|
129 | #endif |
---|
130 | def_fn strcmp |
---|
131 | #ifndef STRCMP_NO_PRECHECK |
---|
132 | ldrb r2, [src1] |
---|
133 | ldrb r3, [src2] |
---|
134 | cmp r2, #1 |
---|
135 | it cs |
---|
136 | cmpcs r2, r3 |
---|
137 | bne .Lfastpath_exit |
---|
138 | #endif |
---|
139 | .cfi_sections .debug_frame |
---|
140 | .cfi_startproc |
---|
141 | strd r4, r5, [sp, #-16]! |
---|
142 | .cfi_def_cfa_offset 16 |
---|
143 | .cfi_offset 4, -16 |
---|
144 | .cfi_offset 5, -12 |
---|
145 | orr tmp1, src1, src2 |
---|
146 | strd r6, r7, [sp, #8] |
---|
147 | .cfi_offset 6, -8 |
---|
148 | .cfi_offset 7, -4 |
---|
149 | mvn const_m1, #0 |
---|
150 | lsl r2, tmp1, #29 |
---|
151 | cbz r2, .Lloop_aligned8 |
---|
152 | |
---|
153 | .Lnot_aligned: |
---|
154 | eor tmp1, src1, src2 |
---|
155 | tst tmp1, #7 |
---|
156 | bne .Lmisaligned8 |
---|
157 | |
---|
158 | /* Deal with mutual misalignment by aligning downwards and then |
---|
159 | masking off the unwanted loaded data to prevent a difference. */ |
---|
160 | and tmp1, src1, #7 |
---|
161 | bic src1, src1, #7 |
---|
162 | and tmp2, tmp1, #3 |
---|
163 | bic src2, src2, #7 |
---|
164 | lsl tmp2, tmp2, #3 /* Bytes -> bits. */ |
---|
165 | ldrd data1a, data1b, [src1], #16 |
---|
166 | tst tmp1, #4 |
---|
167 | ldrd data2a, data2b, [src2], #16 |
---|
168 | /* In thumb code we can't use MVN with a register shift, but |
---|
169 | we do have ORN. */ |
---|
170 | S2HI tmp1, const_m1, tmp2 |
---|
171 | orn data1a, data1a, tmp1 |
---|
172 | orn data2a, data2a, tmp1 |
---|
173 | beq .Lstart_realigned8 |
---|
174 | orn data1b, data1b, tmp1 |
---|
175 | mov data1a, const_m1 |
---|
176 | orn data2b, data2b, tmp1 |
---|
177 | mov data2a, const_m1 |
---|
178 | b .Lstart_realigned8 |
---|
179 | |
---|
180 | /* Unwind the inner loop by a factor of 2, giving 16 bytes per |
---|
181 | pass. */ |
---|
182 | .p2align 5,,12 /* Don't start in the tail bytes of a cache line. */ |
---|
183 | .p2align 2 /* Always word aligned. */ |
---|
184 | .Lloop_aligned8: |
---|
185 | ldrd data1a, data1b, [src1], #16 |
---|
186 | ldrd data2a, data2b, [src2], #16 |
---|
187 | .Lstart_realigned8: |
---|
188 | uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */ |
---|
189 | eor syndrome_a, data1a, data2a |
---|
190 | sel syndrome_a, syndrome_a, const_m1 |
---|
191 | cbnz syndrome_a, .Ldiff_in_a |
---|
192 | uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */ |
---|
193 | eor syndrome_b, data1b, data2b |
---|
194 | sel syndrome_b, syndrome_b, const_m1 |
---|
195 | cbnz syndrome_b, .Ldiff_in_b |
---|
196 | |
---|
197 | ldrd data1a, data1b, [src1, #-8] |
---|
198 | ldrd data2a, data2b, [src2, #-8] |
---|
199 | uadd8 syndrome_b, data1a, const_m1 /* Only want GE bits, */ |
---|
200 | eor syndrome_a, data1a, data2a |
---|
201 | sel syndrome_a, syndrome_a, const_m1 |
---|
202 | uadd8 syndrome_b, data1b, const_m1 /* Only want GE bits. */ |
---|
203 | eor syndrome_b, data1b, data2b |
---|
204 | sel syndrome_b, syndrome_b, const_m1 |
---|
205 | /* Can't use CBZ for backwards branch. */ |
---|
206 | orrs syndrome_b, syndrome_b, syndrome_a /* Only need if s_a == 0 */ |
---|
207 | beq .Lloop_aligned8 |
---|
208 | |
---|
209 | .Ldiff_found: |
---|
210 | cbnz syndrome_a, .Ldiff_in_a |
---|
211 | |
---|
212 | .Ldiff_in_b: |
---|
213 | strcmp_epilogue_aligned syndrome_b, data1b, data2b 1 |
---|
214 | |
---|
215 | .Ldiff_in_a: |
---|
216 | .cfi_restore_state |
---|
217 | strcmp_epilogue_aligned syndrome_a, data1a, data2a 1 |
---|
218 | |
---|
219 | .cfi_restore_state |
---|
220 | .Lmisaligned8: |
---|
221 | tst tmp1, #3 |
---|
222 | bne .Lmisaligned4 |
---|
223 | ands tmp1, src1, #3 |
---|
224 | bne .Lmutual_align4 |
---|
225 | |
---|
226 | /* Unrolled by a factor of 2, to reduce the number of post-increment |
---|
227 | operations. */ |
---|
228 | .Lloop_aligned4: |
---|
229 | ldr data1, [src1], #8 |
---|
230 | ldr data2, [src2], #8 |
---|
231 | .Lstart_realigned4: |
---|
232 | uadd8 syndrome, data1, const_m1 /* Only need GE bits. */ |
---|
233 | eor syndrome, data1, data2 |
---|
234 | sel syndrome, syndrome, const_m1 |
---|
235 | cbnz syndrome, .Laligned4_done |
---|
236 | ldr data1, [src1, #-4] |
---|
237 | ldr data2, [src2, #-4] |
---|
238 | uadd8 syndrome, data1, const_m1 |
---|
239 | eor syndrome, data1, data2 |
---|
240 | sel syndrome, syndrome, const_m1 |
---|
241 | cmp syndrome, #0 |
---|
242 | beq .Lloop_aligned4 |
---|
243 | |
---|
244 | .Laligned4_done: |
---|
245 | strcmp_epilogue_aligned syndrome, data1, data2, 0 |
---|
246 | |
---|
247 | .Lmutual_align4: |
---|
248 | .cfi_restore_state |
---|
249 | /* Deal with mutual misalignment by aligning downwards and then |
---|
250 | masking off the unwanted loaded data to prevent a difference. */ |
---|
251 | lsl tmp1, tmp1, #3 /* Bytes -> bits. */ |
---|
252 | bic src1, src1, #3 |
---|
253 | ldr data1, [src1], #8 |
---|
254 | bic src2, src2, #3 |
---|
255 | ldr data2, [src2], #8 |
---|
256 | |
---|
257 | /* In thumb code we can't use MVN with a register shift, but |
---|
258 | we do have ORN. */ |
---|
259 | S2HI tmp1, const_m1, tmp1 |
---|
260 | orn data1, data1, tmp1 |
---|
261 | orn data2, data2, tmp1 |
---|
262 | b .Lstart_realigned4 |
---|
263 | |
---|
264 | .Lmisaligned4: |
---|
265 | ands tmp1, src1, #3 |
---|
266 | beq .Lsrc1_aligned |
---|
267 | sub src2, src2, tmp1 |
---|
268 | bic src1, src1, #3 |
---|
269 | lsls tmp1, tmp1, #31 |
---|
270 | ldr data1, [src1], #4 |
---|
271 | beq .Laligned_m2 |
---|
272 | bcs .Laligned_m1 |
---|
273 | |
---|
274 | #ifdef STRCMP_NO_PRECHECK |
---|
275 | ldrb data2, [src2, #1] |
---|
276 | uxtb tmp1, data1, ror #BYTE1_OFFSET |
---|
277 | subs tmp1, tmp1, data2 |
---|
278 | bne .Lmisaligned_exit |
---|
279 | cbz data2, .Lmisaligned_exit |
---|
280 | |
---|
281 | .Laligned_m2: |
---|
282 | ldrb data2, [src2, #2] |
---|
283 | uxtb tmp1, data1, ror #BYTE2_OFFSET |
---|
284 | subs tmp1, tmp1, data2 |
---|
285 | bne .Lmisaligned_exit |
---|
286 | cbz data2, .Lmisaligned_exit |
---|
287 | |
---|
288 | .Laligned_m1: |
---|
289 | ldrb data2, [src2, #3] |
---|
290 | uxtb tmp1, data1, ror #BYTE3_OFFSET |
---|
291 | subs tmp1, tmp1, data2 |
---|
292 | bne .Lmisaligned_exit |
---|
293 | add src2, src2, #4 |
---|
294 | cbnz data2, .Lsrc1_aligned |
---|
295 | #else /* STRCMP_NO_PRECHECK */ |
---|
296 | /* If we've done the pre-check, then we don't need to check the |
---|
297 | first byte again here. */ |
---|
298 | ldrb data2, [src2, #2] |
---|
299 | uxtb tmp1, data1, ror #BYTE2_OFFSET |
---|
300 | subs tmp1, tmp1, data2 |
---|
301 | bne .Lmisaligned_exit |
---|
302 | cbz data2, .Lmisaligned_exit |
---|
303 | |
---|
304 | .Laligned_m2: |
---|
305 | ldrb data2, [src2, #3] |
---|
306 | uxtb tmp1, data1, ror #BYTE3_OFFSET |
---|
307 | subs tmp1, tmp1, data2 |
---|
308 | bne .Lmisaligned_exit |
---|
309 | cbnz data2, .Laligned_m1 |
---|
310 | #endif |
---|
311 | |
---|
312 | .Lmisaligned_exit: |
---|
313 | .cfi_remember_state |
---|
314 | mov result, tmp1 |
---|
315 | ldr r4, [sp], #16 |
---|
316 | .cfi_restore 4 |
---|
317 | bx lr |
---|
318 | |
---|
319 | #ifndef STRCMP_NO_PRECHECK |
---|
320 | .Laligned_m1: |
---|
321 | add src2, src2, #4 |
---|
322 | #endif |
---|
323 | .Lsrc1_aligned: |
---|
324 | .cfi_restore_state |
---|
325 | /* src1 is word aligned, but src2 has no common alignment |
---|
326 | with it. */ |
---|
327 | ldr data1, [src1], #4 |
---|
328 | lsls tmp1, src2, #31 /* C=src2[1], Z=src2[0]. */ |
---|
329 | |
---|
330 | bic src2, src2, #3 |
---|
331 | ldr data2, [src2], #4 |
---|
332 | bhi .Loverlap1 /* C=1, Z=0 => src2[1:0] = 0b11. */ |
---|
333 | bcs .Loverlap2 /* C=1, Z=1 => src2[1:0] = 0b10. */ |
---|
334 | |
---|
335 | /* (overlap3) C=0, Z=0 => src2[1:0] = 0b01. */ |
---|
336 | .Loverlap3: |
---|
337 | bic tmp1, data1, #MSB |
---|
338 | uadd8 syndrome, data1, const_m1 |
---|
339 | eors syndrome, tmp1, data2, S2LO #8 |
---|
340 | sel syndrome, syndrome, const_m1 |
---|
341 | bne 4f |
---|
342 | cbnz syndrome, 5f |
---|
343 | ldr data2, [src2], #4 |
---|
344 | eor tmp1, tmp1, data1 |
---|
345 | cmp tmp1, data2, S2HI #24 |
---|
346 | bne 6f |
---|
347 | ldr data1, [src1], #4 |
---|
348 | b .Loverlap3 |
---|
349 | 4: |
---|
350 | S2LO data2, data2, #8 |
---|
351 | b .Lstrcmp_tail |
---|
352 | |
---|
353 | 5: |
---|
354 | bics syndrome, syndrome, #MSB |
---|
355 | bne .Lstrcmp_done_equal |
---|
356 | |
---|
357 | /* We can only get here if the MSB of data1 contains 0, so |
---|
358 | fast-path the exit. */ |
---|
359 | ldrb result, [src2] |
---|
360 | .cfi_remember_state |
---|
361 | ldrd r4, r5, [sp], #16 |
---|
362 | .cfi_restore 4 |
---|
363 | .cfi_restore 5 |
---|
364 | /* R6/7 Not used in this sequence. */ |
---|
365 | .cfi_restore 6 |
---|
366 | .cfi_restore 7 |
---|
367 | neg result, result |
---|
368 | bx lr |
---|
369 | |
---|
370 | 6: |
---|
371 | .cfi_restore_state |
---|
372 | S2LO data1, data1, #24 |
---|
373 | and data2, data2, #LSB |
---|
374 | b .Lstrcmp_tail |
---|
375 | |
---|
376 | .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */ |
---|
377 | .Loverlap2: |
---|
378 | and tmp1, data1, const_m1, S2LO #16 |
---|
379 | uadd8 syndrome, data1, const_m1 |
---|
380 | eors syndrome, tmp1, data2, S2LO #16 |
---|
381 | sel syndrome, syndrome, const_m1 |
---|
382 | bne 4f |
---|
383 | cbnz syndrome, 5f |
---|
384 | ldr data2, [src2], #4 |
---|
385 | eor tmp1, tmp1, data1 |
---|
386 | cmp tmp1, data2, S2HI #16 |
---|
387 | bne 6f |
---|
388 | ldr data1, [src1], #4 |
---|
389 | b .Loverlap2 |
---|
390 | 4: |
---|
391 | S2LO data2, data2, #16 |
---|
392 | b .Lstrcmp_tail |
---|
393 | 5: |
---|
394 | ands syndrome, syndrome, const_m1, S2LO #16 |
---|
395 | bne .Lstrcmp_done_equal |
---|
396 | |
---|
397 | ldrh data2, [src2] |
---|
398 | S2LO data1, data1, #16 |
---|
399 | #ifdef __ARM_BIG_ENDIAN |
---|
400 | lsl data2, data2, #16 |
---|
401 | #endif |
---|
402 | b .Lstrcmp_tail |
---|
403 | |
---|
404 | 6: |
---|
405 | S2LO data1, data1, #16 |
---|
406 | and data2, data2, const_m1, S2LO #16 |
---|
407 | b .Lstrcmp_tail |
---|
408 | |
---|
409 | .p2align 5,,12 /* Ensure at least 3 instructions in cache line. */ |
---|
410 | .Loverlap1: |
---|
411 | and tmp1, data1, #LSB |
---|
412 | uadd8 syndrome, data1, const_m1 |
---|
413 | eors syndrome, tmp1, data2, S2LO #24 |
---|
414 | sel syndrome, syndrome, const_m1 |
---|
415 | bne 4f |
---|
416 | cbnz syndrome, 5f |
---|
417 | ldr data2, [src2], #4 |
---|
418 | eor tmp1, tmp1, data1 |
---|
419 | cmp tmp1, data2, S2HI #8 |
---|
420 | bne 6f |
---|
421 | ldr data1, [src1], #4 |
---|
422 | b .Loverlap1 |
---|
423 | 4: |
---|
424 | S2LO data2, data2, #24 |
---|
425 | b .Lstrcmp_tail |
---|
426 | 5: |
---|
427 | tst syndrome, #LSB |
---|
428 | bne .Lstrcmp_done_equal |
---|
429 | ldr data2, [src2] |
---|
430 | 6: |
---|
431 | S2LO data1, data1, #8 |
---|
432 | bic data2, data2, #MSB |
---|
433 | b .Lstrcmp_tail |
---|
434 | |
---|
435 | .Lstrcmp_done_equal: |
---|
436 | mov result, #0 |
---|
437 | .cfi_remember_state |
---|
438 | ldrd r4, r5, [sp], #16 |
---|
439 | .cfi_restore 4 |
---|
440 | .cfi_restore 5 |
---|
441 | /* R6/7 not used in this sequence. */ |
---|
442 | .cfi_restore 6 |
---|
443 | .cfi_restore 7 |
---|
444 | bx lr |
---|
445 | |
---|
446 | .Lstrcmp_tail: |
---|
447 | .cfi_restore_state |
---|
448 | #ifndef __ARM_BIG_ENDIAN |
---|
449 | rev data1, data1 |
---|
450 | rev data2, data2 |
---|
451 | /* Now everything looks big-endian... */ |
---|
452 | #endif |
---|
453 | uadd8 tmp1, data1, const_m1 |
---|
454 | eor tmp1, data1, data2 |
---|
455 | sel syndrome, tmp1, const_m1 |
---|
456 | clz tmp1, syndrome |
---|
457 | lsl data1, data1, tmp1 |
---|
458 | lsl data2, data2, tmp1 |
---|
459 | lsr result, data1, #24 |
---|
460 | ldrd r4, r5, [sp], #16 |
---|
461 | .cfi_restore 4 |
---|
462 | .cfi_restore 5 |
---|
463 | /* R6/7 not used in this sequence. */ |
---|
464 | .cfi_restore 6 |
---|
465 | .cfi_restore 7 |
---|
466 | sub result, result, data2, lsr #24 |
---|
467 | bx lr |
---|
468 | .cfi_endproc |
---|
469 | .size strcmp, . - .Lstrcmp_start_addr |
---|