/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <private/bionic_asm.h>

    .syntax unified

    // To avoid warning about deprecated instructions, add an explicit
    // arch. The code generated is exactly the same.
    .arch armv7-a

    .thumb
    .thumb_func

/*
 * strcat_a9: append the string at r1 to the end of the string at r0.
 *
 * NOTE(review): presumably reached as
 *     char* strcat(char* dst, const char* src)
 * -- confirm against the caller/dispatcher; the register contract below is
 * what the code itself implements.
 *
 * In:    r0 = dst (NUL-terminated, scanned for its terminator, then written)
 *        r1 = src (NUL-terminated, only read)
 * Out:   r0 = dst exactly as passed in. On the empty-src fast path r0 is
 *        never modified; otherwise it is saved by m_push and reloaded by
 *        m_ret.
 * Uses:  r2, r3, ip as scratch without saving them.
 *        r4, r5, lr are also used as scratch, but are saved by m_push and
 *        restored by every m_ret.
 * Stack: 16 bytes (four registers) between m_push and m_ret.
 *
 * Structure:
 *   1. .Lstrcat_*  : advance r0 to the NUL terminator of dst.
 *   2. .Lstrcpy_*  : copy src, including its terminator, to that position.
 *
 * Zero-byte test used throughout, for a 32-bit word x:
 *     ip = (x - 0x01010101) & ~x & 0x80808080
 * ip is non-zero exactly when some byte of x is zero. The marker bit
 * (0x80 in byte 0, 0x8000 in byte 1, ...) of the lowest-addressed zero
 * byte is always set, which is why every consumer below tests the markers
 * from byte 0 upwards.
 */

    // Save the incoming dst pointer (r0) so it can be returned, together
    // with r4, r5 and lr, all of which are overwritten later on.
    .macro m_push
    push    {r0, r4, r5, lr}
    .endm // m_push

    // Function return: reloads r0, r4, r5 and pops the saved lr straight
    // into pc. The argument is the pop mnemonic to use, so that a
    // conditional form (popne, popeq, popcs) can be placed in an IT block.
    .macro m_ret inst
    \inst   {r0, r4, r5, pc}
    .endm // m_ret

    // Test one dst byte: if it is the terminator, leave r0 pointing at it
    // and go and start copying; otherwise step r0 past it.
    .macro m_scan_byte
    ldrb    r3, [r0]
    cbz     r3, .Lstrcat_r0_scan_done
    add     r0, #1
    .endm // m_scan_byte

    // Copy one byte from src to dst (both pointers post-incremented), then
    // apply the given compare-and-branch (cbz or cbnz) to the copied byte.
    .macro m_copy_byte reg, cmd, label
    ldrb    \reg, [r1], #1
    strb    \reg, [r0], #1
    \cmd    \reg, \label
    .endm // m_copy_byte

ENTRY(strcat_a9)
    // Quick check to see if src is empty.
    ldrb    r2, [r1]
    pld     [r1, #0]
    cbnz    r2, .Lstrcat_continue
    // Nothing to append: dst is untouched and r0 still holds it.
    bx      lr

.Lstrcat_continue:
    // To speed up really small dst strings, unroll checking the first 4 bytes.
    m_push
    m_scan_byte
    m_scan_byte
    m_scan_byte
    m_scan_byte

    // r3 = misalignment of r0 relative to a double word; the word-at-a-time
    // scan below is only entered once r0 is 8-byte aligned.
    ands    r3, r0, #7
    bne     .Lstrcat_align_src

    .p2align 2
.Lstrcat_mainloop:
    // Scan dst eight bytes per iteration.
    ldmia   r0!, {r2, r3}

    pld     [r0, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcat_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcat_zero_in_second_register
    b       .Lstrcat_mainloop

.Lstrcat_zero_in_first_register:
    // r0 is 8 past the first word; rewind by 4 so that, as the code below
    // expects, r0 is 4 past the word that contains the zero byte.
    sub     r0, r0, #4

.Lstrcat_zero_in_second_register:
    // On entry: ip = zero-byte markers of the word, r0 = that word's
    // address + 4. Move r0 back onto the terminator itself.
    // Check for zero in byte 0.
    tst     ip, #0x80
    it      ne
    subne   r0, r0, #4
    bne     .Lstrcat_r0_scan_done
    // Check for zero in byte 1.
    tst     ip, #0x8000
    it      ne
    subne   r0, r0, #3
    bne     .Lstrcat_r0_scan_done
    // Check for zero in byte 2.
    tst     ip, #0x800000
    it      ne
    subne   r0, r0, #2
    it      eq
    // Zero is in byte 3.
    subeq   r0, r0, #1

.Lstrcat_r0_scan_done:
    // r0 now points at the terminator of dst; from here on this is a
    // string copy of src to r0.
    // Unroll the first 8 bytes that will be copied.
    m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r5, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish
    // Eighth byte: keep going only if it was not the terminator,
    // otherwise fall through to the return.
    m_copy_byte reg=r5, cmd=cbnz, label=.Lstrcpy_continue

.Lstrcpy_finish:
    m_ret   inst=pop

.Lstrcpy_continue:
    pld     [r1, #0]
    // r3 = misalignment of the write pointer relative to a double word.
    ands    r3, r0, #7
    bne     .Lstrcpy_align_dst

.Lstrcpy_check_src_align:
    // At this point dst is aligned to a double word, check if src
    // is also aligned to a double word.
    ands    r3, r1, #7
    bne     .Lstrcpy_unaligned_copy

    .p2align 2
.Lstrcpy_mainloop:
    // Both pointers double-word aligned: move eight bytes per iteration,
    // storing only after both words are known to be free of zero bytes.
    ldmia   r1!, {r2, r3}

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstrcpy_mainloop

.Lstrcpy_zero_in_first_register:
    // On entry: r2 = word containing the terminator (not yet stored),
    // ip = its zero-byte markers. Store up to and including the
    // terminator, then return.
    //
    // ip only has bits 7/15/23/31 set, so after shifting left by 17:
    //   result != 0 (ne)  <=> marker for byte 0 was set
    //   carry set   (cs)  <=> marker for byte 1 was set
    // lr is free to use as a scratch destination: the real return address
    // is on the stack.
    lsls    lr, ip, #17
    itt     ne
    strbne  r2, [r0]
    m_ret   inst=popne
    itt     cs
    strhcs  r2, [r0]
    m_ret   inst=popcs
    // Bytes 0 and 1 are non-zero. Shift the byte 2 marker (bit 23) up one
    // place: result == 0 means it was clear, so the terminator is byte 3
    // and the whole word is stored.
    lsls    ip, ip, #1
    itt     eq
    streq   r2, [r0]
    m_ret   inst=popeq
    // Terminator is byte 2: store the low halfword, then byte 2.
    strh    r2, [r0], #2
    lsr     r3, r2, #16
    strb    r3, [r0]
    m_ret   inst=pop

.Lstrcpy_zero_in_second_register:
    // On entry: r2 = complete word still to be stored, r3 = following word
    // containing the terminator, ip = zero-byte markers of r3.
    // Same flag decoding as in .Lstrcpy_zero_in_first_register.
    lsls    lr, ip, #17
    ittt    ne
    stmiane r0!, {r2}
    strbne  r3, [r0]
    m_ret   inst=popne
    ittt    cs
    strcs   r2, [r0], #4
    strhcs  r3, [r0]
    m_ret   inst=popcs
    lsls    ip, ip, #1
    itt     eq
    stmiaeq r0, {r2, r3}
    m_ret   inst=popeq
    // Terminator is byte 2 of r3.
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r4, r3, #16
    strb    r4, [r0]
    m_ret   inst=pop

.Lstrcpy_align_dst:
    // Align to a double word (64 bits).
    // r3 = number of bytes (1..7) needed to reach the next 8-byte boundary.
    rsb     r3, r3, #8
    // Move bit 0 of r3 into bit 31 (Z clear if set) and bit 1 into carry.
    // None of the byte-copy instructions below alter the flags, so both
    // results remain valid until they are consumed.
    lsls    ip, r3, #31
    beq     .Lstrcpy_align_to_32

    // Bit 0 set: copy one byte.
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstrcpy_complete

.Lstrcpy_align_to_32:
    bcc     .Lstrcpy_align_to_64

    // Bit 1 set: copy two bytes.
    ldrb    r4, [r1], #1
    strb    r4, [r0], #1
    cmp     r4, #0
    it      eq
    m_ret   inst=popeq
    ldrb    r5, [r1], #1
    strb    r5, [r0], #1
    cmp     r5, #0
    it      eq
    m_ret   inst=popeq

.Lstrcpy_align_to_64:
    // Bit 2 set: copy four more bytes.
    tst     r3, #4
    beq     .Lstrcpy_check_src_align
    // Read one byte at a time since we don't know the src alignment
    // and we don't want to read into a different page.
    ldrb    r4, [r1], #1
    strb    r4, [r0], #1
    cbz     r4, .Lstrcpy_complete
    ldrb    r5, [r1], #1
    strb    r5, [r0], #1
    cbz     r5, .Lstrcpy_complete
    ldrb    r4, [r1], #1
    strb    r4, [r0], #1
    cbz     r4, .Lstrcpy_complete
    ldrb    r5, [r1], #1
    strb    r5, [r0], #1
    cbz     r5, .Lstrcpy_complete
    b       .Lstrcpy_check_src_align

.Lstrcpy_complete:
    m_ret   inst=pop

.Lstrcpy_unaligned_copy:
    // Dst is aligned to a double word, while src is at an unknown alignment.
    // There are 7 different versions of the unaligned copy code
    // to prevent overreading the src. The mainloop of every single version
    // will store 64 bits per loop. The difference is how much of src can
    // be read without potentially crossing a page boundary.
    //
    // r3 = src & 7 (1..7) indexes the byte table that starts right after
    // the tbb; each entry is a halfword offset from the table. Entry 0 is
    // never selected because r3 is non-zero here.
    tbb     [pc, r3]
.Lstrcpy_unaligned_branchtable:
    .byte 0
    .byte ((.Lstrcpy_unalign7 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign6 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign5 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign4 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign3 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign2 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign1 - .Lstrcpy_unaligned_branchtable)/2)

    .p2align 2
    // Can read 7 bytes before possibly crossing a page.
.Lstrcpy_unalign7:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_first_register

    // Bytes 4, 5 and 6 are probed individually before the second word
    // (which also covers byte 7) is loaded.
    ldrb    r3, [r1]
    cbz     r3, .Lstrcpy_unalign7_copy5bytes
    ldrb    r4, [r1, #1]
    cbz     r4, .Lstrcpy_unalign7_copy6bytes
    ldrb    r5, [r1, #2]
    cbz     r5, .Lstrcpy_unalign7_copy7bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Only byte 7 is still unchecked. Z is set if it is the terminator;
    // the store does not change the flags, and all eight bytes are
    // written in either case.
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
    beq     .Lstrcpy_unalign_return
    b       .Lstrcpy_unalign7

.Lstrcpy_unalign7_copy5bytes:
    stmia   r0!, {r2}
    strb    r3, [r0]
.Lstrcpy_unalign_return:
    m_ret   inst=pop

.Lstrcpy_unalign7_copy6bytes:
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    m_ret   inst=pop

.Lstrcpy_unalign7_copy7bytes:
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    strb    r5, [r0], #1
    m_ret   inst=pop

    .p2align 2
    // Can read 6 bytes before possibly crossing a page.
.Lstrcpy_unalign6:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_first_register

    // Probe bytes 4 and 5 individually before loading the second word.
    ldrb    r4, [r1]
    cbz     r4, .Lstrcpy_unalign_copy5bytes
    ldrb    r5, [r1, #1]
    cbz     r5, .Lstrcpy_unalign_copy6bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 4 and 5 are known non-zero; check byte 6, then byte 7.
    tst     r3, #0xff0000
    beq     .Lstrcpy_unalign6_copy7bytes
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
    beq     .Lstrcpy_unalign_return
    b       .Lstrcpy_unalign6

.Lstrcpy_unalign6_copy7bytes:
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r3, #16
    strb    r3, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 5 bytes before possibly crossing a page.
.Lstrcpy_unalign5:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_first_register

    // Probe byte 4 before loading the second word.
    ldrb    r4, [r1]
    cbz     r4, .Lstrcpy_unalign_copy5bytes

    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstrcpy_unalign5

.Lstrcpy_unalign_copy5bytes:
    stmia   r0!, {r2}
    strb    r4, [r0]
    m_ret   inst=pop

.Lstrcpy_unalign_copy6bytes:
    stmia   r0!, {r2}
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 4 bytes before possibly crossing a page.
.Lstrcpy_unalign4:
    // The second word is loaded only after the first one has been found
    // free of zero bytes.
    ldmia   r1!, {r2}

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_first_register

    ldmia   r1!, {r3}
    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstrcpy_unalign4

    .p2align 2
    // Can read 3 bytes before possibly crossing a page.
.Lstrcpy_unalign3:
    // Probe bytes 0, 1 and 2 individually before any word load.
    ldrb    r2, [r1]
    cbz     r2, .Lstrcpy_unalign3_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .Lstrcpy_unalign3_copy2bytes
    ldrb    r4, [r1, #2]
    cbz     r4, .Lstrcpy_unalign3_copy3bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    // Bytes 0..2 are known non-zero, so only byte 3 of r2 needs checking.
    lsrs    lr, r2, #24
    beq     .Lstrcpy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstrcpy_unalign3

.Lstrcpy_unalign3_copy1byte:
    strb    r2, [r0]
    m_ret   inst=pop

.Lstrcpy_unalign3_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

.Lstrcpy_unalign3_copy3bytes:
    strb    r2, [r0], #1
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 2 bytes before possibly crossing a page.
.Lstrcpy_unalign2:
    // Probe bytes 0 and 1 individually before any word load.
    ldrb    r2, [r1]
    cbz     r2, .Lstrcpy_unalign_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .Lstrcpy_unalign_copy2bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 are known non-zero; check byte 2, then byte 3 of r2.
    tst     r2, #0xff0000
    beq     .Lstrcpy_unalign_copy3bytes
    lsrs    ip, r2, #24
    beq     .Lstrcpy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstrcpy_unalign2

    .p2align 2
    // Can read 1 byte before possibly crossing a page.
.Lstrcpy_unalign1:
    // Probe byte 0 before any word load.
    ldrb    r2, [r1]
    cbz     r2, .Lstrcpy_unalign_copy1byte

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstrcpy_unalign1

    // Shared tails for the unaligned variants. The terminator is always
    // the last byte written.
.Lstrcpy_unalign_copy1byte:
    strb    r2, [r0]
    m_ret   inst=pop

.Lstrcpy_unalign_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

.Lstrcpy_unalign_copy3bytes:
    strh    r2, [r0], #2
    lsr     r2, #16
    strb    r2, [r0]
    m_ret   inst=pop

.Lstrcpy_unalign_copy4bytes:
    stmia   r0, {r2}
    m_ret   inst=pop

.Lstrcat_align_src:
    // Align to a double word (64 bits).
    // This aligns the dst scan pointer r0: r3 = number of bytes (1..7) to
    // the next 8-byte boundary. Bit 0 is moved into bit 31 (Z clear if
    // set) and bit 1 into carry; ldrb and cbz leave the flags alone.
    rsb     r3, r3, #8
    lsls    ip, r3, #31
    beq     .Lstrcat_align_to_32
    ldrb    r2, [r0], #1
    cbz     r2, .Lstrcat_r0_update

.Lstrcat_align_to_32:
    bcc     .Lstrcat_align_to_64
    ldrb    r2, [r0], #1
    cbz     r2, .Lstrcat_r0_update
    ldrb    r2, [r0], #1
    cbz     r2, .Lstrcat_r0_update

.Lstrcat_align_to_64:
    tst     r3, #4
    beq     .Lstrcat_mainloop
    // One more word is needed to reach the double-word boundary. After
    // the post-increment r0 is 4 past that word, which is the state
    // .Lstrcat_zero_in_second_register expects.
    ldr     r3, [r0], #4

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcat_zero_in_second_register
    b       .Lstrcat_mainloop

.Lstrcat_r0_update:
    // The byte load that found the terminator post-incremented r0 past
    // it; step back onto the terminator.
    sub     r0, r0, #1
    b       .Lstrcat_r0_scan_done
END(strcat_a9)