/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <private/bionic_asm.h>

    .syntax unified

    .thumb
    .thumb_func

    // Save the original dst pointer (r0) together with r4, r5 and lr.
    // r0 is reloaded by m_pop so that it is the value handed back to the
    // caller; lr is saved because it is used as a scratch register below.
    .macro m_push
    push    {r0, r4, r5, lr}
    .endm // m_push

    // Restore the registers saved by m_push and return: the saved lr is
    // popped straight into pc, and r0 again holds the original dst pointer.
    .macro m_pop
    pop     {r0, r4, r5, pc}
    .endm // m_pop

    // Test one dst byte: leave r0 pointing at it and jump to the copy phase
    // when it is the terminating NUL, otherwise step r0 to the next byte.
    // Clobbers r3.
    .macro m_scan_byte
    ldrb    r3, [r0]
    cbz     r3, .L_strcat_r0_scan_done
    add     r0, #1
    .endm // m_scan_byte

    // Copy one byte from [r1] to [r0], post-incrementing both pointers, then
    // apply the given compare-and-branch (cbz or cbnz) to the copied byte.
    .macro m_copy_byte reg, cmd, label
    ldrb    \reg, [r1], #1
    strb    \reg, [r0], #1
    \cmd    \reg, \label
    .endm // m_copy_byte

// strcat_a15: append the NUL-terminated string at r1 to the NUL-terminated
// string at r0.
//
//   In:   r0 = dst, r1 = src
//   Out:  r0 = dst (unchanged from entry)
//   Uses: r1, r2, r3, ip and the flags as scratch; r4, r5 and lr are saved
//         by m_push and restored by m_pop.
//
// Phase 1 (labels .L_strcat_*) scans dst for its terminating NUL.
// Phase 2 (labels .L_strcpy_*) copies src, including its NUL, to that spot.
//
// Zero-byte detection used throughout: for a 32-bit word x,
//     (x - 0x01010101) & ~x & 0x80808080
// is non-zero exactly when x contains a zero byte, and the lowest 0x80 bit
// that is set marks the first such byte. The code that decodes this mask
// relies on the lowest-addressed byte being held in bits 0-7 of the
// register.
ENTRY(strcat_a15)
    // Quick check to see if src is empty.
    ldrb    r2, [r1]
    pld     [r1, #0]
    cbnz    r2, .L_strcat_continue
    bx      lr

.L_strcat_continue:
    // To speed up really small dst strings, unroll checking the first 4 bytes.
    m_push
    m_scan_byte
    m_scan_byte
    m_scan_byte
    m_scan_byte

    ands    r3, r0, #7
    beq     .L_strcat_mainloop

    // Align to a double word (64 bits).
    // r3 = number of bytes (1..7) up to the next 8-byte boundary. The lsls
    // moves bit 0 of r3 into the result (Z clear when a 1-byte step is
    // needed) and bit 1 into the carry (C set when a 2-byte step is needed).
    // Nothing in the 1-byte step changes the flags, so the carry is still
    // valid at the bcc below.
    rsb     r3, r3, #8
    lsls    ip, r3, #31
    beq     .L_strcat_align_to_32

    ldrb    r5, [r0]
    cbz     r5, .L_strcat_r0_scan_done
    add     r0, r0, #1

.L_strcat_align_to_32:
    bcc     .L_strcat_align_to_64

    ldrb    r2, [r0]
    cbz     r2, .L_strcat_r0_scan_done
    add     r0, r0, #1
    ldrb    r4, [r0]
    cbz     r4, .L_strcat_r0_scan_done
    add     r0, r0, #1

.L_strcat_align_to_64:
    // Bit 2 of r3 set means one more 4-byte step is needed.
    tst     r3, #4
    beq     .L_strcat_mainloop
    ldr     r3, [r0], #4

    // r0 is now 4 bytes past the word just tested, which is the same
    // relationship the second register has in the main loop, so the
    // second-register handler computes the right back-off.
    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .L_strcat_zero_in_second_register
    b       .L_strcat_mainloop

.L_strcat_r0_scan_done:
    // r0 points at the terminating NUL of dst; start copying src over it.
    // For short copies, hard-code checking the first 8 bytes since this
    // new code doesn't win until after about 8 bytes.
    m_copy_byte reg=r2, cmd=cbz,  label=.L_strcpy_finish
    m_copy_byte reg=r3, cmd=cbz,  label=.L_strcpy_finish
    m_copy_byte reg=r4, cmd=cbz,  label=.L_strcpy_finish
    m_copy_byte reg=r5, cmd=cbz,  label=.L_strcpy_finish
    m_copy_byte reg=r2, cmd=cbz,  label=.L_strcpy_finish
    m_copy_byte reg=r3, cmd=cbz,  label=.L_strcpy_finish
    m_copy_byte reg=r4, cmd=cbz,  label=.L_strcpy_finish
    // The eighth byte uses cbnz: a NUL falls through to the return below.
    m_copy_byte reg=r5, cmd=cbnz, label=.L_strcpy_continue

.L_strcpy_finish:
    m_pop

.L_strcpy_continue:
    ands    r3, r0, #7
    beq     .L_strcpy_check_src_align

    // Align to a double word (64 bits).
    // Same flag scheme as the dst scan: Z clear -> copy 1 byte,
    // C set -> copy 2 bytes, bit 2 of r3 -> copy 4 bytes.
    rsb     r3, r3, #8
    lsls    ip, r3, #31
    beq     .L_strcpy_align_to_32

    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .L_strcpy_complete

.L_strcpy_align_to_32:
    bcc     .L_strcpy_align_to_64

    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .L_strcpy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .L_strcpy_complete

.L_strcpy_align_to_64:
    tst     r3, #4
    beq     .L_strcpy_check_src_align
    // Read one byte at a time since we don't know the src alignment
    // and we don't want to read into a different page.
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .L_strcpy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .L_strcpy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .L_strcpy_complete
    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .L_strcpy_complete

.L_strcpy_check_src_align:
    // At this point dst is aligned to a double word, check if src
    // is also aligned to a double word.
    ands    r3, r1, #7
    bne     .L_strcpy_unaligned_copy

    .p2align 2
.L_strcpy_mainloop:
    // Both pointers are 8-byte aligned: move 8 bytes per iteration and stop
    // at the first word that contains a zero byte.
    ldrd    r2, r3, [r1], #8

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .L_strcpy_mainloop

.L_strcpy_complete:
    m_pop

.L_strcpy_zero_in_first_register:
    // Decode the zero-byte mask in ip for the word in r2:
    //   lsls #17: a non-zero result means bit 7 was set  -> NUL is byte 0;
    //             carry out is bit 15                     -> NUL is byte 1.
    //   lsls #1:  a non-zero result means bit 23 was set -> NUL is byte 2.
    //   Otherwise the NUL is byte 3.
    lsls    lr, ip, #17
    bne     .L_strcpy_copy1byte
    bcs     .L_strcpy_copy2bytes
    lsls    ip, ip, #1
    bne     .L_strcpy_copy3bytes

.L_strcpy_copy4bytes:
    // Copy 4 bytes to the destination.
    str     r2, [r0]
    m_pop

.L_strcpy_copy1byte:
    strb    r2, [r0]
    m_pop

.L_strcpy_copy2bytes:
    strh    r2, [r0]
    m_pop

.L_strcpy_copy3bytes:
    strh    r2, [r0], #2
    lsr     r2, #16
    strb    r2, [r0]
    m_pop

.L_strcpy_zero_in_second_register:
    // Same decoding as above, applied to the word in r3. The word in r2 is
    // known to contain no NUL and is stored in full.
    lsls    lr, ip, #17
    bne     .L_strcpy_copy5bytes
    bcs     .L_strcpy_copy6bytes
    lsls    ip, ip, #1
    bne     .L_strcpy_copy7bytes

    // Copy 8 bytes to the destination.
    strd    r2, r3, [r0]
    m_pop

.L_strcpy_copy5bytes:
    str     r2, [r0], #4
    strb    r3, [r0]
    m_pop

.L_strcpy_copy6bytes:
    str     r2, [r0], #4
    strh    r3, [r0]
    m_pop

.L_strcpy_copy7bytes:
    str     r2, [r0], #4
    strh    r3, [r0], #2
    lsr     r3, #16
    strb    r3, [r0]
    m_pop

.L_strcpy_unaligned_copy:
    // Dst is aligned to a double word, while src is at an unknown alignment.
    // There are 7 different versions of the unaligned copy code
    // to prevent overreading the src. The mainloop of every single version
    // will store 64 bits per loop. The difference is how much of src can
    // be read without potentially crossing a page boundary.
    //
    // r3 = src & 7 (1..7 here). tbb branches to pc + 2 * table[r3], and pc
    // reads as the address of the table itself, hence the "/2" offsets
    // relative to the table label. Entry 0 is never used because r3 is
    // non-zero on this path.
    tbb     [pc, r3]
.L_strcpy_unaligned_branchtable:
    .byte 0
    .byte ((.L_strcpy_unalign7 - .L_strcpy_unaligned_branchtable)/2)
    .byte ((.L_strcpy_unalign6 - .L_strcpy_unaligned_branchtable)/2)
    .byte ((.L_strcpy_unalign5 - .L_strcpy_unaligned_branchtable)/2)
    .byte ((.L_strcpy_unalign4 - .L_strcpy_unaligned_branchtable)/2)
    .byte ((.L_strcpy_unalign3 - .L_strcpy_unaligned_branchtable)/2)
    .byte ((.L_strcpy_unalign2 - .L_strcpy_unaligned_branchtable)/2)
    .byte ((.L_strcpy_unalign1 - .L_strcpy_unaligned_branchtable)/2)

    .p2align 2
    // Can read 7 bytes before possibly crossing a page.
.L_strcpy_unalign7:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_first_register

    // Probe the next three bytes one at a time before reading the word that
    // contains the eighth byte.
    ldrb    r3, [r1]
    cbz     r3, .L_strcpy_unalign7_copy5bytes
    ldrb    r4, [r1, #1]
    cbz     r4, .L_strcpy_unalign7_copy6bytes
    ldrb    r5, [r1, #2]
    cbz     r5, .L_strcpy_unalign7_copy7bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Only the top byte of r3 can still be NUL. strd does not change the
    // flags, so the beq tests the result of the lsrs.
    lsrs    ip, r3, #24
    strd    r2, r3, [r0], #8
    beq     .L_strcpy_unalign_return
    b       .L_strcpy_unalign7

.L_strcpy_unalign7_copy5bytes:
    str     r2, [r0], #4
    strb    r3, [r0]
.L_strcpy_unalign_return:
    m_pop

.L_strcpy_unalign7_copy6bytes:
    str     r2, [r0], #4
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    m_pop

.L_strcpy_unalign7_copy7bytes:
    str     r2, [r0], #4
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    strb    r5, [r0], #1
    m_pop

    .p2align 2
    // Can read 6 bytes before possibly crossing a page.
.L_strcpy_unalign6:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_first_register

    ldrb    r4, [r1]
    cbz     r4, .L_strcpy_unalign_copy5bytes
    ldrb    r5, [r1, #1]
    cbz     r5, .L_strcpy_unalign_copy6bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 of r3 were probed above; only bytes 2 and 3 remain.
    tst     r3, #0xff0000
    beq     .L_strcpy_copy7bytes
    lsrs    ip, r3, #24
    strd    r2, r3, [r0], #8
    beq     .L_strcpy_unalign_return
    b       .L_strcpy_unalign6

    .p2align 2
    // Can read 5 bytes before possibly crossing a page.
.L_strcpy_unalign5:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_first_register

    ldrb    r4, [r1]
    cbz     r4, .L_strcpy_unalign_copy5bytes

    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .L_strcpy_unalign5

.L_strcpy_unalign_copy5bytes:
    str     r2, [r0], #4
    strb    r4, [r0]
    m_pop

.L_strcpy_unalign_copy6bytes:
    str     r2, [r0], #4
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_pop

    .p2align 2
    // Can read 4 bytes before possibly crossing a page.
.L_strcpy_unalign4:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_first_register

    ldr     r3, [r1], #4
    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .L_strcpy_unalign4

    .p2align 2
    // Can read 3 bytes before possibly crossing a page.
.L_strcpy_unalign3:
    ldrb    r2, [r1]
    cbz     r2, .L_strcpy_unalign3_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .L_strcpy_unalign3_copy2bytes
    ldrb    r4, [r1, #2]
    cbz     r4, .L_strcpy_unalign3_copy3bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    // Bytes 0-2 of r2 were probed above; only its top byte can be NUL.
    lsrs    lr, r2, #24
    beq     .L_strcpy_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .L_strcpy_unalign3

.L_strcpy_unalign3_copy1byte:
    strb    r2, [r0]
    m_pop

.L_strcpy_unalign3_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_pop

.L_strcpy_unalign3_copy3bytes:
    strb    r2, [r0], #1
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_pop

    .p2align 2
    // Can read 2 bytes before possibly crossing a page.
.L_strcpy_unalign2:
    ldrb    r2, [r1]
    cbz     r2, .L_strcpy_unalign_copy1byte
    ldrb    r4, [r1, #1]
    cbz     r4, .L_strcpy_unalign_copy2bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 of r2 were probed above; check bytes 2 and 3 by hand.
    tst     r2, #0xff0000
    beq     .L_strcpy_copy3bytes
    lsrs    ip, r2, #24
    beq     .L_strcpy_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .L_strcpy_unalign2

    .p2align 2
    // Can read 1 byte before possibly crossing a page.
.L_strcpy_unalign1:
    ldrb    r2, [r1]
    cbz     r2, .L_strcpy_unalign_copy1byte

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_second_register

    strd    r2, r3, [r0], #8
    b       .L_strcpy_unalign1

.L_strcpy_unalign_copy1byte:
    strb    r2, [r0]
    m_pop

.L_strcpy_unalign_copy2bytes:
    strb    r2, [r0], #1
    strb    r4, [r0]
    m_pop

    .p2align 2
.L_strcat_mainloop:
    // dst is 8-byte aligned: scan 8 bytes per iteration for the NUL.
    ldrd    r2, r3, [r0], #8

    pld     [r0, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .L_strcat_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .L_strcat_zero_in_second_register
    b       .L_strcat_mainloop

.L_strcat_zero_in_first_register:
    // Prefetch the src now, it's going to be used soon.
    pld     [r1, #0]
    // r0 is already 8 bytes past the start of the pair just loaded. Back it
    // up so that it points at the NUL: 8, 7, 6 or 5 bytes for a NUL in
    // byte 0, 1, 2 or 3 of the first word.
    lsls    lr, ip, #17
    bne     .L_strcat_sub8
    bcs     .L_strcat_sub7
    lsls    ip, ip, #1
    bne     .L_strcat_sub6

    sub     r0, r0, #5
    b       .L_strcat_r0_scan_done

.L_strcat_sub8:
    sub     r0, r0, #8
    b       .L_strcat_r0_scan_done

.L_strcat_sub7:
    sub     r0, r0, #7
    b       .L_strcat_r0_scan_done

.L_strcat_sub6:
    sub     r0, r0, #6
    b       .L_strcat_r0_scan_done

.L_strcat_zero_in_second_register:
    // Prefetch the src now, it's going to be used soon.
    pld     [r1, #0]
    // As above for the second word: back r0 up by 4, 3, 2 or 1 bytes for a
    // NUL in byte 0, 1, 2 or 3.
    lsls    lr, ip, #17
    bne     .L_strcat_sub4
    bcs     .L_strcat_sub3
    lsls    ip, ip, #1
    bne     .L_strcat_sub2

    sub     r0, r0, #1
    b       .L_strcat_r0_scan_done

.L_strcat_sub4:
    sub     r0, r0, #4
    b       .L_strcat_r0_scan_done

.L_strcat_sub3:
    sub     r0, r0, #3
    b       .L_strcat_r0_scan_done

.L_strcat_sub2:
    sub     r0, r0, #2
    b       .L_strcat_r0_scan_done
END(strcat_a15)