Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
halo
rockchip_bionic
Commits
f355096a
Commit
f355096a
authored
15 years ago
by
David 'Digit' Turner
Browse files
Options
Download
Email Patches
Plain Diff
Remove NEON optimizations for memcpy
parent
bc10cd29
No related merge requests found
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
107 deletions
+0
-107
libc/arch-arm/bionic/memcpy.S
libc/arch-arm/bionic/memcpy.S
+0
-107
No files found.
libc/arch-arm/bionic/memcpy.S
View file @
f355096a
...
...
@@ -28,111 +28,6 @@
#include <machine/cpu-features.h>
#if __ARM_ARCH__ == 7 || defined(__ARM_NEON__)

/*
 * memcpy -- NEON implementation, built when __ARM_ARCH__ == 7 or
 * __ARM_NEON__ is defined.
 *
 * Syntax: GNU as, ARM mode, pre-UAL conditional mnemonics (ldrneb, ...).
 *
 * In:    r0 = destination, r1 = source, r2 = byte count
 * Out:   r0 = destination (r0 is never written by this routine)
 * Clobb: r1, r2, r3, ip, d0-d3, condition flags
 *
 * Register roles inside the routine:
 *   ip  running destination pointer (copy of r0)
 *   r1  running source pointer
 *   r2  bytes left; biased by -32 while the 32-byte loops run
 *   r3  byte scratch in the ARM paths, prefetch distance in the main loop
 *
 * Outline:
 *   1. counts below 16 go straight to the byte tail;
 *   2. otherwise 1/2/4/8 byte steps bring the destination to a 16-byte
 *      boundary;
 *   3. 32-byte NEON blocks with a growing PLD distance;
 *   4. 32-byte NEON blocks without prefetch;
 *   5. an optional 16-byte NEON block;
 *   6. up to 15 trailing bytes with ARM byte loads/stores.
 *
 * NOTE(review): the count checks use signed branches (blt/bge), so a
 * count of 2^31 or more is handled as if it were negative -- confirm
 * that callers never pass such sizes.
 */
        .text
        .fpu        neon

        .global     memcpy
        .type       memcpy, %function
        .align      4

#define NEON_MAX_PREFETCH_DISTANCE 320

memcpy:
        .fnstart
        mov         ip, r0                      @ keep r0 intact as the return value
        cmp         r2, #16
        blt         .Lneon_tail                 @ fewer than 16 bytes: byte tail only

        @ --- Step the destination up to a 16-byte boundary ---------------
        tst         r0, #0xF
        beq         .Lneon_dst_aligned

        tst         r0, #1                      @ odd address: move one byte
        ldrneb      r3, [r1], #1
        strneb      r3, [ip], #1
        subne       r2, r2, #1

        tst         ip, #2                      @ bit 1 set: move two bytes
        ldrneb      r3, [r1], #1
        strneb      r3, [ip], #1
        ldrneb      r3, [r1], #1
        strneb      r3, [ip], #1
        subne       r2, r2, #2

        tst         ip, #4                      @ bit 2 set: move four bytes via lane 0 of d0-d3
        beq         .Lneon_align8
        vld4.8      {d0[0], d1[0], d2[0], d3[0]}, [r1]!
        vst4.8      {d0[0], d1[0], d2[0], d3[0]}, [ip, :32]!
        sub         r2, r2, #4

.Lneon_align8:
        tst         ip, #8                      @ bit 3 set: move eight bytes
        beq         .Lneon_dst_aligned
        vld1.8      {d0}, [r1]!
        vst1.8      {d0}, [ip, :64]!
        sub         r2, r2, #8

.Lneon_dst_aligned:
        @ From here on r2 holds (bytes left - 32).
        subs        r2, r2, #32
        blt         .Lneon_under32
        mov         r3, #32                     @ initial prefetch distance

        @ --- 32 bytes per pass, with prefetch -----------------------------
        @ The PLD offset in r3 grows by 32 on every pass until it reaches
        @ NEON_MAX_PREFETCH_DISTANCE. Plain ARM instructions drive the
        @ prefetch so that it stays fine-grained. The loop runs for as long
        @ as the biased count is at least the current prefetch distance.
.Lneon_prefetch_loop:
        vld1.8      {d0 - d3}, [r1]!
        cmp         r3, #(NEON_MAX_PREFETCH_DISTANCE - 32)
        pld         [r1, r3]
        addle       r3, r3, #32
        vst1.8      {d0 - d3}, [ip, :128]!
        sub         r2, r2, #32
        cmp         r2, r3
        bge         .Lneon_prefetch_loop

        cmp         r2, #0
        blt         .Lneon_under32

        @ --- 32 bytes per pass, source already prefetched ------------------
.Lneon_drain_loop:
        vld1.8      {d0 - d3}, [r1]!
        subs        r2, r2, #32
        vst1.8      {d0 - d3}, [ip, :128]!
        bge         .Lneon_drain_loop

.Lneon_under32:
        @ At most 31 bytes are left. r2 is negative here, but only multiples
        @ of 32 were subtracted, so bits 4..0 still encode the remainder.
        tst         r2, #16
        beq         .Lneon_tail
        vld1.8      {d0, d1}, [r1]!
        vst1.8      {d0, d1}, [ip, :128]!

.Lneon_tail:
        @ The last (up to 15) bytes are moved with ARM instructions only.
        @ This sidesteps the "ARM store after NEON store" hazard, and gives
        @ the NEON pipeline time to (mostly) drain before control is back in
        @ the caller, so the caller is largely unaffected by the NEON use.
        movs        r3, r2, lsl #29             @ C = count bit 3, N = count bit 2
        bcc         .Lneon_tail4
        .rept       8
        ldrcsb      r3, [r1], #1
        strcsb      r3, [ip], #1
        .endr

.Lneon_tail4:
        bpl         .Lneon_tail3
        .rept       4
        ldrmib      r3, [r1], #1
        strmib      r3, [ip], #1
        .endr

.Lneon_tail3:
        movs        r2, r2, lsl #31             @ C = count bit 1, N = count bit 0
        ldrcsb      r3, [r1], #1
        strcsb      r3, [ip], #1
        ldrcsb      r3, [r1], #1
        strcsb      r3, [ip], #1
        ldrmib      r3, [r1], #1
        strmib      r3, [ip], #1
        bx          lr
        .fnend
#else /* __ARM_ARCH__ < 7 */
.
text
.
global
memcpy
...
...
@@ -490,5 +385,3 @@ copy_last_3_and_return:
bx
lr
.
fnend
#endif
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment