Skip to content
GitLab
Projects
Groups
Snippets
Help
Loading...
Help
Help
Support
Community forum
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
Open sidebar
halo
rockchip_bionic
Commits
f355096a
Commit
f355096a
authored
15 years ago
by
David 'Digit' Turner
Browse files
Options
Download
Email Patches
Plain Diff
Remove NEON optimizations for memcpy
parent
bc10cd29
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
0 additions
and
107 deletions
+0
-107
libc/arch-arm/bionic/memcpy.S
libc/arch-arm/bionic/memcpy.S
+0
-107
No files found.
libc/arch-arm/bionic/memcpy.S
View file @
f355096a
...
...
@@ -28,111 +28,6 @@
#include <machine/cpu-features.h>
#if __ARM_ARCH__ == 7 || defined(__ARM_NEON__)
        @ NEON build of memcpy: this branch is assembled when __ARM_ARCH__ is 7
        @ or when __ARM_NEON__ is defined.
        @ NOTE(review): the first half of the condition selects NEON code for
        @ every ARMv7 target, including any core that might lack a NEON unit.
        @ TODO confirm that all supported ARMv7 targets implement NEON.

        .text
        .fpu    neon                    @ allow the vld/vst instructions used below

        .global memcpy
        .type   memcpy, %function
        .align  4                       @ presumably 2^4 = 16 byte alignment (ARM GAS
                                        @ takes a power of two here) - TODO confirm

        @ Upper bound, in bytes, of the prefetch offset that the main copy loop
        @ passes to pld; the loop starts at 32 and grows in steps of 32.
#define NEON_MAX_PREFETCH_DISTANCE 320
@-----------------------------------------------------------------------
@ memcpy (NEON variant)
@
@ Register use, as established by the code below:
@   r0  = destination address on entry; only read, never written, so it
@         still holds its entry value at the final bx lr
@   r1  = source address, post-incremented by every load
@   r2  = byte count; from label 2 onwards it holds (bytes left - 32)
@   r3  = byte scratch, and prefetch offset inside the main loop
@   ip  = running destination address (copy of r0), post-incremented
@         by every store
@ Clobbers: r1, r2, r3, ip, d0-d3, condition flags.
@
@ Numeric local labels (1:, 2:, 3:, 4:) are reused; each "Nf"/"Nb"
@ refers to the nearest following/preceding definition of that number.
@-----------------------------------------------------------------------
memcpy:
        .fnstart
        mov         ip, r0              @ work on a copy so r0 is left untouched
        cmp         r2, #16
        blt         4f                  @ Have less than 16 bytes to copy

        @ First ensure 16 byte alignment for the destination buffer
        tst         r0, #0xF
        beq         2f                  @ destination already 16 byte aligned

        @ Bit 0 of the destination set: copy one byte.
        tst         r0, #1
        ldrneb      r3, [r1], #1
        strneb      r3, [ip], #1
        subne       r2, r2, #1

        @ Bit 1 set: copy two bytes. The loads and stores do not modify the
        @ flags, so the single tst governs all five conditional instructions.
        tst         ip, #2
        ldrneb      r3, [r1], #1
        strneb      r3, [ip], #1
        ldrneb      r3, [r1], #1
        strneb      r3, [ip], #1
        subne       r2, r2, #2

        @ Bit 2 set: copy four bytes through lane 0 of d0-d3. The store
        @ carries a :32 alignment hint; bits 0-1 of ip are clear here.
        tst         ip, #4
        beq         1f
        vld4.8      {d0[0], d1[0], d2[0], d3[0]}, [r1]!
        vst4.8      {d0[0], d1[0], d2[0], d3[0]}, [ip, :32]!
        sub         r2, r2, #4
1:
        @ Bit 3 set: copy eight bytes; bits 0-2 of ip are clear here (:64).
        tst         ip, #8
        beq         2f
        vld1.8      {d0}, [r1]!
        vst1.8      {d0}, [ip, :64]!
        sub         r2, r2, #8
2:
        @ From here on r2 = (bytes left - 32). Subtracting multiples of 32
        @ leaves the low five bits equal to those of the true remainder,
        @ which the tail code at labels 3 and 4 relies on.
        subs        r2, r2, #32
        blt         3f                  @ fewer than 32 bytes left
        mov         r3, #32             @ initial prefetch offset

        @ Main copy loop, 32 bytes are processed per iteration.
        @ ARM instructions are used for doing fine-grained prefetch,
        @ increasing prefetch distance progressively up to
        @ NEON_MAX_PREFETCH_DISTANCE at runtime
1:
        vld1.8      {d0-d3}, [r1]!
        cmp         r3, #(NEON_MAX_PREFETCH_DISTANCE - 32)
        pld         [r1, r3]
        addle       r3, r3, #32         @ grow the offset until it reaches the maximum
        vst1.8      {d0-d3}, [ip, :128]!
        sub         r2, r2, #32
        cmp         r2, r3
        bge         1b                  @ loop while (bytes left - 32) >= prefetch offset
        cmp         r2, #0
        blt         3f                  @ fewer than 32 bytes left: go to the tail
1:      @ Copy the remaining part of the buffer (already prefetched)
        vld1.8      {d0-d3}, [r1]!
        subs        r2, r2, #32
        vst1.8      {d0-d3}, [ip, :128]!    @ vst does not alter the flags set by subs
        bge         1b
3:      @ Copy up to 31 remaining bytes
        tst         r2, #16             @ bit 4 of the remainder: one 16 byte block
        beq         4f
        vld1.8      {d0, d1}, [r1]!
        vst1.8      {d0, d1}, [ip, :128]!
4:
        @ Use ARM instructions exclusively for the final trailing part
        @ not fully fitting into full 16 byte aligned block in order
        @ to avoid "ARM store after NEON store" hazard. Also NEON
        @ pipeline will be (mostly) flushed by the time when the
        @ control returns to the caller, making the use of NEON mostly
        @ transparent (and avoiding hazards in the caller code)

        @ lsl #29 moves bit 3 of r2 into the carry flag and bit 2 into the
        @ sign flag. r3 is overwritten by the byte loads below, but the
        @ flags are not, so they stay valid through both .rept runs.
        movs        r3, r2, lsl #29
        bcc         1f                  @ bit 3 clear: skip the 8 byte run
    .rept   8
        ldrcsb      r3, [r1], #1
        strcsb      r3, [ip], #1
    .endr
1:
        bpl         1f                  @ bit 2 clear: skip the 4 byte run
    .rept   4
        ldrmib      r3, [r1], #1
        strmib      r3, [ip], #1
    .endr
1:
        @ lsl #31 moves bit 1 of r2 into the carry flag (two bytes) and
        @ bit 0 into the sign flag (one byte).
        movs        r2, r2, lsl #31
        ldrcsb      r3, [r1], #1
        strcsb      r3, [ip], #1
        ldrcsb      r3, [r1], #1
        strcsb      r3, [ip], #1
        ldrmib      r3, [r1], #1
        strmib      r3, [ip], #1
        bx          lr
        .fnend
#else /* __ARM_ARCH__ < 7 */
.
text
.
global
memcpy
...
...
@@ -490,5 +385,3 @@ copy_last_3_and_return:
bx
lr
.
fnend
#endif
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
.
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment