Commit 489e8e68 authored by Harish Mahendrakar
Browse files

Fixed a few issues in the SAO ARM assembly routines

A few output mismatches were seen because of incorrect clipping and incorrect
pointer increments in the SAO assembly routines.

Change-Id: I8ab28d847b1708b6949eac514f99e475e792cde1
parent 3426916a
......@@ -829,6 +829,10 @@ SRC_LEFT_LOOP_WD_16_HT_4:
SUBS r6,r6,#16 @Decrement the wd loop count by 16
BLE RE_ASSINING_LOOP @Jump to re-assigning loop
LDR r7,[sp,#0x114] @Loads wd
LDR r0,[sp,#0x02] @Loads *pu1_src
SUB r7,r7,r6
ADD r0,r0,r7
BGT WD_16_HT_4_LOOP
......
......@@ -691,6 +691,10 @@ SRC_LEFT_LOOP_WD_16_HT_4:
SUBS r6,r6,#16 @Decrement the wd loop count by 16
BLE RE_ASSINING_LOOP @Jump to re-assigning loop
LDR r7,[sp,#0xD0] @Loads wd
LDR r0,[sp,#0x90] @Loads *pu1_src
SUB r7,r7,r6
ADD r0,r0,r7
BGT WD_16_HT_4_LOOP @If not equal jump to width_loop
......
......@@ -851,6 +851,10 @@ SRC_LEFT_LOOP_WD_16_HT_4:
SUBS r6,r6,#16 @Decrement the wd loop count by 16
BLE RE_ASSINING_LOOP @Jump to re-assigning loop
LDR r7,[sp,#0x114] @Loads wd
LDR r0,[sp,#0x02] @Loads *pu1_src
SUB r7,r7,r6
ADD r0,r0,r7
BGT WD_16_HT_4_LOOP @If not equal jump to width_loop
WIDTH_RESIDUE:
......
......@@ -146,6 +146,9 @@ PU1_AVAIL_4_LOOP:
mov x20,#255
cmp x9,x20
csel x9, x20, x9, ge //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
mov x20,#0
cmp x9,x20
csel x9, x20, x9, LT //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_7_LOOP:
LDRB w14,[x5,#7] //pu1_avail[7]
......@@ -190,6 +193,9 @@ PU1_AVAIL_7_LOOP:
mov x20,#255
cmp x10,x20
csel x10, x20, x10, ge //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
mov x20,#0
cmp x10,x20
csel x10, x20, x10, LT //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL:
MOV x12,x8 //Move ht
......
......@@ -165,6 +165,9 @@ PU1_AVAIL_4_LOOP_U:
mov x20,#255
cmp x9,x20
csel x9, x20, x9, ge //u1_pos_0_0_tmp_u = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
mov x20,#0
cmp x9,x20
csel x9, x20, x9, LT //u1_pos_0_0_tmp_u = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_4_LOOP_V:
......@@ -201,6 +204,9 @@ PU1_AVAIL_4_LOOP_V:
mov x20,#255
cmp x10,x20
csel x10, x20, x10, ge //u1_pos_0_0_tmp_v = CLIP3(pu1_src[0] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1)
mov x20,#0
cmp x10,x20
csel x10, x20, x10, LT //u1_pos_0_0_tmp_v = CLIP3(pu1_src[0] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_7_LOOP_U:
STRB w10,[sp,#7]
......@@ -249,6 +255,9 @@ PU1_AVAIL_7_LOOP_U:
mov x20,#255
cmp x10,x20
csel x10, x20, x10, ge //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
mov x20,#0
cmp x10,x20
csel x10, x20, x10, LT //u1_pos_wd_ht_tmp = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_7_LOOP_V:
ADD x12,x12,#1
......@@ -286,6 +295,9 @@ PU1_AVAIL_7_LOOP_V:
mov x20,#255
cmp x9,x20
csel x9, x20, x9, ge //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
mov x20,#0
cmp x9,x20
csel x9, x20, x9, LT //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[wd - 1 + (ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_3_LOOP:
STRB w10,[sp,#8]
......@@ -924,6 +936,10 @@ SRC_LEFT_LOOP_WD_16_HT_4:
SUBS x6,x6,#16 //Decrement the wd loop count by 16
BLE RE_ASSINING_LOOP //Jump to re-assigning loop
mov w7, w24 //Loads wd
mov x0, x27 //Loads *pu1_src
SUB x7,x7,x6
ADD x0,x0,x7
BGT WD_16_HT_4_LOOP
......
......@@ -151,6 +151,9 @@ PU1_AVAIL_5_LOOP:
mov x20,#255
cmp x9,x20
csel x9, x20, x9, ge //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
mov x20,#0
cmp x9,x20
csel x9, x20, x9, LT //u1_pos_0_0_tmp = CLIP3(pu1_src[0] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_6_LOOP:
LDRB w10,[x5,#6] //pu1_avail[6]
......@@ -198,6 +201,9 @@ PU1_AVAIL_6_LOOP:
mov x20,#255
cmp x10,x20
csel x10, x20, x10, ge //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
mov x20,#0
cmp x10,x20
csel x10, x20, x10, LT //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_3_LOOP:
MOV x21,x2
......@@ -713,6 +719,10 @@ SRC_LEFT_LOOP_WD_16_HT_4:
SUBS x6,x6,#16 //Decrement the wd loop count by 16
BLE RE_ASSINING_LOOP //Jump to re-assigning loop
MOV x7,x16 //Loads wd
MOV x0,x15 //Loads *pu1_src
SUB x7,x7,x6
ADD x0,x0,x7
BGT WD_16_HT_4_LOOP //If not equal jump to width_loop
......
......@@ -160,6 +160,9 @@ PU1_AVAIL_5_LOOP_U:
mov x20,#255
cmp x9,x20
csel x9, x20, x9, ge //u1_pos_0_0_tmp_u = CLIP3(pu1_src[wd - 2] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
mov x20,#0
cmp x9,x20
csel x9, x20, x9, LT //u1_pos_0_0_tmp_u = CLIP3(pu1_src[wd - 2] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_5_LOOP_V:
......@@ -194,6 +197,9 @@ PU1_AVAIL_5_LOOP_V:
mov x20,#255
cmp x10,x20
csel x10, x20, x10, ge //u1_pos_0_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1)
mov x20,#0
cmp x10,x20
csel x10, x20, x10, LT //u1_pos_0_0_tmp_v = CLIP3(pu1_src[wd - 1] + pi1_sao_offset_v[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_6_LOOP_U:
STRB w9,[sp,#6]
......@@ -240,6 +246,9 @@ PU1_AVAIL_6_LOOP_U:
mov x20,#255
cmp x10,x20
csel x10, x20, x10, ge //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
mov x20,#0
cmp x10,x20
csel x10, x20, x10, LT //u1_pos_wd_ht_tmp = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_6_LOOP_V:
ADD x12,x12,#1 //pu1_src[(ht - 1) * src_strd + 1]
......@@ -276,6 +285,9 @@ PU1_AVAIL_6_LOOP_V:
mov x20,#255
cmp x9,x20
csel x9, x20, x9, ge //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
mov x20,#0
cmp x9,x20
csel x9, x20, x9, LT //u1_pos_wd_ht_tmp_v = CLIP3(pu1_src[(ht - 1) * src_strd] + pi1_sao_offset[edge_idx], 0, (1 << bit_depth) - 1)
PU1_AVAIL_3_LOOP:
STRB w10,[sp,#8]
......@@ -933,6 +945,10 @@ SRC_LEFT_LOOP_WD_16_HT_4:
SUBS x6,x6,#16 //Decrement the wd loop count by 16
BLE RE_ASSINING_LOOP //Jump to re-assigning loop
mov w7, w24 //Loads wd
mov x0, x28 //Loads *pu1_src
SUB x7,x7,x6
ADD x0,x0,x7
BGT WD_16_HT_4_LOOP //If not equal jump to width_loop
WIDTH_RESIDUE:
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment