ARM assembly: spell instructions so that UAL-only assemblers accept them.

What this patch changes in common/arm/*.S:

  * ldrd: name the second destination register explicitly
    (ldrd r4, [..]  becomes  ldrd r4, r5, [..]).  Same encoding.
  * vmov.32 r0, r1, d0 becomes vmov r0, r1, d0 (no size suffix on the
    two-core-register form).  Same encoding.
  * Conditional flag-setting subtracts move from the pre-UAL spelling
    (subles / sublts) to the UAL spelling (subsle / subslt).
    The S suffix must NOT be dropped: in every one of these hunks the
    instruction right after the subtract (bgt average_loop, movlt r0, #0)
    is conditional on the flags that subtract produces.  Writing plain
    suble / sublt would leave the older flags in place, so the averaging
    loop would stop being governed by the counter in ip, and coeff_last
    would return 0 whenever the sublt path was taken.
  * .align in cpu-a.S is commented out.
    NOTE(review): this removes the directive instead of giving it an explicit
    argument; confirm alignment of the following code is established elsewhere
    (e.g. by the function macro in asm.S) before merging.

Assumptions to confirm:
  * The target assembler accepts UAL mnemonics (subsle/subslt).  That is
    presumably why subles/sublts were rejected in the first place.
  * Leading whitespace and the position of blank context lines were
    reconstructed; apply with whitespace-insensitive matching
    (git apply --ignore-whitespace, or patch -l).
  * The "index" lines are carried over unchanged, so the post-image blob ids
    for cpu-a.S and quant-a.S are informational only.

diff --git a/common/arm/cpu-a.S b/common/arm/cpu-a.S
index a459553..bc91505 100644
--- a/common/arm/cpu-a.S
+++ b/common/arm/cpu-a.S
@@ -26,7 +26,7 @@
 #include "asm.S"
 
 .fpu neon
-.align
+// .align
 
 // done in gas because .fpu neon overrides the refusal to assemble
 // instructions the selected -march/-mcpu doesn't support
@@ -95,7 +95,7 @@ average_loop:
     sub r2, r2, r1
     cmpgt r2, #30 << 3 // assume context switch if it took over 30 cycles
     addle r3, r3, r2
-    subles ip, ip, #1
+    subsle ip, ip, #1
     bgt average_loop
 
     // disable counters if we enabled them
diff --git a/common/arm/mc-a.S b/common/arm/mc-a.S
index 507bbba..d5554be 100644
--- a/common/arm/mc-a.S
+++ b/common/arm/mc-a.S
@@ -166,7 +166,7 @@ function x264_pixel_avg_\w\()x\h\()_neon
     ldr ip, [sp, #8]
     push {r4-r6,lr}
     cmp ip, #32
-    ldrd r4, [sp, #16]
+    ldrd r4, r5, [sp, #16]
     mov lr, #\h
     beq x264_pixel_avg_w\w\()_neon
     rsbs r6, ip, #64
@@ -446,7 +446,7 @@ avg2_w20_loop:
 .ifc \type, full
     ldr lr, [r4, #32] // denom
 .endif
-    ldrd r4, [r4, #32+4] // scale, offset
+    ldrd r4, r5, [r4, #32+4] // scale, offset
     vdup.16 q0, r4
     vdup.16 q1, r5
 .ifc \type, full
@@ -815,7 +815,7 @@ copy_w16_aligned_loop:
 // int dx, int dy, int i_width, int i_height );
 function x264_mc_chroma_neon
     push {r4-r6, lr}
-    ldrd r4, [sp, #16]
+    ldrd r4, r5, [sp, #16]
     ldr r6, [sp, #24]
 
     asr lr, r5, #3
@@ -1271,8 +1271,8 @@ filter_h_loop:
 function x264_frame_init_lowres_core_neon
     push {r4-r10,lr}
     vpush {d8-d15}
-    ldrd r4, [sp, #96]
-    ldrd r6, [sp, #104]
+    ldrd r4, r5, [sp, #96]
+    ldrd r6, r7, [sp, #104]
     ldr lr, [sp, #112]
     sub r10, r6, r7 // dst_stride - width
     and r10, r10, #~15
diff --git a/common/arm/pixel-a.S b/common/arm/pixel-a.S
index 8bce3b6..0784ae6 100644
--- a/common/arm/pixel-a.S
+++ b/common/arm/pixel-a.S
@@ -328,9 +328,9 @@ SAD_FUNC_DUAL 16, 16
 function x264_pixel_sad_x\x\()_\w\()x\h\()_neon
     push {r6-r7,lr}
 .if \x == 3
-    ldrd r6, [sp, #12]
+    ldrd r6, r7, [sp, #12]
 .else
-    ldrd r6, [sp, #16]
+    ldrd r6, r7, [sp, #16]
     ldr r12, [sp, #12]
 .endif
     mov lr, #FENC_STRIDE
@@ -596,7 +596,7 @@ function x264_pixel_var2_8x8_neon
     vadd.s32 d1, d2, d3
     vpadd.s32 d0, d0, d1
 
-    vmov.32 r0, r1, d0
+    vmov r0, r1, d0
     vst1.32 {d0[1]}, [ip,:32]
     mul r0, r0, r0
     sub r0, r1, r0, lsr #6
diff --git a/common/arm/predict-a.S b/common/arm/predict-a.S
index af65bd7..8cdaf50 100644
--- a/common/arm/predict-a.S
+++ b/common/arm/predict-a.S
@@ -181,9 +181,9 @@ function x264_predict_4x4_ddl_neon
 
 function x264_predict_8x8_dc_neon
     mov ip, #0
-    ldrd r2, [r1, #8]
+    ldrd r2, r3, [r1, #8]
     push {r4-r5,lr}
-    ldrd r4, [r1, #16]
+    ldrd r4, r5, [r1, #16]
     lsl r3, r3, #8
     ldrb lr, [r1, #7]
     usad8 r2, r2, ip
diff --git a/common/arm/quant-a.S b/common/arm/quant-a.S
index e851562..c159f9e 100644
--- a/common/arm/quant-a.S
+++ b/common/arm/quant-a.S
@@ -271,7 +271,7 @@ dequant_4x4_dc_rshift:
 
 // int coeff_last( int16_t *l )
 function x264_coeff_last4_arm
-    ldrd r2, [r0]
+    ldrd r2, r3, [r0]
     subs r0, r3, #0
     movne r0, #2
     movne r2, r3
@@ -300,7 +300,7 @@ function x264_coeff_last\size\()_neon
 
     subs r1, ip, r1, lsr #2
     addge r0, r1, #\size - 8
-    sublts r0, r3, r0, lsr #2
+    subslt r0, r3, r0, lsr #2
     movlt r0, #0
     bx lr
 .endfunc
@@ -349,7 +349,7 @@ function x264_coeff_last64_neon
 
     subs r1, ip, r1
     addge r0, r1, #32
-    sublts r0, ip, r0
+    subslt r0, ip, r0
     movlt r0, #0
     bx lr
 .endfunc