// %PDF- %PDF-
// Direktori : /usr/share/gnome-remote-desktop/ |
// Current File : //usr/share/gnome-remote-desktop/grd-cuda-avc-utils_30.ptx |
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-27506705
// Cuda compilation tools, release 10.2, V10.2.89
// Based on LLVM 3.4svn
//

.version 6.5
.target sm_30
.address_size 64

// .globl convert_2x2_bgrx_area_to_yuv420_nv12

// -----------------------------------------------------------------------------
// convert_2x2_bgrx_area_to_yuv420_nv12
//
// Each thread handles one 2x2 block of 32-bit source pixels. For that block it
// stores four bytes computed per pixel (two on each of two output rows) and
// two bytes computed from the block's channel averages.
//
// Parameters, described only by how this code uses them:
//   param_0 (u64)  base address of the byte output buffer (st.global.u8)
//   param_1 (u64)  base address of the 32-bit pixel input (ld.global.u32)
//   param_2 (u16)  input width in pixels: x bound and input row pitch
//   param_3 (u16)  input height in rows: y bound
//   param_4 (u16)  halved to bound the thread x index
//   param_5 (u16)  halved to bound the thread y index; also used in the
//                  output row remapping and as the row offset of the
//                  averaged-byte area
//   param_6 (u16)  output row pitch in bytes
//
// Pixel layout, as read by the code: bits 0-7, 8-15 and 16-23 are three
// channels, bits 24-31 are ignored. Presumably B, G, R, X given the kernel
// name -- confirm against the CUDA source.
//
// Long-lived registers:
//   %r1 / %r2     global thread x / y index
//   %r6 / %r7     x / y of the block's top-left pixel (2 * index, 16-bit)
//   %rd1          top-left pixel index = %r7 * param_2 + %r6
//   %rd2          address of the top-left input pixel
//   %rd3 / %rd4   output addresses for the top / bottom row byte pairs
//   %rd5          output address for the two averaged bytes
//   %r126/125/124 running sums of bits 0-7 / 8-15 / 16-23 over the block
//   %p8           top row is outside the input (%r7 >= param_3)
//   %p11          right column is outside the input (%r6 + 1 >= param_2)
// -----------------------------------------------------------------------------
.visible .entry convert_2x2_bgrx_area_to_yuv420_nv12(
    .param .u64 convert_2x2_bgrx_area_to_yuv420_nv12_param_0,
    .param .u64 convert_2x2_bgrx_area_to_yuv420_nv12_param_1,
    .param .u16 convert_2x2_bgrx_area_to_yuv420_nv12_param_2,
    .param .u16 convert_2x2_bgrx_area_to_yuv420_nv12_param_3,
    .param .u16 convert_2x2_bgrx_area_to_yuv420_nv12_param_4,
    .param .u16 convert_2x2_bgrx_area_to_yuv420_nv12_param_5,
    .param .u16 convert_2x2_bgrx_area_to_yuv420_nv12_param_6
)
{
    .reg .pred %p<17>;
    .reg .b16 %rs<18>;
    .reg .b32 %r<127>;
    .reg .b64 %rd<40>;

    // ---- Load parameters and compute the global thread index ---------------
    ld.param.u64 %rd6, [convert_2x2_bgrx_area_to_yuv420_nv12_param_0];
    ld.param.u64 %rd7, [convert_2x2_bgrx_area_to_yuv420_nv12_param_1];
    ld.param.u16 %rs7, [convert_2x2_bgrx_area_to_yuv420_nv12_param_2];
    ld.param.u16 %rs8, [convert_2x2_bgrx_area_to_yuv420_nv12_param_3];
    ld.param.u16 %rs9, [convert_2x2_bgrx_area_to_yuv420_nv12_param_5];
    ld.param.u16 %rs10, [convert_2x2_bgrx_area_to_yuv420_nv12_param_6];
    mov.u32 %r30, %ntid.x;
    mov.u32 %r31, %ctaid.x;
    mov.u32 %r32, %tid.x;
    mad.lo.s32 %r1, %r30, %r31, %r32;           // x index = ntid.x * ctaid.x + tid.x
    mov.u32 %r33, %ntid.y;
    mov.u32 %r34, %ctaid.y;
    mov.u32 %r35, %tid.y;
    mad.lo.s32 %r2, %r33, %r34, %r35;           // y index = ntid.y * ctaid.y + tid.y

    // ---- Exit when the thread is outside (param_4 / 2) x (param_5 / 2) -----
    and.b32 %r36, %r1, 65535;                   // indices are compared as 16-bit values
    ld.param.u16 %r37, [convert_2x2_bgrx_area_to_yuv420_nv12_param_4];
    shr.u32 %r38, %r37, 1;                      // param_4 / 2
    and.b32 %r3, %r2, 65535;
    cvt.u32.u16 %r4, %rs9;                      // %r4 = param_5
    shr.u32 %r5, %r4, 1;                        // %r5 = param_5 / 2
    setp.ge.u32 %p1, %r3, %r5;
    setp.ge.u32 %p2, %r36, %r38;
    or.pred %p3, %p1, %p2;
    @%p3 bra BB0_10;

    // ---- Input address of the block's top-left pixel -----------------------
    cvta.to.global.u64 %rd8, %rd6;              // %rd8 = output base (global space)
    shl.b32 %r42, %r1, 1;
    and.b32 %r6, %r42, 65534;                   // %r6 = pixel x = 2 * x index
    mov.u32 %r43, 1;
    shl.b32 %r44, %r2, 1;
    and.b32 %r7, %r44, 65534;                   // %r7 = pixel y = 2 * y index
    cvt.u32.u16 %r45, %rs7;                     // %r45 = param_2 (input width)
    mul.lo.s32 %r46, %r7, %r45;
    cvt.u64.u32 %rd9, %r46;
    cvt.u64.u32 %rd10, %r6;
    add.s64 %rd1, %rd9, %rd10;                  // pixel index = y * width + x
    cvta.to.global.u64 %rd11, %rd7;             // %rd11 = input base (global space)
    shl.b64 %rd12, %rd1, 2;                     // 4 bytes per pixel
    add.s64 %rd2, %rd11, %rd12;

    // ---- Output address for the top row (%rd3) -----------------------------
    // row = (y < param_5 / 2) ? 2 * y : 2 * y + 1 - param_5, kept to 16 bits
    // address = output base + row * param_6 + x
    setp.lt.u32 %p4, %r7, %r5;
    shl.b32 %r47, %r2, 2;                       // 2 * y, taken from the unmasked index
    sub.s32 %r48, %r43, %r4;                    // 1 - param_5
    selp.b32 %r49, 0, %r48, %p4;
    mov.u32 %r124, 0;                           // zero the bits 16-23 sum
    add.s32 %r50, %r49, %r47;
    cvt.u64.u32 %rd13, %r50;
    and.b64 %rd14, %rd13, 65535;
    cvt.u64.u16 %rd15, %rs10;                   // %rd15 = param_6 (output pitch)
    mul.lo.s64 %rd16, %rd14, %rd15;
    add.s64 %rd17, %rd16, %rd10;
    add.s64 %rd3, %rd8, %rd17;

    // ---- Output address for the bottom row (%rd4): same mapping for y + 1 --
    add.s32 %r51, %r7, 1;
    and.b32 %r52, %r51, 65535;
    setp.lt.u32 %p5, %r52, %r5;
    shl.b32 %r53, %r51, 1;
    selp.b32 %r54, 0, %r48, %p5;
    add.s32 %r55, %r54, %r53;
    cvt.u64.u32 %rd18, %r55;
    and.b64 %rd19, %rd18, 65535;
    mul.lo.s64 %rd20, %rd19, %rd15;
    add.s64 %rd21, %rd20, %rd10;
    add.s64 %rd4, %rd8, %rd21;

    // ---- Output address for the averaged byte pair (%rd5) ------------------
    // row = (yi < param_5 / 4) ? 2 * yi : 2 * yi + 1 - param_5 / 2, to 16 bits
    // (yi = thread y index); address = base + (row + param_5) * param_6 + x
    shr.u32 %r56, %r4, 2;
    setp.lt.u32 %p6, %r3, %r56;
    sub.s32 %r57, %r43, %r5;
    selp.b32 %r58, 0, %r57, %p6;
    shl.b32 %r59, %r3, 1;
    add.s32 %r60, %r59, %r58;
    cvt.u64.u32 %rd22, %r60;
    and.b64 %rd23, %rd22, 65535;
    cvt.u64.u16 %rd24, %rs9;
    add.s64 %rd25, %rd23, %rd24;                // skip param_5 rows before this area
    mul.lo.s64 %rd26, %rd25, %rd15;
    add.s64 %rd27, %rd26, %rd10;
    add.s64 %rd5, %rd8, %rd27;

    // ---- Top-left pixel: skipped when x or y is outside the input ----------
    setp.ge.u32 %p7, %r6, %r45;
    cvt.u32.u16 %r61, %rs8;                     // %r61 = param_3 (input height)
    setp.ge.u32 %p8, %r7, %r61;
    mov.u16 %rs16, 0;                           // default byte for the top-right pixel
    or.pred %p9, %p7, %p8;
    mov.u16 %rs15, %rs16;                       // default byte for the top-left pixel
    mov.u32 %r125, %r124;                       // zero the bits 8-15 sum
    mov.u32 %r126, %r124;                       // zero the bits 0-7 sum
    @%p9 bra BB0_3;

    ld.global.u32 %r62, [%rd2];
    and.b32 %r126, %r62, 255;                   // bits 0-7
    bfe.u32 %r125, %r62, 8, 8;                  // bits 8-15
    bfe.u32 %r124, %r62, 16, 8;                 // bits 16-23
    // byte = (18 * bits0_7 + 54 * bits16_23 + 183 * bits8_15) >> 8
    // (weights total 255; they look like luma weights -- confirm)
    mul.lo.s32 %r63, %r126, 18;
    mad.lo.s32 %r64, %r124, 54, %r63;
    mad.lo.s32 %r65, %r125, 183, %r64;
    shr.u32 %r66, %r65, 8;
    cvt.u16.u32 %rs15, %r66;

BB0_3:
    st.global.u8 [%rd3], %rs15;                 // 0 when the pixel was skipped

    // ---- Top-right pixel (x + 1, y) ----------------------------------------
    add.s32 %r14, %r6, 1;
    setp.ge.u32 %p11, %r14, %r45;
    or.pred %p12, %p11, %p8;
    @%p12 bra BB0_5;

    ld.global.u32 %r69, [%rd2+4];
    and.b32 %r70, %r69, 255;
    add.s32 %r126, %r70, %r126;                 // add this pixel to the channel sums
    bfe.u32 %r71, %r69, 8, 8;
    add.s32 %r125, %r71, %r125;
    bfe.u32 %r72, %r69, 16, 8;
    add.s32 %r124, %r72, %r124;
    mul.lo.s32 %r73, %r70, 18;
    mad.lo.s32 %r74, %r72, 54, %r73;
    mad.lo.s32 %r75, %r71, 183, %r74;
    shr.u32 %r76, %r75, 8;
    cvt.u16.u32 %rs16, %r76;

BB0_5:
    setp.lt.u32 %p13, %r6, %r45;
    st.global.u8 [%rd3+1], %rs16;

    // ---- Bottom row: needs x < width and y + 1 < height --------------------
    and.b32 %r82, %r2, 32767;
    shl.b32 %r83, %r82, 1;
    add.s32 %r84, %r83, 1;                      // y + 1
    setp.lt.u32 %p14, %r84, %r61;
    and.pred %p15, %p13, %p14;
    @%p15 bra BB0_7;
    bra.uni BB0_6;

BB0_7:
    // Bottom-left pixel (x, y + 1): one input row below the top-left pixel.
    cvt.u64.u16 %rd28, %rs7;
    add.s64 %rd29, %rd1, %rd28;
    shl.b64 %rd31, %rd29, 2;
    add.s64 %rd32, %rd11, %rd31;
    ld.global.u32 %r87, [%rd32];
    and.b32 %r88, %r87, 255;
    add.s32 %r126, %r88, %r126;
    bfe.u32 %r89, %r87, 8, 8;
    add.s32 %r125, %r89, %r125;
    bfe.u32 %r90, %r87, 16, 8;
    add.s32 %r124, %r90, %r124;
    mul.lo.s32 %r91, %r88, 18;
    mad.lo.s32 %r92, %r90, 54, %r91;
    mad.lo.s32 %r93, %r89, 183, %r92;
    shr.u32 %r94, %r93, 8;
    st.global.u8 [%rd4], %r94;                  // st.u8 writes the low 8 bits of %r94
    mov.u16 %rs17, 0;                           // default byte for the bottom-right pixel
    @%p11 bra BB0_9;                            // right column is outside the input

    // Bottom-right pixel (x + 1, y + 1); the offset width + 1 is kept to 16 bits.
    add.s64 %rd34, %rd28, 1;
    and.b64 %rd35, %rd34, 65535;
    add.s64 %rd36, %rd1, %rd35;
    shl.b64 %rd38, %rd36, 2;
    add.s64 %rd39, %rd11, %rd38;
    ld.global.u32 %r95, [%rd39];
    and.b32 %r96, %r95, 255;
    add.s32 %r126, %r96, %r126;
    bfe.u32 %r97, %r95, 8, 8;
    add.s32 %r125, %r97, %r125;
    bfe.u32 %r98, %r95, 16, 8;
    add.s32 %r124, %r98, %r124;
    mul.lo.s32 %r99, %r96, 18;
    mad.lo.s32 %r100, %r98, 54, %r99;
    mad.lo.s32 %r101, %r97, 183, %r100;
    shr.u32 %r102, %r101, 8;
    cvt.u16.u32 %rs17, %r102;
    bra.uni BB0_9;

BB0_6:
    // Bottom row unavailable: both bottom bytes are written as 0.
    mov.u16 %rs17, 0;
    st.global.u8 [%rd4], %rs17;

BB0_9:
    st.global.u8 [%rd4+1], %rs17;

    // ---- Two bytes from the block's channel averages -----------------------
    // avg = (sum >> 2) & 0xFF. The divisor is always 4, so pixels skipped by
    // the bounds checks count as 0.
    // Only the low 8 bits of each result are stored, so the logical shift
    // (shr.u32) of a possibly negative sum does not change the stored byte.
    //
    // first byte  = ((-29 * avg16_23 - 99 * avg8_15 + 128 * avg0_7) >> 8) + 128
    bfe.u32 %r103, %r124, 2, 8;
    mul.lo.s32 %r104, %r103, -29;
    bfe.u32 %r105, %r125, 2, 8;
    mad.lo.s32 %r106, %r105, -99, %r104;
    bfe.u32 %r107, %r126, 2, 8;
    shl.b32 %r108, %r107, 7;                    // 128 * avg0_7
    add.s32 %r109, %r106, %r108;
    shr.u32 %r110, %r109, 8;
    add.s32 %r111, %r110, 128;
    st.global.u8 [%rd5], %r111;

    // second byte = ((128 * avg16_23 - 116 * avg8_15 - 12 * avg0_7) >> 8) + 128
    shl.b32 %r112, %r124, 5;
    and.b32 %r113, %r112, 32640;                // 128 * avg16_23: (sum << 5) & 0x7F80
    mad.lo.s32 %r114, %r105, -116, %r113;
    mad.lo.s32 %r115, %r107, -12, %r114;
    shr.u32 %r116, %r115, 8;
    add.s32 %r117, %r116, 128;
    st.global.u8 [%rd5+1], %r117;

BB0_10:
    ret;
}