// Directory    : /usr/share/gnome-remote-desktop/
// Current file : /usr/share/gnome-remote-desktop/grd-cuda-avc-utils_30.ptx
//
// Generated by NVIDIA NVVM Compiler
//
// Compiler Build ID: CL-27506705
// Cuda compilation tools, release 10.2, V10.2.89
// Based on LLVM 3.4svn
//
.version 6.5
.target sm_30
.address_size 64
// .globl convert_2x2_bgrx_area_to_yuv420_nv12
// -----------------------------------------------------------------------------
// convert_2x2_bgrx_area_to_yuv420_nv12
//
// Each thread handles one 2x2 block of 32-bit source pixels: it writes four
// 8-bit luma values (two per row, on two destination rows) and one pair of
// 8-bit chroma values computed from the channel sums of the block.
//
// Parameters, described by how the code below uses them (the names in
// parentheses are presumed from the upstream CUDA source -- TODO confirm):
//   param_0 (u64): destination base address; written with st.global.u8.
//   param_1 (u64): source base address; read with ld.global.u32, i.e. 4 bytes
//                  per pixel.
//   param_2 (u16): source row pitch in pixels and exclusive x bound for pixel
//                  loads (presumably src_width).
//   param_3 (u16): exclusive y bound for pixel loads (presumably src_height).
//   param_4 (u16): halved, it is the exclusive bound on the thread x index
//                  (presumably aligned_width).
//   param_5 (u16): halved, it is the exclusive bound on the thread y index;
//                  also the row offset of the chroma rows and an input of the
//                  row remapping below (presumably aligned_height).
//   param_6 (u16): multiplier turning a destination row number into a byte
//                  offset (presumably aligned_stride).
//
// Destination row remapping used for all three output rows:
//   remap(row, h) = (2*row + (row < h/2 ? 0 : 1 - h)) & 0xFFFF
// Luma rows use h = param_5; the chroma row uses h = param_5/2 and is then
// offset by param_5 rows.
//
// Pixel byte usage: byte0 weight 18, byte1 weight 183, byte2 weight 54 for
// luma. Going by the kernel name (bgrx) these are B, G, R; byte3 is never read.
// -----------------------------------------------------------------------------
.visible .entry convert_2x2_bgrx_area_to_yuv420_nv12(
.param .u64 convert_2x2_bgrx_area_to_yuv420_nv12_param_0,
.param .u64 convert_2x2_bgrx_area_to_yuv420_nv12_param_1,
.param .u16 convert_2x2_bgrx_area_to_yuv420_nv12_param_2,
.param .u16 convert_2x2_bgrx_area_to_yuv420_nv12_param_3,
.param .u16 convert_2x2_bgrx_area_to_yuv420_nv12_param_4,
.param .u16 convert_2x2_bgrx_area_to_yuv420_nv12_param_5,
.param .u16 convert_2x2_bgrx_area_to_yuv420_nv12_param_6
)
{
// Virtual register pools (predicates, 16/32/64-bit).
.reg .pred %p<17>;
.reg .b16 %rs<18>;
.reg .b32 %r<127>;
.reg .b64 %rd<40>;
// Load kernel arguments (param_4 is loaded later, directly into a 32-bit reg).
ld.param.u64 %rd6, [convert_2x2_bgrx_area_to_yuv420_nv12_param_0];
ld.param.u64 %rd7, [convert_2x2_bgrx_area_to_yuv420_nv12_param_1];
ld.param.u16 %rs7, [convert_2x2_bgrx_area_to_yuv420_nv12_param_2];
ld.param.u16 %rs8, [convert_2x2_bgrx_area_to_yuv420_nv12_param_3];
ld.param.u16 %rs9, [convert_2x2_bgrx_area_to_yuv420_nv12_param_5];
ld.param.u16 %rs10, [convert_2x2_bgrx_area_to_yuv420_nv12_param_6];
// %r1 = ntid.x * ctaid.x + tid.x  (thread x index across the grid)
mov.u32 %r30, %ntid.x;
mov.u32 %r31, %ctaid.x;
mov.u32 %r32, %tid.x;
mad.lo.s32 %r1, %r30, %r31, %r32;
// %r2 = ntid.y * ctaid.y + tid.y  (thread y index across the grid)
mov.u32 %r33, %ntid.y;
mov.u32 %r34, %ctaid.y;
mov.u32 %r35, %tid.y;
mad.lo.s32 %r2, %r33, %r34, %r35;
// Bounds check on the 16-bit-truncated indices:
// exit unless x_idx < param_4/2 and y_idx < param_5/2.
and.b32 %r36, %r1, 65535;
ld.param.u16 %r37, [convert_2x2_bgrx_area_to_yuv420_nv12_param_4];
shr.u32 %r38, %r37, 1;
and.b32 %r3, %r2, 65535;
cvt.u32.u16 %r4, %rs9;
shr.u32 %r5, %r4, 1;
setp.ge.u32 %p1, %r3, %r5;
setp.ge.u32 %p2, %r36, %r38;
or.pred %p3, %p1, %p2;
@%p3 bra BB0_10;
// %rd8 = destination base as a global-space address.
cvta.to.global.u64 %rd8, %rd6;
// %r6 = x of the block's left column  = (2 * x_idx) & 0xFFFE
// %r7 = y of the block's top row      = (2 * y_idx) & 0xFFFE
shl.b32 %r42, %r1, 1;
and.b32 %r6, %r42, 65534;
mov.u32 %r43, 1;
shl.b32 %r44, %r2, 1;
and.b32 %r7, %r44, 65534;
// %rd1 = y * param_2 + x  (pixel index of the top-left source pixel)
// %rd2 = source base + 4 * %rd1  (its byte address)
cvt.u32.u16 %r45, %rs7;
mul.lo.s32 %r46, %r7, %r45;
cvt.u64.u32 %rd9, %r46;
cvt.u64.u32 %rd10, %r6;
add.s64 %rd1, %rd9, %rd10;
cvta.to.global.u64 %rd11, %rd7;
shl.b64 %rd12, %rd1, 2;
add.s64 %rd2, %rd11, %rd12;
// %rd3 = destination address of the top row's luma pair:
//   row = (4*y_idx + (y < param_5/2 ? 0 : 1 - param_5)) & 0xFFFF
//   %rd3 = dst + row * param_6 + x
// %r124 is also zeroed here; it later accumulates the byte2 channel sum.
setp.lt.u32 %p4, %r7, %r5;
shl.b32 %r47, %r2, 2;
sub.s32 %r48, %r43, %r4;
selp.b32 %r49, 0, %r48, %p4;
mov.u32 %r124, 0;
add.s32 %r50, %r49, %r47;
cvt.u64.u32 %rd13, %r50;
and.b64 %rd14, %rd13, 65535;
cvt.u64.u16 %rd15, %rs10;
mul.lo.s64 %rd16, %rd14, %rd15;
add.s64 %rd17, %rd16, %rd10;
add.s64 %rd3, %rd8, %rd17;
// %rd4 = destination address of the bottom row's luma pair; same remapping
// applied to source row y + 1.
add.s32 %r51, %r7, 1;
and.b32 %r52, %r51, 65535;
setp.lt.u32 %p5, %r52, %r5;
shl.b32 %r53, %r51, 1;
selp.b32 %r54, 0, %r48, %p5;
add.s32 %r55, %r54, %r53;
cvt.u64.u32 %rd18, %r55;
and.b64 %rd19, %rd18, 65535;
mul.lo.s64 %rd20, %rd19, %rd15;
add.s64 %rd21, %rd20, %rd10;
add.s64 %rd4, %rd8, %rd21;
// %rd5 = destination address of the chroma pair:
//   crow = (2*y_idx + (y_idx < param_5/4 ? 0 : 1 - param_5/2)) & 0xFFFF
//   %rd5 = dst + (crow + param_5) * param_6 + x
// i.e. chroma rows start param_5 rows after the start of the luma rows.
shr.u32 %r56, %r4, 2;
setp.lt.u32 %p6, %r3, %r56;
sub.s32 %r57, %r43, %r5;
selp.b32 %r58, 0, %r57, %p6;
shl.b32 %r59, %r3, 1;
add.s32 %r60, %r59, %r58;
cvt.u64.u32 %rd22, %r60;
and.b64 %rd23, %rd22, 65535;
cvt.u64.u16 %rd24, %rs9;
add.s64 %rd25, %rd23, %rd24;
mul.lo.s64 %rd26, %rd25, %rd15;
add.s64 %rd27, %rd26, %rd10;
add.s64 %rd5, %rd8, %rd27;
// Top-left pixel. %p7: x >= param_2, %p8: y >= param_3.
// Defaults when the pixel is out of bounds: luma 0 (%rs15) and zeroed channel
// sums (%r124 = byte2, %r125 = byte1, %r126 = byte0). %rs16 is the default
// luma for the top-right pixel.
setp.ge.u32 %p7, %r6, %r45;
cvt.u32.u16 %r61, %rs8;
setp.ge.u32 %p8, %r7, %r61;
mov.u16 %rs16, 0;
or.pred %p9, %p7, %p8;
mov.u16 %rs15, %rs16;
mov.u32 %r125, %r124;
mov.u32 %r126, %r124;
@%p9 bra BB0_3;
// In bounds: split the pixel into bytes, start the channel sums, and compute
// luma = (18*byte0 + 54*byte2 + 183*byte1) >> 8.
ld.global.u32 %r62, [%rd2];
and.b32 %r126, %r62, 255;
bfe.u32 %r125, %r62, 8, 8;
bfe.u32 %r124, %r62, 16, 8;
mul.lo.s32 %r63, %r126, 18;
mad.lo.s32 %r64, %r124, 54, %r63;
mad.lo.s32 %r65, %r125, 183, %r64;
shr.u32 %r66, %r65, 8;
cvt.u16.u32 %rs15, %r66;
BB0_3:
// Store top-left luma. Then the top-right pixel: skipped (luma stays 0) when
// x + 1 >= param_2 (%p11) or y >= param_3 (%p8).
st.global.u8 [%rd3], %rs15;
add.s32 %r14, %r6, 1;
setp.ge.u32 %p11, %r14, %r45;
or.pred %p12, %p11, %p8;
@%p12 bra BB0_5;
// Top-right pixel is the next 32-bit word; add its bytes to the channel sums
// and compute its luma with the same weights.
ld.global.u32 %r69, [%rd2+4];
and.b32 %r70, %r69, 255;
add.s32 %r126, %r70, %r126;
bfe.u32 %r71, %r69, 8, 8;
add.s32 %r125, %r71, %r125;
bfe.u32 %r72, %r69, 16, 8;
add.s32 %r124, %r72, %r124;
mul.lo.s32 %r73, %r70, 18;
mad.lo.s32 %r74, %r72, 54, %r73;
mad.lo.s32 %r75, %r71, 183, %r74;
shr.u32 %r76, %r75, 8;
cvt.u16.u32 %rs16, %r76;
BB0_5:
// Store top-right luma. Bottom row is processed (BB0_7) only when
// x < param_2 (%p13) and 2*(y_idx & 0x7FFF) + 1 < param_3 (%p14);
// otherwise BB0_6 writes zeros for both bottom luma bytes.
setp.lt.u32 %p13, %r6, %r45;
st.global.u8 [%rd3+1], %rs16;
and.b32 %r82, %r2, 32767;
shl.b32 %r83, %r82, 1;
add.s32 %r84, %r83, 1;
setp.lt.u32 %p14, %r84, %r61;
and.pred %p15, %p13, %p14;
@%p15 bra BB0_7;
bra.uni BB0_6;
BB0_7:
// Bottom-left pixel: pixel index %rd1 + param_2 (one source row down).
// Accumulate its bytes, compute luma, and store it at [%rd4].
cvt.u64.u16 %rd28, %rs7;
add.s64 %rd29, %rd1, %rd28;
shl.b64 %rd31, %rd29, 2;
add.s64 %rd32, %rd11, %rd31;
ld.global.u32 %r87, [%rd32];
and.b32 %r88, %r87, 255;
add.s32 %r126, %r88, %r126;
bfe.u32 %r89, %r87, 8, 8;
add.s32 %r125, %r89, %r125;
bfe.u32 %r90, %r87, 16, 8;
add.s32 %r124, %r90, %r124;
mul.lo.s32 %r91, %r88, 18;
mad.lo.s32 %r92, %r90, 54, %r91;
mad.lo.s32 %r93, %r89, 183, %r92;
shr.u32 %r94, %r93, 8;
st.global.u8 [%rd4], %r94;
// Bottom-right luma defaults to 0 and is only computed when x + 1 < param_2.
mov.u16 %rs17, 0;
@%p11 bra BB0_9;
// Bottom-right pixel: pixel index %rd1 + ((param_2 + 1) & 0xFFFF).
add.s64 %rd34, %rd28, 1;
and.b64 %rd35, %rd34, 65535;
add.s64 %rd36, %rd1, %rd35;
shl.b64 %rd38, %rd36, 2;
add.s64 %rd39, %rd11, %rd38;
ld.global.u32 %r95, [%rd39];
and.b32 %r96, %r95, 255;
add.s32 %r126, %r96, %r126;
bfe.u32 %r97, %r95, 8, 8;
add.s32 %r125, %r97, %r125;
bfe.u32 %r98, %r95, 16, 8;
add.s32 %r124, %r98, %r124;
mul.lo.s32 %r99, %r96, 18;
mad.lo.s32 %r100, %r98, 54, %r99;
mad.lo.s32 %r101, %r97, 183, %r100;
shr.u32 %r102, %r101, 8;
cvt.u16.u32 %rs17, %r102;
bra.uni BB0_9;
BB0_6:
// Bottom row not sampled: bottom-left luma = 0 (bottom-right follows below).
mov.u16 %rs17, 0;
st.global.u8 [%rd4], %rs17;
BB0_9:
// Store bottom-right luma, then derive the chroma pair from the channel sums.
// Each average is (sum >> 2) & 0xFF.
// NOTE(review): the divisor is always 4, even when out-of-bounds pixels
// contributed nothing to the sums -- confirm this is intended at frame edges.
st.global.u8 [%rd4+1], %rs17;
// First chroma byte = ((-29*avg2 - 99*avg1 + 128*avg0) >> 8) + 128
// (presumably U/Cb, given byte0 = B and byte2 = R).
bfe.u32 %r103, %r124, 2, 8;
mul.lo.s32 %r104, %r103, -29;
bfe.u32 %r105, %r125, 2, 8;
mad.lo.s32 %r106, %r105, -99, %r104;
bfe.u32 %r107, %r126, 2, 8;
shl.b32 %r108, %r107, 7;
add.s32 %r109, %r106, %r108;
shr.u32 %r110, %r109, 8;
add.s32 %r111, %r110, 128;
st.global.u8 [%rd5], %r111;
// Second chroma byte = ((128*avg2 - 116*avg1 - 12*avg0) >> 8) + 128
// (presumably V/Cr). (sum2 << 5) & 0x7F80 equals avg2 * 128.
shl.b32 %r112, %r124, 5;
and.b32 %r113, %r112, 32640;
mad.lo.s32 %r114, %r105, -116, %r113;
mad.lo.s32 %r115, %r107, -12, %r114;
shr.u32 %r116, %r115, 8;
add.s32 %r117, %r116, 128;
st.global.u8 [%rd5+1], %r117;
BB0_10:
// Common exit, also taken by threads that failed the initial bounds check.
ret;
}