// NOTE(review): the three lines below are extraction residue (a stray "%PDF-"
// marker and a directory-listing header) that was prepended to this PTX module
// by whatever tool dumped it; kept here as comments so the file can assemble.
// %PDF- %PDF-
// Direktori : /usr/share/gnome-remote-desktop/ |
// Current File : //usr/share/gnome-remote-desktop/grd-cuda-damage-utils_30.ptx |
// // Generated by NVIDIA NVVM Compiler // // Compiler Build ID: CL-27506705 // Cuda compilation tools, release 10.2, V10.2.89 // Based on LLVM 3.4svn // .version 6.5 .target sm_30 .address_size 64 // .globl check_damaged_pixel .visible .entry check_damaged_pixel( .param .u64 check_damaged_pixel_param_0, .param .u64 check_damaged_pixel_param_1, .param .u64 check_damaged_pixel_param_2, .param .u64 check_damaged_pixel_param_3, .param .u32 check_damaged_pixel_param_4, .param .u32 check_damaged_pixel_param_5, .param .u32 check_damaged_pixel_param_6, .param .u32 check_damaged_pixel_param_7 ) { .reg .pred %p<5>; .reg .b16 %rs<5>; .reg .b32 %r<17>; .reg .b64 %rd<14>; ld.param.u64 %rd1, [check_damaged_pixel_param_0]; ld.param.u64 %rd2, [check_damaged_pixel_param_1]; ld.param.u64 %rd3, [check_damaged_pixel_param_2]; ld.param.u64 %rd4, [check_damaged_pixel_param_3]; ld.param.u32 %r3, [check_damaged_pixel_param_4]; ld.param.u32 %r5, [check_damaged_pixel_param_5]; ld.param.u32 %r6, [check_damaged_pixel_param_6]; ld.param.u32 %r4, [check_damaged_pixel_param_7]; mov.u32 %r7, %ntid.x; mov.u32 %r8, %ctaid.x; mov.u32 %r9, %tid.x; mad.lo.s32 %r1, %r7, %r8, %r9; mov.u32 %r10, %ntid.y; mov.u32 %r11, %ctaid.y; mov.u32 %r12, %tid.y; mad.lo.s32 %r2, %r10, %r11, %r12; setp.ge.u32 %p1, %r2, %r6; setp.ge.u32 %p2, %r1, %r5; or.pred %p3, %p1, %p2; @%p3 bra BB0_4; cvta.to.global.u64 %rd5, %rd4; mad.lo.s32 %r13, %r2, %r4, %r1; mul.wide.u32 %rd6, %r13, 4; add.s64 %rd7, %rd5, %rd6; cvta.to.global.u64 %rd8, %rd3; add.s64 %rd9, %rd8, %rd6; ld.global.u32 %r14, [%rd9]; ld.global.u32 %r15, [%rd7]; setp.eq.s32 %p4, %r15, %r14; mov.u16 %rs4, 0; @%p4 bra BB0_3; cvta.to.global.u64 %rd10, %rd2; mov.u16 %rs4, 1; st.global.u8 [%rd10], %rs4; BB0_3: mad.lo.s32 %r16, %r2, %r3, %r1; cvt.u64.u32 %rd11, %r16; cvta.to.global.u64 %rd12, %rd1; add.s64 %rd13, %rd12, %rd11; st.global.u8 [%rd13], %rs4; BB0_4: ret; } // .globl combine_damage_array_cols .visible .entry combine_damage_array_cols( .param .u64 
combine_damage_array_cols_param_0, .param .u32 combine_damage_array_cols_param_1, .param .u32 combine_damage_array_cols_param_2, .param .u32 combine_damage_array_cols_param_3, .param .u32 combine_damage_array_cols_param_4 ) { .reg .pred %p<6>; .reg .b16 %rs<3>; .reg .b32 %r<20>; .reg .b64 %rd<7>; ld.param.u64 %rd2, [combine_damage_array_cols_param_0]; ld.param.u32 %r5, [combine_damage_array_cols_param_1]; ld.param.u32 %r8, [combine_damage_array_cols_param_2]; ld.param.u32 %r6, [combine_damage_array_cols_param_3]; ld.param.u32 %r7, [combine_damage_array_cols_param_4]; cvta.to.global.u64 %rd1, %rd2; mov.u32 %r9, %ntid.x; mov.u32 %r10, %ctaid.x; mov.u32 %r11, %tid.x; mad.lo.s32 %r12, %r9, %r10, %r11; mov.u32 %r13, %ntid.y; mov.u32 %r14, %ctaid.y; mov.u32 %r15, %tid.y; mad.lo.s32 %r1, %r13, %r14, %r15; add.s32 %r16, %r7, 1; shl.b32 %r2, %r12, %r16; setp.ge.u32 %p1, %r1, %r8; setp.ge.u32 %p2, %r2, %r5; or.pred %p3, %p1, %p2; @%p3 bra BB1_4; mov.u32 %r17, 1; shl.b32 %r3, %r17, %r7; add.s32 %r18, %r2, %r3; setp.ge.u32 %p4, %r18, %r5; @%p4 bra BB1_4; mad.lo.s32 %r4, %r1, %r6, %r2; add.s32 %r19, %r4, %r3; cvt.u64.u32 %rd3, %r19; add.s64 %rd4, %rd1, %rd3; ld.global.u8 %rs1, [%rd4]; setp.eq.s16 %p5, %rs1, 0; @%p5 bra BB1_4; cvt.u64.u32 %rd5, %r4; add.s64 %rd6, %rd1, %rd5; mov.u16 %rs2, 1; st.global.u8 [%rd6], %rs2; BB1_4: ret; } // .globl combine_damage_array_rows .visible .entry combine_damage_array_rows( .param .u64 combine_damage_array_rows_param_0, .param .u32 combine_damage_array_rows_param_1, .param .u32 combine_damage_array_rows_param_2, .param .u32 combine_damage_array_rows_param_3, .param .u32 combine_damage_array_rows_param_4 ) { .reg .pred %p<6>; .reg .b16 %rs<3>; .reg .b32 %r<21>; .reg .b64 %rd<7>; ld.param.u64 %rd2, [combine_damage_array_rows_param_0]; ld.param.u32 %r7, [combine_damage_array_rows_param_1]; ld.param.u32 %r4, [combine_damage_array_rows_param_2]; ld.param.u32 %r5, [combine_damage_array_rows_param_3]; ld.param.u32 %r6, 
[combine_damage_array_rows_param_4]; cvta.to.global.u64 %rd1, %rd2; mov.u32 %r8, %ntid.x; mov.u32 %r9, %ctaid.x; mov.u32 %r10, %tid.x; mad.lo.s32 %r1, %r8, %r9, %r10; mov.u32 %r11, %ntid.y; mov.u32 %r12, %ctaid.y; mov.u32 %r13, %tid.y; mad.lo.s32 %r14, %r11, %r12, %r13; add.s32 %r15, %r6, 1; shl.b32 %r2, %r14, %r15; setp.ge.u32 %p1, %r2, %r4; setp.ge.u32 %p2, %r1, %r7; or.pred %p3, %p1, %p2; @%p3 bra BB2_4; mov.u32 %r16, 1; shl.b32 %r17, %r16, %r6; add.s32 %r18, %r2, %r17; setp.ge.u32 %p4, %r18, %r4; @%p4 bra BB2_4; mad.lo.s32 %r3, %r2, %r5, %r1; shl.b32 %r19, %r5, %r6; add.s32 %r20, %r3, %r19; cvt.u64.u32 %rd3, %r20; add.s64 %rd4, %rd1, %rd3; ld.global.u8 %rs1, [%rd4]; setp.eq.s16 %p5, %rs1, 0; @%p5 bra BB2_4; cvt.u64.u32 %rd5, %r3; add.s64 %rd6, %rd1, %rd5; mov.u16 %rs2, 1; st.global.u8 [%rd6], %rs2; BB2_4: ret; } // .globl simplify_damage_array .visible .entry simplify_damage_array( .param .u64 simplify_damage_array_param_0, .param .u64 simplify_damage_array_param_1, .param .u32 simplify_damage_array_param_2, .param .u32 simplify_damage_array_param_3, .param .u32 simplify_damage_array_param_4, .param .u32 simplify_damage_array_param_5 ) { .reg .pred %p<4>; .reg .b16 %rs<2>; .reg .b32 %r<17>; .reg .b64 %rd<9>; ld.param.u64 %rd1, [simplify_damage_array_param_0]; ld.param.u64 %rd2, [simplify_damage_array_param_1]; ld.param.u32 %r5, [simplify_damage_array_param_2]; ld.param.u32 %r7, [simplify_damage_array_param_3]; ld.param.u32 %r8, [simplify_damage_array_param_4]; ld.param.u32 %r6, [simplify_damage_array_param_5]; mov.u32 %r9, %ctaid.x; mov.u32 %r10, %ntid.x; mov.u32 %r11, %tid.x; mad.lo.s32 %r1, %r10, %r9, %r11; mov.u32 %r12, %ntid.y; mov.u32 %r13, %ctaid.y; mov.u32 %r14, %tid.y; mad.lo.s32 %r2, %r12, %r13, %r14; shl.b32 %r3, %r1, 6; shl.b32 %r4, %r2, 6; setp.ge.u32 %p1, %r4, %r8; setp.ge.u32 %p2, %r3, %r7; or.pred %p3, %p1, %p2; @%p3 bra BB3_2; cvta.to.global.u64 %rd3, %rd2; mad.lo.s32 %r15, %r4, %r6, %r3; mad.lo.s32 %r16, %r2, %r5, %r1; cvt.u64.u32 %rd4, %r15; 
add.s64 %rd5, %rd3, %rd4; ld.global.u8 %rs1, [%rd5]; cvt.u64.u32 %rd6, %r16; cvta.to.global.u64 %rd7, %rd1; add.s64 %rd8, %rd7, %rd6; st.global.u8 [%rd8], %rs1; BB3_2: ret; }