-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathhybridsort.glsl
945 lines (862 loc) · 73.6 KB
/
hybridsort.glsl
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764
765
766
767
768
769
770
771
772
773
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801
802
803
804
805
806
807
808
809
810
811
812
813
814
815
816
817
818
819
820
821
822
823
824
825
826
827
828
829
830
831
832
833
834
835
836
837
838
839
840
841
842
843
844
845
846
847
848
849
850
851
852
853
854
855
856
857
858
859
860
861
862
863
864
865
866
867
868
869
870
871
872
873
874
875
876
877
878
879
880
881
882
883
884
885
886
887
888
889
890
891
892
893
894
895
896
897
898
899
900
901
902
903
904
905
906
907
908
909
910
911
912
913
914
915
916
917
918
919
920
921
922
923
924
925
926
927
928
929
930
931
932
933
934
935
936
937
938
939
940
941
942
943
944
945
// Number of fragment slots in the `registers` scratch array declared below.
// registerSortRange() loops up to this bound when loading fragments, and the
// unrolled compare/swap chains are gated on it (#if MAX_REGISTERS > 16, > 32).
#define MAX_REGISTERS 32
// Scratch array that registerSortRange() fills from `frags` and then orders by
// LFB_FRAG_DEPTH. LFB_FRAG_TYPE is defined outside this section.
// NOTE(review): the name and the "sort in registers" comment suggest the intent
// is for this array to live in GPU registers; nothing here enforces that.
LFB_FRAG_TYPE registers[MAX_REGISTERS];
// MERGE_SIZE expands to the quotient MAX_FRAGS / MAX_REGISTERS when MAX_FRAGS
// is larger than MAX_REGISTERS, and to 1 otherwise. MAX_FRAGS is defined
// outside this section.
// NOTE(review): presumably the number of MAX_REGISTERS-sized blocks that have
// to be merged; its use is not visible here, so confirm against the call sites.
#if MAX_FRAGS > MAX_REGISTERS
#define MERGE_SIZE (MAX_FRAGS / MAX_REGISTERS)
#else
#define MERGE_SIZE 1
#endif
// Compile-time switches, all currently 0 (disabled).
// BLOCKSORT_LMEM gates the `#if BLOCKSORT_LMEM` section that follows, which
// contains registerSortRange().
#define BLOCKSORT_LMEM 0
// NOTE(review): the uses of BLOCKSORT_GMEM and BLOCKSORT_BASE are not visible in
// this section; they look like alternative block-sort variants - confirm.
#define BLOCKSORT_GMEM 0
#define BLOCKSORT_BASE 0
#if BLOCKSORT_LMEM
#if 1
//count MUST be less than MAX_REGISTERS
void registerSortRange(int offset, int count)
{
#pragma optionNV(unroll all)
//load from lmem
for (int i = 0; i < MAX_REGISTERS; ++i)
if (i < count)
registers[i] = frags[offset+i];
//sort in registers
LFB_FRAG_TYPE tmp;
#define SWAP_FRAGS(a, b) {tmp = a; a = b; b = tmp;}
#define CSWAP(a, b) \
if (LFB_FRAG_DEPTH(a) > LFB_FRAG_DEPTH(b)) SWAP_FRAGS(a, b);
#define CSWAP_I(i, j) \
if (LFB_FRAG_DEPTH(registers[i]) > LFB_FRAG_DEPTH(registers[j])) SWAP_FRAGS(registers[i], registers[j]);
#define IF_CSWAP_I(i, j) \
if (LFB_FRAG_DEPTH(registers[i]) > LFB_FRAG_DEPTH(registers[j])) {SWAP_FRAGS(registers[i], registers[j]);
#define IF_CMOVE_I(i, j) \
if (LFB_FRAG_DEPTH(registers[i]) > LFB_FRAG_DEPTH(tmp)) {registers[j]=tmp;} else {registers[j]=registers[i];
#if 0
for (int i = 1; i < MAX_REGISTERS; ++i)
{
if (i < count)
{
for (int j = i; j > 0; --j)
CSWAP(registers[j-1], registers[j])
continue;
}
break;
}
#else
#if 1
if (count > 1) {IF_CSWAP_I(0, 1)}
if (count > 2) {IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}
if (count > 3) {IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}
if (count > 4) {IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}
if (count > 5) {IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}
if (count > 6) {IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}
if (count > 7) {IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}
if (count > 8) {IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}
if (count > 9) {IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}
if (count > 10) {IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}
if (count > 11) {IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}
if (count > 12) {IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}
if (count > 13) {IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}
if (count > 14) {IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}
if (count > 15) {IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}
#if MAX_REGISTERS > 16
if (count > 16) {IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}
if (count > 17) {IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}
if (count > 18) {IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}
if (count > 19) {IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}
if (count > 20) {IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}
if (count > 21) {IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}
if (count > 22) {IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}
if (count > 23) {IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}}
if (count > 24) {IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 25) {IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 26) {IF_CSWAP_I(25, 26)IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 27) {IF_CSWAP_I(26, 27)IF_CSWAP_I(25, 26)IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 28) {IF_CSWAP_I(27, 28)IF_CSWAP_I(26, 27)IF_CSWAP_I(25, 26)IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 29) {IF_CSWAP_I(28, 29)IF_CSWAP_I(27, 28)IF_CSWAP_I(26, 27)IF_CSWAP_I(25, 26)IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 30) {IF_CSWAP_I(29, 30)IF_CSWAP_I(28, 29)IF_CSWAP_I(27, 28)IF_CSWAP_I(26, 27)IF_CSWAP_I(25, 26)IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 31) {IF_CSWAP_I(30, 31)IF_CSWAP_I(29, 30)IF_CSWAP_I(28, 29)IF_CSWAP_I(27, 28)IF_CSWAP_I(26, 27)IF_CSWAP_I(25, 26)IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}}}}}}}}}}}}}}}}
#if MAX_REGISTERS > 32
if (count > 32) {IF_CSWAP_I(31,32);CSWAP_I(30,31);IF_CSWAP_I(29,30);CSWAP_I(28,29);IF_CSWAP_I(27,28);CSWAP_I(26,27);IF_CSWAP_I(25,26);CSWAP_I(24,25);IF_CSWAP_I(23,24);CSWAP_I(22,23);IF_CSWAP_I(21,22);CSWAP_I(20,21);IF_CSWAP_I(19,20);CSWAP_I(18,19);IF_CSWAP_I(17,18);CSWAP_I(16,17);IF_CSWAP_I(15,16);CSWAP_I(14,15);IF_CSWAP_I(13,14);CSWAP_I(12,13);IF_CSWAP_I(11,12);CSWAP_I(10,11);IF_CSWAP_I(9,10);CSWAP_I(8,9);IF_CSWAP_I(7,8);CSWAP_I(6,7);IF_CSWAP_I(5,6);CSWAP_I(4,5);IF_CSWAP_I(3,4);CSWAP_I(2,3);IF_CSWAP_I(1,2);CSWAP_I(0,1);}}}}}}}}}}}}}}}}}
if (count > 33) {IF_CSWAP_I(32,33);CSWAP_I(31,32);IF_CSWAP_I(30,31);CSWAP_I(29,30);IF_CSWAP_I(28,29);CSWAP_I(27,28);IF_CSWAP_I(26,27);CSWAP_I(25,26);IF_CSWAP_I(24,25);CSWAP_I(23,24);IF_CSWAP_I(22,23);CSWAP_I(21,22);IF_CSWAP_I(20,21);CSWAP_I(19,20);IF_CSWAP_I(18,19);CSWAP_I(17,18);IF_CSWAP_I(16,17);CSWAP_I(15,16);IF_CSWAP_I(14,15);CSWAP_I(13,14);IF_CSWAP_I(12,13);CSWAP_I(11,12);IF_CSWAP_I(10,11);CSWAP_I(9,10);IF_CSWAP_I(8,9);CSWAP_I(7,8);IF_CSWAP_I(6,7);CSWAP_I(5,6);IF_CSWAP_I(4,5);CSWAP_I(3,4);IF_CSWAP_I(2,3);CSWAP_I(1,2);IF_CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}
if (count > 34) {IF_CSWAP_I(33,34);CSWAP_I(32,33);IF_CSWAP_I(31,32);CSWAP_I(30,31);IF_CSWAP_I(29,30);CSWAP_I(28,29);IF_CSWAP_I(27,28);CSWAP_I(26,27);IF_CSWAP_I(25,26);CSWAP_I(24,25);IF_CSWAP_I(23,24);CSWAP_I(22,23);IF_CSWAP_I(21,22);CSWAP_I(20,21);IF_CSWAP_I(19,20);CSWAP_I(18,19);IF_CSWAP_I(17,18);CSWAP_I(16,17);IF_CSWAP_I(15,16);CSWAP_I(14,15);IF_CSWAP_I(13,14);CSWAP_I(12,13);IF_CSWAP_I(11,12);CSWAP_I(10,11);IF_CSWAP_I(9,10);CSWAP_I(8,9);IF_CSWAP_I(7,8);CSWAP_I(6,7);IF_CSWAP_I(5,6);CSWAP_I(4,5);IF_CSWAP_I(3,4);CSWAP_I(2,3);IF_CSWAP_I(1,2);CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}
if (count > 35) {IF_CSWAP_I(34,35);CSWAP_I(33,34);IF_CSWAP_I(32,33);CSWAP_I(31,32);IF_CSWAP_I(30,31);CSWAP_I(29,30);IF_CSWAP_I(28,29);CSWAP_I(27,28);IF_CSWAP_I(26,27);CSWAP_I(25,26);IF_CSWAP_I(24,25);CSWAP_I(23,24);IF_CSWAP_I(22,23);CSWAP_I(21,22);IF_CSWAP_I(20,21);CSWAP_I(19,20);IF_CSWAP_I(18,19);CSWAP_I(17,18);IF_CSWAP_I(16,17);CSWAP_I(15,16);IF_CSWAP_I(14,15);CSWAP_I(13,14);IF_CSWAP_I(12,13);CSWAP_I(11,12);IF_CSWAP_I(10,11);CSWAP_I(9,10);IF_CSWAP_I(8,9);CSWAP_I(7,8);IF_CSWAP_I(6,7);CSWAP_I(5,6);IF_CSWAP_I(4,5);CSWAP_I(3,4);IF_CSWAP_I(2,3);CSWAP_I(1,2);IF_CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}
if (count > 36) {IF_CSWAP_I(35,36);CSWAP_I(34,35);IF_CSWAP_I(33,34);CSWAP_I(32,33);IF_CSWAP_I(31,32);CSWAP_I(30,31);IF_CSWAP_I(29,30);CSWAP_I(28,29);IF_CSWAP_I(27,28);CSWAP_I(26,27);IF_CSWAP_I(25,26);CSWAP_I(24,25);IF_CSWAP_I(23,24);CSWAP_I(22,23);IF_CSWAP_I(21,22);CSWAP_I(20,21);IF_CSWAP_I(19,20);CSWAP_I(18,19);IF_CSWAP_I(17,18);CSWAP_I(16,17);IF_CSWAP_I(15,16);CSWAP_I(14,15);IF_CSWAP_I(13,14);CSWAP_I(12,13);IF_CSWAP_I(11,12);CSWAP_I(10,11);IF_CSWAP_I(9,10);CSWAP_I(8,9);IF_CSWAP_I(7,8);CSWAP_I(6,7);IF_CSWAP_I(5,6);CSWAP_I(4,5);IF_CSWAP_I(3,4);CSWAP_I(2,3);IF_CSWAP_I(1,2);CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}
if (count > 37) {IF_CSWAP_I(36,37);CSWAP_I(35,36);IF_CSWAP_I(34,35);CSWAP_I(33,34);IF_CSWAP_I(32,33);CSWAP_I(31,32);IF_CSWAP_I(30,31);CSWAP_I(29,30);IF_CSWAP_I(28,29);CSWAP_I(27,28);IF_CSWAP_I(26,27);CSWAP_I(25,26);IF_CSWAP_I(24,25);CSWAP_I(23,24);IF_CSWAP_I(22,23);CSWAP_I(21,22);IF_CSWAP_I(20,21);CSWAP_I(19,20);IF_CSWAP_I(18,19);CSWAP_I(17,18);IF_CSWAP_I(16,17);CSWAP_I(15,16);IF_CSWAP_I(14,15);CSWAP_I(13,14);IF_CSWAP_I(12,13);CSWAP_I(11,12);IF_CSWAP_I(10,11);CSWAP_I(9,10);IF_CSWAP_I(8,9);CSWAP_I(7,8);IF_CSWAP_I(6,7);CSWAP_I(5,6);IF_CSWAP_I(4,5);CSWAP_I(3,4);IF_CSWAP_I(2,3);CSWAP_I(1,2);IF_CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}
if (count > 38) {IF_CSWAP_I(37,38);CSWAP_I(36,37);IF_CSWAP_I(35,36);CSWAP_I(34,35);IF_CSWAP_I(33,34);CSWAP_I(32,33);IF_CSWAP_I(31,32);CSWAP_I(30,31);IF_CSWAP_I(29,30);CSWAP_I(28,29);IF_CSWAP_I(27,28);CSWAP_I(26,27);IF_CSWAP_I(25,26);CSWAP_I(24,25);IF_CSWAP_I(23,24);CSWAP_I(22,23);IF_CSWAP_I(21,22);CSWAP_I(20,21);IF_CSWAP_I(19,20);CSWAP_I(18,19);IF_CSWAP_I(17,18);CSWAP_I(16,17);IF_CSWAP_I(15,16);CSWAP_I(14,15);IF_CSWAP_I(13,14);CSWAP_I(12,13);IF_CSWAP_I(11,12);CSWAP_I(10,11);IF_CSWAP_I(9,10);CSWAP_I(8,9);IF_CSWAP_I(7,8);CSWAP_I(6,7);IF_CSWAP_I(5,6);CSWAP_I(4,5);IF_CSWAP_I(3,4);CSWAP_I(2,3);IF_CSWAP_I(1,2);CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}
if (count > 39) {IF_CSWAP_I(38,39);CSWAP_I(37,38);IF_CSWAP_I(36,37);CSWAP_I(35,36);IF_CSWAP_I(34,35);CSWAP_I(33,34);IF_CSWAP_I(32,33);CSWAP_I(31,32);IF_CSWAP_I(30,31);CSWAP_I(29,30);IF_CSWAP_I(28,29);CSWAP_I(27,28);IF_CSWAP_I(26,27);CSWAP_I(25,26);IF_CSWAP_I(24,25);CSWAP_I(23,24);IF_CSWAP_I(22,23);CSWAP_I(21,22);IF_CSWAP_I(20,21);CSWAP_I(19,20);IF_CSWAP_I(18,19);CSWAP_I(17,18);IF_CSWAP_I(16,17);CSWAP_I(15,16);IF_CSWAP_I(14,15);CSWAP_I(13,14);IF_CSWAP_I(12,13);CSWAP_I(11,12);IF_CSWAP_I(10,11);CSWAP_I(9,10);IF_CSWAP_I(8,9);CSWAP_I(7,8);IF_CSWAP_I(6,7);CSWAP_I(5,6);IF_CSWAP_I(4,5);CSWAP_I(3,4);IF_CSWAP_I(2,3);CSWAP_I(1,2);IF_CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}
if (count > 40) {IF_CSWAP_I(39,40);CSWAP_I(38,39);IF_CSWAP_I(37,38);CSWAP_I(36,37);IF_CSWAP_I(35,36);CSWAP_I(34,35);IF_CSWAP_I(33,34);CSWAP_I(32,33);IF_CSWAP_I(31,32);CSWAP_I(30,31);IF_CSWAP_I(29,30);CSWAP_I(28,29);IF_CSWAP_I(27,28);CSWAP_I(26,27);IF_CSWAP_I(25,26);CSWAP_I(24,25);IF_CSWAP_I(23,24);CSWAP_I(22,23);IF_CSWAP_I(21,22);CSWAP_I(20,21);IF_CSWAP_I(19,20);CSWAP_I(18,19);IF_CSWAP_I(17,18);CSWAP_I(16,17);IF_CSWAP_I(15,16);CSWAP_I(14,15);IF_CSWAP_I(13,14);CSWAP_I(12,13);IF_CSWAP_I(11,12);CSWAP_I(10,11);IF_CSWAP_I(9,10);CSWAP_I(8,9);IF_CSWAP_I(7,8);CSWAP_I(6,7);IF_CSWAP_I(5,6);CSWAP_I(4,5);IF_CSWAP_I(3,4);CSWAP_I(2,3);IF_CSWAP_I(1,2);CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}
if (count > 41) {IF_CSWAP_I(40,41);CSWAP_I(39,40);IF_CSWAP_I(38,39);CSWAP_I(37,38);IF_CSWAP_I(36,37);CSWAP_I(35,36);IF_CSWAP_I(34,35);CSWAP_I(33,34);IF_CSWAP_I(32,33);CSWAP_I(31,32);IF_CSWAP_I(30,31);CSWAP_I(29,30);IF_CSWAP_I(28,29);CSWAP_I(27,28);IF_CSWAP_I(26,27);CSWAP_I(25,26);IF_CSWAP_I(24,25);CSWAP_I(23,24);IF_CSWAP_I(22,23);CSWAP_I(21,22);IF_CSWAP_I(20,21);CSWAP_I(19,20);IF_CSWAP_I(18,19);CSWAP_I(17,18);IF_CSWAP_I(16,17);CSWAP_I(15,16);IF_CSWAP_I(14,15);CSWAP_I(13,14);IF_CSWAP_I(12,13);CSWAP_I(11,12);IF_CSWAP_I(10,11);CSWAP_I(9,10);IF_CSWAP_I(8,9);CSWAP_I(7,8);IF_CSWAP_I(6,7);CSWAP_I(5,6);IF_CSWAP_I(4,5);CSWAP_I(3,4);IF_CSWAP_I(2,3);CSWAP_I(1,2);IF_CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}
if (count > 42) {IF_CSWAP_I(41,42);CSWAP_I(40,41);IF_CSWAP_I(39,40);CSWAP_I(38,39);IF_CSWAP_I(37,38);CSWAP_I(36,37);IF_CSWAP_I(35,36);CSWAP_I(34,35);IF_CSWAP_I(33,34);CSWAP_I(32,33);IF_CSWAP_I(31,32);CSWAP_I(30,31);IF_CSWAP_I(29,30);CSWAP_I(28,29);IF_CSWAP_I(27,28);CSWAP_I(26,27);IF_CSWAP_I(25,26);CSWAP_I(24,25);IF_CSWAP_I(23,24);CSWAP_I(22,23);IF_CSWAP_I(21,22);CSWAP_I(20,21);IF_CSWAP_I(19,20);CSWAP_I(18,19);IF_CSWAP_I(17,18);CSWAP_I(16,17);IF_CSWAP_I(15,16);CSWAP_I(14,15);IF_CSWAP_I(13,14);CSWAP_I(12,13);IF_CSWAP_I(11,12);CSWAP_I(10,11);IF_CSWAP_I(9,10);CSWAP_I(8,9);IF_CSWAP_I(7,8);CSWAP_I(6,7);IF_CSWAP_I(5,6);CSWAP_I(4,5);IF_CSWAP_I(3,4);CSWAP_I(2,3);IF_CSWAP_I(1,2);CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}
if (count > 43) {IF_CSWAP_I(42,43);CSWAP_I(41,42);IF_CSWAP_I(40,41);CSWAP_I(39,40);IF_CSWAP_I(38,39);CSWAP_I(37,38);IF_CSWAP_I(36,37);CSWAP_I(35,36);IF_CSWAP_I(34,35);CSWAP_I(33,34);IF_CSWAP_I(32,33);CSWAP_I(31,32);IF_CSWAP_I(30,31);CSWAP_I(29,30);IF_CSWAP_I(28,29);CSWAP_I(27,28);IF_CSWAP_I(26,27);CSWAP_I(25,26);IF_CSWAP_I(24,25);CSWAP_I(23,24);IF_CSWAP_I(22,23);CSWAP_I(21,22);IF_CSWAP_I(20,21);CSWAP_I(19,20);IF_CSWAP_I(18,19);CSWAP_I(17,18);IF_CSWAP_I(16,17);CSWAP_I(15,16);IF_CSWAP_I(14,15);CSWAP_I(13,14);IF_CSWAP_I(12,13);CSWAP_I(11,12);IF_CSWAP_I(10,11);CSWAP_I(9,10);IF_CSWAP_I(8,9);CSWAP_I(7,8);IF_CSWAP_I(6,7);CSWAP_I(5,6);IF_CSWAP_I(4,5);CSWAP_I(3,4);IF_CSWAP_I(2,3);CSWAP_I(1,2);IF_CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}
if (count > 44) {IF_CSWAP_I(43,44);CSWAP_I(42,43);IF_CSWAP_I(41,42);CSWAP_I(40,41);IF_CSWAP_I(39,40);CSWAP_I(38,39);IF_CSWAP_I(37,38);CSWAP_I(36,37);IF_CSWAP_I(35,36);CSWAP_I(34,35);IF_CSWAP_I(33,34);CSWAP_I(32,33);IF_CSWAP_I(31,32);CSWAP_I(30,31);IF_CSWAP_I(29,30);CSWAP_I(28,29);IF_CSWAP_I(27,28);CSWAP_I(26,27);IF_CSWAP_I(25,26);CSWAP_I(24,25);IF_CSWAP_I(23,24);CSWAP_I(22,23);IF_CSWAP_I(21,22);CSWAP_I(20,21);IF_CSWAP_I(19,20);CSWAP_I(18,19);IF_CSWAP_I(17,18);CSWAP_I(16,17);IF_CSWAP_I(15,16);CSWAP_I(14,15);IF_CSWAP_I(13,14);CSWAP_I(12,13);IF_CSWAP_I(11,12);CSWAP_I(10,11);IF_CSWAP_I(9,10);CSWAP_I(8,9);IF_CSWAP_I(7,8);CSWAP_I(6,7);IF_CSWAP_I(5,6);CSWAP_I(4,5);IF_CSWAP_I(3,4);CSWAP_I(2,3);IF_CSWAP_I(1,2);CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}
if (count > 45) {IF_CSWAP_I(44,45);CSWAP_I(43,44);IF_CSWAP_I(42,43);CSWAP_I(41,42);IF_CSWAP_I(40,41);CSWAP_I(39,40);IF_CSWAP_I(38,39);CSWAP_I(37,38);IF_CSWAP_I(36,37);CSWAP_I(35,36);IF_CSWAP_I(34,35);CSWAP_I(33,34);IF_CSWAP_I(32,33);CSWAP_I(31,32);IF_CSWAP_I(30,31);CSWAP_I(29,30);IF_CSWAP_I(28,29);CSWAP_I(27,28);IF_CSWAP_I(26,27);CSWAP_I(25,26);IF_CSWAP_I(24,25);CSWAP_I(23,24);IF_CSWAP_I(22,23);CSWAP_I(21,22);IF_CSWAP_I(20,21);CSWAP_I(19,20);IF_CSWAP_I(18,19);CSWAP_I(17,18);IF_CSWAP_I(16,17);CSWAP_I(15,16);IF_CSWAP_I(14,15);CSWAP_I(13,14);IF_CSWAP_I(12,13);CSWAP_I(11,12);IF_CSWAP_I(10,11);CSWAP_I(9,10);IF_CSWAP_I(8,9);CSWAP_I(7,8);IF_CSWAP_I(6,7);CSWAP_I(5,6);IF_CSWAP_I(4,5);CSWAP_I(3,4);IF_CSWAP_I(2,3);CSWAP_I(1,2);IF_CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 46) {IF_CSWAP_I(45,46);CSWAP_I(44,45);IF_CSWAP_I(43,44);CSWAP_I(42,43);IF_CSWAP_I(41,42);CSWAP_I(40,41);IF_CSWAP_I(39,40);CSWAP_I(38,39);IF_CSWAP_I(37,38);CSWAP_I(36,37);IF_CSWAP_I(35,36);CSWAP_I(34,35);IF_CSWAP_I(33,34);CSWAP_I(32,33);IF_CSWAP_I(31,32);CSWAP_I(30,31);IF_CSWAP_I(29,30);CSWAP_I(28,29);IF_CSWAP_I(27,28);CSWAP_I(26,27);IF_CSWAP_I(25,26);CSWAP_I(24,25);IF_CSWAP_I(23,24);CSWAP_I(22,23);IF_CSWAP_I(21,22);CSWAP_I(20,21);IF_CSWAP_I(19,20);CSWAP_I(18,19);IF_CSWAP_I(17,18);CSWAP_I(16,17);IF_CSWAP_I(15,16);CSWAP_I(14,15);IF_CSWAP_I(13,14);CSWAP_I(12,13);IF_CSWAP_I(11,12);CSWAP_I(10,11);IF_CSWAP_I(9,10);CSWAP_I(8,9);IF_CSWAP_I(7,8);CSWAP_I(6,7);IF_CSWAP_I(5,6);CSWAP_I(4,5);IF_CSWAP_I(3,4);CSWAP_I(2,3);IF_CSWAP_I(1,2);CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 47) {IF_CSWAP_I(46,47);CSWAP_I(45,46);IF_CSWAP_I(44,45);CSWAP_I(43,44);IF_CSWAP_I(42,43);CSWAP_I(41,42);IF_CSWAP_I(40,41);CSWAP_I(39,40);IF_CSWAP_I(38,39);CSWAP_I(37,38);IF_CSWAP_I(36,37);CSWAP_I(35,36);IF_CSWAP_I(34,35);CSWAP_I(33,34);IF_CSWAP_I(32,33);CSWAP_I(31,32);IF_CSWAP_I(30,31);CSWAP_I(29,30);IF_CSWAP_I(28,29);CSWAP_I(27,28);IF_CSWAP_I(26,27);CSWAP_I(25,26);IF_CSWAP_I(24,25);CSWAP_I(23,24);IF_CSWAP_I(22,23);CSWAP_I(21,22);IF_CSWAP_I(20,21);CSWAP_I(19,20);IF_CSWAP_I(18,19);CSWAP_I(17,18);IF_CSWAP_I(16,17);CSWAP_I(15,16);IF_CSWAP_I(14,15);CSWAP_I(13,14);IF_CSWAP_I(12,13);CSWAP_I(11,12);IF_CSWAP_I(10,11);CSWAP_I(9,10);IF_CSWAP_I(8,9);CSWAP_I(7,8);IF_CSWAP_I(6,7);CSWAP_I(5,6);IF_CSWAP_I(4,5);CSWAP_I(3,4);IF_CSWAP_I(2,3);CSWAP_I(1,2);IF_CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 48) {IF_CSWAP_I(47,48);CSWAP_I(46,47);IF_CSWAP_I(45,46);CSWAP_I(44,45);IF_CSWAP_I(43,44);CSWAP_I(42,43);IF_CSWAP_I(41,42);CSWAP_I(40,41);IF_CSWAP_I(39,40);CSWAP_I(38,39);IF_CSWAP_I(37,38);CSWAP_I(36,37);IF_CSWAP_I(35,36);CSWAP_I(34,35);IF_CSWAP_I(33,34);CSWAP_I(32,33);IF_CSWAP_I(31,32);CSWAP_I(30,31);IF_CSWAP_I(29,30);CSWAP_I(28,29);IF_CSWAP_I(27,28);CSWAP_I(26,27);IF_CSWAP_I(25,26);CSWAP_I(24,25);IF_CSWAP_I(23,24);CSWAP_I(22,23);IF_CSWAP_I(21,22);CSWAP_I(20,21);IF_CSWAP_I(19,20);CSWAP_I(18,19);IF_CSWAP_I(17,18);CSWAP_I(16,17);IF_CSWAP_I(15,16);CSWAP_I(14,15);IF_CSWAP_I(13,14);CSWAP_I(12,13);IF_CSWAP_I(11,12);CSWAP_I(10,11);IF_CSWAP_I(9,10);CSWAP_I(8,9);IF_CSWAP_I(7,8);CSWAP_I(6,7);IF_CSWAP_I(5,6);CSWAP_I(4,5);IF_CSWAP_I(3,4);CSWAP_I(2,3);IF_CSWAP_I(1,2);CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 49) {IF_CSWAP_I(48,49);CSWAP_I(47,48);IF_CSWAP_I(46,47);CSWAP_I(45,46);IF_CSWAP_I(44,45);CSWAP_I(43,44);IF_CSWAP_I(42,43);CSWAP_I(41,42);IF_CSWAP_I(40,41);CSWAP_I(39,40);IF_CSWAP_I(38,39);CSWAP_I(37,38);IF_CSWAP_I(36,37);CSWAP_I(35,36);IF_CSWAP_I(34,35);CSWAP_I(33,34);IF_CSWAP_I(32,33);CSWAP_I(31,32);IF_CSWAP_I(30,31);CSWAP_I(29,30);IF_CSWAP_I(28,29);CSWAP_I(27,28);IF_CSWAP_I(26,27);CSWAP_I(25,26);IF_CSWAP_I(24,25);CSWAP_I(23,24);IF_CSWAP_I(22,23);CSWAP_I(21,22);IF_CSWAP_I(20,21);CSWAP_I(19,20);IF_CSWAP_I(18,19);CSWAP_I(17,18);IF_CSWAP_I(16,17);CSWAP_I(15,16);IF_CSWAP_I(14,15);CSWAP_I(13,14);IF_CSWAP_I(12,13);CSWAP_I(11,12);IF_CSWAP_I(10,11);CSWAP_I(9,10);IF_CSWAP_I(8,9);CSWAP_I(7,8);IF_CSWAP_I(6,7);CSWAP_I(5,6);IF_CSWAP_I(4,5);CSWAP_I(3,4);IF_CSWAP_I(2,3);CSWAP_I(1,2);IF_CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 50) {IF_CSWAP_I(49,50);CSWAP_I(48,49);IF_CSWAP_I(47,48);CSWAP_I(46,47);IF_CSWAP_I(45,46);CSWAP_I(44,45);IF_CSWAP_I(43,44);CSWAP_I(42,43);IF_CSWAP_I(41,42);CSWAP_I(40,41);IF_CSWAP_I(39,40);CSWAP_I(38,39);IF_CSWAP_I(37,38);CSWAP_I(36,37);IF_CSWAP_I(35,36);CSWAP_I(34,35);IF_CSWAP_I(33,34);CSWAP_I(32,33);IF_CSWAP_I(31,32);CSWAP_I(30,31);IF_CSWAP_I(29,30);CSWAP_I(28,29);IF_CSWAP_I(27,28);CSWAP_I(26,27);IF_CSWAP_I(25,26);CSWAP_I(24,25);IF_CSWAP_I(23,24);CSWAP_I(22,23);IF_CSWAP_I(21,22);CSWAP_I(20,21);IF_CSWAP_I(19,20);CSWAP_I(18,19);IF_CSWAP_I(17,18);CSWAP_I(16,17);IF_CSWAP_I(15,16);CSWAP_I(14,15);IF_CSWAP_I(13,14);CSWAP_I(12,13);IF_CSWAP_I(11,12);CSWAP_I(10,11);IF_CSWAP_I(9,10);CSWAP_I(8,9);IF_CSWAP_I(7,8);CSWAP_I(6,7);IF_CSWAP_I(5,6);CSWAP_I(4,5);IF_CSWAP_I(3,4);CSWAP_I(2,3);IF_CSWAP_I(1,2);CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 51) {IF_CSWAP_I(50,51);CSWAP_I(49,50);IF_CSWAP_I(48,49);CSWAP_I(47,48);IF_CSWAP_I(46,47);CSWAP_I(45,46);IF_CSWAP_I(44,45);CSWAP_I(43,44);IF_CSWAP_I(42,43);CSWAP_I(41,42);IF_CSWAP_I(40,41);CSWAP_I(39,40);IF_CSWAP_I(38,39);CSWAP_I(37,38);IF_CSWAP_I(36,37);CSWAP_I(35,36);IF_CSWAP_I(34,35);CSWAP_I(33,34);IF_CSWAP_I(32,33);CSWAP_I(31,32);IF_CSWAP_I(30,31);CSWAP_I(29,30);IF_CSWAP_I(28,29);CSWAP_I(27,28);IF_CSWAP_I(26,27);CSWAP_I(25,26);IF_CSWAP_I(24,25);CSWAP_I(23,24);IF_CSWAP_I(22,23);CSWAP_I(21,22);IF_CSWAP_I(20,21);CSWAP_I(19,20);IF_CSWAP_I(18,19);CSWAP_I(17,18);IF_CSWAP_I(16,17);CSWAP_I(15,16);IF_CSWAP_I(14,15);CSWAP_I(13,14);IF_CSWAP_I(12,13);CSWAP_I(11,12);IF_CSWAP_I(10,11);CSWAP_I(9,10);IF_CSWAP_I(8,9);CSWAP_I(7,8);IF_CSWAP_I(6,7);CSWAP_I(5,6);IF_CSWAP_I(4,5);CSWAP_I(3,4);IF_CSWAP_I(2,3);CSWAP_I(1,2);IF_CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 52) {IF_CSWAP_I(51,52);CSWAP_I(50,51);IF_CSWAP_I(49,50);CSWAP_I(48,49);IF_CSWAP_I(47,48);CSWAP_I(46,47);IF_CSWAP_I(45,46);CSWAP_I(44,45);IF_CSWAP_I(43,44);CSWAP_I(42,43);IF_CSWAP_I(41,42);CSWAP_I(40,41);IF_CSWAP_I(39,40);CSWAP_I(38,39);IF_CSWAP_I(37,38);CSWAP_I(36,37);IF_CSWAP_I(35,36);CSWAP_I(34,35);IF_CSWAP_I(33,34);CSWAP_I(32,33);IF_CSWAP_I(31,32);CSWAP_I(30,31);IF_CSWAP_I(29,30);CSWAP_I(28,29);IF_CSWAP_I(27,28);CSWAP_I(26,27);IF_CSWAP_I(25,26);CSWAP_I(24,25);IF_CSWAP_I(23,24);CSWAP_I(22,23);IF_CSWAP_I(21,22);CSWAP_I(20,21);IF_CSWAP_I(19,20);CSWAP_I(18,19);IF_CSWAP_I(17,18);CSWAP_I(16,17);IF_CSWAP_I(15,16);CSWAP_I(14,15);IF_CSWAP_I(13,14);CSWAP_I(12,13);IF_CSWAP_I(11,12);CSWAP_I(10,11);IF_CSWAP_I(9,10);CSWAP_I(8,9);IF_CSWAP_I(7,8);CSWAP_I(6,7);IF_CSWAP_I(5,6);CSWAP_I(4,5);IF_CSWAP_I(3,4);CSWAP_I(2,3);IF_CSWAP_I(1,2);CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 53) {IF_CSWAP_I(52,53);CSWAP_I(51,52);IF_CSWAP_I(50,51);CSWAP_I(49,50);IF_CSWAP_I(48,49);CSWAP_I(47,48);IF_CSWAP_I(46,47);CSWAP_I(45,46);IF_CSWAP_I(44,45);CSWAP_I(43,44);IF_CSWAP_I(42,43);CSWAP_I(41,42);IF_CSWAP_I(40,41);CSWAP_I(39,40);IF_CSWAP_I(38,39);CSWAP_I(37,38);IF_CSWAP_I(36,37);CSWAP_I(35,36);IF_CSWAP_I(34,35);CSWAP_I(33,34);IF_CSWAP_I(32,33);CSWAP_I(31,32);IF_CSWAP_I(30,31);CSWAP_I(29,30);IF_CSWAP_I(28,29);CSWAP_I(27,28);IF_CSWAP_I(26,27);CSWAP_I(25,26);IF_CSWAP_I(24,25);CSWAP_I(23,24);IF_CSWAP_I(22,23);CSWAP_I(21,22);IF_CSWAP_I(20,21);CSWAP_I(19,20);IF_CSWAP_I(18,19);CSWAP_I(17,18);IF_CSWAP_I(16,17);CSWAP_I(15,16);IF_CSWAP_I(14,15);CSWAP_I(13,14);IF_CSWAP_I(12,13);CSWAP_I(11,12);IF_CSWAP_I(10,11);CSWAP_I(9,10);IF_CSWAP_I(8,9);CSWAP_I(7,8);IF_CSWAP_I(6,7);CSWAP_I(5,6);IF_CSWAP_I(4,5);CSWAP_I(3,4);IF_CSWAP_I(2,3);CSWAP_I(1,2);IF_CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 54) {IF_CSWAP_I(53,54);CSWAP_I(52,53);IF_CSWAP_I(51,52);CSWAP_I(50,51);IF_CSWAP_I(49,50);CSWAP_I(48,49);IF_CSWAP_I(47,48);CSWAP_I(46,47);IF_CSWAP_I(45,46);CSWAP_I(44,45);IF_CSWAP_I(43,44);CSWAP_I(42,43);IF_CSWAP_I(41,42);CSWAP_I(40,41);IF_CSWAP_I(39,40);CSWAP_I(38,39);IF_CSWAP_I(37,38);CSWAP_I(36,37);IF_CSWAP_I(35,36);CSWAP_I(34,35);IF_CSWAP_I(33,34);CSWAP_I(32,33);IF_CSWAP_I(31,32);CSWAP_I(30,31);IF_CSWAP_I(29,30);CSWAP_I(28,29);IF_CSWAP_I(27,28);CSWAP_I(26,27);IF_CSWAP_I(25,26);CSWAP_I(24,25);IF_CSWAP_I(23,24);CSWAP_I(22,23);IF_CSWAP_I(21,22);CSWAP_I(20,21);IF_CSWAP_I(19,20);CSWAP_I(18,19);IF_CSWAP_I(17,18);CSWAP_I(16,17);IF_CSWAP_I(15,16);CSWAP_I(14,15);IF_CSWAP_I(13,14);CSWAP_I(12,13);IF_CSWAP_I(11,12);CSWAP_I(10,11);IF_CSWAP_I(9,10);CSWAP_I(8,9);IF_CSWAP_I(7,8);CSWAP_I(6,7);IF_CSWAP_I(5,6);CSWAP_I(4,5);IF_CSWAP_I(3,4);CSWAP_I(2,3);IF_CSWAP_I(1,2);CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 55) {IF_CSWAP_I(54,55);CSWAP_I(53,54);IF_CSWAP_I(52,53);CSWAP_I(51,52);IF_CSWAP_I(50,51);CSWAP_I(49,50);IF_CSWAP_I(48,49);CSWAP_I(47,48);IF_CSWAP_I(46,47);CSWAP_I(45,46);IF_CSWAP_I(44,45);CSWAP_I(43,44);IF_CSWAP_I(42,43);CSWAP_I(41,42);IF_CSWAP_I(40,41);CSWAP_I(39,40);IF_CSWAP_I(38,39);CSWAP_I(37,38);IF_CSWAP_I(36,37);CSWAP_I(35,36);IF_CSWAP_I(34,35);CSWAP_I(33,34);IF_CSWAP_I(32,33);CSWAP_I(31,32);IF_CSWAP_I(30,31);CSWAP_I(29,30);IF_CSWAP_I(28,29);CSWAP_I(27,28);IF_CSWAP_I(26,27);CSWAP_I(25,26);IF_CSWAP_I(24,25);CSWAP_I(23,24);IF_CSWAP_I(22,23);CSWAP_I(21,22);IF_CSWAP_I(20,21);CSWAP_I(19,20);IF_CSWAP_I(18,19);CSWAP_I(17,18);IF_CSWAP_I(16,17);CSWAP_I(15,16);IF_CSWAP_I(14,15);CSWAP_I(13,14);IF_CSWAP_I(12,13);CSWAP_I(11,12);IF_CSWAP_I(10,11);CSWAP_I(9,10);IF_CSWAP_I(8,9);CSWAP_I(7,8);IF_CSWAP_I(6,7);CSWAP_I(5,6);IF_CSWAP_I(4,5);CSWAP_I(3,4);IF_CSWAP_I(2,3);CSWAP_I(1,2);IF_CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 56) {IF_CSWAP_I(55,56);CSWAP_I(54,55);IF_CSWAP_I(53,54);CSWAP_I(52,53);IF_CSWAP_I(51,52);CSWAP_I(50,51);IF_CSWAP_I(49,50);CSWAP_I(48,49);IF_CSWAP_I(47,48);CSWAP_I(46,47);IF_CSWAP_I(45,46);CSWAP_I(44,45);IF_CSWAP_I(43,44);CSWAP_I(42,43);IF_CSWAP_I(41,42);CSWAP_I(40,41);IF_CSWAP_I(39,40);CSWAP_I(38,39);IF_CSWAP_I(37,38);CSWAP_I(36,37);IF_CSWAP_I(35,36);CSWAP_I(34,35);IF_CSWAP_I(33,34);CSWAP_I(32,33);IF_CSWAP_I(31,32);CSWAP_I(30,31);IF_CSWAP_I(29,30);CSWAP_I(28,29);IF_CSWAP_I(27,28);CSWAP_I(26,27);IF_CSWAP_I(25,26);CSWAP_I(24,25);IF_CSWAP_I(23,24);CSWAP_I(22,23);IF_CSWAP_I(21,22);CSWAP_I(20,21);IF_CSWAP_I(19,20);CSWAP_I(18,19);IF_CSWAP_I(17,18);CSWAP_I(16,17);IF_CSWAP_I(15,16);CSWAP_I(14,15);IF_CSWAP_I(13,14);CSWAP_I(12,13);IF_CSWAP_I(11,12);CSWAP_I(10,11);IF_CSWAP_I(9,10);CSWAP_I(8,9);IF_CSWAP_I(7,8);CSWAP_I(6,7);IF_CSWAP_I(5,6);CSWAP_I(4,5);IF_CSWAP_I(3,4);CSWAP_I(2,3);IF_CSWAP_I(1,2);CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 57) {IF_CSWAP_I(56,57);CSWAP_I(55,56);IF_CSWAP_I(54,55);CSWAP_I(53,54);IF_CSWAP_I(52,53);CSWAP_I(51,52);IF_CSWAP_I(50,51);CSWAP_I(49,50);IF_CSWAP_I(48,49);CSWAP_I(47,48);IF_CSWAP_I(46,47);CSWAP_I(45,46);IF_CSWAP_I(44,45);CSWAP_I(43,44);IF_CSWAP_I(42,43);CSWAP_I(41,42);IF_CSWAP_I(40,41);CSWAP_I(39,40);IF_CSWAP_I(38,39);CSWAP_I(37,38);IF_CSWAP_I(36,37);CSWAP_I(35,36);IF_CSWAP_I(34,35);CSWAP_I(33,34);IF_CSWAP_I(32,33);CSWAP_I(31,32);IF_CSWAP_I(30,31);CSWAP_I(29,30);IF_CSWAP_I(28,29);CSWAP_I(27,28);IF_CSWAP_I(26,27);CSWAP_I(25,26);IF_CSWAP_I(24,25);CSWAP_I(23,24);IF_CSWAP_I(22,23);CSWAP_I(21,22);IF_CSWAP_I(20,21);CSWAP_I(19,20);IF_CSWAP_I(18,19);CSWAP_I(17,18);IF_CSWAP_I(16,17);CSWAP_I(15,16);IF_CSWAP_I(14,15);CSWAP_I(13,14);IF_CSWAP_I(12,13);CSWAP_I(11,12);IF_CSWAP_I(10,11);CSWAP_I(9,10);IF_CSWAP_I(8,9);CSWAP_I(7,8);IF_CSWAP_I(6,7);CSWAP_I(5,6);IF_CSWAP_I(4,5);CSWAP_I(3,4);IF_CSWAP_I(2,3);CSWAP_I(1,2);IF_CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 58) {IF_CSWAP_I(57,58);CSWAP_I(56,57);IF_CSWAP_I(55,56);CSWAP_I(54,55);IF_CSWAP_I(53,54);CSWAP_I(52,53);IF_CSWAP_I(51,52);CSWAP_I(50,51);IF_CSWAP_I(49,50);CSWAP_I(48,49);IF_CSWAP_I(47,48);CSWAP_I(46,47);IF_CSWAP_I(45,46);CSWAP_I(44,45);IF_CSWAP_I(43,44);CSWAP_I(42,43);IF_CSWAP_I(41,42);CSWAP_I(40,41);IF_CSWAP_I(39,40);CSWAP_I(38,39);IF_CSWAP_I(37,38);CSWAP_I(36,37);IF_CSWAP_I(35,36);CSWAP_I(34,35);IF_CSWAP_I(33,34);CSWAP_I(32,33);IF_CSWAP_I(31,32);CSWAP_I(30,31);IF_CSWAP_I(29,30);CSWAP_I(28,29);IF_CSWAP_I(27,28);CSWAP_I(26,27);IF_CSWAP_I(25,26);CSWAP_I(24,25);IF_CSWAP_I(23,24);CSWAP_I(22,23);IF_CSWAP_I(21,22);CSWAP_I(20,21);IF_CSWAP_I(19,20);CSWAP_I(18,19);IF_CSWAP_I(17,18);CSWAP_I(16,17);IF_CSWAP_I(15,16);CSWAP_I(14,15);IF_CSWAP_I(13,14);CSWAP_I(12,13);IF_CSWAP_I(11,12);CSWAP_I(10,11);IF_CSWAP_I(9,10);CSWAP_I(8,9);IF_CSWAP_I(7,8);CSWAP_I(6,7);IF_CSWAP_I(5,6);CSWAP_I(4,5);IF_CSWAP_I(3,4);CSWAP_I(2,3);IF_CSWAP_I(1,2);CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 59) {IF_CSWAP_I(58,59);CSWAP_I(57,58);IF_CSWAP_I(56,57);CSWAP_I(55,56);IF_CSWAP_I(54,55);CSWAP_I(53,54);IF_CSWAP_I(52,53);CSWAP_I(51,52);IF_CSWAP_I(50,51);CSWAP_I(49,50);IF_CSWAP_I(48,49);CSWAP_I(47,48);IF_CSWAP_I(46,47);CSWAP_I(45,46);IF_CSWAP_I(44,45);CSWAP_I(43,44);IF_CSWAP_I(42,43);CSWAP_I(41,42);IF_CSWAP_I(40,41);CSWAP_I(39,40);IF_CSWAP_I(38,39);CSWAP_I(37,38);IF_CSWAP_I(36,37);CSWAP_I(35,36);IF_CSWAP_I(34,35);CSWAP_I(33,34);IF_CSWAP_I(32,33);CSWAP_I(31,32);IF_CSWAP_I(30,31);CSWAP_I(29,30);IF_CSWAP_I(28,29);CSWAP_I(27,28);IF_CSWAP_I(26,27);CSWAP_I(25,26);IF_CSWAP_I(24,25);CSWAP_I(23,24);IF_CSWAP_I(22,23);CSWAP_I(21,22);IF_CSWAP_I(20,21);CSWAP_I(19,20);IF_CSWAP_I(18,19);CSWAP_I(17,18);IF_CSWAP_I(16,17);CSWAP_I(15,16);IF_CSWAP_I(14,15);CSWAP_I(13,14);IF_CSWAP_I(12,13);CSWAP_I(11,12);IF_CSWAP_I(10,11);CSWAP_I(9,10);IF_CSWAP_I(8,9);CSWAP_I(7,8);IF_CSWAP_I(6,7);CSWAP_I(5,6);IF_CSWAP_I(4,5);CSWAP_I(3,4);IF_CSWAP_I(2,3);CSWAP_I(1,2);IF_CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 60) {IF_CSWAP_I(59,60);CSWAP_I(58,59);IF_CSWAP_I(57,58);CSWAP_I(56,57);IF_CSWAP_I(55,56);CSWAP_I(54,55);IF_CSWAP_I(53,54);CSWAP_I(52,53);IF_CSWAP_I(51,52);CSWAP_I(50,51);IF_CSWAP_I(49,50);CSWAP_I(48,49);IF_CSWAP_I(47,48);CSWAP_I(46,47);IF_CSWAP_I(45,46);CSWAP_I(44,45);IF_CSWAP_I(43,44);CSWAP_I(42,43);IF_CSWAP_I(41,42);CSWAP_I(40,41);IF_CSWAP_I(39,40);CSWAP_I(38,39);IF_CSWAP_I(37,38);CSWAP_I(36,37);IF_CSWAP_I(35,36);CSWAP_I(34,35);IF_CSWAP_I(33,34);CSWAP_I(32,33);IF_CSWAP_I(31,32);CSWAP_I(30,31);IF_CSWAP_I(29,30);CSWAP_I(28,29);IF_CSWAP_I(27,28);CSWAP_I(26,27);IF_CSWAP_I(25,26);CSWAP_I(24,25);IF_CSWAP_I(23,24);CSWAP_I(22,23);IF_CSWAP_I(21,22);CSWAP_I(20,21);IF_CSWAP_I(19,20);CSWAP_I(18,19);IF_CSWAP_I(17,18);CSWAP_I(16,17);IF_CSWAP_I(15,16);CSWAP_I(14,15);IF_CSWAP_I(13,14);CSWAP_I(12,13);IF_CSWAP_I(11,12);CSWAP_I(10,11);IF_CSWAP_I(9,10);CSWAP_I(8,9);IF_CSWAP_I(7,8);CSWAP_I(6,7);IF_CSWAP_I(5,6);CSWAP_I(4,5);IF_CSWAP_I(3,4);CSWAP_I(2,3);IF_CSWAP_I(1,2);CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 61) {IF_CSWAP_I(60,61);CSWAP_I(59,60);IF_CSWAP_I(58,59);CSWAP_I(57,58);IF_CSWAP_I(56,57);CSWAP_I(55,56);IF_CSWAP_I(54,55);CSWAP_I(53,54);IF_CSWAP_I(52,53);CSWAP_I(51,52);IF_CSWAP_I(50,51);CSWAP_I(49,50);IF_CSWAP_I(48,49);CSWAP_I(47,48);IF_CSWAP_I(46,47);CSWAP_I(45,46);IF_CSWAP_I(44,45);CSWAP_I(43,44);IF_CSWAP_I(42,43);CSWAP_I(41,42);IF_CSWAP_I(40,41);CSWAP_I(39,40);IF_CSWAP_I(38,39);CSWAP_I(37,38);IF_CSWAP_I(36,37);CSWAP_I(35,36);IF_CSWAP_I(34,35);CSWAP_I(33,34);IF_CSWAP_I(32,33);CSWAP_I(31,32);IF_CSWAP_I(30,31);CSWAP_I(29,30);IF_CSWAP_I(28,29);CSWAP_I(27,28);IF_CSWAP_I(26,27);CSWAP_I(25,26);IF_CSWAP_I(24,25);CSWAP_I(23,24);IF_CSWAP_I(22,23);CSWAP_I(21,22);IF_CSWAP_I(20,21);CSWAP_I(19,20);IF_CSWAP_I(18,19);CSWAP_I(17,18);IF_CSWAP_I(16,17);CSWAP_I(15,16);IF_CSWAP_I(14,15);CSWAP_I(13,14);IF_CSWAP_I(12,13);CSWAP_I(11,12);IF_CSWAP_I(10,11);CSWAP_I(9,10);IF_CSWAP_I(8,9);CSWAP_I(7,8);IF_CSWAP_I(6,7);CSWAP_I(5,6);IF_CSWAP_I(4,5);CSWAP_I(3,4);IF_CSWAP_I(2,3);CSWAP_I(1,2);IF_CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 62) {IF_CSWAP_I(61,62);CSWAP_I(60,61);IF_CSWAP_I(59,60);CSWAP_I(58,59);IF_CSWAP_I(57,58);CSWAP_I(56,57);IF_CSWAP_I(55,56);CSWAP_I(54,55);IF_CSWAP_I(53,54);CSWAP_I(52,53);IF_CSWAP_I(51,52);CSWAP_I(50,51);IF_CSWAP_I(49,50);CSWAP_I(48,49);IF_CSWAP_I(47,48);CSWAP_I(46,47);IF_CSWAP_I(45,46);CSWAP_I(44,45);IF_CSWAP_I(43,44);CSWAP_I(42,43);IF_CSWAP_I(41,42);CSWAP_I(40,41);IF_CSWAP_I(39,40);CSWAP_I(38,39);IF_CSWAP_I(37,38);CSWAP_I(36,37);IF_CSWAP_I(35,36);CSWAP_I(34,35);IF_CSWAP_I(33,34);CSWAP_I(32,33);IF_CSWAP_I(31,32);CSWAP_I(30,31);IF_CSWAP_I(29,30);CSWAP_I(28,29);IF_CSWAP_I(27,28);CSWAP_I(26,27);IF_CSWAP_I(25,26);CSWAP_I(24,25);IF_CSWAP_I(23,24);CSWAP_I(22,23);IF_CSWAP_I(21,22);CSWAP_I(20,21);IF_CSWAP_I(19,20);CSWAP_I(18,19);IF_CSWAP_I(17,18);CSWAP_I(16,17);IF_CSWAP_I(15,16);CSWAP_I(14,15);IF_CSWAP_I(13,14);CSWAP_I(12,13);IF_CSWAP_I(11,12);CSWAP_I(10,11);IF_CSWAP_I(9,10);CSWAP_I(8,9);IF_CSWAP_I(7,8);CSWAP_I(6,7);IF_CSWAP_I(5,6);CSWAP_I(4,5);IF_CSWAP_I(3,4);CSWAP_I(2,3);IF_CSWAP_I(1,2);CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 63) {IF_CSWAP_I(62,63);CSWAP_I(61,62);IF_CSWAP_I(60,61);CSWAP_I(59,60);IF_CSWAP_I(58,59);CSWAP_I(57,58);IF_CSWAP_I(56,57);CSWAP_I(55,56);IF_CSWAP_I(54,55);CSWAP_I(53,54);IF_CSWAP_I(52,53);CSWAP_I(51,52);IF_CSWAP_I(50,51);CSWAP_I(49,50);IF_CSWAP_I(48,49);CSWAP_I(47,48);IF_CSWAP_I(46,47);CSWAP_I(45,46);IF_CSWAP_I(44,45);CSWAP_I(43,44);IF_CSWAP_I(42,43);CSWAP_I(41,42);IF_CSWAP_I(40,41);CSWAP_I(39,40);IF_CSWAP_I(38,39);CSWAP_I(37,38);IF_CSWAP_I(36,37);CSWAP_I(35,36);IF_CSWAP_I(34,35);CSWAP_I(33,34);IF_CSWAP_I(32,33);CSWAP_I(31,32);IF_CSWAP_I(30,31);CSWAP_I(29,30);IF_CSWAP_I(28,29);CSWAP_I(27,28);IF_CSWAP_I(26,27);CSWAP_I(25,26);IF_CSWAP_I(24,25);CSWAP_I(23,24);IF_CSWAP_I(22,23);CSWAP_I(21,22);IF_CSWAP_I(20,21);CSWAP_I(19,20);IF_CSWAP_I(18,19);CSWAP_I(17,18);IF_CSWAP_I(16,17);CSWAP_I(15,16);IF_CSWAP_I(14,15);CSWAP_I(13,14);IF_CSWAP_I(12,13);CSWAP_I(11,12);IF_CSWAP_I(10,11);CSWAP_I(9,10);IF_CSWAP_I(8,9);CSWAP_I(7,8);IF_CSWAP_I(6,7);CSWAP_I(5,6);IF_CSWAP_I(4,5);CSWAP_I(3,4);IF_CSWAP_I(2,3);CSWAP_I(1,2);IF_CSWAP_I(0,1);}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
#endif
#endif
}}}}}}}} }}}}}}}
#else
#pragma optionNV(unroll none)
for (int i = 1; i < MAX_REGISTERS && i < count; ++i)
{
#define PART(j) if (i >= j) CSWAP_I(j-1, j)
#if MAX_REGISTERS > 32
PART(63);
PART(62);
PART(61);
PART(60);
PART(59);
PART(58);
PART(57);
PART(56);
PART(55);
PART(54);
PART(53);
PART(52);
PART(51);
PART(50);
PART(49);
PART(48);
PART(47);
PART(46);
PART(45);
PART(44);
PART(43);
PART(42);
PART(41);
PART(40);
PART(39);
PART(38);
PART(37);
PART(36);
PART(35);
PART(34);
PART(33);
PART(32);
#endif
#if MAX_REGISTERS > 16
PART(31);
PART(30);
PART(29);
PART(28);
PART(27);
PART(26);
PART(25);
PART(24);
PART(23);
PART(22);
PART(21);
PART(20);
PART(19);
PART(18);
PART(17);
PART(16);
#endif
PART(15);
PART(14);
PART(13);
PART(12);
PART(11);
PART(10);
PART(9);
PART(8);
PART(7);
PART(6);
PART(5);
PART(4);
PART(3);
PART(2);
PART(1);
}
#pragma optionNV(unroll all)
#endif
#endif
//copy back to lmem
for (int i = 0; i < MAX_REGISTERS; ++i)
if (i < count)
frags[offset+i] = registers[i];
//use to toggle compiling with registers or just testing the unroll
#if 0
frags[offset].x = registers[offset%MAX_REGISTERS].x;
#endif
}
/*
struct HeapNode {
LFB_FRAG_TYPE f;
int block, next;
};
HeapNode nodes[MERGE_SIZE];
HeapNode tmp;
#define CSWAP_HEAP(i, j) if (LFB_FRAG_DEPTH(nodes[i].f) < LFB_FRAG_DEPTH(nodes[j].f)) {tmp = nodes[i]; nodes[i] = nodes[j]; nodes[j] = tmp;}
#define CSWAP_HEAP_I(i, j) if (LFB_FRAG_DEPTH(nodes[i].f) < LFB_FRAG_DEPTH(nodes[j].f)) {tmp = nodes[i]; nodes[i] = nodes[j]; nodes[j] = tmp;
#if MERGE_SIZE <= 2
#define LOG_MERGE_SIZE 1
#elif MERGE_SIZE <= 4
#define LOG_MERGE_SIZE 2
#elif MERGE_SIZE <= 8
#define LOG_MERGE_SIZE 3
#elif MERGE_SIZE <= 16
#define LOG_MERGE_SIZE 4
#elif MERGE_SIZE <= 32
#define LOG_MERGE_SIZE 5
#endif
void heapConstruct(int mergeSize)
{
for (int i = MERGE_SIZE - 1; i >= 0 && i >= mergeSize; --i)
LFB_FRAG_DEPTH(nodes[i].f) = 99999.0;
for (int i = (MERGE_SIZE-2) / 2; i >= 0; --i)
{
CSWAP_HEAP(
}
}
void heapSiftDown()
{
CSWAP_HEAP_I(0, 1)
#if MERGE_SIZE > 2
CSWAP_HEAP_I(1, 2)
#if MERGE_SIZE > 4
CSWAP_HEAP_I(2, 4)
#if MERGE_SIZE > 8
CSWAP_HEAP(4, 8);
CSWAP_HEAP(4, 9);
#if MERGE_SIZE > 16
CSWAP_HEAP(8, 16);
CSWAP_HEAP(8, 17);
CSWAP_HEAP(9, 18);
CSWAP_HEAP(9, 19);
#endif
#endif
}
else CSWAP_HEAP_I(2, 5)
#if MERGE_SIZE > 8
CSWAP_HEAP(5, 10);
CSWAP_HEAP(5, 11);
#if MERGE_SIZE > 16
CSWAP_HEAP(10, 20);
CSWAP_HEAP(10, 21);
CSWAP_HEAP(11, 22);
CSWAP_HEAP(11, 23);
#endif
#endif
}
#endif
}
else CSWAP_HEAP_I(1, 3)
#if MERGE_SIZE > 4
CSWAP_HEAP_I(3, 6)
#if MERGE_SIZE > 8
CSWAP_HEAP(6, 12);
CSWAP_HEAP(6, 13);
#if MERGE_SIZE > 16
CSWAP_HEAP(12, 24);
CSWAP_HEAP(12, 25);
CSWAP_HEAP(13, 26);
CSWAP_HEAP(13, 27);
#endif
#endif
}
else CSWAP_HEAP_I(3, 7)
#if MERGE_SIZE > 8
CSWAP_HEAP(7, 14);
CSWAP_HEAP(7, 15);
#if MERGE_SIZE > 16
CSWAP_HEAP(14, 28);
CSWAP_HEAP(14, 29);
CSWAP_HEAP(15, 30);
CSWAP_HEAP(15, 31);
#endif
#endif
}
#endif
}
#endif
}
//CSWAP_HEAP(1, 0);
}
*/
//Sorts and composites the fragments of one pixel in register-sized blocks.
// 1. copies up to MAX_FRAGS fragments of the list selected by fragIndex into FRAGS()
// 2. calls registerSortRange() on every run of MAX_REGISTERS fragments
// 3. merges the blocks by repeatedly taking the head with the largest depth,
//    walking each block from its last element towards its first, and blends
//    the chosen fragment into fragColour
//fragIndex: forwarded to LFB_INIT to select the fragment list.
//The result is written to fragColour; nothing is returned.
//NOTE(review): the merge only visits the first MERGE_SIZE blocks, so it presumably
//relies on MERGE_SIZE * MAX_REGISTERS >= MAX_FRAGS - confirm where these are defined.
void sortAndCompositeBlocks(int fragIndex)
{
	//read into lmem
	LFB_INIT(lfb, fragIndex);
	int fragCount = 0;
	LFB_FOREACH(lfb, frag)
		if (fragCount < MAX_FRAGS)
		{
			FRAGS(fragCount) = frag;
			++fragCount;
		}
	} //presumably closes the scope opened inside LFB_FOREACH

	//sort blocks in registers
	int mergeCount = 0;
	for (int i = 0; i < fragCount; i += MAX_REGISTERS)
	{
		registerSortRange(i, min(fragCount - i, MAX_REGISTERS));
		++mergeCount;
	}

	//prime the merge heads
	//next[i] is the index of block i's current head; block i is exhausted
	//once next[i] < i * MAX_REGISTERS. Every slot is initialised because the
	//merge loop below reads all MERGE_SIZE entries, not just the used ones.
	int next[MERGE_SIZE];
	for (int i = 0; i < MERGE_SIZE; ++i)
	{
		if (i < mergeCount)
		{
			next[i] = min(fragCount, (i + 1) * MAX_REGISTERS) - 1;
			registers[i] = FRAGS(next[i]);
		}
		else
		{
			//unused block: start it in the exhausted state
			next[i] = i * MAX_REGISTERS - 1;
		}
	}

#if DEBUG
	//the read loop caps fragCount at MAX_FRAGS, so reaching the cap is the
	//only observable sign that fragments may have been dropped
	if (fragCount >= MAX_FRAGS)
	{
		//warning: hit max frags!
		fragColour = vec4(1,0,1,1);
		return;
	}
	float lastDepth = 99999.0;
#endif

	//merge and composite blocks
	fragColour = vec4(1.0);
	for (int i = 0; i < fragCount; ++i)
	{
		//n is the block that supplied f, or -1 while no live head has been seen
		int n = -1;
		LFB_FRAG_TYPE f;
		LFB_FRAG_DEPTH(f) = 0.0;
		for (int j = 0; j < MERGE_SIZE; ++j)
		{
			if (next[j] >= j * MAX_REGISTERS)
			{
				//the first live head is always accepted so that heads with
				//depth <= 0.0 are still selected; later heads must be strictly deeper
				if (n < 0 || LFB_FRAG_DEPTH(registers[j]) > LFB_FRAG_DEPTH(f))
				{
					f = registers[j];
					n = j;
				}
			}
		}

		//every merged block is exhausted: nothing left to composite
		if (n < 0)
			break;

		//advance the chosen block; the loop keeps register indexing in step with j
		for (int j = 0; j < MERGE_SIZE; ++j)
			if (n == j)
				if (--next[j] >= j * MAX_REGISTERS)
					registers[j] = FRAGS(next[j]);

#if DEBUG
		float thisDepth = LFB_FRAG_DEPTH(f);
		if (thisDepth > lastDepth)
		{
			//error: out of order!
			fragColour = vec4(1,0,0,1);
			return;
		}
		lastDepth = thisDepth;
#endif

		vec4 col = floatToRGBA8(f.x); //extract rgba from rg
		fragColour.rgb = mix(fragColour.rgb, col.rgb, col.a);
		//fragColour.rgb += col.rgb * col.a * fragColour.a;
		//fragColour.a *= (1.0 - col.a);
	}
}
#else
//Sorts and composites the fragments of one pixel, sorting each block while
//it is being read.
// 1. reads fragments through the LFB iterator into registers[], insertion
//    sorting each new fragment into place with an unrolled compare/swap chain
// 2. copies each sorted block to frags[] (at most 16 blocks are read)
// 3. merges the blocks by repeatedly taking the head with the largest depth,
//    walking each block from its last element towards its first, and blends
//    the chosen fragment into fragColour
//fragIndex: forwarded to LFB_INIT to select the fragment list.
//The result is written to fragColour; nothing is returned.
//NOTE(review): the unrolled reader always handles registers 0..15, and the
//block limit of 16 is hard-coded rather than tied to MERGE_SIZE - presumably
//this variant expects MAX_REGISTERS >= 16 and MERGE_SIZE >= 16; confirm.
void sortAndCompositeBlocks(int fragIndex)
{
	//read into lmem
	int fragCount = 0;
	LFB_INIT(lfb, fragIndex);
	LFB_ITER_BEGIN(lfb);

	//IF_CSWAP_I deliberately leaves its brace open: the compare that follows
	//is only reached when this one swapped, which gives the insertion sort
	//its early out. The matching braces are the runs of '}' ending each line.
#define SHOULD_SWAP(a, b) (LFB_FRAG_DEPTH(a) > LFB_FRAG_DEPTH(b))
#define SWAP_FRAGS(a, b) {tmp = a; a = b; b = tmp;}
#define IF_CSWAP_I(a, b) if (SHOULD_SWAP(registers[a], registers[b])) {SWAP_FRAGS(registers[a], registers[b]);
#define HAS_FRAGS LFB_ITER_CONDITION(lfb)
#define GET_FRAG(i) registers[i] = LFB_GET(lfb); LFB_ITER_INC(lfb); ++count
	LFB_FRAG_TYPE tmp;

	//sort blocks in registers
	int mergeCount = 0;
	while (mergeCount < 16 && LFB_ITER_CONDITION(lfb))
	{
		int count = 0;
		GET_FRAG(0);
		//each "if (HAS_FRAGS) {" below stays open and encloses the following
		//lines, so reading stops at the first exhausted iterator test
		if (HAS_FRAGS) {GET_FRAG(1); IF_CSWAP_I(0, 1)}
		if (HAS_FRAGS) {GET_FRAG(2); IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}
		if (HAS_FRAGS) {GET_FRAG(3); IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}
		if (HAS_FRAGS) {GET_FRAG(4); IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}
		if (HAS_FRAGS) {GET_FRAG(5); IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}
		if (HAS_FRAGS) {GET_FRAG(6); IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}
		if (HAS_FRAGS) {GET_FRAG(7); IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}
		if (HAS_FRAGS) {GET_FRAG(8); IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}
		if (HAS_FRAGS) {GET_FRAG(9); IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }
		if (HAS_FRAGS) {GET_FRAG(10); IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}
		if (HAS_FRAGS) {GET_FRAG(11); IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}
		if (HAS_FRAGS) {GET_FRAG(12); IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}
		if (HAS_FRAGS) {GET_FRAG(13); IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}
		if (HAS_FRAGS) {GET_FRAG(14); IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}}
		if (HAS_FRAGS) {GET_FRAG(15); IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}}}
#if MAX_REGISTERS > 16
		if (HAS_FRAGS) {GET_FRAG(16); IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}}}}
		if (HAS_FRAGS) {GET_FRAG(17); IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}}}} }
		if (HAS_FRAGS) {GET_FRAG(18); IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}}}} }}
		if (HAS_FRAGS) {GET_FRAG(19); IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}}}} }}}
		if (HAS_FRAGS) {GET_FRAG(20); IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}}}} }}}}
		if (HAS_FRAGS) {GET_FRAG(21); IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}}}} }}}}}
		if (HAS_FRAGS) {GET_FRAG(22); IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}}}} }}}}}}
		if (HAS_FRAGS) {GET_FRAG(23); IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}}}} }}}}}}}
		if (HAS_FRAGS) {GET_FRAG(24); IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}}}} }}}}}}}}
		if (HAS_FRAGS) {GET_FRAG(25); IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}}}} }}}}}}}} }
		if (HAS_FRAGS) {GET_FRAG(26); IF_CSWAP_I(25, 26)IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}}}} }}}}}}}} }}
		if (HAS_FRAGS) {GET_FRAG(27); IF_CSWAP_I(26, 27)IF_CSWAP_I(25, 26)IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}}}} }}}}}}}} }}}
		if (HAS_FRAGS) {GET_FRAG(28); IF_CSWAP_I(27, 28)IF_CSWAP_I(26, 27)IF_CSWAP_I(25, 26)IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}}}} }}}}}}}} }}}}
		if (HAS_FRAGS) {GET_FRAG(29); IF_CSWAP_I(28, 29)IF_CSWAP_I(27, 28)IF_CSWAP_I(26, 27)IF_CSWAP_I(25, 26)IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}}}} }}}}}}}} }}}}}
		if (HAS_FRAGS) {GET_FRAG(30); IF_CSWAP_I(29, 30)IF_CSWAP_I(28, 29)IF_CSWAP_I(27, 28)IF_CSWAP_I(26, 27)IF_CSWAP_I(25, 26)IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}}}} }}}}}}}} }}}}}}
		if (HAS_FRAGS) {GET_FRAG(31); IF_CSWAP_I(30, 31)IF_CSWAP_I(29, 30)IF_CSWAP_I(28, 29)IF_CSWAP_I(27, 28)IF_CSWAP_I(26, 27)IF_CSWAP_I(25, 26)IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}} }}}}}}}} }}}}}}}} }}}}}}}
		//closes the 16 "if (HAS_FRAGS) {" scopes opened for registers 16..31
		}}}}}}}} }}}}}}}}
#endif
		//closes the 15 "if (HAS_FRAGS) {" scopes opened for registers 1..15
		}}}}}}}} }}}}}}}

		//copy to lmem
		//for (int i = 0; i < MAX_REGISTERS && i < count; ++i)
		//	frags[fragCount + i] = registers[i];
		//CR also leaves its brace open, so the copies nest and stop at count
#define CR(i) if (count > i) {frags[fragCount + i] = registers[i]
		CR(0);CR(1);CR(2);CR(3);CR(4);CR(5);CR(6);CR(7);
		CR(8);CR(9);CR(10);CR(11);CR(12);CR(13);CR(14);CR(15);
#if MAX_REGISTERS > 16
		CR(16);CR(17);CR(18);CR(19);CR(20);CR(21);CR(22);CR(23);CR(24);CR(25);CR(26);CR(27);CR(28);CR(29);CR(30);CR(31);
		}}}}}}}} }}}}}}}}
#endif
		}}}}}}}} }}}}}}}}

		fragCount += count;
		++mergeCount;
	}

	//prime the merge heads
	//next[i] is the index of block i's current head; block i is exhausted
	//once next[i] < i * MAX_REGISTERS. Every slot is initialised because the
	//merge loop below reads all MERGE_SIZE entries, not just the used ones.
	int next[MERGE_SIZE];
	for (int i = 0; i < MERGE_SIZE; ++i)
	{
		if (i < mergeCount)
		{
			next[i] = min(fragCount, (i + 1) * MAX_REGISTERS) - 1;
			registers[i] = FRAGS(next[i]);
		}
		else
		{
			//unused block: start it in the exhausted state
			next[i] = i * MAX_REGISTERS - 1;
		}
	}

#if DEBUG
	if (fragCount >= MAX_FRAGS)
	{
		//warning: hit max frags!
		fragColour = vec4(1,0,1,1);
		return;
	}
	//start above every depth: the last stored fragment is only the largest
	//of the final block, so it is not a valid bound for the first merged one
	float lastDepth = 99999.0;
#endif

	//merge and composite blocks
	fragColour = vec4(1.0);
	for (int i = 0; i < fragCount; ++i)
	{
		//n is the block that supplied f, or -1 while no live head has been seen
		int n = -1;
		LFB_FRAG_TYPE f;
		LFB_FRAG_DEPTH(f) = 0.0;
		for (int j = 0; j < MERGE_SIZE; ++j)
		{
			if (next[j] >= j * MAX_REGISTERS)
			{
				//the first live head is always accepted so that heads with
				//depth <= 0.0 are still selected; later heads must be strictly deeper
				if (n < 0 || LFB_FRAG_DEPTH(registers[j]) > LFB_FRAG_DEPTH(f))
				{
					f = registers[j];
					n = j;
				}
			}
		}

		//every merged block is exhausted: nothing left to composite
		if (n < 0)
			break;

		//advance the chosen block; the loop keeps register indexing in step with j
		for (int j = 0; j < MERGE_SIZE; ++j)
			if (n == j)
				if (--next[j] >= j * MAX_REGISTERS)
					registers[j] = FRAGS(next[j]);

#if DEBUG
		float thisDepth = LFB_FRAG_DEPTH(f);
		if (thisDepth > lastDepth)
		{
			//error: out of order!
			fragColour = vec4(1,0,0,1);
			return;
		}
		lastDepth = thisDepth;
#endif

		vec4 col = floatToRGBA8(f.x); //extract rgba from rg
		fragColour.rgb = mix(fragColour.rgb, col.rgb, col.a);
		//fragColour.rgb += col.rgb * col.a * fragColour.a;
		//fragColour.a *= (1.0 - col.a);
	}
}
#endif
#endif
//RBS global memory
#if BLOCKSORT_GMEM
void sortAndCompositeBlocks(int fragIndex)
{
#if MERGE_SIZE > MAX_REGISTERS
#error Need more registers for merge cache
#endif
#undef SWAP_FRAGS
#define SWAP_FRAGS(a, b) {tmp = a; a = b; b = tmp;}
#undef IF_CSWAP_I
#define IF_CSWAP_I(i, j) \
if (LFB_FRAG_DEPTH(registers[i]) > LFB_FRAG_DEPTH(registers[j])) {SWAP_FRAGS(registers[i], registers[j])
int mergeSize = 0;
int next[MERGE_SIZE];
int left[MERGE_SIZE];
int reader, writer;
LFB_INIT(lfb, fragIndex);
reader = writer = LFB_EXPOSE_TABLE_GET(headPtrslfb, lfbTmplfb.fragIndex);
LFB_FRAG_TYPE tmp;
int fragCount = 0;
while (reader != 0 && fragCount < MAX_FRAGS)
{
//#pragma optionNV(unroll all)
int count = 0;
#if 1
for (int i = 0; i < MAX_REGISTERS && reader != 0; ++i)
{
registers[i] = LFB_EXPOSE_DATA_GET(datalfb, reader);
reader = LFB_EXPOSE_TABLE_GET(nextPtrslfb, reader);
++count;
}
#else
#define READ_FRAG(i) registers[i] = LFB_EXPOSE_DATA_GET(datalfb, reader); reader = LFB_EXPOSE_TABLE_GET(nextPtrslfb, reader); ++count;
READ_FRAG(0);
if (reader != 0) {READ_FRAG(1);
if (reader != 0) {READ_FRAG(2);
if (reader != 0) {READ_FRAG(3);
if (reader != 0) {READ_FRAG(4);
if (reader != 0) {READ_FRAG(5);
if (reader != 0) {READ_FRAG(6);
if (reader != 0) {READ_FRAG(7);
if (reader != 0) {READ_FRAG(8);
if (reader != 0) {READ_FRAG(9);
if (reader != 0) {READ_FRAG(10);
if (reader != 0) {READ_FRAG(11);
if (reader != 0) {READ_FRAG(12);
if (reader != 0) {READ_FRAG(13);
if (reader != 0) {READ_FRAG(14);
if (reader != 0) {READ_FRAG(15);
#if MAX_REGISTERS > 16
if (reader != 0) {READ_FRAG(16);
if (reader != 0) {READ_FRAG(17);
if (reader != 0) {READ_FRAG(18);
if (reader != 0) {READ_FRAG(19);
if (reader != 0) {READ_FRAG(20);
if (reader != 0) {READ_FRAG(21);
if (reader != 0) {READ_FRAG(22);
if (reader != 0) {READ_FRAG(23);
if (reader != 0) {READ_FRAG(24);
if (reader != 0) {READ_FRAG(25);
if (reader != 0) {READ_FRAG(26);
if (reader != 0) {READ_FRAG(27);
if (reader != 0) {READ_FRAG(28);
if (reader != 0) {READ_FRAG(29);
if (reader != 0) {READ_FRAG(30);
if (reader != 0) {READ_FRAG(31);
}}}}}}}}}}}}}}}}
#endif
}}}}}}}} }}}}}}}
#endif
fragCount += count;
#if 1
if (count > 1) {IF_CSWAP_I(0, 1)}
if (count > 2) {IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}
if (count > 3) {IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}
if (count > 4) {IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}
if (count > 5) {IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}
if (count > 6) {IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}
if (count > 7) {IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}
if (count > 8) {IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}
if (count > 9) {IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}
if (count > 10) {IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}
if (count > 11) {IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}
if (count > 12) {IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}
if (count > 13) {IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}
if (count > 14) {IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}
if (count > 15) {IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}
#if MAX_REGISTERS > 16
if (count > 16) {IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}
if (count > 17) {IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}
if (count > 18) {IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}
if (count > 19) {IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}
if (count > 20) {IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}
if (count > 21) {IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}
if (count > 22) {IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}
if (count > 23) {IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}}
if (count > 24) {IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 25) {IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 26) {IF_CSWAP_I(25, 26)IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 27) {IF_CSWAP_I(26, 27)IF_CSWAP_I(25, 26)IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 28) {IF_CSWAP_I(27, 28)IF_CSWAP_I(26, 27)IF_CSWAP_I(25, 26)IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 29) {IF_CSWAP_I(28, 29)IF_CSWAP_I(27, 28)IF_CSWAP_I(26, 27)IF_CSWAP_I(25, 26)IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 30) {IF_CSWAP_I(29, 30)IF_CSWAP_I(28, 29)IF_CSWAP_I(27, 28)IF_CSWAP_I(26, 27)IF_CSWAP_I(25, 26)IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
if (count > 31) {IF_CSWAP_I(30, 31)IF_CSWAP_I(29, 30)IF_CSWAP_I(28, 29)IF_CSWAP_I(27, 28)IF_CSWAP_I(26, 27)IF_CSWAP_I(25, 26)IF_CSWAP_I(24, 25)IF_CSWAP_I(23, 24)IF_CSWAP_I(22, 23)IF_CSWAP_I(21, 22)IF_CSWAP_I(20, 21)IF_CSWAP_I(19, 20)IF_CSWAP_I(18, 19)IF_CSWAP_I(17, 18)IF_CSWAP_I(16, 17)IF_CSWAP_I(15, 16)IF_CSWAP_I(14, 15)IF_CSWAP_I(13, 14)IF_CSWAP_I(12, 13)IF_CSWAP_I(11, 12)IF_CSWAP_I(10, 11)IF_CSWAP_I(9, 10)IF_CSWAP_I(8, 9)IF_CSWAP_I(7, 8)IF_CSWAP_I(6, 7)IF_CSWAP_I(5, 6)IF_CSWAP_I(4, 5)IF_CSWAP_I(3, 4)IF_CSWAP_I(2, 3)IF_CSWAP_I(1, 2)IF_CSWAP_I(0, 1)}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}
}}}}}}}}}}}}}}}}
#endif
}}}}}}}} }}}}}}}
#else //bitonic
#define PAD_FRAG(i) LFB_FRAG_DEPTH(registers[i]) = 0.0;
#undef SWAP
#define SWAP(i, j) if (LFB_FRAG_DEPTH(registers[i]) < LFB_FRAG_DEPTH(registers[j])) SWAP_FRAGS(registers[i], registers[j])
if (count < 1) PAD_FRAG(0);
if (count < 2) PAD_FRAG(1);
if (count < 3) PAD_FRAG(2);
if (count < 4) PAD_FRAG(3);
if (count < 5) PAD_FRAG(4);
if (count < 6) PAD_FRAG(5);
if (count < 7) PAD_FRAG(6);
if (count < 8) PAD_FRAG(7);
#if MAX_REGISTERS > 8
if (count < 9) PAD_FRAG(8);
if (count < 10) PAD_FRAG(9);
if (count < 11) PAD_FRAG(10);
if (count < 12) PAD_FRAG(11);
if (count < 13) PAD_FRAG(12);
if (count < 14) PAD_FRAG(13);
if (count < 15) PAD_FRAG(14);
if (count < 16) PAD_FRAG(15);
#if MAX_REGISTERS > 16
if (count < 17) PAD_FRAG(16);
if (count < 18) PAD_FRAG(17);
if (count < 19) PAD_FRAG(18);
if (count < 20) PAD_FRAG(19);
if (count < 21) PAD_FRAG(20);
if (count < 22) PAD_FRAG(21);
if (count < 23) PAD_FRAG(22);
if (count < 24) PAD_FRAG(23);
if (count < 25) PAD_FRAG(24);
if (count < 26) PAD_FRAG(25);
if (count < 27) PAD_FRAG(26);
if (count < 28) PAD_FRAG(27);
if (count < 29) PAD_FRAG(28);
if (count < 30) PAD_FRAG(29);
if (count < 31) PAD_FRAG(30);
if (count < 32) PAD_FRAG(31);
#endif
#endif
SWAP(0, 1);SWAP(3, 2);SWAP(4, 5);SWAP(7, 6);
#if MAX_REGISTERS > 8
SWAP(8, 9);SWAP(11, 10);SWAP(12, 13);SWAP(15, 14);
#if MAX_REGISTERS > 16
SWAP(16, 17);SWAP(19, 18);SWAP(20, 21);SWAP(23, 22);SWAP(24, 25);SWAP(27, 26);SWAP(28, 29);SWAP(31, 30);
#endif
#endif
SWAP(0, 2);SWAP(1, 3);SWAP(6, 4);SWAP(7, 5);
#if MAX_REGISTERS > 8
SWAP(8, 10);SWAP(9, 11);SWAP(14, 12);SWAP(15, 13);
#if MAX_REGISTERS > 16
SWAP(16, 18);SWAP(17, 19);SWAP(22, 20);SWAP(23, 21);SWAP(24, 26);SWAP(25, 27);SWAP(30, 28);SWAP(31, 29);
#endif
#endif
SWAP(0, 1);SWAP(2, 3);SWAP(5, 4);SWAP(7, 6);
#if MAX_REGISTERS > 8
SWAP(8, 9);SWAP(10, 11);SWAP(13, 12);SWAP(15, 14);
#if MAX_REGISTERS > 16
SWAP(16, 17);SWAP(18, 19);SWAP(21, 20);SWAP(23, 22);SWAP(24, 25);SWAP(26, 27);SWAP(29, 28);SWAP(31, 30);
#endif
#endif
SWAP(0, 4);SWAP(1, 5);SWAP(2, 6);SWAP(3, 7);
#if MAX_REGISTERS > 8
SWAP(12, 8);SWAP(13, 9);SWAP(14, 10);SWAP(15, 11);
#if MAX_REGISTERS > 16
SWAP(16, 20);SWAP(17, 21);SWAP(18, 22);SWAP(19, 23);SWAP(28, 24);SWAP(29, 25);SWAP(30, 26);SWAP(31, 27);
#endif
#endif
SWAP(0, 2);SWAP(1, 3);SWAP(4, 6);SWAP(5, 7);
#if MAX_REGISTERS > 8
SWAP(10, 8);SWAP(11, 9);SWAP(14, 12);SWAP(15, 13);
#if MAX_REGISTERS > 16
SWAP(16, 18);SWAP(17, 19);SWAP(20, 22);SWAP(21, 23);SWAP(26, 24);SWAP(27, 25);SWAP(30, 28);SWAP(31, 29);
#endif
#endif
SWAP(0, 1);SWAP(2, 3);SWAP(4, 5);SWAP(6, 7);
#if MAX_REGISTERS > 8
SWAP(9, 8);SWAP(11, 10);SWAP(13, 12);SWAP(15, 14);
#if MAX_REGISTERS > 16
SWAP(16, 17);SWAP(18, 19);SWAP(20, 21);SWAP(22, 23);SWAP(25, 24);SWAP(27, 26);SWAP(29, 28);SWAP(31, 30);
#endif
#endif
#if MAX_REGISTERS > 8
SWAP(0, 8);SWAP(1, 9);SWAP(2, 10);SWAP(3, 11);SWAP(4, 12);SWAP(5, 13);SWAP(6, 14);SWAP(7, 15);
#if MAX_REGISTERS > 16
SWAP(24, 16);SWAP(25, 17);SWAP(26, 18);SWAP(27, 19);SWAP(28, 20);SWAP(29, 21);SWAP(30, 22);SWAP(31, 23);
#endif
SWAP(0, 4);SWAP(1, 5);SWAP(2, 6);SWAP(3, 7);SWAP(8, 12);SWAP(9, 13);SWAP(10, 14);SWAP(11, 15);
#if MAX_REGISTERS > 16
SWAP(20, 16);SWAP(21, 17);SWAP(22, 18);SWAP(23, 19);SWAP(28, 24);SWAP(29, 25);SWAP(30, 26);SWAP(31, 27);
#endif
SWAP(0, 2);SWAP(1, 3);SWAP(4, 6);SWAP(5, 7);SWAP(8, 10);SWAP(9, 11);SWAP(12, 14);SWAP(13, 15);
#if MAX_REGISTERS > 16
SWAP(18, 16);SWAP(19, 17);SWAP(22, 20);SWAP(23, 21);SWAP(26, 24);SWAP(27, 25);SWAP(30, 28);SWAP(31, 29);
#endif
SWAP(0, 1);SWAP(2, 3);SWAP(4, 5);SWAP(6, 7);SWAP(8, 9);SWAP(10, 11);SWAP(12, 13);SWAP(14, 15);
#if MAX_REGISTERS > 16
SWAP(17, 16);SWAP(19, 18);SWAP(21, 20);SWAP(23, 22);SWAP(25, 24);SWAP(27, 26);SWAP(29, 28);SWAP(31, 30);
#endif
#if MAX_REGISTERS > 16
SWAP(0, 16);SWAP(1, 17);SWAP(2, 18);SWAP(3, 19);SWAP(4, 20);SWAP(5, 21);SWAP(6, 22);SWAP(7, 23);SWAP(8, 24);SWAP(9, 25);SWAP(10, 26);SWAP(11, 27);SWAP(12, 28);SWAP(13, 29);SWAP(14, 30);SWAP(15, 31);
SWAP(0, 8);SWAP(1, 9);SWAP(2, 10);SWAP(3, 11);SWAP(4, 12);SWAP(5, 13);SWAP(6, 14);SWAP(7, 15);SWAP(16, 24);SWAP(17, 25);SWAP(18, 26);SWAP(19, 27);SWAP(20, 28);SWAP(21, 29);SWAP(22, 30);SWAP(23, 31);
SWAP(0, 4);SWAP(1, 5);SWAP(2, 6);SWAP(3, 7);SWAP(8, 12);SWAP(9, 13);SWAP(10, 14);SWAP(11, 15);SWAP(16, 20);SWAP(17, 21);SWAP(18, 22);SWAP(19, 23);SWAP(24, 28);SWAP(25, 29);SWAP(26, 30);SWAP(27, 31);
SWAP(0, 2);SWAP(1, 3);SWAP(4, 6);SWAP(5, 7);SWAP(8, 10);SWAP(9, 11);SWAP(12, 14);SWAP(13, 15);SWAP(16, 18);SWAP(17, 19);SWAP(20, 22);SWAP(21, 23);SWAP(24, 26);SWAP(25, 27);SWAP(28, 30);SWAP(29, 31);
SWAP(0, 1);SWAP(2, 3);SWAP(4, 5);SWAP(6, 7);SWAP(8, 9);SWAP(10, 11);SWAP(12, 13);SWAP(14, 15);SWAP(16, 17);SWAP(18, 19);SWAP(20, 21);SWAP(22, 23);SWAP(24, 25);SWAP(26, 27);SWAP(28, 29);SWAP(30, 31);
#endif
#endif
#endif
for (int i = 0; i < MERGE_SIZE; ++i)
{
if (i == mergeSize)
{
left[i] = count;
next[i] = writer;
}
}
++mergeSize;
#if 1
for (int i = 0; i < MAX_REGISTERS && writer != 0; ++i)
{
LFB_EXPOSE_DATA_SET(datalfb, writer, registers[i]);
writer = LFB_EXPOSE_TABLE_GET(nextPtrslfb, writer);
}
#else
#define WRITE_FRAG(i) LFB_EXPOSE_DATA_SET(datalfb, writer, registers[i]); writer = LFB_EXPOSE_TABLE_GET(nextPtrslfb, writer);
WRITE_FRAG(0);
if (writer != 0) {WRITE_FRAG(1);
if (writer != 0) {WRITE_FRAG(2);
if (writer != 0) {WRITE_FRAG(3);
if (writer != 0) {WRITE_FRAG(4);
if (writer != 0) {WRITE_FRAG(5);
if (writer != 0) {WRITE_FRAG(6);
if (writer != 0) {WRITE_FRAG(7);
if (writer != 0) {WRITE_FRAG(8);
if (writer != 0) {WRITE_FRAG(9);
if (writer != 0) {WRITE_FRAG(10);
if (writer != 0) {WRITE_FRAG(11);
if (writer != 0) {WRITE_FRAG(12);
if (writer != 0) {WRITE_FRAG(13);
if (writer != 0) {WRITE_FRAG(14);
if (writer != 0) {WRITE_FRAG(15);
#if MAX_REGISTERS > 16
if (writer != 0) {WRITE_FRAG(16);
if (writer != 0) {WRITE_FRAG(17);
if (writer != 0) {WRITE_FRAG(18);
if (writer != 0) {WRITE_FRAG(19);
if (writer != 0) {WRITE_FRAG(20);
if (writer != 0) {WRITE_FRAG(21);
if (writer != 0) {WRITE_FRAG(22);
if (writer != 0) {WRITE_FRAG(23);
if (writer != 0) {WRITE_FRAG(24);
if (writer != 0) {WRITE_FRAG(25);
if (writer != 0) {WRITE_FRAG(26);
if (writer != 0) {WRITE_FRAG(27);
if (writer != 0) {WRITE_FRAG(28);
if (writer != 0) {WRITE_FRAG(29);
if (writer != 0) {WRITE_FRAG(30);
if (writer != 0) {WRITE_FRAG(31);
}}}}}}}}}}}}}}}}
#endif
}}}}}}}} }}}}}}}
#endif
}
//prime the n-way merge cache
for (int i = 0; i < MERGE_SIZE && i < mergeSize; ++i)
{
registers[i] = LFB_EXPOSE_DATA_GET(datalfb, next[i]);
next[i] = LFB_EXPOSE_TABLE_GET(nextPtrslfb, next[i]);
}
//merge and composite blocks
fragColour = vec4(0.0, 0.0, 0.0, 1.0);
for (int i = 0; i < fragCount; ++i)
{
int n; //n *should* be set by the end of the loop
LFB_FRAG_TYPE f;
LFB_FRAG_DEPTH(f) = 999999.0;
for (int j = 0; j < MERGE_SIZE && j < mergeSize; ++j)
{
if (left[j] > 0 && LFB_FRAG_DEPTH(registers[j]) < LFB_FRAG_DEPTH(f))
{
f = registers[j];
n = j;
}
}
//FIXME: replace with switch?
for (int j = 0; j < MERGE_SIZE; ++j)
{
if (n == j)
{
registers[j] = LFB_EXPOSE_DATA_GET(datalfb, next[j]);
next[j] = LFB_EXPOSE_TABLE_GET(nextPtrslfb, next[j]);
--left[j];
}
}
vec4 col = floatToRGBA8(f.x); //extract rgba from rg
//fragColour.rgb = mix(fragColour.rgb, col.rgb, col.a);
fragColour.rgb += col.rgb * col.a * fragColour.a;
fragColour.a *= (1.0 - col.a);
}
fragColour.rgb += fragColour.a;
fragColour.a = 1.0;
}
#endif
#if BLOCKSORT_BASE
// Insertion sort of the fragments FRAGS(offset) .. FRAGS(end - 1) into
// ascending LFB_FRAG_DEPTH order.
//   offset: index of the first fragment of the block (inclusive)
//   end:    index one past the last fragment of the block (exclusive)
// Fragments that compare equal keep their relative order, because an element
// is only shifted when its depth is strictly greater than the one being placed.
void sortBlock(int offset, int end)
{
	for (int unsortedIdx = offset + 1; unsortedIdx < end; ++unsortedIdx)
	{
		// The fragment being inserted into the already-sorted prefix.
		LFB_FRAG_TYPE pending = FRAGS(unsortedIdx);

		// Walk the hole downwards, shifting deeper fragments up by one slot.
		int slot = unsortedIdx;
		for (; slot > offset; --slot)
		{
			if (!(LFB_FRAG_DEPTH(FRAGS(slot-1)) > LFB_FRAG_DEPTH(pending)))
				break;
			FRAGS(slot) = FRAGS(slot-1);
		}

		FRAGS(slot) = pending;
	}
}
// Sorts the fragments of one fragment list in fixed-size blocks, then merges
// the blocks while compositing into fragColour.
//
//   fragIndex: forwarded to LFB_INIT to select the fragment list to process.
//
// Steps:
//   1. Copy up to MAX_FRAGS fragments into FRAGS (extra fragments are dropped).
//   2. Insertion-sort each run of BLOCK_SIZE fragments with sortBlock()
//      (ascending depth), remembering the index of each block's last element.
//   3. Repeatedly take the largest remaining depth among the block tails and
//      blend its colour over fragColour, i.e. composite in decreasing depth
//      order starting from a white (vec4(1.0)) background.
void sortAndCompositeBlocks(int fragIndex)
{
	LFB_INIT(lfb, fragIndex);

	// Gather the fragments; anything beyond MAX_FRAGS is ignored.
	int fragCount = 0;
	LFB_FOREACH(lfb, frag)
		if (fragCount < MAX_FRAGS)
		{
			FRAGS(fragCount) = frag;
			++fragCount;
		}
	} // closes the scope left open by LFB_FOREACH (required for the braces to balance)

#define BLOCK_SIZE 32
#if MAX_FRAGS > BLOCK_SIZE
// Round up so a partially filled last block still has a slot in next[]
// when MAX_FRAGS is not a multiple of BLOCK_SIZE.
#define BLOCK_MERGE_SIZE ((MAX_FRAGS + BLOCK_SIZE - 1) / BLOCK_SIZE)
#else
#define BLOCK_MERGE_SIZE 1
#endif

	// Number of blocks actually in use: ceil(fragCount / BLOCK_SIZE).
	int mergeSize = (fragCount / BLOCK_SIZE) + (fragCount % BLOCK_SIZE == 0 ? 0 : 1);

	// next[b] is the index of the deepest not-yet-composited fragment of block b.
	// A block is exhausted once next[b] drops below its first index, b * BLOCK_SIZE.
	int next[BLOCK_MERGE_SIZE];
	for (int i = 0; i < mergeSize; ++i)
	{
		int end = min((i+1) * BLOCK_SIZE, fragCount);
		sortBlock(i * BLOCK_SIZE, end);
		next[i] = end - 1;
	}

	fragColour = vec4(1.0);
	for (int i = 0; i < fragCount; ++i)
	{
		// Pick the block whose tail has the greatest depth. The first non-empty
		// block seeds the search, so a block is always chosen even when every
		// remaining depth is <= 0.0 (previously n could be left uninitialised).
		int n = -1;
		float bestDepth = 0.0;
		for (int j = 0; j < mergeSize; ++j)
		{
			if (next[j] >= j * BLOCK_SIZE)
			{
				float tailDepth = LFB_FRAG_DEPTH(FRAGS(next[j]));
				if (n < 0 || tailDepth > bestDepth)
				{
					bestDepth = tailDepth;
					n = j;
				}
			}
		}

		// Defensive: every block is exhausted. Not expected, since exactly
		// fragCount fragments are spread over the blocks.
		if (n < 0)
			break;

		// Consume the chosen fragment from its block.
		LFB_FRAG_TYPE f = FRAGS(next[n]);
		--next[n];

		vec4 col = floatToRGBA8(f.x); //unpack the colour stored in f.x
		fragColour.rgb = mix(fragColour.rgb, col.rgb, col.a);
	}
}
#endif