1 # Script to optimize SSYRK: two levels of 3-D tiling, two loop interchanges, parallel-loop marking, then split/unroll of the innermost loops.
12 dumpcode Interchange.1
14 # =================================================
17 PT_tile3d({0}, 1,2,3, 51,52,50)
18 dumpcode afterCacheTile
19 # Now we have jcT, kcT, icT, j, k, i (positions 1-3 are the new tile loops, positions 4-6 the original j, k, i)
23 PT_tile3d({0}, 4,5,6, 3,4,50)
24 dumpcode ssyrk.afterRegTile
25 # Now we have jcT, kcT, icT, jrT, krT, irT, j, k, i (loops 4-6 were tiled again, giving jrT, krT, irT at positions 4-6)
29 PT_interchangebystmt(1,8,9)
30 dumpcode interchange.2
31 # Now we have jcT, kcT, icT, jrT, krT, irT, j, i, k (loops at positions 8 and 9 were swapped)
34 PT_interchangebystmt(1,7,8)
35 dumpcode interchange.3
36 # Now we have jcT, kcT, icT, jrT, krT, irT, i, j, k (loops at positions 7 and 8 were swapped)
39 # Compute dependences without ADA and RAR (currently disabled: the call below is commented out)
40 #PT_depcompute(0,0,0,0)
43 # Detect parallel loops (the markparallel form is active; the detectparallelism form below is commented out)
44 CG_codegenopt(markparallel, {{0}}, 0, BMT_exact)
45 #CG_codegenopt(detectparallelism, {{0}}, BMT_exact)
48 # NOTE: each outer-tile size should be a multiple of the corresponding inner-tile size (here 51,52,50 versus 3,4,50 in the two PT_tile3d calls above).
49 CG_codegenopt(insetsplitandunrollwithtrans, {{0,0,0,0,0,0,0}}, {7,8,9}, {6,4,5}, {50,3,4}, 2, {{0,1,0}, {0,0,1}, {1,0,0}}, BMT_exact)