By Manuel Wiesinger (0825632) and Florian Schweikert (0825224)

C-File

Original

  tsc branch mis. L2 accesses u+sys instr u cycles sys cycles
Original 5959477640 8715955 31014072 2208801396 5396134850 563609274
Code Motion Out of Loops (cube-only) 5942767490 8714386 30964939 2208228686 5383009066 560006140
Precompute Logical Functions 5966822300 8718101 31061537 2208721466 5401281670 565803216
Short circuiting Monotone Functions 5948175840 8714332 30991731 2207619456 5388212693 560236818
Code Motion Out of Loops (malloc) 4022532060 5714512 33334407 718810209 3569633339 453076908
Defer init 4029017660 5716107 33383267 719205239 3575807222 453386998
cube to makro 3751759080 4943210 33293716 680737223 3299406269 452554596
hash to makro 3705133350 4972240 33270328 568827250 3254863619 450430868

In detail

Original

68   for (i=0; cube(i)<=n; i++)
69     for (j=i+1; cube(i)+cube(j)<=n; j++) {
70       long sum = cube(i)+cube(j);
71       struct node **sumnodepp = lookup(sum,table,table_size);
72       if (*sumnodepp != NULL) {
73         (*sumnodepp)->count++;
74         if ((*sumnodepp)->count==2) /* don't count additional hits */
75           count++;
76       } else {
77         struct node *new = malloc(sizeof(struct node));
78         new->next = NULL;
79         new->value = sum;
80         new->count = 1;
81         *sumnodepp = new;
82         occupation++;
83       }
84     }

Code Motion Out of Loops (cube-only)

70   for (i=0; cubei<=n; i++) {
71     cubei = cube(i);
72     cubej = cube(i+1);
73
74     sum = cubei + cubej;
75     for (j=i+1; sum<=n; j++) {
76       struct node **sumnodepp = lookup(sum,table,table_size);
77       if (*sumnodepp != NULL) {
78         (*sumnodepp)->count++;
79         if ((*sumnodepp)->count==2) /* don't count additional hits */
80           count++;
81       } else {
82         struct node *new = malloc(sizeof(struct node));
83         new->next = NULL;
84         new->value = sum;
85         new->count = 1;
86         *sumnodepp = new;
87         occupation++;
88       }
89       cubej = cube(j+1);
90       sum = cubei + cubej;
91     }
92   }

Precompute Logical Functions

15,16d14
< #define factor (2.0/(3.0*log(2.0)))
<
32c30
<   return 1<<(long)(log((double)n)* factor);
---
>   return 1<<(long)(log((double)n)*(2.0/(3.0*log(2.0))));
61,62d58

Short circuiting Monotone Functions

We analised the inner loop, it is called 1918 times unnecessarly by the outer loop. (for n= 10^11)

72   for (i=0; cubei<=n; i++) {
73     cubei = cube(i);
74     cubej = cube(i+1);
75
76     sum = cubei + cubej;
77     if (sum>n)
78       break;
79
80     for (j=i+1; sum<=n; j++) {
81       struct node **sumnodepp = lookup(sum,table,table_size);
82       if (*sumnodepp != NULL) {
83         (*sumnodepp)->count++;
84         if ((*sumnodepp)->count==2) /* don't count additional hits */
85           count++;
86       } else {
87         struct node *new = malloc(sizeof(struct node));
88         new->next = NULL;
89         new->value = sum;
90         new->count = 1;
91         *sumnodepp = new;
92         occupation++;
93       }
94       cubej = cube(j+1);
95       sum = cubei + cubej;
96     }
97   }

Code Motion Out of Loops (malloc)

72   int c = 0;
73   for(i=0;cubei <= n; i++) {
74     cubei = cube(i);
75     cubej = cube(i+1);
76     sum = cubei + cubej;
77     if (sum>n)
78       break;
79     for(j=i+1; sum <= n; j++ ) {
80       c++;
81
82       cubej = cube(j+1);
83       sum = cubei + cubej;
84     }
85   }
86
87   struct node *new = malloc(c*sizeof(struct node));
88
89   cubei = 0;
90   cubej = 0;
91   sum = 0;
92
93   for (i=0; cubei<=n; i++) {
94     cubei = cube(i);
95     cubej = cube(i+1);
96
97     sum = cubei + cubej;
98     if (sum>n)
99       break;
100
101     for (j=i+1; sum<=n; j++) {
102       struct node **sumnodepp = lookup(sum,table,table_size);
103       if (*sumnodepp != NULL) {
104         (*sumnodepp)->count++;
105         if ((*sumnodepp)->count==2) /* don't count additional hits */
106           count++;
107       } else {
108  //       struct node *new = malloc(sizeof(struct node));
109         new++;
110         new->next = NULL;
111         new->value = sum;
112         new->count = 1;
113         *sumnodepp = new;
114         occupation++;
115       }
116       cubej = cube(j+1);
117       sum = cubei + cubej;
118     }
119   }

-32.5 % compared to the original

Total

-37.8 % compared to the original