By Manuel Wiesinger (0825632) and Florian Schweikert (0825224)
tsc | branch mis. | L2 accesses | u+sys instr | u cycles | sys cycles | |
---|---|---|---|---|---|---|
Original | 5959477640 | 8715955 | 31014072 | 2208801396 | 5396134850 | 563609274 |
Code Motion Out of Loops (cube-only) | 5942767490 | 8714386 | 30964939 | 2208228686 | 5383009066 | 560006140 |
Precompute Logical Functions | 5966822300 | 8718101 | 31061537 | 2208721466 | 5401281670 | 565803216 |
Short circuiting Monotone Functions | 5948175840 | 8714332 | 30991731 | 2207619456 | 5388212693 | 560236818 |
Code Motion Out of Loops (malloc) | 4022532060 | 5714512 | 33334407 | 718810209 | 3569633339 | 453076908 |
Defer init | 4029017660 | 5716107 | 33383267 | 719205239 | 3575807222 | 453386998 |
cube to makro | 3751759080 | 4943210 | 33293716 | 680737223 | 3299406269 | 452554596 |
hash to makro | 3705133350 | 4972240 | 33270328 | 568827250 | 3254863619 | 450430868 |
In detail
68 for (i=0; cube(i)<=n; i++) 69 for (j=i+1; cube(i)+cube(j)<=n; j++) { 70 long sum = cube(i)+cube(j); 71 struct node **sumnodepp = lookup(sum,table,table_size); 72 if (*sumnodepp != NULL) { 73 (*sumnodepp)->count++; 74 if ((*sumnodepp)->count==2) /* don't count additional hits */ 75 count++; 76 } else { 77 struct node *new = malloc(sizeof(struct node)); 78 new->next = NULL; 79 new->value = sum; 80 new->count = 1; 81 *sumnodepp = new; 82 occupation++; 83 } 84 }
70 for (i=0; cubei<=n; i++) { 71 cubei = cube(i); 72 cubej = cube(i+1); 73 74 sum = cubei + cubej; 75 for (j=i+1; sum<=n; j++) { 76 struct node **sumnodepp = lookup(sum,table,table_size); 77 if (*sumnodepp != NULL) { 78 (*sumnodepp)->count++; 79 if ((*sumnodepp)->count==2) /* don't count additional hits */ 80 count++; 81 } else { 82 struct node *new = malloc(sizeof(struct node)); 83 new->next = NULL; 84 new->value = sum; 85 new->count = 1; 86 *sumnodepp = new; 87 occupation++; 88 } 89 cubej = cube(j+1); 90 sum = cubei + cubej; 91 } 92 }
15,16d14 < #define factor (2.0/(3.0*log(2.0))) < 32c30 < return 1<<(long)(log((double)n)* factor); --- > return 1<<(long)(log((double)n)*(2.0/(3.0*log(2.0)))); 61,62d58
We analised the inner loop, it is called 1918 times unnecessarly by the outer loop. (for n= 10^11)
72 for (i=0; cubei<=n; i++) { 73 cubei = cube(i); 74 cubej = cube(i+1); 75 76 sum = cubei + cubej; 77 if (sum>n) 78 break; 79 80 for (j=i+1; sum<=n; j++) { 81 struct node **sumnodepp = lookup(sum,table,table_size); 82 if (*sumnodepp != NULL) { 83 (*sumnodepp)->count++; 84 if ((*sumnodepp)->count==2) /* don't count additional hits */ 85 count++; 86 } else { 87 struct node *new = malloc(sizeof(struct node)); 88 new->next = NULL; 89 new->value = sum; 90 new->count = 1; 91 *sumnodepp = new; 92 occupation++; 93 } 94 cubej = cube(j+1); 95 sum = cubei + cubej; 96 } 97 }
72 int c = 0; 73 for(i=0;cubei <= n; i++) { 74 cubei = cube(i); 75 cubej = cube(i+1); 76 sum = cubei + cubej; 77 if (sum>n) 78 break; 79 for(j=i+1; sum <= n; j++ ) { 80 c++; 81 82 cubej = cube(j+1); 83 sum = cubei + cubej; 84 } 85 } 86 87 struct node *new = malloc(c*sizeof(struct node)); 88 89 cubei = 0; 90 cubej = 0; 91 sum = 0; 92 93 for (i=0; cubei<=n; i++) { 94 cubei = cube(i); 95 cubej = cube(i+1); 96 97 sum = cubei + cubej; 98 if (sum>n) 99 break; 100 101 for (j=i+1; sum<=n; j++) { 102 struct node **sumnodepp = lookup(sum,table,table_size); 103 if (*sumnodepp != NULL) { 104 (*sumnodepp)->count++; 105 if ((*sumnodepp)->count==2) /* don't count additional hits */ 106 count++; 107 } else { 108 // struct node *new = malloc(sizeof(struct node)); 109 new++; 110 new->next = NULL; 111 new->value = sum; 112 new->count = 1; 113 *sumnodepp = new; 114 occupation++; 115 } 116 cubej = cube(j+1); 117 sum = cubei + cubej; 118 } 119 }
-32.5 % compared to the original
-37.8 % compared to the original