|
| 1 | +#include <stdio.h> |
| 2 | +#include <stdlib.h> |
| 3 | +#include <gmp.h> |
| 4 | +#include "clock.h" |
| 5 | + |
| 6 | +/* we rely on mask_t to be an integer type |
| 7 | + * with at least 2 ** MAX_DIMENSION bits |
| 8 | + */ |
| 9 | +#define MAX_DIMENSION (6) |
| 10 | +typedef unsigned long long mask_t; |
| 11 | + |
| 12 | +mask_t fullmask[MAX_DIMENSION + 1] = { |
| 13 | + 0x0000000000000001, |
| 14 | + 0x0000000000000003, |
| 15 | + 0x000000000000000f, |
| 16 | + 0x00000000000000ff, |
| 17 | + 0x000000000000ffff, |
| 18 | + 0x00000000ffffffff, |
| 19 | + 0xffffffffffffffff |
| 20 | +}; |
| 21 | +unsigned int step = 0; |
| 22 | +clock_t t0; |
| 23 | + |
| 24 | +mpz_t sum[MAX_DIMENSION + 1]; |
| 25 | +mpz_t scratch[MAX_DIMENSION + 1]; |
| 26 | + |
| 27 | +void init_gmp(void) { |
| 28 | + int i; |
| 29 | + for (i = 0; i <= MAX_DIMENSION; ++i) { |
| 30 | + mpz_init(sum[i]); |
| 31 | + mpz_init(scratch[i]); |
| 32 | + } |
| 33 | +} |
| 34 | + |
| 35 | +void clear_gmp(void) { |
| 36 | + int i; |
| 37 | + for (i = 0; i <= MAX_DIMENSION; ++i) { |
| 38 | + mpz_clear(sum[i]); |
| 39 | + mpz_clear(scratch[i]); |
| 40 | + } |
| 41 | +} |
| 42 | + |
| 43 | +/* |
| 44 | + * count_part(d, mask): set sum[d] to the number of ways the specified |
| 45 | + * shape can be partitioned into polyominoes of size at most 2. |
| 46 | + * The shape is some or all of a I<d>-dimensional hypercube of side 2; |
| 47 | + * I<mask> is a vector of C<2 ** d> bits specifying the unit I<d>-cubes |
| 48 | + * that are included in the shape. |
| 49 | + * |
| 50 | + * Overwrites scratch[0..d] and sum[0..d] during calculation. |
| 51 | + */ |
| 52 | +void count_part(int d, mask_t mask) { |
| 53 | + mask_t left, right, shared, u; |
| 54 | + if (d == 0) { |
| 55 | + mpz_set_ui(sum[d], 1); |
| 56 | + return; |
| 57 | + } |
| 58 | + left = mask & fullmask[d - 1]; |
| 59 | + right = mask >> (1 << (d - 1)); |
| 60 | + shared = left & right; |
| 61 | + |
| 62 | + u = 0; |
| 63 | + mpz_set_ui(sum[d], 0); |
| 64 | + while (1) { |
| 65 | + count_part(d - 1, left ^ u); |
| 66 | + mpz_set(scratch[d], sum[d - 1]); |
| 67 | + count_part(d - 1, right ^ u); |
| 68 | + mpz_mul(scratch[d], scratch[d], sum[d - 1]); |
| 69 | + mpz_add(sum[d], sum[d], scratch[d]); |
| 70 | + u = (u - shared) & shared; |
| 71 | + if (u == 0) |
| 72 | + break; |
| 73 | + } |
| 74 | +} |
| 75 | + |
| 76 | +/* |
| 77 | + * count_full(d): same as count_part(d, fullmask[d]) |
| 78 | + */ |
| 79 | +void count_full(int d) { |
| 80 | + mask_t lim, u; |
| 81 | + if (d == 0) { |
| 82 | + mpz_set_ui(sum[d], 1); |
| 83 | + return; |
| 84 | + } |
| 85 | + lim = fullmask[d - 1]; |
| 86 | + mpz_set_ui(sum[d], 0); |
| 87 | + for (u = 0; u <= lim; ++u) { |
| 88 | + count_part(d - 1, u); |
| 89 | + mpz_mul(scratch[d], sum[d - 1], sum[d - 1]); |
| 90 | + mpz_add(sum[d], sum[d], scratch[d]); |
| 91 | + if ((++step & 0xfffff) == 0) { |
| 92 | + gmp_printf("300 at (%d, %llx) sum is %Zu (%.2f)\n", |
| 93 | + d, u, sum[d], difftime(t0, curtime())); |
| 94 | + } |
| 95 | + } |
| 96 | +} |
| 97 | + |
| 98 | +#define LIM4 (1 << (1 << 4)) |
| 99 | +unsigned int cache4[LIM4]; /* the sum of these is 41025, int is ample room */ |
| 100 | +void init_cache4(void) { |
| 101 | + int u; |
| 102 | + for (u = 0; u < LIM4; ++u) { |
| 103 | + count_part(4, (mask_t)u); |
| 104 | + cache4[u] = mpz_get_ui(sum[4]); |
| 105 | + } |
| 106 | +} |
| 107 | + |
| 108 | +/* |
| 109 | + * count_5_part(mask): same as count_part(5, mask) |
| 110 | + * |
| 111 | + * Uses a prepared cache of results for d=4, to minimize recursion. |
| 112 | + */ |
| 113 | +void count_5_part(mask_t mask) { |
| 114 | + mask_t left, right, shared, u; |
| 115 | + left = mask & fullmask[4]; |
| 116 | + right = mask >> (1 << 4); |
| 117 | + shared = left & right; |
| 118 | + u = 0; |
| 119 | + mpz_set_ui(sum[5], 0); |
| 120 | + while (1) { |
| 121 | + mpz_add_ui(sum[5], sum[5], cache4[left ^ u] * cache4[right ^ u]); |
| 122 | + u = (u - shared) & shared; |
| 123 | + if (u == 0) |
| 124 | + break; |
| 125 | + } |
| 126 | +} |
| 127 | + |
| 128 | +/* |
| 129 | + * count_6_full(): same as count_full(6) |
| 130 | + * |
| 131 | + * Uses a prepared cache of results for d=4 (via count_5_part()) to reduce |
| 132 | + * recursion at a reasonable memory cost. |
| 133 | + */ |
| 134 | +void count_6_full(void) { |
| 135 | + mask_t lim, u; |
| 136 | + init_cache4(); |
| 137 | + lim = fullmask[5]; |
| 138 | + mpz_set_ui(sum[6], 0); |
| 139 | + for (u = 0; u <= lim; ++u) { |
| 140 | + count_5_part(u); |
| 141 | + mpz_mul(scratch[6], sum[5], sum[5]); |
| 142 | + mpz_add(sum[6], sum[6], scratch[6]); |
| 143 | + if ((++step & 0x3ffffff) == 0) { |
| 144 | + gmp_printf("300 at (%d, %llx) sum is %Zu (%.2f)\n", |
| 145 | + 6, u, sum[6], difftime(t0, curtime())); |
| 146 | + } |
| 147 | + } |
| 148 | +} |
| 149 | + |
| 150 | +/* |
| 151 | + * Calculates the number of ways a I<d>-dimensional hypercube of side 2 |
| 152 | + * can be partitioned into polyominoes of size at most 2. |
| 153 | + */ |
| 154 | +int main(int argc, char** argv) { |
| 155 | + int d; |
| 156 | + |
| 157 | + if (argc != 2) { |
| 158 | + fprintf(stderr, "usage: %s <dimension>\n", argv[0]); |
| 159 | + return 1; |
| 160 | + } |
| 161 | + d = atoi(argv[1]); |
| 162 | + if (d < 0 || d > MAX_DIMENSION) { |
| 163 | + fprintf(stderr, "dimension must be in the range 0 to %u\n", |
| 164 | + MAX_DIMENSION); |
| 165 | + return 1; |
| 166 | + } |
| 167 | + |
| 168 | + setup_clock(); |
| 169 | + t0 = curtime(); |
| 170 | + init_gmp(); |
| 171 | + (d == 6) ? count_6_full() : count_full(d); |
| 172 | + gmp_printf("200 a(2, %d) = %Zu (%.2f)\n", |
| 173 | + d, sum[d], difftime(t0, curtime())); |
| 174 | + clear_gmp(); |
| 175 | + return 0; |
| 176 | +} |
0 commit comments