2 Data set collection manipulation functions.
4 A data set file is defined as 4 bytes (unsigned int) indicating the
5 number of training cases, followed by the elements of struct
8 The examples are stored unique and invariant of flips and rotations; when
9 loaded via the data_set_load function they are flipped and rotated to increase
/* Number of entries held in data_set; assigned once loading has finished. */
28 static u32 data_set_size
;
/*
 * Array of pointers to the individually allocated training examples.
 * Stays NULL until a data set is loaded (the loader asserts on this).
 */
29 static training_example
** data_set
= NULL
;
32 Shuffle the first num entries.
/*
 * Reorders leading entries of data_set in place by swapping pointers; the
 * pointed-to examples themselves are not copied.
 * NOTE(review): the parameter list, the declaration of i, the statement that
 * stores tmp back and the closing braces are not visible in this listing.
 */
35 void data_set_shuffle(
/* Requires at least two entries and no more than are currently loaded. */
38 assert(num
> 1 && num
<= data_set_size
);
/* Walk from index num - 1 down to index 1. */
41 for (i
= num
- 1; i
> 0; --i
) {
/* Partner index; presumably drawn from [0, i] -- TODO confirm rand_u32's range. */
42 u32 j
= rand_u32(i
+ 1);
/* Keep entry i aside before slot i is overwritten with entry j. */
43 training_example
* tmp
= data_set
[i
];
44 data_set
[i
] = data_set
[j
];
/*
 * Shuffles every loaded entry by passing the full entry count to
 * data_set_shuffle. That function asserts the count is greater than 1, so a
 * data set with fewer than two entries trips the assertion.
 */
53 void data_set_shuffle_all() {
54 data_set_shuffle(data_set_size
);
58 Read a data set and shuffle it.
59 RETURNS data set size (number of cases)
/*
 * Delegates to data_set_load2 with UINT32_MAX as its argument and returns
 * whatever that call returns.
 * NOTE(review): the enclosing function signature is not visible in this listing.
 */
62 return data_set_load2(UINT32_MAX
);
66 Read a data set, with a maximum size, and shuffle it.
67 RETURNS data set size (number of cases)
/*
 * Loads the data set file for the current board size into data_set, adds
 * transformed variants of each stored example, shuffles the result and
 * returns the final entry count.
 * NOTE(review): the signature, several declarations (ds_elems, insert, tmp,
 * s, i), guarding conditions and closing braces are not visible in this
 * listing; comments below describe only the statements that are shown.
 */
/* A data set must not already be loaded. */
72 assert(data_set
== NULL
);
/* Build the file name from data_folder() and the board dimensions. */
/* Assumes the buffer returned by alloc() holds MAX_PAGE_SIZ bytes -- TODO confirm. */
74 char * filename
= alloc();
75 snprintf(filename
, MAX_PAGE_SIZ
, "%s%dx%d.ds", data_folder(), BOARD_SIZ
, BOARD_SIZ
);
/* Opened in binary read mode. */
76 FILE * fp
= fopen(filename
, "rb");
/* Presumably guarded by a check that fp is NULL -- the condition is not shown. */
80 flog_crit("dset", "could not open file for reading\n");
/* The file starts with one u32: the number of stored examples. */
84 size_t r
= fread(&ds_elems
, sizeof(u32
), 1, fp
);
/* Presumably guarded by a check on r -- the condition is not shown. */
87 flog_crit("dset", "communication failure\n");
/* Cap the number of examples read at max. */
92 ds_elems
= MIN(ds_elems
, max
);
/*
 * Reserve 8 pointer slots per stored example: the example itself plus the
 * up to 7 variants produced by the r = 2..8 loop further down.
 */
94 data_set
= malloc(sizeof(training_example
*) * ds_elems
* 8);
95 if (data_set
== NULL
) {
96 flog_crit("dset", "system out of memory\n");
/* One iteration per stored example. */
101 for (i
= 0; i
< ds_elems
; ++i
) {
/* Allocate the slot at index insert and read one example from the file into it. */
102 data_set
[insert
] = malloc(sizeof(training_example
));
104 if (data_set
[insert
] == NULL
) {
105 flog_crit("dset", "system out of memory (1)\n");
/* NOTE(review): no check of this fread result is visible in this listing. */
108 r
= fread(data_set
[insert
], sizeof(training_example
), 1, fp
);
/* Index of the example just read; its variants are derived from and compared against it. */
110 u32 base_insert
= insert
;
114 Generate more (0-7) cases from reduced ones
/* NOTE(review): this d8 r shadows the size_t r declared above. */
117 for (d8 r
= 2; r
< 9; ++r
) {
/* Start every variant from the base example's board contents. */
118 memcpy(&tmp
.p
, &data_set
[base_insert
]->p
, TOTAL_BOARD_SIZ
);
/* Reset both fields to NONE before transforming. */
119 tmp
.last_played
= tmp
.last_eaten
= NONE
;
/* Apply transformation r to tmp; presumably a flip/rotation -- TODO confirm in reduce_fixed. */
120 reduce_fixed(&tmp
, r
);
/* Compare the transformed board with every entry already stored for this example. */
122 bool repeated
= false;
123 for (u32 j
= base_insert
; j
< insert
; ++j
) {
124 if (memcmp(&tmp
.p
, &data_set
[j
]->p
, TOTAL_BOARD_SIZ
) == 0) {
/* Store the variant in a newly allocated slot (presumably only when not repeated -- the check is not shown). */
134 data_set
[insert
] = malloc(sizeof(training_example
));
135 if (data_set
[insert
] == NULL
) {
136 flog_crit("dset", "system out of memory (2)\n");
139 memcpy(&data_set
[insert
]->p
, &tmp
.p
, TOTAL_BOARD_SIZ
);
/* Copy the base example's move, then map it through reduce_move with the same r. */
141 data_set
[insert
]->m
= data_set
[base_insert
]->m
;
142 data_set
[insert
]->m
= reduce_move(data_set
[insert
]->m
, r
);
/* insert is taken as the total number of stored entries. */
146 data_set_size
= insert
;
151 data_set_shuffle_all();
/* Log how many examples were read from the file and how many entries resulted. */
154 snprintf(s
, MAX_PAGE_SIZ
, "Data set loaded with %u examples, yielding %u examples\n", ds_elems
, data_set_size
);
155 flog_info("dset", s
);
157 return data_set_size
;
161 Get a specific data set element by position.
/*
 * Returns the pointer stored at index pos of data_set.
 * No bounds check against data_set_size is visible here.
 * NOTE(review): the parameter list is not visible in this listing.
 */
163 training_example
* data_set_get(
166 return data_set
[pos
];