|
- #include <stdio.h>
- #include <string.h>
- #include <stdlib.h>
- #include "utils/base64.h"
-
// Compute the bit-level (Hamming) distance between two byte buffers: the
// number of bit positions in which the first i_size bytes of s1 and s2 differ.
//
// s1, s2 : buffers to compare; each must hold at least i_size bytes
// i_size : number of bytes compared from each buffer
//
// Returns the number of differing bits (0 when i_size is 0),
// or -1 on failure (s1 or s2 is NULL while i_size is non-zero).
// The buffers carry no length of their own, so a size mismatch between them
// cannot be detected here; the caller must guarantee both are long enough.
int block_distance(const unsigned char* const s1, const unsigned char* const s2, const unsigned i_size)
{
    int dist = 0;
    unsigned idx;

    // Nothing to compare: no byte is read, so the pointers are not inspected.
    if (i_size == 0)
        return 0;

    if (s1 == NULL || s2 == NULL)
        return -1;

    // The index is unsigned like i_size, avoiding a signed/unsigned comparison.
    for (idx = 0; idx < i_size; ++idx) {
        // Bits set in the XOR are exactly the bits where the two bytes differ.
        unsigned char diff = (unsigned char)(s1[idx] ^ s2[idx]);

        // Wegner technique for counting ones: each pass clears the lowest
        // set bit, so the loop runs once per set bit.
        // http://gurmeet.net/puzzles/fast-bit-counting-routines/
        while (diff != 0) {
            dist++;
            diff = (unsigned char)(diff & (diff - 1));
        }
    }

    return dist;
}
-
// Pick the candidate block size whose neighbouring blocks are most alike.
//
// For every block size in [i_min_block_size, i_max_block_size] the first four
// blocks of i_string are taken. The bit distance between blocks 0 and 1 and
// between blocks 2 and 3 is computed with block_distance(), each is divided
// by the number of bits in a block, and the smaller of the two is the score
// for that size. The size with the lowest score wins; on a tie the smallest
// such size is kept.
//
// i_string         : input buffer
// i_size           : number of valid bytes in i_string
// i_min_block_size : smallest block size to try (a size of 0 is skipped)
// i_max_block_size : largest block size to try
//
// Sizes for which four whole blocks do not fit in i_size bytes are not
// evaluated, so the buffer is never read past its end.
//
// Returns the best block size, or i_max_block_size when no candidate could be
// evaluated (empty range, NULL buffer, or buffer too short).
int choose_min_block_size(const unsigned char* const i_string,
                          const unsigned long i_size,
                          const unsigned i_min_block_size,
                          const unsigned i_max_block_size)
{
    // Normalized scores are at most 1.0, so this start value only serves as
    // "nothing found yet".
    double best_dist = (double)i_max_block_size * 8.0;
    int best_block_size = (int)i_max_block_size;

    // Largest size for which blocks 0..3 lie entirely inside the buffer.
    // A NULL buffer is treated as holding no data.
    const unsigned long fit_limit = (i_string != NULL) ? i_size / 4 : 0;

    // A zero-length block has no bits to compare (it would divide 0 by 0).
    const unsigned long first = (i_min_block_size > 0) ? i_min_block_size : 1;
    const unsigned long last = (fit_limit < i_max_block_size) ? fit_limit
                                                              : i_max_block_size;

    // The counter is unsigned long and 'last' never exceeds i_size / 4 or
    // UINT_MAX, so neither the increment nor the 3 * size offset can wrap.
    for (unsigned long size = first; size <= last; size++) {
        const double bits_per_block = (double)size * 8.0;

        const double dist_01 =
            block_distance(i_string, i_string + size, (unsigned)size)
            / bits_per_block;
        const double dist_23 =
            block_distance(i_string + 2 * size, i_string + 3 * size, (unsigned)size)
            / bits_per_block;

        const double score = (dist_23 < dist_01) ? dist_23 : dist_01;

        // Strict comparison: the first size reaching the minimum is kept.
        if (score < best_dist) {
            best_dist = score;
            best_block_size = (int)size;
        }
    }

    return best_block_size;
}
|