You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
 
 
 
 

61 lines
1.7 KiB

  1. #include <stdio.h>
  2. #include <string.h>
  3. #include <stdlib.h>
  4. #include "utils/base64.h"
  // Compute the Hamming distance between two byte blocks: the number of bit
  // positions in which the first i_size bytes of s1 and s2 differ.
  //
  // s1, s2  : buffers to compare; each must hold at least i_size bytes.
  // i_size  : number of bytes compared from each buffer.
  //
  // Returns the count of differing bits (0 when i_size is 0). Only one size is
  // passed in, so a length mismatch cannot be detected here; the caller must
  // supply equally sized blocks.
  int block_distance(const unsigned char* const s1, const unsigned char* const s2, const unsigned i_size)
  {
      int dist = 0;

      // Unsigned index: avoids a mixed signed/unsigned comparison with i_size.
      for (unsigned idx = 0; idx < i_size; ++idx)
      {
          // XOR leaves a 1 bit exactly where the two bytes disagree.
          unsigned char diff = (unsigned char)(s1[idx] ^ s2[idx]);

          // Wegner's technique for counting ones: x & (x - 1) clears the
          // lowest set bit, so the loop runs once per set bit.
          // http://gurmeet.net/puzzles/fast-bit-counting-routines/
          while (diff != 0)
          {
              dist++;
              diff = (unsigned char)(diff & (diff - 1));
          }
      }

      return dist;
  }
  // Pick the block size in [i_min_block_size, i_max_block_size] whose blocks
  // look most alike, measured by normalized Hamming distance.
  //
  // For each candidate size the first four consecutive blocks of i_string are
  // used: the distance between blocks 1 and 2 and between blocks 3 and 4 is
  // computed with block_distance(), each is divided by the number of bits in
  // a block, and the smaller of the two is the candidate's score. The
  // candidate with the lowest score wins; on a tie the smallest size is kept.
  //
  // i_string         : input buffer.
  // i_size           : number of valid bytes in i_string.
  // i_min_block_size : smallest candidate size, in bytes.
  // i_max_block_size : largest candidate size, in bytes.
  //
  // Candidates of size 0, and candidates for which four full blocks do not
  // fit in i_size bytes, are skipped. Returns the winning block size, or
  // i_max_block_size if no candidate could be evaluated.
  int choose_min_block_size(const unsigned char* const i_string,
                            const unsigned long i_size,
                            const unsigned i_min_block_size,
                            const unsigned i_max_block_size)
  {
      // Fallback result when no candidate qualifies.
      int min_block_size = (int)i_max_block_size;

      // Normalized distances lie in [0, 1]; start above that range so the
      // first evaluated candidate is always accepted.
      double min_dist = 2.0;

      // Largest size for which four whole blocks fit inside the buffer.
      const unsigned long max_fitting_size = i_size / 4;

      // The candidate is held in an unsigned long so the byte offsets
      // (2 * size, 3 * size) are computed without wrapping in unsigned int.
      for (unsigned long block_size = i_min_block_size; block_size <= i_max_block_size; block_size++)
      {
          // An empty block has nothing to compare and would give 0/0.
          if (block_size == 0)
          {
              continue;
          }

          // Sizes only grow from here, so once four blocks no longer fit
          // every remaining candidate would read past the buffer as well.
          if (block_size > max_fitting_size)
          {
              break;
          }

          const double bits_per_block = (double)block_size * 8.0;

          // Compare block 1 with block 2, and block 3 with block 4.
          const double first_pair = block_distance(i_string,
                                                   i_string + block_size,
                                                   (unsigned)block_size) / bits_per_block;
          const double second_pair = block_distance(i_string + (block_size * 2),
                                                    i_string + (block_size * 3),
                                                    (unsigned)block_size) / bits_per_block;

          // The candidate's score is the better (smaller) of the two pairs.
          const double score = (second_pair < first_pair) ? second_pair : first_pair;

          if (score < min_dist)
          {
              min_dist = score;
              min_block_size = (int)block_size;
          }
      }

      return min_block_size;
  }
  51. }