merge3.c

   1 /*
   2         Prueba de concepto de buscador de diferencias por bloques.
   3 */
   4
   5 #include <gcrypt.h>
   6 #include <stdio.h>
   7 #include <stdlib.h>
   8 #include <ctype.h>
   9 #include <string.h>
  // Hash algorithm selection:
  // GCRY_MD_CRC32 - CRC32, simple and fast.
  // GCRY_MD_SHA1 - SHA1, strong and secure.
  // HASH_TYPE is the algorithm used by ihash(); HASH_TYPE2 is the one used
  // by ihash2().
  #define HASH_TYPE GCRY_MD_CRC32
  #define HASH_TYPE2 GCRY_MD_SHA1


  // Maximum line width to work with.
  // Set to at least 90. Recommended: 128 or 256.
  #define MAX_LINE 256
  // Number of entries in each of the hash tables declared below.
  #define MAX_LOADED_LINES 8096

  // Number of consecutive line hashes that are combined into one block hash.
  const int block_size=8;

  // Per-line hashes of the loaded file and the number of entries in use.
  // The element type is unsigned int so that it matches the return type of
  // ihash(), the pointer parameter of ihash2() and the %08X output format.
  unsigned int hashfile1[MAX_LOADED_LINES];
  int hashfile1_size=0;

  // Block hashes of the loaded file (one per window of block_size lines)
  // and the number of entries in use.
  unsigned int hbfile1[MAX_LOADED_LINES];
  int hbfile1_size=0;


  unsigned int ihash(char *txt);
  unsigned int ihash2(unsigned int *data,int nblocks);
  int hash_loadfile(char *filename);
  34
  35 int main(int argc, char **argv) {
  36         // Comprobar la entrada de argumentos.
  37         if ( argc < 2 ) {
  38                 fprintf( stderr, "Usage: %s <string>\n", argv[0] );
  39                 fprintf( stderr, "Usage: %s -f <filename>\n", argv[0] );
  40                 exit( 1 );
  41         }
  42         if (argc > 2 && strcmp(argv[1],"-f")==0)
  43         {
  44                 hash_loadfile(argv[2]);
  45                 return 1;
  46         }
  47         int i,b;
  48         int nhashes=argc-1;
  49         unsigned int *int_hash=malloc((nhashes)*sizeof(unsigned int));
  50         fprintf( stderr, "nhashes: %d\n",nhashes );
  51         for (i=1;i<argc;i++)
  52         {
  53                 int_hash[i-1]=ihash(argv[i]);
  54         }
  55
  56         for (i=0;i<nhashes-block_size+1;i++)
  57         {
  58                 for (b=0;b<block_size;b++)
  59                 {
  60                         printf( "%08X", int_hash[i+b]);
  61                 }
  62                 printf( "\n");
  63         }
  64         free(int_hash);
  65         return 1;
  66 }
  67
  /*
   * Recognizes an accented Spanish vowel (a, e, i, o, u with acute accent,
   * upper or lower case) starting at p and returns its plain lowercase ASCII
   * letter, or 0 when p does not start one.
   *
   * Two encodings are accepted: the UTF-8 two-byte form (0xC3 followed by a
   * continuation byte) and a lone Latin-1 byte that is not followed by a
   * UTF-8 continuation byte. On success *len is set to the number of input
   * bytes the vowel occupies (2 or 1). p must point into a NUL-terminated
   * string so that p[1] is readable whenever p[0] is non-zero.
   */
  static char fold_accent(const unsigned char *p, size_t *len)
  {
      unsigned int code;

      if (p[0] < 0xC0) {
          return 0;
      }
      if (p[0] == 0xC3 && (p[1] & 0xC0) == 0x80) {
          // UTF-8 for U+00C0..U+00FF: rebuild the code point from the
          // six payload bits of the continuation byte.
          code = 0xC0u | (p[1] & 0x3Fu);
          *len = 2;
      } else if ((p[1] & 0xC0) != 0x80) {
          // A high byte that does not lead a UTF-8 sequence: Latin-1.
          code = p[0];
          *len = 1;
      } else {
          return 0;
      }

      // Setting bit 0x20 maps the Latin-1 uppercase letters onto lowercase.
      switch (code | 0x20u) {
      case 0xE1: return 'a';
      case 0xE9: return 'e';
      case 0xED: return 'i';
      case 0xF3: return 'o';
      case 0xFA: return 'u';
      default:   return 0;
      }
  }

  /*
   * Normalizes one line of text in place so that lines differing only in
   * case, accents, spacing or repeated letters reduce to the same string.
   *
   *  - Processing stops at the first '\n', '\r' or NUL; the result is
   *    NUL-terminated there or earlier.
   *  - Letters are lowercased; accented vowels are replaced by the plain
   *    vowel; tabs count as spaces.
   *  - Leading blanks, repeated blanks, and blanks next to a symbol are
   *    dropped.
   *  - A character equal to the previous kept one is dropped while inside a
   *    word (letters, or digits following letters).
   *
   * The output is never longer than the input, so it is written over the
   * input as it is read and no scratch buffer or length limit is needed.
   * A NULL txt is ignored.
   */
  void reducetext(char * txt) {
      size_t in;
      size_t out = 0;     // write position; never ahead of the read position
      char lastc = 0;     // last character stored in the output
      char type = 0;      // class of the current token:
                          //   'a' -> word (letters, digits after letters)
                          //   '1' -> number, with or without decimals
                          //   '%' -> symbol
                          //   0   -> gap / blank

      if (txt == NULL) {
          return;
      }

      for (in = 0; txt[in] != '\0' && txt[in] != '\n' && txt[in] != '\r'; in++) {
          const unsigned char *src = (const unsigned char *)txt + in;
          size_t len = 1;
          char c = fold_accent(src, &len);

          if (c != 0) {
              in += len - 1;          // skip the rest of a multi-byte vowel
          } else if (*src == '\t') {
              c = ' ';
          } else {
              // tolower() needs an unsigned char value, never a negative char.
              c = (char)tolower(*src);
          }

          // Blanks right after a symbol are dropped and leave the state alone.
          if (type == '%' && c == ' ') {
              continue;
          }

          // Classify the character given the token we were in.
          if (isalpha((unsigned char)c)) {
              type = 'a';
          } else if (c >= '0' && c <= '9') {
              type = (type == 'a') ? 'a' : '1';
          } else if (c == '.' && type == '1') {
              type = '1';             // decimal point inside a number
          } else {
              type = '%';
          }

          if (c == ' ') {
              type = 0;
              if (out == 0) {
                  continue;           // leading blanks have no effect
              }
          }
          if (c == lastc && (type == 'a' || type == 0)) {
              continue;               // discard repeated letters and blanks
          }

          // A symbol replaces the blank stored just before it. The out > 0
          // test keeps the look-behind inside the buffer when the symbol is
          // the first kept character.
          if (type == '%' && out > 0 && txt[out - 1] == ' ') {
              out--;
          }
          txt[out++] = c;
          lastc = c;
      }
      txt[out] = '\0';
  }
 154
 155 int hash_loadfile(char *filename) {
 156         char *nombre=filename, linea[MAX_LINE];
 157         FILE *fichero;
 158
 159         fichero = fopen( nombre, "r" );
 160         printf( "Fichero: %s -> ", nombre );
 161         if( !fichero ) {
 162                 printf( "Error (NO ABIERTO)\n" );
 163                 return 1;
 164         }
 165         char *txt;
 166         /*
 167 int hashfile1[MAX_LOADED_LINES];
 168 int hashfile1_size=0;
 169
 170 int hbfile1[MAX_LOADED_LINES];
 171 int hbfile1_size=0;
 172         */
 173         hashfile1_size=0;
 174
 175         while (txt=fgets(linea, MAX_LINE, fichero)) {
 176                 reducetext(txt);
 177                 hashfile1[hashfile1_size++]=ihash(txt);
 178                 // printf( "%d:%08X >%s\n", n,hashfile1[hashfile1_size-1],txt);
 179                 hbfile1_size=hashfile1_size-block_size;
 180                 if (hbfile1_size>=0 && hbfile1_size<MAX_LOADED_LINES)
 181                 {
 182                         hbfile1[hbfile1_size]=ihash2(hashfile1+hbfile1_size,block_size);
 183                         hbfile1_size++;
 184                 }
 185         }
 186
 187         if( fclose(fichero)!=0 ) {
 188                 printf( "\nError: fichero NO CERRADO\n" );
 189                 return 1;
 190         }
 191         int i;
 192         for (i=0;i<hbfile1_size;i++)
 193         {
 194                 printf( "%08X\n", hbfile1[i]);
 195         }
 196
 197         return 0;
 198
 199 }
 200
 201 // ihash calcula un hash de 32 bits para un texto.
 202 unsigned int ihash(char *txt) {
 203         // Longitud del mensaje a cifrar
 204         int msg_len = strlen( txt );
 205
 206         //  Longitud del hash resultante - gcry_md_get_algo_dlen
 207         // devuelve la longitud del resumen hash para un algoritmo
 208         int hash_len = gcry_md_get_algo_dlen( HASH_TYPE );
 209
 210         // Salida del hash SHA1 - esto serán datos binarios
 211         unsigned char hash[ hash_len ];
 212
 213         // Calcular el resumen SHA1. Esto es una especie de función-atajo,
 214         // ya que la mayoría de funciones gcrypt requieren
 215         // la creación de un handle, etc.
 216         gcry_md_hash_buffer( HASH_TYPE, hash, txt, msg_len );
 217
 218         //      unsigned int ihash=*((unsigned int *)hash);
 219         return *((unsigned int *)hash);
 220 }
 221
 222 // ihash2 calcula un hash fuerte de 32 bits para un array de hashes
 223 unsigned int ihash2(unsigned int *data,int nblocks) {
 224         // Longitud del mensaje a cifrar
 225         int msg_len = nblocks * sizeof(unsigned int) ;
 226
 227         //  Longitud del hash resultante - gcry_md_get_algo_dlen
 228         // devuelve la longitud del resumen hash para un algoritmo
 229         int hash_len = gcry_md_get_algo_dlen( HASH_TYPE2 );
 230
 231         // Salida del hash SHA1 - esto serán datos binarios
 232         unsigned char hash[ hash_len ];
 233
 234         // Calcular el resumen SHA1. Esto es una especie de función-atajo,
 235         // ya que la mayoría de funciones gcrypt requieren
 236         // la creación de un handle, etc.
 237         gcry_md_hash_buffer( HASH_TYPE2, hash, data, msg_len );
 238
 239         //      unsigned int ihash=*((unsigned int *)hash);
 240         return *((unsigned int *)hash);
 241 }