util.c

   1 #include <gcrypt.h>
   2 #include <ctype.h>
   3 #include <string.h>
   4
   5 #include <stdio.h>
   6 #include <stdlib.h>
   7 #include <dlfcn.h>
   8
   9 #include "util.h"
  10
  11 int loadsplitplugin(void) {
  12         void *handle;
  13
  14         int (*split)(int);
  15         const char *error;
  16         handle = dlopen ("./plugin-split-lines.so", RTLD_LAZY);
  17         if (!handle) {
  18                 fputs (dlerror(), stderr);
  19                 return 0;
  20         }
  21
  22         split = dlsym(handle, "split");
  23         if ((error = dlerror()) != NULL)  {
  24                 fputs(error, stderr);
  25                 return 0;
  26         }
  27
  28         printf ("%d\n", (*split)(1));
  29         printf ("%d\n", (*split)(2));
  30         printf ("%d\n", (*split)(3));
  31         dlclose(handle);
  32
  33         return 1;
  34 }
  35
  36 unsigned int *hash_loadfile(char *filename, int *size) {
  37         char *nombre=filename, linea[MAX_LINE];
  38         FILE *fichero;
  39         fichero = fopen( nombre, "r" );
  40
  41         if( !fichero ) {
  42                 printf( "Error (NO ABIERTO)\n" );
  43                 return NULL;
  44         }
  45         char *txt;
  46
  47         int lines;
  48         for (lines=0;fgets(linea, MAX_LINE, fichero); lines++);
  49         rewind(fichero);
  50         *size=lines;
  51         unsigned int *data=malloc(lines*sizeof(unsigned int*));
  52         int line=0;
  53         while (txt=fgets(linea, MAX_LINE, fichero)) {
  54                 reducetext(txt);
  55                 data[line++]=ihash(txt);
  56         }
  57
  58         if( fclose(fichero)!=0 ) {
  59                 printf( "\nError: fichero NO CERRADO\n" );
  60                 return NULL;
  61         }
  62
  63         return data;
  64
  65 }
  66
  67 // ihash calcula un hash de 32 bits para un texto.
  68 unsigned int ihash(char *txt) {
  69         // Longitud del mensaje a cifrar
  70         int msg_len = strlen( txt );
  71
  72         //  Longitud del hash resultante - gcry_md_get_algo_dlen
  73         // devuelve la longitud del resumen hash para un algoritmo
  74         int hash_len = gcry_md_get_algo_dlen( HASH_TYPE );
  75
  76         // Salida del hash SHA1 - esto serán datos binarios
  77         unsigned char hash[ hash_len ];
  78
  79         // Calcular el resumen SHA1. Esto es una especie de función-atajo,
  80         // ya que la mayoría de funciones gcrypt requieren
  81         // la creación de un handle, etc.
  82         gcry_md_hash_buffer( HASH_TYPE, hash, txt, msg_len );
  83
  84         //      unsigned int ihash=*((unsigned int *)hash);
  85         return *((unsigned int *)hash);
  86 }
  87
  88
  89 void reducetext(char * txt) {
  90
  91         int n=0, nn=0;
  92         char newline[256];
  93         char lastc=0;
  94         char c=txt[n];
  95         char type=0; // Tipos de palabras o grupos:
  96         // a -> texto, variable.
  97         // 1 -> números, con, sin decimales.
  98         // % -> símbolos unarios, binarios.
  99         // 0 -> huecos y espacios
 100
 101         for (n=0;n<MAX_LINE;n++) {
 102                 c=tolower(txt[n]); // Captura del carácter en minúscula.
 103
 104                 // Traducción del carácter.
 105                 switch(c)
 106                 {
 107                         // Retonos de carro y fin de fichero: salir de la función.
 108                         case 10:
 109                         case 13:
 110                         case 0:
 111                                 n=MAX_LINE; continue;
 112                         // Tabuladores y espacios: cuentan como espacio.
 113                         case ' ':
 114                         case '\t':
 115                                 c=' '; break;
 116                         // Acentos.
 117 /*
 118                         case 'á': c='a'; break;
 119                         case 'é': c='e'; break;
 120                         case 'í': c='i'; break;
 121                         case 'ó': c='o'; break;
 122                         case 'ú': c='u'; break;
 123                         */
 124                 }
 125
 126                 switch(type) // Cambios de tipos según algunos datos.
 127                 {
 128                         case 0: // Segun si estábamos en un espacio.
 129                                 if (c>='0' && c<='9') {
 130                                         type='1';
 131                                 } else if (isalpha(c)) {
 132                                         type='a';
 133                                 } else type='%';
 134                         break;
 135                         case '1': // Segun si estábamos en un espacio.
 136                                 if (c>='0' && c<='9') {
 137                                         type='1';
 138                                 } else if (c=='.') {
 139                                         type='1';
 140                                 } else if (isalpha(c)) {
 141                                         type='a';
 142                                 } else type='%';
 143                         break;
 144                         case 'a': // Segun si estábamos en un espacio.
 145                                 if (c>='0' && c<='9') {
 146                                         type='a';
 147                                 } else if (isalpha(c)) {
 148                                         type='a';
 149                                 } else type='%';
 150                         break;
 151                         default:
 152                         case '%':
 153                                 if (c==' ') {
 154                                         continue;
 155                                 } else
 156                                 if (c>='0' && c<='9') {
 157                                         type='1';
 158                                 } else if (isalpha(c)) {
 159                                         type='a';
 160                                 } else type='%';
 161                         break;
 162                 }
 163                 if (c==' ') type=0;
 164                 if (!nn && c==' ') continue; // Si está tabulando al inicio, tampoco tiene efecto.
 165                 if (c==lastc && (type=='a' || type==0)) continue; // Desperdiciar letras repetidas.
 166
 167                 if (type=='%' && newline[nn-1]==' ') nn--;
 168                 newline[nn]=c;
 169
 170                 nn++;
 171                 lastc=c;
 172         }
 173         newline[nn]=0;
 174         strcpy(txt,newline);
 175
 176 }
 177
 178
 179 int compare2hashvectors(int *Bvector, int Bsize, int *Mvector, int Msize,
 180         int MaxPassSize, hashblock *blocks, int blocksize)
 181 {
 182                 // Bvector: vector of hashes of Base, original or unmodified file.
 183                 // Mvector: vector of hashes of modified file.
 184                 // Bsize and Msize: Stores the size of their arrays.
 185                 // MaxPassSize: (default: 256) Which is the maximum block-size of algorithm
 186
 187                 int line_base,line_local,size;
 188                 int maxsize=1,total=0;
 189                 int i,k,m;
 190                 hashblock *bloque=blocks;
 191                 int nbloques=0;
 192                 int lbb=0;
 193                 int conf_pasada[]={256,128,64,32,16,8,4,2,1,0};
 194                 int p;
 195                 int min_bloque=0;
 196                 for (p=0;min_bloque=conf_pasada[p];p++)
 197                 {
 198                         if (conf_pasada[p]>MaxPassSize) continue;
 199
 200                         for (i=0;i<Bsize;i+=maxsize) {
 201                                 maxsize=1;
 202                                 int j;
 203                                 for (j=0;j<nbloques;j++)
 204                                 {
 205                                         if (i>=bloque[j].line1 && i<=bloque[j].line1+bloque[j].size) break;
 206                                 }
 207                                 if (j<nbloques)
 208                                 {
 209                                         i=bloque[j].line1+bloque[j].size; continue;
 210                                 }
 211
 212
 213                                 for (k=0;k<Msize;k++)
 214                                 {
 215                                         int j;
 216                                         for (j=0;j<nbloques;j++)
 217                                         {
 218                                                 if (k>=bloque[j].line2 && k<=bloque[j].line2+bloque[j].size) break;
 219                                         }
 220                                         if (j<nbloques)
 221                                         {
 222                                                 k=bloque[j].line2+bloque[j].size; continue;
 223                                         }
 224
 225                                         if (Bvector[i]==Mvector[k])
 226                                         {
 227                                                 int nz=0,nzbl=0;
 228
 229                                                 for(m=0;k+m<Msize && i+m<Bsize;m++)
 230                                                 {
 231                                                         for (j=0;j<nbloques;j++)
 232                                                         {
 233                                                                 if (k+m>=bloque[j].line2 && k+m<=bloque[j].line2+bloque[j].size) break;
 234                                                                 if (i+m>=bloque[j].line1 && i+m<=bloque[j].line1+bloque[j].size) break;
 235                                                         }
 236                                                         if (j<nbloques) break;
 237                                                         if (Bvector[i+m]!=Mvector[k+m])
 238                                                         {
 239                                                                 nz++;
 240                                                                 if (nz>size/4) break;
 241                                                                 continue;
 242                                                         }
 243                                                         if (nz==0) size=m;
 244                                                         else
 245                                                         {
 246                                                                 nzbl++;
 247                                                                 if (nzbl>2)
 248                                                                 {
 249                                                                         nzbl=0;
 250                                                                         nz--;
 251                                                                 }
 252
 253                                                         }
 254                                                 }
 255
 256                                                 if (size>maxsize)
 257                                                 {
 258                                                         maxsize=size;
 259                                                         line_base=i;
 260                                                         line_local=k;
 261                                                 }
 262                                         }
 263                                 }
 264
 265                                 if (maxsize>min_bloque)
 266                                 {
 267                                         if (nbloques<blocksize)
 268                                         {
 269                                         bloque[nbloques].line1=line_base;
 270                                         bloque[nbloques].line2=line_local;
 271                                         bloque[nbloques].size=maxsize;
 272                                         nbloques++;
 273                                         } else printf( "Error: OUT OF BLOCKS. \n");
 274
 275
 276                                         lbb=line_base+maxsize;
 277                                         total+=maxsize;
 278                                 }
 279
 280                         }
 281
 282                 }
 283
 284                 int j;
 285                 {
 286                         hashblock auxbloque[blocksize];
 287                         int minline=0, min_j=0;
 288                         for (p=0;p<nbloques;p++)
 289                         {
 290                                 minline=Bsize;
 291                                 for (j=0;j<64 && j<nbloques;j++)
 292                                 {
 293                                         if (bloque[j].line1<minline)
 294                                         {
 295                                                 minline=bloque[j].line1;
 296                                                 min_j=j;
 297                                         }
 298                                 }
 299                                 auxbloque[p]=bloque[min_j];
 300                                 bloque[min_j].line1=Bsize;
 301                         }
 302                         memcpy(bloque,auxbloque,blocksize*sizeof(hashblock));
 303                 }
 304                 return nbloques;
 305 }