#include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "hashtable.h" extern int optind; extern char *optarg; typedef struct { char **array; int size; int allocSize; struct hashtable *h; } ARR; static unsigned int hashKey1(void* item) { unsigned int hash = 0; int c; char *p = (char *)item; while ( c = *p++ ) hash = ((hash << 5) + hash) + c; return hash; } static unsigned int hashKey2(void* item) { unsigned int hash = 0; int c; char *p = (char *)item; while ( c = *p++ ) hash = c + (hash << 6) + (hash << 16) - hash; return hash; } //#if sizeof(short)==2 //#define getint16(x) *((short *)x) //#else #define getint16(x) (((x)[0] << 8) | (x)[1]) //#endif static unsigned int hashKey3(void* item) { char *p = (char *)item; int len = strlen(item), tmp; unsigned int hash = len; int rem; if (len <0 || item NULL) return 0; rem = len & 3; len >>= 2; /* Main loop */ for (;len > 0; len--) { hash += (p[0] << 8) | p[1]; tmp = (((p[2] << 8) | p[3]) << 11) ^ hash; hash = (hash << 16) ^ tmp; p += 2; hash += hash >> 11; } /* Handle end cases */ switch (rem) { case 3: hash += (p[0] << 8) | p[1]; hash ^= hash << 16; hash ^= p[2] << 18; hash += hash >> 11; break; case 2: hash += (p[0] << 8) | p[1]; hash ^= hash << 11; hash += hash >> 17; break; case 1: hash += p[0]; hash ^= hash << 10; hash += hash >> 1; } /* Force "avalanching" of final 127 bits */ hash ^= hash << 3; hash += hash >> 5; hash ^= hash << 4; hash += hash >> 17; hash ^= hash << 25; hash += hash >> 6; return hash; } static unsigned int hashKey4(void* item) { char *p = (char *)item; const unsigned int m = 0x5bd1e995; const int r = 24; long seed = 0xc58f1a7b; int len = strlen(item), tmp; if ( len == 0 ) return 0; unsigned int hash = seed ^ len; int rem = len & 3; // mod 4 len >>= 2; // div 4 for ( ; len > 0; len-- ) { unsigned int k = *((short *)p); k *= m; k ^= k >> r; k *= m; hash *= m; hash ^= k; p += sizeof(short); } switch ( rem ) { case 3: hash ^= 
*((short *)p); hash ^= (*((short *)(p+2))) << 16; hash *= m; break; case 2: hash ^= *((short *)p); hash *= m; break; case 1: hash ^= *((short *)p); hash *= m; break; default: break; } // Do a few final mixes of the hash to ensure the last few // bytes are well-incorporated. hash ^= hash >> 13; hash *= m; hash ^= hash >> 15; return hash; } static int compareKey(void* k1, void* k2) { //return !strcmp(k1, k2); char *p1 k1, *p2 k2; while ( *p1 && *p1 == *p2 ) p1++, p2++; return *p1 == *p2 ? 1 : 0; } unsigned int (*hashKey)(void* item); void initArray(ARR* a, int sz, int hsz) { a->size = 0; a->allocSize = sz; a->array = (char **) malloc(sizeof(char *) * sz); a->h = create_hashtable(hsz, hashKey, compareKey); } void sortie(char *msg, int nret) { fprintf(stderr, msg); exit(nret); } long findOrAddArray(ARR *arr, char *item) { long * pi = (long *)hashtable_search(arr->h, item); if ( pi ) return *pi; long i = arr->size; int n = strlen(item); char * key = malloc(n + 1 + sizeof(long)); strcpy(key, item); pi = (long *)(&key[n + 1]); *pi = i; if ( arr->size == arr->allocSize ) { arr->allocSize *= 2; char ** p = (char **) malloc(sizeof(char *) * arr->allocSize); memcpy(p, arr->array, sizeof(char *) * arr->size); free(arr->array); arr->array = p; } arr->array[i] = key; arr->size++; hashtable_insert(arr->h, key, pi); return i; } static void usage(const char *name) { fprintf(stderr, "usage: %s [option]... 
Fichier_Entrée\n\n", name); fprintf(stderr, "options:\n"); fprintf(stderr, " -h, --help affiche ce message\n"); fprintf(stderr, " -v, --verbose version bavarde vers stderr\n\n"); } main(int argc, char *argv[]) { int opt; int verbose = 0; const char *prgName = argv[0]; hashKey = hashKey1; while ( (opt = getopt(argc, argv, "hk:v")) != EOF ) { switch ( opt ) { case 'v': verbose = 1; break; case 'k': switch ( atoi(optarg) ) { case 2: hashKey = hashKey2; break; case 3: hashKey = hashKey3; break; case 4: hashKey = hashKey4; break; default: hashKey = hashKey1; break; } break; case 'h': usage(prgName); exit(0); break; default: usage(prgName); exit(-1); break; } } argv += optind; argc -= optind; if ( argc <= 0 ) sortie("il manque le fichier à traiter.", 1); char *inFile = argv[0]; char *outFile = NULL; if ( argc > 1 ) outFile = argv[1]; /*************************************/ ARR gsm; initArray(&gsm, 500, 1000); long i, j; int fd; if ( !(fd = open(inFile,O_RDONLY)) ) sortie("erreur d'ouverture", 1); struct stat st; stat(inFile, &st); unsigned long fsz = st.st_size; if ( fsz == 0 ) sortie("la taille du fichier est nulle", 1); if ( verbose ) fprintf(stderr, "size = %d\n", fsz); char *bufFile = malloc(st.st_size); if ( !bufFile ) sortie("Erreur malloc bufFile\n", 2); /*************************************/ int nr; char *pbf, *p; int n = 1; long nl = 0; long ncur = 0; for (pbf = bufFile; (nr=read(fd, pbf, 4096)) > 0; pbf += nr ) { for (p=pbf; p n*fsz ) { fprintf(stderr, "lecture %d%%\r", n); fflush(stderr); n += 1; } } *pbf = 0; if ( pbf>bufFile && pbf[-1] ) nl++; close(fd); if ( verbose ) fprintf(stderr, "lecture 100%% NL=%d\r\n", nl); if ( nl == 0 ) sortie("Fichier vide\n", 4); /*************************************/ char **row = (char **)malloc(nl*sizeof(char *)); if ( !row ) sortie("Erreur malloc row\n", 2); long *vgsm = (long *)malloc(nl*sizeof(long)); if ( !vgsm ) sortie("Erreur malloc vgsm\n", 2); char **prow = row; long *pvgsm = vgsm; pbf = bufFile; while ( *pbf ) { 
*pvgsm = 0; *prow = NULL; i = strlen(pbf)+1; if ( p = strtok(pbf, " ") ) { *pvgsm = findOrAddArray(&gsm, p); if ( (p=strtok(NULL, " ")) && strtok(NULL, " ") ) *prow = p; // pointe sur tag } prow++; pvgsm++; pbf += i; } if ( gsm.size == 0 ) sortie("Pas de gsm\n", 3); /*************************************/ long *res = (long *)malloc(gsm.size * sizeof(long)); if ( !res ) sortie("Erreur malloc res\n", 2); FILE *fpo = stdout; if ( outFile ) fpo = fopen(outFile, "w"); if ( !fpo ) { fprintf(stderr, "%s: erreur d'ouverture, utilisation de stdout\n", outFile); fpo = stdout; } fprintf(fpo, "etiquette"); for (j=0; j<gsm.size; j++) fprintf(fpo, " %s", gsm.array[j]); fprintf(fpo, "\n"); char **irow; char *curTag; //tag en cours long *ivgsm = vgsm; char *prevTag = NULL; for (i=0, irow = row; i < nl && !(prevTag = *irow); i++, irow++) ; memset(res, 0, gsm.size*sizeof(long)); // remise à 0 n = 1; int pfreq = strlen(prevTag)+1; for (i=0, irow = row; i < nl; i++, irow++, ivgsm++) { if ( curTag = *irow ) { if ( strcmp(curTag, prevTag) ) { fprintf(fpo, "%s", prevTag); for (j=0; j<gsm.size; j++) fprintf(fpo, " %d", res[j]); fprintf(fpo, "\n"); // Réinitialisation prevTag = curTag; pfreq = strlen(prevTag) + 1; memset(res, 0, gsm.size*sizeof(long)); // remise à 0 } res[*ivgsm] = atol(curTag + pfreq); } if ( verbose && i*100.0 > n*nl ) { fprintf(stderr, "écriture %d%%\r", n); fflush(stderr); n += 1; } } if ( nl > 0 ) { fprintf(fpo, "%s", curTag); for (j=0; j<gsm.size; j++) fprintf(fpo, " %d", res[j]); fprintf(fpo, "\n"); } if ( verbose ) fprintf(stderr, "écriture 100%%\r\n"); /*************************************/ hashtable_destroy(gsm.h, 0); free(gsm.array); free(res); free(bufFile); exit(0); }
Vous n’avez pas trouvé la réponse que vous recherchez ?
Posez votre question#include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "hashtable.h" extern int optind; extern char *optarg; typedef struct { char **array; int size; int allocSize; struct hashtable *h; } ARR; long nComp 0, nHash 0; static unsigned int hashKey(void* item) { unsigned int hash = 0; int c; char *p = (char *)item; while ( c = *p++ ) { //hash = ((hash << 5) + hash) + c; hash = c + (hash << 6) + (hash << 16) - hash; } return hash; } static int compKey(void* k1, void* k2) { char *p1 = k1; char *p2 = k2; while ( *p1 && *p1 == *p2 ) p1++, p2++; return *p1 == *p2 ? 1 : 0; } void initArray(ARR* a, int sz) { a->size = 0; a->allocSize = sz; a->array = (char **) malloc(sizeof(char *) * sz); a->h = create_hashtable(sz, hashKey, compKey); } long findOrAddArray(ARR *arr, char *item) { long * pi = (long *)hashtable_search(arr->h, item); if ( pi ) return *pi; long i = arr->size; int n = strlen(item); char * key = malloc(n + 1 + sizeof(long)); memcpy(key, item, n); pi = (long *)(&key[n + 1]); *pi = i; if ( i == arr->allocSize ) { arr->allocSize *= 2; char ** p = (char **) malloc(sizeof(char *) * arr->allocSize); memcpy(p, arr->array, sizeof(char *) * i); free(arr->array); arr->array = p; } arr->array[i] = key; arr->size++; hashtable_insert(arr->h, key, pi); return i; } void sortie(char *msg, int nret) { fprintf(stderr, msg); exit(nret); } static void usage(const char *name) { fprintf(stderr, "usage: %s [option]... 
Fichier_Entrée\n\n", name); fprintf(stderr, "options:\n"); fprintf(stderr, " -h, --help affiche ce message\n"); fprintf(stderr, " -v, --verbose version bavarde vers stderr\n\n"); } void outBuf(FILE *fp, char *t, char **s, int n, int bf) { char *p; int j; fputs(t, fp); for (j=0; j<n; j++) { if ( p = *s++ ) { fputc(' ', fp); fputs(p, fp); if ( bf ) free(p); } else fputs(" 0", fp); } fputc('\n', fp); } main(int argc, char *argv[]) { int opt; int verbose = 0; const char *prgName = argv[0]; while ( (opt = getopt(argc, argv, "hv")) != EOF ) { switch ( opt ) { case 'v': verbose = 1; break; case 'h': usage(prgName); exit(0); break; default: usage(prgName); exit(-1); break; } } argv += optind; argc -= optind; if ( argc <= 0 ) sortie("il manque le fichier à traiter.", 1); char *inFile = argv[0]; char *outFile = NULL; if ( argc > 1 ) outFile = argv[1]; /*************************************/ FILE *fp = fopen(inFile, "r"); if ( !fp ) sortie("erreur d'ouverture entrée", 1); struct stat st; stat(inFile, &st); size_t fsz = st.st_size; if ( fsz == 0 ) sortie("la taille du fichier est nulle", 1); ARR gsm; initArray(&gsm, 2500); /*************************************/ char buf[512]; char *pGsm, *pTag, *pFrq; time_t t0, t1, t2; long *pi; char *p; long i, j; double n = 0; double cx = 100.0/fsz; time(&t0); while ( fgets(buf, sizeof(buf), fp) ) { if ( p = strchr(buf, ' ') ) { *p = 0; findOrAddArray(&gsm, buf); } if ( verbose && ftell(fp)*cx > n ) { fprintf(stderr, "\rLecture %.0f%%", n); fflush(stderr); n += 1; } } time(&t1); if ( verbose ) fprintf(stderr, "\rLecture 100%%, %d s.\n", t1-t0); /*************************************/ size_t szRes = gsm.size * sizeof(char *); char **res = (char **) malloc( szRes ); char **pRes; if ( !res ) sortie("Erreur malloc res\n", 2); memset(res, 0, szRes); FILE *fpo = stdout; if ( outFile ) fpo = fopen(outFile, "w"); if ( !fpo ) { fprintf(stderr, "erreur d'ouverture du fichier en sortie : utilisation de stdout\n"); fpo = stdout; } //setvbuf(fpo, NULL, 
_IOFBF, gsm.size * 128); /*************************************/ outBuf(fpo, "etiquette", gsm.array, gsm.size, 0); n = 0; fseek(fp, 0, 0); char oBuf[2][BUFSIZ]; int iBuf = 0; char *pBuf = oBuf[0]; char *prevTag = NULL; while ( fgets(pBuf, BUFSIZ, fp) ) { strcpy(oBuf[1], oBuf[0]); if ( pTag = strchr(pBuf, ' ') ) { *pTag++ = 0; if ( pFrq = strchr(pTag, ' ') ) { *pFrq = 0; prevTag = pTag; iBuf = 1; pBuf = oBuf[1]; break; } } } if ( prevTag ) { do { if ( pTag = strchr(pBuf, ' ') ) { *pTag++ = 0; if ( pFrq = strchr(pTag, ' ') ) { *pFrq++ = 0; long *pi = (long *)hashtable_search(gsm.h, pBuf); if ( strcmp(pTag, prevTag) ) { outBuf(fpo, prevTag, res, gsm.size, 1); // Réinitialisation //strcpy(prevTag, pTag); prevTag = pTag; iBuf = 1 - iBuf; pBuf = oBuf[iBuf]; memset(res, 0, szRes); } if ( pi ) { int np = strlen(pFrq); if ( np>0 && pFrq[np-1] == '\n' ) pFrq[np-1] = 0; p = malloc(np); memcpy(p, pFrq, np); res[*pi] = p; } } } if ( verbose && ftell(fp)*cx > n ) { time(&t2); fprintf(stderr, "\rEcriture %.1f%% %d s.", n, t2-t1); fflush(stderr); n += 0.1; } } while ( fgets(pBuf, BUFSIZ, fp) ); outBuf(fpo, prevTag, res, gsm.size, 1); } fclose(fp); fclose(fpo); time(&t2); if ( verbose ) fprintf(stderr, "\rEcriture 100%%, %d s\n", t2-t1); /*************************************/ if ( verbose ) fprintf(stderr, "temp: %d'%02d\n", (t2-t0)/60, (t2-t0)%60); hashtable_destroy(gsm.h, 0); free(gsm.array); free(res); exit(0); }
#include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> #include <stdio.h> #include <stdlib.h> #include <string.h> #include "hashtable.h" extern int optind; extern char *optarg; typedef struct { char **array; int size; int allocSize; struct hashtable *h; } ARR; static unsigned int hashKey(void* item) { unsigned int hash = 0; int c; char *p = (char *)item; while ( c = *p++ ) { //hash = c + (hash << 5) + hash; hash = c + (hash << 5) + (hash << 11) - hash; } return hash; } static int compareKey(void* k1, void* k2) { char *p1 k1, *p2 k2; while ( *p1 && *p1 == *p2 ) p1++, p2++; return *p1 == *p2 ? 1 : 0; } void initArray(ARR* a, int sz) { a->size = 0; a->allocSize = sz; a->array = (char **) malloc(sizeof(char *) * sz); a->h = create_hashtable(sz, hashKey, compareKey); } void sortie(char *msg, int nret) { fprintf(stderr, msg); exit(nret); } long findOrAddArray(ARR *arr, char *item) { long * pi = (long *)hashtable_search(arr->h, item); if ( pi ) return *pi; long i = arr->size; int n = strlen(item); char * key = malloc(n + 1 + sizeof(long)); memcpy(key, item, n+1); pi = (long *)(&key[n + 1]); *pi = i; if ( arr->size == arr->allocSize ) { arr->allocSize *= 2; char ** p = (char **) malloc(sizeof(char *) * arr->allocSize); memcpy(p, arr->array, sizeof(char *) * arr->size); free(arr->array); arr->array = p; } arr->array[i] = key; arr->size++; hashtable_insert(arr->h, key, pi); return i; } void outBuf(FILE *fp, char *t, char **s, int n) { char *p; fputs(t, fp); while ( n-- > 0 ) { if ( p = *s++ ) { fputc(' ', fp); fputs(p, fp); } else fputs(" 0", fp); } fputc('\n', fp); } static void usage(const char *name) { fprintf(stderr, "usage: %s [option]... 
Fichier_Entrée\n\n", name); fprintf(stderr, "options:\n"); fprintf(stderr, " -h, --help affiche ce message\n"); fprintf(stderr, " -v, --verbose version bavarde vers stderr\n\n"); } main(int argc, char *argv[]) { int opt; int verbose = 0; const char *prgName = argv[0]; time_t t0, t1, t2; while ( (opt = getopt(argc, argv, "hv")) != EOF ) { switch ( opt ) { case 'v': verbose = 1; break; case 'h': usage(prgName); exit(0); break; default: usage(prgName); exit(-1); break; } } argv += optind; argc -= optind; if ( argc <= 0 ) sortie("il manque le fichier à traiter.", 1); char *inFile = argv[0]; char *outFile = NULL; if ( argc > 1 ) outFile = argv[1]; /*************************************/ time(&t0); ARR gsm; initArray(&gsm, 3000); long i, j; int fd; if ( !(fd = open(inFile,O_RDONLY)) ) sortie("erreur d'ouverture", 1); struct stat st; stat(inFile, &st); unsigned long fsz = st.st_size; if ( fsz == 0 ) sortie("la taille du fichier est nulle", 1); if ( verbose ) fprintf(stderr, "size = %d\n", fsz); char *bufFile = malloc(st.st_size); if ( !bufFile ) sortie("Erreur malloc bufFile\n", 2); /* LECTURE ************************************/ int nr; char *pbf, *p; int n = 1; long nl = 0; long ncur = 0; for (pbf = bufFile; (nr=read(fd, pbf, 4096)) > 0; pbf += nr ) { for (p=pbf; p n ) { fprintf(stderr, "\rLecture %d%%", n); fflush(stderr); n += 1; } } *pbf = 0; if ( pbf>bufFile && pbf[-1] ) nl++; close(fd); if ( verbose ) { time(&t1); fprintf(stderr, "\rLecture 100%% NL=%d t=%d'%02d\r\n", nl, (t1-t0)/60, (t1-t0)%60); } if ( nl == 0 ) sortie("Fichier vide\n", 4); /* TRAITEMENT ************************************/ char **row = (char **)malloc(nl*sizeof(char *)); if ( !row ) sortie("Erreur malloc row\n", 2); long *vgsm = (long *)malloc(nl*sizeof(long)); if ( !vgsm ) sortie("Erreur malloc vgsm\n", 2); char **prow = row; long *pvgsm = vgsm; pbf = bufFile; while ( *pbf ) { *pvgsm = 0; *prow = NULL; i = strlen(pbf)+1; if ( p = strtok(pbf, " ") ) { *pvgsm = findOrAddArray(&gsm, p); if ( 
(p=strtok(NULL, " ")) && strtok(NULL, " ") ) *prow = p; // pointe sur tag } prow++; pvgsm++; pbf += i; } if ( gsm.size == 0 ) sortie("Pas de gsm\n", 3); if ( verbose ) { time(&t2); fprintf(stderr, "\rTraitement t=%d'%02d\r\n", (t2-t1)/60, (t2-t1)%60); t1 = t2; } /* ECRITURE ************************************/ size_t szRes = gsm.size * sizeof(char *); char **res = (char **)malloc(szRes); if ( !res ) sortie("Erreur malloc res\n", 2); FILE *fpo = stdout; if ( outFile ) fpo = fopen(outFile, "w"); if ( !fpo ) { fprintf(stderr, "%s: erreur d'ouverture, utilisation de stdout\n", outFile); fpo = stdout; } outBuf(fpo, "etiquette", gsm.array, gsm.size); char **irow; char *curTag; //tag en cours long *ivgsm = vgsm; char *prevTag = NULL; for (i=0, irow = row; i < nl && !(prevTag = *irow); i++, irow++) ; memset(res, 0, szRes); // remise à 0 n = 1; int pfreq = strlen(prevTag)+1; for (i=0, irow = row; i < nl; i++, irow++, ivgsm++) { if ( curTag = *irow ) { if ( strcmp(curTag, prevTag) ) { outBuf(fpo, prevTag, res, gsm.size); // Réinitialisation prevTag = curTag; pfreq = strlen(curTag) + 1; memset(res, 0, szRes); // remise à 0 } res[*ivgsm] = curTag + pfreq; } if ( verbose && i*100.0/nl > n ) { fprintf(stderr, "\rEcriture %d%%", n); fflush(stderr); n += 1; } } if ( nl > 0 ) { outBuf(fpo, prevTag, res, gsm.size); } fclose(fpo); if ( verbose ) { time(&t2); fprintf(stderr, "\rEcriture 100%% t=%d'%02d\r\n", (t2-t1)/60, (t2-t1)%60); fprintf(stderr, "Total t=%d'%02d\r\n", (t2-t0)/60, (t2-t0)%60); } /*************************************/ hashtable_destroy(gsm.h, 0); free(gsm.array); free(res); free(bufFile); exit(0); }
size_t szRes = gsm.size * sizeof(char *); char **res = (char **)malloc(szRes); if ( !res ) sortie("Erreur malloc res\n", 2); // ajouter ci-dessous : je recopierai res0 dans res pour éviter une grosse boucle char **res0 = (char **)malloc(szRes); if ( !res0 ) sortie("Erreur malloc res0\n", 2); char *zero = "0"; // plus besoin d' if/else sur les pointeurs nuls for (i=0; i<gsm.size; i++) res0[i] = zero;
for (i=0, irow = row; i < nl && !(prevTag = *irow); i++, irow++) ; // On ne fait plus : memset(res, 0, gsm.size*sizeof(long)); // remise à 0 memcpy(res, res0, szRes); // remise à 0 n = 1; int pfreq = strlen(prevTag)+1; for (i=0, irow = row; i < nl; i++, irow++, ivgsm++) {
// Réinitialisation prevTag = curTag; pfreq = strlen(curTag) + 1; //memset(res, 0, szRes); supprimé et remplacé par : memcpy(res, req0, szRes); // remise à 0
res[*ivgsm] = curTag + pfreq; // store a pointer to the text at offset pfreq from curTag, and no longer: atol(curTag + pfreq);
/*
 * Write one row to fp: the label t, then each of the n strings in s
 * preceded by a single space, then a newline.
 * Every entry of s is passed to fputs as is, so none may be NULL.
 */
void outBuf(FILE *fp, char *t, char **s, int n)
{
    int idx;

    fputs(t, fp);
    for (idx = 0; idx < n; idx++) {
        fputc(' ', fp);
        fputs(s[idx], fp);
    }
    fputc('\n', fp);
}
/* One mixing step of the string hash. With an unsigned hash (wraparound
   arithmetic) this equals hash * 65599 + c, since 64 + 65536 - 1 = 65599.
   NOTE(review): the declarations of hash and c are not visible here - confirm. */
hash = c + (hash << 6) + (hash << 16) - hash;