krh@0: #include krh@0: #include krh@0: #include krh@0: #include krh@0: #include krh@0: #include krh@0: #include krh@0: #include krh@0: krh@0: #include krh@0: #include "sha1.h" krh@0: krh@0: static int krh@0: write_to_fd(int fd, void *p, size_t size) krh@0: { krh@0: int rest, len; krh@0: krh@0: rest = size; krh@0: while (rest > 0) { krh@0: len = write(fd, p, rest); krh@0: if (len < 0) krh@0: return -1; krh@0: rest -= len; krh@0: } krh@0: krh@0: return 0; krh@0: } krh@0: krh@0: static int krh@0: write_to_file(const char *filename, void *p, size_t size) krh@0: { krh@0: int fd, err; krh@0: krh@0: fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666); krh@0: if (fd < 0) krh@0: return -1; krh@0: err = write_to_fd(fd, p, size); krh@0: close(fd); krh@0: krh@0: return err; krh@0: } krh@0: krh@0: struct hashtable_header { krh@0: unsigned int magic; krh@0: unsigned int version; krh@0: struct { unsigned int type, offset; } sections[0]; krh@0: }; krh@0: krh@0: #define HASHTABLE_MAGIC 0x7a7a7a7a krh@0: #define HASHTABLE_VERSION 1 krh@0: #define HASHTABLE_BUCKETS 1 krh@0: #define HASHTABLE_STRINGS 2 krh@0: krh@0: struct hashtable { krh@0: unsigned long *buckets; krh@0: int bucket_count, bucket_alloc; krh@0: char *string_pool; krh@0: int pool_size, pool_alloc; krh@0: struct hashtable_header *header; krh@0: }; krh@0: krh@0: static void * krh@0: zalloc(size_t size) krh@0: { krh@0: void *p; krh@0: krh@0: p = malloc(size); krh@0: memset(p, 0, size); krh@0: krh@0: return p; krh@0: } krh@0: krh@0: struct hashtable * krh@0: hashtable_create(void) krh@0: { krh@0: struct hashtable *ht; krh@0: krh@0: ht = zalloc(sizeof *ht); krh@0: ht->buckets = zalloc(4096 * sizeof *ht->buckets); krh@0: ht->bucket_count = 0; krh@0: ht->bucket_alloc = 4096; krh@0: krh@0: ht->string_pool = zalloc(4096); krh@0: ht->pool_size = 1; krh@0: ht->pool_alloc = 4096; krh@0: krh@0: return ht; krh@0: } krh@0: krh@0: struct hashtable * krh@0: hashtable_create_from_file(const char *filename) krh@0: { krh@0: struct hashtable *ht; krh@0: struct stat stat; krh@0: unsigned int size, offset; krh@0: int fd, i; krh@0: krh@0: ht = zalloc(sizeof *ht); krh@0: fd = open(filename, O_RDONLY); krh@0: if (fstat(fd, &stat) < 0) krh@0: return NULL; krh@0: ht->header = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0); krh@0: if (ht->header == MAP_FAILED) { krh@0: free(ht); krh@0: return NULL; krh@0: } krh@0: krh@0: for (i = 0; i < ht->header->sections[i].type; i++) { krh@0: offset = ht->header->sections[i].offset; krh@0: size = ht->header->sections[i + 1].offset - offset; krh@0: krh@0: switch (ht->header->sections[i].type) { krh@0: case HASHTABLE_BUCKETS: krh@0: ht->buckets = (void *) ht->header + offset; krh@0: ht->bucket_count = size / sizeof *ht->buckets; krh@0: ht->bucket_alloc = ht->bucket_count; krh@0: break; krh@0: case HASHTABLE_STRINGS: krh@0: ht->string_pool = (void *) ht->header + offset; krh@0: ht->pool_size = size; krh@0: ht->pool_alloc = size; krh@0: break; krh@0: } krh@0: } krh@0: close(fd); krh@0: krh@0: return ht; krh@0: } krh@0: krh@0: void krh@0: hashtable_destroy(struct hashtable *ht) krh@0: { krh@0: unsigned int size; krh@0: int i; krh@0: krh@0: if (ht->header) { krh@0: for (i = 0; ht->header->sections[i].type; i++) krh@0: ; krh@0: size = ht->header->sections[i].type; krh@0: munmap(ht->header, size); krh@0: } else { krh@0: free(ht->buckets); krh@0: free(ht->string_pool); krh@0: } krh@0: krh@0: free(ht); krh@0: } krh@0: krh@0: static int krh@0: hashtable_write(struct hashtable *ht, const char *filename) krh@0: { krh@0: int fd; krh@0: char data[4096]; krh@0: struct hashtable_header *header = (struct hashtable_header *) data; krh@0: krh@0: memset(data, 0, sizeof data); krh@0: header->magic = HASHTABLE_MAGIC; krh@0: header->version = HASHTABLE_VERSION; krh@0: krh@0: header->sections[0].type = HASHTABLE_BUCKETS; krh@0: header->sections[0].offset = sizeof data; krh@0: krh@0: header->sections[1].type = HASHTABLE_STRINGS; krh@0: header->sections[1].offset = krh@0: sizeof data + ht->bucket_alloc * sizeof *ht->buckets; krh@0: krh@0: header->sections[2].type = 0; krh@0: header->sections[2].offset = krh@0: header->sections[1].offset + ht->pool_size; krh@0: krh@0: fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666); krh@0: if (fd < 0) krh@0: return -1; krh@0: krh@0: write_to_fd(fd, data, sizeof data); krh@0: write_to_fd(fd, ht->buckets, ht->bucket_alloc * sizeof *ht->buckets); krh@0: write_to_fd(fd, ht->string_pool, ht->pool_size); krh@0: krh@0: return 0; krh@0: } krh@0: krh@0: static unsigned int krh@0: hash_string(const char *key) krh@0: { krh@0: const char *p; krh@0: unsigned int hash = 0; krh@0: krh@0: for (p = key; *p; p++) krh@0: hash = (hash << 2) ^ *p; krh@0: krh@0: return hash; krh@0: } krh@0: krh@0: unsigned long krh@0: hashtable_lookup(struct hashtable *ht, const char *key) krh@0: { krh@0: unsigned int start; krh@0: unsigned int mask; krh@0: unsigned long value; krh@0: int i; krh@0: krh@0: mask = ht->bucket_alloc - 1; krh@0: start = hash_string(key) & mask; krh@0: i = start; krh@0: do { krh@0: value = ht->buckets[i]; krh@0: krh@0: if (value == 0) krh@0: return 0; krh@0: krh@0: if (strcmp(key, &ht->string_pool[value]) == 0) krh@0: return value; krh@0: krh@0: i = (i + 1) & mask; krh@0: } while (i != start); krh@0: krh@0: return 0; krh@0: } krh@0: krh@0: static unsigned long krh@0: add_to_string_pool(struct hashtable *ht, const char *key) krh@0: { krh@0: int len, alloc; krh@0: char *pool; krh@0: unsigned long value; krh@0: krh@0: len = strlen(key) + 1; krh@0: alloc = ht->pool_alloc; krh@0: while (alloc < ht->pool_size + len) krh@0: alloc *= 2; krh@0: if (ht->pool_alloc < alloc) { krh@0: pool = realloc(ht->string_pool, alloc); krh@0: if (pool == NULL) krh@0: return 0; krh@0: ht->string_pool = pool; krh@0: ht->pool_alloc = alloc; krh@0: } krh@0: krh@0: memcpy(ht->string_pool + ht->pool_size, key, len); krh@0: value = ht->pool_size; krh@0: ht->pool_size += len; krh@0: krh@0: return value; krh@0: } krh@0: krh@0: static void krh@0: do_insert(struct hashtable *ht, unsigned long value) krh@0: { krh@0: unsigned int mask; krh@0: const char *key; krh@0: int i, start; krh@0: krh@0: key = &ht->string_pool[value]; krh@0: mask = ht->bucket_alloc - 1; krh@0: start = hash_string(key) & mask; krh@0: i = start; krh@0: do { krh@0: if (ht->buckets[i] == 0) { krh@0: ht->buckets[i] = value; krh@0: break; krh@0: } krh@0: i = (i + 1) & mask; krh@0: } while (i != start); krh@0: } krh@0: krh@0: unsigned long krh@0: hashtable_insert(struct hashtable *ht, const char *key) krh@0: { krh@0: unsigned long value, *buckets, *old_buckets; krh@0: int i, alloc, old_alloc; krh@0: krh@0: alloc = ht->bucket_alloc; krh@0: while (alloc < 4 * ht->bucket_count) krh@0: alloc *= 2; krh@0: krh@0: if (alloc != ht->bucket_alloc) { krh@0: buckets = zalloc(alloc * sizeof *ht->buckets); krh@0: if (buckets == NULL) krh@0: return 0; krh@0: old_buckets = ht->buckets; krh@0: ht->buckets = buckets; krh@0: old_alloc = ht->bucket_alloc; krh@0: ht->bucket_alloc = alloc; krh@0: krh@0: for (i = 0; i < old_alloc; i++) { krh@0: value = old_buckets[i]; krh@0: if (value != 0) krh@0: do_insert(ht, value); krh@0: } krh@0: free(old_buckets); krh@0: } krh@0: krh@0: value = add_to_string_pool(ht, key); krh@0: do_insert (ht, value); krh@0: ht->bucket_count++; krh@0: krh@0: return value; krh@0: } krh@0: krh@0: struct razor_context { krh@0: struct hashtable *global_ht; krh@0: }; krh@0: krh@0: struct razor_context * krh@0: razor_context_create (void) krh@0: { krh@0: struct razor_context *ctx; krh@0: krh@0: ctx = malloc(sizeof *ctx); krh@0: ctx->global_ht = hashtable_create(); krh@0: krh@0: return ctx; krh@0: } krh@0: krh@0: struct razor_context * krh@0: razor_context_create_from_file (const char *filename) krh@0: { krh@0: struct razor_context *ctx; krh@0: krh@0: ctx = malloc(sizeof *ctx); krh@0: ctx->global_ht = hashtable_create_from_file(filename); krh@0: krh@0: return ctx; krh@0: } krh@0: krh@0: unsigned long krh@0: razor_context_tokenize(struct razor_context *ctx, const char *string) krh@0: { krh@0: unsigned long token; krh@0: krh@0: token = hashtable_lookup(ctx->global_ht, string); krh@0: if (token != 0) krh@0: return token; krh@0: krh@0: return hashtable_insert(ctx->global_ht, string); krh@0: } krh@0: krh@0: struct razor_set { krh@0: struct razor_context *ctx; krh@0: }; krh@0: krh@0: struct parsing_context { krh@0: struct razor_context *ctx; krh@0: }; krh@0: krh@0: static void krh@0: start_element(void *data, const char *name, const char **atts) krh@0: { krh@0: struct parsing_context *ctx = data; krh@0: int i; krh@0: krh@0: for (i = 0; atts[i]; i += 2) krh@0: razor_context_tokenize(ctx->ctx, atts[i + 1]); krh@0: } krh@0: krh@0: static void krh@0: end_element (void *data, const char *name) krh@0: { krh@0: } krh@0: krh@0: static char * krh@0: sha1_to_hex(const unsigned char *sha1) krh@0: { krh@0: static int bufno; krh@0: static char hexbuffer[4][50]; krh@0: static const char hex[] = "0123456789abcdef"; krh@0: char *buffer = hexbuffer[3 & ++bufno], *buf = buffer; krh@0: int i; krh@0: krh@0: for (i = 0; i < 20; i++) { krh@0: unsigned int val = *sha1++; krh@0: *buf++ = hex[val >> 4]; krh@0: *buf++ = hex[val & 0xf]; krh@0: } krh@0: *buf = '\0'; krh@0: krh@0: return buffer; krh@0: } krh@0: krh@0: static int krh@0: razor_context_read_file(struct razor_context *ctx, const char *filename) krh@0: { krh@0: SHA_CTX sha1; krh@0: XML_Parser parser; krh@0: struct parsing_context pctx; krh@0: int fd; krh@0: void *p; krh@0: struct stat stat; krh@0: char buf[128]; krh@0: unsigned char hash[20]; krh@0: krh@0: fd = open(filename, O_RDONLY); krh@0: if (fstat(fd, &stat) < 0) krh@0: return -1; krh@0: p = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0); krh@0: if (p == MAP_FAILED) krh@0: return -1; krh@0: krh@0: parser = XML_ParserCreate(NULL); krh@0: pctx.ctx = ctx; krh@0: XML_SetUserData(parser, &pctx); krh@0: XML_SetElementHandler(parser, start_element, end_element); krh@0: if (XML_Parse(parser, p, stat.st_size, 1) == XML_STATUS_ERROR) { krh@0: fprintf(stderr, krh@0: "%s at line %d, %s\n", krh@0: XML_ErrorString(XML_GetErrorCode(parser)), krh@0: XML_GetCurrentLineNumber(parser), krh@0: filename); krh@0: return 1; krh@0: } krh@0: krh@0: XML_ParserFree(parser); krh@0: krh@0: SHA1_Init(&sha1); krh@0: SHA1_Update(&sha1, p, stat.st_size); krh@0: SHA1_Final(hash, &sha1); krh@0: krh@0: close(fd); krh@0: krh@0: snprintf(buf, sizeof buf, "set/%s", sha1_to_hex(hash)); krh@0: if (write_to_file(buf, p, stat.st_size) < 0) krh@0: return -1; krh@0: munmap(p, stat.st_size); krh@0: krh@0: return 0; krh@0: } krh@0: krh@0: int krh@0: razor_context_write(struct razor_context *ctx, const char *filename) krh@0: { krh@0: return hashtable_write(ctx->global_ht, filename); krh@0: } krh@0: krh@0: void krh@0: razor_context_destroy(struct razor_context *ctx) krh@0: { krh@0: hashtable_destroy(ctx->global_ht); krh@0: free(ctx); krh@0: } krh@0: krh@0: static int krh@0: usage(void) krh@0: { krh@0: printf("usage: razor [ import FILES | lookup ]\n"); krh@0: exit(1); krh@0: } krh@0: krh@0: static const char repo_filename[] = "system.repo"; krh@0: krh@0: int krh@0: main(int argc, char *argv[]) krh@0: { krh@0: int i; krh@0: struct razor_context *ctx; krh@0: struct stat statbuf; krh@0: krh@0: if (argc < 3) { krh@0: usage(); krh@0: } else if (strcmp(argv[1], "import") == 0) { krh@0: if (stat("set", &statbuf) && mkdir("set", 0777)) { krh@0: fprintf(stderr, "could not create directory 'set'\n"); krh@0: exit(-1); krh@0: } krh@0: krh@0: ctx = razor_context_create(); krh@0: krh@0: for (i = 2; i < argc; i++) { krh@0: if (razor_context_read_file(ctx, argv[i]) < 0) { krh@0: fprintf(stderr, "failed to import %s\n", krh@0: argv[i]); krh@0: exit(-1); krh@0: } krh@0: } krh@0: krh@0: printf("number of buckets: %d\n", krh@0: ctx->global_ht->bucket_count); krh@0: printf("bucket allocation: %d\n", krh@0: ctx->global_ht->bucket_alloc); krh@0: printf("pool size: %d\n", ctx->global_ht->pool_size); krh@0: printf("pool allocation: %d\n", ctx->global_ht->pool_alloc); krh@0: krh@0: razor_context_write(ctx, repo_filename); krh@0: krh@0: razor_context_destroy(ctx); krh@0: } else if (strcmp(argv[1], "lookup") == 0) { krh@0: ctx = razor_context_create_from_file(repo_filename); krh@0: printf("%s is %lu\n", argv[2], krh@0: hashtable_lookup(ctx->global_ht, argv[2])); krh@0: razor_context_destroy(ctx); krh@0: } else { krh@0: usage(); krh@0: } krh@0: krh@0: return 0; krh@0: }