1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/razor.c Mon Sep 03 14:36:59 2007 -0400
1.3 @@ -0,0 +1,495 @@
1.4 +#include <stdlib.h>
1.5 +#include <stdio.h>
1.6 +#include <string.h>
1.7 +#include <sys/types.h>
1.8 +#include <sys/stat.h>
1.9 +#include <sys/mman.h>
1.10 +#include <unistd.h>
1.11 +#include <fcntl.h>
1.12 +
1.13 +#include <expat.h>
1.14 +#include "sha1.h"
1.15 +
1.16 +static int
1.17 +write_to_fd(int fd, void *p, size_t size)
1.18 +{
1.19 + int rest, len;
1.20 +
1.21 + rest = size;
1.22 + while (rest > 0) {
1.23 + len = write(fd, p, rest);
1.24 + if (len < 0)
1.25 + return -1;
1.26 + rest -= len;
1.27 + }
1.28 +
1.29 + return 0;
1.30 +}
1.31 +
1.32 +static int
1.33 +write_to_file(const char *filename, void *p, size_t size)
1.34 +{
1.35 + int fd, err;
1.36 +
1.37 + fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
1.38 + if (fd < 0)
1.39 + return -1;
1.40 + err = write_to_fd(fd, p, size);
1.41 + close(fd);
1.42 +
1.43 + return err;
1.44 +}
1.45 +
1.46 +struct hashtable_header {
1.47 + unsigned int magic;
1.48 + unsigned int version;
1.49 + struct { unsigned int type, offset; } sections[0];
1.50 +};
1.51 +
1.52 +#define HASHTABLE_MAGIC 0x7a7a7a7a
1.53 +#define HASHTABLE_VERSION 1
1.54 +#define HASHTABLE_BUCKETS 1
1.55 +#define HASHTABLE_STRINGS 2
1.56 +
1.57 +struct hashtable {
1.58 + unsigned long *buckets;
1.59 + int bucket_count, bucket_alloc;
1.60 + char *string_pool;
1.61 + int pool_size, pool_alloc;
1.62 + struct hashtable_header *header;
1.63 +};
1.64 +
1.65 +static void *
1.66 +zalloc(size_t size)
1.67 +{
1.68 + void *p;
1.69 +
1.70 + p = malloc(size);
1.71 + memset(p, 0, size);
1.72 +
1.73 + return p;
1.74 +}
1.75 +
1.76 +struct hashtable *
1.77 +hashtable_create(void)
1.78 +{
1.79 + struct hashtable *ht;
1.80 +
1.81 + ht = zalloc(sizeof *ht);
1.82 + ht->buckets = zalloc(4096 * sizeof *ht->buckets);
1.83 + ht->bucket_count = 0;
1.84 + ht->bucket_alloc = 4096;
1.85 +
1.86 + ht->string_pool = zalloc(4096);
1.87 + ht->pool_size = 1;
1.88 + ht->pool_alloc = 4096;
1.89 +
1.90 + return ht;
1.91 +}
1.92 +
1.93 +struct hashtable *
1.94 +hashtable_create_from_file(const char *filename)
1.95 +{
1.96 + struct hashtable *ht;
1.97 + struct stat stat;
1.98 + unsigned int size, offset;
1.99 + int fd, i;
1.100 +
1.101 + ht = zalloc(sizeof *ht);
1.102 + fd = open(filename, O_RDONLY);
1.103 + if (fstat(fd, &stat) < 0)
1.104 + return NULL;
1.105 + ht->header = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
1.106 + if (ht->header == MAP_FAILED) {
1.107 + free(ht);
1.108 + return NULL;
1.109 + }
1.110 +
1.111 + for (i = 0; i < ht->header->sections[i].type; i++) {
1.112 + offset = ht->header->sections[i].offset;
1.113 + size = ht->header->sections[i + 1].offset - offset;
1.114 +
1.115 + switch (ht->header->sections[i].type) {
1.116 + case HASHTABLE_BUCKETS:
1.117 + ht->buckets = (void *) ht->header + offset;
1.118 + ht->bucket_count = size / sizeof *ht->buckets;
1.119 + ht->bucket_alloc = ht->bucket_count;
1.120 + break;
1.121 + case HASHTABLE_STRINGS:
1.122 + ht->string_pool = (void *) ht->header + offset;
1.123 + ht->pool_size = size;
1.124 + ht->pool_alloc = size;
1.125 + break;
1.126 + }
1.127 + }
1.128 + close(fd);
1.129 +
1.130 + return ht;
1.131 +}
1.132 +
1.133 +void
1.134 +hashtable_destroy(struct hashtable *ht)
1.135 +{
1.136 + unsigned int size;
1.137 + int i;
1.138 +
1.139 + if (ht->header) {
1.140 + for (i = 0; ht->header->sections[i].type; i++)
1.141 + ;
1.142 + size = ht->header->sections[i].type;
1.143 + munmap(ht->header, size);
1.144 + } else {
1.145 + free(ht->buckets);
1.146 + free(ht->string_pool);
1.147 + }
1.148 +
1.149 + free(ht);
1.150 +}
1.151 +
1.152 +static int
1.153 +hashtable_write(struct hashtable *ht, const char *filename)
1.154 +{
1.155 + int fd;
1.156 + char data[4096];
1.157 + struct hashtable_header *header = (struct hashtable_header *) data;
1.158 +
1.159 + memset(data, 0, sizeof data);
1.160 + header->magic = HASHTABLE_MAGIC;
1.161 + header->version = HASHTABLE_VERSION;
1.162 +
1.163 + header->sections[0].type = HASHTABLE_BUCKETS;
1.164 + header->sections[0].offset = sizeof data;
1.165 +
1.166 + header->sections[1].type = HASHTABLE_STRINGS;
1.167 + header->sections[1].offset =
1.168 + sizeof data + ht->bucket_alloc * sizeof *ht->buckets;
1.169 +
1.170 + header->sections[2].type = 0;
1.171 + header->sections[2].offset =
1.172 + header->sections[1].offset + ht->pool_size;
1.173 +
1.174 + fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
1.175 + if (fd < 0)
1.176 + return -1;
1.177 +
1.178 + write_to_fd(fd, data, sizeof data);
1.179 + write_to_fd(fd, ht->buckets, ht->bucket_alloc * sizeof *ht->buckets);
1.180 + write_to_fd(fd, ht->string_pool, ht->pool_size);
1.181 +
1.182 + return 0;
1.183 +}
1.184 +
1.185 +static unsigned int
1.186 +hash_string(const char *key)
1.187 +{
1.188 + const char *p;
1.189 + unsigned int hash = 0;
1.190 +
1.191 + for (p = key; *p; p++)
1.192 + hash = (hash << 2) ^ *p;
1.193 +
1.194 + return hash;
1.195 +}
1.196 +
1.197 +unsigned long
1.198 +hashtable_lookup(struct hashtable *ht, const char *key)
1.199 +{
1.200 + unsigned int start;
1.201 + unsigned int mask;
1.202 + unsigned long value;
1.203 + int i;
1.204 +
1.205 + mask = ht->bucket_alloc - 1;
1.206 + start = hash_string(key) & mask;
1.207 + i = start;
1.208 + do {
1.209 + value = ht->buckets[i];
1.210 +
1.211 + if (value == 0)
1.212 + return 0;
1.213 +
1.214 + if (strcmp(key, &ht->string_pool[value]) == 0)
1.215 + return value;
1.216 +
1.217 + i = (i + 1) & mask;
1.218 + } while (i != start);
1.219 +
1.220 + return 0;
1.221 +}
1.222 +
1.223 +static unsigned long
1.224 +add_to_string_pool(struct hashtable *ht, const char *key)
1.225 +{
1.226 + int len, alloc;
1.227 + char *pool;
1.228 + unsigned long value;
1.229 +
1.230 + len = strlen(key) + 1;
1.231 + alloc = ht->pool_alloc;
1.232 + while (alloc < ht->pool_size + len)
1.233 + alloc *= 2;
1.234 + if (ht->pool_alloc < alloc) {
1.235 + pool = realloc(ht->string_pool, alloc);
1.236 + if (pool == NULL)
1.237 + return 0;
1.238 + ht->string_pool = pool;
1.239 + ht->pool_alloc = alloc;
1.240 + }
1.241 +
1.242 + memcpy(ht->string_pool + ht->pool_size, key, len);
1.243 + value = ht->pool_size;
1.244 + ht->pool_size += len;
1.245 +
1.246 + return value;
1.247 +}
1.248 +
1.249 +static void
1.250 +do_insert(struct hashtable *ht, unsigned long value)
1.251 +{
1.252 + unsigned int mask;
1.253 + const char *key;
1.254 + int i, start;
1.255 +
1.256 + key = &ht->string_pool[value];
1.257 + mask = ht->bucket_alloc - 1;
1.258 + start = hash_string(key) & mask;
1.259 + i = start;
1.260 + do {
1.261 + if (ht->buckets[i] == 0) {
1.262 + ht->buckets[i] = value;
1.263 + break;
1.264 + }
1.265 + i = (i + 1) & mask;
1.266 + } while (i != start);
1.267 +}
1.268 +
1.269 +unsigned long
1.270 +hashtable_insert(struct hashtable *ht, const char *key)
1.271 +{
1.272 + unsigned long value, *buckets, *old_buckets;
1.273 + int i, alloc, old_alloc;
1.274 +
1.275 + alloc = ht->bucket_alloc;
1.276 + while (alloc < 4 * ht->bucket_count)
1.277 + alloc *= 2;
1.278 +
1.279 + if (alloc != ht->bucket_alloc) {
1.280 + buckets = zalloc(alloc * sizeof *ht->buckets);
1.281 + if (buckets == NULL)
1.282 + return 0;
1.283 + old_buckets = ht->buckets;
1.284 + ht->buckets = buckets;
1.285 + old_alloc = ht->bucket_alloc;
1.286 + ht->bucket_alloc = alloc;
1.287 +
1.288 + for (i = 0; i < old_alloc; i++) {
1.289 + value = old_buckets[i];
1.290 + if (value != 0)
1.291 + do_insert(ht, value);
1.292 + }
1.293 + free(old_buckets);
1.294 + }
1.295 +
1.296 + value = add_to_string_pool(ht, key);
1.297 + do_insert (ht, value);
1.298 + ht->bucket_count++;
1.299 +
1.300 + return value;
1.301 +}
1.302 +
1.303 +struct razor_context {
1.304 + struct hashtable *global_ht;
1.305 +};
1.306 +
1.307 +struct razor_context *
1.308 +razor_context_create (void)
1.309 +{
1.310 + struct razor_context *ctx;
1.311 +
1.312 + ctx = malloc(sizeof *ctx);
1.313 + ctx->global_ht = hashtable_create();
1.314 +
1.315 + return ctx;
1.316 +}
1.317 +
1.318 +struct razor_context *
1.319 +razor_context_create_from_file (const char *filename)
1.320 +{
1.321 + struct razor_context *ctx;
1.322 +
1.323 + ctx = malloc(sizeof *ctx);
1.324 + ctx->global_ht = hashtable_create_from_file(filename);
1.325 +
1.326 + return ctx;
1.327 +}
1.328 +
1.329 +unsigned long
1.330 +razor_context_tokenize(struct razor_context *ctx, const char *string)
1.331 +{
1.332 + unsigned long token;
1.333 +
1.334 + token = hashtable_lookup(ctx->global_ht, string);
1.335 + if (token != 0)
1.336 + return token;
1.337 +
1.338 + return hashtable_insert(ctx->global_ht, string);
1.339 +}
1.340 +
1.341 +struct razor_set {
1.342 + struct razor_context *ctx;
1.343 +};
1.344 +
1.345 +struct parsing_context {
1.346 + struct razor_context *ctx;
1.347 +};
1.348 +
1.349 +static void
1.350 +start_element(void *data, const char *name, const char **atts)
1.351 +{
1.352 + struct parsing_context *ctx = data;
1.353 + int i;
1.354 +
1.355 + for (i = 0; atts[i]; i += 2)
1.356 + razor_context_tokenize(ctx->ctx, atts[i + 1]);
1.357 +}
1.358 +
1.359 +static void
1.360 +end_element (void *data, const char *name)
1.361 +{
1.362 +}
1.363 +
1.364 +static char *
1.365 +sha1_to_hex(const unsigned char *sha1)
1.366 +{
1.367 + static int bufno;
1.368 + static char hexbuffer[4][50];
1.369 + static const char hex[] = "0123456789abcdef";
1.370 + char *buffer = hexbuffer[3 & ++bufno], *buf = buffer;
1.371 + int i;
1.372 +
1.373 + for (i = 0; i < 20; i++) {
1.374 + unsigned int val = *sha1++;
1.375 + *buf++ = hex[val >> 4];
1.376 + *buf++ = hex[val & 0xf];
1.377 + }
1.378 + *buf = '\0';
1.379 +
1.380 + return buffer;
1.381 +}
1.382 +
1.383 +static int
1.384 +razor_context_read_file(struct razor_context *ctx, const char *filename)
1.385 +{
1.386 + SHA_CTX sha1;
1.387 + XML_Parser parser;
1.388 + struct parsing_context pctx;
1.389 + int fd;
1.390 + void *p;
1.391 + struct stat stat;
1.392 + char buf[128];
1.393 + unsigned char hash[20];
1.394 +
1.395 + fd = open(filename, O_RDONLY);
1.396 + if (fstat(fd, &stat) < 0)
1.397 + return -1;
1.398 + p = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
1.399 + if (p == MAP_FAILED)
1.400 + return -1;
1.401 +
1.402 + parser = XML_ParserCreate(NULL);
1.403 + pctx.ctx = ctx;
1.404 + XML_SetUserData(parser, &pctx);
1.405 + XML_SetElementHandler(parser, start_element, end_element);
1.406 + if (XML_Parse(parser, p, stat.st_size, 1) == XML_STATUS_ERROR) {
1.407 + fprintf(stderr,
1.408 + "%s at line %d, %s\n",
1.409 + XML_ErrorString(XML_GetErrorCode(parser)),
1.410 + XML_GetCurrentLineNumber(parser),
1.411 + filename);
1.412 + return 1;
1.413 + }
1.414 +
1.415 + XML_ParserFree(parser);
1.416 +
1.417 + SHA1_Init(&sha1);
1.418 + SHA1_Update(&sha1, p, stat.st_size);
1.419 + SHA1_Final(hash, &sha1);
1.420 +
1.421 + close(fd);
1.422 +
1.423 + snprintf(buf, sizeof buf, "set/%s", sha1_to_hex(hash));
1.424 + if (write_to_file(buf, p, stat.st_size) < 0)
1.425 + return -1;
1.426 + munmap(p, stat.st_size);
1.427 +
1.428 + return 0;
1.429 +}
1.430 +
1.431 +int
1.432 +razor_context_write(struct razor_context *ctx, const char *filename)
1.433 +{
1.434 + return hashtable_write(ctx->global_ht, filename);
1.435 +}
1.436 +
1.437 +void
1.438 +razor_context_destroy(struct razor_context *ctx)
1.439 +{
1.440 + hashtable_destroy(ctx->global_ht);
1.441 + free(ctx);
1.442 +}
1.443 +
1.444 +static int
1.445 +usage(void)
1.446 +{
1.447 + printf("usage: razor [ import FILES | lookup <key> ]\n");
1.448 + exit(1);
1.449 +}
1.450 +
1.451 +static const char repo_filename[] = "system.repo";
1.452 +
1.453 +int
1.454 +main(int argc, char *argv[])
1.455 +{
1.456 + int i;
1.457 + struct razor_context *ctx;
1.458 + struct stat statbuf;
1.459 +
1.460 + if (argc < 3) {
1.461 + usage();
1.462 + } else if (strcmp(argv[1], "import") == 0) {
1.463 + if (stat("set", &statbuf) && mkdir("set", 0777)) {
1.464 + fprintf(stderr, "could not create directory 'set'\n");
1.465 + exit(-1);
1.466 + }
1.467 +
1.468 + ctx = razor_context_create();
1.469 +
1.470 + for (i = 2; i < argc; i++) {
1.471 + if (razor_context_read_file(ctx, argv[i]) < 0) {
1.472 + fprintf(stderr, "failed to import %s\n",
1.473 + argv[i]);
1.474 + exit(-1);
1.475 + }
1.476 + }
1.477 +
1.478 + printf("number of buckets: %d\n",
1.479 + ctx->global_ht->bucket_count);
1.480 + printf("bucket allocation: %d\n",
1.481 + ctx->global_ht->bucket_alloc);
1.482 + printf("pool size: %d\n", ctx->global_ht->pool_size);
1.483 + printf("pool allocation: %d\n", ctx->global_ht->pool_alloc);
1.484 +
1.485 + razor_context_write(ctx, repo_filename);
1.486 +
1.487 + razor_context_destroy(ctx);
1.488 + } else if (strcmp(argv[1], "lookup") == 0) {
1.489 + ctx = razor_context_create_from_file(repo_filename);
1.490 + printf("%s is %lu\n", argv[2],
1.491 + hashtable_lookup(ctx->global_ht, argv[2]));
1.492 + razor_context_destroy(ctx);
1.493 + } else {
1.494 + usage();
1.495 + }
1.496 +
1.497 + return 0;
1.498 +}