razor.c
changeset 0 e15eb9ef9c28
child 3 917677cdceb3
     1.1 --- /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.2 +++ b/razor.c	Mon Sep 03 14:36:59 2007 -0400
     1.3 @@ -0,0 +1,495 @@
     1.4 +#include <stdlib.h>
     1.5 +#include <stdio.h>
     1.6 +#include <string.h>
     1.7 +#include <sys/types.h>
     1.8 +#include <sys/stat.h>
     1.9 +#include <sys/mman.h>
    1.10 +#include <unistd.h>
    1.11 +#include <fcntl.h>
    1.12 +
    1.13 +#include <expat.h>
    1.14 +#include "sha1.h"
    1.15 +
    1.16 +static int
    1.17 +write_to_fd(int fd, void *p, size_t size)
    1.18 +{
    1.19 +	int rest, len;
    1.20 +
    1.21 +	rest = size;
    1.22 +	while (rest > 0) {
    1.23 +		len = write(fd, p, rest);
    1.24 +		if (len < 0)
    1.25 +			return -1;
    1.26 +		rest -= len;
    1.27 +	}
    1.28 +
    1.29 +	return 0;
    1.30 +}
    1.31 +
    1.32 +static int
    1.33 +write_to_file(const char *filename, void *p, size_t size)
    1.34 +{
    1.35 +	int fd, err;
    1.36 +
    1.37 +	fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
    1.38 +	if (fd < 0)
    1.39 +		return -1;
    1.40 +	err = write_to_fd(fd, p, size);
    1.41 +	close(fd);
    1.42 +
    1.43 +	return err;
    1.44 +}
    1.45 +
    1.46 +struct hashtable_header {
    1.47 +	unsigned int magic;
    1.48 +	unsigned int version;
    1.49 +	struct { unsigned int type, offset; } sections[0];
    1.50 +};
    1.51 +
    1.52 +#define HASHTABLE_MAGIC 0x7a7a7a7a
    1.53 +#define HASHTABLE_VERSION 1
    1.54 +#define HASHTABLE_BUCKETS 1
    1.55 +#define HASHTABLE_STRINGS 2
    1.56 +
    1.57 +struct hashtable {
    1.58 +	unsigned long *buckets;
    1.59 +	int bucket_count, bucket_alloc;
    1.60 +	char *string_pool;
    1.61 +	int pool_size, pool_alloc;
    1.62 +	struct hashtable_header *header;
    1.63 +};
    1.64 +
    1.65 +static void *
    1.66 +zalloc(size_t size)
    1.67 +{
    1.68 +	void *p;
    1.69 +
    1.70 +	p = malloc(size);
    1.71 +	memset(p, 0, size);
    1.72 +
    1.73 +	return p;
    1.74 +}
    1.75 +
    1.76 +struct hashtable *
    1.77 +hashtable_create(void)
    1.78 +{
    1.79 +	struct hashtable *ht;
    1.80 +
    1.81 +	ht = zalloc(sizeof *ht);
    1.82 +	ht->buckets = zalloc(4096 * sizeof *ht->buckets);
    1.83 +	ht->bucket_count = 0;
    1.84 +	ht->bucket_alloc = 4096;
    1.85 +
    1.86 +	ht->string_pool = zalloc(4096);
    1.87 +	ht->pool_size = 1;
    1.88 +	ht->pool_alloc = 4096;
    1.89 +
    1.90 +	return ht;
    1.91 +}
    1.92 +
    1.93 +struct hashtable *
    1.94 +hashtable_create_from_file(const char *filename)
    1.95 +{
    1.96 +	struct hashtable *ht;
    1.97 +	struct stat stat;
    1.98 +	unsigned int size, offset;
    1.99 +	int fd, i;
   1.100 +
   1.101 +	ht = zalloc(sizeof *ht);
   1.102 +	fd = open(filename, O_RDONLY);
   1.103 +	if (fstat(fd, &stat) < 0)
   1.104 +		return NULL;
   1.105 +	ht->header = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
   1.106 +	if (ht->header == MAP_FAILED) {
   1.107 +		free(ht);
   1.108 +		return NULL;
   1.109 +	}
   1.110 +
   1.111 +	for (i = 0; i < ht->header->sections[i].type; i++) {
   1.112 +		offset = ht->header->sections[i].offset;
   1.113 +		size = ht->header->sections[i + 1].offset - offset;
   1.114 +
   1.115 +		switch (ht->header->sections[i].type) {
   1.116 +		case HASHTABLE_BUCKETS:
   1.117 +			ht->buckets = (void *) ht->header + offset;
   1.118 +			ht->bucket_count = size / sizeof *ht->buckets;
   1.119 +			ht->bucket_alloc = ht->bucket_count;
   1.120 +			break;
   1.121 +		case HASHTABLE_STRINGS:
   1.122 +			ht->string_pool = (void *) ht->header + offset;
   1.123 +			ht->pool_size = size;
   1.124 +			ht->pool_alloc = size;
   1.125 +			break;
   1.126 +		}
   1.127 +	}
   1.128 +	close(fd);
   1.129 +
   1.130 +	return ht;
   1.131 +}
   1.132 +
   1.133 +void
   1.134 +hashtable_destroy(struct hashtable *ht)
   1.135 +{
   1.136 +	unsigned int size;
   1.137 +	int i;
   1.138 +
   1.139 +	if (ht->header) {
   1.140 +		for (i = 0; ht->header->sections[i].type; i++)
   1.141 +			;
   1.142 +		size = ht->header->sections[i].type;
   1.143 +		munmap(ht->header, size);
   1.144 +	} else {
   1.145 +		free(ht->buckets);
   1.146 +		free(ht->string_pool);
   1.147 +	}
   1.148 +
   1.149 +	free(ht);
   1.150 +}
   1.151 +
   1.152 +static int
   1.153 +hashtable_write(struct hashtable *ht, const char *filename)
   1.154 +{
   1.155 +	int fd;
   1.156 +	char data[4096];
   1.157 +	struct hashtable_header *header = (struct hashtable_header *) data;
   1.158 +
   1.159 +	memset(data, 0, sizeof data);
   1.160 +	header->magic = HASHTABLE_MAGIC;
   1.161 +	header->version = HASHTABLE_VERSION;
   1.162 +
   1.163 +	header->sections[0].type = HASHTABLE_BUCKETS;
   1.164 +	header->sections[0].offset = sizeof data;
   1.165 +
   1.166 +	header->sections[1].type = HASHTABLE_STRINGS;
   1.167 +	header->sections[1].offset =
   1.168 +		sizeof data + ht->bucket_alloc * sizeof *ht->buckets;
   1.169 +
   1.170 +	header->sections[2].type = 0;
   1.171 +	header->sections[2].offset =
   1.172 +		header->sections[1].offset + ht->pool_size;
   1.173 +
   1.174 +	fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
   1.175 +	if (fd < 0)
   1.176 +		return -1;
   1.177 +
   1.178 +	write_to_fd(fd, data, sizeof data);
   1.179 +	write_to_fd(fd, ht->buckets, ht->bucket_alloc * sizeof *ht->buckets);
   1.180 +	write_to_fd(fd, ht->string_pool, ht->pool_size);
   1.181 +
   1.182 +	return 0;
   1.183 +}
   1.184 +
   1.185 +static unsigned int
   1.186 +hash_string(const char *key)
   1.187 +{
   1.188 +	const char *p;
   1.189 +	unsigned int hash = 0;
   1.190 +
   1.191 +	for (p = key; *p; p++)
   1.192 +		hash = (hash << 2) ^ *p;
   1.193 +
   1.194 +	return hash;
   1.195 +}
   1.196 +
   1.197 +unsigned long
   1.198 +hashtable_lookup(struct hashtable *ht, const char *key)
   1.199 +{
   1.200 +	unsigned int start;
   1.201 +	unsigned int mask;
   1.202 +	unsigned long value;
   1.203 +	int i;
   1.204 +
   1.205 +	mask = ht->bucket_alloc - 1;
   1.206 +	start = hash_string(key) & mask;
   1.207 +	i = start;
   1.208 +	do {
   1.209 +		value = ht->buckets[i];
   1.210 +
   1.211 +		if (value == 0)
   1.212 +			return 0;
   1.213 +
   1.214 +		if (strcmp(key, &ht->string_pool[value]) == 0)
   1.215 +			return value;
   1.216 +
   1.217 +		i = (i + 1) & mask;
   1.218 +	} while (i != start);
   1.219 +
   1.220 +	return 0;
   1.221 +}
   1.222 +
   1.223 +static unsigned long
   1.224 +add_to_string_pool(struct hashtable *ht, const char *key)
   1.225 +{
   1.226 +	int len, alloc;
   1.227 +	char *pool;
   1.228 +	unsigned long value;
   1.229 +
   1.230 +	len = strlen(key) + 1;
   1.231 +	alloc = ht->pool_alloc;
   1.232 +	while (alloc < ht->pool_size + len)
   1.233 +		alloc *= 2;
   1.234 +	if (ht->pool_alloc < alloc) {
   1.235 +		pool = realloc(ht->string_pool, alloc);
   1.236 +		if (pool == NULL)
   1.237 +			return 0;
   1.238 +		ht->string_pool = pool;
   1.239 +		ht->pool_alloc = alloc;
   1.240 +	}
   1.241 +
   1.242 +	memcpy(ht->string_pool + ht->pool_size, key, len);
   1.243 +	value = ht->pool_size;
   1.244 +	ht->pool_size += len;
   1.245 +
   1.246 +	return value;
   1.247 +}
   1.248 +
   1.249 +static void
   1.250 +do_insert(struct hashtable *ht, unsigned long value)
   1.251 +{
   1.252 +	unsigned int mask;
   1.253 +	const char *key;
   1.254 +	int i, start;
   1.255 +
   1.256 +	key = &ht->string_pool[value];
   1.257 +	mask = ht->bucket_alloc - 1;
   1.258 +	start = hash_string(key) & mask;
   1.259 +	i = start;
   1.260 +	do {
   1.261 +		if (ht->buckets[i] == 0) {
   1.262 +			ht->buckets[i] = value;
   1.263 +			break;
   1.264 +		}
   1.265 +		i = (i + 1) & mask;
   1.266 +	} while (i != start);
   1.267 +}
   1.268 +
   1.269 +unsigned long
   1.270 +hashtable_insert(struct hashtable *ht, const char *key)
   1.271 +{
   1.272 +	unsigned long value, *buckets, *old_buckets;
   1.273 +	int i, alloc, old_alloc;
   1.274 +
   1.275 +	alloc = ht->bucket_alloc;
   1.276 +	while (alloc < 4 * ht->bucket_count)
   1.277 +		alloc *= 2;
   1.278 +
   1.279 +	if (alloc != ht->bucket_alloc) {
   1.280 +		buckets = zalloc(alloc * sizeof *ht->buckets);
   1.281 +		if (buckets == NULL)
   1.282 +			return 0;
   1.283 +		old_buckets = ht->buckets;
   1.284 +		ht->buckets = buckets;
   1.285 +		old_alloc = ht->bucket_alloc;
   1.286 +		ht->bucket_alloc = alloc;
   1.287 +		
   1.288 +		for (i = 0; i < old_alloc; i++) {
   1.289 +			value = old_buckets[i];
   1.290 +			if (value != 0)
   1.291 +				do_insert(ht, value);
   1.292 +		}
   1.293 +		free(old_buckets);
   1.294 +	}
   1.295 +
   1.296 +	value = add_to_string_pool(ht, key);
   1.297 +	do_insert (ht, value);
   1.298 +	ht->bucket_count++;
   1.299 +
   1.300 +	return value;
   1.301 +}
   1.302 +
   1.303 +struct razor_context {
   1.304 +	struct hashtable *global_ht;
   1.305 +};
   1.306 +
   1.307 +struct razor_context *
   1.308 +razor_context_create (void)
   1.309 +{
   1.310 +	struct razor_context *ctx;
   1.311 +
   1.312 +	ctx = malloc(sizeof *ctx);
   1.313 +	ctx->global_ht = hashtable_create();
   1.314 +
   1.315 +	return ctx;
   1.316 +}
   1.317 +
   1.318 +struct razor_context *
   1.319 +razor_context_create_from_file (const char *filename)
   1.320 +{
   1.321 +	struct razor_context *ctx;
   1.322 +
   1.323 +	ctx = malloc(sizeof *ctx);
   1.324 +	ctx->global_ht = hashtable_create_from_file(filename);
   1.325 +
   1.326 +	return ctx;
   1.327 +}
   1.328 +
   1.329 +unsigned long
   1.330 +razor_context_tokenize(struct razor_context *ctx, const char *string)
   1.331 +{
   1.332 +	unsigned long token;
   1.333 +
   1.334 +	token = hashtable_lookup(ctx->global_ht, string);
   1.335 +	if (token != 0)
   1.336 +		return token;
   1.337 +
   1.338 +	return hashtable_insert(ctx->global_ht, string);
   1.339 +}
   1.340 +
   1.341 +struct razor_set {
   1.342 +	struct razor_context *ctx;
   1.343 +};
   1.344 +
   1.345 +struct parsing_context {
   1.346 +	struct razor_context *ctx;
   1.347 +};
   1.348 +
   1.349 +static void
   1.350 +start_element(void *data, const char *name, const char **atts)
   1.351 +{
   1.352 +	struct parsing_context *ctx = data;
   1.353 +	int i;
   1.354 +
   1.355 +	for (i = 0; atts[i]; i += 2)
   1.356 +		razor_context_tokenize(ctx->ctx, atts[i + 1]);
   1.357 +}
   1.358 +
   1.359 +static void
   1.360 +end_element (void *data, const char *name)
   1.361 +{
   1.362 +}
   1.363 +
   1.364 +static char *
   1.365 +sha1_to_hex(const unsigned char *sha1)
   1.366 +{
   1.367 +	static int bufno;
   1.368 +	static char hexbuffer[4][50];
   1.369 +	static const char hex[] = "0123456789abcdef";
   1.370 +	char *buffer = hexbuffer[3 & ++bufno], *buf = buffer;
   1.371 +	int i;
   1.372 +
   1.373 +	for (i = 0; i < 20; i++) {
   1.374 +		unsigned int val = *sha1++;
   1.375 +		*buf++ = hex[val >> 4];
   1.376 +		*buf++ = hex[val & 0xf];
   1.377 +	}
   1.378 +	*buf = '\0';
   1.379 +
   1.380 +	return buffer;
   1.381 +}
   1.382 +
   1.383 +static int
   1.384 +razor_context_read_file(struct razor_context *ctx, const char *filename)
   1.385 +{
   1.386 +	SHA_CTX sha1;
   1.387 +	XML_Parser parser;
   1.388 +	struct parsing_context pctx;
   1.389 +	int fd;
   1.390 +	void *p;
   1.391 +	struct stat stat;
   1.392 +	char buf[128];
   1.393 +	unsigned char hash[20];
   1.394 +
   1.395 +	fd = open(filename, O_RDONLY);
   1.396 +	if (fstat(fd, &stat) < 0)
   1.397 +		return -1;
   1.398 +	p = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
   1.399 +	if (p == MAP_FAILED)
   1.400 +		return -1;
   1.401 +
   1.402 +	parser = XML_ParserCreate(NULL);
   1.403 +	pctx.ctx = ctx;
   1.404 +	XML_SetUserData(parser, &pctx);
   1.405 +	XML_SetElementHandler(parser, start_element, end_element);
   1.406 +	if (XML_Parse(parser, p, stat.st_size, 1) == XML_STATUS_ERROR) {
   1.407 +		fprintf(stderr,
   1.408 +			"%s at line %d, %s\n",
   1.409 +			XML_ErrorString(XML_GetErrorCode(parser)),
   1.410 +			XML_GetCurrentLineNumber(parser),
   1.411 +			filename);
   1.412 +		return 1;
   1.413 +	}
   1.414 +
   1.415 +	XML_ParserFree(parser);
   1.416 +
   1.417 +	SHA1_Init(&sha1);
   1.418 +	SHA1_Update(&sha1, p, stat.st_size);
   1.419 +	SHA1_Final(hash, &sha1);
   1.420 +
   1.421 +	close(fd);
   1.422 +
   1.423 +	snprintf(buf, sizeof buf, "set/%s", sha1_to_hex(hash));
   1.424 +	if (write_to_file(buf, p, stat.st_size) < 0)
   1.425 +		return -1;
   1.426 +	munmap(p, stat.st_size);
   1.427 +
   1.428 +	return 0;
   1.429 +}
   1.430 +
   1.431 +int
   1.432 +razor_context_write(struct razor_context *ctx, const char *filename)
   1.433 +{
   1.434 +	return hashtable_write(ctx->global_ht, filename);
   1.435 +}
   1.436 +
   1.437 +void
   1.438 +razor_context_destroy(struct razor_context *ctx)
   1.439 +{
   1.440 +	hashtable_destroy(ctx->global_ht);
   1.441 +	free(ctx);
   1.442 +}
   1.443 +
   1.444 +static int
   1.445 +usage(void)
   1.446 +{
   1.447 +	printf("usage: razor [ import FILES | lookup <key> ]\n");
   1.448 +	exit(1);
   1.449 +}
   1.450 +
   1.451 +static const char repo_filename[] = "system.repo";
   1.452 +
   1.453 +int
   1.454 +main(int argc, char *argv[])
   1.455 +{
   1.456 +	int i;
   1.457 +	struct razor_context *ctx;
   1.458 +	struct stat statbuf;
   1.459 +
   1.460 +	if (argc < 3) {
   1.461 +		usage();
   1.462 +	} else if (strcmp(argv[1], "import") == 0) {
   1.463 +		if (stat("set", &statbuf) && mkdir("set", 0777)) {
   1.464 +			fprintf(stderr, "could not create directory 'set'\n");
   1.465 +			exit(-1);
   1.466 +		}
   1.467 +			
   1.468 +		ctx = razor_context_create();
   1.469 +
   1.470 +		for (i = 2; i < argc; i++) {
   1.471 +			if (razor_context_read_file(ctx, argv[i]) < 0) {
   1.472 +				fprintf(stderr, "failed to import %s\n",
   1.473 +					argv[i]);
   1.474 +				exit(-1);
   1.475 +			}
   1.476 +		}
   1.477 +
   1.478 +		printf("number of buckets: %d\n",
   1.479 +		       ctx->global_ht->bucket_count);
   1.480 +		printf("bucket allocation: %d\n",
   1.481 +		       ctx->global_ht->bucket_alloc);
   1.482 +		printf("pool size: %d\n", ctx->global_ht->pool_size);
   1.483 +		printf("pool allocation: %d\n", ctx->global_ht->pool_alloc);
   1.484 +
   1.485 +		razor_context_write(ctx, repo_filename);
   1.486 +
   1.487 +		razor_context_destroy(ctx);
   1.488 +	} else if (strcmp(argv[1], "lookup") == 0) {
   1.489 +		ctx = razor_context_create_from_file(repo_filename);
   1.490 +		printf("%s is %lu\n", argv[2],
   1.491 +		       hashtable_lookup(ctx->global_ht, argv[2]));
   1.492 +		razor_context_destroy(ctx);
   1.493 +	} else {
   1.494 +		usage();
   1.495 +	}
   1.496 +
   1.497 +	return 0;
   1.498 +}