razor.c
author Kristian Høgsberg <krh@redhat.com>
Mon Sep 03 23:10:25 2007 -0400 (2007-09-03)
changeset 2 08c5b27e1952
child 3 917677cdceb3
permissions -rw-r--r--
Improve import script to also pull version and release fields.
     1 #include <stdlib.h>
     2 #include <stdio.h>
     3 #include <string.h>
     4 #include <sys/types.h>
     5 #include <sys/stat.h>
     6 #include <sys/mman.h>
     7 #include <unistd.h>
     8 #include <fcntl.h>
     9 
    10 #include <expat.h>
    11 #include "sha1.h"
    12 
    13 static int
    14 write_to_fd(int fd, void *p, size_t size)
    15 {
    16 	int rest, len;
    17 
    18 	rest = size;
    19 	while (rest > 0) {
    20 		len = write(fd, p, rest);
    21 		if (len < 0)
    22 			return -1;
    23 		rest -= len;
    24 	}
    25 
    26 	return 0;
    27 }
    28 
    29 static int
    30 write_to_file(const char *filename, void *p, size_t size)
    31 {
    32 	int fd, err;
    33 
    34 	fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
    35 	if (fd < 0)
    36 		return -1;
    37 	err = write_to_fd(fd, p, size);
    38 	close(fd);
    39 
    40 	return err;
    41 }
    42 
    43 struct hashtable_header {
    44 	unsigned int magic;
    45 	unsigned int version;
    46 	struct { unsigned int type, offset; } sections[0];
    47 };
    48 
    49 #define HASHTABLE_MAGIC 0x7a7a7a7a
    50 #define HASHTABLE_VERSION 1
    51 #define HASHTABLE_BUCKETS 1
    52 #define HASHTABLE_STRINGS 2
    53 
    54 struct hashtable {
    55 	unsigned long *buckets;
    56 	int bucket_count, bucket_alloc;
    57 	char *string_pool;
    58 	int pool_size, pool_alloc;
    59 	struct hashtable_header *header;
    60 };
    61 
    62 static void *
    63 zalloc(size_t size)
    64 {
    65 	void *p;
    66 
    67 	p = malloc(size);
    68 	memset(p, 0, size);
    69 
    70 	return p;
    71 }
    72 
    73 struct hashtable *
    74 hashtable_create(void)
    75 {
    76 	struct hashtable *ht;
    77 
    78 	ht = zalloc(sizeof *ht);
    79 	ht->buckets = zalloc(4096 * sizeof *ht->buckets);
    80 	ht->bucket_count = 0;
    81 	ht->bucket_alloc = 4096;
    82 
    83 	ht->string_pool = zalloc(4096);
    84 	ht->pool_size = 1;
    85 	ht->pool_alloc = 4096;
    86 
    87 	return ht;
    88 }
    89 
    90 struct hashtable *
    91 hashtable_create_from_file(const char *filename)
    92 {
    93 	struct hashtable *ht;
    94 	struct stat stat;
    95 	unsigned int size, offset;
    96 	int fd, i;
    97 
    98 	ht = zalloc(sizeof *ht);
    99 	fd = open(filename, O_RDONLY);
   100 	if (fstat(fd, &stat) < 0)
   101 		return NULL;
   102 	ht->header = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
   103 	if (ht->header == MAP_FAILED) {
   104 		free(ht);
   105 		return NULL;
   106 	}
   107 
   108 	for (i = 0; i < ht->header->sections[i].type; i++) {
   109 		offset = ht->header->sections[i].offset;
   110 		size = ht->header->sections[i + 1].offset - offset;
   111 
   112 		switch (ht->header->sections[i].type) {
   113 		case HASHTABLE_BUCKETS:
   114 			ht->buckets = (void *) ht->header + offset;
   115 			ht->bucket_count = size / sizeof *ht->buckets;
   116 			ht->bucket_alloc = ht->bucket_count;
   117 			break;
   118 		case HASHTABLE_STRINGS:
   119 			ht->string_pool = (void *) ht->header + offset;
   120 			ht->pool_size = size;
   121 			ht->pool_alloc = size;
   122 			break;
   123 		}
   124 	}
   125 	close(fd);
   126 
   127 	return ht;
   128 }
   129 
   130 void
   131 hashtable_destroy(struct hashtable *ht)
   132 {
   133 	unsigned int size;
   134 	int i;
   135 
   136 	if (ht->header) {
   137 		for (i = 0; ht->header->sections[i].type; i++)
   138 			;
   139 		size = ht->header->sections[i].type;
   140 		munmap(ht->header, size);
   141 	} else {
   142 		free(ht->buckets);
   143 		free(ht->string_pool);
   144 	}
   145 
   146 	free(ht);
   147 }
   148 
   149 static int
   150 hashtable_write(struct hashtable *ht, const char *filename)
   151 {
   152 	int fd;
   153 	char data[4096];
   154 	struct hashtable_header *header = (struct hashtable_header *) data;
   155 
   156 	memset(data, 0, sizeof data);
   157 	header->magic = HASHTABLE_MAGIC;
   158 	header->version = HASHTABLE_VERSION;
   159 
   160 	header->sections[0].type = HASHTABLE_BUCKETS;
   161 	header->sections[0].offset = sizeof data;
   162 
   163 	header->sections[1].type = HASHTABLE_STRINGS;
   164 	header->sections[1].offset =
   165 		sizeof data + ht->bucket_alloc * sizeof *ht->buckets;
   166 
   167 	header->sections[2].type = 0;
   168 	header->sections[2].offset =
   169 		header->sections[1].offset + ht->pool_size;
   170 
   171 	fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
   172 	if (fd < 0)
   173 		return -1;
   174 
   175 	write_to_fd(fd, data, sizeof data);
   176 	write_to_fd(fd, ht->buckets, ht->bucket_alloc * sizeof *ht->buckets);
   177 	write_to_fd(fd, ht->string_pool, ht->pool_size);
   178 
   179 	return 0;
   180 }
   181 
   182 static unsigned int
   183 hash_string(const char *key)
   184 {
   185 	const char *p;
   186 	unsigned int hash = 0;
   187 
   188 	for (p = key; *p; p++)
   189 		hash = (hash << 2) ^ *p;
   190 
   191 	return hash;
   192 }
   193 
   194 unsigned long
   195 hashtable_lookup(struct hashtable *ht, const char *key)
   196 {
   197 	unsigned int start;
   198 	unsigned int mask;
   199 	unsigned long value;
   200 	int i;
   201 
   202 	mask = ht->bucket_alloc - 1;
   203 	start = hash_string(key) & mask;
   204 	i = start;
   205 	do {
   206 		value = ht->buckets[i];
   207 
   208 		if (value == 0)
   209 			return 0;
   210 
   211 		if (strcmp(key, &ht->string_pool[value]) == 0)
   212 			return value;
   213 
   214 		i = (i + 1) & mask;
   215 	} while (i != start);
   216 
   217 	return 0;
   218 }
   219 
   220 static unsigned long
   221 add_to_string_pool(struct hashtable *ht, const char *key)
   222 {
   223 	int len, alloc;
   224 	char *pool;
   225 	unsigned long value;
   226 
   227 	len = strlen(key) + 1;
   228 	alloc = ht->pool_alloc;
   229 	while (alloc < ht->pool_size + len)
   230 		alloc *= 2;
   231 	if (ht->pool_alloc < alloc) {
   232 		pool = realloc(ht->string_pool, alloc);
   233 		if (pool == NULL)
   234 			return 0;
   235 		ht->string_pool = pool;
   236 		ht->pool_alloc = alloc;
   237 	}
   238 
   239 	memcpy(ht->string_pool + ht->pool_size, key, len);
   240 	value = ht->pool_size;
   241 	ht->pool_size += len;
   242 
   243 	return value;
   244 }
   245 
   246 static void
   247 do_insert(struct hashtable *ht, unsigned long value)
   248 {
   249 	unsigned int mask;
   250 	const char *key;
   251 	int i, start;
   252 
   253 	key = &ht->string_pool[value];
   254 	mask = ht->bucket_alloc - 1;
   255 	start = hash_string(key) & mask;
   256 	i = start;
   257 	do {
   258 		if (ht->buckets[i] == 0) {
   259 			ht->buckets[i] = value;
   260 			break;
   261 		}
   262 		i = (i + 1) & mask;
   263 	} while (i != start);
   264 }
   265 
   266 unsigned long
   267 hashtable_insert(struct hashtable *ht, const char *key)
   268 {
   269 	unsigned long value, *buckets, *old_buckets;
   270 	int i, alloc, old_alloc;
   271 
   272 	alloc = ht->bucket_alloc;
   273 	while (alloc < 4 * ht->bucket_count)
   274 		alloc *= 2;
   275 
   276 	if (alloc != ht->bucket_alloc) {
   277 		buckets = zalloc(alloc * sizeof *ht->buckets);
   278 		if (buckets == NULL)
   279 			return 0;
   280 		old_buckets = ht->buckets;
   281 		ht->buckets = buckets;
   282 		old_alloc = ht->bucket_alloc;
   283 		ht->bucket_alloc = alloc;
   284 		
   285 		for (i = 0; i < old_alloc; i++) {
   286 			value = old_buckets[i];
   287 			if (value != 0)
   288 				do_insert(ht, value);
   289 		}
   290 		free(old_buckets);
   291 	}
   292 
   293 	value = add_to_string_pool(ht, key);
   294 	do_insert (ht, value);
   295 	ht->bucket_count++;
   296 
   297 	return value;
   298 }
   299 
   300 struct razor_context {
   301 	struct hashtable *global_ht;
   302 };
   303 
   304 struct razor_context *
   305 razor_context_create (void)
   306 {
   307 	struct razor_context *ctx;
   308 
   309 	ctx = malloc(sizeof *ctx);
   310 	ctx->global_ht = hashtable_create();
   311 
   312 	return ctx;
   313 }
   314 
   315 struct razor_context *
   316 razor_context_create_from_file (const char *filename)
   317 {
   318 	struct razor_context *ctx;
   319 
   320 	ctx = malloc(sizeof *ctx);
   321 	ctx->global_ht = hashtable_create_from_file(filename);
   322 
   323 	return ctx;
   324 }
   325 
   326 unsigned long
   327 razor_context_tokenize(struct razor_context *ctx, const char *string)
   328 {
   329 	unsigned long token;
   330 
   331 	token = hashtable_lookup(ctx->global_ht, string);
   332 	if (token != 0)
   333 		return token;
   334 
   335 	return hashtable_insert(ctx->global_ht, string);
   336 }
   337 
   338 struct razor_set {
   339 	struct razor_context *ctx;
   340 };
   341 
   342 struct parsing_context {
   343 	struct razor_context *ctx;
   344 };
   345 
   346 static void
   347 start_element(void *data, const char *name, const char **atts)
   348 {
   349 	struct parsing_context *ctx = data;
   350 	int i;
   351 
   352 	for (i = 0; atts[i]; i += 2)
   353 		razor_context_tokenize(ctx->ctx, atts[i + 1]);
   354 }
   355 
   356 static void
   357 end_element (void *data, const char *name)
   358 {
   359 }
   360 
   361 static char *
   362 sha1_to_hex(const unsigned char *sha1)
   363 {
   364 	static int bufno;
   365 	static char hexbuffer[4][50];
   366 	static const char hex[] = "0123456789abcdef";
   367 	char *buffer = hexbuffer[3 & ++bufno], *buf = buffer;
   368 	int i;
   369 
   370 	for (i = 0; i < 20; i++) {
   371 		unsigned int val = *sha1++;
   372 		*buf++ = hex[val >> 4];
   373 		*buf++ = hex[val & 0xf];
   374 	}
   375 	*buf = '\0';
   376 
   377 	return buffer;
   378 }
   379 
   380 static int
   381 razor_context_read_file(struct razor_context *ctx, const char *filename)
   382 {
   383 	SHA_CTX sha1;
   384 	XML_Parser parser;
   385 	struct parsing_context pctx;
   386 	int fd;
   387 	void *p;
   388 	struct stat stat;
   389 	char buf[128];
   390 	unsigned char hash[20];
   391 
   392 	fd = open(filename, O_RDONLY);
   393 	if (fstat(fd, &stat) < 0)
   394 		return -1;
   395 	p = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
   396 	if (p == MAP_FAILED)
   397 		return -1;
   398 
   399 	parser = XML_ParserCreate(NULL);
   400 	pctx.ctx = ctx;
   401 	XML_SetUserData(parser, &pctx);
   402 	XML_SetElementHandler(parser, start_element, end_element);
   403 	if (XML_Parse(parser, p, stat.st_size, 1) == XML_STATUS_ERROR) {
   404 		fprintf(stderr,
   405 			"%s at line %d, %s\n",
   406 			XML_ErrorString(XML_GetErrorCode(parser)),
   407 			XML_GetCurrentLineNumber(parser),
   408 			filename);
   409 		return 1;
   410 	}
   411 
   412 	XML_ParserFree(parser);
   413 
   414 	SHA1_Init(&sha1);
   415 	SHA1_Update(&sha1, p, stat.st_size);
   416 	SHA1_Final(hash, &sha1);
   417 
   418 	close(fd);
   419 
   420 	snprintf(buf, sizeof buf, "set/%s", sha1_to_hex(hash));
   421 	if (write_to_file(buf, p, stat.st_size) < 0)
   422 		return -1;
   423 	munmap(p, stat.st_size);
   424 
   425 	return 0;
   426 }
   427 
   428 int
   429 razor_context_write(struct razor_context *ctx, const char *filename)
   430 {
   431 	return hashtable_write(ctx->global_ht, filename);
   432 }
   433 
   434 void
   435 razor_context_destroy(struct razor_context *ctx)
   436 {
   437 	hashtable_destroy(ctx->global_ht);
   438 	free(ctx);
   439 }
   440 
   441 static int
   442 usage(void)
   443 {
   444 	printf("usage: razor [ import FILES | lookup <key> ]\n");
   445 	exit(1);
   446 }
   447 
   448 static const char repo_filename[] = "system.repo";
   449 
   450 int
   451 main(int argc, char *argv[])
   452 {
   453 	int i;
   454 	struct razor_context *ctx;
   455 	struct stat statbuf;
   456 
   457 	if (argc < 3) {
   458 		usage();
   459 	} else if (strcmp(argv[1], "import") == 0) {
   460 		if (stat("set", &statbuf) && mkdir("set", 0777)) {
   461 			fprintf(stderr, "could not create directory 'set'\n");
   462 			exit(-1);
   463 		}
   464 			
   465 		ctx = razor_context_create();
   466 
   467 		for (i = 2; i < argc; i++) {
   468 			if (razor_context_read_file(ctx, argv[i]) < 0) {
   469 				fprintf(stderr, "failed to import %s\n",
   470 					argv[i]);
   471 				exit(-1);
   472 			}
   473 		}
   474 
   475 		printf("number of buckets: %d\n",
   476 		       ctx->global_ht->bucket_count);
   477 		printf("bucket allocation: %d\n",
   478 		       ctx->global_ht->bucket_alloc);
   479 		printf("pool size: %d\n", ctx->global_ht->pool_size);
   480 		printf("pool allocation: %d\n", ctx->global_ht->pool_alloc);
   481 
   482 		razor_context_write(ctx, repo_filename);
   483 
   484 		razor_context_destroy(ctx);
   485 	} else if (strcmp(argv[1], "lookup") == 0) {
   486 		ctx = razor_context_create_from_file(repo_filename);
   487 		printf("%s is %lu\n", argv[2],
   488 		       hashtable_lookup(ctx->global_ht, argv[2]));
   489 		razor_context_destroy(ctx);
   490 	} else {
   491 		usage();
   492 	}
   493 
   494 	return 0;
   495 }