razor.c
author Kristian H?gsberg <krh@redhat.com>
Mon Sep 03 23:13:19 2007 -0400 (2007-09-03)
changeset 3 917677cdceb3
parent 0 e15eb9ef9c28
child 4 c8e9dbcfaf02
permissions -rw-r--r--
Add a simple package list to the repo.
     1 #include <stdlib.h>
     2 #include <stdio.h>
     3 #include <string.h>
     4 #include <sys/types.h>
     5 #include <sys/stat.h>
     6 #include <sys/mman.h>
     7 #include <unistd.h>
     8 #include <fcntl.h>
     9 
    10 #include <expat.h>
    11 #include "sha1.h"
    12 
    13 static int
    14 write_to_fd(int fd, void *p, size_t size)
    15 {
    16 	int rest, len;
    17 
    18 	rest = size;
    19 	while (rest > 0) {
    20 		len = write(fd, p, rest);
    21 		if (len < 0)
    22 			return -1;
    23 		rest -= len;
    24 	}
    25 
    26 	return 0;
    27 }
    28 
    29 static int
    30 write_to_file(const char *filename, void *p, size_t size)
    31 {
    32 	int fd, err;
    33 
    34 	fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
    35 	if (fd < 0)
    36 		return -1;
    37 	err = write_to_fd(fd, p, size);
    38 	close(fd);
    39 
    40 	return err;
    41 }
    42 
    43 struct hashtable_header {
    44 	unsigned int magic;
    45 	unsigned int version;
    46 	struct { unsigned int type, offset; } sections[0];
    47 };
    48 
    49 #define HASHTABLE_MAGIC 0x7a7a7a7a
    50 #define HASHTABLE_VERSION 1
    51 #define HASHTABLE_BUCKETS 1
    52 #define HASHTABLE_STRINGS 2
    53 #define HASHTABLE_PACKAGES 3
    54 
    55 struct package {
    56 	unsigned long name;
    57 	unsigned long version;
    58 };
    59 
    60 struct hashtable {
    61 	unsigned long *buckets;
    62 	int bucket_count, bucket_alloc;
    63 	char *string_pool;
    64 	int pool_size, pool_alloc;
    65 	struct hashtable_header *header;
    66 
    67 	struct package *packages;
    68 	int package_count, package_alloc;
    69 };
    70 
    71 static void *
    72 zalloc(size_t size)
    73 {
    74 	void *p;
    75 
    76 	p = malloc(size);
    77 	memset(p, 0, size);
    78 
    79 	return p;
    80 }
    81 
    82 struct hashtable *
    83 hashtable_create(void)
    84 {
    85 	struct hashtable *ht;
    86 
    87 	ht = zalloc(sizeof *ht);
    88 	ht->buckets = zalloc(4096 * sizeof *ht->buckets);
    89 	ht->bucket_count = 0;
    90 	ht->bucket_alloc = 4096;
    91 
    92 	ht->string_pool = zalloc(4096);
    93 	ht->pool_size = 1;
    94 	ht->pool_alloc = 4096;
    95 
    96 	ht->packages = zalloc(4096 * sizeof *ht->packages);
    97 	ht->package_count = 0;
    98 	ht->package_alloc = 4096;
    99 
   100 	return ht;
   101 }
   102 
   103 struct hashtable *
   104 hashtable_create_from_file(const char *filename)
   105 {
   106 	struct hashtable *ht;
   107 	struct stat stat;
   108 	unsigned int size, offset;
   109 	int fd, i;
   110 
   111 	ht = zalloc(sizeof *ht);
   112 	fd = open(filename, O_RDONLY);
   113 	if (fstat(fd, &stat) < 0)
   114 		return NULL;
   115 	ht->header = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
   116 	if (ht->header == MAP_FAILED) {
   117 		free(ht);
   118 		return NULL;
   119 	}
   120 
   121 	for (i = 0; i < ht->header->sections[i].type; i++) {
   122 		offset = ht->header->sections[i].offset;
   123 		size = ht->header->sections[i + 1].offset - offset;
   124 
   125 		switch (ht->header->sections[i].type) {
   126 		case HASHTABLE_BUCKETS:
   127 			ht->buckets = (void *) ht->header + offset;
   128 			ht->bucket_count = size / sizeof *ht->buckets;
   129 			ht->bucket_alloc = ht->bucket_count;
   130 			break;
   131 		case HASHTABLE_STRINGS:
   132 			ht->string_pool = (void *) ht->header + offset;
   133 			ht->pool_size = size;
   134 			ht->pool_alloc = size;
   135 			break;
   136 		case HASHTABLE_PACKAGES:
   137 			ht->packages = (void *) ht->header + offset;
   138 			ht->package_count = size / sizeof *ht->packages;
   139 			ht->package_alloc = size / sizeof *ht->packages;
   140 			break;
   141 		}
   142 	}
   143 	close(fd);
   144 
   145 	return ht;
   146 }
   147 
   148 void
   149 hashtable_destroy(struct hashtable *ht)
   150 {
   151 	unsigned int size;
   152 	int i;
   153 
   154 	if (ht->header) {
   155 		for (i = 0; ht->header->sections[i].type; i++)
   156 			;
   157 		size = ht->header->sections[i].type;
   158 		munmap(ht->header, size);
   159 	} else {
   160 		free(ht->buckets);
   161 		free(ht->string_pool);
   162 	}
   163 
   164 	free(ht);
   165 }
   166 
   167 static int
   168 hashtable_write(struct hashtable *ht, const char *filename)
   169 {
   170 	char data[4096];
   171 	struct hashtable_header *header = (struct hashtable_header *) data;
   172 	int fd, pool_size, package_size;
   173 
   174 	/* Align these to pages sizes */
   175 	pool_size = (ht->pool_size + 4095) & ~4095;
   176 	package_size =
   177 		(ht->package_alloc * sizeof *ht->packages + 4095) & ~4095;
   178 
   179 	memset(data, 0, sizeof data);
   180 	header->magic = HASHTABLE_MAGIC;
   181 	header->version = HASHTABLE_VERSION;
   182 
   183 	header->sections[0].type = HASHTABLE_BUCKETS;
   184 	header->sections[0].offset = sizeof data;
   185 
   186 	header->sections[1].type = HASHTABLE_STRINGS;
   187 	header->sections[1].offset = header->sections[0].offset +
   188 		ht->bucket_alloc * sizeof *ht->buckets;
   189 
   190 	header->sections[2].type = HASHTABLE_PACKAGES;
   191 	header->sections[2].offset = header->sections[1].offset + pool_size;
   192 
   193 	header->sections[3].type = 0;
   194 	header->sections[3].offset = header->sections[2].offset + package_size;
   195 
   196 	fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
   197 	if (fd < 0)
   198 		return -1;
   199 
   200 	write_to_fd(fd, data, sizeof data);
   201 	write_to_fd(fd, ht->buckets, ht->bucket_alloc * sizeof *ht->buckets);
   202 	write_to_fd(fd, ht->string_pool, pool_size);
   203 	write_to_fd(fd, ht->packages, package_size);
   204 
   205 	return 0;
   206 }
   207 
   208 static unsigned int
   209 hash_string(const char *key)
   210 {
   211 	const char *p;
   212 	unsigned int hash = 0;
   213 
   214 	for (p = key; *p; p++)
   215 		hash = (hash << 2) ^ *p;
   216 
   217 	return hash;
   218 }
   219 
   220 unsigned long
   221 hashtable_lookup(struct hashtable *ht, const char *key)
   222 {
   223 	unsigned int start;
   224 	unsigned int mask;
   225 	unsigned long value;
   226 	int i;
   227 
   228 	mask = ht->bucket_alloc - 1;
   229 	start = hash_string(key) & mask;
   230 	i = start;
   231 	do {
   232 		value = ht->buckets[i];
   233 
   234 		if (value == 0)
   235 			return 0;
   236 
   237 		if (strcmp(key, &ht->string_pool[value]) == 0)
   238 			return value;
   239 
   240 		i = (i + 1) & mask;
   241 	} while (i != start);
   242 
   243 	return 0;
   244 }
   245 
   246 static unsigned long
   247 add_to_string_pool(struct hashtable *ht, const char *key)
   248 {
   249 	int len, alloc;
   250 	char *pool;
   251 	unsigned long value;
   252 
   253 	len = strlen(key) + 1;
   254 	alloc = ht->pool_alloc;
   255 	while (alloc < ht->pool_size + len)
   256 		alloc *= 2;
   257 	if (ht->pool_alloc < alloc) {
   258 		pool = realloc(ht->string_pool, alloc);
   259 		if (pool == NULL)
   260 			return 0;
   261 		ht->string_pool = pool;
   262 		ht->pool_alloc = alloc;
   263 	}
   264 
   265 	memcpy(ht->string_pool + ht->pool_size, key, len);
   266 	value = ht->pool_size;
   267 	ht->pool_size += len;
   268 
   269 	return value;
   270 }
   271 
   272 static void
   273 do_insert(struct hashtable *ht, unsigned long value)
   274 {
   275 	unsigned int mask;
   276 	const char *key;
   277 	int i, start;
   278 
   279 	key = &ht->string_pool[value];
   280 	mask = ht->bucket_alloc - 1;
   281 	start = hash_string(key) & mask;
   282 	i = start;
   283 	do {
   284 		if (ht->buckets[i] == 0) {
   285 			ht->buckets[i] = value;
   286 			break;
   287 		}
   288 		i = (i + 1) & mask;
   289 	} while (i != start);
   290 }
   291 
   292 unsigned long
   293 hashtable_insert(struct hashtable *ht, const char *key)
   294 {
   295 	unsigned long value, *buckets, *old_buckets;
   296 	int i, alloc, old_alloc;
   297 
   298 	alloc = ht->bucket_alloc;
   299 	while (alloc < 4 * ht->bucket_count)
   300 		alloc *= 2;
   301 
   302 	if (alloc != ht->bucket_alloc) {
   303 		buckets = zalloc(alloc * sizeof *ht->buckets);
   304 		if (buckets == NULL)
   305 			return 0;
   306 		old_buckets = ht->buckets;
   307 		ht->buckets = buckets;
   308 		old_alloc = ht->bucket_alloc;
   309 		ht->bucket_alloc = alloc;
   310 		
   311 		for (i = 0; i < old_alloc; i++) {
   312 			value = old_buckets[i];
   313 			if (value != 0)
   314 				do_insert(ht, value);
   315 		}
   316 		free(old_buckets);
   317 	}
   318 
   319 	value = add_to_string_pool(ht, key);
   320 	do_insert (ht, value);
   321 	ht->bucket_count++;
   322 
   323 	return value;
   324 }
   325 
   326 static unsigned long
   327 hashtable_add_package(struct hashtable *ht,
   328 		      unsigned long name, unsigned long version)
   329 {
   330 	struct package *packages;
   331 	int alloc;
   332 
   333 	alloc = ht->package_alloc;
   334 	while (alloc < ht->package_count + 1)
   335 		alloc *= 2;
   336 	if (ht->package_alloc < alloc) {
   337 		packages = realloc(ht->packages, alloc * sizeof ht->packages);
   338 		if (packages == NULL)
   339 			return 0;
   340 		ht->packages = packages;
   341 		ht->package_alloc = alloc;
   342 	}
   343 
   344 	ht->packages[ht->package_count].name = name;
   345 	ht->packages[ht->package_count].version = version;
   346 	ht->package_count++;
   347 
   348 	return 0;
   349 }
   350 
   351 
   352 struct razor_context {
   353 	struct hashtable *global_ht;
   354 };
   355 
   356 struct razor_context *
   357 razor_context_create (void)
   358 {
   359 	struct razor_context *ctx;
   360 
   361 	ctx = malloc(sizeof *ctx);
   362 	ctx->global_ht = hashtable_create();
   363 
   364 	return ctx;
   365 }
   366 
   367 struct razor_context *
   368 razor_context_create_from_file (const char *filename)
   369 {
   370 	struct razor_context *ctx;
   371 
   372 	ctx = malloc(sizeof *ctx);
   373 	ctx->global_ht = hashtable_create_from_file(filename);
   374 
   375 	return ctx;
   376 }
   377 
   378 unsigned long
   379 razor_context_tokenize(struct razor_context *ctx, const char *string)
   380 {
   381 	unsigned long token;
   382 
   383 	token = hashtable_lookup(ctx->global_ht, string);
   384 	if (token != 0)
   385 		return token;
   386 
   387 	return hashtable_insert(ctx->global_ht, string);
   388 }
   389 
   390 static struct hashtable *qsort_ht;
   391 
   392 static int
   393 compare_packages(const void *p1, const void *p2)
   394 {
   395 	const struct package *pkg1 = p1, *pkg2 = p2;
   396 
   397 	return strcmp(&qsort_ht->string_pool[pkg1->name],
   398 		      &qsort_ht->string_pool[pkg2->name]);
   399 }
   400 
   401 static void
   402 razor_context_sort(struct razor_context *ctx)
   403 {
   404 	struct hashtable *ht = ctx->global_ht;
   405 
   406 	qsort_ht = ht;
   407 	qsort(ht->packages, ht->package_count, sizeof *ht->packages,
   408 	      compare_packages);
   409 }
   410 
   411 struct razor_set {
   412 	struct razor_context *ctx;
   413 };
   414 
   415 struct parsing_context {
   416 	struct razor_context *ctx;
   417 };
   418 
   419 static void
   420 parse_package(struct parsing_context *ctx, const char **atts)
   421 {
   422 	unsigned long name, version;
   423 	int i;
   424 
   425 	for (i = 0; atts[i]; i += 2) {
   426 		if (strcmp(atts[i], "name") == 0)
   427 			name = razor_context_tokenize(ctx->ctx, atts[i + 1]);
   428 		else if (strcmp(atts[i], "version") == 0)
   429 			version = razor_context_tokenize(ctx->ctx, atts[i + 1]);
   430 	}
   431 
   432 	if (name == 0 || version == 0) {
   433 		fprintf(stderr, "invalid package tag, "
   434 			"missing name or version attributes\n");
   435 		return;
   436 	}
   437 
   438 	hashtable_add_package(ctx->ctx->global_ht, name, version);
   439 }
   440 
   441 static void
   442 start_element(void *data, const char *name, const char **atts)
   443 {
   444 	struct parsing_context *ctx = data;
   445 	int i;
   446 
   447 	if (strcmp(name, "package") == 0)
   448 		parse_package(ctx, atts);
   449 
   450 	for (i = 0; atts[i]; i += 2)
   451 		razor_context_tokenize(ctx->ctx, atts[i + 1]);
   452 }
   453 
   454 static void
   455 end_element (void *data, const char *name)
   456 {
   457 }
   458 
   459 static char *
   460 sha1_to_hex(const unsigned char *sha1)
   461 {
   462 	static int bufno;
   463 	static char hexbuffer[4][50];
   464 	static const char hex[] = "0123456789abcdef";
   465 	char *buffer = hexbuffer[3 & ++bufno], *buf = buffer;
   466 	int i;
   467 
   468 	for (i = 0; i < 20; i++) {
   469 		unsigned int val = *sha1++;
   470 		*buf++ = hex[val >> 4];
   471 		*buf++ = hex[val & 0xf];
   472 	}
   473 	*buf = '\0';
   474 
   475 	return buffer;
   476 }
   477 
   478 static int
   479 razor_context_read_file(struct razor_context *ctx, const char *filename)
   480 {
   481 	SHA_CTX sha1;
   482 	XML_Parser parser;
   483 	struct parsing_context pctx;
   484 	int fd;
   485 	void *p;
   486 	struct stat stat;
   487 	char buf[128];
   488 	unsigned char hash[20];
   489 
   490 	fd = open(filename, O_RDONLY);
   491 	if (fstat(fd, &stat) < 0)
   492 		return -1;
   493 	p = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
   494 	if (p == MAP_FAILED)
   495 		return -1;
   496 
   497 	parser = XML_ParserCreate(NULL);
   498 	pctx.ctx = ctx;
   499 	XML_SetUserData(parser, &pctx);
   500 	XML_SetElementHandler(parser, start_element, end_element);
   501 	if (XML_Parse(parser, p, stat.st_size, 1) == XML_STATUS_ERROR) {
   502 		fprintf(stderr,
   503 			"%s at line %d, %s\n",
   504 			XML_ErrorString(XML_GetErrorCode(parser)),
   505 			XML_GetCurrentLineNumber(parser),
   506 			filename);
   507 		return 1;
   508 	}
   509 
   510 	XML_ParserFree(parser);
   511 
   512 	SHA1_Init(&sha1);
   513 	SHA1_Update(&sha1, p, stat.st_size);
   514 	SHA1_Final(hash, &sha1);
   515 
   516 	close(fd);
   517 
   518 	snprintf(buf, sizeof buf, "set/%s", sha1_to_hex(hash));
   519 	if (write_to_file(buf, p, stat.st_size) < 0)
   520 		return -1;
   521 	munmap(p, stat.st_size);
   522 
   523 	return 0;
   524 }
   525 
   526 int
   527 razor_context_write(struct razor_context *ctx, const char *filename)
   528 {
   529 	return hashtable_write(ctx->global_ht, filename);
   530 }
   531 
   532 void
   533 razor_context_list_packages(struct razor_context *ctx)
   534 {
   535 	int i;
   536 	struct hashtable *ht = ctx->global_ht;
   537 	struct package *p;
   538 
   539 	p = ht->packages;
   540 	for (i = 0; i < ht->package_count && p->name; i++, p++) {
   541 		printf("%s %s\n",
   542 		       &ht->string_pool[p->name],
   543 		       &ht->string_pool[p->version]);
   544 	}
   545 }
   546 
   547 void
   548 razor_context_info(struct razor_context *ctx)
   549 {
   550 	struct hashtable *ht = ctx->global_ht;
   551 	unsigned int offset, size;
   552 	int i;
   553 
   554 	for (i = 0; i < ht->header->sections[i].type; i++) {
   555 		offset = ht->header->sections[i].offset;
   556 		size = ht->header->sections[i + 1].offset - offset;
   557 
   558 		switch (ht->header->sections[i].type) {
   559 		case HASHTABLE_BUCKETS:
   560 			printf("bucket section:\t\t%dkb\n", size / 1024);
   561 			break;
   562 		case HASHTABLE_STRINGS:
   563 			printf("string pool:\t\t%dkb\n", size / 1024);
   564 			break;
   565 		case HASHTABLE_PACKAGES:
   566 			printf("package section:\t%dkb\n", size / 1024);
   567 			break;
   568 		}
   569 	}
   570 
   571 }
   572 
   573 void
   574 razor_context_destroy(struct razor_context *ctx)
   575 {
   576 	hashtable_destroy(ctx->global_ht);
   577 	free(ctx);
   578 }
   579 
   580 static int
   581 usage(void)
   582 {
   583 	printf("usage: razor [ import FILES | lookup <key> | list | info ]\n");
   584 	exit(1);
   585 }
   586 
   587 static const char repo_filename[] = "system.repo";
   588 
   589 int
   590 main(int argc, char *argv[])
   591 {
   592 	int i;
   593 	struct razor_context *ctx;
   594 	struct stat statbuf;
   595 
   596 	if (argc < 2) {
   597 		usage();
   598 	} else if (strcmp(argv[1], "import") == 0) {
   599 		if (stat("set", &statbuf) && mkdir("set", 0777)) {
   600 			fprintf(stderr, "could not create directory 'set'\n");
   601 			exit(-1);
   602 		}
   603 			
   604 		ctx = razor_context_create();
   605 
   606 		for (i = 2; i < argc; i++) {
   607 			if (razor_context_read_file(ctx, argv[i]) < 0) {
   608 				fprintf(stderr, "failed to import %s\n",
   609 					argv[i]);
   610 				exit(-1);
   611 			}
   612 		}
   613 
   614 		razor_context_sort(ctx);
   615 
   616 		printf("number of buckets: %d\n",
   617 		       ctx->global_ht->bucket_count);
   618 		printf("bucket allocation: %d\n",
   619 		       ctx->global_ht->bucket_alloc);
   620 		printf("pool size: %d\n", ctx->global_ht->pool_size);
   621 		printf("pool allocation: %d\n", ctx->global_ht->pool_alloc);
   622 
   623 		razor_context_write(ctx, repo_filename);
   624 
   625 		razor_context_destroy(ctx);
   626 	} else if (strcmp(argv[1], "lookup") == 0) {
   627 		ctx = razor_context_create_from_file(repo_filename);
   628 		printf("%s is %lu\n", argv[2],
   629 		       hashtable_lookup(ctx->global_ht, argv[2]));
   630 		razor_context_destroy(ctx);
   631 	} else if (strcmp(argv[1], "list") == 0) {
   632 		ctx = razor_context_create_from_file(repo_filename);
   633 		razor_context_list_packages(ctx);
   634 		razor_context_destroy(ctx);
   635 	} else if (strcmp(argv[1], "info") == 0) {
   636 		ctx = razor_context_create_from_file(repo_filename);
   637 		razor_context_info(ctx);
   638 		razor_context_destroy(ctx);
   639 	} else {
   640 		usage();
   641 	}
   642 
   643 	return 0;
   644 }