razor.c
author Kristian Høgsberg <krh@redhat.com>
Mon Sep 03 23:10:25 2007 -0400 (2007-09-03)
changeset 2 08c5b27e1952
child 3 917677cdceb3
permissions -rw-r--r--
Improve import script to also pull version and release fields.
krh@0
     1
#include <stdlib.h>
krh@0
     2
#include <stdio.h>
krh@0
     3
#include <string.h>
krh@0
     4
#include <sys/types.h>
krh@0
     5
#include <sys/stat.h>
krh@0
     6
#include <sys/mman.h>
krh@0
     7
#include <unistd.h>
krh@0
     8
#include <fcntl.h>
krh@0
     9
krh@0
    10
#include <expat.h>
krh@0
    11
#include "sha1.h"
krh@0
    12
krh@0
    13
/*
 * Write all `size` bytes starting at `p` to `fd`, retrying after short
 * writes.  Returns 0 once everything has been written, or -1 as soon as
 * write() reports an error.
 */
static int
write_to_fd(int fd, void *p, size_t size)
{
	const char *cursor = p;
	size_t rest = size;
	ssize_t len;

	while (rest > 0) {
		len = write(fd, cursor, rest);
		if (len < 0)
			return -1;
		/* Step past what was written so that a short write does
		 * not send the start of the buffer again. */
		cursor += len;
		rest -= len;
	}

	return 0;
}
krh@0
    28
krh@0
    29
/*
 * Create (or truncate) `filename` and fill it with the `size` bytes at
 * `p`.  Returns -1 if the file cannot be opened, otherwise the result
 * of write_to_fd(); the descriptor is closed before returning.
 */
static int
write_to_file(const char *filename, void *p, size_t size)
{
	const int flags = O_CREAT | O_WRONLY | O_TRUNC;
	int out_fd, status;

	out_fd = open(filename, flags, 0666);
	if (out_fd >= 0) {
		status = write_to_fd(out_fd, p, size);
		close(out_fd);
	} else {
		status = -1;
	}

	return status;
}
krh@0
    42
krh@0
    43
/*
 * Header at the start of a serialized hashtable.  `sections` is a list
 * of (type, offset) pairs that ends with an entry whose type is 0; each
 * offset is counted in bytes from the start of the header.
 */
struct hashtable_header {
	unsigned int magic;
	unsigned int version;
	/* C99 flexible array member instead of the GNU [0] extension. */
	struct { unsigned int type, offset; } sections[];
};

/* Values stored in the header's magic and version fields. */
#define HASHTABLE_MAGIC 0x7a7a7a7a
#define HASHTABLE_VERSION 1

/* Section type tags; type 0 terminates the section list. */
#define HASHTABLE_BUCKETS 1
#define HASHTABLE_STRINGS 2
krh@0
    53
krh@0
    54
/*
 * In-memory hashtable state.  `buckets` holds offsets into
 * `string_pool` (0 marks an empty slot) and `string_pool` holds the
 * NUL-terminated keys.  `header` is only set for tables that were
 * mapped from a file.
 */
struct hashtable {
	unsigned long *buckets;
	int bucket_count;
	int bucket_alloc;
	char *string_pool;
	int pool_size;
	int pool_alloc;
	struct hashtable_header *header;
};
krh@0
    61
krh@0
    62
/*
 * Allocate `size` bytes of zero-filled memory.  Returns NULL when the
 * allocation fails; release the result with free().
 */
static void *
zalloc(size_t size)
{
	/* calloc() zero-fills and, unlike malloc() followed by memset(),
	 * never writes through a NULL pointer when allocation fails. */
	return calloc(1, size);
}
krh@0
    72
krh@0
    73
struct hashtable *
krh@0
    74
hashtable_create(void)
krh@0
    75
{
krh@0
    76
	struct hashtable *ht;
krh@0
    77
krh@0
    78
	ht = zalloc(sizeof *ht);
krh@0
    79
	ht->buckets = zalloc(4096 * sizeof *ht->buckets);
krh@0
    80
	ht->bucket_count = 0;
krh@0
    81
	ht->bucket_alloc = 4096;
krh@0
    82
krh@0
    83
	ht->string_pool = zalloc(4096);
krh@0
    84
	ht->pool_size = 1;
krh@0
    85
	ht->pool_alloc = 4096;
krh@0
    86
krh@0
    87
	return ht;
krh@0
    88
}
krh@0
    89
krh@0
    90
struct hashtable *
krh@0
    91
hashtable_create_from_file(const char *filename)
krh@0
    92
{
krh@0
    93
	struct hashtable *ht;
krh@0
    94
	struct stat stat;
krh@0
    95
	unsigned int size, offset;
krh@0
    96
	int fd, i;
krh@0
    97
krh@0
    98
	ht = zalloc(sizeof *ht);
krh@0
    99
	fd = open(filename, O_RDONLY);
krh@0
   100
	if (fstat(fd, &stat) < 0)
krh@0
   101
		return NULL;
krh@0
   102
	ht->header = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
krh@0
   103
	if (ht->header == MAP_FAILED) {
krh@0
   104
		free(ht);
krh@0
   105
		return NULL;
krh@0
   106
	}
krh@0
   107
krh@0
   108
	for (i = 0; i < ht->header->sections[i].type; i++) {
krh@0
   109
		offset = ht->header->sections[i].offset;
krh@0
   110
		size = ht->header->sections[i + 1].offset - offset;
krh@0
   111
krh@0
   112
		switch (ht->header->sections[i].type) {
krh@0
   113
		case HASHTABLE_BUCKETS:
krh@0
   114
			ht->buckets = (void *) ht->header + offset;
krh@0
   115
			ht->bucket_count = size / sizeof *ht->buckets;
krh@0
   116
			ht->bucket_alloc = ht->bucket_count;
krh@0
   117
			break;
krh@0
   118
		case HASHTABLE_STRINGS:
krh@0
   119
			ht->string_pool = (void *) ht->header + offset;
krh@0
   120
			ht->pool_size = size;
krh@0
   121
			ht->pool_alloc = size;
krh@0
   122
			break;
krh@0
   123
		}
krh@0
   124
	}
krh@0
   125
	close(fd);
krh@0
   126
krh@0
   127
	return ht;
krh@0
   128
}
krh@0
   129
krh@0
   130
void
krh@0
   131
hashtable_destroy(struct hashtable *ht)
krh@0
   132
{
krh@0
   133
	unsigned int size;
krh@0
   134
	int i;
krh@0
   135
krh@0
   136
	if (ht->header) {
krh@0
   137
		for (i = 0; ht->header->sections[i].type; i++)
krh@0
   138
			;
krh@0
   139
		size = ht->header->sections[i].type;
krh@0
   140
		munmap(ht->header, size);
krh@0
   141
	} else {
krh@0
   142
		free(ht->buckets);
krh@0
   143
		free(ht->string_pool);
krh@0
   144
	}
krh@0
   145
krh@0
   146
	free(ht);
krh@0
   147
}
krh@0
   148
krh@0
   149
static int
krh@0
   150
hashtable_write(struct hashtable *ht, const char *filename)
krh@0
   151
{
krh@0
   152
	int fd;
krh@0
   153
	char data[4096];
krh@0
   154
	struct hashtable_header *header = (struct hashtable_header *) data;
krh@0
   155
krh@0
   156
	memset(data, 0, sizeof data);
krh@0
   157
	header->magic = HASHTABLE_MAGIC;
krh@0
   158
	header->version = HASHTABLE_VERSION;
krh@0
   159
krh@0
   160
	header->sections[0].type = HASHTABLE_BUCKETS;
krh@0
   161
	header->sections[0].offset = sizeof data;
krh@0
   162
krh@0
   163
	header->sections[1].type = HASHTABLE_STRINGS;
krh@0
   164
	header->sections[1].offset =
krh@0
   165
		sizeof data + ht->bucket_alloc * sizeof *ht->buckets;
krh@0
   166
krh@0
   167
	header->sections[2].type = 0;
krh@0
   168
	header->sections[2].offset =
krh@0
   169
		header->sections[1].offset + ht->pool_size;
krh@0
   170
krh@0
   171
	fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 0666);
krh@0
   172
	if (fd < 0)
krh@0
   173
		return -1;
krh@0
   174
krh@0
   175
	write_to_fd(fd, data, sizeof data);
krh@0
   176
	write_to_fd(fd, ht->buckets, ht->bucket_alloc * sizeof *ht->buckets);
krh@0
   177
	write_to_fd(fd, ht->string_pool, ht->pool_size);
krh@0
   178
krh@0
   179
	return 0;
krh@0
   180
}
krh@0
   181
krh@0
   182
/*
 * Hash the NUL-terminated string `key`: for each character the running
 * value is shifted left by two bits and XORed with that character.
 * The empty string hashes to 0.
 */
static unsigned int
hash_string(const char *key)
{
	unsigned int h = 0;
	size_t pos = 0;

	while (key[pos] != '\0') {
		h = (h << 2) ^ key[pos];
		pos++;
	}

	return h;
}
krh@0
   193
krh@0
   194
unsigned long
krh@0
   195
hashtable_lookup(struct hashtable *ht, const char *key)
krh@0
   196
{
krh@0
   197
	unsigned int start;
krh@0
   198
	unsigned int mask;
krh@0
   199
	unsigned long value;
krh@0
   200
	int i;
krh@0
   201
krh@0
   202
	mask = ht->bucket_alloc - 1;
krh@0
   203
	start = hash_string(key) & mask;
krh@0
   204
	i = start;
krh@0
   205
	do {
krh@0
   206
		value = ht->buckets[i];
krh@0
   207
krh@0
   208
		if (value == 0)
krh@0
   209
			return 0;
krh@0
   210
krh@0
   211
		if (strcmp(key, &ht->string_pool[value]) == 0)
krh@0
   212
			return value;
krh@0
   213
krh@0
   214
		i = (i + 1) & mask;
krh@0
   215
	} while (i != start);
krh@0
   216
krh@0
   217
	return 0;
krh@0
   218
}
krh@0
   219
krh@0
   220
static unsigned long
krh@0
   221
add_to_string_pool(struct hashtable *ht, const char *key)
krh@0
   222
{
krh@0
   223
	int len, alloc;
krh@0
   224
	char *pool;
krh@0
   225
	unsigned long value;
krh@0
   226
krh@0
   227
	len = strlen(key) + 1;
krh@0
   228
	alloc = ht->pool_alloc;
krh@0
   229
	while (alloc < ht->pool_size + len)
krh@0
   230
		alloc *= 2;
krh@0
   231
	if (ht->pool_alloc < alloc) {
krh@0
   232
		pool = realloc(ht->string_pool, alloc);
krh@0
   233
		if (pool == NULL)
krh@0
   234
			return 0;
krh@0
   235
		ht->string_pool = pool;
krh@0
   236
		ht->pool_alloc = alloc;
krh@0
   237
	}
krh@0
   238
krh@0
   239
	memcpy(ht->string_pool + ht->pool_size, key, len);
krh@0
   240
	value = ht->pool_size;
krh@0
   241
	ht->pool_size += len;
krh@0
   242
krh@0
   243
	return value;
krh@0
   244
}
krh@0
   245
krh@0
   246
static void
krh@0
   247
do_insert(struct hashtable *ht, unsigned long value)
krh@0
   248
{
krh@0
   249
	unsigned int mask;
krh@0
   250
	const char *key;
krh@0
   251
	int i, start;
krh@0
   252
krh@0
   253
	key = &ht->string_pool[value];
krh@0
   254
	mask = ht->bucket_alloc - 1;
krh@0
   255
	start = hash_string(key) & mask;
krh@0
   256
	i = start;
krh@0
   257
	do {
krh@0
   258
		if (ht->buckets[i] == 0) {
krh@0
   259
			ht->buckets[i] = value;
krh@0
   260
			break;
krh@0
   261
		}
krh@0
   262
		i = (i + 1) & mask;
krh@0
   263
	} while (i != start);
krh@0
   264
}
krh@0
   265
krh@0
   266
unsigned long
krh@0
   267
hashtable_insert(struct hashtable *ht, const char *key)
krh@0
   268
{
krh@0
   269
	unsigned long value, *buckets, *old_buckets;
krh@0
   270
	int i, alloc, old_alloc;
krh@0
   271
krh@0
   272
	alloc = ht->bucket_alloc;
krh@0
   273
	while (alloc < 4 * ht->bucket_count)
krh@0
   274
		alloc *= 2;
krh@0
   275
krh@0
   276
	if (alloc != ht->bucket_alloc) {
krh@0
   277
		buckets = zalloc(alloc * sizeof *ht->buckets);
krh@0
   278
		if (buckets == NULL)
krh@0
   279
			return 0;
krh@0
   280
		old_buckets = ht->buckets;
krh@0
   281
		ht->buckets = buckets;
krh@0
   282
		old_alloc = ht->bucket_alloc;
krh@0
   283
		ht->bucket_alloc = alloc;
krh@0
   284
		
krh@0
   285
		for (i = 0; i < old_alloc; i++) {
krh@0
   286
			value = old_buckets[i];
krh@0
   287
			if (value != 0)
krh@0
   288
				do_insert(ht, value);
krh@0
   289
		}
krh@0
   290
		free(old_buckets);
krh@0
   291
	}
krh@0
   292
krh@0
   293
	value = add_to_string_pool(ht, key);
krh@0
   294
	do_insert (ht, value);
krh@0
   295
	ht->bucket_count++;
krh@0
   296
krh@0
   297
	return value;
krh@0
   298
}
krh@0
   299
krh@0
   300
/* Razor state: holds the hashtable used to turn strings into tokens. */
struct razor_context {
	struct hashtable *global_ht;	/* released by razor_context_destroy() */
};
krh@0
   303
krh@0
   304
struct razor_context *
krh@0
   305
razor_context_create (void)
krh@0
   306
{
krh@0
   307
	struct razor_context *ctx;
krh@0
   308
krh@0
   309
	ctx = malloc(sizeof *ctx);
krh@0
   310
	ctx->global_ht = hashtable_create();
krh@0
   311
krh@0
   312
	return ctx;
krh@0
   313
}
krh@0
   314
krh@0
   315
struct razor_context *
krh@0
   316
razor_context_create_from_file (const char *filename)
krh@0
   317
{
krh@0
   318
	struct razor_context *ctx;
krh@0
   319
krh@0
   320
	ctx = malloc(sizeof *ctx);
krh@0
   321
	ctx->global_ht = hashtable_create_from_file(filename);
krh@0
   322
krh@0
   323
	return ctx;
krh@0
   324
}
krh@0
   325
krh@0
   326
unsigned long
krh@0
   327
razor_context_tokenize(struct razor_context *ctx, const char *string)
krh@0
   328
{
krh@0
   329
	unsigned long token;
krh@0
   330
krh@0
   331
	token = hashtable_lookup(ctx->global_ht, string);
krh@0
   332
	if (token != 0)
krh@0
   333
		return token;
krh@0
   334
krh@0
   335
	return hashtable_insert(ctx->global_ht, string);
krh@0
   336
}
krh@0
   337
krh@0
   338
/*
 * A set tied to a razor context.  It only carries the context pointer
 * and is not used by any code visible in this part of the file.
 */
struct razor_set {
	struct razor_context *ctx;
};
krh@0
   341
krh@0
   342
/* User data given to the XML element callbacks while a file is parsed. */
struct parsing_context {
	struct razor_context *ctx;	/* context that receives the tokens */
};
krh@0
   345
krh@0
   346
static void
krh@0
   347
start_element(void *data, const char *name, const char **atts)
krh@0
   348
{
krh@0
   349
	struct parsing_context *ctx = data;
krh@0
   350
	int i;
krh@0
   351
krh@0
   352
	for (i = 0; atts[i]; i += 2)
krh@0
   353
		razor_context_tokenize(ctx->ctx, atts[i + 1]);
krh@0
   354
}
krh@0
   355
krh@0
   356
/* XML end-tag callback; it does nothing with its arguments. */
static void
end_element (void *data, const char *name)
{
	(void) data;
	(void) name;
}
krh@0
   360
krh@0
   361
/*
 * Format the 20 bytes at `sha1` as 40 lowercase hex digits.  The
 * result lives in one of four static buffers used in rotation, so a
 * returned string is overwritten by the fourth following call.
 */
static char *
sha1_to_hex(const unsigned char *sha1)
{
	static const char digits[] = "0123456789abcdef";
	static char ring[4][50];
	static int ring_pos;
	char *out;
	int n;

	ring_pos++;
	out = ring[ring_pos & 3];

	for (n = 0; n < 20; n++) {
		out[2 * n] = digits[sha1[n] >> 4];
		out[2 * n + 1] = digits[sha1[n] & 0xf];
	}
	out[40] = '\0';

	return out;
}
krh@0
   379
krh@0
   380
static int
krh@0
   381
razor_context_read_file(struct razor_context *ctx, const char *filename)
krh@0
   382
{
krh@0
   383
	SHA_CTX sha1;
krh@0
   384
	XML_Parser parser;
krh@0
   385
	struct parsing_context pctx;
krh@0
   386
	int fd;
krh@0
   387
	void *p;
krh@0
   388
	struct stat stat;
krh@0
   389
	char buf[128];
krh@0
   390
	unsigned char hash[20];
krh@0
   391
krh@0
   392
	fd = open(filename, O_RDONLY);
krh@0
   393
	if (fstat(fd, &stat) < 0)
krh@0
   394
		return -1;
krh@0
   395
	p = mmap(NULL, stat.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
krh@0
   396
	if (p == MAP_FAILED)
krh@0
   397
		return -1;
krh@0
   398
krh@0
   399
	parser = XML_ParserCreate(NULL);
krh@0
   400
	pctx.ctx = ctx;
krh@0
   401
	XML_SetUserData(parser, &pctx);
krh@0
   402
	XML_SetElementHandler(parser, start_element, end_element);
krh@0
   403
	if (XML_Parse(parser, p, stat.st_size, 1) == XML_STATUS_ERROR) {
krh@0
   404
		fprintf(stderr,
krh@0
   405
			"%s at line %d, %s\n",
krh@0
   406
			XML_ErrorString(XML_GetErrorCode(parser)),
krh@0
   407
			XML_GetCurrentLineNumber(parser),
krh@0
   408
			filename);
krh@0
   409
		return 1;
krh@0
   410
	}
krh@0
   411
krh@0
   412
	XML_ParserFree(parser);
krh@0
   413
krh@0
   414
	SHA1_Init(&sha1);
krh@0
   415
	SHA1_Update(&sha1, p, stat.st_size);
krh@0
   416
	SHA1_Final(hash, &sha1);
krh@0
   417
krh@0
   418
	close(fd);
krh@0
   419
krh@0
   420
	snprintf(buf, sizeof buf, "set/%s", sha1_to_hex(hash));
krh@0
   421
	if (write_to_file(buf, p, stat.st_size) < 0)
krh@0
   422
		return -1;
krh@0
   423
	munmap(p, stat.st_size);
krh@0
   424
krh@0
   425
	return 0;
krh@0
   426
}
krh@0
   427
krh@0
   428
int
krh@0
   429
razor_context_write(struct razor_context *ctx, const char *filename)
krh@0
   430
{
krh@0
   431
	return hashtable_write(ctx->global_ht, filename);
krh@0
   432
}
krh@0
   433
krh@0
   434
void
krh@0
   435
razor_context_destroy(struct razor_context *ctx)
krh@0
   436
{
krh@0
   437
	hashtable_destroy(ctx->global_ht);
krh@0
   438
	free(ctx);
krh@0
   439
}
krh@0
   440
krh@0
   441
/*
 * Print the command-line synopsis on stdout and terminate the process
 * with exit status 1; this function does not return.
 */
static int
usage(void)
{
	static const char synopsis[] =
		"usage: razor [ import FILES | lookup <key> ]\n";

	printf("%s", synopsis);
	exit(1);
}
krh@0
   447
krh@0
   448
static const char repo_filename[] = "system.repo";
krh@0
   449
krh@0
   450
int
krh@0
   451
main(int argc, char *argv[])
krh@0
   452
{
krh@0
   453
	int i;
krh@0
   454
	struct razor_context *ctx;
krh@0
   455
	struct stat statbuf;
krh@0
   456
krh@0
   457
	if (argc < 3) {
krh@0
   458
		usage();
krh@0
   459
	} else if (strcmp(argv[1], "import") == 0) {
krh@0
   460
		if (stat("set", &statbuf) && mkdir("set", 0777)) {
krh@0
   461
			fprintf(stderr, "could not create directory 'set'\n");
krh@0
   462
			exit(-1);
krh@0
   463
		}
krh@0
   464
			
krh@0
   465
		ctx = razor_context_create();
krh@0
   466
krh@0
   467
		for (i = 2; i < argc; i++) {
krh@0
   468
			if (razor_context_read_file(ctx, argv[i]) < 0) {
krh@0
   469
				fprintf(stderr, "failed to import %s\n",
krh@0
   470
					argv[i]);
krh@0
   471
				exit(-1);
krh@0
   472
			}
krh@0
   473
		}
krh@0
   474
krh@0
   475
		printf("number of buckets: %d\n",
krh@0
   476
		       ctx->global_ht->bucket_count);
krh@0
   477
		printf("bucket allocation: %d\n",
krh@0
   478
		       ctx->global_ht->bucket_alloc);
krh@0
   479
		printf("pool size: %d\n", ctx->global_ht->pool_size);
krh@0
   480
		printf("pool allocation: %d\n", ctx->global_ht->pool_alloc);
krh@0
   481
krh@0
   482
		razor_context_write(ctx, repo_filename);
krh@0
   483
krh@0
   484
		razor_context_destroy(ctx);
krh@0
   485
	} else if (strcmp(argv[1], "lookup") == 0) {
krh@0
   486
		ctx = razor_context_create_from_file(repo_filename);
krh@0
   487
		printf("%s is %lu\n", argv[2],
krh@0
   488
		       hashtable_lookup(ctx->global_ht, argv[2]));
krh@0
   489
		razor_context_destroy(ctx);
krh@0
   490
	} else {
krh@0
   491
		usage();
krh@0
   492
	}
krh@0
   493
krh@0
   494
	return 0;
krh@0
   495
}