Implement linear package set merger.
author Kristian Høgsberg <krh@redhat.com>
Sat Sep 29 19:25:38 2007 -0400 (2007-09-29)
changeset 41 7eea400e19db
parent 40 305cd8657bc8
child 42 aedfc8f95227
Implement linear package set merger.
TODO
razor.c
     1.1 --- a/TODO	Wed Sep 26 00:08:03 2007 -0400
     1.2 +++ b/TODO	Sat Sep 29 19:25:38 2007 -0400
     1.3 @@ -67,3 +67,8 @@
     1.4    to pull eg the latest evince and dependencies from another box.  We
     1.5    should be able to regenerate a rzr pkg from the system so we can
     1.6    reuse the signature from the originating repo.
     1.7 +
     1.8 +- Ok, maybe the fastest package set merge method in the end is to use
     1.9 +  the razor_importer, but use a hash table for the properties.  This
    1.10 +  way we can assign them unique IDs immediately (like tokenizing
    1.11 +  strings).
     2.1 --- a/razor.c	Wed Sep 26 00:08:03 2007 -0400
     2.2 +++ b/razor.c	Sat Sep 29 19:25:38 2007 -0400
     2.3 @@ -79,8 +79,6 @@
     2.4  	struct import_property_context requires;
     2.5  	struct import_property_context provides;
     2.6  	struct razor_package *package;
     2.7 -	unsigned long *requires_map;
     2.8 -	unsigned long *provides_map;
     2.9  };
    2.10  
    2.11  static void
    2.12 @@ -932,92 +930,318 @@
    2.13  	array_release(&unsatisfied);
    2.14  }
    2.15  
    2.16 +#define UPSTREAM_SOURCE 0x80000000ul
    2.17 +#define INDEX_MASK 0x00fffffful
    2.18 +
    2.19 +struct source {
    2.20 +	struct razor_set *set;
    2.21 +	unsigned long *requires_map;
    2.22 +	unsigned long *provides_map;
    2.23 +};
    2.24 +
    2.25 +static void
    2.26 +prepare_source(struct source *source, struct razor_set *set)
    2.27 +{
    2.28 +	int count;
    2.29 +	size_t size;
    2.30 +
    2.31 +	source->set = set;
    2.32 +
    2.33 +	count = set->requires.size / sizeof (struct razor_property);
    2.34 +	size = count * sizeof *source->requires_map;
    2.35 +	source->requires_map = zalloc(size);
    2.36 +
    2.37 +	count = set->provides.size / sizeof (struct razor_property);
    2.38 +	size = count * sizeof *source->provides_map;
    2.39 +	source->provides_map = zalloc(size);
    2.40 +}
    2.41 +
    2.42  static void
    2.43  add_package(struct razor_importer *importer,
    2.44 -	    struct razor_package *package, struct razor_set *set)
    2.45 +	    struct razor_package *package, struct source *source,
    2.46 +	    unsigned long flags)
    2.47  {
    2.48  	char *pool;
    2.49  	unsigned long *r;
    2.50 -	struct razor_property *p, *properties;
    2.51 +	struct razor_package *p;
    2.52  
    2.53 -	pool = set->string_pool.data;
    2.54 -	razor_importer_begin_package(importer,
    2.55 -				     &pool[package->name],
    2.56 -				     &pool[package->version]);
    2.57 +	pool = source->set->string_pool.data;
    2.58 +	p = array_add(&importer->set->packages, sizeof *p);
    2.59 +	p->name = razor_importer_tokenize(importer, &pool[package->name]);
    2.60 +	p->name |= flags;
    2.61 +	p->version = razor_importer_tokenize(importer,
    2.62 +					     &pool[package->version]);
    2.63 +	p->requires = package->requires;
    2.64 +	p->provides = package->provides;
    2.65  
    2.66 -	r = (unsigned long *) set->requires_pool.data + package->requires;
    2.67 -	properties = set->requires.data;
    2.68 -	while (~*r) {
    2.69 -		p = &properties[*r++];
    2.70 -		razor_importer_add_requires(importer,
    2.71 -					    &pool[p->name], &pool[p->version]);
    2.72 +	r = (unsigned long *)
    2.73 +		source->set->requires_pool.data + package->requires;
    2.74 +	while (*r != ~0)
    2.75 +		source->requires_map[*r++] = 1;
    2.76 +
    2.77 +	r = (unsigned long *)
    2.78 +		source->set->provides_pool.data + package->provides;
    2.79 +	while (*r != ~0)
    2.80 +		source->provides_map[*r++] = 1;
    2.81 +}
    2.82 +
    2.83 +
    2.84 +/* Build the new package list sorted by merging the two package lists.
    2.85 + * Build new string pool as we go. (for now we just re-use that part of
    2.86 + * the importer). */
    2.87 +static void
    2.88 +merge_packages(struct razor_importer *importer,
    2.89 +	       struct source *source1, struct source *source2,
    2.90 +	       struct array *packages)
    2.91 +{
    2.92 +	struct razor_package *upstream_packages, *p, *s, *send;
    2.93 +	char *spool, *upool;
    2.94 +	unsigned long *u, *uend;
    2.95 +	int cmp;
    2.96 +
    2.97 +	upstream_packages = source2->set->packages.data;
    2.98 +
    2.99 +	u = packages->data;
   2.100 +	uend = packages->data + packages->size;
   2.101 +	upool = source2->set->string_pool.data;
   2.102 +
   2.103 +	s = source1->set->packages.data;
   2.104 +	send = source1->set->packages.data + source1->set->packages.size;
   2.105 +	spool = source1->set->string_pool.data;
   2.106 +
   2.107 +	while (s < send) {
   2.108 +		p = upstream_packages + *u;
   2.109 +
   2.110 +		if (u < uend)
   2.111 +			cmp = strcmp(&spool[s->name], &upool[p->name]);
   2.112 +		if (u >= uend || cmp < 0) {
   2.113 +			add_package(importer, s, source1, 0);
   2.114 +			s++;
   2.115 +		} else if (cmp == 0) {
   2.116 +			add_package(importer, p, source2, UPSTREAM_SOURCE);
   2.117 +			s++;
   2.118 +			u++;
   2.119 +		} else {
   2.120 +			add_package(importer, p, source2, UPSTREAM_SOURCE);
   2.121 +			u++;
   2.122 +		}
   2.123 +	}
   2.124 +}
   2.125 +
   2.126 +static unsigned long
   2.127 +add_property(struct razor_importer *importer, struct array *properties,
   2.128 +	     const char *name, const char *version)
   2.129 +{
   2.130 +	struct razor_property *p;
   2.131 +
   2.132 +	p = array_add(properties, sizeof *p);
   2.133 +	p->name = razor_importer_tokenize(importer, name);
   2.134 +	p->version = razor_importer_tokenize(importer, version);
   2.135 +
   2.136 +	return p - (struct razor_property *) properties->data;
   2.137 +}
   2.138 +
   2.139 +static void
   2.140 +merge_properties(struct array *properties,
   2.141 +		 struct razor_importer *importer,
   2.142 +		 struct razor_set *set1,
   2.143 +		 struct array *properties1,
   2.144 +		 unsigned long *map1,
   2.145 +		 struct razor_set *set2,
   2.146 +		 struct array *properties2,
   2.147 +		 unsigned long *map2)
   2.148 +{
   2.149 +	struct razor_property *p1, *p2;
   2.150 +	int i, j, cmp, count1, count2;
   2.151 +	char *pool1, *pool2;
   2.152 +
   2.153 +	i = 0;
   2.154 +	j = 0;
   2.155 +	pool1 = set1->string_pool.data;
   2.156 +	pool2 = set2->string_pool.data;
   2.157 +
   2.158 +	count1 = properties1->size / sizeof *p1;
   2.159 +	count2 = properties2->size / sizeof *p2;
   2.160 +	while (i < count1 || j < count2) {
   2.161 +		if (i < count1 && map1[i] == 0) {
   2.162 +			i++;
   2.163 +			continue;
   2.164 +		}
   2.165 +		if (j < count2 && map2[j] == 0) {
   2.166 +			j++;
   2.167 +			continue;
   2.168 +		}
   2.169 +		p1 = (struct razor_property *) properties1->data + i;
   2.170 +		p2 = (struct razor_property *) properties2->data + j;
   2.171 +		if (i < count1 && j < count2)
   2.172 +			cmp = strcmp(&pool1[p1->name], &pool2[p2->name]);
   2.173 +		else if (i < count1)
   2.174 +			cmp = -1;
   2.175 +		else
   2.176 +			cmp = 1;
   2.177 +		if (cmp == 0)
   2.178 +			cmp = versioncmp(&pool1[p1->version],
   2.179 +					 &pool2[p2->version]);
   2.180 +		if (cmp < 0) {
   2.181 +			map1[i++] = add_property(importer,
   2.182 +						 properties,
   2.183 +						 &pool1[p1->name],
   2.184 +						 &pool1[p1->version]);
   2.185 +		} else if (cmp > 0) {
   2.186 +			map2[j++] = add_property(importer,
   2.187 +						 properties,
   2.188 +						 &pool2[p2->name],
   2.189 +						 &pool2[p2->version]);
   2.190 +		} else  {
   2.191 +			map1[i++] = map2[j++] = add_property(importer,
   2.192 +							     properties,
   2.193 +							     &pool1[p1->name],
   2.194 +							     &pool1[p1->version]);
   2.195 +		}
   2.196 +	}
   2.197 +}
   2.198 +
   2.199 +static unsigned long
   2.200 +emit_properties(struct array *source_pool, unsigned long index,
   2.201 +		unsigned long *map, struct array *pool)
   2.202 +{
   2.203 +	unsigned long r, *p, *q;
   2.204 +
   2.205 +	r = pool->size / sizeof *q;
   2.206 +	p = (unsigned long *) source_pool->data + index;
   2.207 +	while (*p != ~0) {
   2.208 +		q = array_add(pool, sizeof *q);
   2.209 +		*q = map[*p++];
   2.210  	}
   2.211  
   2.212 -	r = (unsigned long *) set->provides_pool.data + package->provides;
   2.213 -	properties = set->provides.data;
   2.214 -	while (~*r) {
   2.215 -		p = &properties[*r++];
   2.216 -		razor_importer_add_provides(importer,
   2.217 -					    &pool[p->name], &pool[p->version]);
   2.218 +	q = array_add(pool, sizeof *q);
   2.219 +	*q = ~0;
   2.220 +
   2.221 +	return r;
   2.222 +}
   2.223 +	
   2.224 +/* Rebuild property->packages maps.  We can't just remap these, as a
   2.225 + * property may have lost or gained a number of packages.  Allocate an
   2.226 + * array per property and loop through the packages and add them to
   2.227 + * the arrays for their properties. */
   2.228 +static void
   2.229 +rebuild_package_lists(struct razor_set *set)
   2.230 +{
   2.231 +	int requires_count, provides_count;
   2.232 +	struct array *requires_pkgs, *provides_pkgs, *a;
   2.233 +	struct razor_package *pkg, *pkg_end;
   2.234 +	struct razor_property *prop, *prop_end;
   2.235 +	unsigned long *r, *q, *rpool, *ppool;
   2.236 +
   2.237 +	requires_count = set->requires.size / sizeof (struct razor_property);
   2.238 +	provides_count = set->provides.size / sizeof (struct razor_property);
   2.239 +	requires_pkgs = zalloc(requires_count * sizeof *requires_pkgs);
   2.240 +	provides_pkgs = zalloc(provides_count * sizeof *provides_pkgs);
   2.241 +	pkg_end = set->packages.data + set->packages.size;
   2.242 +	rpool = set->requires_pool.data;
   2.243 +	ppool = set->provides_pool.data;
   2.244 +
   2.245 +	for (pkg = set->packages.data; pkg < pkg_end; pkg++) {
   2.246 +		for (r = &rpool[pkg->requires]; *r != ~0; r++) {
   2.247 +			q = array_add(&requires_pkgs[*r], sizeof *q);
   2.248 +			*q = pkg - (struct razor_package *) set->packages.data;
   2.249 +		}
   2.250 +		for (r = &ppool[pkg->provides]; *r != ~0; r++) {
   2.251 +			q = array_add(&provides_pkgs[*r], sizeof *q);
   2.252 +			*q = pkg - (struct razor_package *) set->packages.data;
   2.253 +		}
   2.254  	}
   2.255  
   2.256 -	razor_importer_finish_package(importer);
   2.257 +	prop_end = set->requires.data + set->requires.size;
   2.258 +	a = requires_pkgs;
   2.259 +	for (prop = set->requires.data; prop < prop_end; prop++, a++) {
   2.260 +		prop->packages = add_to_property_pool(&set->requires_pool, a);
   2.261 +		array_release(a);
   2.262 +	}
   2.263 +	free(requires_pkgs);
   2.264 +
   2.265 +	prop_end = set->provides.data + set->provides.size;
   2.266 +	a = provides_pkgs;
   2.267 +	for (prop = set->provides.data; prop < prop_end; prop++, a++) {
   2.268 +		prop->packages = add_to_property_pool(&set->provides_pool, a);
   2.269 +		array_release(a);
   2.270 +	}
   2.271 +	free(provides_pkgs);
   2.272  }
   2.273  
   2.274  /* Add packages from 'upstream' to 'set'.  The packages to add are
   2.275   * specified by the 'packages' array, which is a sorted list of
   2.276   * package indexes.  Returns a newly allocated package set.  Does not
   2.277 - * enforce validity of the resulting package set. */
   2.278 -
   2.279 -/* FIXME: We can do this in a linear sweep instead of using an
   2.280 - * importer and the sorting that incurs: build the new package list
   2.281 - * sorted, build up a map from package index in old set to package
   2.282 - * index in new set for both sets. ~0 means 'not in new set'.  build
   2.283 - * new string pool as we go, probably just re-use that part of the
   2.284 - * importer.  as we build the package list, fill out a bitvector of
   2.285 - * the properties that are referenced by the pacakges in the new
   2.286 - * set. then do a parallel loop through the properties and emit them
   2.287 - * to the new set and build a map from indices in the old set to
   2.288 - * indices in the new set. then loop through the packages again and
   2.289 - * emit the property lists. */
   2.290 -
   2.291 + * enforce validity of the resulting package set.
   2.292 + *
   2.293 + * This looks more complicated than it is.  An easy way to merge two
   2.294 + * package sets would be to just use a razor_importer, but that
   2.295 + * requires resorting, and is thus O(n log n).  We can do this in a
   2.296 + * linear sweep, but it gets a little more complicated.
   2.297 + */
   2.298  struct razor_set *
   2.299  razor_set_add(struct razor_set *set, struct razor_set *upstream,
   2.300  	      struct array *packages)
   2.301  {
   2.302 +	struct razor_set *result;
   2.303  	struct razor_importer *importer;
   2.304 -	struct razor_package *upstream_packages, *p, *s, *send;
   2.305 -	char *spool, *upool;
   2.306 -	unsigned long *u, *uend;
   2.307 -	int cmp;
   2.308 +	struct razor_package *p, *pend;
   2.309 +	struct source source, upstream_source;
   2.310  
   2.311  	importer = razor_importer_new();
   2.312 -	upstream_packages = upstream->packages.data;
   2.313 -	u = packages->data;
   2.314 -	uend = packages->data + packages->size;
   2.315 -	upool = upstream->string_pool.data;
   2.316 -	s = set->packages.data;
   2.317 -	send = set->packages.data + set->packages.size;
   2.318 -	spool = set->string_pool.data;
   2.319  
   2.320 -	while (s < send) {
   2.321 -		p = upstream_packages + *u;
   2.322 -		if (u < uend)
   2.323 -			cmp = strcmp(&spool[s->name], &upool[p->name]);
   2.324 -		if (u >= uend || cmp < 0) {
   2.325 -			add_package(importer, s, set);
   2.326 -			s++;
   2.327 -		} else if (cmp == 0) {
   2.328 -			add_package(importer, p, upstream);
   2.329 -			s++;
   2.330 -			u++;
   2.331 -		} else {
   2.332 -			add_package(importer, p, upstream);
   2.333 -			u++;
   2.334 -		}
   2.335 +	prepare_source(&upstream_source, upstream);
   2.336 +	prepare_source(&source, set);
   2.337 +
   2.338 +	merge_packages(importer, &source, &upstream_source, packages);
   2.339 +
   2.340 +	/* As we built the package list, we filled out a bitvector of
   2.341 +	 * the properties that are referenced by the packages in the
   2.342 +	 * new set.  Now we do a parallel loop through the properties
   2.343 +	 * and emit those marked in the bit vector to the new set.  In
   2.344 +	 * the process, we update the bit vector to actually map from
   2.345 +	 * indices in the old property list to indices in the new
   2.346 +	 * property list for both sets. */
   2.347 +
   2.348 +	merge_properties(&importer->set->requires, importer,
   2.349 +			 set, &set->requires, source.requires_map,
   2.350 +			 upstream, &upstream->requires,
   2.351 +			 upstream_source.requires_map);
   2.352 +	merge_properties(&importer->set->provides, importer,
   2.353 +			 set, &set->provides, source.provides_map,
   2.354 +			 upstream, &upstream->provides,
   2.355 +			 upstream_source.provides_map);
   2.356 +
   2.357 +	/* Now we loop through the packages again and emit the
   2.358 +	 * property lists, remapped to point to the new properties. */
   2.359 +
   2.360 +	pend = importer->set->packages.data + importer->set->packages.size;
   2.361 +	for (p = importer->set->packages.data; p < pend; p++) {
   2.362 +		struct source *src;
   2.363 +
   2.364 +		if (p->name & UPSTREAM_SOURCE)
   2.365 +			src = &upstream_source;
   2.366 +		else
   2.367 +			src = &source;
   2.368 +
   2.369 +		p->requires = emit_properties(&src->set->requires_pool,
   2.370 +					      p->requires,
   2.371 +					      src->requires_map,
   2.372 +					      &importer->set->requires_pool);
   2.373 +		p->provides = emit_properties(&src->set->provides_pool,
   2.374 +					      p->provides,
   2.375 +					      src->provides_map,
   2.376 +					      &importer->set->provides_pool);
   2.377 +		p->name &= INDEX_MASK;
   2.378  	}
   2.379  
   2.380 -	return razor_importer_finish(importer);
   2.381 +	rebuild_package_lists(importer->set);
   2.382 +
   2.383 +	result = importer->set;
   2.384 +	array_release(&importer->buckets);
   2.385 +	free(importer);
   2.386 +
   2.387 +	return result;
   2.388  }
   2.389  
   2.390  void
   2.391 @@ -1278,6 +1502,19 @@
   2.392  			return 1;
   2.393  		razor_set_list_unsatisfied(set);
   2.394  		razor_set_destroy(set);
   2.395 +	} else if (strcmp(argv[1], "add") == 0) {
   2.396 +		struct array list;
   2.397 +
   2.398 +		set = razor_set_open(repo_filename);
   2.399 +		upstream = razor_set_open(rawhide_repo_filename);
   2.400 +		if (set == NULL || upstream == NULL)
   2.401 +			return 1;
   2.402 +		array_init(&list);
   2.403 +		find_packages(upstream, argc - 2, argv + 2, &list);
   2.404 +		set = razor_set_add(set, upstream, &list);
   2.405 +		razor_set_write(set, "system-updated.repo");
   2.406 +		razor_set_destroy(set);
   2.407 +		printf("wrote system-updated.repo\n");
   2.408  	} else if (strcmp(argv[1], "update") == 0) {
   2.409  		set = razor_set_open(repo_filename);
   2.410  		upstream = razor_set_open(rawhide_repo_filename);