Implement linear package set merger.
1.1 --- a/TODO Wed Sep 26 00:08:03 2007 -0400
1.2 +++ b/TODO Sat Sep 29 19:25:38 2007 -0400
1.3 @@ -67,3 +67,8 @@
1.4 to pull eg the latest evince and dependencies from another box. We
1.5 should be able to regenerate a rzr pkg from the system so we can
1.6 reuse the signature from the originating repo.
1.7 +
1.8 +- Ok, maybe the fastest package set merge method in the end is to use
1.9 + the razor_importer, but use a hash table for the properties. This
1.10 + way we can assign them unique IDs immediately (like tokenizing
1.11 + strings).
2.1 --- a/razor.c Wed Sep 26 00:08:03 2007 -0400
2.2 +++ b/razor.c Sat Sep 29 19:25:38 2007 -0400
2.3 @@ -79,8 +79,6 @@
2.4 struct import_property_context requires;
2.5 struct import_property_context provides;
2.6 struct razor_package *package;
2.7 - unsigned long *requires_map;
2.8 - unsigned long *provides_map;
2.9 };
2.10
2.11 static void
2.12 @@ -932,92 +930,318 @@
2.13 array_release(&unsatisfied);
2.14 }
2.15
2.16 +#define UPSTREAM_SOURCE 0x80000000ul
2.17 +#define INDEX_MASK 0x00fffffful
2.18 +
2.19 +struct source {
2.20 + struct razor_set *set;
2.21 + unsigned long *requires_map;
2.22 + unsigned long *provides_map;
2.23 +};
2.24 +
2.25 +static void
2.26 +prepare_source(struct source *source, struct razor_set *set)
2.27 +{
2.28 + int count;
2.29 + size_t size;
2.30 +
2.31 + source->set = set;
2.32 +
2.33 + count = set->requires.size / sizeof (struct razor_property);
2.34 + size = count * sizeof *source->requires_map;
2.35 + source->requires_map = zalloc(size);
2.36 +
2.37 + count = set->provides.size / sizeof (struct razor_property);
2.38 + size = count * sizeof *source->provides_map;
2.39 + source->provides_map = zalloc(size);
2.40 +}
2.41 +
2.42 static void
2.43 add_package(struct razor_importer *importer,
2.44 - struct razor_package *package, struct razor_set *set)
2.45 + struct razor_package *package, struct source *source,
2.46 + unsigned long flags)
2.47 {
2.48 char *pool;
2.49 unsigned long *r;
2.50 - struct razor_property *p, *properties;
2.51 + struct razor_package *p;
2.52
2.53 - pool = set->string_pool.data;
2.54 - razor_importer_begin_package(importer,
2.55 - &pool[package->name],
2.56 - &pool[package->version]);
2.57 + pool = source->set->string_pool.data;
2.58 + p = array_add(&importer->set->packages, sizeof *p);
2.59 + p->name = razor_importer_tokenize(importer, &pool[package->name]);
2.60 + p->name |= flags;
2.61 + p->version = razor_importer_tokenize(importer,
2.62 + &pool[package->version]);
2.63 + p->requires = package->requires;
2.64 + p->provides = package->provides;
2.65
2.66 - r = (unsigned long *) set->requires_pool.data + package->requires;
2.67 - properties = set->requires.data;
2.68 - while (~*r) {
2.69 - p = &properties[*r++];
2.70 - razor_importer_add_requires(importer,
2.71 - &pool[p->name], &pool[p->version]);
2.72 + r = (unsigned long *)
2.73 + source->set->requires_pool.data + package->requires;
2.74 + while (*r != ~0)
2.75 + source->requires_map[*r++] = 1;
2.76 +
2.77 + r = (unsigned long *)
2.78 + source->set->provides_pool.data + package->provides;
2.79 + while (*r != ~0)
2.80 + source->provides_map[*r++] = 1;
2.81 +}
2.82 +
2.83 +
2.84 +/* Build the new package list, in sorted order, by merging the two
2.85 + * package lists, building the new string pool as we go (for now we
2.86 + * just re-use that part of the importer). */
2.87 +static void
2.88 +merge_packages(struct razor_importer *importer,
2.89 + struct source *source1, struct source *source2,
2.90 + struct array *packages)
2.91 +{
2.92 + struct razor_package *upstream_packages, *p, *s, *send;
2.93 + char *spool, *upool;
2.94 + unsigned long *u, *uend;
2.95 + int cmp;
2.96 +
2.97 + upstream_packages = source2->set->packages.data;
2.98 +
2.99 + u = packages->data;
2.100 + uend = packages->data + packages->size;
2.101 + upool = source2->set->string_pool.data;
2.102 +
2.103 + s = source1->set->packages.data;
2.104 + send = source1->set->packages.data + source1->set->packages.size;
2.105 + spool = source1->set->string_pool.data;
2.106 +
2.107 + while (s < send) {
2.108 + p = upstream_packages + *u;
2.109 +
2.110 + if (u < uend)
2.111 + cmp = strcmp(&spool[s->name], &upool[p->name]);
2.112 + if (u >= uend || cmp < 0) {
2.113 + add_package(importer, s, source1, 0);
2.114 + s++;
2.115 + } else if (cmp == 0) {
2.116 + add_package(importer, p, source2, UPSTREAM_SOURCE);
2.117 + s++;
2.118 + u++;
2.119 + } else {
2.120 + add_package(importer, p, source2, UPSTREAM_SOURCE);
2.121 + u++;
2.122 + }
2.123 + }
2.124 +}
2.125 +
2.126 +static unsigned long
2.127 +add_property(struct razor_importer *importer, struct array *properties,
2.128 + const char *name, const char *version)
2.129 +{
2.130 + struct razor_property *p;
2.131 +
2.132 + p = array_add(properties, sizeof *p);
2.133 + p->name = razor_importer_tokenize(importer, name);
2.134 + p->version = razor_importer_tokenize(importer, version);
2.135 +
2.136 + return p - (struct razor_property *) properties->data;
2.137 +}
2.138 +
2.139 +static void
2.140 +merge_properties(struct array *properties,
2.141 + struct razor_importer *importer,
2.142 + struct razor_set *set1,
2.143 + struct array *properties1,
2.144 + unsigned long *map1,
2.145 + struct razor_set *set2,
2.146 + struct array *properties2,
2.147 + unsigned long *map2)
2.148 +{
2.149 + struct razor_property *p1, *p2;
2.150 + int i, j, cmp, count1, count2;
2.151 + char *pool1, *pool2;
2.152 +
2.153 + i = 0;
2.154 + j = 0;
2.155 + pool1 = set1->string_pool.data;
2.156 + pool2 = set2->string_pool.data;
2.157 +
2.158 + count1 = properties1->size / sizeof *p1;
2.159 + count2 = properties2->size / sizeof *p2;
2.160 + while (i < count1 || j < count2) {
2.161 + if (i < count1 && map1[i] == 0) {
2.162 + i++;
2.163 + continue;
2.164 + }
2.165 + if (j < count2 && map2[j] == 0) {
2.166 + j++;
2.167 + continue;
2.168 + }
2.169 + p1 = (struct razor_property *) properties1->data + i;
2.170 + p2 = (struct razor_property *) properties2->data + j;
2.171 + if (i < count1 && j < count2)
2.172 + cmp = strcmp(&pool1[p1->name], &pool2[p2->name]);
2.173 + else if (i < count1)
2.174 + cmp = -1;
2.175 + else
2.176 + cmp = 1;
2.177 + if (cmp == 0)
2.178 + cmp = versioncmp(&pool1[p1->version],
2.179 + &pool2[p2->version]);
2.180 + if (cmp < 0) {
2.181 + map1[i++] = add_property(importer,
2.182 + properties,
2.183 + &pool1[p1->name],
2.184 + &pool1[p1->version]);
2.185 + } else if (cmp > 0) {
2.186 + map2[j++] = add_property(importer,
2.187 + properties,
2.188 + &pool2[p2->name],
2.189 + &pool2[p2->version]);
2.190 + } else {
2.191 + map1[i++] = map2[j++] = add_property(importer,
2.192 + properties,
2.193 + &pool1[p1->name],
2.194 + &pool1[p1->version]);
2.195 + }
2.196 + }
2.197 +}
2.198 +
2.199 +static unsigned long
2.200 +emit_properties(struct array *source_pool, unsigned long index,
2.201 + unsigned long *map, struct array *pool)
2.202 +{
2.203 + unsigned long r, *p, *q;
2.204 +
2.205 + r = pool->size / sizeof *q;
2.206 + p = (unsigned long *) source_pool->data + index;
2.207 + while (*p != ~0) {
2.208 + q = array_add(pool, sizeof *q);
2.209 + *q = map[*p++];
2.210 }
2.211
2.212 - r = (unsigned long *) set->provides_pool.data + package->provides;
2.213 - properties = set->provides.data;
2.214 - while (~*r) {
2.215 - p = &properties[*r++];
2.216 - razor_importer_add_provides(importer,
2.217 - &pool[p->name], &pool[p->version]);
2.218 + q = array_add(pool, sizeof *q);
2.219 + *q = ~0;
2.220 +
2.221 + return r;
2.222 +}
2.223 +
2.224 +/* Rebuild property->packages maps. We can't just remap these, as a
2.225 + * property may have lost or gained a number of packages. Allocate an
2.226 + * array per property and loop through the packages and add them to
2.227 + * the arrays for their properties. */
2.228 +static void
2.229 +rebuild_package_lists(struct razor_set *set)
2.230 +{
2.231 + int requires_count, provides_count;
2.232 + struct array *requires_pkgs, *provides_pkgs, *a;
2.233 + struct razor_package *pkg, *pkg_end;
2.234 + struct razor_property *prop, *prop_end;
2.235 + unsigned long *r, *q, *rpool, *ppool;
2.236 +
2.237 + requires_count = set->requires.size / sizeof (struct razor_property);
2.238 + provides_count = set->provides.size / sizeof (struct razor_property);
2.239 + requires_pkgs = zalloc(requires_count * sizeof *requires_pkgs);
2.240 + provides_pkgs = zalloc(provides_count * sizeof *provides_pkgs);
2.241 + pkg_end = set->packages.data + set->packages.size;
2.242 + rpool = set->requires_pool.data;
2.243 + ppool = set->provides_pool.data;
2.244 +
2.245 + for (pkg = set->packages.data; pkg < pkg_end; pkg++) {
2.246 + for (r = &rpool[pkg->requires]; *r != ~0; r++) {
2.247 + q = array_add(&requires_pkgs[*r], sizeof *q);
2.248 + *q = pkg - (struct razor_package *) set->packages.data;
2.249 + }
2.250 + for (r = &ppool[pkg->provides]; *r != ~0; r++) {
2.251 + q = array_add(&provides_pkgs[*r], sizeof *q);
2.252 + *q = pkg - (struct razor_package *) set->packages.data;
2.253 + }
2.254 }
2.255
2.256 - razor_importer_finish_package(importer);
2.257 + prop_end = set->requires.data + set->requires.size;
2.258 + a = requires_pkgs;
2.259 + for (prop = set->requires.data; prop < prop_end; prop++, a++) {
2.260 + prop->packages = add_to_property_pool(&set->requires_pool, a);
2.261 + array_release(a);
2.262 + }
2.263 + free(requires_pkgs);
2.264 +
2.265 + prop_end = set->provides.data + set->provides.size;
2.266 + a = provides_pkgs;
2.267 + for (prop = set->provides.data; prop < prop_end; prop++, a++) {
2.268 + prop->packages = add_to_property_pool(&set->provides_pool, a);
2.269 + array_release(a);
2.270 + }
2.271 + free(provides_pkgs);
2.272 }
2.273
2.274 /* Add packages from 'upstream' to 'set'. The packages to add are
2.275 * specified by the 'packages' array, which is a sorted list of
2.276 * package indexes. Returns a newly allocated package set. Does not
2.277 - * enforce validity of the resulting package set. */
2.278 -
2.279 -/* FIXME: We can do this in a linear sweep instead of using an
2.280 - * importer and the sorting that incurs: build the new package list
2.281 - * sorted, build up a map from package index in old set to package
2.282 - * index in new set for both sets. ~0 means 'not in new set'. build
2.283 - * new string pool as we go, probably just re-use that part of the
2.284 - * importer. as we build the package list, fill out a bitvector of
2.285 - * the properties that are referenced by the pacakges in the new
2.286 - * set. then do a parallel loop through the properties and emit them
2.287 - * to the new set and build a map from indices in the old set to
2.288 - * indices in the new set. then loop through the packages again and
2.289 - * emit the property lists. */
2.290 -
2.291 + * enforce validity of the resulting package set.
2.292 + *
2.293 + * This looks more complicated than it is. An easy way to merge two
2.294 + * package sets would be to just use a razor_importer, but that
2.295 + * requires re-sorting, and is thus O(n log n). We can do this in a
2.296 + * linear sweep, but it gets a little more complicated.
2.297 + */
2.298 struct razor_set *
2.299 razor_set_add(struct razor_set *set, struct razor_set *upstream,
2.300 struct array *packages)
2.301 {
2.302 + struct razor_set *result;
2.303 struct razor_importer *importer;
2.304 - struct razor_package *upstream_packages, *p, *s, *send;
2.305 - char *spool, *upool;
2.306 - unsigned long *u, *uend;
2.307 - int cmp;
2.308 + struct razor_package *p, *pend;
2.309 + struct source source, upstream_source;
2.310
2.311 importer = razor_importer_new();
2.312 - upstream_packages = upstream->packages.data;
2.313 - u = packages->data;
2.314 - uend = packages->data + packages->size;
2.315 - upool = upstream->string_pool.data;
2.316 - s = set->packages.data;
2.317 - send = set->packages.data + set->packages.size;
2.318 - spool = set->string_pool.data;
2.319
2.320 - while (s < send) {
2.321 - p = upstream_packages + *u;
2.322 - if (u < uend)
2.323 - cmp = strcmp(&spool[s->name], &upool[p->name]);
2.324 - if (u >= uend || cmp < 0) {
2.325 - add_package(importer, s, set);
2.326 - s++;
2.327 - } else if (cmp == 0) {
2.328 - add_package(importer, p, upstream);
2.329 - s++;
2.330 - u++;
2.331 - } else {
2.332 - add_package(importer, p, upstream);
2.333 - u++;
2.334 - }
2.335 + prepare_source(&upstream_source, upstream);
2.336 + prepare_source(&source, set);
2.337 +
2.338 + merge_packages(importer, &source, &upstream_source, packages);
2.339 +
2.340 + /* As we built the package list, we filled out a bit vector of
2.341 + * the properties that are referenced by the packages in the
2.342 + * new set. Now we do a parallel loop through the properties
2.343 + * and emit those marked in the bit vector to the new set. In
2.344 + * the process, we update the bit vector to actually map from
2.345 + * indices in the old property list to indices in the new
2.346 + * property list for both sets. */
2.347 +
2.348 + merge_properties(&importer->set->requires, importer,
2.349 + set, &set->requires, source.requires_map,
2.350 + upstream, &upstream->requires,
2.351 + upstream_source.requires_map);
2.352 + merge_properties(&importer->set->provides, importer,
2.353 + set, &set->provides, source.provides_map,
2.354 + upstream, &upstream->provides,
2.355 + upstream_source.provides_map);
2.356 +
2.357 + /* Now we loop through the packages again and emit the
2.358 + * property lists, remapped to point to the new properties. */
2.359 +
2.360 + pend = importer->set->packages.data + importer->set->packages.size;
2.361 + for (p = importer->set->packages.data; p < pend; p++) {
2.362 + struct source *src;
2.363 +
2.364 + if (p->name & UPSTREAM_SOURCE)
2.365 + src = &upstream_source;
2.366 + else
2.367 + src = &source;
2.368 +
2.369 + p->requires = emit_properties(&src->set->requires_pool,
2.370 + p->requires,
2.371 + src->requires_map,
2.372 + &importer->set->requires_pool);
2.373 + p->provides = emit_properties(&src->set->provides_pool,
2.374 + p->provides,
2.375 + src->provides_map,
2.376 + &importer->set->provides_pool);
2.377 + p->name &= INDEX_MASK;
2.378 }
2.379
2.380 - return razor_importer_finish(importer);
2.381 + rebuild_package_lists(importer->set);
2.382 +
2.383 + result = importer->set;
2.384 + array_release(&importer->buckets);
2.385 + free(importer);
2.386 +
2.387 + return result;
2.388 }
2.389
2.390 void
2.391 @@ -1278,6 +1502,19 @@
2.392 return 1;
2.393 razor_set_list_unsatisfied(set);
2.394 razor_set_destroy(set);
2.395 + } else if (strcmp(argv[1], "add") == 0) {
2.396 + struct array list;
2.397 +
2.398 + set = razor_set_open(repo_filename);
2.399 + upstream = razor_set_open(rawhide_repo_filename);
2.400 + if (set == NULL || upstream == NULL)
2.401 + return 1;
2.402 + array_init(&list);
2.403 + find_packages(upstream, argc - 2, argv + 2, &list);
2.404 + set = razor_set_add(set, upstream, &list);
2.405 + razor_set_write(set, "system-updated.repo");
2.406 + razor_set_destroy(set);
2.407 + printf("wrote system-updated.repo\n");
2.408 } else if (strcmp(argv[1], "update") == 0) {
2.409 set = razor_set_open(repo_filename);
2.410 upstream = razor_set_open(rawhide_repo_filename);