docs/REPO.txt
changeset 311 cd292c2de0b6
parent 310 9a7691262ce6
child 312 068a7429db5d
     1.1 --- a/docs/REPO.txt	Wed Jul 02 18:46:47 2008 +0100
     1.2 +++ /dev/null	Thu Jan 01 00:00:00 1970 +0000
     1.3 @@ -1,172 +0,0 @@
     1.4 -The repo file format / razor_set data structure
     1.5 ------------------------------------------------
     1.6 -
     1.7 -The repo starts with a header, containing some number of sections,
     1.8 -terminated by a section with type 0:
     1.9 -
    1.10 -	struct razor_set_header {
    1.11 -		uint32_t magic;
    1.12 -		uint32_t version;
    1.13 -		struct razor_set_section sections[0];
    1.14 -	};
    1.15 -
    1.16 -	struct razor_set_section {
    1.17 -		uint32_t type;
    1.18 -		uint32_t offset;
    1.19 -		uint32_t size;
    1.20 -	};
    1.21 -
    1.22 -razor_set_open() mmaps the repo file, and creates a struct razor_set:
    1.23 -
    1.24 -	struct razor_set {
    1.25 -		struct array string_pool;
    1.26 -	 	struct array packages;
    1.27 -	 	struct array properties;
    1.28 -	 	struct array files;
    1.29 -		struct array package_pool;
    1.30 -	 	struct array property_pool;
    1.31 -	 	struct array file_pool;
    1.32 -		struct razor_set_header *header;
    1.33 -	};
    1.34 -
    1.35 -by finding the sections with those IDs and creating "struct array"s
    1.36 -pointing to the right places in the mmapped data. (This is the only
    1.37 -processing needed when reading in the file; everything else is used
    1.38 -exactly as-is.)
    1.39 -
    1.40 -
    1.41 -The sections
    1.42 -------------
    1.43 -
    1.44 -RAZOR_STRING_POOL
    1.45 -
    1.46 -	Stores one copy of each string that appears in the repo. (At
    1.47 -	the moment, this is: package names, package versions, property
    1.48 -	names, property versions, and (basenames of) filenames.) The
    1.49 -	strings are arbitrarily-sized, 0-terminated, and not in any
    1.50 -	particular order (although the empty string always ends up
    1.51 -	being at offset 0).
    1.52 -
    1.53 -RAZOR_PACKAGES
    1.54 -
    1.55 -	Array of struct razor_package; one for each package in the
    1.56 -	set, sorted by name.
    1.57 -
    1.58 -RAZOR_PROPERTIES
    1.59 -
    1.60 -	Array of struct razor_property; one for each unique property
    1.61 -	in the set, sorted by type, then name, then relation type (eg,
    1.62 -	"<" or ">="), then version. (Properties with no version have
    1.63 -	relation type RAZOR_VERSION_EQUAL, and version "".)
    1.64 -
    1.65 -RAZOR_FILES
    1.66 -
    1.67 -	Array of struct razor_entry; one for each file owned by any
    1.68 -	package in the set. The current sort order (which is subject
    1.69 -	to change) is breadth-first, sorted by basename. So eg: /, /bin,
    1.70 -	/dev, /etc, /bin/false, /bin/true, /dev/null, /etc/passwd.
    1.71 -
    1.72 -RAZOR_PACKAGE_POOL
    1.73 -
    1.74 -	Array of struct list, with each list item containing the index
    1.75 -	of a struct razor_package in the packages section. See the
    1.76 -	discussion of lists below.
    1.77 -
    1.78 -RAZOR_PROPERTY_POOL
    1.79 -
    1.80 -	Array of struct list, with each list item containing the index
    1.81 -	of a struct razor_property in the properties section. See the
    1.82 -	discussion of lists below.
    1.83 -
    1.84 -RAZOR_FILE_POOL
    1.85 -
    1.86 -	Array of struct list, with each list item containing the index
    1.87 -	of a struct razor_entry in the files section. See the
    1.88 -	discussion of lists below.
    1.89 -
    1.90 -
    1.91 -Data types
    1.92 -----------
    1.93 -Note that the exact layout of bits involves some historical accidents.
    1.94 -(Particularly the fact that the "name" field in most structs loses its
    1.95 -high bits to a flags field.)
    1.96 -
    1.97 -struct list_head
    1.98 -	uint list_ptr : 24;
    1.99 -	uint flags    : 8;
   1.100 -
   1.101 -struct list
   1.102 -	uint data  : 24;
   1.103 -	uint flags : 8;
   1.104 -
   1.105 -	Used to store lists of package, property, or file IDs. "struct
   1.106 -	list_head" stores the head of the list, which points to one or
   1.107 -	more "struct list"s in the appropriate "pool" section.
   1.108 -	("struct list" should probably be called "struct list_item".)
   1.109 -
   1.110 -	"list_first(&head, &pool)" returns a "struct list *" pointing
   1.111 -	to the first element of the list (or NULL for an empty list),
   1.112 -	and "list_next(list)" will return successive elements, until
   1.113 -	NULL is returned. Each "list->data" contains the index of a
   1.114 -	package, property, or file in the corresponding section of the
   1.115 -	set.
   1.116 -
   1.117 -	Peeking underneath the abstraction, a list_head's "flags" is
   1.118 -	0xff if the list is empty, 0x80 if it contains a single
   1.119 -	element, or 0x00 if it contains more than one element. In the
   1.120 -	single-element case, that element is actually stored in the
   1.121 -	list_head directly rather than being stored in a pool (and so
   1.122 -	list_first() just casts the list_head* to a list* and returns
   1.123 -	it). For multi-element lists, list_ptr is the index in the
   1.124 -	pool of the first element of this list; the list continues
   1.125 -	through successive elements of the pool until one with
   1.126 -	non-zero flags is reached, indicating the end of the list.
   1.127 -
   1.128 -struct razor_package
   1.129 -	uint name    : 24;
   1.130 -	uint flags   : 8;
   1.131 -	uint version : 32;
   1.132 -	struct list_head properties;
   1.133 -	struct list_head files;
   1.134 -
   1.135 -	name and version are indexes into string_pool. properties is a
   1.136 -	list of all of the package's properties, and files is a list
   1.137 -	of its files. flags is currently only used during razor_set
   1.138 -	merging, to keep track of which set a package came from.
   1.139 -
   1.140 -struct razor_property
   1.141 -	uint name     : 24;
   1.142 -	uint flags    : 6;
   1.143 -	uint type     : 2;
   1.144 -	uint relation : 32;
   1.145 -	uint version  : 32;
   1.146 -	struct list_head packages;
   1.147 -
   1.148 -	name and version are indexes into string_pool. type is an enum
   1.149 -	razor_property_type (eg, RAZOR_PROPERTY_REQUIRES), and
   1.150 -	relation is an enum razor_version_relation (eg,
   1.151 -	RAZOR_VERSION_GREATER_OR_EQUAL). packages is a list of the
   1.152 -	packages that have this property. flags is currently unused.
   1.153 -
   1.154 -struct razor_entry
   1.155 -	uint name  : 24;
   1.156 -	uint flags : 8;
   1.157 -	uint start : 32;
   1.158 -	struct list_head packages;
   1.159 -
   1.160 -	name is an index into string_pool, giving the basename of the
   1.161 -	file. start is either 0, or an index pointing to another
   1.162 -	razor_entry that is the first child of this entry (for a
   1.163 -	non-empty directory). (Entry 0 is always the root of the tree,
   1.164 -	so no entry could have entry 0 as a child.) flags is 0x80
   1.165 -	(RAZOR_ENTRY_LAST) if an entry is the last entry in its
   1.166 -	directory. Otherwise it is 0.
   1.167 -
   1.168 -	Note that given a pointer to a struct_razor_entry (eg, from a
   1.169 -	package's "files" list), there is no way to reconstruct its
   1.170 -	full name without walking the entire files array up to that
   1.171 -	point. Because of this and other problems (fix_file_map()), it
   1.172 -	seems like razor_entry should be modified to include a pointer
   1.173 -	to its parent. (Storing full paths instead of just basenames
   1.174 -	would also fix this problem, but that would use much more
   1.175 -	memory.)