1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/docs/REPO.txt Sat Jun 28 18:33:15 2008 -0400
1.3 @@ -0,0 +1,172 @@
1.4 +The repo file format / razor_set data structure
1.5 +-----------------------------------------------
1.6 +
1.7 +The repo starts with a header, containing some number of sections,
1.8 +terminated by a section with type 0:
1.9 +
1.10 + struct razor_set_header {
1.11 + uint32_t magic;
1.12 + uint32_t version;
1.13 + struct razor_set_section sections[0];
1.14 + };
1.15 +
1.16 + struct razor_set_section {
1.17 + uint32_t type;
1.18 + uint32_t offset;
1.19 + uint32_t size;
1.20 + };
1.21 +
1.22 +razor_set_open() mmaps the repo file, and creates a struct razor_set:
1.23 +
1.24 + struct razor_set {
1.25 + struct array string_pool;
1.26 + struct array packages;
1.27 + struct array properties;
1.28 + struct array files;
1.29 + struct array package_pool;
1.30 + struct array property_pool;
1.31 + struct array file_pool;
1.32 + struct razor_set_header *header;
1.33 + };
1.34 +
1.35 +by finding the sections with those IDs and creating "struct array"s
1.36 +pointing to the right places in the mmapped data. (This is the only
1.37 +processing needed when reading in the file; everything else is used
1.38 +exactly as-is.)
1.39 +
1.40 +
1.41 +The sections
1.42 +------------
1.43 +
1.44 +RAZOR_STRING_POOL
1.45 +
1.46 + Stores one copy of each string that appears in the repo. (At
1.47 + the moment, this is: package names, package versions, property
1.48 + names, property versions, and (basenames of) filenames.) The
1.49 + strings are arbitrarily-sized, 0-terminated, and not in any
1.50 + particular order (although the empty string always ends up
1.51 + being at offset 0).
1.52 +
1.53 +RAZOR_PACKAGES
1.54 +
1.55 + Array of struct razor_package; one for each package in the
1.56 + set, sorted by name.
1.57 +
1.58 +RAZOR_PROPERTIES
1.59 +
1.60 + Array of struct razor_property; one for each unique property
1.61 + in the set, sorted by type, then name, then relation type (eg,
1.62 + "<" or ">="), then version. (Properties with no version have
1.63 + relation type RAZOR_VERSION_EQUAL, and version "".)
1.64 +
1.65 +RAZOR_FILES
1.66 +
1.67 + Array of struct razor_entry; one for each file owned by any
1.68 + package in the set. The current sort order (which is subject
1.69 + to change) is breadth-first, sorted by basename. So eg: /, /bin,
1.70 + /dev, /etc, /bin/false, /bin/true, /dev/null, /etc/passwd.
1.71 +
1.72 +RAZOR_PACKAGE_POOL
1.73 +
1.74 + Array of struct list, with each list item containing the index
1.75 + of a struct razor_package in the packages section. See the
1.76 + discussion of lists below.
1.77 +
1.78 +RAZOR_PROPERTY_POOL
1.79 +
1.80 + Array of struct list, with each list item containing the index
1.81 + of a struct razor_property in the properties section. See the
1.82 + discussion of lists below.
1.83 +
1.84 +RAZOR_FILE_POOL
1.85 +
1.86 + Array of struct list, with each list item containing the index
1.87 + of a struct razor_entry in the files section. See the
1.88 + discussion of lists below.
1.89 +
1.90 +
1.91 +Data types
1.92 +----------
1.93 +Note that the exact layout of bits involves some historical accidents.
1.94 +(Particularly the fact that the "name" field in most structs loses its
1.95 +high bits to a flags field.)
1.96 +
1.97 +struct list_head
1.98 + uint list_ptr : 24;
1.99 + uint flags : 8;
1.100 +
1.101 +struct list
1.102 + uint data : 24;
1.103 + uint flags : 8;
1.104 +
1.105 + Used to store lists of package, property, or file IDs. "struct
1.106 + list_head" stores the head of the list, which points to one or
1.107 + more "struct list"s in the appropriate "pool" section.
1.108 + ("struct list" should probably be called "struct list_item".)
1.109 +
1.110 + "list_first(&head, &pool)" returns a "struct list *" pointing
1.111 + to the first element of the list (or NULL for an empty list),
1.112 + and "list_next(list)" will return successive elements, until
1.113 + NULL is returned. Each "list->data" contains the index of a
1.114 + package, property, or file in the corresponding section of the
1.115 + set.
1.116 +
1.117 + Peeking underneath the abstraction, a list_head's "flags" is
1.118 + 0xff if the list is empty, 0x80 if it contains a single
1.119 + element, or 0x00 if it contains more than one element. In the
1.120 + single-element case, that element is actually stored in the
1.121 + list_head directly rather than being stored in a pool (and so
1.122 + list_first() just casts the list_head* to a list* and returns
1.123 + it). For multi-element lists, list_ptr is the index in the
1.124 + pool of the first element of this list; the list continues
1.125 + through successive elements of the pool until one with
1.126 + non-zero flags is reached, indicating the end of the list.
1.127 +
1.128 +struct razor_package
1.129 + uint name : 24;
1.130 + uint flags : 8;
1.131 + uint version : 32;
1.132 + struct list_head properties;
1.133 + struct list_head files;
1.134 +
1.135 + name and version are indexes into string_pool. properties is a
1.136 + list of all of the package's properties, and files is a list
1.137 + of its files. flags is currently only used during razor_set
1.138 + merging, to keep track of which set a package came from.
1.139 +
1.140 +struct razor_property
1.141 + uint name : 24;
1.142 + uint flags : 6;
1.143 + uint type : 2;
1.144 + uint relation : 32;
1.145 + uint version : 32;
1.146 + struct list_head packages;
1.147 +
1.148 + name and version are indexes into string_pool. type is an enum
1.149 + razor_property_type (eg, RAZOR_PROPERTY_REQUIRES), and
1.150 + relation is an enum razor_version_relation (eg,
1.151 + RAZOR_VERSION_GREATER_OR_EQUAL). packages is a list of the
1.152 + packages that have this property. flags is currently unused.
1.153 +
1.154 +struct razor_entry
1.155 + uint name : 24;
1.156 + uint flags : 8;
1.157 + uint start : 32;
1.158 + struct list_head packages;
1.159 +
1.160 + name is an index into string_pool, giving the basename of the
1.161 + file. start is either 0, or an index pointing to another
1.162 + razor_entry that is the first child of this entry (for a
1.163 + non-empty directory). (Entry 0 is always the root of the tree,
1.164 + so no entry could have entry 0 as a child.) flags is 0x80
1.165 + (RAZOR_ENTRY_LAST) if an entry is the last entry in its
1.166 + directory. Otherwise it is 0.
1.167 +
1.168 + Note that given a pointer to a struct_razor_entry (eg, from a
1.169 + package's "files" list), there is no way to reconstruct its
1.170 + full name without walking the entire files array up to that
1.171 + point. Because of this and other problems (fix_file_map()), it
1.172 + seems like razor_entry should be modified to include a pointer
1.173 + to its parent. (Storing full paths instead of just basenames
1.174 + would also fix this problem, but that would use much more
1.175 + memory.)