patch-2.3.99-pre8 linux/scripts/cramfs/mkcramfs.c
Next file: linux/Documentation/Changes
Previous file: linux/net/unix/af_unix.c
Back to the patch index
Back to the overall index
- Lines: 134
- Date:
Fri May 12 13:29:19 2000
- Orig file:
v2.3.99-pre7/linux/scripts/cramfs/mkcramfs.c
- Orig date:
Fri Jan 21 18:19:17 2000
diff -u --recursive --new-file v2.3.99-pre7/linux/scripts/cramfs/mkcramfs.c linux/scripts/cramfs/mkcramfs.c
@@ -59,6 +59,9 @@
/* FS data */
void *uncompressed;
+ /* points to other identical file */
+ struct entry *same;
+ unsigned int offset; /* pointer to compressed data in archive */
unsigned int dir_offset; /* Where in the archive is the directory entry? */
/* organization */
@@ -84,7 +87,28 @@
*/
#define MAX_INPUT_NAMELEN 255
-static unsigned int parse_directory(const char *name, struct entry **prev, loff_t *fslen_ub)
+/*
+ * Search the entry tree rooted at orig, in the same order write_data()
+ * walks it (entry, then its children, then its next sibling), for a
+ * file whose contents equal newfile's.
+ *
+ * Returns 1 as soon as the search is finished: either a duplicate was
+ * found (newfile->same then points at it) or newfile itself was reached,
+ * meaning nothing before it matches.  Returns 0 if this subtree was
+ * exhausted without hitting either.
+ */
+static int find_identical_file(struct entry *orig,struct entry *newfile)
+{
+	if(orig==newfile) return 1;
+	if(!orig) return 0;
+	if(orig->size==newfile->size && orig->uncompressed && !memcmp(orig->uncompressed,newfile->uncompressed,orig->size)) {
+		newfile->same=orig;
+		/* Stop the whole search here.  Returning 0 would let the
+		   callers keep walking, possibly past newfile, and overwrite
+		   ->same with an entry that is written later than newfile
+		   and therefore has no valid offset yet. */
+		return 1;
+	}
+	return find_identical_file(orig->child,newfile) ||
+		find_identical_file(orig->next,newfile);
+}
+
+/*
+ * Visit every entry reachable from orig (each entry, then its children,
+ * then its next sibling) and, for each one that carries non-empty data,
+ * look for an identical file in the tree under root.
+ */
+static void eliminate_doubles(struct entry *root,struct entry *orig) {
+	struct entry *cur;
+
+	for(cur=orig; cur; cur=cur->next) {
+		if(cur->size && cur->uncompressed)
+			find_identical_file(root,cur);
+		eliminate_doubles(root,cur->child);
+	}
+}
+
+static unsigned int parse_directory(struct entry *root_entry, const char *name, struct entry **prev, loff_t *fslen_ub)
{
DIR *dir;
int count = 0, totalsize = 0;
@@ -173,7 +197,7 @@
size = sizeof(struct cramfs_inode) + ((namelen + 3) & ~3);
*fslen_ub += size;
if (S_ISDIR(st.st_mode)) {
- entry->size = parse_directory(path, &entry->child, fslen_ub);
+ entry->size = parse_directory(root_entry, path, &entry->child, fslen_ub);
} else if (S_ISREG(st.st_mode)) {
/* TODO: We ought to open files in do_compress, one
at a time, instead of amassing all these memory
@@ -233,10 +257,14 @@
warn_dev = 1;
}
- if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode))
+ if (S_ISREG(st.st_mode) || S_ISLNK(st.st_mode)) {
/* block pointers & data expansion allowance + data */
- *fslen_ub += ((4+26)*((entry->size - 1) / blksize + 1)
- + MIN(entry->size + 3, st.st_blocks << 9));
+ if(entry->size)
+ *fslen_ub += ((4+26)*((entry->size - 1) / blksize + 1)
+ + MIN(entry->size + 3, st.st_blocks << 9));
+ else
+ *fslen_ub += MIN(entry->size + 3, st.st_blocks << 9);
+ }
/* Link it into the list */
*prev = entry;
@@ -448,7 +476,7 @@
st_blocks * 512. But if you say that then perhaps
administrative data should also be included in both. */
change = new_size - original_size;
- printf("%5.2f%% (%d bytes)\t%s\n",
+ printf("%6.2f%% (%+d bytes)\t%s\n",
(change * 100) / (double) original_size, change, name);
return curr;
@@ -459,26 +487,23 @@
* Traverse the entry tree, writing data for every item that has
* non-null entry->compressed (i.e. every symlink and non-empty
* regfile).
- *
- * Frees the entry pointers as it goes.
*/
static unsigned int write_data(struct entry *entry, char *base, unsigned int offset)
{
do {
if (entry->uncompressed) {
- set_data_offset(entry, base, offset);
- offset = do_compress(base, offset, entry->name, entry->uncompressed, entry->size);
+ if(entry->same) {
+ set_data_offset(entry, base, entry->same->offset);
+ entry->offset=entry->same->offset;
+ } else {
+ set_data_offset(entry, base, offset);
+ entry->offset=offset;
+ offset = do_compress(base, offset, entry->name, entry->uncompressed, entry->size);
+ }
}
else if (entry->child)
offset = write_data(entry->child, base, offset);
-
- /* Free the old before processing the next. */
- {
- struct entry *tmp = entry;
- entry = entry->next;
- free(tmp->name);
- free(tmp);
- }
+ entry=entry->next;
} while (entry);
return offset;
}
@@ -537,7 +562,7 @@
root_entry->uid = st.st_uid;
root_entry->gid = st.st_gid;
- root_entry->size = parse_directory(argv[1], &root_entry->child, &fslen_ub);
+ root_entry->size = parse_directory(root_entry, argv[1], &root_entry->child, &fslen_ub);
if (fslen_ub > MAXFSLEN) {
fprintf(stderr,
"warning: guestimate of required size (upper bound) is %luMB, but maximum image size is %uMB. We might die prematurely.\n",
@@ -545,6 +570,11 @@
MAXFSLEN >> 20);
fslen_ub = MAXFSLEN;
}
+
+ /* find duplicate files. TODO: uses the most inefficient algorithm
+ possible. */
+ eliminate_doubles(root_entry,root_entry);
+
/* TODO: Why do we use a private/anonymous mapping here
followed by a write below, instead of just a shared mapping
FUNET's LINUX-ADM group, linux-adm@nic.funet.fi
TCL-scripts by Sam Shen (who was at: slshen@lbl.gov)