Date: Mon, 13 Jan 2003 20:28:03 +0600 (NOVT) From: Alexey Dokuchaev <danfe@regency.nsu.ru> To: FreeBSD-gnats-submit@FreeBSD.org Subject: ports/47018: Teach ftp/wget new very useful feature - remove old files and dirs not present on server Message-ID: <200301131428.h0DES3LL070373@regency.nsu.ru>
next in thread | raw e-mail | index | archive | help
>Number: 47018 >Category: ports >Synopsis: Teach ftp/wget new very useful feature - remove old files and dirs not present on server >Confidential: no >Severity: non-critical >Priority: low >Responsible: freebsd-ports >State: open >Quarter: >Keywords: >Date-Required: >Class: change-request >Submitter-Id: current-users >Arrival-Date: Mon Jan 13 06:30:17 PST 2003 >Closed-Date: >Last-Modified: >Originator: Alexey Dokuchaev >Release: FreeBSD 4.7-STABLE i386 >Organization: CNIT NSU >Environment: System: FreeBSD regency.nsu.ru 4.7-STABLE FreeBSD 4.7-STABLE #0: Thu Dec 5 19:45:35 NOVT 2002 root@regency.nsu.ru:/usr/src/sys/compile/REGENCY i386 >Description: Since I was unable to find in Wget 1.8.2 (the latest version, to my knowledge) the very useful feature of removing files, symlinks, and directories that are no longer present on the server while mirroring FTP sites, I implemented it myself. It might be of use to someone else, so I've submitted it to the Wget team for review, and it's very likely that it will be included in the next release of Wget. For now, applying this patch teaches Wget 1.8.2 to accept the -J (--remove-old) option to do the job. This patch essentially makes wget(1) useful for mirroring large FTP sites (such as ftp.FreeBSD.org). Reviewed by: fjoe >How-To-Repeat: N/A >Fix: Just put this patch in the files/ directory of ftp/wget. It applies cleanly along with all the other patches in there. <patch> diff -ur src/ftp.c src/ftp.c --- src/ftp.c Sat May 18 10:05:16 2002 +++ src/ftp.c Tue Dec 24 10:03:28 2002 @@ -41,6 +41,7 @@ # include <unistd.h> #endif #include <sys/types.h> +#include <sys/param.h> #include <assert.h> #include <errno.h> @@ -1237,6 +1238,9 @@ static struct fileinfo *delelement PARAMS ((struct fileinfo *, struct fileinfo **)); static void freefileinfo PARAMS ((struct fileinfo *f)); +#ifndef WINDOWS +static void ftp_remove_missing (char *, struct fileinfo *); +#endif /* WINDOWS */ /* Retrieve a list of files given in struct fileinfo linked list. 
If a file is a symbolic link, do not retrieve it, but rather try to @@ -1282,6 +1286,12 @@ err = RETROK; /* in case it's not used */ +#ifndef WINDOWS + if (opt.remove_old) + /* Remove left-overs first. */ + ftp_remove_missing (url_filename (u), f); +#endif /* WINDOWS */ + while (f) { char *old_target, *ofile; @@ -1757,3 +1767,221 @@ f = next; } } + +#ifndef WINDOWS +/* Synchronize local and remote directory contents (i.e. remove files, + directories, and symlinks missing on server). We do this by obtaining + a list of local files first, sorting both linked lists, comparing + them, and removing local entries missing in server's file list. + + This is required for coherent FTP mirroring. + + Sorting is needed since we are not sure that both server and local + directory traversing will use the same sorting rules (hidden files' + position in the list, case sensitivity, etc.). Without sorting at all, + comparing two file lists would take O(n^2) time, instead of O(n). + + We could first check whether file lists are sorted in the same + manner, and if they are, do not attempt to further sort them, but + this seems rather cumbersome and is thus omitted. If someone feels like + doing this, be my guest. + + Implemented by Alexey Dokuchaev (danfe@regency.nsu.ru). */ + +static struct fileinfo *getlocallist (const char *, int *); +int comparefileinfo (const void *, const void *); +static void unlinkmissing (const char *, const char *); + +static void +ftp_remove_missing (char *tgt, struct fileinfo *r) +{ + struct fileinfo **loc, **rmt, *l, *o; + int n = 1, m, i, j; + + /* #### There probably is a better way of getting the number of list + elements, so I don't have to traverse the list. I just didn't bother + to find it. 
;-) */ + while (r->next) + { + n++; + r = r->next; + } + rmt = (struct fileinfo **)xmalloc (n * sizeof (struct fileinfo *)); + for (i = n; i;) + { + rmt[--i] = r; + r = r->prev; + } + + qsort (rmt, n, sizeof (struct fileinfo *), comparefileinfo); + + j = strlen (tgt); + while (j && tgt[--j] != '/'); + if (!j) + tgt[j++] = '.'; + tgt[j] = '\0'; + + if (!(o = l = getlocallist (tgt, &m))) + { + logprintf (LOG_NOTQUIET, _("Failed to clean up `%s'.\n"), tgt); + xfree (rmt); + return; + } + else if (o == (struct fileinfo *)-1) + { + logprintf (LOG_VERBOSE, _("Directory `%s' is empty, nothing to clean up.\n"), tgt); + xfree (rmt); + return; + } + + loc = (struct fileinfo **)xmalloc (m * sizeof (struct fileinfo *)); + while (i < m) + { + loc[i++] = l; + l = l->next; + } + + qsort (loc, m, sizeof (struct fileinfo *), comparefileinfo); + + for (i = j = 0; i < n && j < m;) + { + int q = strcmp (rmt[i]->name, loc[j]->name); + + if (q > 0) + unlinkmissing (tgt, loc[j++]->name); + else + { + i++; + if (!q) + j++; + } + } + + /* If any files are left locally after comparing remote and local + lists, remove them anyway. */ + while (j < m) + unlinkmissing (tgt, loc[j++]->name); + + freefileinfo (o); + xfree (rmt); + xfree (loc); +} + +/* Obtain linked list of local directory contents. */ +static struct fileinfo * +getlocallist (const char *dir, int *n) +{ + DIR *d; + struct dirent *dp; + struct fileinfo *cur, *prev = NULL, *orig = (struct fileinfo *)-1; + + if (!(d = opendir (dir))) + return NULL; + + for (*n = 0; (dp = readdir (d));) + { + /* #### Should check for DT_DIR || DT_REG || DT_LNK here. Also, + might consider removing redundant strcmp()ing. 
*/ + if (strcmp (dp->d_name, ".") && strcmp (dp->d_name, "..")) + { + cur = (struct fileinfo *)xmalloc (sizeof (struct fileinfo)); + cur->type = dp->d_type; + cur->name = (char *)xmalloc (dp->d_namlen + 1); + memcpy (cur->name, dp->d_name, dp->d_namlen + 1); + cur->linkto = NULL; /* for freefileinfo() */ + cur->prev = prev; + cur->next = NULL; + if (prev) + prev->next = cur; + prev = cur; + if (orig == (struct fileinfo *)-1) + orig = cur; + (*n)++; + } + } + closedir (d); + return orig; /* don't forget to freefileinfo() it! */ +} + +static int removedir (const char *); + +static void +unlinkmissing (const char *path, const char *nm) +{ + struct stat sb; + char *fp = xmalloc (strlen (path) + strlen (nm) + 1); + + strcpy (fp, path); strcat (fp, "/"); strcat (fp, nm); + + if (!stat (fp, &sb)) + { + if (!S_ISDIR (sb.st_mode)) + { + if (unlink (fp)) + logprintf (LOG_NOTQUIET, _("Could not remove stale file `%s': %s.\n"), nm, + strerror (errno)); + else + logprintf (LOG_VERBOSE, _("Removed stale file `%s'.\n"), nm); + } + else + { + if (removedir (fp)) + logprintf (LOG_NOTQUIET, _("Could not remove stale directory `%s': %s.\n"), + nm, strerror (errno)); + else + logprintf (LOG_VERBOSE, _("Removed stale directory `%s'.\n"), nm); + } + } + else if (!lstat (fp, &sb)) + { + if (unlink (fp)) + logprintf (LOG_NOTQUIET, _("Could not remove stale symlink `%s': %s.\n"), nm, + strerror (errno)); + else + logprintf (LOG_VERBOSE, _("Removed stale symlink `%s'.\n"), nm); + } + xfree (fp); +} + +static int +removedir (const char *path) +{ + DIR *d; + struct dirent *dp; + char *fp, *p; + int r = 0; + + if (!(d = opendir (path))) + return 1; + + fp = xmalloc (strlen (path) + MAXPATHLEN); + strcpy (fp, path); strcat (fp, "/"); + p = fp + strlen (fp); + + while ((dp = readdir (d)) != NULL) + { + if (strcmp (dp->d_name, ".") && strcmp (dp->d_name, "..")) + { + strcat (fp, dp->d_name); + + if (dp->d_type != DT_DIR) + r |= unlink (fp); + else + r |= removedir (fp); + + *p = '\0'; + } + } + + 
xfree (fp); + closedir (d); + r |= rmdir (path); + return r; +} + +int +comparefileinfo (const void *a, const void *b) +{ + return strcmp((*(struct fileinfo **)a)->name, (*(struct fileinfo **)b)->name); +} +#endif /* WINDOWS */ diff -ur src/ftp.h src/ftp.h --- src/ftp.h Sun May 19 10:04:53 2002 +++ src/ftp.h Fri Dec 20 21:53:31 2002 @@ -30,6 +30,9 @@ #ifndef FTP_H #define FTP_H +/* Need it for enum ftype. */ +#include <dirent.h> + /* Need it for struct rbuf. */ #include "rbuf.h" @@ -61,10 +64,10 @@ /* File types. */ enum ftype { - FT_PLAINFILE, - FT_DIRECTORY, - FT_SYMLINK, - FT_UNKNOWN + FT_PLAINFILE = DT_REG, + FT_DIRECTORY = DT_DIR, + FT_SYMLINK = DT_LNK, + FT_UNKNOWN = DT_UNKNOWN }; diff -ur src/init.c src/init.c --- src/init.c Sat May 18 10:05:19 2002 +++ src/init.c Fri Dec 20 22:01:12 2002 @@ -180,6 +180,9 @@ { "reject", &opt.rejects, cmd_vector }, { "relativeonly", &opt.relative_only, cmd_boolean }, { "removelisting", &opt.remove_listing, cmd_boolean }, +#ifndef WINDOWS + { "removeold", &opt.remove_old, cmd_boolean }, +#endif /* WINDOWS */ { "retrsymlinks", &opt.retr_symlinks, cmd_boolean }, { "robots", &opt.use_robots, cmd_boolean }, { "savecookies", &opt.cookies_output, cmd_file }, @@ -266,6 +269,10 @@ opt.dot_bytes = 1024; opt.dot_spacing = 10; opt.dots_in_line = 50; + +#ifndef WINDOWS + opt.remove_old = 0; +#endif /* WINDOWS */ } /* Return the user's home directory (strdup-ed), or NULL if none is diff -ur src/main.c src/main.c --- src/main.c Sat May 18 10:05:19 2002 +++ src/main.c Fri Dec 20 22:26:27 2002 @@ -222,6 +222,7 @@ -k, --convert-links convert non-relative links to relative.\n\ -K, --backup-converted before converting file X, back up as X.orig.\n\ -m, --mirror shortcut option equivalent to -r -N -l inf -nr.\n\ + -J, --remove-old remove files and directories not present on server.\n\ -p, --page-requisites get all images, etc. 
needed to display HTML page.\n\ \n"), stdout); fputs (_("\ @@ -280,6 +281,9 @@ { "random-wait", no_argument, NULL, 165 }, { "recursive", no_argument, NULL, 'r' }, { "relative", no_argument, NULL, 'L' }, +#ifndef WINDOWS + { "remove-old", no_argument, NULL, 'J' }, +#endif /* WINDOWS */ { "retr-symlinks", no_argument, NULL, 137 }, { "save-headers", no_argument, NULL, 's' }, { "server-response", no_argument, NULL, 'S' }, @@ -363,7 +367,7 @@ that the options with required arguments must be followed by a ':'. -- Dan Harkless <wget@harkless.org>] */ while ((c = getopt_long (argc, argv, "\ -hpVqvdkKsxmNWrHSLcFbEY:G:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:C:", +hpVqvdkKsxmJNWrHSLcFbEY:G:g:T:U:O:l:n:i:o:a:t:D:A:R:P:B:e:Q:X:I:w:C:", long_options, (int *)0)) != EOF) { switch (c) @@ -437,6 +441,11 @@ case 'F': setval ("forcehtml", "on"); break; +#ifndef WINDOWS + case 'J': + setval ("removeold", "on"); + break; +#endif /* WINDOWS */ case 'H': setval ("spanhosts", "on"); break; diff -ur src/options.h src/options.h --- src/options.h Sat May 18 10:05:20 2002 +++ src/options.h Fri Dec 20 22:09:49 2002 @@ -146,6 +146,10 @@ locally? */ int remove_listing; /* Do we remove .listing files generated by FTP? */ +#ifndef WINDOWS + int remove_old; /* Do we remove files and directories + not present on FTP server? */ +#endif /* WINDOWS */ int htmlify; /* Do we HTML-ify the OS-dependent listings? */ </patch> >Release-Note: >Audit-Trail: >Unformatted: To Unsubscribe: send mail to majordomo@FreeBSD.org with "unsubscribe freebsd-ports" in the body of the message
Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?200301131428.h0DES3LL070373>