Skip site navigation (1)Skip section navigation (2)
Date:      Mon, 19 Nov 2001 14:40:29 +0200
From:      Alexey Zelkin <phantom@FreeBSD.ORG>
To:        Martin Horcicka <horcicka@FreeBSD.cz>
Cc:        Hiroki Sato <hrs@eos.ocn.ne.jp>, freebsd-doc@FreeBSD.ORG, nik@FreeBSD.ORG, saken@hotel.rmta.org
Subject:   Re: Why TIDY can never work correctly with ISO-8859-2 and others
Message-ID:  <20011119144029.A17854@ark.cris.net>
In-Reply-To: <20011119104031.B88374-100000@dual.ms.mff.cuni.cz>; from horcicka@FreeBSD.cz on Mon, Nov 19, 2001 at 10:45:08AM +0100
References:  <20011115215244.A7285@ark.cris.net> <20011119104031.B88374-100000@dual.ms.mff.cuni.cz>

next in thread | previous in thread | raw e-mail | index | archive | help
hi,

On Mon, Nov 19, 2001 at 10:45:08AM +0100, Martin Horcicka wrote:

> > Attached patch does a job. At least my simple tests were passed successfully.
> > I just added new option '-preserve' to tidy. This option disables
> > translation of characters entities to characters before processing.
> > As "side effect" we have all entities saved correctly in output file.
> >
> > I would like to have feedback on this one. At least for Russian Doc Project
> > it should do a good job and I'd like to see it committed.
> 
> Great! And could it be possible to add another option to turn off the
> interpretation of .tidyrc in home directory?

There's no simple way to turn off .tidyrc because it's unconditionally
preloaded before any command-line option is processed. As a simple workaround
to get rid of the "Can't open '/home/<username>/.tidyrc'" message, I just
added a check for the existence of ~/.tidyrc and suppress the file open error
output when ~/.tidyrc is missing. Patch attached.

Comments ?

diff -u work/tidy4aug00/config.c work.0/tidy4aug00/config.c
--- work/tidy4aug00/config.c	Fri Aug  4 19:21:05 2000
+++ work.0/tidy4aug00/config.c	Mon Nov 19 14:42:14 2001
@@ -94,6 +94,7 @@
 Bool TidyMark = yes;        /* add meta element indicating tidied doc */
 Bool Emacs = no;            /* if true format error output for GNU Emacs */
 Bool LiteralAttribs = no;   /* if true attributes may use newlines */
+Bool PreserveEntities = no; /* if true don't convert entities to chars */
 
 typedef struct _lex PLex;
 
@@ -186,6 +187,7 @@
     {"doctype",         {(int *)&doctype_str},      ParseDocType},
     {"fix-backslash",   {(int *)&FixBackslash},     ParseBool},
     {"gnu-emacs",       {(int *)&Emacs},            ParseBool},
+    {"preserve-entities", {(int *)&PreserveEntities}, ParseBool},
 
   /* this must be the final entry */
     {0,          0,             0}
@@ -423,7 +425,10 @@
     /* open the file and parse its contents */
 
     if ((fin = fopen(fname, "r")) == null)
-        FileError(stderr, fname);
+    {
+        if (FileExists(fname))		/* quiet file open error on */
+            FileError(stderr, fname);   /* non-existent file */
+    }
     else
     {
         config_text = null;
@@ -533,6 +538,12 @@
     {
         QuoteAmpersand = yes;
         HideEndTags = no;
+    }
+
+ /* Avoid &amp;copy; in preserve-entities case */
+    if (PreserveEntities)
+    {
+       QuoteAmpersand = no;
     }
 }
 
diff -u work/tidy4aug00/html.h work.0/tidy4aug00/html.h
--- work/tidy4aug00/html.h	Fri Aug  4 19:21:05 2000
+++ work.0/tidy4aug00/html.h	Mon Nov 19 14:36:54 2001
@@ -380,6 +380,7 @@
 
 void FatalError(char *msg);
 void FileError(FILE *fp, const char *file);
+int FileExists(const char *file);
 
 Node *GetToken(Lexer *lexer, uint mode);
 
@@ -758,6 +759,7 @@
 extern Bool Word2000;
 extern Bool Emacs;  /* sasdjb 01May00 GNU Emacs error output format */
 extern Bool LiteralAttribs;
+extern Bool PreserveEntities;
 
 /* Parser methods for tags */
 
diff -u work/tidy4aug00/lexer.c work.0/tidy4aug00/lexer.c
--- work/tidy4aug00/lexer.c	Fri Aug  4 19:21:05 2000
+++ work.0/tidy4aug00/lexer.c	Thu Nov 15 21:44:03 2001
@@ -1517,8 +1517,10 @@
 
                     continue;
                 }
-                else if (c == '&' && mode != IgnoreMarkup)
-                    ParseEntity(lexer, mode);
+                else if (c == '&' && mode != IgnoreMarkup
+				&& !PreserveEntities) {
+               		ParseEntity(lexer, mode);
+		}
 
                 /* this is needed to avoid trimming trailing whitespace */
                 if (mode == IgnoreWhitespace)
@@ -2624,7 +2626,7 @@
                 seen_gt = yes;
         }
 
-        if (c == '&')
+        if (c == '&')	/* XXX: possibly need support for PreserveEntities */
         {
             AddCharToLexer(lexer, c);
             ParseEntity(lexer, null);
diff -u work/tidy4aug00/localize.c work.0/tidy4aug00/localize.c
--- work/tidy4aug00/localize.c	Fri Aug  4 19:21:05 2000
+++ work.0/tidy4aug00/localize.c	Mon Nov 19 14:39:38 2001
@@ -8,6 +8,9 @@
   to localize HTML tidy.
 */
 
+#include <sys/types.h>
+#include <sys/stat.h>
+
 #include "platform.h"
 #include "html.h"
 
@@ -50,6 +53,16 @@
     tidy_out(fp, "Can't open \"%s\"\n", file);
 }
 
+int FileExists(const char *file)
+{
+    struct stat st;
+
+    if (stat(file, &st) < 0)
+        return (0);
+    else
+        return (1);
+}
+
 static void ReportTag(Lexer *lexer, Node *tag)
 {
     if (tag)
@@ -736,6 +749,7 @@
     tidy_out(out, "  -xml            use this when input is wellformed xml\n");
     tidy_out(out, "  -asxml          to convert html to wellformed xml\n");
     tidy_out(out, "  -slides         to burst into slides on h2 elements\n");
+    tidy_out(out, "  -preserve       to preserve entities as is in source file\n");
     tidy_out(out, "\n");
 
     tidy_out(out, "Character encodings\n");
diff -u work/tidy4aug00/man_page.txt work.0/tidy4aug00/man_page.txt
--- work/tidy4aug00/man_page.txt	Fri Aug  4 19:21:05 2000
+++ work.0/tidy4aug00/man_page.txt	Thu Nov 15 21:54:05 2001
@@ -12,6 +12,7 @@
 .IR column ]
 .RB [ -upper ]
 .RB [ -clean ]
+.RB [ -preserve ]
 .RB [ -raw
 |
 .B -ascii
@@ -106,6 +107,9 @@
 .TP
 .B -slides
 Burst into slides on <H2> elements.
+.TP
+.B -preserve
+Preserve source file entities as is.
 .TP
 .BR -help ", " -h
 List command-line options.
diff -u work/tidy4aug00/tidy.c work.0/tidy4aug00/tidy.c
--- work/tidy4aug00/tidy.c	Fri Aug  4 19:21:05 2000
+++ work.0/tidy4aug00/tidy.c	Mon Nov 19 14:39:50 2001
@@ -785,6 +785,8 @@
                 Quiet = yes;
             else if (strcmp(arg, "slides") == 0)
                 BurstSlides = yes;
+            else if (strcmp(arg, "preserve") == 0)
+                PreserveEntities = yes;
             else if (strcmp(arg, "help") == 0 ||
                      argv[1][1] == '?'|| argv[1][1] == 'h')
             {

To Unsubscribe: send mail to majordomo@FreeBSD.org
with "unsubscribe freebsd-doc" in the body of the message




Want to link to this message? Use this URL: <https://mail-archive.FreeBSD.org/cgi/mid.cgi?20011119144029.A17854>