3 # Copyright (c) 2008 Geoffrey Sneddon
5 # Permission is hereby granted, free of charge, to any person obtaining a copy
6 # of this software and associated documentation files (the "Software"), to deal
7 # in the Software without restriction, including without limitation the rights
8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 # copies of the Software, and to permit persons to whom the Software is
10 # furnished to do so, subject to the following conditions:
12 # The above copyright notice and this permission notice shall be included in
13 # all copies or substantial portions of the Software.
15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 """usage: anolis [options] input output
24 Post-process a document, adding cross-references, table of contents, etc.
27 from optparse import OptionParser, SUPPRESS_HELP
30 from lxml import etree
32 from anolislib import generator, utils
36 # Create the options parser
37 optParser = getOptParser()
38 opts, args = optParser.parse_args()
40 # Check we have enough arguments
46 # Get input and generate
47 input = open(args[0], "rb")
48 tree = generator.fromFile(input, **kwargs)
52 output = open(args[1], "wb")
53 generator.toFile(tree, output, **kwargs)
55 except (utils.AnolisException, IOError, etree.XMLSyntaxError), e:
56 sys.stderr.write(unicode(e) + u"\n")
59 sys.stderr.write(u"anolis expects two arguments. Use -h for help\n")
def enable(option, opt_str, value, parser, *args, **kwargs):
    """optparse callback: switch on the process named by ``value``.

    Only ``value`` and ``parser`` are used. The other parameters are part
    of the signature optparse uses when it invokes a callback.
    """
    # The collection of selected processes lives on the parser's values.
    selected = parser.values.processes
    selected.add(value)
def disable(option, opt_str, value, parser, *args, **kwargs):
    """optparse callback: switch off the process named by ``value``.

    Only ``value`` and ``parser`` are used. The other parameters are part
    of the signature optparse uses when it invokes a callback. ``discard``
    is used, so naming a process that is not currently selected is
    harmless (assuming ``processes`` is a set, as in the parser defaults).
    """
    selected = parser.values.processes
    selected.discard(value)
70 parser = OptionParser(usage = __doc__, version="%prog 1.1dev")
72 parser.add_option("", "--enable", action="callback", callback=enable,
73 type="string", dest="processes",
74 help="Enable the process given as the option value")
76 parser.add_option("", "--disable", action="callback", callback=disable,
78 help="Disable the process given as the option value")
80 parser.add_option("", "--parser", type="choice",
81 choices=("html5lib", "lxml.html"),
82 help="Choose what parser to use. Valid options: html5lib, lxml.html")
84 parser.add_option("", "--serializer", type="choice",
85 choices=("html5lib", "lxml.html"),
86 help="Choose what serializer to use. Valid options: html5lib, lxml.html")
88 parser.add_option("", "--newline-char", action="store", type="string",
90 help="Set the newline character/string used when creating new newlines. This should match the rest of the newlines in the document.")
92 parser.add_option("", "--indent-char", action="store", type="string",
94 help="Set the character/string used when creating indenting new blocks of (X)HTML. This should match the rest of the indentation in the document.")
96 parser.add_option("", "--force-html4-id", action="store_true",
97 dest="force_html4_id",
98 help="Force the ID generation algorithm to create HTML 4 compliant IDs regardless of the DOCTYPE.")
100 parser.add_option("", "--min-depth", action="store", type="int",
102 help="Highest ranking header to number/insert into TOC.")
104 parser.add_option("", "--max-depth", action="store", type="int",
106 help="Lowest ranking header to number/insert into TOC.")
108 parser.add_option("", "--allow-duplicate-dfns", action="store_true",
109 dest="allow_duplicate_dfns",
110 help="Allow multiple definitions of terms when cross-referencing (the last instance of the term is used when referencing it).")
112 parser.add_option("", "--w3c-compat", action="store_true",
114 help="Behave in a (mostly) compatible way to the W3C CSS WG's Postprocessor (this implies all of the other --w3c-compat options with the exception of --w3c-compat-crazy-substitution, as that is too crazy).")
116 parser.add_option("", "--w3c-compat-xref-elements", action="store_true",
117 dest="w3c_compat_xref_elements",
118 help="Uses the same list of elements to look for cross-references in as the W3C CSS WG's Postprocessor, even when the elements shouldn't semantically be used for cross-reference terms.")
120 parser.add_option("", "--w3c-compat-xref-a-placement", action="store_true",
121 dest="w3c_compat_xref_a_placement",
122 help="When cross-referencing elements apart from span, put the a element inside the element instead of outside the element.")
124 parser.add_option("", "--w3c-compat-xref-normalization", action="store_true",
125 dest="w3c_compat_xref_normalization",
126 help="Only use ASCII letters, numbers, and spaces in comparison of cross-reference terms.")
128 parser.add_option("", "--w3c-compat-class-toc", action="store_true",
129 dest="w3c_compat_class_toc",
130 help="Add @class='toc' on every ol element in the table of contents (instead of only the root ol element).")
132 parser.add_option("", "--w3c-compat-substitutions", action="store_true",
133 dest="w3c_compat_substitutions",
134 help="Do W3C specific substitutions.")
136 parser.add_option("", "--w3c-compat-crazy-substitutions", action="store_true",
137 dest="w3c_compat_crazy_substitutions",
138 help="Do crazy W3C specific substitutions, which may cause unexpected behaviour (i.e., replacing random strings within the document with no special marker).")
152 parser.add_option("", "--profile", action="store_true",
153 dest="profile", help=SUPPRESS_HELP)
155 parser.add_option("", "--inject-meta-charset", action="store_true",
156 dest="inject_meta_charset", help=SUPPRESS_HELP)
158 parser.add_option("", "--strip-whitespace", action="store_true",
159 dest="strip_whitespace", help=SUPPRESS_HELP)
161 parser.add_option("", "--omit-optional-tags", action="store_true",
162 dest="omit_optional_tags", help=SUPPRESS_HELP)
164 parser.add_option("", "--quote-attr-values", action="store_true",
165 dest="quote_attr_values", help=SUPPRESS_HELP)
167 parser.add_option("", "--use-best-quote-char", action="store_true",
168 dest="use_best_quote_char", help=SUPPRESS_HELP)
170 parser.add_option("", "--no-minimize-boolean-attributes",
171 action="store_false",
172 dest="minimize_boolean_attributes", help=SUPPRESS_HELP)
174 parser.add_option("", "--use-trailing-solidus", action="store_true",
175 dest="use_trailing_solidus", help=SUPPRESS_HELP)
177 parser.add_option("", "--space-before-trailing-solidus",
179 dest="space_before_trailing_solidus", help=SUPPRESS_HELP)
181 parser.add_option("", "--escape-lt-in-attrs", action="store_true",
182 dest="escape_lt_in_attrs", help=SUPPRESS_HELP)
184 parser.add_option("", "--escape-rcdata", action="store_true",
185 dest="escape_rcdata", help=SUPPRESS_HELP)
187 parser.add_option("", "--output-encoding", action="store", type=str,
188 dest="output_encoding", help="Output encoding")
191 processes=set(["sub", "xref", "toc"]),
193 serializer="html5lib",
196 force_html4_id=False,
199 allow_duplicate_dfns=False,
201 w3c_compat_xref_elements=False,
202 w3c_compat_xref_a_placement=False,
203 w3c_compat_xref_normalization=False,
204 w3c_compat_class_toc=False,
205 w3c_compat_substitutions=False,
206 w3c_compat_crazy_substitutions=False,
208 inject_meta_charset=False,
209 omit_optional_tags=False,
210 quote_attr_values=False,
211 use_best_quote_char=False,
212 minimize_boolean_attributes=False,
213 use_trailing_solidus=False,
214 space_before_trailing_solidus=False,
215 escape_lt_in_attrs=False,
217 output_encoding="utf-8"
222 if __name__ == "__main__":