anolis
author Geoffrey Sneddon <geoffers@gmail.com>
Thu Aug 13 08:46:49 2009 +0200 (2009-08-13)
changeset 305 e75817a809b8
parent 290 56f77d671483
child 310 7894cef2cd7a
permissions -rw-r--r--
Peh, who needs setuptools?
     1 #!/usr/bin/env python
     2 # coding=UTF-8
     3 # Copyright (c) 2008 Geoffrey Sneddon
     4 #
     5 # Permission is hereby granted, free of charge, to any person obtaining a copy
     6 # of this software and associated documentation files (the "Software"), to deal
     7 # in the Software without restriction, including without limitation the rights
     8 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     9 # copies of the Software, and to permit persons to whom the Software is
    10 # furnished to do so, subject to the following conditions:
    11 #
    12 # The above copyright notice and this permission notice shall be included in
    13 # all copies or substantial portions of the Software.
    14 #
    15 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    16 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    17 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    18 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    19 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    20 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    21 # THE SOFTWARE.
    22 """usage: anolis [options] input output
    23 
    24 Post-process a document, adding cross-references, table of contents, etc.
    25 """
    26 
    27 from optparse import OptionParser, SUPPRESS_HELP
    28 import sys
    29 
    30 from lxml import etree
    31 
    32 from anolislib import generator, utils
    33 
    34 
    35 def main():
    36     # Create the options parser
    37     optParser = getOptParser()
    38     opts, args = optParser.parse_args()
    39 
    40     # Check we have enough arguments
    41     if len(args) >= 2:
    42         try:
    43             # Get options
    44             kwargs = vars(opts)
    45 
    46             # Get input and generate
    47             input = open(args[0], "rb")
    48             tree = generator.fromFile(input, **kwargs)
    49             input.close()
    50 
    51             # Write output
    52             output = open(args[1], "wb")
    53             generator.toFile(tree, output, **kwargs)
    54             output.close()
    55         except (utils.AnolisException, IOError, etree.XMLSyntaxError), e:
    56             sys.stderr.write(unicode(e) + u"\n")
    57             sys.exit(1)
    58     else:
    59         sys.stderr.write(u"anolis expects two arguments. Use -h for help\n")
    60         sys.exit(2)
    61 
    62 
    63 def getOptParser():
    64     def enable(option, opt_str, value, parser, *args, **kwargs):
    65         parser.values.processes.add(value)
    66 
    67     def disable(option, opt_str, value, parser, *args, **kwargs):
    68         parser.values.processes.discard(value)
    69 
    70     parser = OptionParser(usage = __doc__, version="%prog 1.1dev")
    71 
    72     parser.add_option("", "--enable", action="callback", callback=enable,
    73                       type="string", dest="processes",
    74                       help="Enable the process given as the option value")
    75 
    76     parser.add_option("", "--disable", action="callback", callback=disable,
    77                       type="string",
    78                       help="Disable the process given as the option value")
    79 
    80     parser.add_option("", "--parser", type="choice",
    81                       choices=("html5lib", "lxml.html"),
    82                       help="Choose what parser to use. Valid options: html5lib, lxml.html")
    83 
    84     parser.add_option("", "--serializer", type="choice",
    85                       choices=("html5lib", "lxml.html"),
    86                       help="Choose what serializer to use. Valid options: html5lib, lxml.html")
    87 
    88     parser.add_option("", "--newline-char", action="store", type="string",
    89                       dest="newline_char",
    90                       help="Set the newline character/string used when creating new newlines. This should match the rest of the newlines in the document.")
    91 
    92     parser.add_option("", "--indent-char", action="store", type="string",
    93                       dest="indent_char",
    94                       help="Set the character/string used when creating indenting new blocks of (X)HTML. This should match the rest of the indentation in the document.")
    95 
    96     parser.add_option("", "--force-html4-id", action="store_true",
    97                       dest="force_html4_id",
    98                       help="Force the ID generation algorithm to create HTML 4 compliant IDs regardless of the DOCTYPE.")
    99 
   100     parser.add_option("", "--min-depth", action="store", type="int",
   101                       dest="min_depth",
   102                       help="Highest ranking header to number/insert into TOC.")
   103 
   104     parser.add_option("", "--max-depth", action="store", type="int",
   105                       dest="max_depth",
   106                       help="Lowest ranking header to number/insert into TOC.")
   107 
   108     parser.add_option("", "--allow-duplicate-dfns", action="store_true",
   109                       dest="allow_duplicate_dfns",
   110                       help="Allow multiple definitions of terms when cross-referencing (the last instance of the term is used when referencing it).")
   111 
   112     parser.add_option("", "--w3c-compat", action="store_true",
   113                       dest="w3c_compat",
   114                       help="Behave in a (mostly) compatible way to the W3C CSS WG's Postprocessor (this implies all of the other --w3c-compat options with the exception of --w3c-compat-crazy-substitution, as that is too crazy).")
   115 
   116     parser.add_option("", "--w3c-compat-xref-elements", action="store_true",
   117                       dest="w3c_compat_xref_elements",
   118                       help="Uses the same list of elements to look for cross-references in as the W3C CSS WG's Postprocessor, even when the elements shouldn't semantically be used for cross-reference terms.")
   119 
   120     parser.add_option("", "--w3c-compat-xref-a-placement", action="store_true",
   121                       dest="w3c_compat_xref_a_placement",
   122                       help="When cross-referencing elements apart from span, put the a element inside the element instead of outside the element.")
   123 
   124     parser.add_option("", "--w3c-compat-xref-normalization", action="store_true",
   125                       dest="w3c_compat_xref_normalization",
   126                       help="Only use ASCII letters, numbers, and spaces in comparison of cross-reference terms.")
   127 
   128     parser.add_option("", "--w3c-compat-class-toc", action="store_true",
   129                       dest="w3c_compat_class_toc",
   130                       help="Add @class='toc' on every ol element in the table of contents (instead of only the root ol element).")
   131 
   132     parser.add_option("", "--w3c-compat-substitutions", action="store_true",
   133                       dest="w3c_compat_substitutions",
   134                       help="Do W3C specific substitutions.")
   135 
   136     parser.add_option("", "--w3c-compat-crazy-substitutions", action="store_true",
   137                       dest="w3c_compat_crazy_substitutions",
   138                       help="Do crazy W3C specific substitutions, which may cause unexpected behaviour (i.e., replacing random strings within the document with no special marker).")
   139 
   140     profile = True
   141     try:
   142         import cProfile
   143         import pstats
   144     except ImportError:
   145         try:
   146             import hotshot
   147             import hotshot.stats
   148         except ImportError:
   149             profile = False
   150     
   151     if profile:
   152         parser.add_option("", "--profile", action="store_true",
   153             dest="profile", help=SUPPRESS_HELP)
   154 
   155     parser.add_option("", "--inject-meta-charset", action="store_true",
   156                       dest="inject_meta_charset", help=SUPPRESS_HELP)
   157 
   158     parser.add_option("", "--strip-whitespace", action="store_true",
   159                       dest="strip_whitespace", help=SUPPRESS_HELP)
   160 
   161     parser.add_option("", "--omit-optional-tags", action="store_true",
   162                       dest="omit_optional_tags", help=SUPPRESS_HELP)
   163 
   164     parser.add_option("", "--quote-attr-values", action="store_true",
   165                       dest="quote_attr_values", help=SUPPRESS_HELP)
   166 
   167     parser.add_option("", "--use-best-quote-char", action="store_true",
   168                       dest="use_best_quote_char", help=SUPPRESS_HELP)
   169 
   170     parser.add_option("", "--no-minimize-boolean-attributes",
   171                       action="store_false",
   172                       dest="minimize_boolean_attributes", help=SUPPRESS_HELP)
   173 
   174     parser.add_option("", "--use-trailing-solidus", action="store_true",
   175                       dest="use_trailing_solidus", help=SUPPRESS_HELP)
   176 
   177     parser.add_option("", "--space-before-trailing-solidus",
   178                       action="store_true",
   179                       dest="space_before_trailing_solidus", help=SUPPRESS_HELP)
   180 
   181     parser.add_option("", "--escape-lt-in-attrs", action="store_true",
   182                       dest="escape_lt_in_attrs", help=SUPPRESS_HELP)
   183 
   184     parser.add_option("", "--escape-rcdata", action="store_true",
   185                       dest="escape_rcdata", help=SUPPRESS_HELP)
   186 
   187     parser.add_option("", "--output-encoding", action="store", type=str,
   188                       dest="output_encoding", help="Output encoding")
   189 
   190     parser.set_defaults(
   191         processes=set(["sub", "xref", "toc"]),
   192         parser="html5lib",
   193         serializer="html5lib",
   194         newline_char=u"\n",
   195         indent_char=u" ",
   196         force_html4_id=False,
   197         min_depth=2,
   198         max_depth=6,
   199         allow_duplicate_dfns=False,
   200         w3c_compat=False,
   201         w3c_compat_xref_elements=False,
   202         w3c_compat_xref_a_placement=False,
   203         w3c_compat_xref_normalization=False,
   204         w3c_compat_class_toc=False,
   205         w3c_compat_substitutions=False,
   206         w3c_compat_crazy_substitutions=False,
   207         profile=False,
   208         inject_meta_charset=False,
   209         omit_optional_tags=False,
   210         quote_attr_values=False,
   211         use_best_quote_char=False,
   212         minimize_boolean_attributes=False,
   213         use_trailing_solidus=False,
   214         space_before_trailing_solidus=False,
   215         escape_lt_in_attrs=False,
   216         escape_rcdata=False,
   217         output_encoding="utf-8"
   218     )
   219 
   220     return parser
   221 
# Run the command-line entry point only when executed as a script.
if __name__ == "__main__":
    main()