Woops. Missed a case of "encoding" that has to be "output_encoding".
2 # Copyright (c) 2008 Geoffrey Sneddon
4 # Permission is hereby granted, free of charge, to any person obtaining a copy
5 # of this software and associated documentation files (the "Software"), to deal
6 # in the Software without restriction, including without limitation the rights
7 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 # copies of the Software, and to permit persons to whom the Software is
9 # furnished to do so, subject to the following conditions:
11 # The above copyright notice and this permission notice shall be included in
12 # all copies or substantial portions of the Software.
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 from lxml import etree
24 from anolislib import utils
26 # Rank of heading elements (these are negative so h1 > h6)
# Keys are heading tag names. build() compares these values with >= and < to
# decide how a newly entered heading nests relative to earlier headings.
# NOTE(review): the literal continues past the line below; the remaining
# entries are not visible here, so confirm them before relying on this table.
27 rank = {u"h1": -1, u"h2": -2, u"h3": -3, u"h4": -4, u"h5": -5, u"h6": -6,
32 """Represents the section of a document."""
37 return "<section %s>" % (repr(self.header))
# Add `child` to this section by delegating to the plain list implementation.
# NOTE(review): the line that followed the list append (original line 41) is
# not visible here, so this method may do more than shown - confirm in the
# full file before changing it.
39 def append(self, child):
40 list.append(self, child)
# Add every item of `children` by delegating to list.extend, then visit each
# child once more in a second pass.
# NOTE(review): the body of the loop (original line 46) is not visible here;
# confirm what is done per child before changing this method.
43 def extend(self, children):
44 list.extend(self, children)
45 for child in children:
50 """Build the outline of an HTML document."""
# Remember the tree to be outlined and start with no current outlinee and no
# current section. `**kwargs` is accepted but not read by any visible line.
# NOTE(review): build() uses self.stack and self.outlines; their
# initialisation is presumably on original lines 54-55, which are not visible
# here - confirm.
52 def __init__(self, ElementTree, **kwargs):
53 self.ElementTree = ElementTree
56 self.current_outlinee = None
57 self.current_section = None
# Walk self.ElementTree with etree.iterwalk, handling a "start" event when an
# element is entered and an "end" event when it is exited, and record one
# outline (a list of section objects) per outlinee element in self.outlines.
# The step comments below describe what each branch implements.
# The final statement returns the outline stored for the current outlinee.
# `**kwargs` is accepted but not read by any visible line.
#
# self.stack holds two kinds of entries: outlinee elements pushed when a nested
# sectioning element is entered, and heading elements pushed so that their
# descendants are skipped until the heading is exited.
#
# NOTE(review): several original lines are not visible in this view (for
# example the statement after "Pop that element from the stack" and the bodies
# of the two "do nothing" branches). Consult the full file before changing
# control flow.
59 def build(self, **kwargs):
60 for action, element in etree.iterwalk(self.ElementTree,
61 events=("start", "end")):
62 # If the top of the stack is an element, and you are exiting that element
64 if action == "end" and self.stack and self.stack[-1] == element:
65 # Note: The element being exited is a heading content element.
66 assert element.tag in utils.heading_content
67 # Pop that element from the stack.
70 # If the top of the stack is a heading content element
71 elif self.stack and self.stack[-1].tag in utils.heading_content:
75 # When entering a sectioning content element or a sectioning root element
77 elif action == "start" and \
78 (element.tag in utils.sectioning_content or \
79 element.tag in utils.sectioning_root):
80 # If current outlinee is not null, push current outlinee onto the stack.
82 if self.current_outlinee is not None:
83 self.stack.append(self.current_outlinee)
84 # Let current outlinee be the element that is being entered.
85 self.current_outlinee = element
86 # Let current section be a newly created section for the
87 # current outlinee element.
88 self.current_section = section()
89 # Let there be a new outline for the new current outlinee,
90 # initialized with just the new current section as the only
91 # section in the outline.
92 self.outlines[self.current_outlinee] = [self.current_section]
94 # When exiting a sectioning content element, if the stack is not empty
96 elif action == "end" and \
97 element.tag in utils.sectioning_content and self.stack:
98 # Pop the top element from the stack, and let the current
99 # outlinee be that element.
100 self.current_outlinee = self.stack.pop()
101 # Let current section be the last section in the outline of the
102 # current outlinee element.
103 self.current_section = self.outlines[self.current_outlinee][-1]
104 # Append the outline of the sectioning content element being
105 # exited to the current section. (This does not change which
106 # section is the last section in the outline.)
# NOTE(review): `+=` is an in-place concatenation; unless the section class
# overrides it (no override is visible here), it does not go through the
# append/extend methods above, so any per-child bookkeeping they perform is
# skipped for these sections - confirm this is intended.
107 self.current_section += self.outlines[element]
109 # When exiting a sectioning root element, if the stack is not empty
110 elif action == "end" and element.tag in utils.sectioning_root and \
112 # Pop the top element from the stack, and let the current
113 # outlinee be that element.
114 self.current_outlinee = self.stack.pop()
115 # Let current section be the last section in the outline of the
116 # current outlinee element.
117 self.current_section = self.outlines[self.current_outlinee][-1]
118 # Loop: If current section has no child sections, stop these steps.
# An empty section is falsy, so the loop ends at the deepest last child.
120 while self.current_section:
121 # Let current section be the last child section of the
122 # current current section.
# Guard against a section that is its own last child, which would make this
# descent loop forever.
123 assert self.current_section != self.current_section[-1]
124 self.current_section = self.current_section[-1]
125 # Go back to the substep labeled Loop.
127 # When exiting a sectioning content element or a sectioning root element
# Only reached when neither of the two "stack is not empty" exit branches
# above matched.
129 elif action == "end" and \
130 (element.tag in utils.sectioning_content or \
131 element.tag in utils.sectioning_root):
132 # Note: The current outlinee is the element being exited.
133 assert self.current_outlinee == element
134 # Let current section be the first section in the outline of
135 # the current outlinee element.
136 self.current_section = self.outlines[self.current_outlinee][0]
137 # Skip to the next step in the overall set of steps. (The walk
141 # If the current outlinee is null.
142 elif self.current_outlinee is None:
146 # When entering a heading content element
147 elif action == "start" and element.tag in utils.heading_content:
148 # If the current section has no heading, let the element being
149 # entered be the heading for the current section.
150 if self.current_section.header is None:
151 self.current_section.header = element
153 # Otherwise, if the element being entered has a rank equal to
154 # or greater than the heading of the last section of the
155 # outline of the current outlinee, then create a new section
156 # and append it to the outline of the current outlinee element,
157 # so that this new section is the new last section of that
158 # outline. Let current section be that new section. Let the
159 # element being entered be the new heading for the current section.
# Ranks are negative numbers, so >= here means "same level or a more
# important heading" (h1 is -1, h6 is -6).
161 elif rank[element.tag] >= \
162 rank[self.outlines[self.current_outlinee][-1].header.tag]:
163 self.current_section = section()
164 self.outlines[self.current_outlinee] \
165 .append(self.current_section)
166 self.current_section.header = element
168 # Otherwise, run these substeps:
170 # Let candidate section be current section.
171 candidate_section = self.current_section
173 # If the element being entered has a rank lower than
174 # the rank of the heading of the candidate section,
175 # then create a new section, and append it to candidate
176 # section. (This does not change which section is the
177 # last section in the outline.) Let current section be
178 # this new section. Let the element being entered be
179 # the new heading for the current section. Abort these substeps.
181 if rank[element.tag] < \
182 rank[candidate_section.header.tag]:
183 self.current_section = section()
184 candidate_section.append(self.current_section)
185 self.current_section.header = element
187 # Let new candidate section be the section that
188 # contains candidate section in the outline of current outlinee.
190 # Let candidate section be new candidate section.
# NOTE(review): relies on each section exposing a `parent` attribute; no
# visible line assigns it, so confirm where it is set.
191 candidate_section = candidate_section.parent
193 # Push the element being entered onto the stack. (This causes
194 # the algorithm to skip any descendants of the element.)
195 self.stack.append(element)
197 # If the current outlinee is null, then there was no sectioning content
198 # element or sectioning root element in the DOM. There is no outline.
# NOTE(review): as shown, this lookup is unguarded; if the current outlinee
# is still None and has no entry in self.outlines it would raise KeyError.
# The statement on original line 199 is not visible here - confirm how the
# "no outline" case is handled.
200 return self.outlines[self.current_outlinee]