Update Outliner to match the latest version of the spec; header/hgroup change and hgroup rank change.
2 # Copyright (c) 2008 Geoffrey Sneddon
4 # Permission is hereby granted, free of charge, to any person obtaining a copy
5 # of this software and associated documentation files (the "Software"), to deal
6 # in the Software without restriction, including without limitation the rights
7 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 # copies of the Software, and to permit persons to whom the Software is
9 # furnished to do so, subject to the following conditions:
11 # The above copyright notice and this permission notice shall be included in
12 # all copies or substantial portions of the Software.
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
22 from lxml import etree
24 from anolislib import utils
26 # Rank of heading elements (these are negative so h1 > h6)
27 fixedRank = {u"h1": -1, u"h2": -2, u"h3": -3, u"h4": -4, u"h5": -5, u"h6": -6}
31 """Represents the section of a document."""
36 return "<section %s>" % (repr(self.header))
38 def append(self, child):
39 list.append(self, child)
42 def extend(self, children):
43 list.extend(self, children)
44 for child in children:
49 """Build the outline of an HTML document."""
51 def __init__(self, ElementTree, **kwargs):
52 self.ElementTree = ElementTree
55 self.current_outlinee = None
56 self.current_section = None
58 def _rank(self, element):
59 if element.tag in fixedRank:
60 return fixedRank[element.tag]
61 # The rank of an hgroup element is the rank of the highest-ranked
62 # h1–h6 element descendant of the hgroup element, if there are any such
63 # elements, or otherwise the same as for an h1 element (the highest
65 elif element.tag == u"hgroup":
67 if element.find(u".//h" + unicode(i)) is not None:
72 raise ValueError, "Only h1–h6 and hgroup elements have a rank"
74 def build(self, **kwargs):
75 for action, element in etree.iterwalk(self.ElementTree,
76 events=("start", "end")):
77 # If the top of the stack is an element, and you are exiting that
79 if action == "end" and self.stack and self.stack[-1] == element:
80 # Note: The element being exited is a heading content element.
81 assert element.tag in utils.heading_content
82 # Pop that element from the stack.
85 # If the top of the stack is a heading content element
86 elif self.stack and self.stack[-1].tag in utils.heading_content:
90 # When entering a sectioning content element or a sectioning root
92 elif action == "start" and \
93 (element.tag in utils.sectioning_content or \
94 element.tag in utils.sectioning_root):
95 # If current outlinee is not null, push current outlinee onto
97 if self.current_outlinee is not None:
98 self.stack.append(self.current_outlinee)
99 # Let current outlinee be the element that is being entered.
100 self.current_outlinee = element
101 # Let current section be a newly created section for the
102 # current outlinee element.
103 self.current_section = section()
104 # Let there be a new outline for the new current outlinee,
105 # initialized with just the new current section as the only
106 # section in the outline.
107 self.outlines[self.current_outlinee] = [self.current_section]
109 # When exiting a sectioning content element, if the stack is not
111 elif action == "end" and \
112 element.tag in utils.sectioning_content and self.stack:
113 # Pop the top element from the stack, and let the current
114 # outlinee be that element.
115 self.current_outlinee = self.stack.pop()
116 # Let current section be the last section in the outline of the
117 # current outlinee element.
118 self.current_section = self.outlines[self.current_outlinee][-1]
119 # Append the outline of the sectioning content element being
120 # exited to the current section. (This does not change which
121 # section is the last section in the outline.)
122 self.current_section += self.outlines[element]
124 # When exiting a sectioning root element, if the stack is not empty
125 elif action == "end" and element.tag in utils.sectioning_root and \
127 # Pop the top element from the stack, and let the current
128 # outlinee be that element.
129 self.current_outlinee = self.stack.pop()
130 # Let current section be the last section in the outline of the
131 # current outlinee element.
132 self.current_section = self.outlines[self.current_outlinee][-1]
133 # Loop: If current section has no child sections, stop these
135 while self.current_section:
136 # Let current section be the last child section of the
137 # current current section.
138 assert self.current_section != self.current_section[-1]
139 self.current_section = self.current_section[-1]
140 # Go back to the substep labeled Loop.
142 # When exiting a sectioning content element or a sectioning root
144 elif action == "end" and \
145 (element.tag in utils.sectioning_content or \
146 element.tag in utils.sectioning_root):
147 # Note: The current outlinee is the element being exited.
148 assert self.current_outlinee == element
149 # Let current section be the first section in the outline of
150 # the current outlinee element.
151 self.current_section = self.outlines[self.current_outlinee][0]
152 # Skip to the next step in the overall set of steps. (The walk
156 # If the current outlinee is null.
157 elif self.current_outlinee is None:
161 # When entering a heading content element
162 elif action == "start" and element.tag in utils.heading_content:
163 # If the current section has no heading, let the element being
164 # entered be the heading for the current section.
165 if self.current_section.header is None:
166 self.current_section.header = element
168 # Otherwise, if the element being entered has a rank equal to
169 # or greater than the heading of the last section of the
170 # outline of the current outlinee, then create a new section
171 # and append it to the outline of the current outlinee element,
172 # so that this new section is the new last section of that
173 # outline. Let current section be that new section. Let the
174 # element being entered be the new heading for the current
176 elif self._rank(element) >= \
177 self._rank(self.outlines[self.current_outlinee][-1].header):
178 self.current_section = section()
179 self.outlines[self.current_outlinee] \
180 .append(self.current_section)
181 self.current_section.header = element
183 # Otherwise, run these substeps:
185 # Let candidate section be current section.
186 candidate_section = self.current_section
188 # If the element being entered has a rank lower than
189 # the rank of the heading of the candidate section,
190 # then create a new section, and append it to candidate
191 # section. (This does not change which section is the
192 # last section in the outline.) Let current section be
193 # this new section. Let the element being entered be
194 # the new heading for the current section. Abort these
196 if self._rank(element) < \
197 self._rank(candidate_section.header):
198 self.current_section = section()
199 candidate_section.append(self.current_section)
200 self.current_section.header = element
202 # Let new candidate section be the section that
203 # contains candidate section in the outline of current
205 # Let candidate section be new candidate section.
206 candidate_section = candidate_section.parent
208 # Push the element being entered onto the stack. (This causes
209 # the algorithm to skip any descendants of the element.)
210 self.stack.append(element)
212 # If the current outlinee is null, then there was no sectioning content
213 # element or sectioning root element in the DOM. There is no outline.
215 return self.outlines[self.current_outlinee]