Start move to PEP 8 coding standards.
2 # Copyright (c) 2008 Geoffrey Sneddon
4 # Permission is hereby granted, free of charge, to any person obtaining a copy
5 # of this software and associated documentation files (the "Software"), to deal
6 # in the Software without restriction, including without limitation the rights
7 # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
8 # copies of the Software, and to permit persons to whom the Software is
9 # furnished to do so, subject to the following conditions:
11 # The above copyright notice and this permission notice shall be included in
12 # all copies or substantial portions of the Software.
14 # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
15 # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
16 # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
17 # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
18 # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
19 # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
22 from lxml import etree
24 from anolislib import utils
26 # Rank of heading elements (these are negative so h1 > h6)
27 rank = {u"h1": -1, u"h2": -2, u"h3": -3, u"h4": -4, u"h5": -5, u"h6": -6,
31 """Represents the section of a document."""
36 return "<section %s>" % (repr(self.header))
38 def append(self, child):
39 list.append(self, child)
42 def extend(self, children):
43 list.extend(self, children)
44 for child in children:
48 """Build the outline of an HTML document."""
50 def __init__(self, ElementTree, **kwargs):
51 self.ElementTree = ElementTree
54 self.current_outlinee = None
55 self.current_section = None
57 def build(self, **kwargs):
58 for action, element in etree.iterwalk(self.ElementTree,
59 events=("start", "end")):
60 # If the top of the stack is an element, and you are exiting that element
62 if action == "end" and self.stack and self.stack[-1] == element:
63 # Note: The element being exited is a heading content element.
64 assert element.tag in utils.heading_content
65 # Pop that element from the stack.
68 # If the top of the stack is a heading content element
69 elif self.stack and self.stack[-1].tag in utils.heading_content:
73 # When entering a sectioning content element or a sectioning root element
75 elif action == "start" and \
76 (element.tag in utils.sectioning_content or \
77 element.tag in utils.sectioning_root):
78 # If current outlinee is not null, push current outlinee onto the stack.
80 if self.current_outlinee is not None:
81 self.stack.append(self.current_outlinee)
82 # Let current outlinee be the element that is being entered.
83 self.current_outlinee = element
84 # Let current section be a newly created section for the
85 # current outlinee element.
86 self.current_section = section()
87 # Let there be a new outline for the new current outlinee,
88 # initialized with just the new current section as the only
89 # section in the outline.
90 self.outlines[self.current_outlinee] = [self.current_section]
92 # When exiting a sectioning content element, if the stack is not empty
94 elif action == "end" and \
95 element.tag in utils.sectioning_content and self.stack:
96 # Pop the top element from the stack, and let the current
97 # outlinee be that element.
98 self.current_outlinee = self.stack.pop()
99 # Let current section be the last section in the outline of the
100 # current outlinee element.
101 self.current_section = self.outlines[self.current_outlinee][-1]
102 # Append the outline of the sectioning content element being
103 # exited to the current section. (This does not change which
104 # section is the last section in the outline.)
105 self.current_section += self.outlines[element]
107 # When exiting a sectioning root element, if the stack is not empty
108 elif action == "end" and element.tag in utils.sectioning_root and \
110 # Pop the top element from the stack, and let the current
111 # outlinee be that element.
112 self.current_outlinee = self.stack.pop()
113 # Let current section be the last section in the outline of the
114 # current outlinee element.
115 self.current_section = self.outlines[self.current_outlinee][-1]
116 # Loop: If current section has no child sections, stop these steps.
118 while self.current_section:
119 # Let current section be the last child section of the
120 # current section.
121 assert self.current_section != self.current_section[-1]
122 self.current_section = self.current_section[-1]
123 # Go back to the substep labeled Loop.
125 # When exiting a sectioning content element or a sectioning root element
127 elif action == "end" and \
128 (element.tag in utils.sectioning_content or \
129 element.tag in utils.sectioning_root):
130 # Note: The current outlinee is the element being exited.
131 assert self.current_outlinee == element
132 # Let current section be the first section in the outline of
133 # the current outlinee element.
134 self.current_section = self.outlines[self.current_outlinee][0]
135 # Skip to the next step in the overall set of steps. (The walk is over.)
139 # If the current outlinee is null.
140 elif self.current_outlinee is None:
144 # When entering a heading content element
145 elif action == "start" and element.tag in utils.heading_content:
146 # If the current section has no heading, let the element being
147 # entered be the heading for the current section.
148 if self.current_section.header is None:
149 self.current_section.header = element
151 # Otherwise, if the element being entered has a rank equal to
152 # or greater than the heading of the last section of the
153 # outline of the current outlinee, then create a new section
154 # and append it to the outline of the current outlinee element,
155 # so that this new section is the new last section of that
156 # outline. Let current section be that new section. Let the
157 # element being entered be the new heading for the current section.
159 elif rank[element.tag] >= \
160 rank[self.outlines[self.current_outlinee][-1].header.tag]:
161 self.current_section = section()
162 self.outlines[self.current_outlinee] \
163 .append(self.current_section)
164 self.current_section.header = element
166 # Otherwise, run these substeps:
168 # Let candidate section be current section.
169 candidate_section = self.current_section
171 # If the element being entered has a rank lower than
172 # the rank of the heading of the candidate section,
173 # then create a new section, and append it to candidate
174 # section. (This does not change which section is the
175 # last section in the outline.) Let current section be
176 # this new section. Let the element being entered be
177 # the new heading for the current section. Abort these substeps.
179 if rank[element.tag] < rank[candidate_section.header.tag]:
180 self.current_section = section()
181 candidate_section.append(self.current_section)
182 self.current_section.header = element
184 # Let new candidate section be the section that contains candidate section in the outline of current outlinee.
185 # Let candidate section be new candidate section.
186 candidate_section = candidate_section.parent
188 # Push the element being entered onto the stack. (This causes the algorithm to skip any descendants of the element.)
189 self.stack.append(element)
191 # If the current outlinee is null, then there was no sectioning content element or sectioning root element in the DOM. There is no outline.
193 return self.outlines[self.current_outlinee]