• R/O
  • HTTP
  • SSH
  • HTTPS

htmltree: コミット

メインリポジトリ


コミットメタ情報

リビジョン: 4391955c894b3b17bbf70418507c0633a08205b8 (tree)
日時: 2011-07-28 20:27:56
作者: Hiromichi MATSUSHIMA <hirom@offi...>
コミッター: Hiromichi MATSUSHIMA

ログメッセージ

append TEXTRenderer and HTMLElement.inner_text()

変更サマリ

差分

--- a/htmltree.py
+++ b/htmltree.py
@@ -63,6 +63,34 @@ class HTMLRenderer(Renderer):
6363 elif elem.is_decl():
6464 texts.append("<!" + elem.name + ">")
6565
66+
class TEXTRenderer(Renderer):
    """Render an HTMLElement tree as plain text.

    Only text nodes contribute output.  Tag and root nodes emit nothing
    themselves and are walked solely to reach their children; any other
    kind of node is ignored.
    """
    # TODO: be stricter about which tags do not need a closing tag.
    # NOTE: not referenced by any method of this class.
    UNCLOSABLE_TAGS = ["br", "link", "meta", "img"]

    def render_inner(self, elem):
        """Return the concatenated text of the children of *elem*."""
        pieces = []
        for node in elem:
            self._recursive(node, pieces)
        return "".join(pieces)

    def render(self, elem):
        """Return the concatenated text of *elem* and its descendants."""
        pieces = []
        self._recursive(elem, pieces)
        return "".join(pieces)

    def _recursive(self, elem, texts):
        """Append the text found at and below *elem* to the list *texts*."""
        if elem.is_tag():
            children = elem
        elif elem.is_text():
            # Text nodes are leaves here: keep non-empty text and stop.
            if elem.text():
                texts.append(elem.text())
            return
        elif elem.is_root():
            children = elem
        else:
            # Other node kinds produce no text.
            return

        # Tag and root nodes are containers: visit their children in order.
        for node in children:
            self._recursive(node, texts)
93+
6694 class HTMLElement(list):
6795 """HTML element object to use as tree nodes."""
6896 ROOT = 0
@@ -135,6 +163,11 @@ class HTMLElement(list):
135163 rn = HTMLRenderer()
136164 return rn.render_inner(self)
137165
166+ def inner_text(self):
167+ "returns inner text"
168+ rn = TEXTRenderer()
169+ return rn.render_inner(self)
170+
138171 # navigation functions
139172 def parent(self):
140173 """returns tag's parent element."""
旧リポジトリブラウザで表示