ODFPY  1.2.0
teletype.py
Go to the documentation of this file.
1 # -*- coding: utf-8 -*-
2 #
3 # Create and extract text from ODF, handling whitespace correctly.
4 # Copyright (C) 2008 J. David Eisenberg
5 #
6 # This program is free software; you can redistribute it and/or modify
7 # it under the terms of the GNU General Public License as published by
8 # the Free Software Foundation; either version 2 of the License, or
9 # (at your option) any later version.
10 #
11 # This program is distributed in the hope that it will be useful,
12 # but WITHOUT ANY WARRANTY; without even the implied warranty of
13 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 # GNU General Public License for more details.
15 #
16 # You should have received a copy of the GNU General Public License along
17 # with this program; if not, write to the Free Software Foundation, Inc.,
18 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
19 
20 
21 
30 
31 from odf.element import Node
32 import odf.opendocument
33 from odf.text import S,LineBreak,Tab
34 
36 
37  def __init__(self):
38  self.textBuffer = []
39  self.spaceCount = 0
40 
41 
48  def addTextToElement(self, odfElement, s):
49  i = 0
50  ch = ' '
51 
52  # When we encounter a tab or newline, we can immediately
53  # dump any accumulated text and then emit the appropriate
54  # ODF element.
55  #
56  # When we encounter a space, we add it to the text buffer,
57  # and then collect more spaces. If there are more spaces
58  # after the first one, we dump the text buffer and then
59  # then emit the appropriate <text:s> element.
60 
61  while i < len(s):
62  ch = s[i]
63  if ch == '\t':
64  self._emitTextBuffer(odfElement)
65  odfElement.addElement(Tab())
66  i += 1
67  elif ch == '\n':
68  self._emitTextBuffer(odfElement);
69  odfElement.addElement(LineBreak())
70  i += 1
71  elif ch == ' ':
72  self.textBuffer.append(' ')
73  i += 1
74  self.spaceCount = 0
75  while i < len(s) and (s[i] == ' '):
76  self.spaceCount += 1
77  i += 1
78  if self.spaceCount > 0:
79  self._emitTextBuffer(odfElement)
80  self._emitSpaces(odfElement)
81  else:
82  self.textBuffer.append(ch)
83  i += 1
84 
85  self._emitTextBuffer(odfElement)
86 
87 
91  def _emitTextBuffer(self, odfElement):
92  if len(self.textBuffer) > 0:
93  odfElement.addText(''.join(self.textBuffer))
94  self.textBuffer = []
95 
96 
97 
101  def _emitSpaces(self, odfElement):
102  if self.spaceCount > 0:
103  spaceElement = S(c=self.spaceCount)
104  odfElement.addElement(spaceElement)
105  self.spaceCount = 0
106 
def addTextToElement(odfElement, s):
    """Add the string ``s`` to ``odfElement`` using a fresh WhitespaceText.

    Convenience wrapper: creates a new ``WhitespaceText`` and delegates to
    its ``addTextToElement`` method.
    """
    WhitespaceText().addTextToElement(odfElement, s)
110 
111 
def extractText(odfElement):
    """Return the text held by ``odfElement`` as a single string.

    The direct children of ``odfElement`` are visited in order:

    * text nodes contribute their ``data``;
    * a ``line-break`` element in the text namespace contributes a newline;
    * a ``tab`` element in the text namespace contributes a tab;
    * an ``s`` element in the text namespace contributes as many spaces
      as its ``c`` attribute says, or one space when ``c`` is not set;
    * any other element contributes the text extracted recursively from it.

    Children that are neither text nodes nor element nodes are ignored.
    """
    text_ns = u"urn:oasis:names:tc:opendocument:xmlns:text:1.0"
    pieces = []

    for node in odfElement.childNodes:
        kind = node.nodeType
        if kind == Node.TEXT_NODE:
            pieces.append(node.data)
            continue
        if kind != Node.ELEMENT_NODE:
            continue

        qname = node.qname
        if qname == (text_ns, u"line-break"):
            pieces.append("\n")
        elif qname == (text_ns, u"tab"):
            pieces.append("\t")
        elif qname == (text_ns, u"s"):
            count_attr = node.getAttribute('c')
            # A missing (or empty) count means a single space.
            repeat = int(count_attr) if count_attr else 1
            pieces.append(" " * repeat)
        else:
            pieces.append(extractText(node))

    return ''.join(pieces)
def _emitTextBuffer(self, odfElement)
Creates a Text Node whose contents are the current textBuffer.
Definition: teletype.py:91
def S(args)
Definition: text.py:388
def LineBreak(args)
Definition: text.py:238
def addTextToElement(self, odfElement, s)
Process an input string, inserting <text:tab> elements for '\t', <text:line-break> elements for '\n', and <text:s> elements for runs of more than one space.
Definition: teletype.py:48
def Tab(args)
Definition: text.py:472
Definition: text.py:1
def _emitSpaces(self, odfElement)
Creates a <text:s> element for the current spaceCount.
Definition: teletype.py:101
def extractText(odfElement)
Extract text content from an Element, with whitespace represented properly.
Definition: teletype.py:118
def addTextToElement(odfElement, s)
Definition: teletype.py:107