1398 lines
49 KiB
Python
1398 lines
49 KiB
Python
#!/usr/bin/env python
|
|
# -*- coding: iso-8859-1 -*-
|
|
# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting.
|
|
# Reichergasse 131, A-3411 Weidling.
|
|
# Web: http://www.runtux.com Email: office@runtux.com
|
|
# All rights reserved
|
|
# ****************************************************************************
|
|
#
|
|
# This library is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU Library General Public License as
|
|
# published by the Free Software Foundation; either version 2 of the
|
|
# License, or (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU Library General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU Library General Public
|
|
# License along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
# ****************************************************************************
|
|
|
|
from __future__ import absolute_import
|
|
|
|
import time
|
|
import re
|
|
try :
|
|
from xml.etree.ElementTree import dump, SubElement, Element, tostring
|
|
from xml.etree.ElementTree import _namespace_map
|
|
except ImportError :
|
|
from elementtree.ElementTree import dump, SubElement, Element, tostring
|
|
from elementtree.ElementTree import _namespace_map
|
|
from copy import deepcopy
|
|
from ooopy.OOoPy import OOoPy, autosuper
|
|
from ooopy.OOoPy import files, mimetypes, namespace_by_name
|
|
from ooopy.Version import VERSION
|
|
|
|
def OOo_Tag (namespace, name, mimetype) :
    """ Build the qualified tag "{uri}name" for a symbolic namespace.

        The namespace table belonging to the given mimetype is taken
        from namespace_by_name; the symbolic namespace (e.g. 'text')
        is then resolved to its URI in that table and combined with
        the local name.

        >>> OOo_Tag ('xml', 'id', mimetypes [1])
        '{http://www.w3.org/XML/1998/namespace}id'
        >>> OOo_Tag ('text', 'list', mimetypes [1])
        '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}list'
    """
    namespaces = namespace_by_name [mimetype]
    uri        = namespaces [namespace]
    return "{%s}%s" % (uri, name)
# end def OOo_Tag
|
|
|
|
def split_tag (tag) :
    """ Inverse operation of OOo_Tag: take a tag of the form
        "{uri}name" and return the tuple (prefix, name), where prefix
        is the symbolic namespace stored for uri in ElementTree's
        _namespace_map.

        The tag must contain exactly one '}' (otherwise the unpacking
        raises ValueError) and the uri must be present in
        _namespace_map (otherwise KeyError is raised).
    """
    braced_uri, local_name = tag.split ('}')
    # drop the leading '{' to obtain the bare namespace URI
    uri    = braced_uri [1:]
    prefix = _namespace_map [uri]
    return (prefix, local_name)
# end def split_tag
|
|
|
|
class Transform (autosuper) :
    """
        Base class for individual transforms on OOo files. An individual
        transform needs a filename variable for specifying the OOo file
        the transform should be applied to and an optional prio.
        Individual transforms are applied according to their prio
        setting, higher prio means later application of a transform.

        The filename variable must specify one of the XML files which are
        part of the OOo document (see the files variable imported from
        ooopy.OOoPy). As the names imply, content.xml contains the
        contents of the document (text and ad-hoc style definitions),
        styles.xml contains the style definitions, meta.xml contains
        meta information like author, editing time, etc. and
        settings.xml is used to store OOo's settings (menu
        Tools->Configure).

        The filename variable is not defined in this base class; it is
        expected to be provided by derived transforms.
    """
    # Default priority, may be overridden per instance in __init__.
    prio = 100
    # The following tables map a document mimetype to the element name
    # used by that document format. mimetypes [1] selects the
    # OpenDocument namespaces (see the OOo_Tag doctest); mimetypes [0]
    # is presumably the older OOo 1.x format -- confirm in ooopy.OOoPy.
    textbody_names = \
        { mimetypes [0] : 'body'
        , mimetypes [1] : 'text'
        }
    paragraph_props = \
        { mimetypes [0] : 'properties'
        , mimetypes [1] : 'paragraph-properties'
        }
    font_decls = \
        { mimetypes [0] : 'font-decls'
        , mimetypes [1] : 'font-face-decls'
        }

    def __init__ (self, prio = None, transformer = None) :
        """ Optionally override the class-level prio and, if a
            transformer is given, register with it immediately.
        """
        if prio is not None :
            self.prio    = prio
        self.transformer = None
        if transformer :
            self.register (transformer)
    # end def __init__

    def apply (self, root) :
        """ Apply myself to the element given as root.
            Must be implemented by derived transforms; the base
            implementation always raises NotImplementedError.
        """
        # Call form of raise: valid in Python 2 and Python 3 (the
        # former "raise E, msg" form is a syntax error in Python 3).
        raise NotImplementedError \
            ('derived transforms must implement "apply"')
    # end def apply

    def apply_all (self, trees) :
        """ Apply myself to all the files given in trees. The variable
            trees contains a dictionary of ElementTree indexed by the
            name of the OOo File.
            The standard case is that only one file (namely
            self.filename) is used.
        """
        assert (self.filename)
        self.apply (trees [self.filename].getroot ())
    # end def apply_all

    def find_tbody (self, root) :
        """ Find the node which really contains the text -- different
            for different OOo versions.
            Returns root itself if its tag already is the text body
            tag, otherwise the result of searching the descendants of
            root for that tag (None if there is no such element).
            Needs self.textbody_tag which is set by register.
        """
        tbody = root
        if tbody.tag != self.textbody_tag :
            tbody = tbody.find ('.//' + self.textbody_tag)
        return tbody
    # end def find_tbody

    def register (self, transformer) :
        """ Registering with a transformer means being able to access
            variables stored in the transformer by other transforms.

            Also needed for tag-computation: The transformer knows which
            version of OOo document we are processing.

            Sets self.transformer, self.mimetype and the format-specific
            names and tags (textbody_name, paragraph_props,
            properties_tag, textbody_tag, font_decls_tag). May be called
            more than once on the same instance.
        """
        self.transformer     = transformer
        mt = self.mimetype   = transformer.mimetype
        self.textbody_name   = self.textbody_names [mt]
        # Look the table up on the class: the instance attribute of
        # the same name is replaced by the selected string right here,
        # so indexing self.paragraph_props would fail with a TypeError
        # on a second call to register.
        self.paragraph_props = self.__class__.paragraph_props [mt]
        self.properties_tag  = self.oootag ('style', self.paragraph_props)
        self.textbody_tag    = self.oootag ('office', self.textbody_name)
        self.font_decls_tag  = self.oootag ('office', self.font_decls [mt])
    # end def register

    def oootag (self, namespace, name) :
        """ Compute long tag version for the mimetype we were
            registered with (self.mimetype is set by register).
        """
        return OOo_Tag (namespace, name, self.mimetype)
    # end def oootag

    def set (self, variable, value) :
        """ Set variable in our transformer using naming convention. """
        self.transformer [self._varname (variable)] = value
    # end def set

    def _varname (self, name) :
        """ For fulfilling the naming convention of the transformer
            dictionary (every entry in this dictionary should be prefixed
            with the class name of the transform) we have this
            convenience method.
            Returns variable name prefixed with own class name, the two
            parts being separated by a colon.
        """
        return ":".join ((self.__class__.__name__, name))
    # end def _varname

# end class Transform
|
|
|
|
class Transformer (autosuper) :
|
|
"""
|
|
Class for applying a set of transforms to a given ooopy object.
|
|
The transforms are applied to the specified file in priority
|
|
order. When applying transforms we have a mechanism for
|
|
communication of transforms. We give the transformer to the
|
|
individual transforms as a parameter. The transforms may use the
|
|
transformer like a dictionary for storing values and retrieving
|
|
values left by previous transforms.
|
|
As a naming convention each transform should use its class name
|
|
as a prefix for storing values in the dictionary.
|
|
>>> import Transforms
|
|
>>> from Transforms import renumber_all, get_meta, set_meta, meta_counts
|
|
>>> try :
|
|
... from io import StringIO, BytesIO
|
|
... StringIO = BytesIO
|
|
... except ImportError :
|
|
... from StringIO import StringIO
|
|
>>> sio = BytesIO ()
|
|
>>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio)
|
|
>>> m = o.mimetype
|
|
>>> c = o.read ('content.xml')
|
|
>>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
|
|
>>> body [-1].get (OOo_Tag ('text', 'style-name', mimetype = m))
|
|
'Standard'
|
|
>>> def cb (name) :
|
|
... r = { 'street' : 'Beispielstrasse 42'
|
|
... , 'firstname' : 'Hugo'
|
|
... , 'salutation' : 'Frau'
|
|
... }
|
|
... if r.has_key (name) : return r [name]
|
|
... return None
|
|
...
|
|
>>> p = get_meta (m)
|
|
>>> t = Transformer (m, p)
|
|
>>> t ['a'] = 'a'
|
|
>>> t ['a']
|
|
'a'
|
|
>>> t.transform (o)
|
|
>>> p.set ('a', 'b')
|
|
>>> t ['Attribute_Access:a']
|
|
'b'
|
|
>>> t = Transformer (
|
|
... m
|
|
... , Transforms.Autoupdate ()
|
|
... , Transforms.Editinfo ()
|
|
... , Transforms.Field_Replace (prio = 99, replace = cb)
|
|
... , Transforms.Field_Replace
|
|
... ( replace =
|
|
... { 'salutation' : ''
|
|
... , 'firstname' : 'Erika'
|
|
... , 'lastname' : 'Musterfrau'
|
|
... , 'country' : 'D'
|
|
... , 'postalcode' : '00815'
|
|
... , 'city' : 'Niemandsdorf'
|
|
... }
|
|
... )
|
|
... , Transforms.Addpagebreak_Style ()
|
|
... , Transforms.Addpagebreak ()
|
|
... )
|
|
>>> t.transform (o)
|
|
>>> o.close ()
|
|
>>> ov = sio.getvalue ()
|
|
>>> f = open ("testout.sxw", "wb")
|
|
>>> f.write (ov)
|
|
>>> f.close ()
|
|
>>> o = OOoPy (infile = sio)
|
|
>>> c = o.read ('content.xml')
|
|
>>> m = o.mimetype
|
|
>>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
|
|
>>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
|
|
>>> for node in body.findall (vset) :
|
|
... name = node.get (OOo_Tag ('text', 'name', m))
|
|
... print name, ':', node.text
|
|
salutation : None
|
|
firstname : Erika
|
|
lastname : Musterfrau
|
|
street : Beispielstrasse 42
|
|
country : D
|
|
postalcode : 00815
|
|
city : Niemandsdorf
|
|
salutation : None
|
|
firstname : Erika
|
|
lastname : Musterfrau
|
|
street : Beispielstrasse 42
|
|
country : D
|
|
postalcode : 00815
|
|
city : Niemandsdorf
|
|
>>> body [-1].get (OOo_Tag ('text', 'style-name', mimetype = m))
|
|
'P2'
|
|
>>> sio = StringIO ()
|
|
>>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio)
|
|
>>> c = o.read ('content.xml')
|
|
>>> t = Transformer (
|
|
... o.mimetype
|
|
... , get_meta (o.mimetype)
|
|
... , Transforms.Addpagebreak_Style ()
|
|
... , Transforms.Mailmerge
|
|
... ( iterator =
|
|
... ( dict (firstname = 'Erika', lastname = 'Nobody')
|
|
... , dict (firstname = 'Eric', lastname = 'Wizard')
|
|
... , cb
|
|
... )
|
|
... )
|
|
... , renumber_all (o.mimetype)
|
|
... , set_meta (o.mimetype)
|
|
... , Transforms.Fix_OOo_Tag ()
|
|
... )
|
|
>>> t.transform (o)
|
|
>>> for i in meta_counts :
|
|
... print i, t [':'.join (('Set_Attribute', i))]
|
|
character-count 951
|
|
image-count 0
|
|
object-count 0
|
|
page-count 3
|
|
paragraph-count 113
|
|
table-count 3
|
|
word-count 162
|
|
>>> name = t ['Addpagebreak_Style:stylename']
|
|
>>> name
|
|
'P2'
|
|
>>> o.close ()
|
|
>>> ov = sio.getvalue ()
|
|
>>> f = open ("testout2.sxw", "wb")
|
|
>>> f.write (ov)
|
|
>>> f.close ()
|
|
>>> o = OOoPy (infile = sio)
|
|
>>> m = o.mimetype
|
|
>>> c = o.read ('content.xml')
|
|
>>> body = c.find (OOo_Tag ('office', 'body', m))
|
|
>>> for n in body.findall ('.//*') :
|
|
... zidx = n.get (OOo_Tag ('draw', 'z-index', m))
|
|
... if zidx :
|
|
... print ':'.join(split_tag (n.tag)), zidx
|
|
draw:text-box 0
|
|
draw:rect 1
|
|
draw:text-box 3
|
|
draw:rect 4
|
|
draw:text-box 6
|
|
draw:rect 7
|
|
draw:text-box 2
|
|
draw:text-box 5
|
|
draw:text-box 8
|
|
>>> for n in body.findall ('.//' + OOo_Tag ('text', 'p', m)) :
|
|
... if n.get (OOo_Tag ('text', 'style-name', m)) == name :
|
|
... print n.tag
|
|
{http://openoffice.org/2000/text}p
|
|
{http://openoffice.org/2000/text}p
|
|
>>> vset = './/' + OOo_Tag ('text', 'variable-set', m)
|
|
>>> for n in body.findall (vset) :
|
|
... if n.get (OOo_Tag ('text', 'name', m), None).endswith ('name') :
|
|
... name = n.get (OOo_Tag ('text', 'name', m))
|
|
... print name, ':', n.text
|
|
firstname : Erika
|
|
lastname : Nobody
|
|
firstname : Eric
|
|
lastname : Wizard
|
|
firstname : Hugo
|
|
lastname : Testman
|
|
firstname : Erika
|
|
lastname : Nobody
|
|
firstname : Eric
|
|
lastname : Wizard
|
|
firstname : Hugo
|
|
lastname : Testman
|
|
>>> for n in body.findall ('.//' + OOo_Tag ('draw', 'text-box', m)) :
|
|
... print n.get (OOo_Tag ('draw', 'name', m)),
|
|
... print n.get (OOo_Tag ('text', 'anchor-page-number', m))
|
|
Frame1 1
|
|
Frame2 2
|
|
Frame3 3
|
|
Frame4 None
|
|
Frame5 None
|
|
Frame6 None
|
|
>>> for n in body.findall ('.//' + OOo_Tag ('text', 'section', m)) :
|
|
... print n.get (OOo_Tag ('text', 'name', m))
|
|
Section1
|
|
Section2
|
|
Section3
|
|
Section4
|
|
Section5
|
|
Section6
|
|
Section7
|
|
Section8
|
|
Section9
|
|
Section10
|
|
Section11
|
|
Section12
|
|
Section13
|
|
Section14
|
|
Section15
|
|
Section16
|
|
Section17
|
|
Section18
|
|
>>> for n in body.findall ('.//' + OOo_Tag ('table', 'table', m)) :
|
|
... print n.get (OOo_Tag ('table', 'name', m))
|
|
Table1
|
|
Table2
|
|
Table3
|
|
>>> r = o.read ('meta.xml')
|
|
>>> meta = r.find ('.//' + OOo_Tag ('meta', 'document-statistic', m))
|
|
>>> for i in meta_counts :
|
|
... print i, repr (meta.get (OOo_Tag ('meta', i, m)))
|
|
character-count '951'
|
|
image-count '0'
|
|
object-count '0'
|
|
page-count '3'
|
|
paragraph-count '113'
|
|
table-count '3'
|
|
word-count '162'
|
|
>>> o.close ()
|
|
>>> sio = StringIO ()
|
|
>>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio)
|
|
>>> tf = ('testfiles/test.sxw', 'testfiles/rechng.sxw')
|
|
>>> t = Transformer (
|
|
... o.mimetype
|
|
... , get_meta (o.mimetype)
|
|
... , Transforms.Concatenate (*tf)
|
|
... , renumber_all (o.mimetype)
|
|
... , set_meta (o.mimetype)
|
|
... , Transforms.Fix_OOo_Tag ()
|
|
... )
|
|
>>> t.transform (o)
|
|
>>> for i in meta_counts :
|
|
... print i, repr (t [':'.join (('Set_Attribute', i))])
|
|
character-count '1131'
|
|
image-count '0'
|
|
object-count '0'
|
|
page-count '3'
|
|
paragraph-count '168'
|
|
table-count '2'
|
|
word-count '160'
|
|
>>> o.close ()
|
|
>>> ov = sio.getvalue ()
|
|
>>> f = open ("testout3.sxw", "wb")
|
|
>>> f.write (ov)
|
|
>>> f.close ()
|
|
>>> o = OOoPy (infile = sio)
|
|
>>> m = o.mimetype
|
|
>>> c = o.read ('content.xml')
|
|
>>> s = o.read ('styles.xml')
|
|
>>> for n in c.findall ('./*/*') :
|
|
... name = n.get (OOo_Tag ('style', 'name', m))
|
|
... if name :
|
|
... parent = n.get (OOo_Tag ('style', 'parent-style-name', m))
|
|
... print '"%s", "%s"' % (name, parent)
|
|
"Tahoma1", "None"
|
|
"Bitstream Vera Sans", "None"
|
|
"Tahoma", "None"
|
|
"Nimbus Roman No9 L", "None"
|
|
"Courier New", "None"
|
|
"Arial Black", "None"
|
|
"New Century Schoolbook", "None"
|
|
"Helvetica", "None"
|
|
"Table1", "None"
|
|
"Table1.A", "None"
|
|
"Table1.A1", "None"
|
|
"Table1.E1", "None"
|
|
"Table1.A2", "None"
|
|
"Table1.E2", "None"
|
|
"P1", "None"
|
|
"fr1", "Frame"
|
|
"fr2", "None"
|
|
"fr3", "Frame"
|
|
"Sect1", "None"
|
|
"gr1", "None"
|
|
"P2", "Standard"
|
|
"Standard_Concat", "None"
|
|
"Concat_P1", "Concat_Frame contents"
|
|
"Concat_P2", "Concat_Frame contents"
|
|
"P3", "Concat_Frame contents"
|
|
"P4", "Concat_Frame contents"
|
|
"P5", "Concat_Standard"
|
|
"P6", "Concat_Standard"
|
|
"P7", "Concat_Frame contents"
|
|
"P8", "Concat_Frame contents"
|
|
"P9", "Concat_Frame contents"
|
|
"P10", "Concat_Frame contents"
|
|
"P11", "Concat_Frame contents"
|
|
"P12", "Concat_Frame contents"
|
|
"P13", "Concat_Frame contents"
|
|
"P15", "Concat_Standard"
|
|
"P16", "Concat_Standard"
|
|
"P17", "Concat_Standard"
|
|
"P18", "Concat_Standard"
|
|
"P19", "Concat_Standard"
|
|
"P20", "Concat_Standard"
|
|
"P21", "Concat_Standard"
|
|
"P22", "Concat_Standard"
|
|
"P23", "Concat_Standard"
|
|
"T1", "None"
|
|
"Concat_fr1", "Concat_Frame"
|
|
"Concat_fr2", "Concat_Frame"
|
|
"Concat_fr3", "Concat_Frame"
|
|
"fr4", "Concat_Frame"
|
|
"fr5", "Concat_Frame"
|
|
"fr6", "Concat_Frame"
|
|
"Concat_Sect1", "None"
|
|
"N0", "None"
|
|
"N2", "None"
|
|
"P15_Concat", "Concat_Standard"
|
|
>>> for n in s.findall ('./*/*') :
|
|
... name = n.get (OOo_Tag ('style', 'name', m))
|
|
... if name :
|
|
... parent = n.get (OOo_Tag ('style', 'parent-style-name', m))
|
|
... print '"%s", "%s"' % (name, parent)
|
|
"Tahoma1", "None"
|
|
"Bitstream Vera Sans", "None"
|
|
"Tahoma", "None"
|
|
"Nimbus Roman No9 L", "None"
|
|
"Courier New", "None"
|
|
"Arial Black", "None"
|
|
"New Century Schoolbook", "None"
|
|
"Helvetica", "None"
|
|
"Standard", "None"
|
|
"Text body", "Standard"
|
|
"List", "Text body"
|
|
"Table Contents", "Text body"
|
|
"Table Heading", "Table Contents"
|
|
"Caption", "Standard"
|
|
"Frame contents", "Text body"
|
|
"Index", "Standard"
|
|
"Frame", "None"
|
|
"OLE", "None"
|
|
"Concat_Standard", "None"
|
|
"Concat_Text body", "Concat_Standard"
|
|
"Concat_List", "Concat_Text body"
|
|
"Concat_Caption", "Concat_Standard"
|
|
"Concat_Frame contents", "Concat_Text body"
|
|
"Concat_Index", "Concat_Standard"
|
|
"Horizontal Line", "Concat_Standard"
|
|
"Internet link", "None"
|
|
"Visited Internet Link", "None"
|
|
"Concat_Frame", "None"
|
|
"Concat_OLE", "None"
|
|
"pm1", "None"
|
|
"Concat_pm1", "None"
|
|
"Standard", "None"
|
|
"Concat_Standard", "None"
|
|
>>> for n in c.findall ('.//' + OOo_Tag ('text', 'variable-decl', m)) :
|
|
... name = n.get (OOo_Tag ('text', 'name', m))
|
|
... print name
|
|
salutation
|
|
firstname
|
|
lastname
|
|
street
|
|
country
|
|
postalcode
|
|
city
|
|
date
|
|
invoice.invoice_no
|
|
invoice.abo.aboprice.abotype.description
|
|
address.salutation
|
|
address.title
|
|
address.firstname
|
|
address.lastname
|
|
address.function
|
|
address.street
|
|
address.country
|
|
address.postalcode
|
|
address.city
|
|
invoice.subscriber.salutation
|
|
invoice.subscriber.title
|
|
invoice.subscriber.firstname
|
|
invoice.subscriber.lastname
|
|
invoice.subscriber.function
|
|
invoice.subscriber.street
|
|
invoice.subscriber.country
|
|
invoice.subscriber.postalcode
|
|
invoice.subscriber.city
|
|
invoice.period_start
|
|
invoice.period_end
|
|
invoice.currency.name
|
|
invoice.amount
|
|
invoice.subscriber.initial
|
|
>>> for n in c.findall ('.//' + OOo_Tag ('text', 'sequence-decl', m)) :
|
|
... name = n.get (OOo_Tag ('text', 'name', m))
|
|
... print name
|
|
Illustration
|
|
Table
|
|
Text
|
|
Drawing
|
|
>>> for n in c.findall ('.//' + OOo_Tag ('text', 'p', m)) :
|
|
... name = n.get (OOo_Tag ('text', 'style-name', m))
|
|
... if not name or name.startswith ('Concat') :
|
|
... print ">%s<" % name
|
|
>Concat_P1<
|
|
>Concat_P2<
|
|
>Concat_Frame contents<
|
|
>>> for n in c.findall ('.//' + OOo_Tag ('draw', 'text-box', m)) :
|
|
... attrs = 'name', 'style-name', 'z-index'
|
|
... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
|
|
... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m)))
|
|
... print attrs
|
|
['Frame1', 'fr1', '0', '1']
|
|
['Frame2', 'fr1', '3', '2']
|
|
['Frame3', 'Concat_fr1', '6', '3']
|
|
['Frame4', 'Concat_fr2', '7', '3']
|
|
['Frame5', 'Concat_fr3', '8', '3']
|
|
['Frame6', 'Concat_fr1', '9', '3']
|
|
['Frame7', 'fr4', '10', '3']
|
|
['Frame8', 'fr4', '11', '3']
|
|
['Frame9', 'fr4', '12', '3']
|
|
['Frame10', 'fr4', '13', '3']
|
|
['Frame11', 'fr4', '14', '3']
|
|
['Frame12', 'fr4', '15', '3']
|
|
['Frame13', 'fr5', '16', '3']
|
|
['Frame14', 'fr4', '18', '3']
|
|
['Frame15', 'fr4', '19', '3']
|
|
['Frame16', 'fr4', '20', '3']
|
|
['Frame17', 'fr6', '17', '3']
|
|
['Frame18', 'fr4', '23', '3']
|
|
['Frame19', 'fr3', '2', None]
|
|
['Frame20', 'fr3', '5', None]
|
|
>>> for n in c.findall ('.//' + OOo_Tag ('text', 'section', m)) :
|
|
... attrs = 'name', 'style-name'
|
|
... attrs = [n.get (OOo_Tag ('text', i, m)) for i in attrs]
|
|
... print attrs
|
|
['Section1', 'Sect1']
|
|
['Section2', 'Sect1']
|
|
['Section3', 'Sect1']
|
|
['Section4', 'Sect1']
|
|
['Section5', 'Sect1']
|
|
['Section6', 'Sect1']
|
|
['Section7', 'Concat_Sect1']
|
|
['Section8', 'Concat_Sect1']
|
|
['Section9', 'Concat_Sect1']
|
|
['Section10', 'Concat_Sect1']
|
|
['Section11', 'Concat_Sect1']
|
|
['Section12', 'Concat_Sect1']
|
|
['Section13', 'Concat_Sect1']
|
|
['Section14', 'Concat_Sect1']
|
|
['Section15', 'Concat_Sect1']
|
|
['Section16', 'Concat_Sect1']
|
|
['Section17', 'Concat_Sect1']
|
|
['Section18', 'Concat_Sect1']
|
|
['Section19', 'Concat_Sect1']
|
|
['Section20', 'Concat_Sect1']
|
|
['Section21', 'Concat_Sect1']
|
|
['Section22', 'Concat_Sect1']
|
|
['Section23', 'Concat_Sect1']
|
|
['Section24', 'Concat_Sect1']
|
|
['Section25', 'Concat_Sect1']
|
|
['Section26', 'Concat_Sect1']
|
|
['Section27', 'Concat_Sect1']
|
|
['Section28', 'Sect1']
|
|
['Section29', 'Sect1']
|
|
['Section30', 'Sect1']
|
|
['Section31', 'Sect1']
|
|
['Section32', 'Sect1']
|
|
['Section33', 'Sect1']
|
|
>>> for n in c.findall ('.//' + OOo_Tag ('draw', 'rect', m)) :
|
|
... attrs = 'style-name', 'text-style-name', 'z-index'
|
|
... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
|
|
... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m)))
|
|
... print attrs
|
|
['gr1', 'P1', '1', '1']
|
|
['gr1', 'P1', '4', '2']
|
|
>>> for n in c.findall ('.//' + OOo_Tag ('draw', 'line', m)) :
|
|
... attrs = 'style-name', 'text-style-name', 'z-index'
|
|
... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
|
|
... print attrs
|
|
['gr1', 'P1', '24']
|
|
['gr1', 'P1', '22']
|
|
['gr1', 'P1', '21']
|
|
>>> for n in s.findall ('.//' + OOo_Tag ('style', 'style', m)) :
|
|
... if n.get (OOo_Tag ('style', 'name', m)).startswith ('Co') :
|
|
... attrs = 'name', 'class', 'family'
|
|
... attrs = [n.get (OOo_Tag ('style', i, m)) for i in attrs]
|
|
... print attrs
|
|
... props = n.find ('./' + OOo_Tag ('style', 'properties', m))
|
|
... if props is not None and len (props) :
|
|
... props [0].tag
|
|
['Concat_Standard', 'text', 'paragraph']
|
|
'{http://openoffice.org/2000/style}tab-stops'
|
|
['Concat_Text body', 'text', 'paragraph']
|
|
['Concat_List', 'list', 'paragraph']
|
|
['Concat_Caption', 'extra', 'paragraph']
|
|
['Concat_Frame contents', 'extra', 'paragraph']
|
|
['Concat_Index', 'index', 'paragraph']
|
|
['Concat_Frame', None, 'graphics']
|
|
['Concat_OLE', None, 'graphics']
|
|
>>> for n in c.findall ('.//*') :
|
|
... zidx = n.get (OOo_Tag ('draw', 'z-index', m))
|
|
... if zidx :
|
|
... print ':'.join(split_tag (n.tag)), zidx
|
|
draw:text-box 0
|
|
draw:rect 1
|
|
draw:text-box 3
|
|
draw:rect 4
|
|
draw:text-box 6
|
|
draw:text-box 7
|
|
draw:text-box 8
|
|
draw:text-box 9
|
|
draw:text-box 10
|
|
draw:text-box 11
|
|
draw:text-box 12
|
|
draw:text-box 13
|
|
draw:text-box 14
|
|
draw:text-box 15
|
|
draw:text-box 16
|
|
draw:text-box 18
|
|
draw:text-box 19
|
|
draw:text-box 20
|
|
draw:text-box 17
|
|
draw:text-box 23
|
|
draw:line 24
|
|
draw:text-box 2
|
|
draw:text-box 5
|
|
draw:line 22
|
|
draw:line 21
|
|
>>> sio = StringIO ()
|
|
>>> o = OOoPy (infile = 'testfiles/carta.stw', outfile = sio)
|
|
>>> t = Transformer (
|
|
... o.mimetype
|
|
... , get_meta (o.mimetype)
|
|
... , Transforms.Addpagebreak_Style ()
|
|
... , Transforms.Mailmerge
|
|
... ( iterator =
|
|
... ( dict
|
|
... ( Spett = "Spettabile"
|
|
... , contraente = "First person"
|
|
... , indirizzo = "street? 1"
|
|
... , tipo = "racc. A.C."
|
|
... , luogo = "Varese"
|
|
... , oggetto = "Saluti"
|
|
... )
|
|
... , dict
|
|
... ( Spett = "Egregio"
|
|
... , contraente = "Second Person"
|
|
... , indirizzo = "street? 2"
|
|
... , tipo = "Raccomandata"
|
|
... , luogo = "Gavirate"
|
|
... , oggetto = "Ossequi"
|
|
... )
|
|
... )
|
|
... )
|
|
... , renumber_all (o.mimetype)
|
|
... , set_meta (o.mimetype)
|
|
... , Transforms.Fix_OOo_Tag ()
|
|
... )
|
|
>>> t.transform(o)
|
|
>>> o.close()
|
|
>>> ov = sio.getvalue ()
|
|
>>> f = open ("carta-out.stw", "wb")
|
|
>>> f.write (ov)
|
|
>>> f.close ()
|
|
>>> o = OOoPy (infile = sio)
|
|
>>> m = o.mimetype
|
|
>>> c = o.read ('content.xml')
|
|
>>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
|
|
>>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
|
|
>>> for node in body.findall (vset) :
|
|
... name = node.get (OOo_Tag ('text', 'name', m))
|
|
... print name, ':', node.text
|
|
Spett : Spettabile
|
|
contraente : First person
|
|
indirizzo : street? 1
|
|
Spett : Egregio
|
|
contraente : Second Person
|
|
indirizzo : street? 2
|
|
tipo : racc. A.C.
|
|
luogo : Varese
|
|
oggetto : Saluti
|
|
tipo : Raccomandata
|
|
luogo : Gavirate
|
|
oggetto : Ossequi
|
|
>>> sio = StringIO ()
|
|
>>> o = OOoPy (infile = 'testfiles/test.odt', outfile = sio)
|
|
>>> t = Transformer (
|
|
... o.mimetype
|
|
... , get_meta (o.mimetype)
|
|
... , Transforms.Addpagebreak_Style ()
|
|
... , Transforms.Mailmerge
|
|
... ( iterator =
|
|
... ( dict (firstname = 'Erika', lastname = 'Nobody')
|
|
... , dict (firstname = 'Eric', lastname = 'Wizard')
|
|
... , cb
|
|
... )
|
|
... )
|
|
... , renumber_all (o.mimetype)
|
|
... , set_meta (o.mimetype)
|
|
... , Transforms.Fix_OOo_Tag ()
|
|
... )
|
|
>>> t.transform (o)
|
|
>>> for i in meta_counts :
|
|
... print i, t [':'.join (('Set_Attribute', i))]
|
|
character-count 951
|
|
image-count 0
|
|
object-count 0
|
|
page-count 3
|
|
paragraph-count 53
|
|
table-count 3
|
|
word-count 162
|
|
>>> name = t ['Addpagebreak_Style:stylename']
|
|
>>> name
|
|
'P2'
|
|
>>> o.close ()
|
|
>>> ov = sio.getvalue ()
|
|
>>> f = open ("testout.odt", "wb")
|
|
>>> f.write (ov)
|
|
>>> f.close ()
|
|
>>> o = OOoPy (infile = sio)
|
|
>>> m = o.mimetype
|
|
>>> c = o.read ('content.xml')
|
|
>>> body = c.find (OOo_Tag ('office', 'body', m))
|
|
>>> for n in body.findall ('.//*') :
|
|
... zidx = n.get (OOo_Tag ('draw', 'z-index', m))
|
|
... if zidx :
|
|
... print ':'.join(split_tag (n.tag)), zidx
|
|
draw:frame 0
|
|
draw:rect 1
|
|
draw:frame 3
|
|
draw:rect 4
|
|
draw:frame 6
|
|
draw:rect 7
|
|
draw:frame 2
|
|
draw:frame 5
|
|
draw:frame 8
|
|
>>> for n in body.findall ('.//' + OOo_Tag ('text', 'p', m)) :
|
|
... if n.get (OOo_Tag ('text', 'style-name', m)) == name :
|
|
... print n.tag
|
|
{urn:oasis:names:tc:opendocument:xmlns:text:1.0}p
|
|
{urn:oasis:names:tc:opendocument:xmlns:text:1.0}p
|
|
>>> vset = './/' + OOo_Tag ('text', 'variable-set', m)
|
|
>>> for n in body.findall (vset) :
|
|
... if n.get (OOo_Tag ('text', 'name', m), None).endswith ('name') :
|
|
... name = n.get (OOo_Tag ('text', 'name', m))
|
|
... print name, ':', n.text
|
|
firstname : Erika
|
|
lastname : Nobody
|
|
firstname : Eric
|
|
lastname : Wizard
|
|
firstname : Hugo
|
|
lastname : Testman
|
|
firstname : Erika
|
|
lastname : Nobody
|
|
firstname : Eric
|
|
lastname : Wizard
|
|
firstname : Hugo
|
|
lastname : Testman
|
|
>>> for n in body.findall ('.//' + OOo_Tag ('draw', 'frame', m)) :
|
|
... print n.get (OOo_Tag ('draw', 'name', m)),
|
|
... print n.get (OOo_Tag ('text', 'anchor-page-number', m))
|
|
Frame1 1
|
|
Frame2 2
|
|
Frame3 3
|
|
Frame4 None
|
|
Frame5 None
|
|
Frame6 None
|
|
>>> for n in body.findall ('.//' + OOo_Tag ('text', 'section', m)) :
|
|
... print n.get (OOo_Tag ('text', 'name', m))
|
|
Section1
|
|
Section2
|
|
Section3
|
|
Section4
|
|
Section5
|
|
Section6
|
|
Section7
|
|
Section8
|
|
Section9
|
|
Section10
|
|
Section11
|
|
Section12
|
|
Section13
|
|
Section14
|
|
Section15
|
|
Section16
|
|
Section17
|
|
Section18
|
|
>>> for n in body.findall ('.//' + OOo_Tag ('table', 'table', m)) :
|
|
... print n.get (OOo_Tag ('table', 'name', m))
|
|
Table1
|
|
Table2
|
|
Table3
|
|
>>> r = o.read ('meta.xml')
|
|
>>> meta = r.find ('.//' + OOo_Tag ('meta', 'document-statistic', m))
|
|
>>> for i in meta_counts :
|
|
... print i, repr (meta.get (OOo_Tag ('meta', i, m)))
|
|
character-count '951'
|
|
image-count '0'
|
|
object-count '0'
|
|
page-count '3'
|
|
paragraph-count '53'
|
|
table-count '3'
|
|
word-count '162'
|
|
>>> o.close ()
|
|
>>> sio = StringIO ()
|
|
>>> o = OOoPy (infile = 'testfiles/carta.odt', outfile = sio)
|
|
>>> t = Transformer (
|
|
... o.mimetype
|
|
... , get_meta (o.mimetype)
|
|
... , Transforms.Addpagebreak_Style ()
|
|
... , Transforms.Mailmerge
|
|
... ( iterator =
|
|
... ( dict
|
|
... ( Spett = "Spettabile"
|
|
... , contraente = "First person"
|
|
... , indirizzo = "street? 1"
|
|
... , tipo = "racc. A.C."
|
|
... , luogo = "Varese"
|
|
... , oggetto = "Saluti"
|
|
... )
|
|
... , dict
|
|
... ( Spett = "Egregio"
|
|
... , contraente = "Second Person"
|
|
... , indirizzo = "street? 2"
|
|
... , tipo = "Raccomandata"
|
|
... , luogo = "Gavirate"
|
|
... , oggetto = "Ossequi"
|
|
... )
|
|
... )
|
|
... )
|
|
... , renumber_all (o.mimetype)
|
|
... , set_meta (o.mimetype)
|
|
... , Transforms.Fix_OOo_Tag ()
|
|
... )
|
|
>>> t.transform(o)
|
|
>>> o.close()
|
|
>>> ov = sio.getvalue ()
|
|
>>> f = open ("carta-out.odt", "wb")
|
|
>>> f.write (ov)
|
|
>>> f.close ()
|
|
>>> o = OOoPy (infile = sio)
|
|
>>> m = o.mimetype
|
|
>>> c = o.read ('content.xml')
|
|
>>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
|
|
>>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
|
|
>>> for node in body.findall (vset) :
|
|
... name = node.get (OOo_Tag ('text', 'name', m))
|
|
... print name, ':', node.text
|
|
Spett : Spettabile
|
|
contraente : First person
|
|
indirizzo : street? 1
|
|
Spett : Egregio
|
|
contraente : Second Person
|
|
indirizzo : street? 2
|
|
tipo : racc. A.C.
|
|
luogo : Varese
|
|
oggetto : Saluti
|
|
tipo : Raccomandata
|
|
luogo : Gavirate
|
|
oggetto : Ossequi
|
|
>>> sio = StringIO ()
|
|
>>> o = OOoPy (infile = 'testfiles/test.odt', outfile = sio)
|
|
>>> tf = ('testfiles/test.odt', 'testfiles/rechng.odt')
|
|
>>> t = Transformer (
|
|
... o.mimetype
|
|
... , get_meta (o.mimetype)
|
|
... , Transforms.Concatenate (*tf)
|
|
... , renumber_all (o.mimetype)
|
|
... , set_meta (o.mimetype)
|
|
... , Transforms.Fix_OOo_Tag ()
|
|
... )
|
|
>>> t.transform (o)
|
|
>>> for i in meta_counts :
|
|
... print i, repr (t [':'.join (('Set_Attribute', i))])
|
|
character-count '1131'
|
|
image-count '0'
|
|
object-count '0'
|
|
page-count '3'
|
|
paragraph-count '80'
|
|
table-count '2'
|
|
word-count '159'
|
|
>>> o.close ()
|
|
>>> ov = sio.getvalue ()
|
|
>>> f = open ("testout3.odt", "wb")
|
|
>>> f.write (ov)
|
|
>>> f.close ()
|
|
>>> o = OOoPy (infile = sio)
|
|
>>> m = o.mimetype
|
|
>>> c = o.read ('content.xml')
|
|
>>> s = o.read ('styles.xml')
|
|
>>> for n in c.findall ('./*/*') :
|
|
... name = n.get (OOo_Tag ('style', 'name', m))
|
|
... if name :
|
|
... parent = n.get (OOo_Tag ('style', 'parent-style-name', m))
|
|
... print '"%s", "%s"' % (name, parent)
|
|
"Tahoma1", "None"
|
|
"Bitstream Vera Sans", "None"
|
|
"Tahoma", "None"
|
|
"Nimbus Roman No9 L", "None"
|
|
"Courier New", "None"
|
|
"Arial Black", "None"
|
|
"New Century Schoolbook", "None"
|
|
"Times New Roman", "None"
|
|
"Arial", "None"
|
|
"Helvetica", "None"
|
|
"Table1", "None"
|
|
"Table1.A", "None"
|
|
"Table1.A1", "None"
|
|
"Table1.E1", "None"
|
|
"Table1.A2", "None"
|
|
"Table1.E2", "None"
|
|
"P1", "None"
|
|
"fr1", "Frame"
|
|
"fr2", "Frame"
|
|
"Sect1", "None"
|
|
"gr1", "None"
|
|
"P2", "Standard"
|
|
"Standard_Concat", "None"
|
|
"Concat_P1", "Concat_Frame_20_contents"
|
|
"Concat_P2", "Concat_Frame_20_contents"
|
|
"P3", "Concat_Frame_20_contents"
|
|
"P4", "Concat_Standard"
|
|
"P5", "Concat_Standard"
|
|
"P6", "Concat_Frame_20_contents"
|
|
"P7", "Concat_Frame_20_contents"
|
|
"P8", "Concat_Frame_20_contents"
|
|
"P9", "Concat_Frame_20_contents"
|
|
"P10", "Concat_Frame_20_contents"
|
|
"P11", "Concat_Frame_20_contents"
|
|
"P12", "Concat_Frame_20_contents"
|
|
"P14", "Concat_Standard"
|
|
"P15", "Concat_Standard"
|
|
"P16", "Concat_Standard"
|
|
"P17", "Concat_Standard"
|
|
"P18", "Concat_Standard"
|
|
"P19", "Concat_Standard"
|
|
"P20", "Concat_Standard"
|
|
"P21", "Concat_Standard"
|
|
"P22", "Concat_Standard"
|
|
"P23", "Concat_Standard"
|
|
"Concat_fr1", "Frame"
|
|
"Concat_fr2", "Frame"
|
|
"fr3", "Frame"
|
|
"fr4", "Frame"
|
|
"fr5", "Frame"
|
|
"fr6", "Frame"
|
|
"Concat_gr1", "None"
|
|
"N0", "None"
|
|
"N2", "None"
|
|
"P14_Concat", "Concat_Standard"
|
|
>>> for n in c.findall ('.//' + OOo_Tag ('text', 'variable-decl', m)) :
|
|
... name = n.get (OOo_Tag ('text', 'name', m))
|
|
... print name
|
|
salutation
|
|
firstname
|
|
lastname
|
|
street
|
|
country
|
|
postalcode
|
|
city
|
|
date
|
|
invoice.invoice_no
|
|
invoice.abo.aboprice.abotype.description
|
|
address.salutation
|
|
address.title
|
|
address.firstname
|
|
address.lastname
|
|
address.function
|
|
address.street
|
|
address.country
|
|
address.postalcode
|
|
address.city
|
|
invoice.subscriber.salutation
|
|
invoice.subscriber.title
|
|
invoice.subscriber.firstname
|
|
invoice.subscriber.lastname
|
|
invoice.subscriber.function
|
|
invoice.subscriber.street
|
|
invoice.subscriber.country
|
|
invoice.subscriber.postalcode
|
|
invoice.subscriber.city
|
|
invoice.period_start
|
|
invoice.period_end
|
|
invoice.currency.name
|
|
invoice.amount
|
|
invoice.subscriber.initial
|
|
>>> for n in c.findall ('.//' + OOo_Tag ('text', 'sequence-decl', m)) :
|
|
... name = n.get (OOo_Tag ('text', 'name', m))
|
|
... print name
|
|
Illustration
|
|
Table
|
|
Text
|
|
Drawing
|
|
>>> for n in c.findall ('.//' + OOo_Tag ('text', 'p', m)) :
|
|
... name = n.get (OOo_Tag ('text', 'style-name', m))
|
|
... if not name or name.startswith ('Concat') :
|
|
... print ':'.join(split_tag (n.tag)), ">%s<" % name
|
|
text:p >None<
|
|
text:p >None<
|
|
text:p >Concat_P1<
|
|
text:p >Concat_P1<
|
|
text:p >Concat_P2<
|
|
text:p >Concat_P2<
|
|
text:p >Concat_P2<
|
|
text:p >Concat_P2<
|
|
text:p >Concat_P2<
|
|
text:p >Concat_P2<
|
|
text:p >Concat_P2<
|
|
text:p >Concat_P2<
|
|
text:p >Concat_P2<
|
|
text:p >Concat_P2<
|
|
text:p >Concat_Frame_20_contents<
|
|
text:p >None<
|
|
text:p >None<
|
|
text:p >None<
|
|
>>> for n in c.findall ('.//' + OOo_Tag ('draw', 'frame', m)) :
|
|
... attrs = 'name', 'style-name', 'z-index'
|
|
... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
|
|
... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m)))
|
|
... print attrs
|
|
['Frame1', 'fr1', '0', '1']
|
|
['Frame2', 'fr1', '3', '2']
|
|
['Frame3', 'Concat_fr1', '6', '3']
|
|
['Frame4', 'Concat_fr2', '7', '3']
|
|
['Frame5', 'fr3', '8', '3']
|
|
['Frame6', 'Concat_fr1', '9', '3']
|
|
['Frame7', 'fr4', '10', '3']
|
|
['Frame8', 'fr4', '11', '3']
|
|
['Frame9', 'fr4', '12', '3']
|
|
['Frame10', 'fr4', '13', '3']
|
|
['Frame11', 'fr4', '14', '3']
|
|
['Frame12', 'fr4', '15', '3']
|
|
['Frame13', 'fr5', '16', '3']
|
|
['Frame14', 'fr4', '18', '3']
|
|
['Frame15', 'fr4', '19', '3']
|
|
['Frame16', 'fr4', '20', '3']
|
|
['Frame17', 'fr6', '17', '3']
|
|
['Frame18', 'fr4', '23', '3']
|
|
['Frame19', 'fr2', '2', None]
|
|
['Frame20', 'fr2', '5', None]
|
|
>>> for n in c.findall ('.//' + OOo_Tag ('text', 'section', m)) :
|
|
... attrs = 'name', 'style-name'
|
|
... attrs = [n.get (OOo_Tag ('text', i, m)) for i in attrs]
|
|
... print attrs
|
|
['Section1', 'Sect1']
|
|
['Section2', 'Sect1']
|
|
['Section3', 'Sect1']
|
|
['Section4', 'Sect1']
|
|
['Section5', 'Sect1']
|
|
['Section6', 'Sect1']
|
|
['Section7', 'Sect1']
|
|
['Section8', 'Sect1']
|
|
['Section9', 'Sect1']
|
|
['Section10', 'Sect1']
|
|
['Section11', 'Sect1']
|
|
['Section12', 'Sect1']
|
|
['Section13', 'Sect1']
|
|
['Section14', 'Sect1']
|
|
['Section15', 'Sect1']
|
|
['Section16', 'Sect1']
|
|
['Section17', 'Sect1']
|
|
['Section18', 'Sect1']
|
|
['Section19', 'Sect1']
|
|
['Section20', 'Sect1']
|
|
['Section21', 'Sect1']
|
|
['Section22', 'Sect1']
|
|
['Section23', 'Sect1']
|
|
['Section24', 'Sect1']
|
|
['Section25', 'Sect1']
|
|
['Section26', 'Sect1']
|
|
['Section27', 'Sect1']
|
|
['Section28', 'Sect1']
|
|
['Section29', 'Sect1']
|
|
['Section30', 'Sect1']
|
|
['Section31', 'Sect1']
|
|
['Section32', 'Sect1']
|
|
['Section33', 'Sect1']
|
|
>>> for n in c.findall ('.//' + OOo_Tag ('draw', 'rect', m)) :
|
|
... attrs = 'style-name', 'text-style-name', 'z-index'
|
|
... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
|
|
... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m)))
|
|
... print attrs
|
|
['gr1', 'P1', '1', '1']
|
|
['gr1', 'P1', '4', '2']
|
|
>>> for n in c.findall ('.//' + OOo_Tag ('draw', 'line', m)) :
|
|
... attrs = 'style-name', 'text-style-name', 'z-index'
|
|
... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs]
|
|
... print attrs
|
|
['Concat_gr1', 'P1', '24']
|
|
['Concat_gr1', 'P1', '22']
|
|
['Concat_gr1', 'P1', '21']
|
|
>>> for n in s.findall ('.//' + OOo_Tag ('style', 'style', m)) :
|
|
... if n.get (OOo_Tag ('style', 'name', m)).startswith ('Co') :
|
|
... attrs = 'name', 'display-name', 'class', 'family'
|
|
... attrs = [n.get (OOo_Tag ('style', i, m)) for i in attrs]
|
|
... print attrs
|
|
... props = n.find ('./' + OOo_Tag ('style', 'properties', m))
|
|
... if props is not None and len (props) :
|
|
... props [0].tag
|
|
['Concat_Standard', None, 'text', 'paragraph']
|
|
['Concat_Text_20_body', 'Concat Text body', 'text', 'paragraph']
|
|
['Concat_List', None, 'list', 'paragraph']
|
|
['Concat_Caption', None, 'extra', 'paragraph']
|
|
['Concat_Frame_20_contents', 'Concat Frame contents', 'extra', 'paragraph']
|
|
['Concat_Index', None, 'index', 'paragraph']
|
|
>>> for n in c.findall ('.//*') :
|
|
... zidx = n.get (OOo_Tag ('draw', 'z-index', m))
|
|
... if zidx :
|
|
... print ':'.join(split_tag (n.tag)), zidx
|
|
draw:frame 0
|
|
draw:rect 1
|
|
draw:frame 3
|
|
draw:rect 4
|
|
draw:frame 6
|
|
draw:frame 7
|
|
draw:frame 8
|
|
draw:frame 9
|
|
draw:frame 10
|
|
draw:frame 11
|
|
draw:frame 12
|
|
draw:frame 13
|
|
draw:frame 14
|
|
draw:frame 15
|
|
draw:frame 16
|
|
draw:frame 18
|
|
draw:frame 19
|
|
draw:frame 20
|
|
draw:frame 17
|
|
draw:frame 23
|
|
draw:line 24
|
|
draw:frame 2
|
|
draw:frame 5
|
|
draw:line 22
|
|
draw:line 21
|
|
>>> from os import system
|
|
>>> system ('python bin/ooo_fieldreplace -i testfiles/test.odt '
|
|
... '-o testout.odt '
|
|
... 'salutation=Frau firstname=Erika lastname=Musterfrau '
|
|
... 'country=D postalcode=00815 city=Niemandsdorf '
|
|
... 'street="Beispielstrasse 42"')
|
|
0
|
|
>>> o = OOoPy (infile = 'testout.odt')
|
|
>>> c = o.read ('content.xml')
|
|
>>> m = o.mimetype
|
|
>>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
|
|
>>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
|
|
>>> for node in body.findall (vset) :
|
|
... name = node.get (OOo_Tag ('text', 'name', m))
|
|
... print name, ':', node.text
|
|
salutation : Frau
|
|
firstname : Erika
|
|
lastname : Musterfrau
|
|
street : Beispielstrasse 42
|
|
country : D
|
|
postalcode : 00815
|
|
city : Niemandsdorf
|
|
salutation : Frau
|
|
firstname : Erika
|
|
lastname : Musterfrau
|
|
street : Beispielstrasse 42
|
|
country : D
|
|
postalcode : 00815
|
|
city : Niemandsdorf
|
|
>>> o.close ()
|
|
>>> system ("bin/ooo_mailmerge -o testout.odt -d'|' "
|
|
... "testfiles/carta.odt testfiles/x.csv")
|
|
0
|
|
>>> o = OOoPy (infile = 'testout.odt')
|
|
>>> m = o.mimetype
|
|
>>> c = o.read ('content.xml')
|
|
>>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
|
|
>>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m)
|
|
>>> for node in body.findall (vset) :
|
|
... name = node.get (OOo_Tag ('text', 'name', m))
|
|
... print name, ':', node.text
|
|
Spett : Spettabile
|
|
contraente : First person
|
|
indirizzo : street? 1
|
|
Spett : Egregio
|
|
contraente : Second Person
|
|
indirizzo : street? 2
|
|
tipo : racc. A.C.
|
|
luogo : Varese
|
|
oggetto : Saluti
|
|
tipo : Raccomandata
|
|
luogo : Gavirate
|
|
oggetto : Ossequi
|
|
>>> o.close ()
|
|
>>> infile = 'testfiles/testenum.odt'
|
|
>>> o = OOoPy (infile = infile, outfile = 'xyzzy.odt')
|
|
>>> t = Transformer (
|
|
... o.mimetype
|
|
... , get_meta (o.mimetype)
|
|
... , Transforms.Addpagebreak_Style ()
|
|
... , Transforms.Mailmerge
|
|
... ( iterator =
|
|
... ( dict (firstname = 'Erika', lastname = 'Nobody')
|
|
... , dict (firstname = 'Eric', lastname = 'Wizard')
|
|
... , cb
|
|
... )
|
|
... )
|
|
... , renumber_all (o.mimetype)
|
|
... , set_meta (o.mimetype)
|
|
... , Transforms.Fix_OOo_Tag ()
|
|
... )
|
|
>>> t.transform (o)
|
|
>>> o.close ()
|
|
>>> o = OOoPy (infile = 'xyzzy.odt')
|
|
>>> m = o.mimetype
|
|
>>> c = o.read ('content.xml')
|
|
>>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
|
|
>>> textlist = './/' + OOo_Tag ('text', 'list', m)
|
|
>>> for node in body.findall (textlist) :
|
|
... id = node.get (OOo_Tag ('xml', 'id', m))
|
|
... print 'xml:id', ':', id
|
|
xml:id : list1
|
|
xml:id : list2
|
|
xml:id : list3
|
|
>>> o = OOoPy (infile = 'testfiles/page1.odt', outfile = 'xyzzy.odt')
|
|
>>> m = o.mimetype
|
|
>>> t = Transformer (
|
|
... o.mimetype
|
|
... , get_meta (o.mimetype)
|
|
... , Transforms.Concatenate ('testfiles/page2.odt')
|
|
... , renumber_all (o.mimetype)
|
|
... , set_meta (o.mimetype)
|
|
... , Transforms.Fix_OOo_Tag ()
|
|
... , Transforms.Manifest_Append ()
|
|
... )
|
|
>>> t.transform (o)
|
|
>>> o.close ()
|
|
>>> o = OOoPy (infile = 'xyzzy.odt')
|
|
>>> c = o.read ('META-INF/manifest.xml')
|
|
>>> for node in c.getroot () :
|
|
... fe = node.get (OOo_Tag ('manifest', 'full-path', m))
|
|
... print fe
|
|
/
|
|
Pictures/10000000000000C80000007941B1A419.jpg
|
|
Pictures/10000000000000DC000000B02E191635.jpg
|
|
Pictures/10000000000000DC000000A337377AAA.jpg
|
|
meta.xml
|
|
settings.xml
|
|
content.xml
|
|
Thumbnails/thumbnail.png
|
|
layout-cache
|
|
manifest.rdf
|
|
Configurations2/accelerator/current.xml
|
|
Configurations2/
|
|
styles.xml
|
|
>>> for f in o.izip.infolist () :
|
|
... print f.filename
|
|
mimetype
|
|
settings.xml
|
|
META-INF/manifest.xml
|
|
content.xml
|
|
meta.xml
|
|
styles.xml
|
|
Pictures/10000000000000C80000007941B1A419.jpg
|
|
Pictures/10000000000000DC000000B02E191635.jpg
|
|
Pictures/10000000000000DC000000A337377AAA.jpg
|
|
Thumbnails/thumbnail.png
|
|
layout-cache
|
|
manifest.rdf
|
|
Configurations2/images/Bitmaps/
|
|
Configurations2/accelerator/current.xml
|
|
>>> sio = StringIO ()
|
|
>>> o = OOoPy (infile = 'testfiles/tbl_first.odt', outfile = sio)
|
|
>>> m = o.mimetype
|
|
>>> t = Transformer (
|
|
... o.mimetype
|
|
... , get_meta (o.mimetype)
|
|
... , Transforms.Concatenate ('testfiles/tbl_second.odt')
|
|
... , renumber_all (o.mimetype)
|
|
... , set_meta (o.mimetype)
|
|
... , Transforms.Fix_OOo_Tag ()
|
|
... , Transforms.Manifest_Append ()
|
|
... )
|
|
>>> t.transform (o)
|
|
>>> o.close ()
|
|
>>> o = OOoPy (infile = sio)
|
|
>>> c = o.read ('content.xml')
|
|
>>> body = c.find (OOo_Tag ('office', 'body', mimetype = m))
|
|
>>> tbls = './/' + OOo_Tag ('table', 'table', mimetype = m)
|
|
>>> for table in body.findall (tbls) :
|
|
... name = table.get (OOo_Tag ('table', 'style-name', mimetype = m))
|
|
... if name :
|
|
... print name
|
|
... for t in table.findall ('.//') :
|
|
... name = t.get (OOo_Tag ('table', 'style-name', mimetype = m))
|
|
... if name :
|
|
... print name
|
|
Tabella1
|
|
Tabella1.A
|
|
Tabella1.A1
|
|
Tabella1.B1
|
|
Tabella1.A2
|
|
Tabella1.B2
|
|
Tabella1
|
|
Tabella1.A
|
|
Tabella1.A1
|
|
Tabella1.B1
|
|
Tabella1.A2
|
|
Tabella1.B2
|
|
"""
|
|
|
|
def __init__ (self, mimetype, *tf) :
    """
        Set up a transformer for the given mimetype.

        mimetype must be contained in mimetypes (checked with an
        assertion). Every further positional argument is a transform
        and is handed to insert in the order given.
    """
    assert (mimetype in mimetypes)
    self.mimetype     = mimetype
    self.transforms   = {}
    # Create the shared state *before* inserting the transforms:
    # insert passes self to the register method of each transform,
    # so the object should be completely initialised by then.
    self.dictionary   = {}
    # dict.has_key no longer exists in Python 3; the bound
    # __contains__ takes the same single key argument and returns
    # the same boolean under Python 2 and Python 3.
    self.has_key      = self.dictionary.__contains__
    # NOTE(review): for new-style classes the "in" operator looks up
    # __contains__ on the type, not on the instance, so this alias
    # probably only serves explicit calls -- confirm against the
    # class definition.
    self.__contains__ = self.has_key
    # 2-tuples of filename, content
    self.appendfiles  = []
    for t in tf :
        self.insert (t)
# end def __init__
|
|
|
|
def insert (self, transform) :
    """
        Insert a new transform.

        The transform is appended to the list kept in self.transforms
        under its priority (transform.prio); afterwards its register
        method is called with this transformer.
    """
    same_prio = self.transforms.setdefault (transform.prio, [])
    same_prio.append (transform)
    transform.register (self)
# end def insert
|
|
|
|
def transform (self, ooopy) :
    """
        Apply all the transforms in priority order.
        Priority order is global over all transforms.

        ooopy.read is called for every name in files; the results are
        kept in self.trees under that name. Each registered transform
        gets the whole dictionary of trees via apply_all, lowest
        priority value first, transforms of equal priority in
        insertion order. Afterwards every tree is written and the
        (filename, content) pairs in self.appendfiles are appended.
    """
    self.trees = {}
    for f in files :
        self.trees [f] = ooopy.read (f)
    #self.dictionary = {} # clear dict when transforming another ooopy
    # sorted () works on Python 2 and 3 alike; under Python 3 keys ()
    # returns a view without a sort method.
    for p in sorted (self.transforms) :
        for t in self.transforms [p] :
            t.apply_all (self.trees)
    # values () replaces the Python-2-only itervalues ().
    tree = None
    for tree in self.trees.values () :
        tree.write ()
    # Extra files are appended through the ooopy attribute of the tree
    # written last.
    # NOTE(review): presumably every tree refers to the ooopy passed in
    # here, in which case the parameter could be used directly --
    # confirm against the object returned by ooopy.read.
    for fname, fcontent in self.appendfiles :
        tree.ooopy.append_file (fname, fcontent)
# end def transform
|
|
|
|
def __getitem__ (self, key) :
|
|
return self.dictionary [key]
|
|
# end def __getitem__
|
|
|
|
def __setitem__ (self, key, value) :
|
|
self.dictionary [key] = value
|
|
# end def __setitem__
|
|
# end class Transformer
|