#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting.
# Reichergasse 131, A-3411 Weidling.
# Web: http://www.runtux.com Email: office@runtux.com
# All rights reserved
# ****************************************************************************
#
# This library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# **************************************************************************** from __future__ import absolute_import import time import re try : from xml.etree.ElementTree import dump, SubElement, Element, tostring from xml.etree.ElementTree import _namespace_map except ImportError : from elementtree.ElementTree import dump, SubElement, Element, tostring from elementtree.ElementTree import _namespace_map from copy import deepcopy from ooopy.OOoPy import OOoPy, autosuper from ooopy.OOoPy import files, mimetypes, namespace_by_name from ooopy.Version import VERSION def OOo_Tag (namespace, name, mimetype) : """Return combined XML tag >>> OOo_Tag ('xml', 'id', mimetypes [1]) '{http://www.w3.org/XML/1998/namespace}id' >>> OOo_Tag ('text', 'list', mimetypes [1]) '{urn:oasis:names:tc:opendocument:xmlns:text:1.0}list' """ return "{%s}%s" % (namespace_by_name [mimetype][namespace], name) # end def OOo_Tag def split_tag (tag) : """ Split tag into symbolic namespace and name part -- inverse operation of OOo_Tag. """ ns, t = tag.split ('}') return (_namespace_map [ns [1:]], t) # end def split_tag class Transform (autosuper) : """ Base class for individual transforms on OOo files. An individual transform needs a filename variable for specifying the OOo file the transform should be applied to and an optional prio. Individual transforms are applied according to their prio setting, higher prio means later application of a transform. The filename variable must specify one of the XML files which are part of the OOo document (see files variable above). As the names imply, content.xml contains the contents of the document (text and ad-hoc style definitions), styles.xml contains the style definitions, meta.xml contains meta information like author, editing time, etc. and settings.xml is used to store OOo's settings (menu Tools->Configure). 
""" prio = 100 textbody_names = \ { mimetypes [0] : 'body' , mimetypes [1] : 'text' } paragraph_props = \ { mimetypes [0] : 'properties' , mimetypes [1] : 'paragraph-properties' } font_decls = \ { mimetypes [0] : 'font-decls' , mimetypes [1] : 'font-face-decls' } def __init__ (self, prio = None, transformer = None) : if prio is not None : self.prio = prio self.transformer = None if transformer : self.register (transformer) # end def __init__ def apply (self, root) : """ Apply myself to the element given as root """ raise NotImplementedError, 'derived transforms must implement "apply"' # end def apply def apply_all (self, trees) : """ Apply myself to all the files given in trees. The variable trees contains a dictionary of ElementTree indexed by the name of the OOo File. The standard case is that only one file (namely self.filename) is used. """ assert (self.filename) self.apply (trees [self.filename].getroot ()) # end def apply_all def find_tbody (self, root) : """ Find the node which really contains the text -- different for different OOo versions. """ tbody = root if tbody.tag != self.textbody_tag : tbody = tbody.find ('.//' + self.textbody_tag) return tbody # end def find_tbody def register (self, transformer) : """ Registering with a transformer means being able to access variables stored in the tranformer by other transforms. Also needed for tag-computation: The transformer knows which version of OOo document we are processing. 
""" self.transformer = transformer mt = self.mimetype = transformer.mimetype self.textbody_name = self.textbody_names [mt] self.paragraph_props = self.paragraph_props [mt] self.properties_tag = self.oootag ('style', self.paragraph_props) self.textbody_tag = self.oootag ('office', self.textbody_name) self.font_decls_tag = self.oootag ('office', self.font_decls [mt]) # end def register def oootag (self, namespace, name) : """ Compute long tag version """ return OOo_Tag (namespace, name, self.mimetype) # end def oootag def set (self, variable, value) : """ Set variable in our transformer using naming convention. """ self.transformer [self._varname (variable)] = value # end def set def _varname (self, name) : """ For fulfilling the naming convention of the transformer dictionary (every entry in this dictionary should be prefixed with the class name of the transform) we have this convenience method. Returns variable name prefixed with own class name. """ return ":".join ((self.__class__.__name__, name)) # end def _varname # end class Transform class Transformer (autosuper) : """ Class for applying a set of transforms to a given ooopy object. The transforms are applied to the specified file in priority order. When applying transforms we have a mechanism for communication of transforms. We give the transformer to the individual transforms as a parameter. The transforms may use the transformer like a dictionary for storing values and retrieving values left by previous transforms. As a naming convention each transform should use its class name as a prefix for storing values in the dictionary. >>> import Transforms >>> from Transforms import renumber_all, get_meta, set_meta, meta_counts >>> try : ... from io import StringIO, BytesIO ... StringIO = BytesIO ... except ImportError : ... 
from StringIO import StringIO >>> sio = BytesIO () >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio) >>> m = o.mimetype >>> c = o.read ('content.xml') >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) >>> body [-1].get (OOo_Tag ('text', 'style-name', mimetype = m)) 'Standard' >>> def cb (name) : ... r = { 'street' : 'Beispielstrasse 42' ... , 'firstname' : 'Hugo' ... , 'salutation' : 'Frau' ... } ... if r.has_key (name) : return r [name] ... return None ... >>> p = get_meta (m) >>> t = Transformer (m, p) >>> t ['a'] = 'a' >>> t ['a'] 'a' >>> t.transform (o) >>> p.set ('a', 'b') >>> t ['Attribute_Access:a'] 'b' >>> t = Transformer ( ... m ... , Transforms.Autoupdate () ... , Transforms.Editinfo () ... , Transforms.Field_Replace (prio = 99, replace = cb) ... , Transforms.Field_Replace ... ( replace = ... { 'salutation' : '' ... , 'firstname' : 'Erika' ... , 'lastname' : 'Musterfrau' ... , 'country' : 'D' ... , 'postalcode' : '00815' ... , 'city' : 'Niemandsdorf' ... } ... ) ... , Transforms.Addpagebreak_Style () ... , Transforms.Addpagebreak () ... ) >>> t.transform (o) >>> o.close () >>> ov = sio.getvalue () >>> f = open ("testout.sxw", "wb") >>> f.write (ov) >>> f.close () >>> o = OOoPy (infile = sio) >>> c = o.read ('content.xml') >>> m = o.mimetype >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) >>> for node in body.findall (vset) : ... name = node.get (OOo_Tag ('text', 'name', m)) ... 
print name, ':', node.text salutation : None firstname : Erika lastname : Musterfrau street : Beispielstrasse 42 country : D postalcode : 00815 city : Niemandsdorf salutation : None firstname : Erika lastname : Musterfrau street : Beispielstrasse 42 country : D postalcode : 00815 city : Niemandsdorf >>> body [-1].get (OOo_Tag ('text', 'style-name', mimetype = m)) 'P2' >>> sio = StringIO () >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio) >>> c = o.read ('content.xml') >>> t = Transformer ( ... o.mimetype ... , get_meta (o.mimetype) ... , Transforms.Addpagebreak_Style () ... , Transforms.Mailmerge ... ( iterator = ... ( dict (firstname = 'Erika', lastname = 'Nobody') ... , dict (firstname = 'Eric', lastname = 'Wizard') ... , cb ... ) ... ) ... , renumber_all (o.mimetype) ... , set_meta (o.mimetype) ... , Transforms.Fix_OOo_Tag () ... ) >>> t.transform (o) >>> for i in meta_counts : ... print i, t [':'.join (('Set_Attribute', i))] character-count 951 image-count 0 object-count 0 page-count 3 paragraph-count 113 table-count 3 word-count 162 >>> name = t ['Addpagebreak_Style:stylename'] >>> name 'P2' >>> o.close () >>> ov = sio.getvalue () >>> f = open ("testout2.sxw", "wb") >>> f.write (ov) >>> f.close () >>> o = OOoPy (infile = sio) >>> m = o.mimetype >>> c = o.read ('content.xml') >>> body = c.find (OOo_Tag ('office', 'body', m)) >>> for n in body.findall ('.//*') : ... zidx = n.get (OOo_Tag ('draw', 'z-index', m)) ... if zidx : ... print ':'.join(split_tag (n.tag)), zidx draw:text-box 0 draw:rect 1 draw:text-box 3 draw:rect 4 draw:text-box 6 draw:rect 7 draw:text-box 2 draw:text-box 5 draw:text-box 8 >>> for n in body.findall ('.//' + OOo_Tag ('text', 'p', m)) : ... if n.get (OOo_Tag ('text', 'style-name', m)) == name : ... print n.tag {http://openoffice.org/2000/text}p {http://openoffice.org/2000/text}p >>> vset = './/' + OOo_Tag ('text', 'variable-set', m) >>> for n in body.findall (vset) : ... 
if n.get (OOo_Tag ('text', 'name', m), None).endswith ('name') : ... name = n.get (OOo_Tag ('text', 'name', m)) ... print name, ':', n.text firstname : Erika lastname : Nobody firstname : Eric lastname : Wizard firstname : Hugo lastname : Testman firstname : Erika lastname : Nobody firstname : Eric lastname : Wizard firstname : Hugo lastname : Testman >>> for n in body.findall ('.//' + OOo_Tag ('draw', 'text-box', m)) : ... print n.get (OOo_Tag ('draw', 'name', m)), ... print n.get (OOo_Tag ('text', 'anchor-page-number', m)) Frame1 1 Frame2 2 Frame3 3 Frame4 None Frame5 None Frame6 None >>> for n in body.findall ('.//' + OOo_Tag ('text', 'section', m)) : ... print n.get (OOo_Tag ('text', 'name', m)) Section1 Section2 Section3 Section4 Section5 Section6 Section7 Section8 Section9 Section10 Section11 Section12 Section13 Section14 Section15 Section16 Section17 Section18 >>> for n in body.findall ('.//' + OOo_Tag ('table', 'table', m)) : ... print n.get (OOo_Tag ('table', 'name', m)) Table1 Table2 Table3 >>> r = o.read ('meta.xml') >>> meta = r.find ('.//' + OOo_Tag ('meta', 'document-statistic', m)) >>> for i in meta_counts : ... print i, repr (meta.get (OOo_Tag ('meta', i, m))) character-count '951' image-count '0' object-count '0' page-count '3' paragraph-count '113' table-count '3' word-count '162' >>> o.close () >>> sio = StringIO () >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = sio) >>> tf = ('testfiles/test.sxw', 'testfiles/rechng.sxw') >>> t = Transformer ( ... o.mimetype ... , get_meta (o.mimetype) ... , Transforms.Concatenate (*tf) ... , renumber_all (o.mimetype) ... , set_meta (o.mimetype) ... , Transforms.Fix_OOo_Tag () ... ) >>> t.transform (o) >>> for i in meta_counts : ... 
print i, repr (t [':'.join (('Set_Attribute', i))]) character-count '1131' image-count '0' object-count '0' page-count '3' paragraph-count '168' table-count '2' word-count '160' >>> o.close () >>> ov = sio.getvalue () >>> f = open ("testout3.sxw", "wb") >>> f.write (ov) >>> f.close () >>> o = OOoPy (infile = sio) >>> m = o.mimetype >>> c = o.read ('content.xml') >>> s = o.read ('styles.xml') >>> for n in c.findall ('./*/*') : ... name = n.get (OOo_Tag ('style', 'name', m)) ... if name : ... parent = n.get (OOo_Tag ('style', 'parent-style-name', m)) ... print '"%s", "%s"' % (name, parent) "Tahoma1", "None" "Bitstream Vera Sans", "None" "Tahoma", "None" "Nimbus Roman No9 L", "None" "Courier New", "None" "Arial Black", "None" "New Century Schoolbook", "None" "Helvetica", "None" "Table1", "None" "Table1.A", "None" "Table1.A1", "None" "Table1.E1", "None" "Table1.A2", "None" "Table1.E2", "None" "P1", "None" "fr1", "Frame" "fr2", "None" "fr3", "Frame" "Sect1", "None" "gr1", "None" "P2", "Standard" "Standard_Concat", "None" "Concat_P1", "Concat_Frame contents" "Concat_P2", "Concat_Frame contents" "P3", "Concat_Frame contents" "P4", "Concat_Frame contents" "P5", "Concat_Standard" "P6", "Concat_Standard" "P7", "Concat_Frame contents" "P8", "Concat_Frame contents" "P9", "Concat_Frame contents" "P10", "Concat_Frame contents" "P11", "Concat_Frame contents" "P12", "Concat_Frame contents" "P13", "Concat_Frame contents" "P15", "Concat_Standard" "P16", "Concat_Standard" "P17", "Concat_Standard" "P18", "Concat_Standard" "P19", "Concat_Standard" "P20", "Concat_Standard" "P21", "Concat_Standard" "P22", "Concat_Standard" "P23", "Concat_Standard" "T1", "None" "Concat_fr1", "Concat_Frame" "Concat_fr2", "Concat_Frame" "Concat_fr3", "Concat_Frame" "fr4", "Concat_Frame" "fr5", "Concat_Frame" "fr6", "Concat_Frame" "Concat_Sect1", "None" "N0", "None" "N2", "None" "P15_Concat", "Concat_Standard" >>> for n in s.findall ('./*/*') : ... name = n.get (OOo_Tag ('style', 'name', m)) ... 
if name : ... parent = n.get (OOo_Tag ('style', 'parent-style-name', m)) ... print '"%s", "%s"' % (name, parent) "Tahoma1", "None" "Bitstream Vera Sans", "None" "Tahoma", "None" "Nimbus Roman No9 L", "None" "Courier New", "None" "Arial Black", "None" "New Century Schoolbook", "None" "Helvetica", "None" "Standard", "None" "Text body", "Standard" "List", "Text body" "Table Contents", "Text body" "Table Heading", "Table Contents" "Caption", "Standard" "Frame contents", "Text body" "Index", "Standard" "Frame", "None" "OLE", "None" "Concat_Standard", "None" "Concat_Text body", "Concat_Standard" "Concat_List", "Concat_Text body" "Concat_Caption", "Concat_Standard" "Concat_Frame contents", "Concat_Text body" "Concat_Index", "Concat_Standard" "Horizontal Line", "Concat_Standard" "Internet link", "None" "Visited Internet Link", "None" "Concat_Frame", "None" "Concat_OLE", "None" "pm1", "None" "Concat_pm1", "None" "Standard", "None" "Concat_Standard", "None" >>> for n in c.findall ('.//' + OOo_Tag ('text', 'variable-decl', m)) : ... name = n.get (OOo_Tag ('text', 'name', m)) ... print name salutation firstname lastname street country postalcode city date invoice.invoice_no invoice.abo.aboprice.abotype.description address.salutation address.title address.firstname address.lastname address.function address.street address.country address.postalcode address.city invoice.subscriber.salutation invoice.subscriber.title invoice.subscriber.firstname invoice.subscriber.lastname invoice.subscriber.function invoice.subscriber.street invoice.subscriber.country invoice.subscriber.postalcode invoice.subscriber.city invoice.period_start invoice.period_end invoice.currency.name invoice.amount invoice.subscriber.initial >>> for n in c.findall ('.//' + OOo_Tag ('text', 'sequence-decl', m)) : ... name = n.get (OOo_Tag ('text', 'name', m)) ... print name Illustration Table Text Drawing >>> for n in c.findall ('.//' + OOo_Tag ('text', 'p', m)) : ... 
name = n.get (OOo_Tag ('text', 'style-name', m)) ... if not name or name.startswith ('Concat') : ... print ">%s<" % name >Concat_P1< >Concat_P2< >Concat_Frame contents< >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'text-box', m)) : ... attrs = 'name', 'style-name', 'z-index' ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) ... print attrs ['Frame1', 'fr1', '0', '1'] ['Frame2', 'fr1', '3', '2'] ['Frame3', 'Concat_fr1', '6', '3'] ['Frame4', 'Concat_fr2', '7', '3'] ['Frame5', 'Concat_fr3', '8', '3'] ['Frame6', 'Concat_fr1', '9', '3'] ['Frame7', 'fr4', '10', '3'] ['Frame8', 'fr4', '11', '3'] ['Frame9', 'fr4', '12', '3'] ['Frame10', 'fr4', '13', '3'] ['Frame11', 'fr4', '14', '3'] ['Frame12', 'fr4', '15', '3'] ['Frame13', 'fr5', '16', '3'] ['Frame14', 'fr4', '18', '3'] ['Frame15', 'fr4', '19', '3'] ['Frame16', 'fr4', '20', '3'] ['Frame17', 'fr6', '17', '3'] ['Frame18', 'fr4', '23', '3'] ['Frame19', 'fr3', '2', None] ['Frame20', 'fr3', '5', None] >>> for n in c.findall ('.//' + OOo_Tag ('text', 'section', m)) : ... attrs = 'name', 'style-name' ... attrs = [n.get (OOo_Tag ('text', i, m)) for i in attrs] ... 
print attrs ['Section1', 'Sect1'] ['Section2', 'Sect1'] ['Section3', 'Sect1'] ['Section4', 'Sect1'] ['Section5', 'Sect1'] ['Section6', 'Sect1'] ['Section7', 'Concat_Sect1'] ['Section8', 'Concat_Sect1'] ['Section9', 'Concat_Sect1'] ['Section10', 'Concat_Sect1'] ['Section11', 'Concat_Sect1'] ['Section12', 'Concat_Sect1'] ['Section13', 'Concat_Sect1'] ['Section14', 'Concat_Sect1'] ['Section15', 'Concat_Sect1'] ['Section16', 'Concat_Sect1'] ['Section17', 'Concat_Sect1'] ['Section18', 'Concat_Sect1'] ['Section19', 'Concat_Sect1'] ['Section20', 'Concat_Sect1'] ['Section21', 'Concat_Sect1'] ['Section22', 'Concat_Sect1'] ['Section23', 'Concat_Sect1'] ['Section24', 'Concat_Sect1'] ['Section25', 'Concat_Sect1'] ['Section26', 'Concat_Sect1'] ['Section27', 'Concat_Sect1'] ['Section28', 'Sect1'] ['Section29', 'Sect1'] ['Section30', 'Sect1'] ['Section31', 'Sect1'] ['Section32', 'Sect1'] ['Section33', 'Sect1'] >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'rect', m)) : ... attrs = 'style-name', 'text-style-name', 'z-index' ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) ... print attrs ['gr1', 'P1', '1', '1'] ['gr1', 'P1', '4', '2'] >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'line', m)) : ... attrs = 'style-name', 'text-style-name', 'z-index' ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] ... print attrs ['gr1', 'P1', '24'] ['gr1', 'P1', '22'] ['gr1', 'P1', '21'] >>> for n in s.findall ('.//' + OOo_Tag ('style', 'style', m)) : ... if n.get (OOo_Tag ('style', 'name', m)).startswith ('Co') : ... attrs = 'name', 'class', 'family' ... attrs = [n.get (OOo_Tag ('style', i, m)) for i in attrs] ... print attrs ... props = n.find ('./' + OOo_Tag ('style', 'properties', m)) ... if props is not None and len (props) : ... 
props [0].tag ['Concat_Standard', 'text', 'paragraph'] '{http://openoffice.org/2000/style}tab-stops' ['Concat_Text body', 'text', 'paragraph'] ['Concat_List', 'list', 'paragraph'] ['Concat_Caption', 'extra', 'paragraph'] ['Concat_Frame contents', 'extra', 'paragraph'] ['Concat_Index', 'index', 'paragraph'] ['Concat_Frame', None, 'graphics'] ['Concat_OLE', None, 'graphics'] >>> for n in c.findall ('.//*') : ... zidx = n.get (OOo_Tag ('draw', 'z-index', m)) ... if zidx : ... print ':'.join(split_tag (n.tag)), zidx draw:text-box 0 draw:rect 1 draw:text-box 3 draw:rect 4 draw:text-box 6 draw:text-box 7 draw:text-box 8 draw:text-box 9 draw:text-box 10 draw:text-box 11 draw:text-box 12 draw:text-box 13 draw:text-box 14 draw:text-box 15 draw:text-box 16 draw:text-box 18 draw:text-box 19 draw:text-box 20 draw:text-box 17 draw:text-box 23 draw:line 24 draw:text-box 2 draw:text-box 5 draw:line 22 draw:line 21 >>> sio = StringIO () >>> o = OOoPy (infile = 'testfiles/carta.stw', outfile = sio) >>> t = Transformer ( ... o.mimetype ... , get_meta (o.mimetype) ... , Transforms.Addpagebreak_Style () ... , Transforms.Mailmerge ... ( iterator = ... ( dict ... ( Spett = "Spettabile" ... , contraente = "First person" ... , indirizzo = "street? 1" ... , tipo = "racc. A.C." ... , luogo = "Varese" ... , oggetto = "Saluti" ... ) ... , dict ... ( Spett = "Egregio" ... , contraente = "Second Person" ... , indirizzo = "street? 2" ... , tipo = "Raccomandata" ... , luogo = "Gavirate" ... , oggetto = "Ossequi" ... ) ... ) ... ) ... , renumber_all (o.mimetype) ... , set_meta (o.mimetype) ... , Transforms.Fix_OOo_Tag () ... ) >>> t.transform(o) >>> o.close() >>> ov = sio.getvalue () >>> f = open ("carta-out.stw", "wb") >>> f.write (ov) >>> f.close () >>> o = OOoPy (infile = sio) >>> m = o.mimetype >>> c = o.read ('content.xml') >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) >>> for node in body.findall (vset) : ... 
name = node.get (OOo_Tag ('text', 'name', m)) ... print name, ':', node.text Spett : Spettabile contraente : First person indirizzo : street? 1 Spett : Egregio contraente : Second Person indirizzo : street? 2 tipo : racc. A.C. luogo : Varese oggetto : Saluti tipo : Raccomandata luogo : Gavirate oggetto : Ossequi >>> sio = StringIO () >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = sio) >>> t = Transformer ( ... o.mimetype ... , get_meta (o.mimetype) ... , Transforms.Addpagebreak_Style () ... , Transforms.Mailmerge ... ( iterator = ... ( dict (firstname = 'Erika', lastname = 'Nobody') ... , dict (firstname = 'Eric', lastname = 'Wizard') ... , cb ... ) ... ) ... , renumber_all (o.mimetype) ... , set_meta (o.mimetype) ... , Transforms.Fix_OOo_Tag () ... ) >>> t.transform (o) >>> for i in meta_counts : ... print i, t [':'.join (('Set_Attribute', i))] character-count 951 image-count 0 object-count 0 page-count 3 paragraph-count 53 table-count 3 word-count 162 >>> name = t ['Addpagebreak_Style:stylename'] >>> name 'P2' >>> o.close () >>> ov = sio.getvalue () >>> f = open ("testout.odt", "wb") >>> f.write (ov) >>> f.close () >>> o = OOoPy (infile = sio) >>> m = o.mimetype >>> c = o.read ('content.xml') >>> body = c.find (OOo_Tag ('office', 'body', m)) >>> for n in body.findall ('.//*') : ... zidx = n.get (OOo_Tag ('draw', 'z-index', m)) ... if zidx : ... print ':'.join(split_tag (n.tag)), zidx draw:frame 0 draw:rect 1 draw:frame 3 draw:rect 4 draw:frame 6 draw:rect 7 draw:frame 2 draw:frame 5 draw:frame 8 >>> for n in body.findall ('.//' + OOo_Tag ('text', 'p', m)) : ... if n.get (OOo_Tag ('text', 'style-name', m)) == name : ... print n.tag {urn:oasis:names:tc:opendocument:xmlns:text:1.0}p {urn:oasis:names:tc:opendocument:xmlns:text:1.0}p >>> vset = './/' + OOo_Tag ('text', 'variable-set', m) >>> for n in body.findall (vset) : ... if n.get (OOo_Tag ('text', 'name', m), None).endswith ('name') : ... name = n.get (OOo_Tag ('text', 'name', m)) ... 
print name, ':', n.text firstname : Erika lastname : Nobody firstname : Eric lastname : Wizard firstname : Hugo lastname : Testman firstname : Erika lastname : Nobody firstname : Eric lastname : Wizard firstname : Hugo lastname : Testman >>> for n in body.findall ('.//' + OOo_Tag ('draw', 'frame', m)) : ... print n.get (OOo_Tag ('draw', 'name', m)), ... print n.get (OOo_Tag ('text', 'anchor-page-number', m)) Frame1 1 Frame2 2 Frame3 3 Frame4 None Frame5 None Frame6 None >>> for n in body.findall ('.//' + OOo_Tag ('text', 'section', m)) : ... print n.get (OOo_Tag ('text', 'name', m)) Section1 Section2 Section3 Section4 Section5 Section6 Section7 Section8 Section9 Section10 Section11 Section12 Section13 Section14 Section15 Section16 Section17 Section18 >>> for n in body.findall ('.//' + OOo_Tag ('table', 'table', m)) : ... print n.get (OOo_Tag ('table', 'name', m)) Table1 Table2 Table3 >>> r = o.read ('meta.xml') >>> meta = r.find ('.//' + OOo_Tag ('meta', 'document-statistic', m)) >>> for i in meta_counts : ... print i, repr (meta.get (OOo_Tag ('meta', i, m))) character-count '951' image-count '0' object-count '0' page-count '3' paragraph-count '53' table-count '3' word-count '162' >>> o.close () >>> sio = StringIO () >>> o = OOoPy (infile = 'testfiles/carta.odt', outfile = sio) >>> t = Transformer ( ... o.mimetype ... , get_meta (o.mimetype) ... , Transforms.Addpagebreak_Style () ... , Transforms.Mailmerge ... ( iterator = ... ( dict ... ( Spett = "Spettabile" ... , contraente = "First person" ... , indirizzo = "street? 1" ... , tipo = "racc. A.C." ... , luogo = "Varese" ... , oggetto = "Saluti" ... ) ... , dict ... ( Spett = "Egregio" ... , contraente = "Second Person" ... , indirizzo = "street? 2" ... , tipo = "Raccomandata" ... , luogo = "Gavirate" ... , oggetto = "Ossequi" ... ) ... ) ... ) ... , renumber_all (o.mimetype) ... , set_meta (o.mimetype) ... , Transforms.Fix_OOo_Tag () ... 
) >>> t.transform(o) >>> o.close() >>> ov = sio.getvalue () >>> f = open ("carta-out.odt", "wb") >>> f.write (ov) >>> f.close () >>> o = OOoPy (infile = sio) >>> m = o.mimetype >>> c = o.read ('content.xml') >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) >>> for node in body.findall (vset) : ... name = node.get (OOo_Tag ('text', 'name', m)) ... print name, ':', node.text Spett : Spettabile contraente : First person indirizzo : street? 1 Spett : Egregio contraente : Second Person indirizzo : street? 2 tipo : racc. A.C. luogo : Varese oggetto : Saluti tipo : Raccomandata luogo : Gavirate oggetto : Ossequi >>> sio = StringIO () >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = sio) >>> tf = ('testfiles/test.odt', 'testfiles/rechng.odt') >>> t = Transformer ( ... o.mimetype ... , get_meta (o.mimetype) ... , Transforms.Concatenate (*tf) ... , renumber_all (o.mimetype) ... , set_meta (o.mimetype) ... , Transforms.Fix_OOo_Tag () ... ) >>> t.transform (o) >>> for i in meta_counts : ... print i, repr (t [':'.join (('Set_Attribute', i))]) character-count '1131' image-count '0' object-count '0' page-count '3' paragraph-count '80' table-count '2' word-count '159' >>> o.close () >>> ov = sio.getvalue () >>> f = open ("testout3.odt", "wb") >>> f.write (ov) >>> f.close () >>> o = OOoPy (infile = sio) >>> m = o.mimetype >>> c = o.read ('content.xml') >>> s = o.read ('styles.xml') >>> for n in c.findall ('./*/*') : ... name = n.get (OOo_Tag ('style', 'name', m)) ... if name : ... parent = n.get (OOo_Tag ('style', 'parent-style-name', m)) ... 
print '"%s", "%s"' % (name, parent) "Tahoma1", "None" "Bitstream Vera Sans", "None" "Tahoma", "None" "Nimbus Roman No9 L", "None" "Courier New", "None" "Arial Black", "None" "New Century Schoolbook", "None" "Times New Roman", "None" "Arial", "None" "Helvetica", "None" "Table1", "None" "Table1.A", "None" "Table1.A1", "None" "Table1.E1", "None" "Table1.A2", "None" "Table1.E2", "None" "P1", "None" "fr1", "Frame" "fr2", "Frame" "Sect1", "None" "gr1", "None" "P2", "Standard" "Standard_Concat", "None" "Concat_P1", "Concat_Frame_20_contents" "Concat_P2", "Concat_Frame_20_contents" "P3", "Concat_Frame_20_contents" "P4", "Concat_Standard" "P5", "Concat_Standard" "P6", "Concat_Frame_20_contents" "P7", "Concat_Frame_20_contents" "P8", "Concat_Frame_20_contents" "P9", "Concat_Frame_20_contents" "P10", "Concat_Frame_20_contents" "P11", "Concat_Frame_20_contents" "P12", "Concat_Frame_20_contents" "P14", "Concat_Standard" "P15", "Concat_Standard" "P16", "Concat_Standard" "P17", "Concat_Standard" "P18", "Concat_Standard" "P19", "Concat_Standard" "P20", "Concat_Standard" "P21", "Concat_Standard" "P22", "Concat_Standard" "P23", "Concat_Standard" "Concat_fr1", "Frame" "Concat_fr2", "Frame" "fr3", "Frame" "fr4", "Frame" "fr5", "Frame" "fr6", "Frame" "Concat_gr1", "None" "N0", "None" "N2", "None" "P14_Concat", "Concat_Standard" >>> for n in c.findall ('.//' + OOo_Tag ('text', 'variable-decl', m)) : ... name = n.get (OOo_Tag ('text', 'name', m)) ... 
print name salutation firstname lastname street country postalcode city date invoice.invoice_no invoice.abo.aboprice.abotype.description address.salutation address.title address.firstname address.lastname address.function address.street address.country address.postalcode address.city invoice.subscriber.salutation invoice.subscriber.title invoice.subscriber.firstname invoice.subscriber.lastname invoice.subscriber.function invoice.subscriber.street invoice.subscriber.country invoice.subscriber.postalcode invoice.subscriber.city invoice.period_start invoice.period_end invoice.currency.name invoice.amount invoice.subscriber.initial >>> for n in c.findall ('.//' + OOo_Tag ('text', 'sequence-decl', m)) : ... name = n.get (OOo_Tag ('text', 'name', m)) ... print name Illustration Table Text Drawing >>> for n in c.findall ('.//' + OOo_Tag ('text', 'p', m)) : ... name = n.get (OOo_Tag ('text', 'style-name', m)) ... if not name or name.startswith ('Concat') : ... print ':'.join(split_tag (n.tag)), ">%s<" % name text:p >None< text:p >None< text:p >Concat_P1< text:p >Concat_P1< text:p >Concat_P2< text:p >Concat_P2< text:p >Concat_P2< text:p >Concat_P2< text:p >Concat_P2< text:p >Concat_P2< text:p >Concat_P2< text:p >Concat_P2< text:p >Concat_P2< text:p >Concat_P2< text:p >Concat_Frame_20_contents< text:p >None< text:p >None< text:p >None< >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'frame', m)) : ... attrs = 'name', 'style-name', 'z-index' ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) ... 
print attrs ['Frame1', 'fr1', '0', '1'] ['Frame2', 'fr1', '3', '2'] ['Frame3', 'Concat_fr1', '6', '3'] ['Frame4', 'Concat_fr2', '7', '3'] ['Frame5', 'fr3', '8', '3'] ['Frame6', 'Concat_fr1', '9', '3'] ['Frame7', 'fr4', '10', '3'] ['Frame8', 'fr4', '11', '3'] ['Frame9', 'fr4', '12', '3'] ['Frame10', 'fr4', '13', '3'] ['Frame11', 'fr4', '14', '3'] ['Frame12', 'fr4', '15', '3'] ['Frame13', 'fr5', '16', '3'] ['Frame14', 'fr4', '18', '3'] ['Frame15', 'fr4', '19', '3'] ['Frame16', 'fr4', '20', '3'] ['Frame17', 'fr6', '17', '3'] ['Frame18', 'fr4', '23', '3'] ['Frame19', 'fr2', '2', None] ['Frame20', 'fr2', '5', None] >>> for n in c.findall ('.//' + OOo_Tag ('text', 'section', m)) : ... attrs = 'name', 'style-name' ... attrs = [n.get (OOo_Tag ('text', i, m)) for i in attrs] ... print attrs ['Section1', 'Sect1'] ['Section2', 'Sect1'] ['Section3', 'Sect1'] ['Section4', 'Sect1'] ['Section5', 'Sect1'] ['Section6', 'Sect1'] ['Section7', 'Sect1'] ['Section8', 'Sect1'] ['Section9', 'Sect1'] ['Section10', 'Sect1'] ['Section11', 'Sect1'] ['Section12', 'Sect1'] ['Section13', 'Sect1'] ['Section14', 'Sect1'] ['Section15', 'Sect1'] ['Section16', 'Sect1'] ['Section17', 'Sect1'] ['Section18', 'Sect1'] ['Section19', 'Sect1'] ['Section20', 'Sect1'] ['Section21', 'Sect1'] ['Section22', 'Sect1'] ['Section23', 'Sect1'] ['Section24', 'Sect1'] ['Section25', 'Sect1'] ['Section26', 'Sect1'] ['Section27', 'Sect1'] ['Section28', 'Sect1'] ['Section29', 'Sect1'] ['Section30', 'Sect1'] ['Section31', 'Sect1'] ['Section32', 'Sect1'] ['Section33', 'Sect1'] >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'rect', m)) : ... attrs = 'style-name', 'text-style-name', 'z-index' ... attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] ... attrs.append (n.get (OOo_Tag ('text', 'anchor-page-number', m))) ... print attrs ['gr1', 'P1', '1', '1'] ['gr1', 'P1', '4', '2'] >>> for n in c.findall ('.//' + OOo_Tag ('draw', 'line', m)) : ... attrs = 'style-name', 'text-style-name', 'z-index' ... 
attrs = [n.get (OOo_Tag ('draw', i, m)) for i in attrs] ... print attrs ['Concat_gr1', 'P1', '24'] ['Concat_gr1', 'P1', '22'] ['Concat_gr1', 'P1', '21'] >>> for n in s.findall ('.//' + OOo_Tag ('style', 'style', m)) : ... if n.get (OOo_Tag ('style', 'name', m)).startswith ('Co') : ... attrs = 'name', 'display-name', 'class', 'family' ... attrs = [n.get (OOo_Tag ('style', i, m)) for i in attrs] ... print attrs ... props = n.find ('./' + OOo_Tag ('style', 'properties', m)) ... if props is not None and len (props) : ... props [0].tag ['Concat_Standard', None, 'text', 'paragraph'] ['Concat_Text_20_body', 'Concat Text body', 'text', 'paragraph'] ['Concat_List', None, 'list', 'paragraph'] ['Concat_Caption', None, 'extra', 'paragraph'] ['Concat_Frame_20_contents', 'Concat Frame contents', 'extra', 'paragraph'] ['Concat_Index', None, 'index', 'paragraph'] >>> for n in c.findall ('.//*') : ... zidx = n.get (OOo_Tag ('draw', 'z-index', m)) ... if zidx : ... print ':'.join(split_tag (n.tag)), zidx draw:frame 0 draw:rect 1 draw:frame 3 draw:rect 4 draw:frame 6 draw:frame 7 draw:frame 8 draw:frame 9 draw:frame 10 draw:frame 11 draw:frame 12 draw:frame 13 draw:frame 14 draw:frame 15 draw:frame 16 draw:frame 18 draw:frame 19 draw:frame 20 draw:frame 17 draw:frame 23 draw:line 24 draw:frame 2 draw:frame 5 draw:line 22 draw:line 21 >>> from os import system >>> system ('python bin/ooo_fieldreplace -i testfiles/test.odt ' ... '-o testout.odt ' ... 'salutation=Frau firstname=Erika lastname=Musterfrau ' ... 'country=D postalcode=00815 city=Niemandsdorf ' ... 'street="Beispielstrasse 42"') 0 >>> o = OOoPy (infile = 'testout.odt') >>> c = o.read ('content.xml') >>> m = o.mimetype >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) >>> for node in body.findall (vset) : ... name = node.get (OOo_Tag ('text', 'name', m)) ... 
print name, ':', node.text salutation : Frau firstname : Erika lastname : Musterfrau street : Beispielstrasse 42 country : D postalcode : 00815 city : Niemandsdorf salutation : Frau firstname : Erika lastname : Musterfrau street : Beispielstrasse 42 country : D postalcode : 00815 city : Niemandsdorf >>> o.close () >>> system ("bin/ooo_mailmerge -o testout.odt -d'|' " ... "testfiles/carta.odt testfiles/x.csv") 0 >>> o = OOoPy (infile = 'testout.odt') >>> m = o.mimetype >>> c = o.read ('content.xml') >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) >>> vset = './/' + OOo_Tag ('text', 'variable-set', mimetype = m) >>> for node in body.findall (vset) : ... name = node.get (OOo_Tag ('text', 'name', m)) ... print name, ':', node.text Spett : Spettabile contraente : First person indirizzo : street? 1 Spett : Egregio contraente : Second Person indirizzo : street? 2 tipo : racc. A.C. luogo : Varese oggetto : Saluti tipo : Raccomandata luogo : Gavirate oggetto : Ossequi >>> o.close () >>> infile = 'testfiles/testenum.odt' >>> o = OOoPy (infile = infile, outfile = 'xyzzy.odt') >>> t = Transformer ( ... o.mimetype ... , get_meta (o.mimetype) ... , Transforms.Addpagebreak_Style () ... , Transforms.Mailmerge ... ( iterator = ... ( dict (firstname = 'Erika', lastname = 'Nobody') ... , dict (firstname = 'Eric', lastname = 'Wizard') ... , cb ... ) ... ) ... , renumber_all (o.mimetype) ... , set_meta (o.mimetype) ... , Transforms.Fix_OOo_Tag () ... ) >>> t.transform (o) >>> o.close () >>> o = OOoPy (infile = 'xyzzy.odt') >>> m = o.mimetype >>> c = o.read ('content.xml') >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) >>> textlist = './/' + OOo_Tag ('text', 'list', m) >>> for node in body.findall (textlist) : ... id = node.get (OOo_Tag ('xml', 'id', m)) ... print 'xml:id', ':', id xml:id : list1 xml:id : list2 xml:id : list3 >>> o = OOoPy (infile = 'testfiles/page1.odt', outfile = 'xyzzy.odt') >>> m = o.mimetype >>> t = Transformer ( ... o.mimetype ... 
, get_meta (o.mimetype) ... , Transforms.Concatenate ('testfiles/page2.odt') ... , renumber_all (o.mimetype) ... , set_meta (o.mimetype) ... , Transforms.Fix_OOo_Tag () ... , Transforms.Manifest_Append () ... ) >>> t.transform (o) >>> o.close () >>> o = OOoPy (infile = 'xyzzy.odt') >>> c = o.read ('META-INF/manifest.xml') >>> for node in c.getroot () : ... fe = node.get (OOo_Tag ('manifest', 'full-path', m)) ... print fe / Pictures/10000000000000C80000007941B1A419.jpg Pictures/10000000000000DC000000B02E191635.jpg Pictures/10000000000000DC000000A337377AAA.jpg meta.xml settings.xml content.xml Thumbnails/thumbnail.png layout-cache manifest.rdf Configurations2/accelerator/current.xml Configurations2/ styles.xml >>> for f in o.izip.infolist () : ... print f.filename mimetype settings.xml META-INF/manifest.xml content.xml meta.xml styles.xml Pictures/10000000000000C80000007941B1A419.jpg Pictures/10000000000000DC000000B02E191635.jpg Pictures/10000000000000DC000000A337377AAA.jpg Thumbnails/thumbnail.png layout-cache manifest.rdf Configurations2/images/Bitmaps/ Configurations2/accelerator/current.xml >>> sio = StringIO () >>> o = OOoPy (infile = 'testfiles/tbl_first.odt', outfile = sio) >>> m = o.mimetype >>> t = Transformer ( ... o.mimetype ... , get_meta (o.mimetype) ... , Transforms.Concatenate ('testfiles/tbl_second.odt') ... , renumber_all (o.mimetype) ... , set_meta (o.mimetype) ... , Transforms.Fix_OOo_Tag () ... , Transforms.Manifest_Append () ... ) >>> t.transform (o) >>> o.close () >>> o = OOoPy (infile = sio) >>> c = o.read ('content.xml') >>> body = c.find (OOo_Tag ('office', 'body', mimetype = m)) >>> tbls = './/' + OOo_Tag ('table', 'table', mimetype = m) >>> for table in body.findall (tbls) : ... name = table.get (OOo_Tag ('table', 'style-name', mimetype = m)) ... if name : ... print name ... for t in table.findall ('.//') : ... name = t.get (OOo_Tag ('table', 'style-name', mimetype = m)) ... if name : ... 
print name Tabella1 Tabella1.A Tabella1.A1 Tabella1.B1 Tabella1.A2 Tabella1.B2 Tabella1 Tabella1.A Tabella1.A1 Tabella1.B1 Tabella1.A2 Tabella1.B2 """ def __init__ (self, mimetype, *tf) : assert (mimetype in mimetypes) self.mimetype = mimetype self.transforms = {} for t in tf : self.insert (t) self.dictionary = {} self.has_key = self.dictionary.has_key self.__contains__ = self.has_key # 2-tuples of filename, content self.appendfiles = [] # end def __init__ def insert (self, transform) : """Insert a new transform""" t = transform if t.prio not in self.transforms : self.transforms [t.prio] = [] self.transforms [t.prio].append (t) t.register (self) # end def append def transform (self, ooopy) : """ Apply all the transforms in priority order. Priority order is global over all transforms. """ self.trees = {} for f in files : self.trees [f] = ooopy.read (f) #self.dictionary = {} # clear dict when transforming another ooopy prios = self.transforms.keys () prios.sort () for p in prios : for t in self.transforms [p] : t.apply_all (self.trees) for e in self.trees.itervalues () : e.write () for fname, fcontent in self.appendfiles : e.ooopy.append_file (fname, fcontent) # end def transform def __getitem__ (self, key) : return self.dictionary [key] # end def __getitem__ def __setitem__ (self, key, value) : self.dictionary [key] = value # end def __setitem__ # end class Transformer