#!/usr/bin/env python # -*- coding: iso-8859-1 -*- # Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting. # Reichergasse 131, A-3411 Weidling. # Web: http://www.runtux.com Email: office@runtux.com # All rights reserved # **************************************************************************** # # This library is free software; you can redistribute it and/or modify # it under the terms of the GNU Library General Public License as # published by the Free Software Foundation; either version 2 of the # License, or (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU Library General Public License for more details. # # You should have received a copy of the GNU Library General Public # License along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # **************************************************************************** from __future__ import absolute_import import time import re try : from xml.etree.ElementTree import dump, SubElement, Element, tostring except ImportError : from elementtree.ElementTree import dump, SubElement, Element, tostring from copy import deepcopy from ooopy.OOoPy import OOoPy, autosuper from ooopy.Transformer import files, split_tag, OOo_Tag, Transform from ooopy.Transformer import mimetypes, namespace_by_name from ooopy.Version import VERSION # counts in meta.xml meta_counts = \ ( 'character-count', 'image-count', 'object-count', 'page-count' , 'paragraph-count', 'table-count', 'word-count' ) class Access_Attribute (autosuper) : """ For performance reasons we do not specify a separate transform for each attribute-read or -change operation. Instead we define all the attribute accesses we want to perform as objects that follow the attribute access api and apply them all using an Attribute_Access in one go. """ def __init__ (self, key = None, prefix = None, ** kw) : self.__super.__init__ (key = key, prefix = prefix, **kw) self.key = key if key : if not prefix : prefix = self.__class__.__name__ self.key = ':'.join ((prefix, key)) # end def __init__ def register (self, transformer) : self.transformer = transformer # end def register def use_value (self, oldval = None) : """ Can change the given value by returning the new value. If returning None or oldval the attribute stays unchanged. """ raise NotImplementedError, "use_value must be defined in derived class" # end def use_value # end class Access_Attribute class Get_Attribute (Access_Attribute) : """ An example of not changing an attribute but only storing the value in the transformer """ def __init__ (self, tag, attr, key, transform = None, ** kw) : self.__super.__init__ (key = key, **kw) self.tag = tag self.attribute = attr self.transform = transform # end def __init__ def use_value (self, oldval = None) : self.transformer [self.key] = oldval return None # end def use_value # end def Get_Attribute class Get_Max (Access_Attribute) : """ Get the maximum value of an attribute """ def __init__ (self, tag, attr, key, transform = None, ** kw) : self.__super.__init__ (key = key, **kw) self.tag = tag self.attribute = attr self.transform = transform # end def __init__ def register (self, transformer) : self.__super.register (transformer) self.transformer [self.key] = -1 # end def register def use_value (self, oldval = None) : if self.transformer [self.key] < oldval : self.transformer [self.key] = oldval return None # end def use_value # end def Get_Max class Renumber (Access_Attribute) : """ Specifies a renumbering transform. OOo has a 'name' attribute for several different tags, e.g., tables, frames, sections etc. These names must be unique in the whole document. OOo itself solves this by appending a unique number to a basename for each element, e.g., sections are named 'Section1', 'Section2', ... Renumber transforms can be applied to correct the numbering after operations that destroy the unique numbering, e.g., after a mailmerge where the same document is repeatedly appended. The force parameter specifies if the new renumbered name should be inserted even if the attribute in question does not exist. """ def __init__ \ (self, tag, name = None, attr = None, start = 1, force = False) : self.__super.__init__ () tag_ns, tag_name = split_tag (tag) self.tag_ns = tag_ns self.tag = tag self.name = name or tag_name [0].upper () + tag_name [1:] self.num = start self.force = force self.attribute = attr # end def __init__ def register (self, transformer) : self.__super.register (transformer) if not self.attribute : self.attribute = OOo_Tag (self.tag_ns, 'name', transformer.mimetype) # end def register def use_value (self, oldval = None) : if oldval is None and not self.force : return name = "%s%d" % (self.name, self.num) self.num += 1 return name # end def use_value # end class Renumber class Set_Attribute (Access_Attribute) : """ Similar to the renumbering transform in that we are assigning new values to some attributes. But in this case we give keys into the Transformer dict to replace some tag attributes. """ def __init__ \ ( self , tag , attr , key = None , transform = None , value = None , oldvalue = None , ** kw ) : self.__super.__init__ (key = key, ** kw) self.tag = tag self.attribute = attr self.transform = transform self.value = value self.oldvalue = oldvalue # end def __init__ def use_value (self, oldval) : if oldval is None : return None if self.oldvalue and oldval != self.oldvalue : return None if self.key and self.transformer.has_key (self.key) : return str (self.transformer [self.key]) return self.value # end def use_value # end class Set_Attribute def set_attributes_from_dict (tag, attr, d) : """ Convenience function: iterate over a dict and return a list of Set_Attribute objects specifying replacement of attributes in the dictionary """ return [Set_Attribute (tag, attr, oldvalue = k, value = v) for k,v in d.iteritems () ] # end def set_attributes_from_dict class Reanchor (Access_Attribute) : """ Similar to the renumbering transform in that we are assigning new values to some attributes. But in this case we want to relocate objects that are anchored to a page. """ def __init__ (self, offset, tag, attr = None) : self.__super.__init__ () self.offset = int (offset) self.tag = tag self.attribute = attr # end def __init__ def register (self, transformer) : self.__super.register (transformer) if not self.attribute : self.attribute = \ OOo_Tag ('text', 'anchor-page-number', transformer.mimetype) # end def register def use_value (self, oldval) : if oldval is None : return oldval return "%d" % (int (oldval) + self.offset) # end def use_value # end class Reanchor # # general transforms applicable to several .xml files # class Attribute_Access (Transform) : """ Read or Change attributes in an OOo document. Can be used for renumbering, moving anchored objects, etc. Expects a list of attribute changer objects that follow the attribute changer API. This API is very simple: - Member function "use_value" returns the new value of an attribute, or if unchanged the old value - The attribute "tag" gives the tag for an element we are searching - The attribute "attribute" gives the name of the attribute we want to read or change. For examples of the attribute changer API, see Renumber and Reanchor above. """ filename = 'content.xml' prio = 110 def __init__ (self, attrchangers, filename = None, ** kw) : self.filename = filename or self.filename self.attrchangers = {} # allow several changers for a single tag self.attrchangers [None] = [] self.changers = attrchangers self.__super.__init__ (** kw) # end def __init__ def register (self, transformer) : """ Register transformer with all attrchangers. """ self.__super.register (transformer) for r in self.changers : if r.tag not in self.attrchangers : self.attrchangers [r.tag] = [] self.attrchangers [r.tag].append (r) r.register (transformer) # end def register def apply (self, root) : """ Search for all tags for which we renumber and replace name """ for n in [root] + root.findall ('.//*') : changers = \ self.attrchangers [None] + self.attrchangers.get (n.tag, []) for r in changers : nval = r.use_value (n.get (r.attribute)) if nval is not None : n.set (r.attribute, nval) # end def apply # end class Attribute_Access # # META-INF/manifest.xml transforms # class Manifest_Append (Transform) : """ The Transformer stores a list of files (and contents) to append. These files are added to the archive later but need to be present in the manifest, too. The file list in the Transformer currently doesn't store a media type (which is one of the parameters in the manifest), the current application of this transform is to add pictures -- these don't have a media type in the files that were checked. So for now we add an empty media type. """ filename = 'META-INF/manifest.xml' prio = 1000 def apply (self, root) : for n, node in enumerate (root) : assert node.tag == self.oootag ('manifest', 'file-entry') path = node.get (self.oootag ('manifest', 'full-path')) assert (path) if path == '/' : break else : assert (not "The manifest needs a '/' entry") for f, _ in self.transformer.appendfiles : e = Element (self.oootag ('manifest', 'file-entry')) e.attrib [self.oootag ('manifest', 'full-path')] = f e.attrib [self.oootag ('manifest', 'media-type')] = '' root.insert (n + 1, e) n += 1 # end def apply # end class Manifest_Append # # meta.xml transforms # class Editinfo (Transform) : """ This is an example of modifying OOo meta info (edit information, author, etc). We set some of the items (program that generated the OOo file, modification time, number of edit cyles and overall edit duration). It's easy to subclass this transform and replace the "replace" variable (pun intended) in the derived class. """ filename = 'meta.xml' prio = 20 repl = \ { ('meta', 'generator') : 'OOoPy field replacement' , ('dc', 'date') : time.strftime ('%Y-%m-%dT%H:%M:%S') , ('meta', 'editing-cycles') : '0' , ('meta', 'editing-duration') : 'PT0M0S' } replace = {} # iterate over all mimetypes, so this works for all known mimetypes # of OOo documents. for m in mimetypes : for params, value in repl.iteritems () : replace [OOo_Tag (mimetype = m, *params)] = value def apply (self, root) : for node in root.findall (self.oootag ('office', 'meta') + '/*') : if self.replace.has_key (node.tag) : node.text = self.replace [node.tag] # end def apply # end class Editinfo # # settings.xml transforms # class Autoupdate (Transform) : """ This is an example of modifying OOo settings. We set some of the AutoUpdate configuration items in OOo to true. We also specify that links should be updated when reading. This was originally intended to make OOo correctly display fields if they were changed with the Field_Replace below (similar to pressing F9 after loading the generated document in OOo). In particular I usually make spaces depend on field contents so that I don't have spurious spaces if a field is empty. Now it would be nice if OOo displayed the spaces correctly after loading a document (It does update the fields before printing, so this is only a cosmetic problem :-). This apparently does not work. If anybody knows how to achieve this, please let me know: mailto:rsc@runtux.com """ filename = 'settings.xml' prio = 20 def apply (self, root) : config = None for config in root.findall \ ( self.oootag ('office', 'settings') + '/' + self.oootag ('config', 'config-item-set') ) : name = config.get (self.oootag ('config', 'name')) if name == 'configuration-settings' : break for node in config.findall (self.oootag ('config', 'config-item')) : name = node.get (self.oootag ('config', 'name')) if name == 'LinkUpdateMode' : # update when reading node.text = '2' # update fields when reading if name == 'FieldAutoUpdate' or name == 'ChartAutoUpdate' : node.text = 'true' # end def apply # end class Autoupdate # # content.xml transforms # class Field_Replace (Transform) : """ Takes a dict of replacement key-value pairs. The key is the name of a variable in OOo. Additional replacement key-value pairs may be specified in ** kw. Alternatively a callback mechanism for variable name lookups is provided. The callback function is given the name of a variable in OOo and is expected to return the replacement value or None if the variable value should not be replaced. """ filename = 'content.xml' prio = 100 def __init__ (self, prio = None, replace = None, ** kw) : """ replace is something behaving like a dict or something callable for name lookups """ self.__super.__init__ (prio, ** kw) self.replace = replace or {} self.dict = kw # end def __init__ def apply (self, root) : tbody = self.find_tbody (root) for tag in 'variable-set', 'variable-get', 'variable-input' : for node in tbody.findall ('.//' + self.oootag ('text', tag)) : attr = 'name' if tag == 'text-input' : attr = 'description' name = node.get (self.oootag ('text', attr)) if callable (self.replace) : replace = self.replace (name) if replace : node.text = replace elif name in self.replace : node.text = self.replace [name] elif name in self.dict : node.text = self.dict [name] # end def apply # end class Field_Replace class Addpagebreak_Style (Transform) : """ This transformation adds a new ad-hoc paragraph style to the content part of the OOo document. This is needed to be able to add new page breaks to an OOo document. Adding a new page break is then a matter of adding an empty paragraph with the given page break style. We first look through all defined paragraph styles for determining a new paragraph style number. Convention is P for paragraph styles. We search the highest number and use this incremented by one for the new style to insert. Then we insert the new style and store the resulting style name in the transformer under the key class_name:stylename where class_name is our own class name. """ filename = 'content.xml' prio = 30 para = re.compile (r'P([0-9]+)') def apply (self, root) : max_style = 0 styles = root.find (self.oootag ('office', 'automatic-styles')) for s in styles.findall ('./' + self.oootag ('style', 'style')) : m = self.para.match (s.get (self.oootag ('style', 'name'), '')) if m : num = int (m.group (1)) if num > max_style : max_style = num stylename = 'P%d' % (max_style + 1) new = SubElement \ ( styles , self.oootag ('style', 'style') , { self.oootag ('style', 'name') : stylename , self.oootag ('style', 'family') : 'paragraph' , self.oootag ('style', 'parent-style-name') : 'Standard' } ) SubElement \ ( new , self.properties_tag , { self.oootag ('fo', 'break-after') : 'page' } ) self.set ('stylename', stylename) # end def apply # end class Addpagebreak_Style class Addpagebreak (Transform) : """ This transformation adds a page break to the last page of the OOo text. This is needed, e.g., when doing mail-merge: We append a page break to the tbody and then append the next page. This transform needs the name of the paragraph style specifying the page break style. Default is to use 'Addpagebreak_Style:stylename' as the key for retrieving the page style. Alternatively the page style or the page style key can be specified in the constructor. """ filename = 'content.xml' prio = 50 def __init__ (self, stylename = None, stylekey = None, ** kw) : self.__super.__init__ (** kw) self.stylename = stylename self.stylekey = stylekey or 'Addpagebreak_Style:stylename' # end def __init__ def apply (self, root) : """append to tbody e.g., """ tbody = self.find_tbody (root) stylename = self.stylename or self.transformer [self.stylekey] SubElement \ ( tbody , self.oootag ('text', 'p') , { self.oootag ('text', 'style-name') : stylename } ) # end def apply # end class Addpagebreak class Fix_OOo_Tag (Transform) : """ OOo writer conditions are attributes where the *value* is prefixed by an XML namespace. If the ooow namespace declaration is not in scope, all conditions will evaluate to false. I consider this a bug (a violation of the ideas of XML) of OOo. Nevertheless to make conditions work, we insert the ooow namespace declaration into the top-level element. """ filename = 'content.xml' prio = 10000 def apply (self, root) : if self.mimetype == mimetypes [1] : root.set ('xmlns:ooow', namespace_by_name [self.mimetype]['ooow']) # end def apply # end class Fix_OOo_Tag class _Body_Concat (Transform) : """ Various methods for modifying the tbody split into various pieces that have to keep sequence in order to not confuse OOo. """ ooo_sections = {} for m in mimetypes : ooo_sections [m] = \ [ { OOo_Tag ('text', 'variable-decls', m) : 1 , OOo_Tag ('text', 'sequence-decls', m) : 1 , OOo_Tag ('text', 'user-field-decls', m) : 1 , OOo_Tag ('office', 'forms', m) : 1 } , { OOo_Tag ('draw', 'frame', m) : 1 , OOo_Tag ('draw', 'rect', m) : 1 , OOo_Tag ('draw', 'text-box', m) : 1 } ] def _textbody (self) : """ We use the office:body (OOo 1.X)/office:text (OOo 1.X) element as a container for various transforms... """ return Element (self.textbody_tag) # end def _textbody def _divide (self, textbody) : """ Divide self.copy into parts that must keep their sequence. We use another textbody tag for storing the parts... Side-effect of setting self.copyparts is intended. """ self.copyparts = self._textbody () self.copyparts.append (self._textbody ()) l = len (self.ooo_sections [self.mimetype]) idx = 0 for e in textbody : while idx < l : if e.tag in self.ooo_sections [self.mimetype][idx] : break else : self.copyparts.append (self._textbody ()) idx += 1 self.copyparts [-1].append (e) declarations = self.copyparts [0] del self.copyparts [0] return declarations # end def _divide def divide_body (self, root) : cont = root if cont.tag != self.oootag ('office', 'document-content') : cont = root.find (self.oootag ('office', 'document-content')) tbody = cont.find (self.oootag ('office', 'body')) # OOo 2.X has an office:text inside office:body that contains # the real text contents: if self.mimetype == mimetypes [1] : cont = tbody tbody = cont.find (self.oootag ('office', 'text')) idx = cont [:].index (tbody) self.tbody = cont [idx] = self._textbody () self.declarations = self._divide (tbody) self.bodyparts = self.copyparts # end def divide_body def append_declarations (self) : for e in self.declarations : self.tbody.append (e) # end def append_declarations def append_to_body (self, cp) : for i in range (len (self.bodyparts)) : for j in cp [i] : self.bodyparts [i].append (j) # end def append_to_body def assemble_body (self) : for p in self.bodyparts : for e in p : self.tbody.append (e) # end def assemble_body def _get_meta (self, var, classname = 'Get_Attribute', prefix = "") : """ get page- and paragraph-count etc. meta-info """ return int (self.transformer [':'.join ((classname, prefix + var))]) # end def _get_meta def _set_meta (self, var, value, classname = 'Set_Attribute', prefix = "") : """ set page- and paragraph-count etc. meta-info """ self.transformer [':'.join ((classname, prefix + var))] = str (value) # end def _set_meta # end class _Body_Concat class Mailmerge (_Body_Concat) : """ This transformation is used to create a mailmerge document using the current document as the template. In the constructor we get an iterator that provides a data set for each item in the iteration. Elements the iterator has to provide are either something that follows the Mapping Type interface (it looks like a dict) or something that is callable and can be used for name-value lookups. A precondition for this transform is the application of the Addpagebreak_Style to guarantee that we know the style for adding a page break to the current document. Alternatively the stylename (or the stylekey if a different name should be used for lookup in the current transformer) can be given in the constructor. """ filename = 'content.xml' prio = 60 def __init__ \ (self, iterator, stylename = None, stylekey = None, ** kw) : self.__super.__init__ (** kw) self.iterator = iterator self.stylename = stylename self.stylekey = stylekey # end def __init__ def apply (self, root) : """ Copy old tbody, create new empty one and repeatedly append the new tbody. """ pb = Addpagebreak \ ( stylename = self.stylename , stylekey = self.stylekey , transformer = self.transformer ) zi = Attribute_Access \ ( (Get_Max (None, self.oootag ('draw', 'z-index'), 'z-index'),) , transformer = self.transformer ) zi.apply (root) pagecount = self._get_meta ('page-count') z_index = self._get_meta ('z-index', classname = 'Get_Max') + 1 ra = Attribute_Access \ ( ( Reanchor (pagecount, self.oootag ('draw', 'text-box')) , Reanchor (pagecount, self.oootag ('draw', 'rect')) , Reanchor (pagecount, self.oootag ('draw', 'frame')) , Reanchor (z_index, None, self.oootag ('draw', 'z-index')) ) , transformer = self.transformer # transformer added ) self.divide_body (root) self.bodyparts = [self._textbody () for i in self.copyparts] count = 0 for i in self.iterator : count += 1 fr = Field_Replace (replace = i, transformer = self.transformer) # add page break only to non-empty tbody # reanchor only after the first mailmerge if len (self.tbody) : # tbody non-empty (but existing!) pb.apply (self.bodyparts [-1]) ra.apply (self.copyparts) else : self.append_declarations () cp = deepcopy (self.copyparts) fr.apply (cp) self.append_to_body (cp) # new page-count: for i in meta_counts : self._set_meta (i, count * self._get_meta (i)) # we have added count-1 paragraphs, because each page-break is a # paragraph. p = 'paragraph-count' self._set_meta \ (p, self._get_meta (p, classname = 'Set_Attribute') + (count - 1)) self.assemble_body () # end def apply # end class Mailmerge def tree_serialise (element, prefix = '', mimetype = mimetypes [1]) : """ Serialise a style-element of an OOo document (e.g., a style:font-decl, style:default-style, etc declaration). We remove the name of the style and return something that is a representation of the style element which can be used as a dictionary key. The serialisation format is a tuple containing the tag as the first item, the attributes (as key,value pairs returned by items()) as the second item and the following items are serialisations of children. """ attr = dict (element.attrib) stylename = OOo_Tag ('style', 'name', mimetype) if stylename in attr : del attr [stylename] attr = attr.items () attr.sort () attr = tuple (attr) serial = [prefix + element.tag, attr] for e in element : serial.append (tree_serialise (e, prefix, mimetype)) return tuple (serial) # end def tree_serialise class Concatenate (_Body_Concat) : """ This transformation is used to create a new document from a concatenation of several documents. In the constructor we get a list of documents to append to the master document. """ prio = 80 style_containers = {} ref_attrs = {} for m in mimetypes : style_containers.update \ ({ OOo_Tag ('office', 'font-decls', m) : 1 , OOo_Tag ('office', 'font-face-decls', m) : 1 , OOo_Tag ('office', 'styles', m) : 1 , OOo_Tag ('office', 'automatic-styles', m) : 1 , OOo_Tag ('office', 'master-styles', m) : 1 }) # Cross-references in OOo document: # 'attribute' references another element with 'tag'. # If attribute names change, we must replace references, too. # attribute : # tag ref_attrs.update \ ({ OOo_Tag ('style', 'parent-style-name', m) : OOo_Tag ('style', 'style', m) , OOo_Tag ('style', 'master-page-name', m) : OOo_Tag ('style', 'master-page', m) , OOo_Tag ('style', 'page-layout-name', m) : # OOo 2.X OOo_Tag ('style', 'page-layout', m) , OOo_Tag ('style', 'page-master-name', m) : OOo_Tag ('style', 'page-master', m) , OOo_Tag ('table', 'style-name', m) : OOo_Tag ('style', 'style', m) , OOo_Tag ('text', 'style-name', m) : OOo_Tag ('style', 'style', m) , OOo_Tag ('draw', 'style-name', m) : OOo_Tag ('style', 'style', m) , OOo_Tag ('draw', 'text-style-name', m) : OOo_Tag ('style', 'style', m) }) stylefiles = ['styles.xml', 'content.xml'] oofiles = stylefiles + ['meta.xml'] body_decl_sections = ['variable-decl', 'sequence-decl'] def __init__ (self, * docs, ** kw) : self.__super.__init__ (** kw) self.docs = [] for doc in docs : self.docs.append (OOoPy (infile = doc)) assert (self.docs [-1].mimetype == self.docs [0].mimetype) # end def __init__ def apply_all (self, trees) : assert (self.docs [0].mimetype == self.transformer.mimetype) self.serialised = {} self.stylenames = {} self.namemaps = [{}] self.tab_depend = {} for s in self.ref_attrs.itervalues () : self.namemaps [0][s] = {} self.body_decls = {} for s in self.body_decl_sections : self.body_decls [s] = {} self.trees = {} for f in self.oofiles : self.trees [f] = [trees [f].getroot ()] self.sections = {} for f in self.stylefiles : self.sections [f] = {} for node in self.trees [f][0] : self.sections [f][node.tag] = node for d in self.docs : self.namemaps.append ({}) for s in self.ref_attrs.itervalues () : self.namemaps [-1][s] = {} for f in self.oofiles : self.trees [f].append (d.read (f).getroot ()) # append a pagebreak style, will be optimized away if duplicate pbs = Addpagebreak_Style (transformer = self.transformer) pbs.apply (self.trees ['content.xml'][0]) get_attr = [] for attr in meta_counts : a = self.oootag ('meta', attr) t = self.oootag ('meta', 'document-statistic') get_attr.append (Get_Attribute (t, a, 'concat-' + attr)) zi = Attribute_Access \ ( (Get_Max (None, self.oootag ('draw', 'z-index'), 'z-index'),) , transformer = self.transformer ) zi.apply (self.trees ['content.xml'][0]) self.zi = Attribute_Access \ ( (Get_Max (None, self.oootag ('draw', 'z-index'), 'concat-z-index') , ) , transformer = self.transformer ) self.getmeta = Attribute_Access \ (get_attr, filename = 'meta.xml', transformer = self.transformer) self.pbname = self.transformer \ [':'.join (('Addpagebreak_Style', 'stylename'))] for s in self.trees ['styles.xml'][0].findall \ ('.//' + self.oootag ('style', 'default-style')) : if s.get (self.oootag ('style', 'family')) == 'paragraph' : default_style = s break self.default_properties = default_style.find \ ('./' + self.properties_tag) self.set_pagestyle () for f in 'styles.xml', 'content.xml' : self.style_merge (f) self.body_concat () self.append_pictures () # end def apply_all def apply_tab_correction (self, node) : """ Check if node depends on a style which has corrected tabs if yes, insert all the default tabs *after* the maximum tab position in that style. """ tab_stops = self.oootag ('style', 'tab-stops') tab_stop = self.oootag ('style', 'tab-stop') tab_pos = self.oootag ('style', 'position') parent = node.get (self.oootag ('style', 'parent-style-name')) if parent in self.tab_depend : for prop in node : if prop.tag != self.properties_tag : continue for sub in prop : if sub.tag == tab_stops : self.tab_depend [parent] = 1 max = 0 for ts in sub : assert (ts.tag == tab_stop) pos = float (ts.get (tab_pos) [:-2]) if max < pos : max = pos self.insert_tabs (sub, max) # end def apply_tab_correction def _attr_rename (self, idx) : r = sum \ ( [ set_attributes_from_dict (None, k, self.namemaps [idx][v]) for k,v in self.ref_attrs.iteritems () ] , [] ) return Attribute_Access (r, transformer = self.transformer) # end def _attr_rename def body_concat (self) : count = {} for i in meta_counts : count [i] = self._get_meta (i) count ['z-index'] = self._get_meta \ ('z-index', classname = 'Get_Max') + 1 pb = Addpagebreak \ (stylename = self.pbname, transformer = self.transformer) self.divide_body (self.trees ['content.xml'][0]) self.body_decl (self.declarations, append = 0) for idx in range (1, len (self.docs) + 1) : meta = self.trees ['meta.xml'][idx] content = self.trees ['content.xml'][idx] tbody = self.find_tbody (content) self.getmeta.apply (meta) self.zi.apply (tbody) ra = Attribute_Access \ ( ( Reanchor (count ['page-count'], self.oootag ('draw', 'text-box')) , Reanchor (count ['page-count'], self.oootag ('draw', 'rect')) , Reanchor (count ['page-count'], self.oootag ('draw', 'frame')) , Reanchor (count ['z-index'], None, self.oootag ('draw', 'z-index')) ) , transformer = self.transformer # transformer added ) for i in meta_counts : count [i] += self._get_meta (i, prefix = 'concat-') count ['paragraph-count'] += 1 count ['z-index'] += self._get_meta \ ('z-index', classname = 'Get_Max', prefix = 'concat-') + 1 namemap = self.namemaps [idx][self.oootag ('style', 'style')] tr = self._attr_rename (idx) pb.apply (self.bodyparts [-1]) tr.apply (content) ra.apply (content) declarations = self._divide (tbody) self.body_decl (declarations) self.append_to_body (self.copyparts) self.append_declarations () self.assemble_body () for i in meta_counts : self._set_meta (i, count [i]) # end def body_concat def body_decl (self, decl_section, append = 1) : for sect in self.body_decl_sections : s = self.declarations.find \ ('.//' + self.oootag ('text', sect + 's')) d = self.body_decls [sect] t = self.oootag ('text', sect) for n in decl_section.findall ('.//' + t) : name = n.get (self.oootag ('text', 'name')) if name not in d : if append and s is not None : s.append (n) d [name] = 1 # end def body_decl def insert_tabs (self, element, max = 0) : """ Insert tab stops into the current element. Optionally after max = the current maximum tab-position """ dist_tag = self.oootag ('style', 'tab-stop-distance') for k in range (1, len (self.tab_correct)) : if self.tab_correct [-k].isdigit() : break l = float (self.tab_correct [:-k]) unit = self.tab_correct [-k:] for ts in range (35) : pos = l * (ts + 1) if pos > max : SubElement \ ( element , self.oootag ('style', 'tab-stop') , { self.oootag ('style', 'position') : '%s%s' % (pos, unit) } ) # end def insert_tabs def merge_defaultstyle (self, default_style, node) : assert default_style is not None assert node is not None proppath = './' + self.properties_tag defprops = default_style.find (proppath) props = node.find (proppath) sn = self.oootag ('style', 'name') if props is None : props = Element (self.properties_tag) for k, v in defprops.attrib.iteritems () : if self.default_properties.get (k) != v and not props.get (k) : if k == self.oootag ('style', 'tab-stop-distance') : self.tab_correct = v self.tab_depend = {node.get (sn) : 1} stps = SubElement \ (props, self.oootag ('style', 'tab-stops')) self.insert_tabs (stps) else : props.set (k,v) if len (props) or props.attrib : node.append (props) # end def merge_defaultstyle def _newname (self, key, oldname) : stylenum = 0 if (key, oldname) not in self.stylenames : self.stylenames [(key, oldname)] = 1 return oldname newname = basename = 'Concat_%s' % oldname while (key, newname) in self.stylenames : stylenum += 1 newname = '%s%d' % (basename, stylenum) self.stylenames [(key, newname)] = 1 return newname # end def _newname def set_pagestyle (self) : """ For all documents: search for the first paragraph of the tbody and get its style. Modify this style to include a reference to the default page-style if it doesn't contain a reference to a page style. Insert the new style into the list of styles and modify the first paragraph to use the new page style. This procedure is necessary to make appended documents use their page style instead of the master page style of the first document. FIXME: We should search the style hierarchy backwards for the style of the first paragraph to check if there is a reference to a page-style somewhere and not override the page-style in this case. Otherwise appending complex documents that use a different page-style for the first page will not work if the page style is referenced in a style from which the first paragraph style derives. """ for idx in range (1, len (self.docs) + 1) : croot = self.trees ['content.xml'][idx] sroot = self.trees ['styles.xml'] [idx] tbody = self.find_tbody (croot) para = tbody.find ('./' + self.oootag ('text', 'p')) if para is None : para = tbody.find ('./' + self.oootag ('text', 'list')) tsn = self.oootag ('text', 'style-name') sname = para.get (tsn) styles = croot.find (self.oootag ('office', 'automatic-styles')) ost = sroot.find (self.oootag ('office', 'styles')) mst = sroot.find (self.oootag ('office', 'master-styles')) assert mst is not None and len (mst) assert mst [0].tag == self.oootag ('style', 'master-page') sntag = self.oootag ('style', 'name') master = mst [0].get (sntag) mpn = self.oootag ('style', 'master-page-name') stytag = self.oootag ('style', 'style') style = None for s in styles : if s.tag == stytag : # Explicit references to default style converted to # explicit references to new page style. if s.get (mpn) == '' : s.set (mpn, master) if s.get (sntag) == sname : style = s if style is None : for s in ost : if s.tag == stytag and s.get (sntag) == sname : style = s break if style is not None and not style.get (mpn) : newstyle = deepcopy (style) # Don't register with newname: will be rewritten later # when appending. We assume that an original doc does # not already contain a style with _Concat suffix. newname = sname + '_Concat' para.set (tsn, newname) newstyle.set (self.oootag ('style', 'name'), newname) newstyle.set (mpn, master) styles.append (newstyle) # end def set_pagestyle def style_merge (self, oofile) : """ Loop over all the docs in our document list and look up the styles there. If a style matches an existing style in the original document, register the style name for later transformation if the style name in the original document does not match the style name in the appended document. If no match is found, append style to master document and add to serialisation. If the style name already exists in the master document, a new style name is created. Names of parent styles are changed when appending -- this means that parent style names already have to be defined earlier in the document. If there is a reference to a parent style that is not yet defined, and the parent style is defined later, it is already too late, so an assertion is raised in this case. OOo seems to ensure declaration order of dependent styles, so this should not be a problem. """ for idx in range (len (self.trees [oofile])) : namemap = self.namemaps [idx] root = self.trees [oofile][idx] delnode = [] for nodeidx, node in enumerate (root) : if node.tag not in self.style_containers : continue prefix = '' # font_decls may have same name in styles.xml and content.xml if node.tag == self.font_decls_tag : prefix = oofile default_style = None for n in node : if ( n.tag == self.oootag ('style', 'default-style') and ( n.get (self.oootag ('style', 'family')) == 'paragraph' ) ) : default_style = n name = n.get (self.oootag ('style', 'name'), None) if not name : continue if ( idx != 0 and name == 'Standard' and n.get (self.oootag ('style', 'class')) == 'text' and ( n.get (self.oootag ('style', 'family')) == 'paragraph' ) ) : self.merge_defaultstyle (default_style, n) self.apply_tab_correction (n) key = prefix + n.tag if key not in namemap : namemap [key] = {} tr = self._attr_rename (idx) tr.apply (n) sn = tree_serialise (n, prefix, self.mimetype) if sn in self.serialised : newname = self.serialised [sn] if name != newname : assert \ ( name not in namemap [key] or namemap [key][name] == newname ) namemap [key][name] = newname # optimize original doc: remove duplicate styles if not idx and node.tag != self.font_decls_tag : pass #delnode.append (nodeidx) else : newname = self._newname (key, name) self.serialised [sn] = newname if newname != name : n.set (self.oootag ('style', 'name'), newname) dn = self.oootag ('style', 'display-name') disp_name = n.get (dn) if disp_name : n.set (dn, 'Concat ' + disp_name) namemap [key][name] = newname if idx != 0 : self.sections [oofile][node.tag].append (n) assert not delnode or not idx delnode.reverse () for i in delnode : del node [i] # end style_merge def append_pictures (self) : for doc in self.docs : for f in doc.izip.infolist () : if f.filename.startswith ('Pictures/') : self.transformer.appendfiles.append \ ((f.filename, doc.izip.read (f.filename))) # end def append_pictures # end class Concatenate def renumber_frames (mimetype) : return \ [ Renumber (OOo_Tag ('draw', 'text-box', mimetype), 'Frame') # OOo 1.X , Renumber (OOo_Tag ('draw', 'frame', mimetype), 'Frame') # OOo 2.X ] # end def renumber_frames def renumber_sections (mimetype) : return [Renumber (OOo_Tag ('text', 'section', mimetype))] # end def renumber_sections def renumber_tables (mimetype) : return [Renumber (OOo_Tag ('table', 'table', mimetype))] # end def renumber_tables def renumber_images (mimetype) : return [Renumber (OOo_Tag ('draw', 'image', mimetype))] # end def renumber_images def renumber_xml_id (mimetype) : if mimetype == mimetypes [0] : return [] xmlid = OOo_Tag ('xml', 'id', mimetype) return [Renumber (OOo_Tag ('text', 'list', mimetype), 'list', xmlid)] # end def renumber_xml_id def renumber_all (mimetype) : """ Factory function for all renumberings parameterized with mimetype """ return Attribute_Access \ ( renumber_frames (mimetype) + renumber_sections (mimetype) + renumber_tables (mimetype) + renumber_images (mimetype) + renumber_xml_id (mimetype) ) # end def renumber_all # used to have a separate Pagecount transform -- generalized to get # some of the meta information using an Attribute_Access transform # and set the same information later after possibly being updated by # other transforms. We use another naming convention here for storing # the info retrieved from the OOo document: We use the attribute name in # the meta-information to store (and later retrieve) the information. def get_meta (mimetype) : """ Factory function for Attribute_Access to get all interesting meta-data """ get_attr = [] for attr in meta_counts : a = OOo_Tag ('meta', attr, mimetype) t = OOo_Tag ('meta', 'document-statistic', mimetype) get_attr.append (Get_Attribute (t, a, attr)) return Attribute_Access (get_attr, prio = 20, filename = 'meta.xml') # end def get_meta def set_meta (mimetype) : """ Factory function for Attribute_Access to set all interesting meta-data """ set_attr = [] for attr in meta_counts : a = OOo_Tag ('meta', attr, mimetype) t = OOo_Tag ('meta', 'document-statistic', mimetype) set_attr.append (Set_Attribute (t, a, attr)) return Attribute_Access (set_attr, prio = 120, filename = 'meta.xml') # end def set_meta