ooopy/OOoPy-1.11/ooopy/OOoPy.py

318 lines
12 KiB
Python

#!/usr/bin/env python
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2005-14 Dr. Ralf Schlatterbeck Open Source Consulting.
# Reichergasse 131, A-3411 Weidling.
# Web: http://www.runtux.com Email: office@runtux.com
# All rights reserved
# ****************************************************************************
#
# This library is free software; you can redistribute it and/or modify
# it under the terms of the GNU Library General Public License as
# published by the Free Software Foundation; either version 2 of the
# License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU Library General Public License for more details.
#
# You should have received a copy of the GNU Library General Public
# License along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
# ****************************************************************************
from __future__ import absolute_import
from zipfile import ZipFile, ZIP_DEFLATED, ZipInfo
try :
from StringIO import StringIO
except ImportError :
from io import StringIO
from datetime import datetime
try :
from xml.etree.ElementTree import ElementTree, fromstring, _namespace_map
except ImportError :
from elementtree.ElementTree import ElementTree, fromstring, _namespace_map
from tempfile import mkstemp
from ooopy.Version import VERSION
import os
class _autosuper (type) :
    """
        Metaclass that stores an unbound ``super`` object on every class
        it creates. The object is stored under the name that the private
        attribute ``__super`` is mangled to inside the body of that
        class, so methods of the class can write
        ``self.__super.method (...)`` to call the next implementation.
    """
    def __init__ (cls, name, bases, dict) :
        super (_autosuper, cls).__init__ (name, bases, dict)
        # Private name mangling removes leading underscores from the
        # class name before prepending a single underscore, and does
        # not mangle at all when the class name consists only of
        # underscores. Build the attribute name the same way, otherwise
        # ``self.__super`` cannot be found in classes like ``_Foo``.
        stripped = name.lstrip ('_')
        if stripped :
            attr = "_%s__super" % stripped
        else :
            attr = "__super"
        setattr (cls, attr, super (cls))
    # end def __init__
# end class _autosuper
class autosuper (object) :
    """
        Base class that requests the _autosuper metaclass through the
        ``__metaclass__`` class attribute, so that methods can reach
        the next implementation in the inheritance chain via
        ``self.__super``.
    """
    __metaclass__ = _autosuper

    def __init__ (self, *args, **kw) :
        # Positional and keyword arguments are accepted but not passed
        # on: the next __init__ is always called without arguments.
        self.__super.__init__ ()
    # end def __init__
# end class autosuper
# Names of the XML archive members handled by this module.
files = \
    [ 'content.xml'
    , 'styles.xml'
    , 'meta.xml'
    , 'settings.xml'
    , 'META-INF/manifest.xml'
    ]
# Supported mimetypes: index 0 is used as the key for the
# "openoffice.org/200x" namespace table below, index 1 for the
# "urn:oasis:names:tc:opendocument" namespace table.
mimetypes = \
    [ 'application/vnd.sun.xml.writer'
    , 'application/vnd.oasis.opendocument.text'
    ]
# For each mimetype a mapping of namespace prefix -> namespace URI.
namespace_by_name = \
    { mimetypes [0] :
        { 'chart'    : "http://openoffice.org/2000/chart"
        , 'config'   : "http://openoffice.org/2001/config"
        , 'dc'       : "http://purl.org/dc/elements/1.1/"
        , 'dr3d'     : "http://openoffice.org/2000/dr3d"
        , 'draw'     : "http://openoffice.org/2000/drawing"
        , 'fo'       : "http://www.w3.org/1999/XSL/Format"
        , 'form'     : "http://openoffice.org/2000/form"
        , 'math'     : "http://www.w3.org/1998/Math/MathML"
        , 'meta'     : "http://openoffice.org/2000/meta"
        , 'number'   : "http://openoffice.org/2000/datastyle"
        , 'office'   : "http://openoffice.org/2000/office"
        , 'script'   : "http://openoffice.org/2000/script"
        , 'style'    : "http://openoffice.org/2000/style"
        , 'svg'      : "http://www.w3.org/2000/svg"
        , 'table'    : "http://openoffice.org/2000/table"
        , 'text'     : "http://openoffice.org/2000/text"
        , 'xlink'    : "http://www.w3.org/1999/xlink"
        , 'manifest' : "http://openoffice.org/2001/manifest"
        }
    , mimetypes [1] :
        { 'chart'    : "urn:oasis:names:tc:opendocument:xmlns:chart:1.0"
        , 'config'   : "urn:oasis:names:tc:opendocument:xmlns:config:1.0"
        , 'dc'       : "http://purl.org/dc/elements/1.1/"
        , 'dr3d'     : "urn:oasis:names:tc:opendocument:xmlns:dr3d:1.0"
        , 'draw'     : "urn:oasis:names:tc:opendocument:xmlns:drawing:1.0"
        , 'fo'       : "urn:oasis:names:tc:opendocument:xmlns:"
                       "xsl-fo-compatible:1.0"
        , 'form'     : "urn:oasis:names:tc:opendocument:xmlns:form:1.0"
        , 'math'     : "http://www.w3.org/1998/Math/MathML"
        , 'meta'     : "urn:oasis:names:tc:opendocument:xmlns:meta:1.0"
        , 'number'   : "urn:oasis:names:tc:opendocument:xmlns:datastyle:1.0"
        , 'office'   : "urn:oasis:names:tc:opendocument:xmlns:office:1.0"
        , 'officeooo': "http://openoffice.org/2009/office"
        , 'script'   : "urn:oasis:names:tc:opendocument:xmlns:script:1.0"
        , 'style'    : "urn:oasis:names:tc:opendocument:xmlns:style:1.0"
        , 'svg'      : "urn:oasis:names:tc:opendocument:xmlns:svg-compatible:1.0"
        , 'table'    : "urn:oasis:names:tc:opendocument:xmlns:table:1.0"
        , 'text'     : "urn:oasis:names:tc:opendocument:xmlns:text:1.0"
        , 'xlink'    : "http://www.w3.org/1999/xlink"
        , 'manifest' : "urn:oasis:names:tc:opendocument:xmlns:manifest:1.0"
        , 'tableooo' : "http://openoffice.org/2009/table"
        , 'transformation' : "http://www.w3.org/2003/g/data-view#"
        # OOo 1.X tags and some others:
        , 'ooo'      : "http://openoffice.org/2004/office"
        , 'ooow'     : "http://openoffice.org/2004/writer"
        , 'oooc'     : "http://openoffice.org/2004/calc"
        , 'o_dom'    : "http://www.w3.org/2001/xml-events"
        , 'o_xforms' : "http://www.w3.org/2002/xforms"
        , 'xs'       : "http://www.w3.org/2001/XMLSchema"
        , 'xsi'      : "http://www.w3.org/2001/XMLSchema-instance"
        # predefined xml namespace, see
        # http://www.w3.org/TR/2006/REC-xml-names11-20060816/
        # "It MAY, but need not, be declared, and MUST NOT be undeclared
        # or bound to any other namespace name."
        , 'xml'      : "http://www.w3.org/XML/1998/namespace"
        }
    }
# Register every URI -> prefix pair in ElementTree's namespace map. The
# map is filled directly because the same prefix is used for different
# URIs in the two tables above. values () / items () are used instead
# of itervalues () / iteritems () so that this also runs on Python 3.
for mimetype in namespace_by_name.values () :
    for k, v in mimetype.items () :
        if v in _namespace_map :
            # A URI that is already registered must keep its prefix.
            assert _namespace_map [v] == k, \
                "namespace %r already registered with prefix %r, not %r" \
                % (v, _namespace_map [v], k)
        _namespace_map [v] = k
class OOoElementTree (autosuper) :
    """
        An ElementTree for OOo document XML members. Behaves like the
        original ElementTree (attribute access is forwarded to a real
        ElementTree instance) except for the write method, which
        takes no arguments and hands the tree to the OOoPy object it
        was created with, under the archive member name it was
        created with.
    """
    def __init__ (self, ooopy, zname, root) :
        """
            Remember the owning ooopy object and the member name zname,
            and wrap the element root in an ElementTree.
        """
        self.ooopy = ooopy
        self.zname = zname
        self.tree  = ElementTree (root)
    # end def __init__

    def write (self) :
        """
            Pass the wrapped tree to the write method of the owning
            ooopy object, together with our member name.
        """
        self.ooopy.write (self.zname, self.tree)
    # end def write

    def __getattr__ (self, name) :
        """
            Delegate everything to our ElementTree attribute. Names
            starting with a double underscore are not delegated. A
            delegated value is stored on this object, so __getattr__
            is not invoked again for the same name.
        """
        if name.startswith ('__') :
            raise AttributeError (name)
        value = getattr (self.tree, name)
        setattr (self, name, value)
        return value
    # end def __getattr__
# end class OOoElementTree
class OOoPy (autosuper) :
    """
        Wrapper for OpenOffice.org zip files (all OOo documents are
        really zip files internally).

        from ooopy.OOoPy import OOoPy
        >>> o = OOoPy (infile = 'testfiles/test.sxw', outfile = 'out.sxw')
        >>> o.mimetype
        'application/vnd.sun.xml.writer'
        >>> for f in files :
        ...     e = o.read (f)
        ...     e.write ()
        ...
        >>> o.close ()
        >>> o = OOoPy (infile = 'testfiles/test.odt', outfile = 'out2.odt')
        >>> o.mimetype
        'application/vnd.oasis.opendocument.text'
        >>> for f in files :
        ...     e = o.read (f)
        ...     e.write ()
        ...
        >>> o.append_file ('Pictures/empty', '')
        >>> o.close ()
        >>> o = OOoPy (infile = 'out2.odt')
        >>> for f in o.izip.infolist () :
        ...     print f.filename, f.create_system, f.compress_type
        mimetype 0 8
        content.xml 0 8
        styles.xml 0 8
        meta.xml 0 8
        settings.xml 0 8
        META-INF/manifest.xml 0 8
        Pictures/empty 0 8
        Configurations2/statusbar/ 0 0
        Configurations2/accelerator/current.xml 0 8
        Configurations2/floater/ 0 0
        Configurations2/popupmenu/ 0 0
        Configurations2/progressbar/ 0 0
        Configurations2/menubar/ 0 0
        Configurations2/toolbar/ 0 0
        Configurations2/images/Bitmaps/ 0 0
        Thumbnails/thumbnail.png 0 8
    """
    def __init__ \
        ( self
        , infile     = None
        , outfile    = None
        , write_mode = 'w'
        , mimetype   = None
        ) :
        """
            Open an OOo document, if no outfile is given, we open the
            file read-only. Otherwise the outfile has to be different
            from the infile -- the python ZipFile can't deal with
            read-write access. In case an outfile is given, we open it
            in "w" mode as a zip file, unless write_mode is specified
            (the only allowed case would be "a" for appending to an
            existing file, see pythons ZipFile documentation for
            details). If no infile is given, the user is responsible for
            providing all necessary files in the resulting output file.

            It seems that OOo needs to have the mimetype as the first
            archive member (at least with mimetype as the first member
            it works, the order may not be arbitrary) to recognize a zip
            archive as an OOo file. When copying from a given infile, we
            use the same order of elements in the resulting output. When
            creating new elements we make sure the mimetype is the first
            in the resulting archive.

            Note that both, infile and outfile can either be filenames
            or file-like objects (e.g. StringIO).

            The mimetype is automatically determined if an infile is
            given. If only writing is desired, the mimetype should be
            set.
        """
        # Initialise the archive handles before any check can fail:
        # close () is also installed as __del__ and reads both
        # attributes, so they must exist even on a half-built object.
        self.izip = self.ozip = None
        assert (infile != outfile)
        if infile :
            self.izip    = ZipFile (infile,  'r',        ZIP_DEFLATED)
        if outfile :
            self.ozip    = ZipFile (outfile, write_mode, ZIP_DEFLATED)
            # names of the members already written to ozip
            self.written = {}
        if mimetype :
            self.mimetype = mimetype
        elif self.izip :
            self.mimetype = self.izip.read ('mimetype')
    # end def __init__

    def read (self, zname) :
        """
            return an OOoElementTree object for the given OOo document
            archive member name. Currently an OOo document contains the
            following XML files::

             * content.xml: the text of the OOo document
             * styles.xml: style definitions
             * meta.xml: meta-information (author, last changed, ...)
             * settings.xml: settings in OOo
             * META-INF/manifest.xml: contents of the archive

            There is an additional file "mimetype" that always contains
            the string "application/vnd.sun.xml.writer" for OOo 1.X files
            and the string "application/vnd.oasis.opendocument.text" for
            OOo 2.X files.
        """
        assert (self.izip)
        return OOoElementTree (self, zname, fromstring (self.izip.read (zname)))
    # end def read

    def _write (self, zname, str) :
        """
            Store the content str as member zname in the output archive
            and record zname as written. The member is stamped with the
            current UTC time and stored deflated.
        """
        # ZipInfo documents date_time as a 6-tuple (year .. second);
        # cut off the extra fields of the struct_time.
        now  = datetime.utcnow ().timetuple () [:6]
        info = ZipInfo (zname, date_time = now)
        info.create_system = 0 # pretend to be fat
        info.compress_type = ZIP_DEFLATED
        self.ozip.writestr (info, str)
        self.written [zname] = 1
    # end def _write

    def write (self, zname, etree) :
        """
            Serialize etree (an object with an ElementTree-style write
            method) and store the result as member zname in the output
            archive. Requires that an outfile was given.
        """
        assert (self.ozip)
        # assure mimetype is the first member in new archive
        if 'mimetype' not in self.written :
            self._write ('mimetype', self.mimetype)
        buf = StringIO ()
        etree.write (buf)
        self._write (zname, buf.getvalue ())
    # end def write

    def append_file (self, zname, str) :
        """ Official interface to _write: Append a file to the end of
            the archive. Nothing is done if a member of that name was
            already written.
        """
        if zname not in self.written :
            self._write (zname, str)
    # end def append_file

    def close (self) :
        """
            Close the zip files. According to documentation of zipfile in
            the standard python lib, this has to be done to be sure
            everything is written. We copy over the not-yet written files
            from izip before closing ozip.
        """
        try :
            if self.izip and self.ozip :
                for f in self.izip.infolist () :
                    if f.filename not in self.written :
                        self.ozip.writestr (f, self.izip.read (f.filename))
        finally :
            # Close and forget both archives even if copying failed, so
            # no file handle stays open and a later close () (e.g. from
            # __del__) is a no-op.
            for i in self.izip, self.ozip :
                if i : i.close ()
            self.izip = self.ozip = None
    # end def close

    __del__ = close # auto-close on deletion of object
# end class OOoPy