% Slides for the talk "Practical Office Automation: Hacking the
% OpenOffice.org File Format" (LinuxDay/Cagliari 2005), typeset with the
% prosper class.
%
% Source layout: one slide environment per slide, grouped by the four
% agenda items. The "Overview (n)" slides repeat the agenda and emphasise
% the item that is about to be presented.
%
% NOTE(review): the line breaks (and the column spacing of the `unzip -l'
% listing) inside the verbatim environments were reconstructed from a
% whitespace-collapsed copy of this file. Long XML lines are wrapped with
% a two-space continuation indent. Confirm against the real terminal
% output before presenting.
\documentclass[final,pdf,slideColor]{prosper}

%-- Packages ---------------------------------------------------------------

\usepackage[latin1]{inputenc}
\usepackage[english]{babel}
\usepackage[T1]{fontenc}
\usepackage{url}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{epsfig}
\usepackage{makeidx}

%-- Title page data --------------------------------------------------------

\title{Practical Office Automation}
\subtitle{Hacking the OpenOffice.org File Format}
\author{Jacob Sparre Andersen}
\institution{\epsfig{file=gulch_logo.ps,width=80mm}}
% NOTE(review): the address below reads "[email protected]", which looks
% like a redaction placeholder rather than a real address -- restore it.
\email{[email protected]}

% Caption repeated on every slide.
\slideCaption{LinuxDay/Cagliari 2005: Practical Office Automation ---
  \url{http://edb.jacob-sparre.dk/foredrag/OOo/}}

%---------------------------------------------------------------------------

\begin{document}

\maketitle

\begin{slide}{Ouverture}
  \begin{itemize}
    \item \textbf{Subject:} \\
          This talk is about extracting and using meta-data from the
          OpenOffice.org/OpenDocument file format.
    \item \textbf{Audience:} \\
          System administrators, system programmers and information
          system decision makers.
  \end{itemize}

  I will talk about what you can (tell your programmers to) do, if your
  documents are in an open format.
  Once I have told you what you \emph{can} do, I will give you some
  examples of \emph{how} to do it with standard Linux tools.

  % A paragraph holding only a tie: acts as a blank line on the slide.
  ~

  Feel free to ask questions at \emph{any} time during the talk.
\end{slide}

% --------------------------------------------------------------------------
% Overview

\begin{slide}{Overview (1)}
  \begin{itemize}
    \item \emph{What you can do, if your documents are in an open format.}
    \item A look into an OpenOffice.org file.
    \item Indexing OpenOffice.org documents.
    \item Preventing document histories from leaking out through your
          firewall.
  \end{itemize}
\end{slide}

% --------------------------------------------------------------------------
% For managers

\begin{slide}{Open file formats}
  \begin{quotation}
    The minimum requirements for an open standard are that the document
    format is completely described in publicly accessible documents,
    [\ldots] and that the document format may be implemented in programs
    without restrictions, royalty-free, and with no legal bindings.
  \end{quotation}

  {\small \url{http://europa.eu.int/idabc/servlets/Doc?id=17982}}
\end{slide}

\begin{slide}{Benefits from using open file formats}
  \begin{itemize}
    \item Not tied to a single software provider.
    \item Lower price on off-the-shelf software.
    \item Freedom to (make your programmers) implement special in-house
          tools.
    \item It is more likely that you can find Open Source programs which
          already solve your problems.
  \end{itemize}
\end{slide}

\begin{slide}{Ideas for special in-house tools}
  Since the OpenOffice.org/OpenDocument file format is open, you are free
  to (make your programmers) write special tools for manipulating the
  files:
  \begin{itemize}
    \item Extracting titles and keywords for automated document indices.
    \item Blocking documents containing their editing history from
          exiting through the corporate firewall.
    \item Warning authors about lacking project codes in documents.
    \item \ldots
  \end{itemize}

  Only your imagination (and your ability to explain your ideas) sets
  limits.
\end{slide}

% --------------------------------------------------------------------------
% Overview:

\begin{slide}{Overview (2)}
  \begin{itemize}
    \item What you can do, if your documents are in an open format.
    \item \emph{A look into an OpenOffice.org file.}
    \item Indexing OpenOffice.org documents.
    \item Preventing document histories from leaking out through your
          firewall.
  \end{itemize}
\end{slide}

% --------------------------------------------------------------------------
% An OOo file:
%
% The verbatim blocks below are shell transcripts: a leading "% " is the
% shell prompt and "> " the continuation prompt, not LaTeX comments.
% Verbatim content and \end{verbatim} start in column 0 on purpose.

\begin{slide}{Looking into an OpenOffice.org file (1)}
\begin{verbatim}
% unzip -l skriv-og-slet.sxw
  Length     Date   Time    Name
 --------    ----   ----    ----
       30  11-14-05 11:40   mimetype
     1958  11-14-05 11:40   content.xml
     5979  11-14-05 11:40   styles.xml
     1282  11-14-05 11:40   meta.xml
     6280  11-14-05 11:40   settings.xml
      752  11-14-05 11:40   META-INF/manifest.xml
 --------                   -------
    16281                   6 files
\end{verbatim}
\end{slide}

\begin{slide}{Looking into an OpenOffice.org file (2)}
\begin{verbatim}
% unzip -ap skriv-og-slet.sxw meta.xml \
> | sed 's/></>\n</g'
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE office:document-meta PUBLIC
  "-//OpenOffice.org//DTD OfficeDocument 1.0//EN"
  "office.dtd">
<office:document-meta
  xmlns:office="http://openoffice.org/2000/office"
  xmlns:xlink="http://www.w3.org/1999/xlink"
  xmlns:dc="http://purl.org/dc/elements/1.1/"
  xmlns:meta="http://openoffice.org/2000/meta"
  office:version="1.0">
<office:meta>
<meta:generator>OpenOffice.org 1.1.4 (Unix)</meta:generator>
<!--645(Build:8824)-->
<dc:title>Writes and deletions</dc:title>
<meta:creation-date>2005-11-14T12:31:10</meta:creation-date>
<dc:date>2005-11-14T12:40:48</dc:date>
<meta:keywords>
\end{verbatim}
\end{slide}

\begin{slide}{Looking into an OpenOffice.org file (3)}
{\small
\begin{verbatim}
% unzip -ap skriv-og-slet.sxw meta.xml \
> | sed 's/></>\n</g' \
> | grep '<meta:keyword>'
<meta:keyword>OOo</meta:keyword>
<meta:keyword>file format</meta:keyword>
<meta:keyword>demonstration</meta:keyword>
<meta:keyword>changes</meta:keyword>
%
\end{verbatim}
}
\end{slide}

\begin{slide}{Looking into an OpenOffice.org file (4)}
\begin{verbatim}
% unzip -ap skriv-og-slet.sxw meta.xml \
> | sed 's/></>\n</g' \
> | grep '<dc:title>'
<dc:title>Writes and deletions</dc:title>
%
\end{verbatim}
\end{slide}

\begin{slide}{Looking into an OpenOffice.org file (5)}
\begin{verbatim}
% unzip -ap skriv-og-slet.sxw content.xml \
> | sed 's/></>\n</g' \
> | grep '<text:tracked-changes>'
<text:tracked-changes>
%
\end{verbatim}
\end{slide}

% --------------------------------------------------------------------------
% Overview:

\begin{slide}{Overview (3)}
  \begin{itemize}
    \item What you can do, if your documents are in an open format.
    \item A look into an OpenOffice.org file.
    \item \emph{Indexing OpenOffice.org documents.}
    \item Preventing document histories from leaking out through your
          firewall.
  \end{itemize}
\end{slide}

% --------------------------------------------------------------------------

\begin{slide}{Indexing OpenOffice.org documents}
  Practical demonstration of indexing of OpenOffice.org documents.
\end{slide}

% --------------------------------------------------------------------------
% Overview:

\begin{slide}{Overview (4)}
  \begin{itemize}
    \item What you can do, if your documents are in an open format.
    \item A look into an OpenOffice.org file.
    \item Indexing OpenOffice.org documents.
    \item \emph{Preventing document histories from leaking out through
          your firewall.}
  \end{itemize}
\end{slide}

% --------------------------------------------------------------------------

\begin{slide}{Checking for document histories}
  Practical demonstration of checking OpenOffice.org documents for change
  information.
\end{slide}

% --------------------------------------------------------------------------

\begin{slide}{Further information}
  \begin{itemize}
    \item A commented command history from the practical demonstrations
          will be published on
          {\small\url{http://edb.jacob-sparre.dk/foredrag/OOo/}}
          after the talk.
    % NOTE(review): "[email protected]" looks like a redaction
    % placeholder -- restore the real address here as well.
    \item Write me at \url{[email protected]} if you have questions
          related to the talk.
  \end{itemize}

  % Manual offsets place the closing line towards the lower right.
  \vspace{8mm}\hspace{85mm} The End.
\end{slide}

% --------------------------------------------------------------------------

\end{document}