\documentclass[final,pdf,slideColor]{prosper}
%-- Packages ---------------------------------------------------------------
\usepackage[latin1]{inputenc}
\usepackage[english]{babel}
\usepackage[T1]{fontenc}
\usepackage{url}
\usepackage{amsmath}
\usepackage{amssymb}
\usepackage{epsfig}
\usepackage{makeidx}
%---------------------------------------------------------------------------
\title{Practical Office Automation}
\subtitle{Hacking the OpenOffice.org File Format}
\author{Jacob Sparre Andersen}
\institution{\includegraphics[width=80mm]{gulch_logo.ps}}
\email{[email protected]}
\slideCaption{LinuxDay/Cagliari 2005: Practical Office Automation ---
\url{http://edb.jacob-sparre.dk/foredrag/OOo/}}
%---------------------------------------------------------------------------
\begin{document}
\maketitle
\begin{slide}
{Ouverture}
\begin{itemize}
\item \textbf{Subject:} \\
This talk is about extracting and using meta-data
from the OpenOffice.org/OpenDocument file format.
\item \textbf{Audience:} \\
System administrators, system programmers and
information system decision makers.
\end{itemize}
I will talk about what you can (tell your programmers to) do,
if your documents are in an open
format. Once I have told you what you \emph{can} do, I will give
you some examples of \emph{how} to do it with standard Linux tools.
\par\medskip
Feel free to ask questions at \emph{any} time during the talk.
\end{slide}
% --------------------------------------------------------------------------
% Overview
\begin{slide}
{Overview (1)}
\begin{itemize}
\item \emph{What you can do, if your documents are in an open
format.}
\item A look into an OpenOffice.org file.
\item Indexing OpenOffice.org documents.
\item Preventing document histories from leaking out through your
firewall.
\end{itemize}
\end{slide}
% --------------------------------------------------------------------------
% For managers
\begin{slide}
{Open file formats}
\begin{quotation}
The minimum requirements for an open standard are that the
document format is completely described in publicly accessible
documents, [\ldots] and that the document format may be
implemented in programs without restrictions, royalty-free, and
with no legal bindings.
\end{quotation}
{\small \url{http://europa.eu.int/idabc/servlets/Doc?id=17982}}
\end{slide}
\begin{slide}
{Benefits from using open file formats}
\begin{itemize}
\item Not tied to a single software provider.
\item Lower price on off-the-shelf software.
\item Freedom to (make your programmers) implement special
in-house tools.
\item It is more likely that you can find Open Source programs
which already solve your problems.
\end{itemize}
\end{slide}
\begin{slide}
{Ideas for special in-house tools}
Since the OpenOffice.org/OpenDocument file format is open, you
are free to (make your programmers) write
special tools for manipulating the files:
\begin{itemize}
\item Extracting titles and keywords for automated document indices.
\item Blocking documents containing their editing history from
exiting through the corporate firewall.
\item Warning authors about missing project codes in documents.
\item \ldots
\end{itemize}
Only your imagination (and your ability to explain your ideas)
sets limits.
\end{slide}
% --------------------------------------------------------------------------
% Overview:
\begin{slide}
{Overview (2)}
\begin{itemize}
\item What you can do, if your documents are in an open format.
\item \emph{A look into an OpenOffice.org file.}
\item Indexing OpenOffice.org documents.
\item Preventing document histories from leaking out through your
firewall.
\end{itemize}
\end{slide}
% --------------------------------------------------------------------------
% An OOo file:
\begin{slide}
{Looking into an OpenOffice.org file (1)}
\begin{verbatim}
% unzip -l skriv-og-slet.sxw
  Length     Date   Time    Name
 --------    ----   ----    ----
       30  11-14-05 11:40   mimetype
     1958  11-14-05 11:40   content.xml
     5979  11-14-05 11:40   styles.xml
     1282  11-14-05 11:40   meta.xml
     6280  11-14-05 11:40   settings.xml
      752  11-14-05 11:40   META-INF/manifest.xml
 --------                   -------
    16281                   6 files
\end{verbatim}
\end{slide}
\begin{slide}
{Looking into an OpenOffice.org file (2)}
\begin{verbatim}
% unzip -ap skriv-og-slet.sxw meta.xml \
> | sed 's/></>\n</g'
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE office:document-meta PUBLIC "-//OpenOffice.org//DTD OfficeDocument 1.0//EN" "office.dtd">
<office:document-meta xmlns:office="http://openoffice.org/2000/office" xmlns:xlink="http://www.w3.org/1999/xlink" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:meta="http://openoffice.org/2000/meta" office:version="1.0">
<office:meta>
<meta:generator>OpenOffice.org 1.1.4 (Unix)</meta:generator>
<!--645(Build:8824)-->
<dc:title>Writes and deletions</dc:title>
<meta:creation-date>2005-11-14T12:31:10</meta:creation-date>
<dc:date>2005-11-14T12:40:48</dc:date>
<meta:keywords>
\end{verbatim}
\end{slide}
\begin{slide}
{Looking into an OpenOffice.org file (3)}
{\small
\begin{verbatim}
% unzip -ap skriv-og-slet.sxw meta.xml \
> | sed 's/></>\n</g' \
> | grep '<meta:keyword>'
<meta:keyword>OOo</meta:keyword>
<meta:keyword>file format</meta:keyword>
<meta:keyword>demonstration</meta:keyword>
<meta:keyword>changes</meta:keyword>
%
\end{verbatim}
}
\end{slide}
\begin{slide}
{Looking into an OpenOffice.org file (4)}
\begin{verbatim}
% unzip -ap skriv-og-slet.sxw meta.xml \
> | sed 's/></>\n</g' \
> | grep '<dc:title>'
<dc:title>Writes and deletions</dc:title>
%
\end{verbatim}
\end{slide}
\begin{slide}
{Looking into an OpenOffice.org file (5)}
\begin{verbatim}
% unzip -ap skriv-og-slet.sxw content.xml \
> | sed 's/></>\n</g' \
> | grep '<text:tracked-changes>'
<text:tracked-changes>
%
\end{verbatim}
\end{slide}
% --------------------------------------------------------------------------
% Overview:
\begin{slide}
{Overview (3)}
\begin{itemize}
\item What you can do, if your documents are in an open format.
\item A look into an OpenOffice.org file.
\item \emph{Indexing OpenOffice.org documents.}
\item Preventing document histories from leaking out through your
firewall.
\end{itemize}
\end{slide}
% --------------------------------------------------------------------------
\begin{slide}
{Indexing OpenOffice.org documents}
Practical demonstration of indexing of OpenOffice.org documents.
\end{slide}
% --------------------------------------------------------------------------
% Overview:
\begin{slide}
{Overview (4)}
\begin{itemize}
\item What you can do, if your documents are in an open format.
\item A look into an OpenOffice.org file.
\item Indexing OpenOffice.org documents.
\item \emph{Preventing document histories from leaking out through
your firewall.}
\end{itemize}
\end{slide}
% --------------------------------------------------------------------------
\begin{slide}
{Checking for document histories}
Practical demonstration of checking OpenOffice.org documents for
change information.
\end{slide}
% --------------------------------------------------------------------------
\begin{slide}
{Further information}
\begin{itemize}
\item A commented command history from the practical
demonstrations will be published on
{\small\url{http://edb.jacob-sparre.dk/foredrag/OOo/}} after the
talk.
\item Write me at \url{[email protected]} if you have questions
related to the talk.
\end{itemize}
\vspace{8mm}\hspace{85mm} The End.
\end{slide}
% --------------------------------------------------------------------------
\end{document}