[daisy] Problem presavehook generating xinfo-info file

Bart Van den Abeele bvda at schaubroeck.be
Thu Oct 12 05:41:55 CDT 2006


Skipped content of type multipart/alternative-------------- next part --------------
/*
 * Copyright 2004 Outerthought bvba and Schaubroeck nv
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package be.schaubroeck.digidoc;

import org.outerj.daisy.repository.*;
import org.apache.avalon.framework.service.Serviceable;
import org.apache.avalon.framework.service.ServiceManager;
import org.apache.avalon.framework.service.ServiceException;
import org.apache.avalon.framework.activity.Disposable;
import org.apache.avalon.framework.configuration.Configurable;
import org.apache.avalon.framework.configuration.Configuration;
import org.apache.avalon.framework.configuration.ConfigurationException;
import org.apache.avalon.framework.logger.LogEnabled;
import org.apache.avalon.framework.logger.Logger;

import javax.imageio.ImageIO;
import java.awt.image.BufferedImage;
import java.awt.*;
import java.io.InputStream;
import java.io.ByteArrayOutputStream;
import java.util.Map;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Collections;

import EDU.oswego.cs.dl.util.concurrent.FIFOSemaphore;
import com.sun.image.codec.jpeg.JPEGImageDecoder;
import com.sun.image.codec.jpeg.JPEGCodec;
import com.sun.image.codec.jpeg.JPEGDecodeParam;

/* import com.drew.metadata.Metadata;
import com.drew.metadata.Directory;
import com.drew.metadata.MetadataException;
import com.drew.metadata.Tag;
import com.drew.metadata.exif.ExifDirectory;
import com.drew.imaging.jpeg.JpegMetadataReader;
*/


/**
 * A Daisy pre-save hook that can be enabled per document type.
 *
 * <p>Despite the "PostSaveHook" in the class and component names, this class
 * implements {@link PreSaveHook} and registers itself with the
 * {@link PreSaveHookRegistrar}.
 *
 * <p>In its present state the hook only sets the <code>presaved</code> field to
 * <code>true</code> on documents whose type is configured and enabled. The logic
 * for extracting image information and generating thumbnail and preview images
 * is still present but commented out.
 *
 * @avalon.component version="1.0" name="scandocument-postsavehook" lifestyle="singleton"
 */
public class ScanDocumentPostSaveHook implements PreSaveHook, Serviceable, Disposable, Configurable, LogEnabled {
    // Service key of the registrar. It has to be identical to the key declared in the
    // avalon.dependency tag on service(), since that tag is what ends up in the
    // generated component descriptor.
    private static final String REGISTRAR_KEY = "presavehook-registrar";

    private Logger logger;
    private ServiceManager serviceManager;
    private PreSaveHookRegistrar preSaveHookRegistrar;
    private FIFOSemaphore imageProcessingSemaphore;
    // Per document type settings, keyed by document type name.
    private final Map<String, DoctypeConfig> docTypeConfigs = new HashMap<String, DoctypeConfig>();

    /**
     * Stores the logger handed over by the container.
     *
     * @param logger the logger to use
     */
    public void enableLogging(Logger logger) {
        this.logger = logger;
    }

    /**
     * Looks up the pre-save hook registrar and registers this hook with it.
     *
     * @param serviceManager the service manager to look the registrar up in
     * @throws ServiceException if the registrar cannot be looked up
     *
     * @avalon.dependency key="presavehook-registrar" type="org.outerj.daisy.repository.PreSaveHookRegistrar"
     */
    public void service(ServiceManager serviceManager) throws ServiceException {
        this.serviceManager = serviceManager;
        // The lookup key must be the one declared in the dependency tag above.
        preSaveHookRegistrar = (PreSaveHookRegistrar)serviceManager.lookup(REGISTRAR_KEY);
        preSaveHookRegistrar.registerPreSaveHook(this);
    }

    /**
     * Reads the maximum number of concurrent operations and the per document
     * type settings from the configuration.
     *
     * <p>When several <code>config</code> elements name the same document type,
     * only the first one is used.
     *
     * @param configuration the component configuration
     * @throws ConfigurationException if <code>maxConcurrentOperations</code> has no valid
     *         integer value or a <code>config</code> element lacks the
     *         <code>documentType</code> attribute
     */
    public void configure(Configuration configuration) throws ConfigurationException {
        int concurrentOperations = configuration.getChild("maxConcurrentOperations").getValueAsInteger();
        imageProcessingSemaphore = new FIFOSemaphore(concurrentOperations);

        Configuration[] configs = configuration.getChildren("config");
        for (Configuration config : configs) {
            String docType = config.getAttribute("documentType");

            if (docTypeConfigs.containsKey(docType)) {
                // user overwritten configs seem to come before built-in ones, so use the
                // first one
                continue;
            }

            DoctypeConfig dtconfig = new DoctypeConfig();
            dtconfig.enabled = config.getChild("enabled").getValueAsBoolean(true);
            // The image-related settings below are disabled together with the image
            // processing in process().
            /*
            dtconfig.maxImageSize = config.getChild("maxImageSize").getValueAsInteger(3000000);
            dtconfig.imagePartName = config.getChild("imagePartName").getValue();
            dtconfig.widthFieldName = config.getChild("widthFieldName").getValue(null);
            dtconfig.heightFieldName = config.getChild("heightFieldName").getValue(null);
            dtconfig.previewPartName = config.getChild("previewPartName").getValue(null);
            dtconfig.previewMaxSize = config.getChild("previewMaxSize").getValueAsInteger(250);
            dtconfig.thumbnailPartName = config.getChild("thumbnailPartName").getValue(null);
            dtconfig.thumbnailMaxSize = config.getChild("thumbnailMaxSize").getValueAsInteger(125);
            */
            /*
            Configuration[] metadatas = config.getChildren("metadata");
            for (int k = 0; k < metadatas.length; k++) {
                String type = metadatas[k].getAttribute("type");
                if (!METADATA_VALUE_GETTERS.containsKey(type))
                    throw new ConfigurationException("ImagePreSaveHook: metadata/@type has an invalid value: " + type + " at " + metadatas[k].getLocation());
                dtconfig.addMetadata(metadatas[k].getAttribute("tag"), metadatas[k].getAttribute("field"), type);
            }
            */
            docTypeConfigs.put(docType, dtconfig);
        }
    }

    /**
     * Unregisters this hook and releases the registrar. Does nothing if the
     * registrar was never obtained.
     */
    public void dispose() {
        if (preSaveHookRegistrar != null) {
            preSaveHookRegistrar.unregisterPreSaveHook(this);
            serviceManager.release(preSaveHookRegistrar);
            preSaveHookRegistrar = null;
        }
    }

    /**
     * Returns the name of this hook.
     *
     * @return the fixed name <code>scandocument-post-save-hook</code>
     */
    public String getName() {
        return "scandocument-post-save-hook";
    }

    /**
     * Processes a document that is about to be saved.
     *
     * <p>Documents whose type has no configuration, or whose configuration is
     * disabled, are left untouched. For all other documents the
     * <code>presaved</code> field is set to <code>true</code>.
     *
     * @param document the document being saved
     * @param repository the repository, used to resolve the document type name
     * @throws Exception if waiting for the semaphore is interrupted or a repository call fails
     */
    public void process(Document document, Repository repository) throws Exception {
        // The semaphore is to avoid that dozens of threads would start concurrently generating image thumbnails,
        // which makes little sense as this is not much I/O bound and could eat lots of memory
        imageProcessingSemaphore.acquire();
        try {
            String documentTypeName = repository.getRepositorySchema().getDocumentTypeById(document.getDocumentTypeId(), false).getName();
            DoctypeConfig dtconfig = docTypeConfigs.get(documentTypeName);
            if (dtconfig == null || !dtconfig.enabled)
                return;

            // NOTE(review): the disabled code below logs when an image is too large or of
            // unknown size but then continues processing; confirm whether it should return
            // in those cases before re-enabling it.
            /*
            // First, clear out all data which is automatically assigned, so that if extraction of image
            // information fails for some reason, no old data is left in the automatically assigned parts and fields
            if (dtconfig.previewPartName != null)
                document.deletePart(dtconfig.previewPartName);
            if (dtconfig.thumbnailPartName != null)
                document.deletePart(dtconfig.thumbnailPartName);
            if (dtconfig.widthFieldName != null)
                document.deleteField(dtconfig.widthFieldName);
            if (dtconfig.heightFieldName != null)
                document.deleteField(dtconfig.heightFieldName);
            Iterator metadataInfoIt = dtconfig.getMetadataInfoIterator();
            while (metadataInfoIt.hasNext()) {
                MetadataInfo metadataInfo = (MetadataInfo)metadataInfoIt.next();
                document.deleteField(metadataInfo.field);
            }

            if (!document.hasPart(dtconfig.imagePartName))
                return;

            Part part = document.getPart(dtconfig.imagePartName);

            // Protection against too large images
            long size = part.getSize();
            if (size > dtconfig.maxImageSize) {
                if (logger.isInfoEnabled()) {
                    logger.info("Skipped image information extraction as the image was too large.");
                }
            } else if (size == 0) {
                logger.info("Skipped image information extraction as the image size was unknown.");
            }

            // Read the image
            InputStream is = null;
            BufferedImage sourceImage;
            Metadata metadata = null;
            try {
                is = part.getDataStream();
                String mimeType = part.getMimeType();
                if (mimeType.equals("image/jpeg") || mimeType.equals("image/x-jpeg")) {
                    JPEGImageDecoder jpegDecoder = JPEGCodec.createJPEGDecoder(is);
                    sourceImage = jpegDecoder.decodeAsBufferedImage();
                    JPEGDecodeParam decodeParam = jpegDecoder.getJPEGDecodeParam();
                    metadata = JpegMetadataReader.readMetadata(decodeParam);
                } else {
                    sourceImage = ImageIO.read(is);
                }
            } finally {
                if (is != null)
                    is.close();
            }

            if (sourceImage == null) {
                // The image could not be read (unsupported format)
                return;
            }

            // Extract size info
            int width = sourceImage.getWidth();
            int height = sourceImage.getHeight();

            if (dtconfig.widthFieldName != null)
                document.setField(dtconfig.widthFieldName, new Long(width));
            if (dtconfig.heightFieldName != null)
                document.setField(dtconfig.heightFieldName, new Long(height));

            if (metadata != null && dtconfig.isSetMetadata()) {
                Directory dir = metadata.getDirectory(ExifDirectory.class);
                MetadataInfo metadataInfo;

                Iterator tagIt = dir.getTagIterator();
                while (tagIt.hasNext()) {
                    Tag tag = (Tag)tagIt.next();
                    if (logger.isDebugEnabled())
                        logger.debug("[" + tag.getTagType() + "] " + tag.getTagName() + " : " + tag.getDescription() + " (" + dir.getObject(tag.getTagType()).getClass().getName() + ") (" + dir.getObject(tag.getTagType()) + ")");
                    metadataInfo = dtconfig.getMetadataInfo(tag.getTagName());
                    if (metadataInfo != null) {
                        Object value = metadataInfo.getValueGetter().getValue(tag.getTagType(), dir);
                        document.setField(metadataInfo.field, value);
                    }
                }
            }

            // Create preview and thumbnail
            if (dtconfig.previewPartName != null) {
                BufferedImage previewImage = resizeImage(sourceImage, dtconfig.previewMaxSize);
                sourceImage = previewImage; // start thumbnail from here, this is often significantly faster
                ByteArrayOutputStream previewData = new ByteArrayOutputStream();
                ImageIO.write(previewImage, "jpg", previewData);
                document.setPart(dtconfig.previewPartName, "image/jpeg", previewData.toByteArray());
            }

            if (dtconfig.thumbnailPartName != null) {
                BufferedImage thumbnailImage = resizeImage(sourceImage, dtconfig.thumbnailMaxSize);
                ByteArrayOutputStream thumbnailData = new ByteArrayOutputStream();
                ImageIO.write(thumbnailImage, "jpg", thumbnailData);
                document.setPart(dtconfig.thumbnailPartName, "image/jpeg", thumbnailData.toByteArray());
            }
            */

            document.setField("presaved", true);
        } finally {
            imageProcessingSemaphore.release();
        }
    }

    /**
     * Scales an image down so that its longest side is at most <code>maxSize</code>
     * pixels, keeping the aspect ratio. The result is drawn on a white background.
     *
     * @param sourceImage the image to scale
     * @param maxSize the maximum width and height of the result, in pixels
     * @return <code>sourceImage</code> itself if it is already small enough, otherwise a
     *         new RGB image holding the scaled copy
     */
    private BufferedImage resizeImage(BufferedImage sourceImage, int maxSize) {
        int width = sourceImage.getWidth();
        int height = sourceImage.getHeight();

        // if it's already small enough, do nothing
        if (Math.max(width, height) <= maxSize)
            return sourceImage;

        // The shorter side is computed in long arithmetic to avoid overflow and is kept at
        // one pixel or more: very elongated images would otherwise round down to zero,
        // which is not a valid image dimension.
        int targetWidth;
        int targetHeight;
        if (width > height) {
            targetWidth = maxSize;
            targetHeight = (int)Math.max(1L, ((long)maxSize * height) / width);
        } else {
            targetWidth = (int)Math.max(1L, ((long)maxSize * width) / height);
            targetHeight = maxSize;
        }

        Image resizedImage = sourceImage.getScaledInstance(targetWidth, targetHeight, Image.SCALE_SMOOTH);

        // Create the buffered image.
        BufferedImage bufferedImage = new BufferedImage(targetWidth, targetHeight, BufferedImage.TYPE_INT_RGB);
        // Copy image to buffered image.
        Graphics g = bufferedImage.createGraphics();
        try {
            // Clear background: important when having transparent images which otherwise use a black background
            g.setColor(Color.white);
            g.fillRect(0, 0, targetWidth, targetHeight);

            g.drawImage(resizedImage, 0, 0, null);
        } finally {
            g.dispose();
        }
        return bufferedImage;
    }

    /**
     * Settings for one document type. Only {@link #enabled} is currently filled
     * in by <code>configure</code>; the image-related fields keep their Java
     * default values while the corresponding configuration code is disabled.
     */
    static class DoctypeConfig {
        public boolean enabled;
        public int maxImageSize;
        public String imagePartName;
        public String widthFieldName;
        public String heightFieldName;
        public String previewPartName;
        public int previewMaxSize;
        public String thumbnailPartName;
        public int thumbnailMaxSize;
        // Metadata mappings keyed by tag name; created on the first addMetadata() call.
        private Map<String, MetadataInfo> metadatas;

        /**
         * @return <code>true</code> once at least one metadata mapping has been added
         */
        public boolean isSetMetadata() {
            return metadatas != null;
        }

        /**
         * Returns the metadata mapping registered for a tag name.
         *
         * @param tagName the tag name to look up
         * @return the mapping, or <code>null</code> if there is none
         */
        public MetadataInfo getMetadataInfo(String tagName) {
            if (metadatas != null)
                return metadatas.get(tagName);
            else
                return null;
        }

        /**
         * @return an iterator over all metadata mappings, empty if none were added
         */
        public Iterator<MetadataInfo> getMetadataInfoIterator() {
            if (metadatas != null)
                return metadatas.values().iterator();
            else
                return Collections.<MetadataInfo>emptyList().iterator();
        }

        /**
         * Registers a metadata mapping. A later mapping for the same tag name
         * replaces the earlier one.
         *
         * @param tagName the tag name used as lookup key
         * @param field the value stored in {@link MetadataInfo#field}
         * @param type the value stored in {@link MetadataInfo#type}
         */
        public void addMetadata(String tagName, String field, String type) {
            if (metadatas == null)
                metadatas = new HashMap<String, MetadataInfo>();
            MetadataInfo metadataInfo = new MetadataInfo();
            metadataInfo.field = field;
            metadataInfo.type = type;
            metadatas.put(tagName, metadataInfo);
        }
    }

    /**
     * One metadata mapping: a field name and a value type name.
     */
    static class MetadataInfo {
        public String field;
        public String type;

        /*
        public MetadataValueGetter getValueGetter() {
            return (MetadataValueGetter)METADATA_VALUE_GETTERS.get(type);
        }
        */
    }

    // The value getters below depend on the com.drew.metadata classes whose imports are
    // commented out at the top of the file, so they stay disabled as well.
    /*
    static interface MetadataValueGetter {
        Object getValue(int tagType, Directory directory) throws MetadataException;
    }

    static class StringValueGetter implements MetadataValueGetter {
        public Object getValue(int tagType, Directory directory) {
            return directory.getString(tagType);
        }
    }

    static class DateValueGetter implements MetadataValueGetter {
        public Object getValue(int tagType, Directory directory) throws MetadataException {
            return directory.getDate(tagType);
        }
    }

    static class LongValueGetter implements MetadataValueGetter {
        public Object getValue(int tagType, Directory directory) throws MetadataException {
            return new Long(directory.getLong(tagType));
        }
    }

    static class DoubleValueGetter implements MetadataValueGetter {
        public Object getValue(int tagType, Directory directory) throws MetadataException {
            return new Double(directory.getDouble(tagType));
        }
    }

    static class DescriptionValueGetter implements MetadataValueGetter {
        public Object getValue(int tagType, Directory directory) throws MetadataException {
            return directory.getDescription(tagType);
        }
    }
    */
    /*
    private static Map METADATA_VALUE_GETTERS;
    static {
        METADATA_VALUE_GETTERS = new HashMap();
        METADATA_VALUE_GETTERS.put("description", new DescriptionValueGetter());
        METADATA_VALUE_GETTERS.put("string", new StringValueGetter());
        METADATA_VALUE_GETTERS.put("datetime", new DateValueGetter());
        METADATA_VALUE_GETTERS.put("long", new LongValueGetter());
        METADATA_VALUE_GETTERS.put("double", new DoubleValueGetter());
    }
    */
}


More information about the daisy mailing list