Apache PDFBox Adding Metadata to PDF Document in Java
Tags: Java Apache PDFBox PDF
Introduction
This tutorial shows you how to add metadata such as title, creation date, modification date, author, creator tool, producer, keywords, etc., as well as custom properties, to a new PDF document or an existing PDF file in Java using the Apache PDFBox library.
Apache PDFBox Overview
The Apache PDFBox is an open source library for working with PDF documents in Java. You can get more information about the project at pdfbox.apache.org
Adding Apache PDFBox Dependencies
Add the dependencies below to your build.gradle file if you are using the Gradle build tool.
// The 'compile' configuration is deprecated and was removed in Gradle 7;
// 'implementation' is its replacement for ordinary compile/runtime dependencies.
implementation group: 'org.apache.pdfbox', name: 'pdfbox', version: '2.0.18'
implementation group: 'org.apache.pdfbox', name: 'xmpbox', version: '2.0.18'
Add the XML below to your pom.xml file if you are using the Maven build tool.
<!-- PDFBox core: PDDocument, PDPage, PDDocumentInformation, PDMetadata -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>pdfbox</artifactId>
    <version>2.0.18</version>
</dependency>
<!-- xmpbox: XMPMetadata, the XMP schema classes and XmpSerializer -->
<dependency>
    <groupId>org.apache.pdfbox</groupId>
    <artifactId>xmpbox</artifactId>
    <version>2.0.18</version>
</dependency>
Or you can download the pdfbox-2.0.18.jar and xmpbox-2.0.18.jar files from pdfbox.apache.org/download.cgi
Step 1 - Create an empty PDF document and add a blank page
// Open a new, empty document; try-with-resources closes it when the block exits.
try (PDDocument document = new PDDocument()) {
    // Append a single blank page.
    document.addPage(new PDPage());
} catch (IOException ioException) {
    ioException.printStackTrace();
}
Step 2 - Adding Metadata to the PDF document
// 1) Document information dictionary: standard fields plus two custom entries.
PDDocumentInformation info = new PDDocumentInformation();
info.setTitle("Apache PDFBox Adding Metadata PDF Document in Java");
info.setSubject("Apache PDFBox Adding Metadata PDF Document in Java");
info.setAuthor("Simple Solution");
info.setCreator("Java Application");
info.setProducer("Simple Solution");
info.setKeywords("Java, Pdf Document, PDFBox, Simple Solution");
info.setCreationDate(Calendar.getInstance());
info.setModificationDate(Calendar.getInstance());
info.setCustomMetadataValue("Website", "https://simplesolution.dev");
info.setCustomMetadataValue("Email", "contact@simplesolution.dev");
document.setDocumentInformation(info);

// 2) XMP metadata built from the values stored above.
XMPMetadata xmp = XMPMetadata.createXMPMetadata();

// Adobe PDF schema: keywords and producer.
AdobePDFSchema adobePdf = xmp.createAndAddAdobePDFSchema();
adobePdf.setKeywords(info.getKeywords());
adobePdf.setProducer(info.getProducer());

// XMP Basic schema: dates and creator tool.
XMPBasicSchema xmpBasic = xmp.createAndAddXMPBasicSchema();
xmpBasic.setCreateDate(info.getCreationDate());
xmpBasic.setModifyDate(info.getModificationDate());
xmpBasic.setCreatorTool(info.getCreator());
xmpBasic.setMetadataDate(info.getCreationDate());

// Dublin Core schema: title, author and description.
DublinCoreSchema dublinCore = xmp.createAndAddDublinCoreSchema();
dublinCore.setTitle(info.getTitle());
dublinCore.addCreator(info.getAuthor());
dublinCore.setDescription(info.getSubject());

// 3) Serialize the XMP object to bytes and attach it to the document catalog.
ByteArrayOutputStream xmpBytes = new ByteArrayOutputStream();
new XmpSerializer().serialize(xmp, xmpBytes, false);

PDMetadata xmpStream = new PDMetadata(document);
xmpStream.importXMPMetadata(xmpBytes.toByteArray());
document.getDocumentCatalog().setMetadata(xmpStream);
Step 3 - Save PDF document
document.save("D:\\SimpleSolution\\MetaDataDocument.pdf");
Complete Application Source Code
Below is the complete Java application code to create a new PDF file with an empty page and save it at D:\SimpleSolution\MetaDataDocument.pdf
The new PDF file will be given the following metadata:
- Title: Apache PDFBox Adding Metadata PDF Document in Java
- Subject: Apache PDFBox Adding Metadata PDF Document in Java
- Author: Simple Solution
- Creator: Java Application
- Producer: Simple Solution
- Keywords: Java, Pdf Document, PDFBox, Simple Solution
- Creation Date: current date and time
- Modification Date: current date and time
And two custom properties:
- Website: https://simplesolution.dev
- Email: contact@simplesolution.dev
package dev.simplesolution;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.util.Calendar;
import javax.xml.transform.TransformerException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.PDPage;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.AdobePDFSchema;
import org.apache.xmpbox.schema.DublinCoreSchema;
import org.apache.xmpbox.schema.XMPBasicSchema;
import org.apache.xmpbox.xml.XmpSerializer;
/**
 * Creates a one-page PDF, fills in its document information dictionary and a
 * matching XMP metadata stream, and saves it to a fixed location.
 */
public class MetaDataPdfDocument {

    /** Where the generated PDF is written. */
    private static final String OUTPUT_PATH = "D:\\SimpleSolution\\MetaDataDocument.pdf";

    /**
     * Builds the document, attaches both kinds of metadata and saves it.
     * Any I/O or XMP serialization failure is printed as a stack trace.
     *
     * @param args ignored
     */
    public static void main(String... args) {
        try (PDDocument pdf = new PDDocument()) {
            pdf.addPage(new PDPage());

            PDDocumentInformation info = buildInformation();
            pdf.setDocumentInformation(info);

            byte[] xmpPacket = toXmpPacket(info);
            PDMetadata xmpStream = new PDMetadata(pdf);
            xmpStream.importXMPMetadata(xmpPacket);
            pdf.getDocumentCatalog().setMetadata(xmpStream);

            pdf.save(OUTPUT_PATH);
        } catch (IOException | TransformerException failure) {
            failure.printStackTrace();
        }
    }

    /** Creates the information dictionary with the standard fields and two custom entries. */
    private static PDDocumentInformation buildInformation() {
        PDDocumentInformation info = new PDDocumentInformation();
        info.setTitle("Apache PDFBox Adding Metadata PDF Document in Java");
        info.setSubject("Apache PDFBox Adding Metadata PDF Document in Java");
        info.setAuthor("Simple Solution");
        info.setCreator("Java Application");
        info.setProducer("Simple Solution");
        info.setKeywords("Java, Pdf Document, PDFBox, Simple Solution");
        info.setCreationDate(Calendar.getInstance());
        info.setModificationDate(Calendar.getInstance());
        info.setCustomMetadataValue("Website", "https://simplesolution.dev");
        info.setCustomMetadataValue("Email", "contact@simplesolution.dev");
        return info;
    }

    /**
     * Copies the values of {@code info} into Adobe PDF, XMP Basic and Dublin Core
     * schemas and returns the serialized XMP bytes.
     */
    private static byte[] toXmpPacket(PDDocumentInformation info) throws TransformerException {
        XMPMetadata xmp = XMPMetadata.createXMPMetadata();

        AdobePDFSchema adobePdf = xmp.createAndAddAdobePDFSchema();
        adobePdf.setKeywords(info.getKeywords());
        adobePdf.setProducer(info.getProducer());

        XMPBasicSchema xmpBasic = xmp.createAndAddXMPBasicSchema();
        xmpBasic.setCreateDate(info.getCreationDate());
        xmpBasic.setModifyDate(info.getModificationDate());
        xmpBasic.setCreatorTool(info.getCreator());
        xmpBasic.setMetadataDate(info.getCreationDate());

        DublinCoreSchema dublinCore = xmp.createAndAddDublinCoreSchema();
        dublinCore.setTitle(info.getTitle());
        dublinCore.addCreator(info.getAuthor());
        dublinCore.setDescription(info.getSubject());

        ByteArrayOutputStream buffer = new ByteArrayOutputStream();
        new XmpSerializer().serialize(xmp, buffer, false);
        return buffer.toByteArray();
    }
}
Executing the application above creates a new document at D:\SimpleSolution\MetaDataDocument.pdf. Opening the file with a PDF reader application shows the file properties below.
And custom properties information.
Update Metadata of existing PDF file
To update the metadata of an existing PDF file, we first need to load the file using the PDDocument.load() static method.
PDDocument document = PDDocument.load(new File("D:\\SimpleSolution\\Document.pdf")
For example, the complete Java application below updates the metadata of the PDF file located at D:\SimpleSolution\Document.pdf
package dev.simplesolution;
import java.io.ByteArrayOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.Calendar;
import javax.xml.transform.TransformerException;
import org.apache.pdfbox.pdmodel.PDDocument;
import org.apache.pdfbox.pdmodel.PDDocumentCatalog;
import org.apache.pdfbox.pdmodel.PDDocumentInformation;
import org.apache.pdfbox.pdmodel.common.PDMetadata;
import org.apache.xmpbox.XMPMetadata;
import org.apache.xmpbox.schema.AdobePDFSchema;
import org.apache.xmpbox.schema.DublinCoreSchema;
import org.apache.xmpbox.schema.XMPBasicSchema;
import org.apache.xmpbox.xml.XmpSerializer;
/**
 * Updates the metadata of an existing PDF file and saves it back to the same path.
 *
 * <p>The file's existing document information dictionary is modified in place
 * rather than replaced, so entries this program does not set are kept, and an
 * existing creation date is preserved.
 */
public class MetaDataExistingPdfDocument {

    /** The PDF that is loaded, updated and then overwritten. */
    private static final String PDF_PATH = "D:\\SimpleSolution\\Document.pdf";

    /**
     * Loads the PDF, updates its information dictionary, rebuilds the XMP
     * metadata stream from those values and saves the file.
     * Any I/O or XMP serialization failure is printed as a stack trace.
     *
     * @param args ignored
     */
    public static void main(String... args) {
        try (PDDocument document = PDDocument.load(new File(PDF_PATH))) {
            // Start from the dictionary already in the file; creating a new
            // PDDocumentInformation here would discard every existing entry.
            PDDocumentInformation documentInformation = document.getDocumentInformation();

            // One timestamp for everything that means "changed now".
            Calendar now = Calendar.getInstance();

            documentInformation.setTitle("Apache PDFBox Adding Metadata PDF Document in Java");
            documentInformation.setSubject("Apache PDFBox Adding Metadata PDF Document in Java");
            documentInformation.setAuthor("Simple Solution");
            documentInformation.setCreator("Java Application");
            documentInformation.setProducer("Simple Solution");
            documentInformation.setKeywords("Java, Pdf Document, PDFBox, Simple Solution");
            // Only set a creation date when the file does not already carry one.
            if (documentInformation.getCreationDate() == null) {
                documentInformation.setCreationDate(now);
            }
            documentInformation.setModificationDate(now);
            documentInformation.setCustomMetadataValue("Website", "https://simplesolution.dev");
            documentInformation.setCustomMetadataValue("Email", "contact@simplesolution.dev");
            document.setDocumentInformation(documentInformation);

            byte[] xmpPacket = buildXmpPacket(documentInformation);

            // NOTE(review): this replaces any XMP stream the file already had with one
            // rebuilt from the information dictionary; XMP-only properties are not carried over.
            PDDocumentCatalog catalog = document.getDocumentCatalog();
            PDMetadata metadataStream = new PDMetadata(document);
            metadataStream.importXMPMetadata(xmpPacket);
            catalog.setMetadata(metadataStream);

            document.save(PDF_PATH);
        } catch (IOException | TransformerException e) {
            e.printStackTrace();
        }
    }

    /**
     * Copies the values of {@code documentInformation} into Adobe PDF, XMP Basic
     * and Dublin Core schemas and returns the serialized XMP bytes.
     *
     * @param documentInformation source of the values; its creation and modification dates must be set
     * @return the serialized XMP metadata
     * @throws TransformerException if the XMP cannot be serialized
     */
    private static byte[] buildXmpPacket(PDDocumentInformation documentInformation)
            throws TransformerException {
        XMPMetadata metadata = XMPMetadata.createXMPMetadata();

        AdobePDFSchema pdfSchema = metadata.createAndAddAdobePDFSchema();
        pdfSchema.setKeywords(documentInformation.getKeywords());
        pdfSchema.setProducer(documentInformation.getProducer());

        XMPBasicSchema basicSchema = metadata.createAndAddXMPBasicSchema();
        basicSchema.setCreateDate(documentInformation.getCreationDate());
        basicSchema.setModifyDate(documentInformation.getModificationDate());
        basicSchema.setCreatorTool(documentInformation.getCreator());
        // The metadata is being changed now, so stamp it with the modification
        // time rather than the (possibly much older) creation date.
        basicSchema.setMetadataDate(documentInformation.getModificationDate());

        DublinCoreSchema dcSchema = metadata.createAndAddDublinCoreSchema();
        dcSchema.setTitle(documentInformation.getTitle());
        dcSchema.addCreator(documentInformation.getAuthor());
        dcSchema.setDescription(documentInformation.getSubject());

        ByteArrayOutputStream byteArrayOutputStream = new ByteArrayOutputStream();
        new XmpSerializer().serialize(metadata, byteArrayOutputStream, false);
        return byteArrayOutputStream.toByteArray();
    }
}
Download Source Code
The source code in this article can be found at: github.com/simplesolutiondev/ApachePDFBoxMetaDataPdfDocument
or download at:
Happy Coding 😊
Related Articles
Creating PDF Document File in Java using Apache PDFBox
Insert Image to PDF Document in Java using Apache PDFBox
Creating Bookmarks for PDF Document in Java with Apache PDFBox
Creating PDF Document Page Labels in Java with Apache PDFBox