Tuesday, 12 May 2015

Class for extracting each image from a PDF file using PDFsharp

using System;
using System.Collections.Generic;
using System.Linq;
using System.Web;
using System.IO;
//using System.Web.UI.WebControls;
using System.Drawing;
using System.Drawing.Imaging;
using PdfSharp.Pdf;
using PdfSharp.Drawing;
using System.Text;
using PdfSharp.Pdf.IO;
using PdfSharp.Pdf.Advanced;


namespace OfficeConverter
{
    public class Pdf2Image
    {
        //http://www.pdfsharp.net/wiki/ExportImages-sample.ashx?Code=1
        // Retrive PageCount of a multi-page tiff image
        // Fuction will accept pdf file with full path and split the file in to
        //individual files as per pages and create a folder i folder name.


        public int ExtractPagestoJpeg(string sourcePdfPath, string DestinationFolder)
        {

            int i = 0;
            string filename = sourcePdfPath;


            PdfDocument document = PdfSharp.Pdf.IO.PdfReader.Open(filename);



            int imageCount = 0;

            // Iterate pages

            foreach (PdfPage page in document.Pages)
            {

                // Get resources dictionary

                PdfDictionary resources = page.Elements.GetDictionary("/Resources");

                if (resources != null)
                {

                    // Get external objects dictionary

                    PdfDictionary xObjects = resources.Elements.GetDictionary("/XObject");

                    if (xObjects != null)
                    {
                        ICollection<PdfSharp.Pdf.PdfItem> items = xObjects.Elements.Values;

                        // Iterate references to external objects
                        int z = 0;
                        foreach (PdfItem item in items)
                        {
                            z = 1;
                            PdfReference reference = item as PdfReference;

                            if (reference != null)
                            {

                                PdfDictionary xObject = reference.Value as PdfDictionary;

                                // Is external object an image?

                                if (xObject != null && xObject.Elements.GetString("/Subtype") == "/Image")
                                {

                                    ExportImage(xObject, ref imageCount, DestinationFolder, z);
                                    z++;
                                }

                            }

                        }

                    }

                }

            }
            return i;

        }

        static void ExportImage(PdfDictionary image, ref int count, string DestinationFolder, int z)
        {

            string filter = image.Elements.GetName("/Filter");

            switch (filter)
            {

                case "/DCTDecode":

                    ExportJpegImage(image, ref count, DestinationFolder, z);

                    break;



                case "/FlateDecode":
                    ExportAsPngImage(image, ref count);


                    break;

            }

        }

        static void ExportJpegImage(PdfDictionary image, ref int count, string DestinationFolder, int z)
        {

            // Fortunately JPEG has native support in PDF and exporting an image is just writing the stream to a file.

            byte[] stream = image.Stream.Value;


            FileStream fs = new FileStream(String.Format(DestinationFolder + "//" + z + ".jpeg", count++), FileMode.Create, FileAccess.Write);

            BinaryWriter bw = new BinaryWriter(fs);

            bw.Write(stream);

            bw.Close();

        }

        static void ExportAsPngImage(PdfDictionary image, ref int count)
        {

            int width = image.Elements.GetInteger(PdfImage.Keys.Width);

            int height = image.Elements.GetInteger(PdfImage.Keys.Height);

            int bitsPerComponent = image.Elements.GetInteger(PdfImage.Keys.BitsPerComponent);


            // TODO: You can put the code here that converts vom PDF internal image format to a Windows bitmap

            // and use GDI+ to save it in PNG format.

            // It is the work of a day or two for the most important formats. Take a look at the file

            // PdfSharp.Pdf.Advanced/PdfImage.cs to see how we create the PDF image formats.

            // We don't need that feature at the moment and therefore will not implement it.

            // If you write the code for exporting images I would be pleased to publish it in a future release

            // of PDFsharp.

        }
    }
}

No comments:

Post a Comment