Hi All,
I am trying to get the font name of every word in a PDF using iTextSharp.dll. My code is as follows:
The link for the input file is as follows:
https://drive.google.com/open?id=0B6tD8gqVZtLiM3NYMmVVVllNcWc
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using iTextSharp.text.pdf;
using iTextSharp.text.pdf.parser;

namespace GetFontName
{
    /// <summary>
    /// Console program that prints the distinct fonts referenced by the page
    /// resource dictionaries of a PDF file.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Walks every page of the PDF, collects a description of each font found in
        /// the page's /Resources /Font dictionary, and writes the distinct
        /// descriptions to the console.
        /// </summary>
        /// <param name="args">
        /// Optional. <c>args[0]</c> is the path of the PDF to inspect; when no
        /// argument is given the original hard-coded path is used.
        /// </param>
        static void Main(string[] args)
        {
            string path = args.Length > 0
                ? args[0]
                : "C:/Users/agnihotri/Downloads/Test.pdf";

            HashSet<string> names = new HashSet<string>();
            PdfReader reader = new PdfReader(path);
            try
            {
                for (int p = 1; p <= reader.NumberOfPages; p++)
                {
                    PdfDictionary page = reader.GetPageN(p);
                    if (page == null)
                    {
                        continue;
                    }

                    PdfDictionary resources = page.GetAsDict(PdfName.RESOURCES);
                    if (resources == null)
                    {
                        continue;
                    }

                    // Gets the fonts dictionary of this page.
                    PdfDictionary fonts = resources.GetAsDict(PdfName.FONT);
                    if (fonts == null)
                    {
                        continue;
                    }

                    foreach (PdfName key in fonts.Keys)
                    {
                        string description = DescribeFont(fonts.GetAsDict(key));
                        if (description != null)
                        {
                            names.Add(description);
                        }
                    }
                }
            }
            finally
            {
                // Release the reader even if a page fails to parse.
                reader.Close();
            }

            foreach (string fname in names)
            {
                Console.WriteLine(fname);
            }

            // Keep the console window open until a key is pressed.
            Console.Read();
        }

        /// <summary>
        /// Builds a one-line description of a single font dictionary.
        /// </summary>
        /// <param name="font">The font dictionary; may be null.</param>
        /// <returns>
        /// The font name followed by subset/embedding information, or null when
        /// <paramref name="font"/> is null or has no /BaseFont entry.
        /// </returns>
        private static string DescribeFont(PdfDictionary font)
        {
            if (font == null)
            {
                return null;
            }

            // /BaseFont is not present in every font dictionary (for example
            // Type 3 fonts), so it must be checked before it is dereferenced.
            PdfName baseFont = font.GetAsName(PdfName.BASEFONT);
            if (baseFont == null)
            {
                return null;
            }

            // The string form starts with '/', e.g. "/ABCDEF+Arial" or "/Arial".
            string name = baseFont.ToString();

            // Subsetted fonts carry a six-character tag followed by '+':
            // index 0 is '/', indexes 1-6 the tag, index 7 the '+'.
            if (name.Length > 8 && name[7] == '+')
            {
                // .NET placeholders are {0}/{1} (not %s), and Substring takes
                // (start, length), so the tag is Substring(1, 6).
                return string.Format("{0} subset ({1})", name.Substring(8), name.Substring(1, 6));
            }

            // Not a subset: report how (or whether) the font program is embedded.
            name = name.Substring(1);
            PdfDictionary desc = font.GetAsDict(PdfName.FONTDESCRIPTOR);
            if (desc == null)
            {
                name += " no font descriptor";
            }
            else if (desc.Get(PdfName.FONTFILE) != null)
            {
                name += " (Type1) embedded";
            }
            else if (desc.Get(PdfName.FONTFILE2) != null)
            {
                name += " (TrueType) embedded";
            }
            else if (desc.Get(PdfName.FONTFILE3) != null)
            {
                // Label the entry with the font dictionary's /Subtype when present.
                PdfName subtype = font.GetAsName(PdfName.SUBTYPE);
                string kind = subtype == null ? "FontFile3" : subtype.ToString().Substring(1);
                name += " (" + kind + ") embedded";
            }

            return name;
        }
    }
}
Please help; this is urgent.