Wednesday, February 11, 2009

Get the total number of pages of a PDF file using Python

from glob import glob as __g
import re
# Matches a "/Count <n>" dictionary entry and captures the integer <n>.
# Compiled as a bytes pattern because the PDF files are read in binary mode.
pattern = re.compile(br"/Count\s+(\d+)")


def count(path="C:\\1\\"):
    """Return the number of pages of every PDF file found in a directory.

    The page count is estimated by scanning the raw bytes of each file for
    ``/Count <n>`` entries and keeping the largest value. In a nested page
    tree the intermediate nodes carry the size of their own sub-tree, so the
    largest value is the best candidate for the document total.

    NOTE(review): this is a heuristic, not a PDF parser. ``/Count`` entries
    that are not stored as plain text in the file (for example inside
    compressed streams) cannot be seen by this scan -- confirm it is adequate
    for the files being processed.

    Args:
        path: Directory to search for ``*.pdf`` files. Defaults to the
            directory that was previously hard-coded.

    Returns:
        A dictionary mapping each matched file name (as returned by glob) to
        its page count. Files without any ``/Count`` entry map to 0.
    """
    # Local import so this function does not rely on edits to the file header.
    import os

    page_counts = {}
    for pdf_file in __g(os.path.join(path, "*.pdf")):
        print("File Name is " + str(pdf_file))
        print("")
        # The context manager closes the handle even if reading fails.
        with open(pdf_file, "rb") as handle:
            content = handle.read()
        counts = [int(match.group(1)) for match in pattern.finditer(content)]
        page_counts[pdf_file] = max(counts) if counts else 0
    return page_counts

if __name__ == "__main__":
    # count() returns a dictionary of file name -> page count; it is printed
    # as-is. The parenthesised print form works on both Python 2 and Python 3.
    TotalPages = count()
    print("Total Pages = " + str(TotalPages))