You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
def getPDFContent():
    """Extract the text of every page of the PDF at ``pathToPdf``.

    Reads the module-level ``pathToPdf``, prints the document's info
    dictionary, concatenates the text of all pages and collapses every run
    of whitespace (non-breaking spaces included) into a single space.

    Returns:
        The whitespace-normalised text of the whole document.

    Raises:
        pyPdf.utils.PdfReadError: propagated from pyPdf when the file cannot
            be parsed (for example "multiple definitions in dictionary").
    """
    # pyPdf pulls objects from the stream while pages are being fetched, so
    # all extraction happens while the file is open; ``with`` closes the
    # handle even when parsing fails.
    with open(pathToPdf, 'rb') as pdfFile:
        pdf = pyPdf.PdfFileReader(pdfFile)
        # Single-argument call form prints the same on Python 2 and 3.
        print(pdf.documentInfo)
        # Extract text from each page, keeping a separator between pages.
        pages = [
            pdf.getPage(i).extractText() + " \n"
            for i in range(pdf.getNumPages())
        ]
    content = "".join(pages)
    # Collapse whitespace; split() already drops leading/trailing whitespace,
    # so no separate strip() is needed.
    return u" ".join(content.replace(u"\xa0", u" ").split())
import io

# Extract first: if parsing the PDF fails, the output file is neither
# created nor truncated.
text = getPDFContent()
# An explicit encoding keeps non-ASCII characters from the PDF from failing
# on write; ``with`` closes the file even if the write raises.
with io.open(pathToTxt, 'w', encoding='utf-8') as f:
    f.write(text)
where pathToPdf and pathToTxt are absolute paths to the files.
But I got this error:
Traceback (most recent call last):
File "C:/Users/will/Desktop/coding/mytest.py", line 21, in
print pdf.getPage(14)
File "C:\Python\lib\site-packages\pyPdf\pdf.py", line 450, in getPage
self._flatten()
File "C:\Python\lib\site-packages\pyPdf\pdf.py", line 607, in _flatten
self._flatten(page.getObject(), inherit, **addt)
File "C:\Python\lib\site-packages\pyPdf\generic.py", line 165, in getObject
return self.pdf.getObject(self).getObject()
File "C:\Python\lib\site-packages\pyPdf\pdf.py", line 649, in getObject
retval = readObject(self.stream, self)
File "C:\Python\lib\site-packages\pyPdf\generic.py", line 67, in readObject
return DictionaryObject.readFromStream(stream, pdf)
File "C:\Python\lib\site-packages\pyPdf\generic.py", line 531, in readFromStream
value = readObject(stream, pdf)
File "C:\Python\lib\site-packages\pyPdf\generic.py", line 67, in readObject
return DictionaryObject.readFromStream(stream, pdf)
File "C:\Python\lib\site-packages\pyPdf\generic.py", line 531, in readFromStream
value = readObject(stream, pdf)
File "C:\Python\lib\site-packages\pyPdf\generic.py", line 67, in readObject
return DictionaryObject.readFromStream(stream, pdf)
File "C:\Python\lib\site-packages\pyPdf\generic.py", line 534, in readFromStream
raise utils.PdfReadError, "multiple definitions in dictionary"
pyPdf.utils.PdfReadError: multiple definitions in dictionary
The text was updated successfully, but these errors were encountered:
I have some code:
import pyPdf
def getPDFContent():
    """Extract the text of every page of the PDF at ``pathToPdf``.

    Reads the module-level ``pathToPdf``, prints the document's info
    dictionary, concatenates the text of all pages and collapses every run
    of whitespace (non-breaking spaces included) into a single space.

    Returns:
        The whitespace-normalised text of the whole document.

    Raises:
        pyPdf.utils.PdfReadError: propagated from pyPdf when the file cannot
            be parsed (for example "multiple definitions in dictionary").
    """
    # pyPdf pulls objects from the stream while pages are being fetched, so
    # all extraction happens while the file is open; ``with`` closes the
    # handle even when parsing fails.
    with open(pathToPdf, 'rb') as pdfFile:
        pdf = pyPdf.PdfFileReader(pdfFile)
        # Single-argument call form prints the same on Python 2 and 3.
        print(pdf.documentInfo)
        # Extract text from each page, keeping a separator between pages.
        pages = [
            pdf.getPage(i).extractText() + " \n"
            for i in range(pdf.getNumPages())
        ]
    content = "".join(pages)
    # Collapse whitespace; split() already drops leading/trailing whitespace,
    # so no separate strip() is needed.
    return u" ".join(content.replace(u"\xa0", u" ").split())
import io

# Extract first: if parsing the PDF fails, the output file is neither
# created nor truncated.
text = getPDFContent()
# An explicit encoding keeps non-ASCII characters from the PDF from failing
# on write; ``with`` closes the file even if the write raises.
with io.open(pathToTxt, 'w', encoding='utf-8') as f:
    f.write(text)
where pathToPdf and pathToTxt are absolute paths to the files.
But I got this error:
Traceback (most recent call last):
File "C:/Users/will/Desktop/coding/mytest.py", line 21, in
print pdf.getPage(14)
File "C:\Python\lib\site-packages\pyPdf\pdf.py", line 450, in getPage
self._flatten()
File "C:\Python\lib\site-packages\pyPdf\pdf.py", line 607, in _flatten
self._flatten(page.getObject(), inherit, **addt)
File "C:\Python\lib\site-packages\pyPdf\generic.py", line 165, in getObject
return self.pdf.getObject(self).getObject()
File "C:\Python\lib\site-packages\pyPdf\pdf.py", line 649, in getObject
retval = readObject(self.stream, self)
File "C:\Python\lib\site-packages\pyPdf\generic.py", line 67, in readObject
return DictionaryObject.readFromStream(stream, pdf)
File "C:\Python\lib\site-packages\pyPdf\generic.py", line 531, in readFromStream
value = readObject(stream, pdf)
File "C:\Python\lib\site-packages\pyPdf\generic.py", line 67, in readObject
return DictionaryObject.readFromStream(stream, pdf)
File "C:\Python\lib\site-packages\pyPdf\generic.py", line 531, in readFromStream
value = readObject(stream, pdf)
File "C:\Python\lib\site-packages\pyPdf\generic.py", line 67, in readObject
return DictionaryObject.readFromStream(stream, pdf)
File "C:\Python\lib\site-packages\pyPdf\generic.py", line 534, in readFromStream
raise utils.PdfReadError, "multiple definitions in dictionary"
pyPdf.utils.PdfReadError: multiple definitions in dictionary
The text was updated successfully, but these errors were encountered: