"""Extract the text blocks of a PDF with pdfminer3k and save them to a CSV file.

Usage: python <this_script> <input.pdf>

Every text box / text line found in the PDF becomes one single-field row of
``pdftest.csv`` (space-delimited, ``|`` as the quote character).
"""
import csv
import sys

# Output file written by the script; the path is relative to the working dir.
OUTPUT_CSV = 'pdftest.csv'


def iter_text_blocks(pdf_path):
    """Yield the text of each LTTextBox / LTTextLine in the PDF at *pdf_path*.

    Pages are processed in document order and layout objects in the order the
    pdfminer layout analysis returns them. The PDF file stays open only while
    the generator is being consumed.
    """
    # The pdfminer3k distribution is imported under the package name
    # ``pdfminer`` ("python-pdfminer3k" is not a valid module name).
    # Imported lazily so the CSV helper below stays usable without pdfminer.
    from pdfminer.converter import PDFPageAggregator
    from pdfminer.layout import LAParams, LTTextBox, LTTextLine
    from pdfminer.pdfinterp import PDFPageInterpreter, PDFResourceManager
    from pdfminer.pdfparser import PDFDocument, PDFParser

    with open(pdf_path, 'rb') as fp:
        # pdfminer3k requires the parser and the document to be linked both
        # ways before the document can be initialised.
        parser = PDFParser(fp)
        doc = PDFDocument()
        parser.set_document(doc)
        doc.set_parser(parser)
        doc.initialize('')  # empty password

        rsrcmgr = PDFResourceManager()
        laparams = LAParams()
        laparams.char_margin = 1.0
        laparams.word_margin = 1.0
        device = PDFPageAggregator(rsrcmgr, laparams=laparams)
        interpreter = PDFPageInterpreter(rsrcmgr, device)

        for page in doc.get_pages():
            interpreter.process_page(page)
            for lt_obj in device.get_result():
                if isinstance(lt_obj, (LTTextBox, LTTextLine)):
                    yield lt_obj.get_text()


def write_rows(texts, csv_path=OUTPUT_CSV):
    """Write each string in *texts* as a one-field row of *csv_path*.

    The file is opened once, so all rows are kept (reopening it in ``'w'``
    mode for every row would leave only the last one). Returns the number of
    rows written.
    """
    count = 0
    # newline='' is what the csv module requires to avoid doubled line endings.
    with open(csv_path, 'w', newline='', encoding='utf-8') as csvfile:
        csvwriter = csv.writer(csvfile, delimiter=' ', quotechar='|',
                               quoting=csv.QUOTE_MINIMAL)
        for text in texts:
            # writerow() expects a sequence of fields; a bare string would be
            # split into one field per character.
            csvwriter.writerow([text])
            count += 1
    return count


def main(argv=None):
    """Run the conversion for the PDF named in *argv* (default: sys.argv[1:]).

    Returns a process exit status: 0 on success, 2 when no PDF path was given.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        print('usage: {} <input.pdf>'.format(sys.argv[0]), file=sys.stderr)
        return 2
    write_rows(iter_text_blocks(args[0]))
    return 0


if __name__ == '__main__':
    sys.exit(main())