简体   繁体   中英

Python3 LaTex PDF generator using subprocess, Error: memoryview: str object does not have the buffer interface

I am converting a Python 2 project to Python 3.4. One part of the project uses LaTeX and subprocess to generate PDF files, and I am having trouble getting the code past the subprocess.Popen.communicate() step. The problem is in gen_pdf(), and I think cmd.communicate(input=self._gen_latex()) is what causes it. If I remove the try block and run the code directly, it raises the error "memoryview: str object does not have the buffer interface". So far I have not found a way around this issue.

Any help is highly appreciated. Thanks!

import django.conf
import subprocess
import os
import tempfile
import shutil


class PDFLatexWriter(object):
    """
    Handles creating Latex documents and building them into PDFs.

    A private scratch directory is created under ``parent_dir`` on
    construction; pdflatex writes its build files and the resulting
    ``dp.pdf`` there. The directory is removed once ``gen_pdf()`` finishes
    (and again, harmlessly, when the instance is garbage collected).
    """

    def __init__(self, get_pdf_form, parent_dir=os.path.join(django.conf.settings.BASE_DIR+'/media', 'pdfs', 'tmp')):
        """
        get_pdf_form: A validated pdfs.forms.GetPDFForm.
        parent_dir: Directory where the temporary directory will be created.
        """
        self.form = get_pdf_form
        self.parent = parent_dir
        self.tmpd = tempfile.mkdtemp(dir=self.parent)

    def __del__(self):
        self._clean_tmp()

    def gen_pdf(self):
        """
        Generates the Latex document, runs pdflatex on it and copies the
        result to a temporary file.

        Returns the pdf file handle (binary mode, positioned at offset 0),
        or None if the document could not be built. Failures are reported
        on stdout rather than raised, so callers must check for None.
        The scratch directory is always removed before returning.
        """
        args = ['/usr/bin/pdflatex', '-jobname', 'dp',
                '-output-directory', self.tmpd, '-halt-on-error']
        try:
            # The pipes are opened in binary mode, so the markup must be
            # handed to communicate() as bytes, not str (Python 3).
            latex = self._gen_latex().encode('utf-8')
            cmd = subprocess.Popen(args, stdin=subprocess.PIPE,
                                   stdout=subprocess.PIPE, stderr=subprocess.PIPE)
            # communicate() returns (stdout, stderr), in that order.
            stdout, stderr = cmd.communicate(input=latex)
            if cmd.returncode != 0:
                print('Error running cmd.')
                # pdflatex reports its errors on stdout; show both streams
                # so the actual cause (e.g. a missing image) is visible.
                print(stdout.decode('utf-8', 'replace'))
                print(stderr.decode('utf-8', 'replace'))
                return None
            return self._cp_pdf_tmp()
        except Exception as exc:
            # Keep the best-effort contract (return None on any failure),
            # but never hide what went wrong.
            print('Error generating PDF: %r' % (exc,))
            return None
        finally:
            self._clean_tmp()

    def _gen_latex(self):
        """
        Generates the latex markup and returns a string of the markup.

        Each item produced by ``self.form.iter_pdf()`` is indexed as
        ``(pdf, pages)``: ``pdf`` supplies ``pdf_display_name``,
        ``pdf_path`` and ``pages``; a ``pages`` value of ``'-'`` means the
        complete PDF is included, anything else is treated as one page.

        NOTE(review): names and paths are inserted into the markup
        unescaped - confirm they cannot contain LaTeX special characters.
        """
        header = r"""
        \documentclass[a4paper,16pt]{article}

        \usepackage{graphicx}
        \usepackage{pdfpages}
        \usepackage{hyperref}
        \usepackage{fancyhdr}

        \begin{document}

        \pagestyle{fancy}

        \fancyhead[C]{\includegraphics[width=9mm]{%s}\huge{ Student Book}}

        """ % os.path.join(django.conf.settings.BASE_DIR, 'static', 'images', 'logo.png')

        # Backslash is escaped explicitly; '\e' is not a valid escape.
        footer = '\n\n\\end{document}'

        link_lines = []
        doc_lines = []
        # Page number the next table-of-contents link points at; the
        # included documents start on page 2.
        hyperlink = 2
        for entry in self.form.iter_pdf():
            pdf, pages = entry[0], entry[1]
            doc_lines.append(r"\includepdf[pages=%s,link,linkname=%s]{%s}"
                             % (pages, pdf.pdf_display_name, pdf.pdf_path))
            if pages == '-':
                # Complete PDF.
                link_lines.append(r"\noindent\hyperlink{page.%s}{%s}\newline"
                                  % (hyperlink, pdf.pdf_display_name))
                hyperlink += pdf.pages
            else:
                link_lines.append(r"\noindent\hyperlink{page.%s}{%s (Page %s)}\newline"
                                  % (hyperlink, pdf.pdf_display_name, pages))
                hyperlink += 1

        # Every entry is terminated by a newline, as before.
        links = ''.join(line + '\n' for line in link_lines)
        docs = ''.join(line + '\n' for line in doc_lines)
        return header + '\n\n' + links + '\n\n' + docs + '\n\n' + footer

    def _cp_pdf_tmp(self):
        """
        gen_pdf() creates a temp directory that includes latex build files and the PDF. Unfortunately,
        a temp directory will not automatically delete when the last reference is closed. Therefore,
        it's necessary to manually delete this temp dir before returning from the view. However,
        we can't send the PDF to the user if we've already deleted its containing dir. This function
        copies the PDF to a true temp file that will delete on close, allowing us to have the desired
        behavior where the temp dir is manually deleted, and the PDF is deleted upon close.
        Returns a file handle to the PDF.
        Raises IOError if the PDF was not produced.
        """
        src_path = os.path.join(self.tmpd, 'dp.pdf')
        if not os.path.isfile(src_path):
            print('No source file.')
            raise IOError('No source file: %s' % src_path)

        tmp = tempfile.TemporaryFile(dir=self.parent, mode='r+b')
        try:
            # Close the source handle deterministically instead of leaking it.
            with open(src_path, 'rb') as src:
                shutil.copyfileobj(src, tmp)
            tmp.seek(0)
        except Exception:
            # Do not leak the temp file if the copy fails part-way.
            tmp.close()
            raise
        return tmp

    def _clean_tmp(self):
        """
        Cleans up temp directory.

        Safe to call more than once, and safe to call from __del__ even
        when __init__ failed before the directory was created.
        """
        tmpd = getattr(self, 'tmpd', None)
        if tmpd is None or not os.path.isdir(tmpd):
            # Nothing to remove (never created, or already cleaned up).
            return
        try:
            shutil.rmtree(tmpd)
        except OSError:
            print('Unable to clean temporary files.')

Added Traceback

Traceback:
File "/usr/lib/python3/dist-packages/django/core/handlers/base.py" in get_response
  112.                     response = wrapped_callback(request, *callback_args, **callback_kwargs)
File "/usr/lib/python3/dist-packages/django/contrib/auth/decorators.py" in _wrapped_view
  22.                 return view_func(request, *args, **kwargs)
File "/var/django/project1/project1/pdfs/views.py" in pdf_share
  132.                 pdf_fb = tex.gen_pdf()
File "/var/django/project1/project1/pdfs/latex.py" in gen_pdf
  125.         stdout = cmd.communicate(input=self._gen_latex())[0]
File "/usr/lib/python3.4/subprocess.py" in communicate
  960.                 stdout, stderr = self._communicate(input, endtime, timeout)
File "/usr/lib/python3.4/subprocess.py" in _communicate
  1602.                 input_view = memoryview(self._input)

Exception Type: TypeError at /app1/share/pdf/
Exception Value: memoryview: str object does not have the buffer interface

After changing the call to " stdout = cmd.communicate(input=(self._gen_latex()).encode('utf-8'))[0] ", I was able to print out all of the LaTeX execution details. The reason I got Popen.poll() = 1 instead of 0 was that the subprocess had terminated with an error. After printing stdout and digging into the error, I found that a logo file had the wrong path. Once I corrected that, everything worked perfectly. I hope this helps anyone working on something similar.

The technical post webpages of this site follow the CC BY-SA 4.0 protocol. If you need to reprint, please indicate the site URL or the original address.Any question please contact:yoyou2525@163.com.

 
粤ICP备18138465号  © 2020-2024 STACKOOM.COM