• 设为首页
  • 点击收藏
  • 手机版
    手机扫一扫访问
    迪恩网络手机版
  • 关注官方公众号
    微信扫一扫关注
    迪恩网络公众号

Python utils.readNonWhitespace函数代码示例

原作者: [db:作者] 来自: [db:来源] 收藏 邀请

本文整理汇总了Python中utils.readNonWhitespace函数的典型用法代码示例。如果您正苦于以下问题:Python readNonWhitespace函数的具体用法?Python readNonWhitespace怎么用?Python readNonWhitespace使用的例子?那么恭喜您, 这里精选的函数代码示例或许可以为您提供帮助。



在下文中一共展示了readNonWhitespace函数的20个代码示例,这些例子默认根据受欢迎程度排序。您可以为喜欢或者感觉有用的代码点赞,您的评价将有助于我们的系统推荐出更棒的Python代码示例。

示例1: _readInlineImage

    def _readInlineImage(self, stream):
        """Parse an inline image from a content stream.

        Called once the ``BI`` operator has been consumed.  Key/value
        settings are read until the ``ID`` operator, then raw characters are
        collected until the two-character ``EI`` marker.

        :param stream: seekable file-like object holding the content stream.
        :return: dict with ``'settings'`` (a DictionaryObject holding the
            image parameters) and ``'data'`` (the characters between ``ID``
            and ``EI``).
        :raises EOFError: if the stream ends before ``EI`` is found.
        """
        settings = DictionaryObject()
        while True:
            tok = readNonWhitespace(stream)
            stream.seek(-1, 1)
            if tok == 'I':
                # start of the "ID" operator: the settings are complete
                break
            key = readObject(stream, self.pdf)
            readNonWhitespace(stream)
            stream.seek(-1, 1)
            settings[key] = readObject(stream, self.pdf)

        # consume "ID" plus the single character that follows it
        tmp = stream.read(3)
        assert tmp[:2] == 'ID'
        chunks = []
        while True:
            tok = stream.read(1)
            if not tok:
                # A truncated stream makes read(1) return an empty string
                # forever; fail instead of looping endlessly.
                raise EOFError("inline image data ended before 'EI' marker")
            if tok == 'E':
                following = stream.read(1)
                if following == 'I':
                    break
                if following:
                    # not the end marker: push the look-ahead character back
                    stream.seek(-1, 1)
            chunks.append(tok)
        data = ''.join(chunks)

        # leave the stream on the first non-whitespace character after "EI"
        readNonWhitespace(stream)
        stream.seek(-1, 1)
        return {'settings': settings,
                'data': data}
开发者ID:bizonix,项目名称:DropBoxLibrarySRC,代码行数:32,代码来源:pdf.py


示例2: readObjectHeader

 def readObjectHeader(self, stream):
     """Read an indirect-object header and return its identifiers.

     Consumes the object number, the generation number and the three
     characters that follow them (the ``obj`` keyword in a well-formed
     file), then leaves the stream on the first non-whitespace character
     after the header.

     :param stream: seekable file-like object positioned at the header, or
         in whitespace just before it.
     :return: ``(idnum, generation)`` as a tuple of ints.
     """
     # Tolerate cross-reference offsets that are off by a few whitespace
     # bytes: skip to the first non-whitespace character and step back onto
     # it.  When the stream is already on the object number this is a no-op.
     readNonWhitespace(stream)
     stream.seek(-1, 1)
     idnum = readUntilWhitespace(stream)
     generation = readUntilWhitespace(stream)
     stream.read(3)  # the three characters after the numbers are discarded
     readNonWhitespace(stream)
     stream.seek(-1, 1)
     return int(idnum), int(generation)
开发者ID:adambaldwin2,项目名称:test,代码行数:7,代码来源:pdf.py


示例3: _readInlineImage

 def _readInlineImage(self, stream):
     """Parse an inline image from a content stream.

     Reading begins just after the ``BI`` operator.  Key/value settings are
     read until the ``ID`` operator, then raw characters are collected until
     the two-character ``EI`` marker.

     :param stream: seekable file-like object holding the content stream.
     :return: dict with ``"settings"`` (a DictionaryObject holding the image
         parameters) and ``"data"`` (the characters between ``ID`` and
         ``EI``).
     :raises EOFError: if the stream ends before ``EI`` is found.
     """
     # first read the dictionary of settings.
     settings = DictionaryObject()
     while True:
         tok = readNonWhitespace(stream)
         stream.seek(-1, 1)
         if tok == "I":
             # "ID" - begin of image data
             break
         key = readObject(stream, self.pdf)
         readNonWhitespace(stream)
         stream.seek(-1, 1)
         settings[key] = readObject(stream, self.pdf)
     # left at beginning of ID; consume it plus the character that follows
     tmp = stream.read(3)
     assert tmp[:2] == "ID"
     chunks = []
     while True:
         tok = stream.read(1)
         if not tok:
             # A truncated stream makes read(1) return an empty string
             # forever; fail instead of looping endlessly.
             raise EOFError("inline image data ended before 'EI' marker")
         if tok == "E":
             following = stream.read(1)
             if following == "I":
                 break
             if following:
                 # not the end marker: push the look-ahead character back
                 stream.seek(-1, 1)
         chunks.append(tok)
     data = "".join(chunks)
     # leave the stream on the first non-whitespace character after "EI"
     readNonWhitespace(stream)
     stream.seek(-1, 1)
     return {"settings": settings, "data": data}
开发者ID:jeansch,项目名称:PyPDF2,代码行数:33,代码来源:page_object.py


示例4: readFromStream

    def readFromStream(stream, pdf):
        """Read a dictionary, and any stream payload that follows it.

        The stream must be positioned on the opening ``<<``.  Key/value
        pairs are read until ``>>``.  If the ``stream`` keyword follows, the
        number of characters given by ``/Length`` is read as stream data and
        the ``endstream`` marker is checked.

        :param stream: seekable file-like object.
        :param pdf: passed to ``readObject`` and used to resolve an
            indirect ``/Length``.
        :return: ``StreamObject.initializeFromDictionary(data)`` when stream
            data was read, otherwise a ``DictionaryObject``.
        :raises utils.PdfReadError: missing ``<<``, duplicated key, or no
            ``endstream`` marker after the stream data.
        :raises AssertionError: unexpected character after the ``stream``
            keyword, or no ``/Length`` entry.
        """
        tmp = stream.read(2)
        if tmp != '<<':
            raise utils.PdfReadError('dictionary read error')
        data = {}
        while True:
            tok = readNonWhitespace(stream)
            if tok == '>':
                # second character of the closing ">>"
                stream.read(1)
                break
            stream.seek(-1, 1)
            key = readObject(stream, pdf)
            readNonWhitespace(stream)
            stream.seek(-1, 1)
            value = readObject(stream, pdf)
            if key in data:
                raise utils.PdfReadError('multiple definitions in dictionary')
            data[key] = value

        pos = stream.tell()
        s = readNonWhitespace(stream)
        if s == 's' and stream.read(5) == 'tream':
            eol = stream.read(1)
            # skip spaces between the keyword and the end-of-line marker
            while eol == ' ':
                eol = stream.read(1)

            assert eol in ('\n', '\r')
            if eol == '\r':
                # Only swallow the next character when it really is the LF
                # of a CRLF pair; otherwise it is the first data character
                # and must stay in the stream.
                following = stream.read(1)
                if following and following != '\n':
                    stream.seek(-1, 1)
            assert '/Length' in data
            length = data['/Length']
            if isinstance(length, IndirectObject):
                # resolving the reference moves the stream; restore it after
                t = stream.tell()
                length = pdf.getObject(length)
                stream.seek(t, 0)
            data['__streamdata__'] = stream.read(length)
            e = readNonWhitespace(stream)
            ndstream = stream.read(8)
            if e + ndstream != 'endstream':
                # Look one character further back: if "endstream" starts
                # there, /Length was one too large, so drop the last
                # data character.
                pos = stream.tell()
                stream.seek(-10, 1)
                end = stream.read(9)
                if end == 'endstream':
                    data['__streamdata__'] = data['__streamdata__'][:-1]
                else:
                    stream.seek(pos, 0)
                    raise utils.PdfReadError("Unable to find 'endstream' marker after stream.")
        else:
            # plain dictionary: undo the look-ahead
            stream.seek(pos, 0)
        if '__streamdata__' in data:
            return StreamObject.initializeFromDictionary(data)
        else:
            retval = DictionaryObject()
            retval.update(data)
            return retval
开发者ID:bizonix,项目名称:DropBoxLibrarySRC,代码行数:55,代码来源:generic.py


示例5: readObjectHeader

 def readObjectHeader(self, stream):
     """Read an indirect-object header and return ``(idnum, generation)``.

     Leading whitespace is skipped first, then the two numbers and the three
     characters after them are consumed, and the stream is left on the
     first non-whitespace character that follows.

     :param stream: seekable file-like object.
     :return: tuple of two ints, object number then generation number.
     """
     # The cross-reference table ought to point straight at the header, so
     # skipping whitespace should be unnecessary; in practice some files
     # carry offsets that are off by a few whitespace bytes.
     readNonWhitespace(stream)
     stream.seek(-1, 1)
     object_number = readUntilWhitespace(stream)
     generation_number = readUntilWhitespace(stream)
     stream.read(3)
     readNonWhitespace(stream)
     stream.seek(-1, 1)
     return (int(object_number), int(generation_number))
开发者ID:viranch,项目名称:pdfedit,代码行数:12,代码来源:pdf.py


示例6: getObject

    def getObject(self, indirectReference):
        """Return the object an indirect reference points at.

        Lookup order: the ``resolvedObjects`` cache; then, for generation-0
        objects listed in ``xref_objStm``, the containing object stream
        (every object in that stream is parsed and stored in
        ``resolvedObjects[0]``); otherwise the byte offset recorded in
        ``xref``.  Objects read through ``xref`` are decrypted when the
        document is encrypted and then cached.

        :param indirectReference: reference carrying ``idnum`` and
            ``generation`` attributes.
        :return: the resolved object.
        :raises AssertionError: when the object stream or the object header
            does not match what the reference expects.
        :raises Exception: when the file is encrypted and no decryption key
            has been set.
        """
        retval = self.resolvedObjects.get(indirectReference.generation, {}).get(indirectReference.idnum, None)
        if retval is not None:
            return retval
        if indirectReference.generation == 0 and \
           indirectReference.idnum in self.xref_objStm:
            # indirect reference to object in object stream
            # read the entire object stream into memory
            stmnum, idx = self.xref_objStm[indirectReference.idnum]
            objStm = IndirectObject(stmnum, 0, self).getObject()
            assert objStm['/Type'] == '/ObjStm'
            assert idx < objStm['/N']
            streamData = StringIO(objStm.getData())
            for _ in range(objStm['/N']):
                # the stream starts with (object number, offset) pairs
                objnum = NumberObject.readFromStream(streamData)
                readNonWhitespace(streamData)
                streamData.seek(-1, 1)
                offset = NumberObject.readFromStream(streamData)
                readNonWhitespace(streamData)
                streamData.seek(-1, 1)
                # remember where the next pair starts, jump to the object,
                # parse it, then come back
                t = streamData.tell()
                streamData.seek(objStm['/First'] + offset, 0)
                obj = readObject(streamData, self)
                self.resolvedObjects[0][objnum] = obj
                streamData.seek(t, 0)
            return self.resolvedObjects[0][indirectReference.idnum]
        start = self.xref[indirectReference.generation][indirectReference.idnum]
        self.stream.seek(start, 0)
        idnum, generation = self.readObjectHeader(self.stream)
        assert idnum == indirectReference.idnum
        assert generation == indirectReference.generation
        retval = readObject(self.stream, self)

        # override encryption is used for the /Encrypt dictionary
        if not self._override_encryption and self.isEncrypted:
            # if we don't have the encryption key:
            if not hasattr(self, '_decryption_key'):
                # call form of raise is valid on Python 2 and Python 3
                raise Exception("file has not been decrypted")
            # otherwise, decrypt here...
            import struct
            # per-object key: file key + low 3 bytes of the object number
            # + low 2 bytes of the generation (little-endian), MD5-hashed
            pack1 = struct.pack("<i", indirectReference.idnum)[:3]
            pack2 = struct.pack("<i", indirectReference.generation)[:2]
            key = self._decryption_key + pack1 + pack2
            assert len(key) == (len(self._decryption_key) + 5)
            md5_hash = md5(key).digest()
            key = md5_hash[:min(16, len(self._decryption_key) + 5)]
            retval = self._decryptObject(retval, key)

        self.cacheIndirectObject(generation, idnum, retval)
        return retval
开发者ID:viranch,项目名称:pdfedit,代码行数:50,代码来源:pdf.py


示例7: readFromStream

 def readFromStream(stream, pdf):
     """Read an indirect reference (``<id> <generation> R``) from ``stream``.

     :param stream: file-like object positioned on the object number.
     :param pdf: handed to the ``IndirectObject`` that is returned.
     :return: ``IndirectObject(int(idnum), int(generation), pdf)``.
     :raises PdfStreamError: when the stream ends inside either number.
     :raises utils.PdfReadError: when the next non-whitespace character
         after the numbers is not ``R``.
     """
     # object number: everything up to the first whitespace character
     id_digits = b_("")
     ch = stream.read(1)
     while ch and not ch.isspace():
         id_digits += ch
         ch = stream.read(1)
     if not ch:
         # stream has truncated prematurely
         raise PdfStreamError("Stream has ended unexpectedly")

     # generation number: whitespace before the first digit is skipped,
     # whitespace after at least one digit ends the number
     gen_digits = b_("")
     while True:
         ch = stream.read(1)
         if not ch:
             # stream has truncated prematurely
             raise PdfStreamError("Stream has ended unexpectedly")
         if not ch.isspace():
             gen_digits += ch
         elif gen_digits:
             break

     marker = readNonWhitespace(stream)
     if marker != b_("R"):
         raise utils.PdfReadError("Error reading indirect object reference at byte %s" % utils.hexStr(stream.tell()))
     return IndirectObject(int(id_digits), int(gen_digits), pdf)
开发者ID:arshpreetsingh,项目名称:pdf2audio,代码行数:25,代码来源:generic.py


示例8: __parseContentStream

    def __parseContentStream(self, stream):
        """Split a content stream into operations on ``self.operations``.

        The stream is rewound and scanned token by token.  Objects are
        collected as operands until an operator is met; each operator is
        appended to ``self.operations`` as ``(operands, operator)``.  A
        ``BI`` operator is handed to ``_readInlineImage`` and stored as
        ``(image, 'INLINE IMAGE')``.  Comments (``%`` to end of line) are
        skipped.

        :param stream: seekable file-like object holding the content stream.
        """
        stream.seek(0, 0)
        operands = []
        while True:
            peek = readNonWhitespace(stream)
            if peek == '':
                break
            stream.seek(-1, 1)
            if peek.isalpha() or peek == "'" or peek == '"':
                operator = ''
                while True:
                    tok = stream.read(1)
                    if tok == '':
                        # end of stream: tested first so the rewind below
                        # can never run when nothing was read
                        break
                    if tok.isspace() or tok in NameObject.delimiterCharacters:
                        # the terminator belongs to the next token
                        stream.seek(-1, 1)
                        break
                    operator += tok

                if operator == 'BI':
                    assert operands == []
                    ii = self._readInlineImage(stream)
                    self.operations.append((ii, 'INLINE IMAGE'))
                else:
                    self.operations.append((operands, operator))
                    operands = []
            elif peek == '%':
                # Skip the comment up to the end of the line.  The empty
                # string is included so a comment that runs to the end of
                # the stream stops the loop instead of spinning forever.
                while peek not in ('\r', '\n', ''):
                    peek = stream.read(1)

            else:
                operands.append(readObject(stream, None))


示例9: readObject

def readObject(stream, pdf):
    """Read the next object from ``stream``, dispatching on its first character.

    The first character is peeked (the stream is rewound onto it) and the
    matching reader is called: boolean, string, name, array, null,
    dictionary / hex string, number or indirect reference.  A comment is
    skipped and the object after it is returned.

    :param stream: seekable file-like object.
    :param pdf: forwarded to the readers that need it.
    :return: whatever the selected reader returns.
    :raises utils.PdfReadError: when the stream ends inside a comment.
    """
    tok = stream.read(1)
    stream.seek(-1, 1)
    if tok == 't' or tok == 'f':
        return BooleanObject.readFromStream(stream)
    if tok == '(':
        return readStringFromStream(stream)
    if tok == '/':
        return NameObject.readFromStream(stream)
    if tok == '[':
        return ArrayObject.readFromStream(stream, pdf)
    if tok == 'n':
        return NullObject.readFromStream(stream)
    if tok == '<':
        # "<<" opens a dictionary, a single "<" a hexadecimal string
        peek = stream.read(2)
        stream.seek(-2, 1)
        if peek == '<<':
            return DictionaryObject.readFromStream(stream, pdf)
        return readHexStringFromStream(stream)
    if tok == '%':
        # comment: discard everything up to the end of the line
        while tok not in ('\r', '\n'):
            tok = stream.read(1)
            if tok == '':
                # read(1) keeps returning '' at end of stream; without this
                # check the loop would never terminate
                raise utils.PdfReadError('Stream has ended unexpectedly')
        readNonWhitespace(stream)
        stream.seek(-1, 1)
        return readObject(stream, pdf)
    if tok == '+' or tok == '-':
        return NumberObject.readFromStream(stream)
    # number or indirect reference ("<id> <gen> R"): look ahead to decide
    peek = stream.read(20)
    stream.seek(-len(peek), 1)
    if re.match('(\\d+)\\s(\\d+)\\sR[^a-zA-Z]', peek) is not None:
        return IndirectObject.readFromStream(stream, pdf)
    return NumberObject.readFromStream(stream)


示例10: readObject

def readObject(stream, pdf):
    """Read the next object from ``stream``, dispatching on its first character.

    The first character is peeked (the stream is rewound by one) and the
    matching reader is called.  A comment is skipped and the object after
    it is returned.

    :param stream: seekable file-like object.
    :param pdf: forwarded to the readers that need it.
    :return: whatever the selected reader returns, or ``None`` when the
        stream is exhausted (either immediately or inside a comment).
    """
    tok = stream.read(1)
    stream.seek(-1, 1) # reset to start
    if tok == '':
        return None
    elif tok == 't' or tok == 'f':
        # boolean object
        return BooleanObject.readFromStream(stream)
    elif tok == '(':
        # string object
        return readStringFromStream(stream)
    elif tok == '/':
        # name object
        return NameObject.readFromStream(stream)
    elif tok == '[':
        # array object
        return ArrayObject.readFromStream(stream, pdf)
    elif tok == 'n':
        # null object
        return NullObject.readFromStream(stream)
    elif tok == '<':
        # hexadecimal string OR dictionary
        peek = stream.read(2)
        stream.seek(-2, 1) # reset to start
        if peek == '<<':
            return DictionaryObject.readFromStream(stream, pdf)
        else:
            return readHexStringFromStream(stream)
    elif tok == '%':
        # comment
        while tok not in ('\r', '\n'):
            tok = stream.read(1)
            if tok == '':
                # The comment runs to the end of the stream, so there is no
                # object left to read.  Report it the same way as the
                # end-of-stream case above rather than looping forever.
                return None
        tok = readNonWhitespace(stream)
        stream.seek(-1, 1)
        return readObject(stream, pdf)
    else:
        # number object OR indirect reference
        if tok == '+' or tok == '-':
            # number
            return NumberObject.readFromStream(stream)
        peek = stream.read(20)
        stream.seek(-len(peek), 1) # reset to start
        if re.match(r"(\d+)\s(\d+)\sR[^a-zA-Z]", peek) is not None:
            return IndirectObject.readFromStream(stream, pdf)
        else:
            return NumberObject.readFromStream(stream)
开发者ID:benthuffine,项目名称:pyPdf,代码行数:46,代码来源:generic.py


示例11: readHexStringFromStream

def readHexStringFromStream(stream):
    """Read a hexadecimal string (``<...>``) from ``stream``.

    The first character (the opening ``<``) is discarded.  Non-whitespace
    characters are then taken two at a time as hex digits and converted to
    one character each, until ``>`` is met.  A single left-over digit is
    padded with a trailing ``0``.

    :param stream: file-like object positioned on the opening ``<``.
    :return: ``createStringObject`` applied to the decoded text.
    :raises utils.PdfReadError: when the stream ends before ``>``.
    """
    stream.read(1)
    chars = []
    x = ""
    while True:
        tok = readNonWhitespace(stream)
        if not tok:
            # An empty token means end of stream; without this check the
            # loop would keep appending '' and never finish.
            raise utils.PdfReadError("Stream has ended unexpectedly")
        if tok == ">":
            break
        x += tok
        if len(x) == 2:
            chars.append(chr(int(x, base=16)))
            x = ""
    if len(x) == 1:
        # odd number of digits: the missing last digit counts as 0
        x += "0"
    if len(x) == 2:
        chars.append(chr(int(x, base=16)))
    return createStringObject("".join(chars))
开发者ID:benthuffine,项目名称:pyPdf,代码行数:17,代码来源:generic.py


示例12: readObject

def readObject(stream, pdf):
    """Read the next object from ``stream``, dispatching on its first character.

    The first character is peeked (the stream is rewound onto it) and
    looked up in ``ObjectPrefix``; the index found selects the reader.  A
    comment is skipped and the object after it is returned.  Anything not
    found in ``ObjectPrefix`` is read as a number or an indirect reference.

    :param stream: seekable file-like object.
    :param pdf: forwarded to the readers that need it.
    :return: whatever the selected reader returns.
    :raises PdfStreamError: when the stream ends inside a comment.
    """
    tok = stream.read(1)
    stream.seek(-1, 1) # reset to start
    idx = ObjectPrefix.find(tok)
    if idx == 0:
        # name object
        return NameObject.readFromStream(stream, pdf)
    elif idx == 1:
        # hexadecimal string OR dictionary
        peek = stream.read(2)
        stream.seek(-2, 1) # reset to start
        if peek == b_('<<'):
            return DictionaryObject.readFromStream(stream, pdf)
        else:
            return readHexStringFromStream(stream)
    elif idx == 2:
        # array object
        return ArrayObject.readFromStream(stream, pdf)
    elif idx == 3 or idx == 4:
        # boolean object
        return BooleanObject.readFromStream(stream)
    elif idx == 5:
        # string object
        return readStringFromStream(stream)
    elif idx == 6:
        # null object
        return NullObject.readFromStream(stream)
    elif idx == 7:
        # comment
        while tok not in (b_('\r'), b_('\n')):
            tok = stream.read(1)
            if not tok:
                # read(1) keeps returning an empty value at end of stream;
                # without this check the loop would never terminate
                raise PdfStreamError("Stream has ended unexpectedly")
        tok = readNonWhitespace(stream)
        stream.seek(-1, 1)
        return readObject(stream, pdf)
    else:
        # number object OR indirect reference
        if tok in NumberSigns:
            # number
            return NumberObject.readFromStream(stream)
        peek = stream.read(20)
        stream.seek(-len(peek), 1) # reset to start
        if IndirectPattern.match(peek) is not None:
            return IndirectObject.readFromStream(stream, pdf)
        else:
            return NumberObject.readFromStream(stream)
开发者ID:arshpreetsingh,项目名称:pdf2audio,代码行数:45,代码来源:generic.py


示例13: readObject

def readObject(stream, pdf):
    """Read the next object from ``stream``, dispatching on its first character.

    The first character is peeked (the stream is rewound onto it) and the
    matching reader is called.  A comment is skipped and the object after
    it is returned.

    :param stream: seekable file-like object.
    :param pdf: forwarded to the readers that need it.
    :return: whatever the selected reader returns.
    :raises EOFError: when the stream ends inside a comment.
    """
    tok = stream.read(1)
    stream.seek(-1, 1)  # reset to start
    if tok == "t" or tok == "f":
        # boolean object
        return BooleanObject.readFromStream(stream)
    elif tok == "(":
        # string object
        return readStringFromStream(stream)
    elif tok == "/":
        # name object
        return NameObject.readFromStream(stream)
    elif tok == "[":
        # array object
        return ArrayObject.readFromStream(stream, pdf)
    elif tok == "n":
        # null object
        return NullObject.readFromStream(stream)
    elif tok == "<":
        # hexadecimal string OR dictionary
        peek = stream.read(2)
        stream.seek(-2, 1)  # reset to start
        if peek == "<<":
            return DictionaryObject.readFromStream(stream, pdf)
        else:
            return readHexStringFromStream(stream)
    elif tok == "%":
        # comment
        while tok not in ("\r", "\n"):
            tok = stream.read(1)
            if tok == "":
                # read(1) keeps returning "" at end of stream; without this
                # check the loop would never terminate.
                # NOTE(review): a project-specific read error may be a
                # better fit if one is in scope in this module.
                raise EOFError("stream ended inside a comment")
        tok = readNonWhitespace(stream)
        stream.seek(-1, 1)
        return readObject(stream, pdf)
    else:
        # number object OR indirect reference
        if tok == "+" or tok == "-":
            # number
            return NumberObject.readFromStream(stream)
        peek = stream.read(20)
        stream.seek(-len(peek), 1)  # reset to start
        if re.match(r"(\d+)\s(\d+)\sR[^a-zA-Z]", peek) is not None:
            return IndirectObject.readFromStream(stream, pdf)
        else:
            return NumberObject.readFromStream(stream)
开发者ID:Pythoning,项目名称:PyPDF2,代码行数:44,代码来源:generic.py


示例14: readObject

def readObject(stream, pdf):
    """Read the next object from ``stream``, dispatching on its first character.

    The first character is peeked (the stream is rewound onto it) and the
    matching reader is called.  A comment is skipped and the object after
    it is returned.

    :param stream: seekable file-like object.
    :param pdf: forwarded to the readers that need it.
    :return: whatever the selected reader returns.
    :raises utils.PdfStreamError: when the stream ends inside a comment.
    """
    tok = stream.read(1)
    stream.seek(-1, 1)  # reset to start
    if tok == b_('t') or tok == b_('f'):
        # boolean object
        return BooleanObject.readFromStream(stream)
    elif tok == b_('('):
        # string object
        return readStringFromStream(stream)
    elif tok == b_('/'):
        # name object
        return NameObject.readFromStream(stream)
    elif tok == b_('['):
        # array object
        return ArrayObject.readFromStream(stream, pdf)
    elif tok == b_('n'):
        # null object
        return NullObject.readFromStream(stream)
    elif tok == b_('<'):
        # hexadecimal string OR dictionary
        peek = stream.read(2)
        stream.seek(-2, 1)  # reset to start
        if peek == b_('<<'):
            return DictionaryObject.readFromStream(stream, pdf)
        else:
            return readHexStringFromStream(stream)
    elif tok == b_('%'):
        # comment
        while tok not in (b_('\r'), b_('\n')):
            tok = stream.read(1)
            if not tok:
                # read(1) keeps returning an empty value at end of stream;
                # without this check the loop would never terminate
                raise utils.PdfStreamError("Stream has ended unexpectedly")
        tok = readNonWhitespace(stream)
        stream.seek(-1, 1)
        return readObject(stream, pdf)
    else:
        # number object OR indirect reference
        if tok == b_('+') or tok == b_('-'):
            # number
            return NumberObject.readFromStream(stream)
        peek = stream.read(20)
        stream.seek(-len(peek), 1)  # reset to start
        if re.match(b_(r"(\d+)\s(\d+)\sR[^a-zA-Z]"), peek) is not None:
            return IndirectObject.readFromStream(stream, pdf)
        else:
            return NumberObject.readFromStream(stream)
开发者ID:jeansch,项目名称:PyPDF2,代码行数:44,代码来源:generic.py


示例15: readHexStringFromStream

def readHexStringFromStream(stream):
    """Decode a hexadecimal string (``<...>``) read from ``stream``.

    The opening character is discarded, then non-whitespace characters are
    consumed in pairs of hex digits until ``>`` is met; each pair yields
    one character.  A single trailing digit is padded with ``0``.

    :param stream: file-like object positioned on the opening ``<``.
    :return: ``createStringObject`` applied to the decoded characters.
    :raises PdfStreamError: when the stream ends before ``>``.
    """
    stream.read(1)
    decoded = []
    pair = b_("")
    while True:
        digit = readNonWhitespace(stream)
        if not digit:
            # stream has truncated prematurely
            raise PdfStreamError("Stream has ended unexpectedly")
        if digit == b_(">"):
            break
        pair += digit
        if len(pair) != 2:
            continue
        decoded.append(chr(int(pair, base=16)))
        pair = b_("")
    # an odd digit count leaves one digit behind; treat the missing one as 0
    if len(pair) == 1:
        pair += b_("0")
    if len(pair) == 2:
        decoded.append(chr(int(pair, base=16)))
    return createStringObject(b_("".join(decoded)))
开发者ID:USGM,项目名称:PyPDF2,代码行数:20,代码来源:generic.py


示例16: __parseContentStream

 def __parseContentStream(self, stream):
     """Split a content stream into entries on ``self.operations``.

     The stream is rewound and scanned.  Objects are gathered as operands
     until an operator is read; the pair ``(operands, operator)`` is then
     appended to ``self.operations``.  The ``BI`` operator is delegated to
     ``_readInlineImage`` and stored as ``(image, "INLINE IMAGE")``.

     :param stream: seekable file-like object holding the content stream.
     """
     stream.seek(0, 0)
     pending = []
     while True:
         first = readNonWhitespace(stream)
         if first == "":
             # nothing but whitespace left
             return
         stream.seek(-1, 1)
         if not (first.isalpha() or first in ("'", '"')):
             # not an operator, so it starts an operand object
             pending.append(readObject(stream, None))
             continue
         op = readUntilWhitespace(stream, maxchars=2)
         if op != "BI":
             self.operations.append((pending, op))
             pending = []
             continue
         # inline images need their own parsing routine
         assert pending == []
         image = self._readInlineImage(stream)
         self.operations.append((image, "INLINE IMAGE"))
开发者ID:adambaldwin2,项目名称:test,代码行数:22,代码来源:pdf.py


示例17: __parseContentStream

 def __parseContentStream(self, stream):
     """Split a content stream into operations on ``self.operations``.

     The stream is rewound and scanned token by token.  Objects are
     collected as operands until an operator is met; each operator is
     appended to ``self.operations`` as ``(operands, operator)``.  A ``BI``
     operator is handed to ``_readInlineImage`` and stored as
     ``(image, "INLINE IMAGE")``.  Comments (``%`` to end of line) are
     skipped.

     :param stream: seekable file-like object holding the content stream.
     """
     stream.seek(0, 0)
     operands = []
     while True:
         peek = readNonWhitespace(stream)
         if peek == '':
             break
         stream.seek(-1, 1)
         if peek.isalpha() or peek == "'" or peek == '"':
             operator = ""
             while True:
                 tok = stream.read(1)
                 if tok == '':
                     # end of stream: tested first so the rewind below can
                     # never run when nothing was read
                     break
                 if tok.isspace() or tok in NameObject.delimiterCharacters:
                     # the terminator belongs to the next token
                     stream.seek(-1, 1)
                     break
                 operator += tok
             if operator == "BI":
                 # begin inline image - a completely different parsing
                 # mechanism is required
                 assert operands == []
                 ii = self._readInlineImage(stream)
                 self.operations.append((ii, "INLINE IMAGE"))
             else:
                 self.operations.append((operands, operator))
                 operands = []
         elif peek == '%':
             # If we encounter a comment in the content stream, we have to
             # handle it here.  Typically, readObject will handle
             # encountering a comment -- but readObject assumes that
             # following the comment must be the object we're trying to
             # read.  In this case, it could be an operator instead.
             # The empty string is included so that a comment running to
             # the end of the stream stops the loop instead of spinning
             # forever on empty reads.
             while peek not in ('\r', '\n', ''):
                 peek = stream.read(1)
         else:
             operands.append(readObject(stream, None))
开发者ID:jeansch,项目名称:PyPDF2,代码行数:37,代码来源:page_object.py


示例18: readFromStream

 def readFromStream(stream, pdf):
     """Read a dictionary, and any stream payload that follows it.

     The stream must be positioned on the opening ``<<``.  Key/value pairs
     are read until ``>>``.  If the ``stream`` keyword follows, the number
     of characters given by ``/Length`` is read as stream data and the
     ``endstream`` marker is checked.

     :param stream: seekable file-like object.
     :param pdf: passed to ``readObject`` and used to resolve an indirect
         ``/Length``.
     :return: ``StreamObject.initializeFromDictionary(data)`` when stream
         data was read, otherwise a ``DictionaryObject``.
     :raises utils.PdfReadError: missing ``<<``, duplicated key, or no
         ``endstream`` marker after the stream data.
     :raises AssertionError: unexpected character after the ``stream``
         keyword, or no ``/Length`` entry.
     """
     tmp = stream.read(2)
     if tmp != "<<":
         # call form of raise is valid on Python 2 and Python 3
         raise utils.PdfReadError("dictionary read error")
     data = {}
     while True:
         tok = readNonWhitespace(stream)
         if tok == ">":
             # second character of the closing ">>"
             stream.read(1)
             break
         stream.seek(-1, 1)
         key = readObject(stream, pdf)
         readNonWhitespace(stream)
         stream.seek(-1, 1)
         value = readObject(stream, pdf)
         if key in data:
             # multiple definitions of key not permitted
             raise utils.PdfReadError("multiple definitions in dictionary")
         data[key] = value
     pos = stream.tell()
     s = readNonWhitespace(stream)
     if s == 's' and stream.read(5) == 'tream':
         eol = stream.read(1)
         # odd PDF file output has spaces after 'stream' keyword but before EOL.
         # patch provided by Danial Sandler
         while eol == ' ':
             eol = stream.read(1)
         assert eol in ("\n", "\r")
         if eol == "\r":
             # Only swallow the next character when it really is the LF of
             # a CRLF pair; otherwise it is the first data character and
             # must stay in the stream.
             following = stream.read(1)
             if following and following != "\n":
                 stream.seek(-1, 1)
         # this is a stream object, not a dictionary
         assert "/Length" in data
         length = data["/Length"]
         if isinstance(length, IndirectObject):
             # resolving the reference moves the stream; restore it after
             t = stream.tell()
             length = pdf.getObject(length)
             stream.seek(t, 0)
         data["__streamdata__"] = stream.read(length)
         e = readNonWhitespace(stream)
         ndstream = stream.read(8)
         if (e + ndstream) != "endstream":
             # (sigh) - the odd PDF file has a length that is too long, so
             # we need to read backwards to find the "endstream" ending.
             # ReportLab (unknown version) generates files with this bug,
             # and Python users into PDF files tend to be our audience.
             # we need to do this to correct the streamdata and chop off
             # an extra character.
             pos = stream.tell()
             stream.seek(-10, 1)
             end = stream.read(9)
             if end == "endstream":
                 # we found it by looking back one character further.
                 data["__streamdata__"] = data["__streamdata__"][:-1]
             else:
                 stream.seek(pos, 0)
                 raise utils.PdfReadError("Unable to find 'endstream' marker after stream.")
     else:
         # plain dictionary: undo the look-ahead
         stream.seek(pos, 0)
     if "__streamdata__" in data:
         return StreamObject.initializeFromDictionary(data)
     else:
         retval = DictionaryObject()
         retval.update(data)
         return retval
开发者ID:benthuffine,项目名称:pyPdf,代码行数:65,代码来源:generic.py


示例19: readFromStream

 def readFromStream(stream, pdf):
     """Read a dictionary, and any stream payload that follows it.

     The stream must be positioned on the opening ``<<``.  Key/value pairs
     are read until ``>>``.  If the ``stream`` keyword follows, the number
     of bytes given by ``/Length`` is read as stream data and the
     ``endstream`` marker is checked.

     :param stream: seekable file-like object.
     :param pdf: passed to ``readObject`` and used to resolve an indirect
         ``/Length``.
     :return: ``StreamObject.initializeFromDictionary(data)`` when stream
         data was read, otherwise a ``DictionaryObject``.
     :raises utils.PdfReadError: missing ``<<``, duplicated key, or no
         ``endstream`` marker after the stream data.
     :raises utils.PdfStreamError: the stream ends inside the dictionary.
     :raises AssertionError: unexpected character after the ``stream``
         keyword, or no ``/Length`` entry.
     """
     tmp = stream.read(2)
     if tmp != b_("<<"):
         raise utils.PdfReadError(
             ("Dictionary read error at byte %s: "
              "stream must begin with '<<'" %
                 utils.hexStr(stream.tell())))
     data = {}
     while True:
         tok = readNonWhitespace(stream)
         if not tok:
             # stream has truncated prematurely
             raise utils.PdfStreamError("Stream has ended unexpectedly")
         if tok == b_(">"):
             # second character of the closing ">>"
             stream.read(1)
             break
         stream.seek(-1, 1)
         key = readObject(stream, pdf)
         readNonWhitespace(stream)
         stream.seek(-1, 1)
         value = readObject(stream, pdf)
         if key in data:
             # multiple definitions of key not permitted
             # (call form of raise: the "raise E, value" form is a syntax
             # error on Python 3)
             raise utils.PdfReadError(
                 "Multiple definitions in "
                 "dictionary at byte %s for key %s"
                 % (utils.hexStr(stream.tell()), key))
         data[key] = value
     pos = stream.tell()
     s = readNonWhitespace(stream)
     if s == b_('s') and stream.read(5) == b_('tream'):
         eol = stream.read(1)
         # odd PDF file output has spaces after 'stream'
         # keyword but before EOL.
         # patch provided by Danial Sandler
         while eol == b_(' '):
             eol = stream.read(1)
         assert eol in (b_("\n"), b_("\r"))
         if eol == b_("\r"):
             # Swallow the LF of a CRLF pair.  The comparison must use
             # b_(): against a plain str literal it never matches on
             # Python 3, and the LF would end up in the stream data.
             following = stream.read(1)
             if following and following != b_("\n"):
                 stream.seek(-1, 1)
         # this is a stream object, not a dictionary
         assert "/Length" in data
         length = data["/Length"]
         if isinstance(length, IndirectObject):
             # resolving the reference moves the stream; restore it after
             t = stream.tell()
             length = pdf.getObject(length)
             stream.seek(t, 0)
         data["__streamdata__"] = stream.read(length)
         e = readNonWhitespace(stream)
         ndstream = stream.read(8)
         if (e + ndstream) != b_("endstream"):
             # (sigh) - the odd PDF file has a length that is too long, so
             # we need to read backwards to find the "endstream" ending.
             # ReportLab (unknown version) generates files with this bug,
             # and Python users into PDF files tend to be our audience.
             # we need to do this to correct the streamdata and chop off
             # an extra character.
             pos = stream.tell()
             stream.seek(-10, 1)
             end = stream.read(9)
             if end == b_("endstream"):
                 # we found it by looking back one character further.
                 data["__streamdata__"] = data["__streamdata__"][:-1]
             else:
                 stream.seek(pos, 0)
                 raise utils.PdfReadError(
                     "Unable to find 'endstream' marker after "
                     "stream at byte %s." % utils.hexStr(stream.tell()))
     else:
         # plain dictionary: undo the look-ahead
         stream.seek(pos, 0)
     if "__streamdata__" in data:
         return StreamObject.initializeFromDictionary(data)
     else:
         retval = DictionaryObject()
         retval.update(data)
         return retval
开发者ID:jeansch,项目名称:PyPDF2,代码行数:78,代码来源:generic.py


示例20: getObject

    def getObject(self, indirectReference):
        """Return the object an indirect reference points at.

        Lookup order: the ``resolvedObjects`` cache; then, for generation-0
        objects listed in ``xref_objStm``, the containing object stream
        (every object in that stream is parsed and stored in
        ``resolvedObjects[0]``); otherwise the byte offset recorded in
        ``xref``.  Objects read through ``xref`` are decrypted when the
        document is encrypted and then cached.

        :param indirectReference: reference carrying ``idnum`` and
            ``generation`` attributes.
        :return: the resolved object, or ``None`` (after a
            ``PdfReadWarning``) when the object number is not in ``xref``.
        :raises utils.PdfReadError: when the header found at the recorded
            offset carries a different object number, unless
            ``self.xrefIndex`` is set and ``self.strict`` is false.
        :raises AssertionError: when the object stream is malformed or the
            generation number in the header differs.
        :raises Exception: when the file is encrypted and no decryption key
            has been set.
        """
        retval = self.resolvedObjects.get(indirectReference.generation,
                                          {}).get(indirectReference.idnum,
                                                  None)
        if retval is not None:
            return retval
        if indirectReference.generation == 0 \
                and indirectReference.idnum in self.xref_objStm:
            # indirect reference to object in object stream
            # read the entire object stream into memory
            stmnum, idx = self.xref_objStm[indirectReference.idnum]
            objStm = IndirectObject(stmnum, 0, self).getObject()
            assert objStm['/Type'] == '/ObjStm'
            assert idx < objStm['/N']
            streamData = StringIO(objStm.getData())
            for _ in range(objStm['/N']):
                # the stream starts with (object number, offset) pairs
                objnum = NumberObject.readFromStream(streamData)
                readNonWhitespace(streamData)
                streamData.seek(-1, 1)
                offset = NumberObject.readFromStream(streamData)
                readNonWhitespace(streamData)
                streamData.seek(-1, 1)
                # remember where the next pair starts, jump to the object,
                # parse it, then come back
                t = streamData.tell()
                streamData.seek(objStm['/First']+offset, 0)
                obj = readObject(streamData, self)
                self.resolvedObjects[0][objnum] = obj
                streamData.seek(t, 0)
            return self.resolvedObjects[0][indirectReference.idnum]
        if indirectReference.idnum \
                not in self.xref[indirectReference.generation]:
            warnings.warn("Object %d %d not defined." % (
                indirectReference.idnum, indirectReference.generation),
                utils.PdfReadWarning)
            return None
        start = self.xref[indirectReference.generation][
            indirectReference.idnum]
        self.stream.seek(start, 0)
        idnum, generation = self.readObjectHeader(self.stream)
        # Explicit comparison instead of try/assert/except: an assert is
        # stripped under "python -O", which would silently disable the check.
        if idnum != indirectReference.idnum:
            if self.xrefIndex:
                # Xref table probably had bad indexes due to not
                # being zero-indexed
                if self.strict:
                    raise utils.PdfReadError(
                        "Expected object ID (%d %d) does "
                        "not match actual (%d %d); xref "
                        "table not zero-indexed." % (
                            indirectReference.idnum,
                            indirectReference.generation,
                            idnum,
                            generation))
                # otherwise: should not happen since the xref table is
                # corrected in non-strict mode
            else:  # some other problem
                raise utils.PdfReadError("Expected object ID (%d %d) does not "
                                         "match actual (%d %d)." % (
                                             indirectReference.idnum,
                                             indirectReference.generation,
                                             idnum, generation))
        assert generation == indirectReference.generation
        retval = readObject(self.stream, self)
        # override encryption is used for the /Encrypt dictionary
        if not self._override_encryption and self.isEncrypted:
            # if we don't have the encryption key:
            if not hasattr(self, '_decryption_key'):
                raise Exception("file has not been decrypted")
            # otherwise, decrypt here...
            # per-object key: file key + low 3 bytes of the object number
            # + low 2 bytes of the generation (little-endian), MD5-hashed
            pack1 = struct.pack("<i", indirectReference.idnum)[:3]
            pack2 = struct.pack("<i", indirectReference.generation)[:2]
            key = self._decryption_key + pack1 + pack2
            assert len(key) == (len(self._decryption_key) + 5)
            md5_hash = md5(key).digest()
            key = md5_hash[:min(16, len(self._decryption_key) + 5)]
            retval = self._decryptObject(retval, key)

        self.cacheIndirectObject(generation, idnum, retval)
        return retval
开发者ID:jeansch,项目名称:PyPDF2,代码行数:81,代码来源:reader.py



注:本文中的utils.readNonWhitespace函数示例由纯净天空整理自Github/MSDocs等源码及文档管理平台,相关代码片段筛选自各路编程大神贡献的开源项目,源码版权归原作者所有,传播和使用请参考对应项目的License;未经允许,请勿转载。


鲜花

握手

雷人

路过

鸡蛋
该文章已有0人参与评论

请发表评论

全部评论

专题导读
上一篇:
Python utils.read_config函数代码示例发布时间:2022-05-26
下一篇:
Python utils.re_subm函数代码示例发布时间:2022-05-26
热门推荐
阅读排行榜

扫描微信二维码

查看手机版网站

随时了解更新最新资讯

139-2527-9053

在线客服(服务时间 9:00~18:00)

在线QQ客服
地址:深圳市南山区西丽大学城创智工业园
电邮:jeky_zhao#qq.com
移动电话:139-2527-9053

Powered by 互联科技 X3.4© 2001-2213 极客世界.|Sitemap