Source code for ipfsapi.encoding
# -*- encoding: utf-8 -*-
"""Defines encoding related classes.
.. note::
The XML and ProtoBuf encoders are currently not functional.
"""
from __future__ import absolute_import
import abc
import codecs
import io
import json
import pickle
import six
from . import exceptions
class Encoding(object):
    """Common interface shared by all data parsers/encoders.

    Subclasses are expected to provide :meth:`parse_partial` and
    :meth:`encode`; :meth:`parse_finalize` and :meth:`parse` come with
    default implementations.
    """
    # Python 2 style metaclass declaration
    __metaclass__ = abc.ABCMeta

    @abc.abstractmethod
    def parse_partial(self, raw):
        """Parses the given chunk of data and yields every complete data set
        that can be built from it.

        Raises
        ------
        ~ipfsapi.exceptions.DecodingError

        Parameters
        ----------
        raw : bytes
            Data to be parsed

        Returns
        -------
            generator
        """

    def parse_finalize(self):
        """Finishes parsing using whatever data is still buffered and yields
        the remaining data sets.

        The default implementation has nothing buffered and therefore
        returns an empty tuple.

        Raises
        ------
        ~ipfsapi.exceptions.DecodingError

        Returns
        -------
            generator
        """
        return ()

    def parse(self, raw):
        """Decodes the bytes of this encoding into a Python object.

        All items produced by :meth:`parse_partial` followed by those of
        :meth:`parse_finalize` are collected. If exactly one item was
        produced that item is returned directly, otherwise the list of all
        items (possibly empty) is returned.

        Raises
        ------
        ~ipfsapi.exceptions.DecodingError

        Parameters
        ----------
        raw : bytes
            Data to be parsed

        Returns
        -------
            object
        """
        decoded = []
        decoded.extend(self.parse_partial(raw))
        decoded.extend(self.parse_finalize())
        if len(decoded) == 1:
            return decoded[0]
        return decoded

    @abc.abstractmethod
    def encode(self, obj):
        """Serializes a raw object into the corresponding encoding.

        Raises
        ------
        ~ipfsapi.exceptions.EncodingError

        Parameters
        ----------
        obj : object
            Object to be encoded
        """
class Dummy(Encoding):
    """Pass-through parser/encoder that performs no real decoding.
    """
    name = "none"

    def parse_partial(self, raw):
        """Yields the received data unchanged.

        Parameters
        ----------
        raw : bytes
            Any kind of data

        Returns
        -------
            generator
        """
        yield raw

    def encode(self, obj):
        """Converts ``obj`` to its ``str()`` form and returns that as bytes.

        Parameters
        ----------
        obj : object
            Any Python object

        Returns
        -------
            bytes
        """
        text = str(obj)
        return six.b(text)
class Json(Encoding):
    """JSON parser/encoder that handles concatenated JSON.

    Incoming bytes are decoded incrementally as UTF-8, split into lines and
    buffered. Complete JSON documents are yielded as soon as they can be
    decoded; incomplete trailing data stays buffered until more input
    arrives or :meth:`parse_finalize` is called.
    """
    name = 'json'

    def __init__(self):
        # Pending text, one entry per input line; an entry is set to `None`
        # once it has been fully consumed
        self._buffer = []
        # Incremental bytes -> text decoder (copes with multi-byte sequences
        # split across chunks)
        self._decoder1 = codecs.getincrementaldecoder('utf-8')()
        # Text -> Python object decoder
        self._decoder2 = json.JSONDecoder()
        # Last JSON error seen; reported by `parse_finalize` if data remains
        self._lasterror = None

    def parse_partial(self, data):
        """Incrementally decodes JSON data sets into Python objects.

        Raises
        ------
        ~ipfsapi.exceptions.DecodingError

        Parameters
        ----------
        data : bytes
            Next chunk of UTF-8 encoded JSON data

        Returns
        -------
            generator
        """
        try:
            # Python 3 requires all JSON data to be a text string
            lines = self._decoder1.decode(data, False).split("\n")

            # Add first input line to last buffer line, if applicable, to
            # handle cases where the JSON string has been chopped in half
            # at the network level due to streaming
            if len(self._buffer) > 0 and self._buffer[-1] is not None:
                self._buffer[-1] += lines[0]
                self._buffer.extend(lines[1:])
            else:
                self._buffer.extend(lines)
        except UnicodeDecodeError as error:
            raise exceptions.DecodingError('json', error)

        # Process data buffer
        index = 0
        try:
            # Process each line as separate buffer
            #PERF: This way the `.lstrip()` call becomes almost always a NOP
            #      even if it does return a different string it will only
            #      have to allocate a new buffer for the currently processed
            #      line.
            while index < len(self._buffer):
                while self._buffer[index]:
                    # Make sure buffer does not start with whitespace
                    #PERF: `.lstrip()` does not reallocate if the string does
                    #      not actually start with whitespace.
                    self._buffer[index] = self._buffer[index].lstrip()

                    # Handle case where the remainder of the line contained
                    # only whitespace
                    if not self._buffer[index]:
                        self._buffer[index] = None
                        continue

                    # Try decoding the partial data buffer and return results
                    # from this
                    candidate = self._buffer[index]
                    for index2 in range(index, len(self._buffer)):
                        # If decoding doesn't succeed with the currently
                        # selected buffer (very unlikely with our current
                        # class of input data) then retry with appending
                        # any other pending pieces of input data
                        # This will happen with JSON data that contains
                        # arbitrary new-lines: "{1:\n2,\n3:4}"
                        if index2 > index:
                            candidate += "\n" + self._buffer[index2]

                        try:
                            (obj, offset) = self._decoder2.raw_decode(
                                candidate
                            )
                        except ValueError:
                            # Treat error as fatal if we have already added
                            # the final buffer to the input
                            if (index2 + 1) == len(self._buffer):
                                raise
                        else:
                            index = index2
                            break

                    # Decoding succeeded - yield result and shorten buffer
                    yield obj

                    # `offset` is relative to the start of `candidate`, which
                    # may span several buffer lines. Only the last of these
                    # lines (now at `index`) can still hold unconsumed text,
                    # so translate the offset to a position within that line
                    # before trimming it.
                    line_start = len(candidate) - len(self._buffer[index])
                    offset = max(offset - line_start, 0)
                    if offset < len(self._buffer[index]):
                        self._buffer[index] = self._buffer[index][offset:]
                    else:
                        self._buffer[index] = None
                index += 1
        except ValueError as error:
            # It is unfortunately not possible to reliably detect whether
            # parsing ended because of an error *within* the JSON string, or
            # an unexpected *end* of the JSON string.
            # We therefore have to assume that any error that occurs here
            # *might* be related to the JSON parser hitting EOF and therefore
            # have to postpone error reporting until `parse_finalize` is
            # called.
            self._lasterror = error
        finally:
            # Remove all processed buffers
            del self._buffer[0:index]

    def parse_finalize(self):
        """Raises errors for incomplete buffered data that could not be parsed
        because the end of the input data has been reached.

        The internal state is reset afterwards, whether or not an error was
        raised.

        Raises
        ------
        ~ipfsapi.exceptions.DecodingError

        Returns
        -------
            tuple : Always empty
        """
        try:
            try:
                # Raise exception for remaining bytes in bytes decoder
                self._decoder1.decode(b'', True)
            except UnicodeDecodeError as error:
                raise exceptions.DecodingError('json', error)

            # Late raise errors that looked like they could have been fixed if
            # the caller had provided more data
            if self._buffer:
                raise exceptions.DecodingError('json', self._lasterror)
        finally:
            # Reset state
            self._buffer = []
            self._lasterror = None
            self._decoder1.reset()

        return ()

    def encode(self, obj):
        """Returns ``obj`` serialized as JSON formatted bytes.

        The output is compact (no whitespace between tokens), has its
        dictionary keys sorted and is encoded as UTF-8.

        Raises
        ------
        ~ipfsapi.exceptions.EncodingError

        Parameters
        ----------
        obj : str | list | dict | int
            JSON serializable Python object

        Returns
        -------
            bytes
        """
        try:
            result = json.dumps(obj, sort_keys=True, indent=None,
                                separators=(',', ':'))
            if isinstance(result, six.text_type):
                return result.encode("utf-8")
            else:
                return result
        except (UnicodeEncodeError, TypeError) as error:
            raise exceptions.EncodingError('json', error)
class Pickle(Encoding):
    """Python object parser/encoder using `pickle`.

    .. warning::

        Unpickling can execute arbitrary code. Only use this encoding with
        data from a source you trust.
    """
    name = 'pickle'

    def __init__(self):
        # Accumulates all chunks passed to `parse_partial`
        self._buffer = io.BytesIO()

    def parse_partial(self, raw):
        """Buffers the given data so that the it can be passed to `pickle` in
        one go.

        This does not actually process the data in smaller chunks, but merely
        buffers it until `parse_finalize` is called! This is mostly because
        the standard-library module expects the entire data to be available up
        front, which is currently always the case for our code anyways.

        Parameters
        ----------
        raw : bytes
            Data to be buffered

        Returns
        -------
            tuple : An empty tuple
        """
        self._buffer.write(raw)
        return ()

    def parse_finalize(self):
        """Parses the buffered data and yields the result.

        The internal buffer is replaced by a fresh one afterwards (whether
        or not decoding succeeded), so that the instance can be reused
        without previously parsed data being decoded again.

        Raises
        ------
        ~ipfsapi.exceptions.DecodingError

        Returns
        -------
            generator
        """
        try:
            self._buffer.seek(0, 0)
            # SECURITY: `pickle.load` must never be fed untrusted data
            obj = pickle.load(self._buffer)
        except pickle.UnpicklingError as error:
            raise exceptions.DecodingError('pickle', error)
        finally:
            # Reset state
            self._buffer = io.BytesIO()

        yield obj

    def parse(self, raw):
        r"""Returns a Python object decoded from a pickle byte stream.

        .. code-block:: python

            >>> p = Pickle()
            >>> p.parse(b'(lp0\nI1\naI2\naI3\naI01\naF4.5\naNaF6000.0\na.')
            [1, 2, 3, True, 4.5, None, 6000.0]

        Raises
        ------
        ~ipfsapi.exceptions.DecodingError

        Parameters
        ----------
        raw : bytes
            Pickle data bytes

        Returns
        -------
            object
        """
        return Encoding.parse(self, raw)

    def encode(self, obj):
        """Returns ``obj`` serialized as a pickle binary string.

        Raises
        ------
        ~ipfsapi.exceptions.EncodingError

        Parameters
        ----------
        obj : object
            Serializable Python object

        Returns
        -------
            bytes
        """
        try:
            return pickle.dumps(obj)
        except pickle.PicklingError as error:
            raise exceptions.EncodingError('pickle', error)
class Protobuf(Encoding):
    """Placeholder for a Protobuf parser/encoder.

    Neither parsing nor encoding is implemented by this class.
    """
    name = "protobuf"
class Xml(Encoding):
    """Placeholder for an XML parser/encoder.

    Neither parsing nor encoding is implemented by this class.
    """
    name = "xml"
# Registry of the encodings supported by the IPFS API (default is JSON),
# keyed by each encoder class's `name` attribute
__encodings = {
    encoding_cls.name: encoding_cls
    for encoding_cls in (Dummy, Json, Pickle, Protobuf, Xml)
}
def get_encoding(name):
    """Creates a new encoder instance for the encoding called ``name``.

    The lookup is case-insensitive: ``name`` is lower-cased before it is
    matched against the registered encodings.

    Raises
    ------
    ~ipfsapi.exceptions.EncoderMissingError

    Parameters
    ----------
    name : str
        Encoding name. Supported options:

         * ``"none"``
         * ``"json"``
         * ``"pickle"``
         * ``"protobuf"``
         * ``"xml"``
    """
    try:
        encoding_cls = __encodings[name.lower()]
        return encoding_cls()
    except KeyError:
        raise exceptions.EncoderMissingError(name)