Skip to content

Commit

Permalink
Switch to use defusedxml as the default xml loader.
Browse files Browse the repository at this point in the history
By default, PyAMF will not support potentially vulnerable payloads. See
https://www.owasp.org/index.php/XML_External_Entity_(XXE)_Processing.

All the standard XML processing libs that PyAMF previously supported are still supported.

There may be people who use DTD/Entities as part of their AMF payloads - they will have
to continue to use an old version or make an issue to see how their use case can still be
supported.
  • Loading branch information
njoyce committed Dec 15, 2015
1 parent bc423c4 commit f081dbf
Show file tree
Hide file tree
Showing 15 changed files with 175 additions and 45 deletions.
6 changes: 6 additions & 0 deletions .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ python:
- "2.7"
sudo: false

cache:
directories:
- $HOME/.cache/pip

env:
global:
- PYTHONPATH=~/gaesdk/google_appengine
Expand All @@ -25,6 +29,8 @@ env:
- SQLALCHEMY_VERSION=">=0.9,<1.0"
- TWISTED_VERSION=">=14,<15"
- TWISTED_VERSION=">=15,<16"
- LXML_VERSION=">=3.4,<3.5"
- LXML_VERSION=">=3.5,<3.6"
# special, see install_optional_dependencies.sh
- GAESDK_VERSION=1.9.30

Expand Down
4 changes: 4 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,10 @@ versions of PyAMF.
0.8 (Unreleased)
----------------
- Add support for Django>=1.8
- *Backward incompatible* Wrapped all xml parsing in ``defusedxml`` to protect
against any XML entity attacks. See
https://www.owasp.org/index.php/XML_External_Entity_(XXE)_Processing for more
details. Thanks to Nicolas Grégoire (@Agarri_FR) for the report.

0.7.2 (2015-02-18)
------------------
Expand Down
31 changes: 31 additions & 0 deletions cpyamf/amf0.pxd
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from cpyamf cimport codec, util, amf3


cdef class Context(codec.Context):
cdef amf3.Context amf3_context


cdef class Decoder(codec.Decoder):
cdef public bint use_amf3
cdef readonly Context context
cdef amf3.Decoder amf3_decoder

cdef object readAMF3(self)
cdef object readLongString(self, bint bytes=?)
cdef object readMixedArray(self)
cdef object readReference(self)
cdef object readTypedObject(self)
cdef void readObjectAttributes(self, object obj_attrs)
cdef object readBytes(self)
cdef object readBoolean(self)


cdef class Encoder(codec.Encoder):
cdef public bint use_amf3
cdef readonly Context context
cdef amf3.Encoder amf3_encoder

cdef inline int _writeEndObject(self) except -1
cdef int writeAMF3(self, o) except -1
cdef int _writeDict(self, dict attrs) except -1
cdef inline int writeReference(self, o) except -2
20 changes: 7 additions & 13 deletions cpyamf/amf0.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ cdef object UnknownClassAlias = pyamf.UnknownClassAlias


cdef class Context(codec.Context):
cdef amf3.Context amf3_context

cpdef int clear(self) except -1:
codec.Context.clear(self)

Expand All @@ -60,10 +58,6 @@ cdef class Decoder(codec.Decoder):
"""
"""

cdef public bint use_amf3
cdef readonly Context context
cdef amf3.Decoder amf3_decoder

def __cinit__(self):
self.use_amf3 = 0

Expand All @@ -72,7 +66,7 @@ cdef class Decoder(codec.Decoder):
self.context = kwargs.pop('context', None)

if self.context is None:
self.context = Context()
self.context = Context(**kwargs)

codec.Codec.__init__(self, *args, **kwargs)

Expand Down Expand Up @@ -244,7 +238,11 @@ cdef class Decoder(codec.Decoder):

cdef object readXML(self):
cdef object data = self.readLongString()
cdef object root = xml.fromstring(data)
cdef object root = xml.fromstring(
data,
forbid_dtd=self.context.forbid_dtd,
forbid_entities=self.context.forbid_entities
)

self.context.addObject(root)

Expand Down Expand Up @@ -301,10 +299,6 @@ cdef class Encoder(codec.Encoder):
The AMF0 Encoder.
"""

cdef public bint use_amf3
cdef readonly Context context
cdef amf3.Encoder amf3_encoder

def __cinit__(self):
self.use_amf3 = 0

Expand All @@ -314,7 +308,7 @@ cdef class Encoder(codec.Encoder):
self.context = kwargs.pop('context', None)

if self.context is None:
self.context = Context()
self.context = Context(**kwargs)

codec.Codec.__init__(self, *args, **kwargs)

Expand Down
6 changes: 5 additions & 1 deletion cpyamf/amf3.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -510,7 +510,11 @@ cdef class Decoder(codec.Decoder):
self.stream.read(&buf, ref)
s = PyString_FromStringAndSize(buf, ref)

x = xml.fromstring(s)
x = xml.fromstring(
s,
forbid_dtd=self.context.forbid_dtd,
forbid_entities=self.context.forbid_entities
)
self.context.addObject(x)

return x
Expand Down
2 changes: 2 additions & 0 deletions cpyamf/codec.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,8 @@ cdef class Context(object):
cdef dict unicodes
cdef dict _strings
cdef public dict extra
cdef public bint forbid_dtd
cdef public bint forbid_entities

cpdef int clear(self) except -1
cpdef object getClassAlias(self, object klass)
Expand Down
13 changes: 11 additions & 2 deletions cpyamf/codec.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -229,12 +229,17 @@ cdef class Context(object):

def __cinit__(self):
self.objects = IndexedCollection()
self.forbid_entities = True
self.forbid_dtd = True

self.clear()

def __init__(self):
def __init__(self, forbid_dtd=True, forbid_entities=True, **kwargs):
self.clear()

self.forbid_entities = forbid_entities
self.forbid_dtd = forbid_dtd

cpdef int clear(self) except -1:
self.objects.clear()

Expand Down Expand Up @@ -341,7 +346,8 @@ cdef class Codec(object):
self.strict = 0
self.timezone_offset = None

def __init__(self, stream=None, strict=False, timezone_offset=None):
def __init__(self, stream=None, strict=False, timezone_offset=None,
forbid_entities=True, forbid_dtd=True):
if not isinstance(stream, BufferedByteStream):
stream = BufferedByteStream(stream)

Expand All @@ -350,6 +356,9 @@ cdef class Codec(object):

self.timezone_offset = timezone_offset

self.context.forbid_entities = <bint>forbid_entities
self.context.forbid_dtd = <bint>forbid_dtd


cdef class Decoder(Codec):
"""
Expand Down
9 changes: 9 additions & 0 deletions install_optional_dependencies.sh
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,14 @@ function install_twisted {
pip install "Twisted${TWISTED_VERSION}"
}

function install_lxml {
if [ -z "${LXML_VERSION}" ]; then
return 0
fi

pip install "lxml${LXML_VERSION}"
}

function install_gae_sdk {
if [ -z "${GAESDK_VERSION}" ]; then
return 0
Expand All @@ -40,4 +48,5 @@ function install_gae_sdk {
install_django
install_sqlalchemy
install_twisted
install_lxml
install_gae_sdk
14 changes: 9 additions & 5 deletions pyamf/amf0.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,8 @@ class Decoder(codec.Decoder):
Decodes an AMF0 stream.
"""

def buildContext(self):
return Context()
def buildContext(self, **kwargs):
return Context(**kwargs)

def getTypeFunc(self, data):
# great for coverage, sucks for readability
Expand Down Expand Up @@ -380,7 +380,11 @@ def readXML(self):
Read XML.
"""
data = self.readLongString()
root = xml.fromstring(data)
root = xml.fromstring(
data,
forbid_dtd=self.context.forbid_dtd,
forbid_entities=self.context.forbid_entities,
)

self.context.addObject(root)

Expand All @@ -401,8 +405,8 @@ def __init__(self, *args, **kwargs):

self.use_amf3 = kwargs.pop('use_amf3', False)

def buildContext(self):
return Context()
def buildContext(self, **kwargs):
return Context(**kwargs)

def getTypeFunc(self, data):
if self.use_amf3:
Expand Down
18 changes: 11 additions & 7 deletions pyamf/amf3.py
Original file line number Diff line number Diff line change
Expand Up @@ -607,14 +607,14 @@ class Context(codec.Context):
@type classes: C{list}
"""

def __init__(self):
def __init__(self, **kwargs):
self.strings = codec.ByteStringReferenceCollection()
self.classes = {}
self.class_ref = {}

self.class_idx = 0

codec.Context.__init__(self)
codec.Context.__init__(self, **kwargs)

def clear(self):
"""
Expand Down Expand Up @@ -765,8 +765,8 @@ def __init__(self, *args, **kwargs):

codec.Decoder.__init__(self, *args, **kwargs)

def buildContext(self):
return Context()
def buildContext(self, **kwargs):
return Context(**kwargs)

def getTypeFunc(self, data):
if data == TYPE_UNDEFINED:
Expand Down Expand Up @@ -1079,7 +1079,11 @@ def readXML(self):

xmlstring = self.stream.read(ref >> 1)

x = xml.fromstring(xmlstring)
x = xml.fromstring(
xmlstring,
forbid_dtd=self.context.forbid_dtd,
forbid_entities=self.context.forbid_entities,
)
self.context.addObject(x)

return x
Expand Down Expand Up @@ -1136,8 +1140,8 @@ def __init__(self, *args, **kwargs):

codec.Encoder.__init__(self, *args, **kwargs)

def buildContext(self):
return Context()
def buildContext(self, **kwargs):
return Context(**kwargs)

def getTypeFunc(self, data):
"""
Expand Down
24 changes: 18 additions & 6 deletions pyamf/codec.py
Original file line number Diff line number Diff line change
Expand Up @@ -148,8 +148,8 @@ class Context(object):
"""
The base context for all AMF [de|en]coding.
@ivar extra: The only public attribute. This is a placeholder for any extra
contextual data that required for different adapters.
@ivar extra: This is a placeholder for any extra contextual data that
required for different adapters.
@type extra: C{dict}
@ivar _objects: A collection of stored references to objects that have
already been visited by this context.
Expand All @@ -158,11 +158,20 @@ class Context(object):
determined by L{pyamf.get_class_alias}
@ivar _unicodes: Lookup of utf-8 encoded byte strings -> string objects
(aka strings/unicodes).
@ivar forbid_dtd: Don't allow DTD in XML documents (decode only). By
default PyAMF will not support potentially malicious XML documents
- e.g. XXE.
@ivar forbid_entities: Don't allow entities in XML documents (decode only).
By default PyAMF will not support potentially malicious XML documents
- e.g. XXE.
"""

def __init__(self):
def __init__(self, forbid_dtd=True, forbid_entities=True):
self._objects = IndexedCollection()

self.forbid_entities = forbid_entities
self.forbid_dtd = forbid_dtd

self.clear()

def clear(self):
Expand Down Expand Up @@ -280,18 +289,21 @@ class _Codec(object):
"""

def __init__(self, stream=None, context=None, strict=False,
timezone_offset=None):
timezone_offset=None, forbid_dtd=True, forbid_entities=True):
if isinstance(stream, basestring) or stream is None:
stream = util.BufferedByteStream(stream)

self.stream = stream
self.context = context or self.buildContext()
self.context = context or self.buildContext(
forbid_dtd=forbid_dtd,
forbid_entities=forbid_entities
)
self.strict = strict
self.timezone_offset = timezone_offset

self._func_cache = {}

def buildContext(self):
def buildContext(self, **kwargs):
"""
A context factory.
"""
Expand Down
9 changes: 6 additions & 3 deletions pyamf/remoting/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -584,7 +584,8 @@ def get_fault(data):
return get_fault_class(level, **e)(**e)


def decode(stream, strict=False, logger=None, timezone_offset=None):
def decode(stream, strict=False, logger=None, timezone_offset=None,
**kwargs):
"""
Decodes the incoming stream as a remoting message.
Expand Down Expand Up @@ -623,7 +624,8 @@ def decode(stream, strict=False, logger=None, timezone_offset=None):
pyamf.AMF0,
stream,
strict=strict,
timezone_offset=timezone_offset
timezone_offset=timezone_offset,
**kwargs
)
context = decoder.context

Expand Down Expand Up @@ -651,7 +653,7 @@ def decode(stream, strict=False, logger=None, timezone_offset=None):
return msg


def encode(msg, strict=False, logger=None, timezone_offset=None):
def encode(msg, strict=False, logger=None, timezone_offset=None, **kwargs):
"""
Encodes and returns the L{msg<Envelope>} as an AMF stream.
Expand All @@ -677,6 +679,7 @@ def encode(msg, strict=False, logger=None, timezone_offset=None):
stream,
strict=strict,
timezone_offset=timezone_offset,
**kwargs
)

if msg.amfVersion == pyamf.AMF3:
Expand Down
Loading

0 comments on commit f081dbf

Please sign in to comment.