From 51a069ae4426997b42f168898671cf3dfc8ade5b Mon Sep 17 00:00:00 2001 From: Mo DeJong Date: Wed, 21 Feb 2024 20:39:02 -0800 Subject: [PATCH 1/3] Use xml Marshal to encode attribute values in encodeOpenTag --- xmltree/marshal.go | 18 ++++ xmltree/marshal_test.go | 185 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 203 insertions(+) create mode 100644 xmltree/marshal_test.go diff --git a/xmltree/marshal.go b/xmltree/marshal.go index ca949fe..b01ce64 100644 --- a/xmltree/marshal.go +++ b/xmltree/marshal.go @@ -5,6 +5,7 @@ import ( "encoding/xml" "io" "text/template" + "strings" ) // NOTE(droyo) As of go1.5.1, the encoding/xml package does not resolve @@ -143,6 +144,23 @@ func (e *encoder) encodeOpenTag(el *Element, scope Scope, depth int) error { *Element NS []xml.Name }{Element: el, NS: scope.ns} + + // XML escape attribute strings + attrs := tag.StartElement.Attr + for i := 0; i < len(attrs) ; i++ { + attrStr := attrs[i].Value + mBytes, mErr := xml.Marshal(attrStr) + if mErr != nil { + return mErr + } + mStr := string(mBytes) + // xyz -> xyz + mStr = strings.Replace(mStr, "", "", 1) + mStr = strings.Replace(mStr, "", "", 1) + attrs[i].Value = mStr + } + tag.StartElement.Attr = attrs + if err := tagTmpl.ExecuteTemplate(e.w, "start", tag); err != nil { return err } diff --git a/xmltree/marshal_test.go b/xmltree/marshal_test.go new file mode 100644 index 0000000..f57cc35 --- /dev/null +++ b/xmltree/marshal_test.go @@ -0,0 +1,185 @@ +package xmltree_test + +import ( + "encoding/xml" + "log" + "testing" + + "aqwari.net/xml/xmltree" +) + +// Check for proper XML escape quoting inside attributes + +func TestXMLParseAttribute(t *testing.T) { + var err error + + type Module struct { + XMLName xml.Name `xml:"module"` + Type string `xml:"name,attr"` + } + + xmlBytes := []byte(``) + + // []byte -> Module object + var moduleValue Module + err = xml.Unmarshal(xmlBytes, &moduleValue) + if err != nil { + panic(err) + } + + // Format Module as XML + xmlOutBytes, outErr := xml.Marshal(moduleValue) + if outErr != nil { + panic(outErr) + } + + { + have := string(xmlOutBytes) + want := "" + + if have != want { + t.Fatalf("!Match : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } +} + +// golang xml Unmarshal for an attribute + +func TestXMLParseEscapedAttributeStd(t *testing.T) { + var err error + + type Module struct { + XMLName xml.Name `xml:"module"` + Name string `xml:"name,attr"` + } + + // < is the same as < + // > is the same as > + // + // < -> < + // > -> > + + xmlBytes := []byte(``) + + // []byte -> Module object + var moduleValue Module + err = xml.Unmarshal(xmlBytes, &moduleValue) + if err != nil { + panic(err) + } + + // Format Module as XML + xmlOutBytes, outErr := xml.Marshal(moduleValue) + if outErr != nil { + panic(outErr) + } + + // Note that golang default XML Marshal will format as "<" + + { + have := string(xmlOutBytes) + want := `` + + if have != want { + t.Fatalf("!Match : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } +} + +// Escaped characters inside (as chardata) + +func TestXMLParseEscapedValueStd(t *testing.T) { + var err error + + type Module struct { + XMLName xml.Name `xml:"module"` + Value string `xml:",chardata"` + } + + xmlBytes := []byte(`<`) + + // []byte -> Module object + var moduleValue Module + err = xml.Unmarshal(xmlBytes, &moduleValue) + if err != nil { + panic(err) + } + + // Format Module as XML + xmlOutBytes, outErr := xml.Marshal(moduleValue) + if outErr != nil { + panic(outErr) + } + + // Note that golang default XML Marshal will format as "<" + + { + have := string(xmlOutBytes) + want := `<` + + if have != want { + t.Fatalf("!Match : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } +} + +// Parse and then format with xmltree module + +func TestXMLParseEscapedAttributeWithXMLTree(t *testing.T) { + var err error + + type Module struct { + XMLName xml.Name `xml:"module"` + Name string `xml:"name,attr"` + } + + xmlBytes := []byte(``) + + // []byte -> Module object + rootNode, err := xmltree.Parse(xmlBytes) + if err != nil { + log.Fatal(err) + } + + xmlOutBytes := xmltree.MarshalIndent(rootNode, "", " ") + + { + have := string(xmlOutBytes) + want := `` + "\n" + + if have != want { + t.Fatalf("!Match : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } +} + +// Parse escaped value inside XML tags using xmltree module + +func TestXMLParseEscapedValueXMLTree(t *testing.T) { + var err error + + type Module struct { + XMLName xml.Name `xml:"module"` + Value string `xml:",chardata"` + } + + xmlBytes := []byte(`<>`) + + // []byte -> Module object + rootNode, err := xmltree.Parse(xmlBytes) + if err != nil { + log.Fatal(err) + } + + xmlOutBytes := xmltree.MarshalIndent(rootNode, "", " ") + + { + have := string(xmlOutBytes) + want := `<>` + "\n" + + if have != want { + t.Fatalf("!Match : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } +} + From 9858a22bb1c7170d813d40ebb15b231047ac58d3 Mon Sep 17 00:00:00 2001 From: Mo DeJong Date: Sat, 24 Feb 2024 16:36:21 -0800 Subject: [PATCH 2/3] address issue where attr format would modify the contents of node, add test case --- xmltree/marshal.go | 16 +++++++++++++-- xmltree/marshal_test.go | 43 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 57 insertions(+), 2 deletions(-) diff --git a/xmltree/marshal.go b/xmltree/marshal.go index b01ce64..091698d 100644 --- a/xmltree/marshal.go +++ b/xmltree/marshal.go @@ -140,12 +140,24 @@ func (e *encoder) encodeOpenTag(el *Element, scope Scope, depth int) error { io.WriteString(e.w, e.indent) } } + // Note that a copy of el is used here so that XML encoded attributes are generated + var elCopy *Element = &Element{} + elCopy.StartElement = xml.StartElement{} + elCopy.StartElement.Name = el.StartElement.Name + elCopy.StartElement.Attr = make([]xml.Attr, len(el.StartElement.Attr)) + for i:=0; i < len(el.StartElement.Attr); i++ { + elCopy.StartElement.Attr[i] = el.StartElement.Attr[i] + } + elCopy.Scope = el.Scope + elCopy.Content = el.Content + elCopy.Children = el.Children + var tag = struct { *Element NS []xml.Name - }{Element: el, NS: scope.ns} + }{Element: elCopy, NS: scope.ns} - // XML escape attribute strings + // XML escape attribute strings held in copy attrs := tag.StartElement.Attr for i := 0; i < len(attrs) ; i++ { attrStr := attrs[i].Value diff --git a/xmltree/marshal_test.go b/xmltree/marshal_test.go index f57cc35..d9d4f4b 100644 --- a/xmltree/marshal_test.go +++ b/xmltree/marshal_test.go @@ -183,3 +183,46 @@ func TestXMLParseEscapedValueXMLTree(t *testing.T) { } } + +// Parse/Format with xmltree methods, does not modifiy contents of node + +func TestXMLParseEscapedAmpersandQuotedAttributeWithXMLTreeReadOnly(t *testing.T) { + var err error + + xmlBytes := []byte(``) + + rootNode, err := xmltree.Parse(xmlBytes) + if err != nil { + log.Fatal(err) + } + + // Verify that the above call to xmltree.Parse() has properly + // decoded "&" -> to "&" + + { + have := string(rootNode.StartElement.Attr[0].Value) + want := `&` + + if have != want { + t.Fatalf("!Match : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } + + // Invoke xmltree.MarshalIndent() + + xmlOutBytes := xmltree.MarshalIndent(rootNode, "", " ") + // Ignore xmlOutBytes + xmlOutBytes = xmlOutBytes + + // Verify that MarshalIndent() does not modify the contents of rootNode + + { + have := string(rootNode.StartElement.Attr[0].Value) + want := `&` + + if have != want { + t.Fatalf("!Match : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } +} + From 71418fe328c75f7876cd4357fb8abbb81f9acfa2 Mon Sep 17 00:00:00 2001 From: Mo DeJong Date: Thu, 11 Apr 2024 11:42:56 -0700 Subject: [PATCH 3/3] add logic to encode and decode special XML characters, add test cases --- xmltree/example_test.go | 4 +- xmltree/marshal.go | 69 +++++++++++++++++++++++++++----- xmltree/marshal_test.go | 87 ++++++++++++++++++++++++++++++++++++----- xmltree/xmltree.go | 6 +++ 4 files changed, 145 insertions(+), 21 deletions(-) diff --git a/xmltree/example_test.go b/xmltree/example_test.go index d8ffa00..8c60b44 100644 --- a/xmltree/example_test.go +++ b/xmltree/example_test.go @@ -190,7 +190,7 @@ func ExampleMarshal() { 2 - A Good Going-over.Grace Triumphant."One of Tom Sawyers's Lies". + A Good Going-over.Grace Triumphant."One of Tom Sawyers's Lies". 3 @@ -220,7 +220,7 @@ func ExampleMarshal() { // // Civilizing Huck.Miss Watson.Tom Sawyer Waits. // The Boys Escape Jim.Torn Sawyer's Gang.Deep-laid Plans. - // A Good Going-over.Grace Triumphant."One of Tom Sawyers's Lies". + // A Good Going-over.Grace Triumphant."One of Tom Sawyers's Lies". // Huck and the Judge.Superstition. // } diff --git a/xmltree/marshal.go b/xmltree/marshal.go index 091698d..caed0f8 100644 --- a/xmltree/marshal.go +++ b/xmltree/marshal.go @@ -4,8 +4,8 @@ import ( "bytes" "encoding/xml" "io" - "text/template" "strings" + "text/template" ) // NOTE(droyo) As of go1.5.1, the encoding/xml package does not resolve @@ -23,6 +23,48 @@ var tagTmpl = template.Must(template.New("Marshal XML tags").Parse( {{define "end" -}} {{end}}`)) +type vContentMapping struct { + Decoded string + Encoded string +} + +var vContentMappings = []vContentMapping{ + {Decoded: `&`, Encoded: `&`}, + {Decoded: `<`, Encoded: `<`}, + {Decoded: `>`, Encoded: `>`}, + {Decoded: `"`, Encoded: `"`}, +} + +// XML encode any special characters in a plain string. +// For example & will be encoded as & + +func xmlEncodeString(strToEncode string) (string, error) { + strEncoded := strToEncode + + for _, mapping := range vContentMappings { + strEncoded = strings.Replace(strEncoded, mapping.Decoded, mapping.Encoded, -1) + } + + //fmt.Printf("xmlEncodeString([%s]) -> [%s]\n", strToEncode, strEncoded) + + return strEncoded, nil +} + +// XML decode escaped characters in a string. +// For example " will be encoded as " + +func xmlDecodeString(strToDecode string) (string, error) { + strDecoded := strToDecode + + for _, mapping := range vContentMappings { + strDecoded = strings.Replace(strDecoded, mapping.Encoded, mapping.Decoded, -1) + } + + //fmt.Printf("xmlDecodeString([%s]) -> [%s]\n", strToDecode, strDecoded) + + return strDecoded, nil +} + // Marshal produces the XML encoding of an Element as a self-contained // document. The xmltree package may adjust the declarations of XML // namespaces if the Element has been modified, or is part of a larger scope, @@ -97,7 +139,11 @@ func (e *encoder) encode(el, parent *Element, visited map[*Element]struct{}) err } if len(el.Children) == 0 { if len(el.Content) > 0 { - e.w.Write(el.Content) + mStr, mErr := xmlEncodeString(string(el.Content)) + if mErr != nil { + return mErr + } + e.w.Write([]byte(mStr)) } else { return nil } @@ -145,11 +191,18 @@ func (e *encoder) encodeOpenTag(el *Element, scope Scope, depth int) error { elCopy.StartElement = xml.StartElement{} elCopy.StartElement.Name = el.StartElement.Name elCopy.StartElement.Attr = make([]xml.Attr, len(el.StartElement.Attr)) - for i:=0; i < len(el.StartElement.Attr); i++ { + for i := 0; i < len(el.StartElement.Attr); i++ { elCopy.StartElement.Attr[i] = el.StartElement.Attr[i] } elCopy.Scope = el.Scope - elCopy.Content = el.Content + // Escape node contents + { + mStr, mErr := xmlEncodeString(string(el.Content)) + if mErr != nil { + return mErr + } + elCopy.Content = []byte(mStr) + } elCopy.Children = el.Children var tag = struct { @@ -159,16 +212,12 @@ func (e *encoder) encodeOpenTag(el *Element, scope Scope, depth int) error { // XML escape attribute strings held in copy attrs := tag.StartElement.Attr - for i := 0; i < len(attrs) ; i++ { + for i := 0; i < len(attrs); i++ { attrStr := attrs[i].Value - mBytes, mErr := xml.Marshal(attrStr) + mStr, mErr := xmlEncodeString(attrStr) if mErr != nil { return mErr } - mStr := string(mBytes) - // xyz -> xyz - mStr = strings.Replace(mStr, "", "", 1) - mStr = strings.Replace(mStr, "", "", 1) attrs[i].Value = mStr } tag.StartElement.Attr = attrs diff --git a/xmltree/marshal_test.go b/xmltree/marshal_test.go index d9d4f4b..5871531 100644 --- a/xmltree/marshal_test.go +++ b/xmltree/marshal_test.go @@ -2,6 +2,7 @@ package xmltree_test import ( "encoding/xml" + "fmt" "log" "testing" @@ -123,6 +124,63 @@ func TestXMLParseEscapedValueStd(t *testing.T) { } } +// golang xml.Unmarshal() will automatically unencode XML encoded data inside a node + +func TestXMLParseEscapedDoubleQuoteParent(t *testing.T) { + type ParentExample struct { + StringLiteral string `xml:"stringliteral"` + } + + //xmlBytes := []byte(`"`) + xmlBytes := []byte(`"`) + + fmt.Println("xmlBytes:" + string(xmlBytes)) + + obj := ParentExample{} + err := xml.Unmarshal(xmlBytes, &obj) + if err != nil { + panic(err) + } + + strContents := obj.StringLiteral + fmt.Printf("obj XMLBody [%s]\n", strContents) + + { + have := strContents + want := `"` + + if have != want { + t.Fatalf("!Match : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } +} + +// xmltree library should convert XML escapes as a result of Parse() + +func TestXMLParseEscapedDoubleQuoteParentWithXMLTree(t *testing.T) { + //xmlBytes := []byte(`"`) + xmlBytes := []byte(`"`) + + fmt.Println("xmlBytes:" + string(xmlBytes)) + + rootNode, err := xmltree.Parse(xmlBytes) + if err != nil { + panic(err) + } + + strContents := string(rootNode.Children[0].Content) + fmt.Printf("obj XMLBody [%s]\n", strContents) + + { + have := strContents + want := `"` + + if have != want { + t.Fatalf("!Match : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } +} + // Parse and then format with xmltree module func TestXMLParseEscapedAttributeWithXMLTree(t *testing.T) { @@ -158,11 +216,6 @@ func TestXMLParseEscapedAttributeWithXMLTree(t *testing.T) { func TestXMLParseEscapedValueXMLTree(t *testing.T) { var err error - type Module struct { - XMLName xml.Name `xml:"module"` - Value string `xml:",chardata"` - } - xmlBytes := []byte(`<>`) // []byte -> Module object @@ -171,19 +224,36 @@ func TestXMLParseEscapedValueXMLTree(t *testing.T) { log.Fatal(err) } + fmt.Printf("rootNode %v\n", rootNode) + + // check decoded result + + { + have := string(rootNode.Content) + want := `<>` + + if have != want { + t.Fatalf("!Match (decoded) : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } + + fmt.Printf("rootNode %v\n", rootNode) + xmlOutBytes := xmltree.MarshalIndent(rootNode, "", " ") + fmt.Printf("rootNode %v\n", rootNode) + fmt.Printf("xmlOutBytes %v\n", string(xmlOutBytes)) + { have := string(xmlOutBytes) want := `<>` + "\n" if have != want { - t.Fatalf("!Match : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + t.Fatalf("!Match (encoded) : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) } } } - // Parse/Format with xmltree methods, does not modifiy contents of node func TestXMLParseEscapedAmpersandQuotedAttributeWithXMLTreeReadOnly(t *testing.T) { @@ -211,7 +281,7 @@ func TestXMLParseEscapedAmpersandQuotedAttributeWithXMLTreeReadOnly(t *testing.T // Invoke xmltree.MarshalIndent() xmlOutBytes := xmltree.MarshalIndent(rootNode, "", " ") - // Ignore xmlOutBytes + // Ignore xmlOutBytes to avoid compiler error xmlOutBytes = xmlOutBytes // Verify that MarshalIndent() does not modify the contents of rootNode @@ -225,4 +295,3 @@ func TestXMLParseEscapedAmpersandQuotedAttributeWithXMLTreeReadOnly(t *testing.T } } } - diff --git a/xmltree/xmltree.go b/xmltree/xmltree.go index 47567f4..ca754aa 100644 --- a/xmltree/xmltree.go +++ b/xmltree/xmltree.go @@ -272,6 +272,12 @@ walk: return fmt.Errorf("Expecting , got ", el.Prefix(el.Name), el.Prefix(tok.Name)) } el.Content = data[int(begin):int(end)] + contentStr := string(el.Content) + encStr, encErr := xmlDecodeString(contentStr) + if encErr != nil { + return encErr + } + el.Content = []byte(encStr) break walk } end = scanner.InputOffset()