diff --git a/xmltree/example_test.go b/xmltree/example_test.go index d8ffa00..8c60b44 100644 --- a/xmltree/example_test.go +++ b/xmltree/example_test.go @@ -190,7 +190,7 @@ func ExampleMarshal() { 2 - A Good Going-over.Grace Triumphant."One of Tom Sawyers's Lies". + A Good Going-over.Grace Triumphant."One of Tom Sawyers's Lies". 3 @@ -220,7 +220,7 @@ func ExampleMarshal() { // // Civilizing Huck.Miss Watson.Tom Sawyer Waits. // The Boys Escape Jim.Torn Sawyer's Gang.Deep-laid Plans. - // A Good Going-over.Grace Triumphant."One of Tom Sawyers's Lies". + // A Good Going-over.Grace Triumphant."One of Tom Sawyers's Lies". // Huck and the Judge.Superstition. // } diff --git a/xmltree/marshal.go b/xmltree/marshal.go index ca949fe..caed0f8 100644 --- a/xmltree/marshal.go +++ b/xmltree/marshal.go @@ -4,6 +4,7 @@ import ( "bytes" "encoding/xml" "io" + "strings" "text/template" ) @@ -22,6 +23,48 @@ var tagTmpl = template.Must(template.New("Marshal XML tags").Parse( {{define "end" -}} {{end}}`)) +type vContentMapping struct { + Decoded string + Encoded string +} + +var vContentMappings = []vContentMapping{ + {Decoded: `&`, Encoded: `&`}, + {Decoded: `<`, Encoded: `<`}, + {Decoded: `>`, Encoded: `>`}, + {Decoded: `"`, Encoded: `"`}, +} + +// XML encode any special characters in a plain string. +// For example & will be encoded as &amp; + +func xmlEncodeString(strToEncode string) (string, error) { + strEncoded := strToEncode + + for _, mapping := range vContentMappings { + strEncoded = strings.Replace(strEncoded, mapping.Decoded, mapping.Encoded, -1) + } + + //fmt.Printf("xmlEncodeString([%s]) -> [%s]\n", strToEncode, strEncoded) + + return strEncoded, nil +} + +// XML decode escaped characters in a string. 
+// For example &quot; will be decoded as " + +func xmlDecodeString(strToDecode string) (string, error) { + strDecoded := strToDecode + + for _, mapping := range vContentMappings { + strDecoded = strings.Replace(strDecoded, mapping.Encoded, mapping.Decoded, -1) + } + + //fmt.Printf("xmlDecodeString([%s]) -> [%s]\n", strToDecode, strDecoded) + + return strDecoded, nil +} + // Marshal produces the XML encoding of an Element as a self-contained // document. The xmltree package may adjust the declarations of XML // namespaces if the Element has been modified, or is part of a larger scope, @@ -96,7 +139,11 @@ func (e *encoder) encode(el, parent *Element, visited map[*Element]struct{}) err } if len(el.Children) == 0 { if len(el.Content) > 0 { - e.w.Write(el.Content) + mStr, mErr := xmlEncodeString(string(el.Content)) + if mErr != nil { + return mErr + } + e.w.Write([]byte(mStr)) } else { return nil } @@ -139,10 +186,42 @@ func (e *encoder) encodeOpenTag(el *Element, scope Scope, depth int) error { io.WriteString(e.w, e.indent) } } + // Note that a copy of el is used here so that XML encoded attributes are generated + var elCopy *Element = &Element{} + elCopy.StartElement = xml.StartElement{} + elCopy.StartElement.Name = el.StartElement.Name + elCopy.StartElement.Attr = make([]xml.Attr, len(el.StartElement.Attr)) + for i := 0; i < len(el.StartElement.Attr); i++ { + elCopy.StartElement.Attr[i] = el.StartElement.Attr[i] + } + elCopy.Scope = el.Scope + // Escape node contents + { + mStr, mErr := xmlEncodeString(string(el.Content)) + if mErr != nil { + return mErr + } + elCopy.Content = []byte(mStr) + } + elCopy.Children = el.Children + var tag = struct { *Element NS []xml.Name - }{Element: el, NS: scope.ns} + }{Element: elCopy, NS: scope.ns} + + // XML escape attribute strings held in copy + attrs := tag.StartElement.Attr + for i := 0; i < len(attrs); i++ { + attrStr := attrs[i].Value + mStr, mErr := xmlEncodeString(attrStr) + if mErr != nil { + return mErr + } + attrs[i].Value = 
mStr + } + tag.StartElement.Attr = attrs + if err := tagTmpl.ExecuteTemplate(e.w, "start", tag); err != nil { return err } diff --git a/xmltree/marshal_test.go b/xmltree/marshal_test.go new file mode 100644 index 0000000..5871531 --- /dev/null +++ b/xmltree/marshal_test.go @@ -0,0 +1,297 @@ +package xmltree_test + +import ( + "encoding/xml" + "fmt" + "log" + "testing" + + "aqwari.net/xml/xmltree" +) + +// Check for proper XML escape quoting inside attributes + +func TestXMLParseAttribute(t *testing.T) { + var err error + + type Module struct { + XMLName xml.Name `xml:"module"` + Type string `xml:"name,attr"` + } + + xmlBytes := []byte(``) + + // []byte -> Module object + var moduleValue Module + err = xml.Unmarshal(xmlBytes, &moduleValue) + if err != nil { + panic(err) + } + + // Format Module as XML + xmlOutBytes, outErr := xml.Marshal(moduleValue) + if outErr != nil { + panic(outErr) + } + + { + have := string(xmlOutBytes) + want := "" + + if have != want { + t.Fatalf("!Match : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } +} + +// golang xml Unmarshal for an attribute + +func TestXMLParseEscapedAttributeStd(t *testing.T) { + var err error + + type Module struct { + XMLName xml.Name `xml:"module"` + Name string `xml:"name,attr"` + } + + // < is the same as < + // > is the same as > + // + // < -> < + // > -> > + + xmlBytes := []byte(``) + + // []byte -> Module object + var moduleValue Module + err = xml.Unmarshal(xmlBytes, &moduleValue) + if err != nil { + panic(err) + } + + // Format Module as XML + xmlOutBytes, outErr := xml.Marshal(moduleValue) + if outErr != nil { + panic(outErr) + } + + // Note that golang default XML Marshal will format as "<" + + { + have := string(xmlOutBytes) + want := `` + + if have != want { + t.Fatalf("!Match : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } +} + +// Escaped characters inside (as chardata) + +func TestXMLParseEscapedValueStd(t *testing.T) { + var err error + + type Module struct { + 
XMLName xml.Name `xml:"module"` + Value string `xml:",chardata"` + } + + xmlBytes := []byte(`<`) + + // []byte -> Module object + var moduleValue Module + err = xml.Unmarshal(xmlBytes, &moduleValue) + if err != nil { + panic(err) + } + + // Format Module as XML + xmlOutBytes, outErr := xml.Marshal(moduleValue) + if outErr != nil { + panic(outErr) + } + + // Note that golang default XML Marshal will format as "<" + + { + have := string(xmlOutBytes) + want := `<` + + if have != want { + t.Fatalf("!Match : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } +} + +// golang xml.Unmarshal() will automatically unencode XML encoded data inside a node + +func TestXMLParseEscapedDoubleQuoteParent(t *testing.T) { + type ParentExample struct { + StringLiteral string `xml:"stringliteral"` + } + + //xmlBytes := []byte(`"`) + xmlBytes := []byte(`"`) + + fmt.Println("xmlBytes:" + string(xmlBytes)) + + obj := ParentExample{} + err := xml.Unmarshal(xmlBytes, &obj) + if err != nil { + panic(err) + } + + strContents := obj.StringLiteral + fmt.Printf("obj XMLBody [%s]\n", strContents) + + { + have := strContents + want := `"` + + if have != want { + t.Fatalf("!Match : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } +} + +// xmltree library should convert XML escapes as a result of Parse() + +func TestXMLParseEscapedDoubleQuoteParentWithXMLTree(t *testing.T) { + //xmlBytes := []byte(`"`) + xmlBytes := []byte(`"`) + + fmt.Println("xmlBytes:" + string(xmlBytes)) + + rootNode, err := xmltree.Parse(xmlBytes) + if err != nil { + panic(err) + } + + strContents := string(rootNode.Children[0].Content) + fmt.Printf("obj XMLBody [%s]\n", strContents) + + { + have := strContents + want := `"` + + if have != want { + t.Fatalf("!Match : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } +} + +// Parse and then format with xmltree module + +func TestXMLParseEscapedAttributeWithXMLTree(t *testing.T) { + var err error + + type Module struct { + XMLName 
xml.Name `xml:"module"` + Name string `xml:"name,attr"` + } + + xmlBytes := []byte(``) + + // []byte -> Module object + rootNode, err := xmltree.Parse(xmlBytes) + if err != nil { + log.Fatal(err) + } + + xmlOutBytes := xmltree.MarshalIndent(rootNode, "", " ") + + { + have := string(xmlOutBytes) + want := `` + "\n" + + if have != want { + t.Fatalf("!Match : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } +} + +// Parse escaped value inside XML tags using xmltree module + +func TestXMLParseEscapedValueXMLTree(t *testing.T) { + var err error + + xmlBytes := []byte(`<>`) + + // []byte -> Module object + rootNode, err := xmltree.Parse(xmlBytes) + if err != nil { + log.Fatal(err) + } + + fmt.Printf("rootNode %v\n", rootNode) + + // check decoded result + + { + have := string(rootNode.Content) + want := `<>` + + if have != want { + t.Fatalf("!Match (decoded) : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } + + fmt.Printf("rootNode %v\n", rootNode) + + xmlOutBytes := xmltree.MarshalIndent(rootNode, "", " ") + + fmt.Printf("rootNode %v\n", rootNode) + fmt.Printf("xmlOutBytes %v\n", string(xmlOutBytes)) + + { + have := string(xmlOutBytes) + want := `<>` + "\n" + + if have != want { + t.Fatalf("!Match (encoded) : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } +} + +// Parse/Format with xmltree methods, does not modify contents of node + +func TestXMLParseEscapedAmpersandQuotedAttributeWithXMLTreeReadOnly(t *testing.T) { + var err error + + xmlBytes := []byte(``) + + rootNode, err := xmltree.Parse(xmlBytes) + if err != nil { + log.Fatal(err) + } + + // Verify that the above call to xmltree.Parse() has properly + // decoded "&" -> to "&" + + { + have := string(rootNode.StartElement.Attr[0].Value) + want := `&` + + if have != want { + t.Fatalf("!Match : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } + + // Invoke xmltree.MarshalIndent() + + xmlOutBytes := xmltree.MarshalIndent(rootNode, "", " ") + // Ignore 
xmlOutBytes to avoid compiler error + xmlOutBytes = xmlOutBytes + + // Verify that MarshalIndent() does not modify the contents of rootNode + + { + have := string(rootNode.StartElement.Attr[0].Value) + want := `&` + + if have != want { + t.Fatalf("!Match : want : have :\n-----\n%v\n-----\n%v\n-----", want, have) + } + } +} diff --git a/xmltree/xmltree.go b/xmltree/xmltree.go index 47567f4..ca754aa 100644 --- a/xmltree/xmltree.go +++ b/xmltree/xmltree.go @@ -272,6 +272,12 @@ walk: return fmt.Errorf("Expecting , got ", el.Prefix(el.Name), el.Prefix(tok.Name)) } el.Content = data[int(begin):int(end)] + contentStr := string(el.Content) + encStr, encErr := xmlDecodeString(contentStr) + if encErr != nil { + return encErr + } + el.Content = []byte(encStr) break walk } end = scanner.InputOffset()