diff --git a/src/PhpWord/Shared/Html.php b/src/PhpWord/Shared/Html.php index 54e9509e5f..04200b31a5 100644 --- a/src/PhpWord/Shared/Html.php +++ b/src/PhpWord/Shared/Html.php @@ -62,10 +62,10 @@ public static function addHtml($element, $html, $fullHTML = false, $preserveWhit // Preprocess: remove all line ends, decode HTML entity, // fix ampersand and angle brackets and add body tag for HTML fragments $html = str_replace(array("\n", "\r"), '', $html); - $html = str_replace(array('<', '>', '&'), array('_lt_', '_gt_', '_amp_'), $html); + $html = str_replace(array('<', '>', '&', '"'), array('_lt_', '_gt_', '_amp_', '_quot_'), $html); $html = html_entity_decode($html, ENT_QUOTES, 'UTF-8'); $html = str_replace('&', '&', $html); - $html = str_replace(array('_lt_', '_gt_', '_amp_'), array('<', '>', '&'), $html); + $html = str_replace(array('_lt_', '_gt_', '_amp_', '_quot_'), array('<', '>', '&', '"'), $html); if (false === $fullHTML) { $html = '' . $html . ''; @@ -96,15 +96,43 @@ protected static function parseInlineStyle($node, $styles = array()) $attributes = $node->attributes; // get all the attributes(eg: id, class) foreach ($attributes as $attribute) { - switch ($attribute->name) { + $val = $attribute->value; + switch (strtolower($attribute->name)) { case 'style': $styles = self::parseStyle($attribute, $styles); break; case 'align': - $styles['alignment'] = self::mapAlign($attribute->value); + $styles['alignment'] = self::mapAlign(trim($val)); break; case 'lang': - $styles['lang'] = $attribute->value; + $styles['lang'] = $val; + break; + case 'width': + // tables, cells + if (false !== strpos($val, '%')) { + // e.g. or + + + + + + + +
+ $styles['width'] = intval($val) * 50; + $styles['unit'] = \PhpOffice\PhpWord\SimpleType\TblWidth::PERCENT; + } else { + // e.g. , where "2" = 2px (always pixels) + $val = intval($val).'px'; + $styles['cellSpacing'] = Converter::cssToTwip($val); + break; + case 'bgcolor': + // tables, rows, cells e.g. + $styles['bgColor'] = trim($val, '# '); + break; + case 'valign': + // cells e.g. + + + +
+ if (preg_match('#(?:top|bottom|middle|baseline)#i', $val, $matches)) { + $styles['valign'] = self::mapAlignVertical($matches[0]); + } break; } } @@ -161,6 +189,7 @@ protected static function parseNode($node, $element, $styles = array(), $data = 'img' => array('Image', $node, $element, $styles, null, null, null), 'br' => array('LineBreak', null, $element, $styles, null, null, null), 'a' => array('Link', $node, $element, $styles, null, null, null), + 'hr' => array('HorizRule', $node, $element, $styles, null, null, null), ); $newElement = null; @@ -361,7 +390,11 @@ protected static function parseCell($node, $element, &$styles) if (!empty($colspan)) { $cellStyles['gridSpan'] = $colspan - 0; } - $cell = $element->addCell(null, $cellStyles); + + // set cell width to control column widths + $width = isset($cellStyles['width']) ? $cellStyles['width'] : null; + unset($cellStyles['width']); // would not apply + $cell = $element->addCell($width, $cellStyles); if (self::shouldAddTextRun($node)) { return $cell->addTextRun(self::parseInlineStyle($node, $styles['paragraph'])); @@ -420,7 +453,32 @@ protected static function parseList($node, $element, &$styles, &$data) } else { $data['listdepth'] = 0; $styles['list'] = 'listStyle_' . self::$listIndex++; - $element->getPhpWord()->addNumberingStyle($styles['list'], self::getListStyle($isOrderedList)); + $style = $element->getPhpWord()->addNumberingStyle($styles['list'], self::getListStyle($isOrderedList)); + + // extract attributes start & type e.g.
    + $start = 0; + $type = ''; + foreach ($node->attributes as $attribute) { + switch ($attribute->name) { + case 'start': + $start = (int) $attribute->value; + break; + case 'type': + $type = $attribute->value; + break; + } + } + + $levels = $style->getLevels(); + /** @var \PhpOffice\PhpWord\Style\NumberingLevel */ + $level = $levels[0]; + if ($start > 0) { + $level->setStart($start); + } + $type = $type ? self::mapListType($type) : null; + if ($type) { + $level->setFormat($type); + } } if ($node->parentNode->nodeName === 'li') { return $element->getParent(); @@ -502,7 +560,8 @@ protected static function parseStyle($attribute, $styles) foreach ($properties as $property) { list($cKey, $cValue) = array_pad(explode(':', $property, 2), 2, null); $cValue = trim($cValue); - switch (trim($cKey)) { + $cKey = strtolower(trim($cKey)); + switch ($cKey) { case 'text-decoration': switch ($cValue) { case 'underline': @@ -575,11 +634,18 @@ protected static function parseStyle($attribute, $styles) } $styles['italic'] = $tValue; break; + case 'margin': + $cValue = Converter::cssToTwip($cValue); + $styles['spaceBefore'] = $cValue; + $styles['spaceAfter'] = $cValue; + break; case 'margin-top': - $styles['spaceBefore'] = Converter::cssToPoint($cValue); + // BC change: up to ver. 0.17.0 incorrectly converted to points - Converter::cssToPoint($cValue) + $styles['spaceBefore'] = Converter::cssToTwip($cValue); break; case 'margin-bottom': - $styles['spaceAfter'] = Converter::cssToPoint($cValue); + // BC change: up to ver. 0.17.0 incorrectly converted to points - Converter::cssToPoint($cValue) + $styles['spaceAfter'] = Converter::cssToTwip($cValue); break; case 'border-color': self::mapBorderColor($styles, $cValue); @@ -603,10 +669,37 @@ protected static function parseStyle($attribute, $styles) } break; case 'border': - if (preg_match('/([0-9]+[^0-9]*)\s+(\#[a-fA-F0-9]+)\s+([a-z]+)/', $cValue, $matches)) { - $styles['borderSize'] = Converter::cssToPoint($matches[1]); - $styles['borderColor'] = trim($matches[2], '#'); - $styles['borderStyle'] = self::mapBorderStyle($matches[3]); + case 'border-top': + case 'border-bottom': + case 'border-right': + case 'border-left': + // must have exact order [width color style], e.g. "1px #0011CC solid" or "2pt green solid" + // Word does not accept shortened hex colors e.g. #CCC, only full e.g. #CCCCCC + if (preg_match('/([0-9]+[^0-9]*)\s+(\#[a-fA-F0-9]+|[a-zA-Z]+)\s+([a-z]+)/', $cValue, $matches)) { + if (false !== strpos($cKey, '-')) { + $which = explode('-', $cKey)[1]; + $which = ucfirst($which); // e.g. bottom -> Bottom + } else { + $which = ''; + } + // Note - border width normalization: + // Width of border in Word is calculated differently than HTML borders, usually showing up too bold. + // Smallest 1px (or 1pt) appears in Word like 2-3px/pt in HTML once converted to twips. + // Therefore we need to normalize converted twip value to cca 1/2 of value. + // This may be adjusted, if better ratio or formula found. + // BC change: up to ver. 0.17.0 was $size converted to points - Converter::cssToPoint($size) + $size = Converter::cssToTwip($matches[1]); + $size = intval($size / 2); + // valid variants may be e.g. borderSize, borderTopSize, borderLeftColor, etc .. + $styles["border{$which}Size"] = $size; // twips + $styles["border{$which}Color"] = trim($matches[2], '#'); + $styles["border{$which}Style"] = self::mapBorderStyle($matches[3]); + } + break; + case 'vertical-align': + // https://developer.mozilla.org/en-US/docs/Web/CSS/vertical-align + if (preg_match('#(?:top|bottom|middle|sub|baseline)#i', $cValue, $matches)) { + $styles['valign'] = self::mapAlignVertical($matches[0]); } break; } @@ -651,14 +744,14 @@ protected static function parseImage($node, $element) case 'float': if (trim($v) == 'right') { $style['hPos'] = \PhpOffice\PhpWord\Style\Image::POS_RIGHT; - $style['hPosRelTo'] = \PhpOffice\PhpWord\Style\Image::POS_RELTO_PAGE; + $style['hPosRelTo'] = \PhpOffice\PhpWord\Style\Image::POS_RELTO_MARGIN; // inner section area $style['pos'] = \PhpOffice\PhpWord\Style\Image::POS_RELATIVE; $style['wrap'] = \PhpOffice\PhpWord\Style\Image::WRAP_TIGHT; $style['overlap'] = true; } if (trim($v) == 'left') { $style['hPos'] = \PhpOffice\PhpWord\Style\Image::POS_LEFT; - $style['hPosRelTo'] = \PhpOffice\PhpWord\Style\Image::POS_RELTO_PAGE; + $style['hPosRelTo'] = \PhpOffice\PhpWord\Style\Image::POS_RELTO_MARGIN; // inner section area $style['pos'] = \PhpOffice\PhpWord\Style\Image::POS_RELATIVE; $style['wrap'] = \PhpOffice\PhpWord\Style\Image::WRAP_TIGHT; $style['overlap'] = true; @@ -773,6 +866,58 @@ protected static function mapAlign($cssAlignment) } } + /** + * Transforms a HTML/CSS alignment into a \PhpOffice\PhpWord\SimpleType\Jc + * + * @param string $cssAlignment + * @return string|null + */ + protected static function mapAlignVertical($alignment) + { + $alignment = strtolower($alignment); + switch ($alignment) { + case 'top': + case 'baseline': + case 'bottom': + return $alignment; + case 'middle': + return 'center'; + case 'sub': + return 'bottom'; + case 'text-top': + case 'baseline': + return 'top'; + default: + // @discuss - which one should apply: + // - Word uses default vert. alignment: top + // - all browsers use default vert. alignment: middle + // Returning empty string means attribute wont be set so use Word default (top). + return ''; + } + } + + /** + * Map list style for ordered list + * + * @param string $cssListType + */ + protected static function mapListType($cssListType) + { + switch ($cssListType) { + case 'a': + return NumberFormat::LOWER_LETTER; // a, b, c, .. + case 'A': + return NumberFormat::UPPER_LETTER; // A, B, C, .. + case 'i': + return NumberFormat::LOWER_ROMAN; // i, ii, iii, iv, .. + case 'I': + return NumberFormat::UPPER_ROMAN; // I, II, III, IV, .. + case '1': + default: + return NumberFormat::DECIMAL; // 1, 2, 3, .. + } + } + /** * Parse line break * @@ -808,4 +953,38 @@ protected static function parseLink($node, $element, &$styles) return $element->addLink($target, $node->textContent, $styles['font'], $styles['paragraph']); } + + /** + * Render horizontal rule + * Note: Word rule is not the same as HTML's
    since it does not support width and thus neither alignment + * + * @param \DOMNode $node + * @param \PhpOffice\PhpWord\Element\AbstractContainer $element + */ + protected static function parseHorizRule($node, $element) + { + $styles = self::parseInlineStyle($node); + + //
    is implemented as an empty paragraph - extending 100% inside the section + // Some properties may be controlled, e.g.
    + + $fontStyle = $styles + ['size' => 3]; + + $paragraphStyle = $styles + [ + 'lineHeight' => 0.25, // multiply default line height - e.g. 1, 1.5 etc + 'spacing' => 0, // twip + 'spaceBefore' => 120, // twip, 240/2 (default line height) + 'spaceAfter' => 120, // twip + 'borderBottomSize' => empty($styles['line-height']) ? 1 : $styles['line-height'], + 'borderBottomColor' => empty($styles['color']) ? '000000' : $styles['color'], + 'borderBottomStyle' => 'single', // same as "solid" + ]; + + $element->addText("", $fontStyle, $paragraphStyle); + + // Notes:
    cannot be: + // - table - throws error "cannot be inside textruns", e.g. lists + // - line - that is a shape, has different behaviour + // - repeated text, e.g. underline "_", because of unpredictable line wrapping + } } diff --git a/tests/PhpWord/Shared/HtmlTest.php b/tests/PhpWord/Shared/HtmlTest.php index 5bc9e2411a..7a806c2624 100644 --- a/tests/PhpWord/Shared/HtmlTest.php +++ b/tests/PhpWord/Shared/HtmlTest.php @@ -632,4 +632,274 @@ public function testParseLetterSpacing() $this->assertTrue($doc->elementExists('/w:document/w:body/w:p/w:r/w:rPr/w:spacing')); $this->assertEquals(150 * 15, $doc->getElement('/w:document/w:body/w:p/w:r/w:rPr/w:spacing')->getAttribute('w:val')); } + + /** + * Parse widths in tables and cells, which also allows for controlling column width + */ + public function testParseTableAndCellWidth() + { + $phpWord = new \PhpOffice\PhpWord\PhpWord(); + $section = $phpWord->addSection([ + 'orientation' => \PhpOffice\PhpWord\Style\Section::ORIENTATION_LANDSCAPE, + ]); + + // borders & backgrounds are here just for better visual comparison + $html = << +
25% + + + + + + + + + + + + + +
400px
T2.R2.C150ptT2.R2.C3
300pxT2.R3.C3
+
+HTML; + + Html::addHtml($section, $html); + $doc = TestHelperDOCX::getDocument($phpWord, 'Word2007'); + + // outer table grid + $xpath = '/w:document/w:body/w:tbl/w:tblGrid/w:gridCol'; + $this->assertTrue($doc->elementExists($xpath)); + $this->assertEquals(25 * 50, $doc->getElement($xpath)->getAttribute('w:w')); + $this->assertEquals('dxa', $doc->getElement($xpath)->getAttribute('w:type')); + + //
assertTrue($doc->elementExists($xpath)); + $this->assertEquals(6000, $doc->getElement($xpath)->getAttribute('w:w')); + $this->assertEquals('dxa', $doc->getElement($xpath)->getAttribute('w:type')); + + // assertTrue($doc->elementExists($xpath)); + $this->assertEquals(4500, $doc->getElement($xpath)->getAttribute('w:w')); + $this->assertEquals('dxa', $doc->getElement($xpath)->getAttribute('w:type')); + } + + /** + * Test parsing background color for table rows and table cellspacing + */ + public function testParseCellspacingRowBgColor() + { + $phpWord = new \PhpOffice\PhpWord\PhpWord(); + $section = $phpWord->addSection([ + 'orientation' => \PhpOffice\PhpWord\Style\Section::ORIENTATION_LANDSCAPE, + ]); + + // borders & backgrounds are here just for better visual comparison + $html = << +
AB
CD
+HTML; + + Html::addHtml($section, $html); + $doc = TestHelperDOCX::getDocument($phpWord, 'Word2007'); + + $xpath = '/w:document/w:body/w:tbl/w:tblPr/w:tblCellSpacing'; + $this->assertTrue($doc->elementExists($xpath)); + $this->assertEquals(3 * 15, $doc->getElement($xpath)->getAttribute('w:w')); + $this->assertEquals('dxa', $doc->getElement($xpath)->getAttribute('w:type')); + + $xpath = '/w:document/w:body/w:tbl/w:tr[1]/w:tc[1]/w:tcPr/w:shd'; + $this->assertTrue($doc->elementExists($xpath)); + $this->assertEquals('lightgreen', $doc->getElement($xpath)->getAttribute('w:fill')); + + $xpath = '/w:document/w:body/w:tbl/w:tr[2]/w:tc[1]/w:tcPr/w:shd'; + $this->assertTrue($doc->elementExists($xpath)); + $this->assertEquals('FF0000', $doc->getElement($xpath)->getAttribute('w:fill')); + } + + /** + * Parse horizontal rule + */ + public function testParseHorizRule() + { + $phpWord = new \PhpOffice\PhpWord\PhpWord(); + $section = $phpWord->addSection(); + + // borders & backgrounds are here just for better visual comparison + $html = <<Simple default rule:

+
+

Custom style rule:

+
+

END

+HTML; + + Html::addHtml($section, $html); + $doc = TestHelperDOCX::getDocument($phpWord, 'Word2007'); + + // default rule + $xpath = '/w:document/w:body/w:p[2]/w:pPr/w:pBdr/w:bottom'; + $this->assertTrue($doc->elementExists($xpath)); + $this->assertEquals('single', $doc->getElement($xpath)->getAttribute('w:val')); // solid + $this->assertEquals('1', $doc->getElement($xpath)->getAttribute('w:sz')); // 1 twip + $this->assertEquals('000000', $doc->getElement($xpath)->getAttribute('w:color')); // black + + // custom style rule + $xpath = '/w:document/w:body/w:p[4]/w:pPr/w:pBdr/w:bottom'; + $this->assertTrue($doc->elementExists($xpath)); + $this->assertEquals('single', $doc->getElement($xpath)->getAttribute('w:val')); + $this->assertEquals(intval(5 * 15 / 2), $doc->getElement($xpath)->getAttribute('w:sz')); + $this->assertEquals('lightblue', $doc->getElement($xpath)->getAttribute('w:color')); + + $xpath = '/w:document/w:body/w:p[4]/w:pPr/w:spacing'; + $this->assertTrue($doc->elementExists($xpath)); + $this->assertEquals(450, $doc->getElement($xpath)->getAttribute('w:before')); + $this->assertEquals(0, $doc->getElement($xpath)->getAttribute('w:after')); + $this->assertEquals(240, $doc->getElement($xpath)->getAttribute('w:line')); + } + + /** + * Parse ordered list start & numbering style + */ + public function testParseOrderedList() + { + $phpWord = new \PhpOffice\PhpWord\PhpWord(); + $section = $phpWord->addSection(); + + // borders & backgrounds are here just for better visual comparison + $html = << +
  • standard ordered list line 1
  • +
  • standard ordered list line 2
  • + + +
      +
    1. ordered list alphabetical, line 5 => E
    2. +
    3. ordered list alphabetical, line 6 => F
    4. +
    + +
      +
    1. ordered list roman lower, line 3 => iii
    2. +
    3. ordered list roman lower, line 4 => iv
    4. +
    + +HTML; + + Html::addHtml($section, $html); + $doc = TestHelperDOCX::getDocument($phpWord, 'Word2007'); + + // compare numbering file + $xmlFile = 'word/numbering.xml'; + + // default - decimal start = 1 + $xpath = '/w:numbering/w:abstractNum[1]/w:lvl[1]/w:start'; + $this->assertTrue($doc->elementExists($xpath, $xmlFile)); + $this->assertEquals('1', $doc->getElement($xpath, $xmlFile)->getAttribute('w:val')); + + $xpath = '/w:numbering/w:abstractNum[1]/w:lvl[1]/w:numFmt'; + $this->assertTrue($doc->elementExists($xpath, $xmlFile)); + $this->assertEquals('decimal', $doc->getElement($xpath, $xmlFile)->getAttribute('w:val')); + + // second list - start = 5, type A = upperLetter + $xpath = '/w:numbering/w:abstractNum[2]/w:lvl[1]/w:start'; + $this->assertTrue($doc->elementExists($xpath, $xmlFile)); + $this->assertEquals('5', $doc->getElement($xpath, $xmlFile)->getAttribute('w:val')); + + $xpath = '/w:numbering/w:abstractNum[2]/w:lvl[1]/w:numFmt'; + $this->assertTrue($doc->elementExists($xpath, $xmlFile)); + $this->assertEquals('upperLetter', $doc->getElement($xpath, $xmlFile)->getAttribute('w:val')); + + // third list - start = 3, type i = lowerRoman + $xpath = '/w:numbering/w:abstractNum[3]/w:lvl[1]/w:start'; + $this->assertTrue($doc->elementExists($xpath, $xmlFile)); + $this->assertEquals('3', $doc->getElement($xpath, $xmlFile)->getAttribute('w:val')); + + $xpath = '/w:numbering/w:abstractNum[3]/w:lvl[1]/w:numFmt'; + $this->assertTrue($doc->elementExists($xpath, $xmlFile)); + $this->assertEquals('lowerRoman', $doc->getElement($xpath, $xmlFile)->getAttribute('w:val')); + } + + /** + * Parse ordered list start & numbering style + */ + public function testParseVerticalAlign() + { + $phpWord = new \PhpOffice\PhpWord\PhpWord(); + $section = $phpWord->addSection(); + + // borders & backgrounds are here just for better visual comparison + $html = << + + default + top + middle + bottom +






    + + +HTML; + + Html::addHtml($section, $html); + $doc = TestHelperDOCX::getDocument($phpWord, 'Word2007'); + + $xpath = '/w:document/w:body/w:tbl/w:tr/w:tc[1]/w:tcPr/w:vAlign'; + $this->assertFalse($doc->elementExists($xpath)); + + $xpath = '/w:document/w:body/w:tbl/w:tr/w:tc[2]/w:tcPr/w:vAlign'; + $this->assertTrue($doc->elementExists($xpath)); + $this->assertEquals('top', $doc->getElement($xpath)->getAttribute('w:val')); + + $xpath = '/w:document/w:body/w:tbl/w:tr/w:tc[3]/w:tcPr/w:vAlign'; + $this->assertTrue($doc->elementExists($xpath)); + $this->assertEquals('center', $doc->getElement($xpath)->getAttribute('w:val')); + + $xpath = '/w:document/w:body/w:tbl/w:tr/w:tc[4]/w:tcPr/w:vAlign'; + $this->assertTrue($doc->elementExists($xpath)); + $this->assertEquals('bottom', $doc->getElement($xpath)->getAttribute('w:val')); + } + + /** + * Fix bug - don't decode double quotes inside double quoted string + */ + public function testDontDecodeAlreadyEncodedDoubleQuotes() + { + $phpWord = new \PhpOffice\PhpWord\PhpWord(); + $section = $phpWord->addSection(); + + // borders & backgrounds are here just for better visual comparison + $html = <<This would crash if inline quotes also decoded at loading XML into DOMDocument! +HTML; + + Html::addHtml($section, $html); + $doc = TestHelperDOCX::getDocument($phpWord, 'Word2007'); + $this->assertTrue(is_object($doc)); + } }