File tree Expand file tree Collapse file tree 1 file changed +20
-3
lines changed Expand file tree Collapse file tree 1 file changed +20
-3
lines changed Original file line number Diff line number Diff line change @@ -28,14 +28,31 @@ public function __construct(Extractor $extractor)
2828
2929 $ encoding = null ;
3030 $ contentType = $ extractor ->getResponse ()->getHeaderLine ('content-type ' );
31- preg_match ('/charset=" ?(.*?)(?=$|\s|;|")/i ' , $ contentType , $ match );
31+ preg_match ('/charset=(?:"| \' ) ?(.*?)(?=$|\s|;|"| \' |> )/i ' , $ contentType , $ match );
3232 if (!empty ($ match [1 ])) {
3333 $ encoding = trim ($ match [1 ], ', ' );
34- } elseif (!empty ($ html )) {
35- preg_match ('/charset="?(.*?)(?=$|\s|;|")/i ' , $ html , $ match );
34+ try {
35+ $ ret = mb_encoding_aliases ($ encoding );
36+ if ($ ret === false ) {
37+ $ encoding = null ;
38+ }
39+ } catch (\ValueError $ exception ) {
40+ $ encoding = null ;
41+ }
42+ }
43+ if (is_null ($ encoding ) && !empty ($ html )) {
44+ preg_match ('/charset=(?:"| \')?(.*?)(?=$|\s|;|"| \'|>)/i ' , $ html , $ match );
3645 if (!empty ($ match [1 ])) {
3746 $ encoding = trim ($ match [1 ], ', ' );
3847 }
48+ try {
49+ $ ret = mb_encoding_aliases ($ encoding );
50+ if ($ ret === false ) {
51+ $ encoding = null ;
52+ }
53+ } catch (\ValueError $ exception ) {
54+ $ encoding = null ;
55+ }
3956 }
4057 $ this ->document = !empty ($ html ) ? Parser::parse ($ html , $ encoding ) : new DOMDocument ();
4158 $ this ->initXPath ();
You can’t perform that action at this time.
0 commit comments