From 243a27425617c8b1f75abda9b46a930425bf716d Mon Sep 17 00:00:00 2001 From: Omar Diab Date: Fri, 18 Oct 2019 13:44:10 +0900 Subject: [PATCH 1/6] move regex logic to make-url-regex --- index.js | 20 ++++---------------- make-url-regex.js | 21 +++++++++++++++++++++ 2 files changed, 25 insertions(+), 16 deletions(-) create mode 100644 make-url-regex.js diff --git a/index.js b/index.js index a77d263..cd7912b 100644 --- a/index.js +++ b/index.js @@ -1,22 +1,10 @@ 'use strict'; -const ipRegex = require('ip-regex'); const tlds = require('tlds'); +const makeUrlRegex = require('./make-url-regex'); module.exports = options => { - options = { - strict: true, + return makeUrlRegex({ + tlds, ...options - }; - - const protocol = `(?:(?:[a-z]+:)?//)${options.strict ? '' : '?'}`; - const auth = '(?:\\S+(?::\\S*)?@)?'; - const ip = ipRegex.v4().source; - const host = '(?:(?:[a-z\\u00a1-\\uffff0-9][-_]*)*[a-z\\u00a1-\\uffff0-9]+)'; - const domain = '(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*'; - const tld = `(?:\\.${options.strict ? '(?:[a-z\\u00a1-\\uffff]{2,})' : `(?:${tlds.sort((a, b) => b.length - a.length).join('|')})`})\\.?`; - const port = '(?::\\d{2,5})?'; - const path = '(?:[/?#][^\\s"]*)?'; - const regex = `(?:${protocol}|www\\.)${auth}(?:localhost|${ip}|${host}${domain}${tld})${port}${path}`; - - return options.exact ? new RegExp(`(?:^${regex}$)`, 'i') : new RegExp(regex, 'ig'); + }); }; diff --git a/make-url-regex.js b/make-url-regex.js new file mode 100644 index 0000000..b9002df --- /dev/null +++ b/make-url-regex.js @@ -0,0 +1,21 @@ +'use strict'; +const ipRegex = require('ip-regex'); + +module.exports = options => { + options = { + strict: true, + ...options + }; + + const protocol = `(?:(?:[a-z]+:)?//)${options.strict ? '' : '?'}`; + const auth = '(?:\\S+(?::\\S*)?@)?'; + const ip = ipRegex.v4().source; + const host = '(?:(?:[a-z\\u00a1-\\uffff0-9][-_]*)*[a-z\\u00a1-\\uffff0-9]+)'; + const domain = '(?:\\.(?:[a-z\\u00a1-\\uffff0-9]-*)*[a-z\\u00a1-\\uffff0-9]+)*'; + const tld = `(?:\\.${options.strict ? '(?:[a-z\\u00a1-\\uffff]{2,})' : `(?:${options.tlds.sort((a, b) => b.length - a.length).join('|')})`})\\.?`; + const port = '(?::\\d{2,5})?'; + const path = '(?:[/?#][^\\s"]*)?'; + const regex = `(?:${protocol}|www\\.)${auth}(?:localhost|${ip}|${host}${domain}${tld})${port}${path}`; + + return options.exact ? new RegExp(`(?:^${regex}$)`, 'i') : new RegExp(regex, 'ig'); +}; From 13ce06161dcf134ff42733df467aebad6e0439dc Mon Sep 17 00:00:00 2001 From: Omar Diab Date: Fri, 18 Oct 2019 13:44:21 +0900 Subject: [PATCH 2/6] Add tests for explicit tlds pass-in --- test.js | 154 +++++++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 147 insertions(+), 7 deletions(-) diff --git a/test.js b/test.js index 82ae746..43f8cff 100644 --- a/test.js +++ b/test.js @@ -1,4 +1,5 @@ import test from 'ava'; +import makeUrlRegex from './make-url-regex'; import urlRegex from '.'; test('match exact URLs', t => { @@ -65,6 +66,7 @@ test('match exact URLs', t => { for (const x of fixtures) { t.true(urlRegex({exact: true}).test(x)); + t.true(makeUrlRegex({exact: true}).test(x)); } }); @@ -77,13 +79,15 @@ test('match URLs in text', t => { Foo //bar.net/?q=Query with spaces `; - t.deepEqual([ - '//dolor.sit', - 'http://example.com', - 'http://example.com/with-path', - 'https://another.example.com', - '//bar.net/?q=Query' - ], fixture.match(urlRegex())); + for (const makeRegex of [urlRegex, makeUrlRegex]) { + t.deepEqual([ + '//dolor.sit', + 'http://example.com', + 'http://example.com/with-path', + 'https://another.example.com', + '//bar.net/?q=Query' + ], fixture.match(makeRegex())); + } }); test('do not match URLs', t => { @@ -133,6 +137,7 @@ test('do not match URLs', t => { for (const x of fixtures) { t.false(urlRegex({exact: true}).test(x)); + t.false(makeUrlRegex({exact: true}).test(x)); } }); @@ -198,3 +203,138 @@ test('match using list of TLDs', t => { t.true(urlRegex({exact: true, strict: false}).test(x)); } }); + +test('match using explicit list of TLDs', t => { + const fixtures = [ + 'foo.com/blah_blah', + 'foo.com/blah_blah/', + 'foo.com/blah_blah_(wikipedia)', + 'foo.com/blah_blah_(wikipedia)_(again)', + 'www.example.com/wpstyle/?p=364', + 'www.example.com/foo/?bar=baz&inga=42&quux', + 'a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.com', + 'mw1.google.com/mw-earth-vectordb/kml-samples/gp/seattle/gigapxl/$[level]/r$[y]_c$[x].jpg', + 'user:pass@example.com:123/one/two.three?q1=a1&q2=a2#body', + 'www.microsoft.xn--comindex-g03d.html.irongeek.com', + '✪df.ws/123', + 'localhost/', + 'userid:password@example.com:8080', + 'userid:password@example.com:8080/', + 'userid@example.com', + 'userid@example.com/', + 'userid@example.com:8080', + 'userid@example.com:8080/', + 'userid:password@example.com', + 'userid:password@example.com/', + '142.42.1.1/', + '142.42.1.1:8080/', + '➡.ws/䨹', + '⌘.ws', + '⌘.ws/', + 'foo.com/blah_(wikipedia)#cite-1', + 'foo.com/blah_(wikipedia)_blah#cite-1', + 'foo.com/unicode_(✪)_in_parens', + 'foo.com/(something)?after=parens', + '☺.damowmow.com/', + 'code.google.com/events/#&product=browser', + 'j.mp', + 'foo.bar/baz', + 'foo.bar/?q=Test%20URL-encoded%20stuff', + '-.~_!$&\'()*+\';=:%40:80%2f::::::@example.com', + '1337.net', + 'a.b-c.de', + '223.255.255.254', + 'example.com?foo=bar', + 'example.com#foo', + 'localhost:8080', + 'foo.ws', + 'a.b-c.de', + '223.255.255.254', + 'userid:password@example.com', + '➡.ws/䨹', + '//localhost:8080', + '//foo.ws', + '//a.b-c.de', + '//223.255.255.254', + '//userid:password@example.com', + '//➡.ws/䨹', + 'www.google.com/unicorn', + 'example.com.', + 'example.onion', + 'unicorn.education', + '//➡.onion/䨹', + 'userid:password@example.education', + '-.~_!$&\'()*+\';=:%40:80%2f::::::@example.onion', + 'mw1.unicorn.education/mw-earth-vectordb/kml-samples/gp/seattle/gigapxl/$[level]/r$[y]_c$[x].jpg', + 'www.example.onion/wpstyle/?p=364' + ]; + + for (const x of fixtures) { + t.true(makeUrlRegex( + {exact: true, strict: false, tlds: ['com', 'ws', 'de', 'net', 'mp', 'bar', 'onion', 'education']} + ).test(x)); + } +}); + +test('fail if not in explicit list of TLDs', t => { + const fixtures = [ + 'foo.baz/blah_blah', + 'foo.co.uk/blah_blah/', + 'foo.biz/blah_blah_(wikipedia)', + 'foo.onion/blah_blah_(wikipedia)_(again)', + 'www.example.education/wpstyle/?p=364', + 'www.example.biz/foo/?bar=baz&inga=42&quux', + 'a.b.c.d.e.f.g.h.i.j.k.l.m.n.o.p.q.r.s.t.u.v.w.x.y.z.biz', + 'mw1.google.biz/mw-earth-vectordb/kml-samples/gp/seattle/gigapxl/$[level]/r$[y]_c$[x].jpg', + 'user:pass@example.biz:123/one/two.three?q1=a1&q2=a2#body', + 'www.microsoft.xn--comindex-g03d.html.irongeek.biz', + '✪df.onion/123', + 'userid:password@example.biz:8080', + 'userid:password@example.biz:8080/', + 'userid@example.biz', + 'userid@example.biz/', + 'userid@example.biz:8080', + 'userid@example.biz:8080/', + 'userid:password@example.biz', + 'userid:password@example.biz/', + '➡.onion/䨹', + '⌘.onion', + '⌘.onion/', + 'foo.biz/blah_(wikipedia)#cite-1', + 'foo.biz/blah_(wikipedia)_blah#cite-1', + 'foo.biz/unicode_(✪)_in_parens', + 'foo.biz/(something)?after=parens', + '☺.damowmow.biz/', + 'code.google.biz/events/#&product=browser', + 'j.onion', + 'foo.baz/baz', + 'foo.baz/?q=Test%20URL-encoded%20stuff', + '-.~_!$&\'()*+\';=:%40:80%2f::::::@example.biz', + '1337.biz', + 'a.b-c.ly', + 'example.biz?foo=bar', + 'example.biz#foo', + 'foo.jp', + 'a.b-c.cn', + 'userid:password@example.biz', + '➡.uk/䨹', + '//foo.uk', + '//a.b-c.uk', + '//userid:password@example.biz', + '//➡.cn/䨹', + 'www.google.biz/unicorn', + 'example.biz.' + ]; + + for (const x of fixtures) { + if (makeUrlRegex( + {exact: true, strict: false, tlds: ['com', 'ws', 'de', 'net', 'mp', 'bar']} + ).test(x)) { + console.log(x); + } + + t.false(makeUrlRegex( + {exact: true, strict: false, tlds: ['com', 'ws', 'de', 'net', 'mp', 'bar']} + ).test(x)); + } +}); From 8487fe1af9d8c18157c964b41dea74883d6959c9 Mon Sep 17 00:00:00 2001 From: Omar Diab Date: Fri, 18 Oct 2019 13:54:43 +0900 Subject: [PATCH 3/6] check for tlds presence when strict is false --- make-url-regex.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/make-url-regex.js b/make-url-regex.js index b9002df..4ddd647 100644 --- a/make-url-regex.js +++ b/make-url-regex.js @@ -6,6 +6,9 @@ module.exports = options => { strict: true, ...options }; + if (!options.strict && !options.tlds) { + throw new Error('Need to provide `tlds` option if `strict === false`'); + } const protocol = `(?:(?:[a-z]+:)?//)${options.strict ? '' : '?'}`; const auth = '(?:\\S+(?::\\S*)?@)?'; From f21aa53f0b825e884c6ec401f33fc5934bb6d851 Mon Sep 17 00:00:00 2001 From: Omar Diab Date: Fri, 18 Oct 2019 13:55:56 +0900 Subject: [PATCH 4/6] remove console log in test --- test.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test.js b/test.js index 43f8cff..7ad69bf 100644 --- a/test.js +++ b/test.js @@ -327,11 +327,11 @@ test('fail if not in explicit list of TLDs', t => { ]; for (const x of fixtures) { - if (makeUrlRegex( + t.false(makeUrlRegex( {exact: true, strict: false, tlds: ['com', 'ws', 'de', 'net', 'mp', 'bar']} - ).test(x)) { - console.log(x); - } + ).test(x)); + } +}); t.false(makeUrlRegex( {exact: true, strict: false, tlds: ['com', 'ws', 'de', 'net', 'mp', 'bar']} From 3d0e7286e42eecdea062be6f45972f08eee63faa Mon Sep 17 00:00:00 2001 From: Omar Diab Date: Fri, 18 Oct 2019 13:59:38 +0900 Subject: [PATCH 5/6] test for makeUrlRegex failure if tlds missing, strict false --- test.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test.js b/test.js index 7ad69bf..2164d76 100644 --- a/test.js +++ b/test.js @@ -333,8 +333,8 @@ test('fail if not in explicit list of TLDs', t => { } }); - t.false(makeUrlRegex( - {exact: true, strict: false, tlds: ['com', 'ws', 'de', 'net', 'mp', 'bar']} - ).test(x)); - } +test('fail for makeUrlRegex if tlds flag not present, strict false', t => { + t.throws(() => { + makeUrlRegex({exact: true, strict: false}).test('http://google.com'); + }, {message: /tlds/}); }); From cad42f546a7f6dbb516d6ef1a9743ab77d975eeb Mon Sep 17 00:00:00 2001 From: Omar Diab Date: Fri, 18 Oct 2019 14:04:43 +0900 Subject: [PATCH 6/6] include make-url-regex.js in package.json --- package.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/package.json b/package.json index 4854ba7..d0db5f6 100644 --- a/package.json +++ b/package.json @@ -17,7 +17,8 @@ }, "files": [ "index.js", - "index.d.ts" + "index.d.ts", + "make-url-regex.js" ], "keywords": [ "regex",