Skip to content

Commit 17138ae

Browse files
authored
Merge pull request #271 from Sprachprofi/patch-2
Extracting 'words' in the commonly-understood sense of the word
2 parents 62f3667 + ce2df14 commit 17138ae

File tree

2 files changed

+54
-1
lines changed

2 files changed

+54
-1
lines changed

lib/core/facets/string/words.rb

+13-1
Original file line number | Diff line number | Diff line change
@@ -7,6 +7,18 @@ class String
77
def words
  # Awk-style split: breaks on runs of whitespace and ignores leading
  # whitespace, so " a b" yields ["a", "b"]. Splitting on /\s+/ would
  # return a leading empty string for such input.
  split(' ')
end
10-
10+
11+
# Returns an array of words in the commonly-understood sense (not including punctuation).
# This takes into account international punctuation characters as well as English ones.
#
#   'Slowly, grudgingly he said: "This has to stop."'.words_without_punctuation
#   # => ["Slowly", "grudgingly", "he", "said", "This", "has", "to", "stop"]
#
# Apostrophes and hyphens inside a word are kept ("don't", "Dupont-Fleury");
# a hyphen that trails a word or stands on its own is dropped.
#
# Returns a new Array of Strings; the receiver is left untouched.
def words_without_punctuation
  # Each mark is listed once (duplicates in a character class trigger a Ruby
  # warning). The fullwidth CJK forms of ! , : ? are written as \u escapes so
  # they cannot be mistaken for, or normalized into, their ASCII look-alikes.
  punctuation = /[.?¿¡…!,:;—"。、‘“”„«»〈〉《》()\/\[\]\uFF01\uFF0C\uFF1A\uFF1F]/
  text = gsub(punctuation, ' ')

  # Remove hyphens that do not join two word parts: those followed by
  # whitespace or sitting at the very end of the string.
  text = text.gsub(/-(?=\s|\z)/, ' ')

  # Awk-style split already ignores leading/trailing whitespace and collapses
  # runs of it, so no separate squeeze/strip pass is needed.
  text.split(' ')
end
1123
end
1224

test/core/string/test_words.rb

+41
Original file line number | Diff line number | Diff line change
@@ -20,5 +20,46 @@
2020
end
2121

2222
end
23+
24+
method :words_without_punctuation do

  # Each test feeds one or two sample sentences in a given language through
  # String#words_without_punctuation and compares against the expected words.

  test do
    # English
    doubtful = "How?? I don't believe you!!!"
    doubtful_words = ['How', 'I', "don't", 'believe', 'you']
    doubtful.words_without_punctuation.assert == doubtful_words

    quoted = 'Slowly, grudgingly he said: "This has to stop."'
    quoted_words = ['Slowly', 'grudgingly', 'he', 'said', 'This', 'has', 'to', 'stop']
    quoted.words_without_punctuation.assert == quoted_words
  end

  test do
    # French
    sentence = "« Bonjour ! J'ai rendezvous avec mademoiselle Dupont-Fleury ! »"
    expected = ['Bonjour', "J'ai", "rendezvous", "avec", "mademoiselle", "Dupont-Fleury"]
    sentence.words_without_punctuation.assert == expected
  end

  test do
    # Spanish
    sentence = "«¡María, te amo!», exclamó Juan. … «¿Por qué me sigues mintiendo?"
    expected = ['María', 'te', 'amo', 'exclamó', 'Juan', 'Por', 'qué', 'me', 'sigues', 'mintiendo']
    sentence.words_without_punctuation.assert == expected
  end

  test do
    # Italian
    sentence = 'Alcune persone scrivono al computer; altre con la penna: io con le due.'
    expected = ['Alcune', 'persone', 'scrivono', 'al', 'computer', 'altre', 'con', 'la', 'penna', 'io', 'con', 'le', 'due']
    sentence.words_without_punctuation.assert == expected
  end

  test do
    # German
    sentence = '“chevron,” „französische“ Anführungszeichen'
    expected = ['chevron', 'französische', 'Anführungszeichen']
    sentence.words_without_punctuation.assert == expected
  end

  test do
    # Russian
    sentence = '"А ты прав." — сказал он, — "Я великолепен!".'
    expected = ['А', 'ты', 'прав', 'сказал', 'он', 'Я', 'великолепен']
    sentence.words_without_punctuation.assert == expected
  end

end
2364

2465
end

0 commit comments

Comments
 (0)