Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 1 addition & 2 deletions lib/calculations/relative_date_calculation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,6 @@ class RelativeDateCalculation
# Resolves a relative date term to a concrete date.
#
# @param date_relative_to the date the relative term refers to
# @return [Object, nil] +date_relative_to+ when the text of the first
#   RelativeDateTerm matches RelativeDateDetector::SAME_DAY_TERMS; nil when
#   there are no relative date terms or the first one does not match
def relative_date(date_relative_to)
  return nil if RelativeDateTerm.empty?

  # SAME_DAY_TERMS is a Regexp, so it has to be matched against the text;
  # Regexp does not respond to include?.
  date_relative_to if RelativeDateTerm.first.text =~ RelativeDateDetector::SAME_DAY_TERMS
end
end
28 changes: 13 additions & 15 deletions lib/detectors/currency_detector.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,23 +2,21 @@
require_relative '../models/currency_term'

class CurrencyDetector
# One regex per currency, matching the codes and symbols recognised for it.
# Each constant is defined exactly once; literal dots are escaped so that
# "Ft." only matches a real full stop and never swallows the next character.
EUR_SYMBOLS = /EUR|€|Euro/
USD_SYMBOLS = /USD|\$/
HKD_SYMBOLS = /HKD/
CHF_SYMBOLS = /CHF/
CNY_SYMBOLS = /CNY/
SEK_SYMBOLS = /SEK/
GBP_SYMBOLS = /GBP|£/
HUF_SYMBOLS = /HUF|Ft\.|Ft/
HRK_SYMBOLS = /HRK/
# Every per-currency regex, in the order they are searched by +filter+.
ALL_SYMBOLS = [EUR_SYMBOLS, USD_SYMBOLS, HKD_SYMBOLS,
               CHF_SYMBOLS, CNY_SYMBOLS, SEK_SYMBOLS, GBP_SYMBOLS,
               HUF_SYMBOLS, HRK_SYMBOLS].freeze

# Runs find_currencies once for every regex in ALL_SYMBOLS, in order, and
# returns CurrencyTerm.dataset afterwards.
#
# ALL_SYMBOLS holds Regexp objects, so they are passed through as they are;
# they must not be fed to Regexp.quote, which only accepts strings.
def self.filter
  # each, not map: only the side effect of find_currencies is wanted.
  ALL_SYMBOLS.each { |symbols_regex| find_currencies(symbols_regex) }
  CurrencyTerm.dataset
end

Expand Down
9 changes: 3 additions & 6 deletions lib/detectors/invoice_number_label_detector.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,22 +2,19 @@
require_relative '../models/invoice_number_label_term'

class InvoiceNumberLabelDetector
# Invoice number labels that contain no spaces. Dots are escaped so they
# only match a literal full stop (e.g. "Rech.Nr:" but not "RechxNr:").
INVOICE_NUMBER_LABELS =
  /(Re-Nr:|Bon-ID|Rech\.Nr:|Beleg-nr\.:|Rechnungsnummer:|Rechnung:|Rechnungsnummer)/
# Invoice number labels that span several words; dots escaped as above.
MULTI_WORD_INVOICE_NUMBER_LABELS_REGEX =
  /(Invoice number:|Billing ID:|Rechnung Nr\.:)/

# Searches for invoice number labels and returns InvoiceNumberLabelTerm.dataset.
#
# The multi-word labels are searched first, with a callback that appends a
# space to each term's text; the remaining labels are searched afterwards
# without a callback. INVOICE_NUMBER_LABELS is already a Regexp and is passed
# straight through rather than being rebuilt with Regexp.quote.
def self.filter
  append_space = ->(term) { term.text += ' ' }

  find_invoice_number_labels(
    MULTI_WORD_INVOICE_NUMBER_LABELS_REGEX,
    after_each_word: append_space
  )
  find_invoice_number_labels(INVOICE_NUMBER_LABELS)

  InvoiceNumberLabelTerm.dataset
end
Expand Down
3 changes: 2 additions & 1 deletion lib/detectors/price_detector.rb
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,8 @@ def self.filter
filter_out_dates

end_word_with_space = ->(term) { term.text += ' ' }
unless Word.where(text: CurrencyDetector::HUF_SYMBOLS).empty?
# The Hungarian price regex is only applied when a word with text 'Ft.' or 'Ft' exists.
unless Word.where(text: ['Ft.', 'Ft']).empty?
find_prices(
HUNGARIAN_PRICE_REGEX,
after_each_word: end_word_with_space,
Expand Down
6 changes: 2 additions & 4 deletions lib/detectors/relative_date_detector.rb
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@
require_relative '../models/relative_date_term'

class RelativeDateDetector
# Plain-text list of the relative date words, kept for code that still reads
# ALL_REL_WORDS.
ALL_REL_WORDS = ['prompt', 'Fällig bei Erhalt', 'Fällig nach Erhalt'].freeze
# Regex matching any of the same-day terms. Defined once, so the constant is
# not re-initialised after first being set to the string list.
SAME_DAY_TERMS = /(prompt|Fällig bei Erhalt|Fällig nach Erhalt)/

def self.filter
relative_regex = /#{ALL_REL_WORDS.map { |s| Regexp.quote(s) }.join('|')}/
end_word_with_space = ->(term) { term.text += ' ' }
find_relative_words(relative_regex,
find_relative_words(SAME_DAY_TERMS,
after_each_word: end_word_with_space)

RelativeDateTerm.dataset
Expand Down
3 changes: 1 addition & 2 deletions spec/detectors/currency_detector_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -478,7 +478,6 @@
)

currencies = CurrencyDetector.filter
currencies_regex = /#{CurrencyDetector::ALL_SYMBOLS.map { |s| Regexp.quote(s) }.join('|')}/
expect(currencies.map(&:regex)).to eq [currencies_regex.to_s]
expect(currencies.map(&:regex)).to eq [CurrencyDetector::EUR_SYMBOLS.to_s]
end
end
261 changes: 0 additions & 261 deletions spec/detectors/price_detector_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -586,87 +586,6 @@
expect(prices.map(&:text)).to eq(%w(14,90))
end

it 'does not detect pieces as prices' do
  # From 29pwjsKx88nhnQKm9.pdf
  # Dummy dimension values for the bill
  BillDimension.create_image_dimensions(width: 3056, height: 4324)

  # One row per word, created top to bottom: text, left, right, top, bottom.
  [
    ['Menge', 0.5791884816753927, 0.6292539267015707, 0.42298797409805733, 0.4338575393154487],
    ['1,00', 0.5978403141361257, 0.6243455497382199, 0.4551341350601295, 0.4641535615171138],
    ['8,50', 0.6861910994764397, 0.7143324607329843, 0.4551341350601295, 0.4641535615171138],
    ['2,00', 0.5965314136125655, 0.6243455497382199, 0.48149861239592967, 0.49051803885291395],
    ['12,00', 0.8900523560209425, 0.925392670157068, 0.48149861239592967, 0.49051803885291395],
    ['1,00', 0.5978403141361257, 0.6243455497382199, 0.5212765957446809, 0.5302960222016652],
    ['4,00', 0.6858638743455497, 0.7143324607329843, 0.5212765957446809, 0.5302960222016652],
    ['501.1.0663', 0.24083769633507854, 0.31544502617801046, 0.4946808510638298, 0.5023126734505088]
  ].each do |text, left, right, top, bottom|
    create(:word, text: text, left: left, right: right, top: top, bottom: bottom)
  end

  # Only these three words are expected back from the detector.
  detected_texts = PriceDetector.filter.map(&:text)
  expect(detected_texts).to eq ['8,50', '12,00', '4,00']
end

it 'detects negative prices' do
# From 2D7BuHc3f8wAmb4y8.pdf
# Dummy dimension values for the bill
Expand All @@ -685,60 +604,6 @@
expect(prices.map(&:text)).to eq ['-12,00']
end

it 'does not detect weights below the word kg as prices' do
  # From WmcA2uThGP5QaaciP.pdf
  # Dummy dimension values for the bill
  BillDimension.create_image_dimensions(width: 3056, height: 4324)

  # One row per word, created top to bottom: text, left, right, top, bottom.
  [
    ['kg', 0.7480366492146597, 0.7673429319371727, 0.3856845031271717, 0.39587676627287466],
    ['123,00', 0.7081151832460733, 0.7653795811518325, 0.41463979615473706, 0.4232105628908965]
  ].each do |text, left, right, top, bottom|
    create(:word, text: text, left: left, right: right, top: top, bottom: bottom)
  end

  # Nothing may be reported as a price here.
  detected = PriceDetector.filter
  expect(detected).to be_empty
end

it 'does not detect numbers below Anz.' do
  # From ihfDXTa64yYbFLa6Y.pdf
  # Dummy dimension values for the bill
  BillDimension.create_image_dimensions(width: 3056, height: 4324)

  # One row per word, created top to bottom: text, left, right, top, bottom.
  [
    ['Anz.', 0.1197252208047105, 0.1573438011122015, 0.475937066173068, 0.4854234150856085],
    ['32,00', 0.1122015047432123, 0.1573438011122015, 0.5016196205460435, 0.512494215640907]
  ].each do |text, left, right, top, bottom|
    create(:word, text: text, left: left, right: right, top: top, bottom: bottom)
  end

  # No price texts are expected.
  detected_texts = PriceDetector.filter.map(&:text)
  expect(detected_texts).to be_empty
end

it 'does not detect prices in a long weird word' do
# from bill 2AQDJZ5Nrhva2Qhug.pdf
# Dummy dimension values for the bill
Expand Down Expand Up @@ -1036,78 +901,6 @@
expect(prices.map(&:regex)).to eq [PriceDetector::DECIMAL_PRICE_REGEX.to_s]
end

it 'detects prices which are not in part of a quantity' do
  # From rDxLnivxoXQw9nWa7.pdf
  # Dummy dimension values for the bill
  BillDimension.create_image_dimensions(width: 3056, height: 4324)

  # One row per word, created top to bottom: text, left, right, top, bottom.
  [
    ['Menge', 0.5785340314136126, 0.6285994764397905, 0.4225254394079556, 0.4333950046253469],
    ['1,00', 0.5971858638743456, 0.6236910994764397, 0.5074005550416282, 0.5164199814986123],
    ['1,00', 0.5971858638743456, 0.6236910994764397, 0.5208140610545791, 0.5298334875115633],
    ['5,00', 0.6855366492146597, 0.7136780104712042, 0.5208140610545791, 0.5298334875115633],
    ['431,25', 0.09325916230366492, 0.1387434554973822, 0.8237742830712304, 0.8327937095282146],
    ['86,25', 0.21171465968586387, 0.24803664921465968, 0.8237742830712304, 0.8327937095282146],
    ['517,50', 0.837696335078534, 0.8828534031413613, 0.8237742830712304, 0.8327937095282146]
  ].each do |text, left, right, top, bottom|
    create(:word, text: text, left: left, right: right, top: top, bottom: bottom)
  end

  # The two '1,00' words are the ones expected to be left out.
  detected_texts = PriceDetector.filter.map(&:text)
  expect(detected_texts).to eq ['5,00', '431,25', '86,25', '517,50']
end

it 'does not detect MWST as price' do
# From C5sri9hxpbDhha68D.png
# Dummy dimension values for the bill
Expand Down Expand Up @@ -1135,60 +928,6 @@
expect(prices.map(&:text)).to be_empty
end

it 'does not detect date as price' do
  # From C5sri9hxpbDhha68D.png
  # Dummy dimension values for the bill
  BillDimension.create_image_dimensions(width: 3056, height: 4324)

  # The date arrives as five separate words ("10", ".", "05", ".", "17").
  # One row per word, in creation order: text, left, right, top, bottom.
  [
    ['10', 0.521978021978022, 0.5560439560439561, 0.724, 0.737],
    ['.', 0.5659340659340659, 0.5692307692307692, 0.735, 0.737],
    ['05', 0.578021978021978, 0.6131868131868132, 0.724, 0.737],
    ['.', 0.6230769230769231, 0.6263736263736264, 0.735, 0.737],
    ['17', 0.6373626373626373, 0.6703296703296703, 0.724, 0.7375]
  ].each do |text, left, right, top, bottom|
    create(:word, text: text, left: left, right: right, top: top, bottom: bottom)
  end

  # No price texts are expected.
  detected_texts = PriceDetector.filter.map(&:text)
  expect(detected_texts).to be_empty
end

# TODO: Move to general helpers
def create_following_words(texts)
texts.each_with_index do |text, index|
Expand Down
Loading