diff --git a/lib/etl/engine.rb b/lib/etl/engine.rb index ccc7825..444dcf9 100644 --- a/lib/etl/engine.rb +++ b/lib/etl/engine.rb @@ -310,12 +310,13 @@ def process_batch(batch) def process_control(control) control = ETL::Control.resolve(control) say_on_own_line "Processing control #{control.file}" - - ETL::Engine.job = ETL::Execution::Job.create!( - :control_file => control.file, - :status => 'executing', - :batch_id => ETL::Engine.batch ? ETL::Engine.batch.id : nil - ) + + ETL::Engine.job = ETL::Execution::Job.new.tap do |job| + job.control_file = control.file + job.status = 'executing' + job.batch_id = ETL::Engine.batch ? ETL::Engine.batch.id : nil + job.save! + end execute_dependencies(control) diff --git a/lib/etl/parser/csv_parser.rb b/lib/etl/parser/csv_parser.rb index 4a79bfe..cd015f8 100644 --- a/lib/etl/parser/csv_parser.rb +++ b/lib/etl/parser/csv_parser.rb @@ -9,6 +9,8 @@ def initialize(source, options={}) super configure end + + attr_reader :validate_rows def get_fields_names(file) File.open(file) do |input| @@ -43,7 +45,7 @@ def each end line += 1 row = {} - validate_row(raw_row, line, file) + validate_row(raw_row, line, file) if self.validate_rows raw_row.each_with_index do |value, index| f = fields[index] row[f.name] = value @@ -70,6 +72,12 @@ def validate_row(row, line, file) end def configure + @validate_rows = if source.configuration.has_key?(:validate_rows) + source.configuration[:validate_rows] + else + true + end + source.definition.each do |options| case options when Symbol diff --git a/lib/etl/parser/excel_parser.rb b/lib/etl/parser/excel_parser.rb index df23469..342c39c 100644 --- a/lib/etl/parser/excel_parser.rb +++ b/lib/etl/parser/excel_parser.rb @@ -1,10 +1,10 @@ -optional_require 'spreadsheet' +optional_require 'roo' module ETL class Parser class ExcelParser < ETL::Parser - attr_accessor :ignore_blank_line + attr_accessor :ignore_blank_line, :worksheet_column, :validate_rows # Initialize the parser # * source: The Source object @@ -20,19 
+20,29 @@ def each ETL::Engine.logger.debug "parsing #{file}" line = 0 lines_skipped = 0 - book = Spreadsheet.open file + book = Roo::Spreadsheet.open file loopworksheets = [] if worksheets.empty? - loopworksheets = book.worksheets + loopworksheets = book.sheets else worksheets.each do |index| - loopworksheets << book.worksheet(index) + loopworksheets << book.sheet(index) end end + + sheet_index = -1 - loopworksheets.each do |sheet| + book.each_with_pagename do |name, sheet| + sheet_index += 1 + # puts "Sheet: #{name}" + # puts worksheets.inspect + if !worksheets.empty? && !worksheets.include?(sheet_index) + # puts "No!!! #{sheet_index.inspect}" + next + end sheet.each do |raw_row| + # puts raw_row.inspect if lines_skipped < source.skip_lines ETL::Engine.logger.debug "skipping line" lines_skipped += 1 @@ -44,11 +54,12 @@ def each lines_skipped += 1 next end - validate_row(raw_row, line, file) + validate_row(raw_row, line, file) if self.validate_rows raw_row.each_with_index do |value, index| f = fields[index] row[f.name] = value end + row[worksheet_column] = name if worksheet_column yield row end end @@ -87,6 +98,12 @@ def configure end unless source.definition[:worksheets].nil? self.ignore_blank_line = source.definition[:ignore_blank_line] + self.worksheet_column = source.definition[:worksheet_column] + self.validate_rows = if source.configuration.has_key?(:validate_rows) + source.configuration[:validate_rows] + else + true + end source.definition[:fields].each do |options| case options diff --git a/spec/fixtures/data/excel2.xls b/spec/fixtures/data/excel2.xls index f87c087..d98fe86 100644 Binary files a/spec/fixtures/data/excel2.xls and b/spec/fixtures/data/excel2.xls differ diff --git a/spec/fixtures/excel2.ctl b/spec/fixtures/excel2.ctl index 31523c3..08e9099 100644 --- a/spec/fixtures/excel2.ctl +++ b/spec/fixtures/excel2.ctl @@ -11,7 +11,10 @@ source :in, { :ssn, :age, :sex - ] + ] #, + # Add worksheet column e.g. 
+ # Useful when all sheets share the same schema but each row must record the name of the sheet it came from. + # :worksheet_column => :name_info } transform :ssn, :sha1