From df1dcf55085edcf0e1ae7f3d8da0591344b12d32 Mon Sep 17 00:00:00 2001 From: Nelson Elhage Date: Tue, 22 Jan 2013 15:02:58 -0800 Subject: [PATCH] Initial open-source release --- .gitignore | 2 + Gemfile | 4 + Gemfile.lock | 48 +++++ README.md | 160 ++++++++++++++++ Rakefile | 12 ++ bin/mosql | 7 + lib/mosql.rb | 11 ++ lib/mosql/cli.rb | 305 ++++++++++++++++++++++++++++++ lib/mosql/log.rb | 7 + lib/mosql/schema.rb | 149 +++++++++++++++ lib/mosql/sql.rb | 59 ++++++ lib/mosql/tailer.rb | 36 ++++ lib/mosql/version.rb | 3 + mosql.gemspec | 23 +++ test/_lib.rb | 18 ++ test/functional/_lib.rb | 65 +++++++ test/functional/functional.rb | 7 + test/functional/schema.rb | 66 +++++++ test/functional/sql.rb | 38 ++++ test/unit/lib/mongo-sql/schema.rb | 102 ++++++++++ 20 files changed, 1122 insertions(+) create mode 100644 .gitignore create mode 100644 Gemfile create mode 100644 Gemfile.lock create mode 100644 README.md create mode 100644 Rakefile create mode 100755 bin/mosql create mode 100644 lib/mosql.rb create mode 100644 lib/mosql/cli.rb create mode 100644 lib/mosql/log.rb create mode 100644 lib/mosql/schema.rb create mode 100644 lib/mosql/sql.rb create mode 100644 lib/mosql/tailer.rb create mode 100644 lib/mosql/version.rb create mode 100644 mosql.gemspec create mode 100644 test/_lib.rb create mode 100644 test/functional/_lib.rb create mode 100644 test/functional/functional.rb create mode 100644 test/functional/schema.rb create mode 100644 test/functional/sql.rb create mode 100644 test/unit/lib/mongo-sql/schema.rb diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..b23e426 --- /dev/null +++ b/.gitignore @@ -0,0 +1,2 @@ +collections.yml +/.bundle/ diff --git a/Gemfile b/Gemfile new file mode 100644 index 0000000..9e9ddcc --- /dev/null +++ b/Gemfile @@ -0,0 +1,4 @@ +source 'https://rubygems.org' + +gemspec + diff --git a/Gemfile.lock b/Gemfile.lock new file mode 100644 index 0000000..8132150 --- /dev/null +++ b/Gemfile.lock @@ -0,0 +1,48 @@ +GIT + remote: git@github.com:stripe-internal/mongoriver + revision: d5b5ca1471f9efe7c91b3abe2c26f612a2dd4e9c + ref: d5b5ca1471f9efe7c91b3abe2c26f612a2dd4e9c + specs: + mongoriver (0.0.1) + bson_ext + log4r + mongo (>= 1.7) + +PATH + remote: . + specs: + mosql (0.0.1) + bson_ext + json + log4r + mongo + pg + rake + sequel + +GEM + remote: https://intgems.stripe.com:446/ + specs: + bson (1.7.1) + bson_ext (1.7.1) + bson (~> 1.7.1) + json (1.7.5) + log4r (1.1.10) + metaclass (0.0.1) + minitest (3.0.0) + mocha (0.10.5) + metaclass (~> 0.0.1) + mongo (1.7.1) + bson (~> 1.7.1) + pg (0.14.1) + rake (10.0.2) + sequel (3.41.0) + +PLATFORMS + ruby + +DEPENDENCIES + minitest + mocha + mongoriver! + mosql! diff --git a/README.md b/README.md new file mode 100644 index 0000000..94eee98 --- /dev/null +++ b/README.md @@ -0,0 +1,160 @@ +# MoSQL: a MongoDB → SQL streaming translator + +At Stripe, we love MongoDB. We love the flexibility it gives us in +changing data schemas as we grow and learn, and we love its +operational properties. We love replsets. We love the uniform query +language that doesn't require generating and parsing strings, tracking +placeholder parameters, or any of that nonsense. + +The thing is, we also love SQL. We love the ease of doing ad-hoc data +analysis over small-to-mid-size datasets in SQL. We love doing JOINs +to pull together reports summarizing properties across multiple +datasets. We love the fact that virtually every employee we hire +already knows SQL and is comfortable using it to ask and answer +questions about data. 
+
+So, we thought, why can't we have the best of both worlds? Thus:
+MoSQL.
+
+# MoSQL: Put Mo' SQL in your NoSQL
+
+![MoSQL](https://stripe.com/img/blog/posts/mosql/mosql.png)
+
+MoSQL imports the contents of your MongoDB database cluster into a
+PostgreSQL instance, using an oplog tailer to keep the SQL mirror
+continuously up-to-date. This lets you run production services against
+a MongoDB database, and then run offline analytics or reporting using
+the full power of SQL.
+
+## Installation
+
+Install from Rubygems as:
+
+    $ gem install mosql
+
+Or build from source:
+
+    $ gem build mosql.gemspec
+
+And then install the built gem.
+
+## The Collection Map file
+
+In order to define a SQL schema and import your data, MoSQL needs a
+collection map file describing the schema of your MongoDB data. (Don't
+worry -- MoSQL can handle it if your mongo data doesn't always exactly
+fit the stated schema. More on that later).
+
+The collection map is a YAML file describing the databases and
+collections in Mongo that you want to import, in terms of their SQL
+types. An example collection map might be:
+
+    mongodb:
+      blog_posts:
+        :columns:
+        - _id: TEXT
+        - author: TEXT
+        - title: TEXT
+        - created: DOUBLE PRECISION
+        :meta:
+          :table: blog_posts
+          :extra_props: true
+
+Said another way, the collection map is a YAML file containing a hash
+mapping
+
+    <Mongo DB name> -> { <Mongo collection name> -> <collection definition> }
+
+Where a `<collection definition>` is a hash with `:columns` and
+`:meta` fields. `:columns` is a list of one-element hashes, mapping
+field-name to SQL type. It is required to include at least an `_id`
+mapping. `:meta` contains metadata about this collection/table. It is
+required to include at least `:table`, naming the SQL table this
+collection will be mapped to. `:extra_props` determines the handling
+of unknown fields in MongoDB objects -- more about that later.
+
+By default, `mosql` looks for a collection map in a file named
+`collections.yml` in your current working directory, but you can
+specify a different one with `-c` or `--collections`.
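+
+To make this concrete, here is a small sketch (not part of MoSQL
+itself) of the row that the example map above produces for one MongoDB
+document; the blog-post document and its field values are invented for
+illustration:
+
+    require 'yaml'
+    require 'mosql'
+
+    schema = MoSQL::Schema.new(YAML.load(File.read('collections.yml')))
+
+    # A hypothetical document from the mongodb.blog_posts collection:
+    doc = {
+      '_id'     => 'post-1',
+      'author'  => 'nelhage',
+      'title'   => 'Announcing MoSQL',
+      'created' => 1358898178.0,
+      'tags'    => %w[mongodb postgres]   # not listed in :columns
+    }
+
+    # transform returns one SQL row, ordered as in :columns; because
+    # :extra_props is true, the unlisted 'tags' field is JSON-encoded
+    # into a trailing _extra_props column.
+    schema.transform('mongodb.blog_posts', doc)
+    # => ["post-1", "nelhage", "Announcing MoSQL", 1358898178.0,
+    #     "{\"tags\":[\"mongodb\",\"postgres\"]}"]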
+
+## Usage
+
+Once you have a collection map, MoSQL usage is easy. The basic form
+is:
+
+    mosql [-c collections.yml] [--sql postgres://sql-server/sql-db] [--mongo mongodb://mongo-uri]
+
+By default, `mosql` connects to both PostgreSQL and MongoDB instances
+running on default ports on localhost without authentication. You can
+point it at different targets using the `--sql` and `--mongo`
+command-line parameters.
+
+`mosql` will:
+
+ 1. Create the appropriate SQL tables
+ 2. Import data from the Mongo database
+ 3. Start tailing the mongo oplog, propagating changes from MongoDB to SQL.
+
+After the first run, `mosql` will store the status of the oplog tailer
+in the `mosql_tailers` table in your SQL database, and automatically
+resume where it left off. `mosql` uses the replset name to keep track
+of which mongo database it's tailing, so that you can tail multiple
+databases into the same SQL database. If, for some reason, you want to
+tail the same replset multiple times, or multiple replsets with the
+same name, you can use the `--service` flag to change the name `mosql`
+uses to track state.
+
+You likely want to run `mosql` against a secondary node, at least for
+the initial import, which will cause large amounts of disk activity on
+the target node. One option is to use read preferences in your
+connection URI:
+
+    mosql --mongo mongodb://node1,node2,node3?readPreference=secondary
+
+## Advanced usage
+
+For advanced scenarios, you can pass options to control `mosql`'s
+behavior. If you pass `--skip-tail`, `mosql` will do the initial
+import, but not tail the oplog. This could be used, for example, to do
+an import off of a backup snapshot, and then start the tailer on the
+live cluster.
+
+If you need to force a fresh reimport, pass `--reimport`, which will
+cause `mosql` to drop tables, create them anew, and do another import.
+
+## Schema mismatches and _extra_props
+
+If MoSQL encounters values in the MongoDB database that don't fit
+within the stated schema (e.g. a floating-point value in an INTEGER
+field), it will log a warning, ignore the entire object, and continue.
+
+If it encounters a MongoDB object with fields not listed in the
+collection map, it will discard the extra fields, unless
+`:extra_props` is set in the `:meta` hash. If it is, it will collect
+any unhandled fields, JSON-encode them in a hash, and store the
+resulting text in `_extra_props` in SQL. It's up to you to do
+something useful with the JSON. One option is to use [plv8][plv8] to
+parse them inside PostgreSQL, or you can just pull the JSON out whole
+and parse it in application code.
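+
+For example, pulling the JSON back out in application code is a few
+lines of Ruby (a sketch; the table and the `tags` field follow the
+invented example above):
+
+    require 'json'
+    require 'sequel'
+
+    db = Sequel.connect('postgres:///')
+    db[:blog_posts].each do |row|
+      extra = JSON.parse(row[:_extra_props] || '{}')
+      puts "#{row[:title]}: #{extra['tags'].inspect}"
+    end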
+
+This is also currently the only way to handle array or object values
+inside records -- specify `:extra_props`, and they'll get JSON-encoded
+into `_extra_props`. There's no reason we couldn't support
+JSON-encoded values for individual columns/fields, but we haven't
+written that code yet.
+
+[plv8]: http://code.google.com/p/plv8js/
+
+# Development
+
+Patches and contributions are welcome! Please fork the project and
+open a pull request on [github][github], or just report issues.
+
+MoSQL includes a small but hopefully-growing test suite. It assumes a
+running PostgreSQL and MongoDB instance on the local host. You can
+point it at a different target via environment variables; see
+`test/functional/_lib.rb` for more information.
+
+[github]: https://github.com/stripe/mosql
diff --git a/Rakefile b/Rakefile
new file mode 100644
index 0000000..70570b4
--- /dev/null
+++ b/Rakefile
@@ -0,0 +1,12 @@
+require 'rake/testtask'
+
+task :default
+task :build
+
+Rake::TestTask.new do |t|
+  t.libs = ["lib"]
+  t.verbose = true
+  t.test_files = FileList['test/**/*.rb'].reject do |file|
+    file.end_with?('_lib.rb')
+  end
+end
diff --git a/bin/mosql b/bin/mosql
new file mode 100755
index 0000000..eff29b5
--- /dev/null
+++ b/bin/mosql
@@ -0,0 +1,7 @@
+#!/usr/bin/env ruby
+
+require 'rubygems'
+require 'bundler/setup'
+require 'mosql/cli'
+
+MoSQL::CLI.run(ARGV)
diff --git a/lib/mosql.rb b/lib/mosql.rb
new file mode 100644
index 0000000..170382f
--- /dev/null
+++ b/lib/mosql.rb
@@ -0,0 +1,11 @@
+require 'log4r'
+require 'mongo'
+require 'sequel'
+require 'mongoriver'
+require 'json'
+
+require 'mosql/version'
+require 'mosql/log'
+require 'mosql/sql'
+require 'mosql/schema'
+require 'mosql/tailer'
diff --git a/lib/mosql/cli.rb b/lib/mosql/cli.rb
new file mode 100644
index 0000000..00f509c
--- /dev/null
+++ b/lib/mosql/cli.rb
@@ -0,0 +1,305 @@
+require 'mosql'
+require 'optparse'
+require 'yaml'
+require 'logger'
+
+module MoSQL
+  class CLI
+    include MoSQL::Logging
+
+    BATCH = 1000
+
+    attr_reader :args, :options, :tailer
+
+    def self.run(args)
+      cli = CLI.new(args)
+      cli.run
+    end
+
+    def initialize(args)
+      @args = args
+      @options = {}
+      @done = false
+      setup_signal_handlers
+    end
+
+    def setup_signal_handlers
+      %w[TERM INT USR2].each do |sig|
+        Signal.trap(sig) do
+          log.info("Got SIG#{sig}. Preparing to exit...")
+          @done = true
+        end
+      end
+    end
+
+    def parse_args
+      @options = {
+        :collections => 'collections.yml',
+        :sql => 'postgres:///',
+        :mongo => 'mongodb://localhost',
+        :verbose => 0
+      }
+      optparse = OptionParser.new do |opts|
+        opts.banner = "Usage: #{$0} [options] "
+
+        opts.on('-h', '--help', "Display this message") do
+          puts opts
+          exit(0)
+        end
+
+        opts.on('-v', "Increase verbosity") do
+          @options[:verbose] += 1
+        end
+
+        opts.on("-c", "--collections [collections.yml]", "Collection map YAML file") do |file|
+          @options[:collections] = file
+        end
+
+        opts.on("--sql [sqluri]", "SQL server to connect to") do |uri|
+          @options[:sql] = uri
+        end
+
+        opts.on("--mongo [mongouri]", "Mongo connection string") do |uri|
+          @options[:mongo] = uri
+        end
+
+        opts.on("--schema [schema]", "PostgreSQL 'schema' to namespace tables") do |schema|
+          @options[:schema] = schema
+        end
+
+        opts.on("--ignore-delete", "Ignore delete operations when tailing") do
+          @options[:ignore_delete] = true
+        end
+
+        opts.on("--tail-from [timestamp]", "Start tailing from the specified UNIX timestamp") do |ts|
+          @options[:tail_from] = ts
+        end
+
+        opts.on("--service [service]", "Service name to use when storing tailing state") do |service|
+          @options[:service] = service
+        end
+
+        opts.on("--skip-tail", "Don't tail the oplog, just do the initial import") do
+          @options[:skip_tail] = true
+        end
+
+        opts.on("--reimport", "Force a data re-import") do
+          @options[:reimport] = true
+        end
+      end
+
+      optparse.parse!(@args)
+
+      log = Log4r::Logger.new('Stripe')
+      log.outputters = Log4r::StdoutOutputter.new(STDERR)
+      if options[:verbose] >= 1
+        log.level = Log4r::DEBUG
+      else
+        log.level = Log4r::INFO
+      end
+    end
+
+    def connect_mongo
+      @mongo = Mongo::Connection.from_uri(options[:mongo])
+      config = @mongo['admin'].command(:ismaster => 1)
+      if !config['setName']
+        log.warn("`#{options[:mongo]}' is not a replset. Proceeding anyways...")
+      end
+      options[:service] ||= config['setName']
+    end
+
+    def connect_sql
+      @sql = MoSQL::SQLAdapter.new(@schemamap, options[:sql], options[:schema])
+      if options[:verbose] >= 2
+        @sql.db.sql_log_level = :debug
+        @sql.db.loggers << Logger.new($stderr)
+      end
+    end
+
+    def load_collections
+      collections = YAML.load(File.read(@options[:collections]))
+      @schemamap = MoSQL::Schema.new(collections)
+    end
+
+    def run
+      parse_args
+      load_collections
+      connect_sql
+      connect_mongo
+
+      metadata_table = MoSQL::Tailer.create_table(@sql.db, 'mosql_tailers')
+
+      @tailer = MoSQL::Tailer.new([@mongo], :existing, metadata_table,
+                                  :service => options[:service])
+
+      if options[:reimport] || tailer.read_timestamp.seconds == 0
+        initial_import
+      end
+
+      optail
+    end
+
+    # Helpers
+
+    def collection_for_ns(ns)
+      dbname, collection = ns.split(".", 2)
+      @mongo.db(dbname).collection(collection)
+    end
+
+    def bulk_upsert(table, ns, items)
+      begin
+        @schemamap.copy_data(table.db, ns, items)
+      rescue Sequel::DatabaseError => e
+        log.debug("Bulk insert error (#{e}), attempting individual upserts...")
+        cols = @schemamap.all_columns(@schemamap.find_ns(ns))
+        items.each do |it|
+          h = {}
+          cols.zip(it).each { |k,v| h[k] = v }
+          @sql.upsert(table, h)
+        end
+      end
+    end
+
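+    # Run the block, retrying transient Mongo errors with exponential
+    # backoff (0.5s, 0.75s, 1.125s, ...: delay = 0.5 * 1.5**try).
+    # Duplicate-key errors and dead cursors are re-raised immediately,
+    # since retrying them cannot succeed.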
+    def with_retries(tries=10)
+      tries.times do |try|
+        begin
+          yield
+        rescue Mongo::ConnectionError, Mongo::ConnectionFailure, Mongo::OperationFailure => e
+          # Duplicate key error
+          raise if e.kind_of?(Mongo::OperationFailure) && [11000, 11001].include?(e.error_code)
+          # Cursor timeout
+          raise if e.kind_of?(Mongo::OperationFailure) && e.message =~ /^Query response returned CURSOR_NOT_FOUND/
+          delay = 0.5 * (1.5 ** try)
+          log.warn("Mongo exception: #{e}, sleeping #{delay}s...")
+          sleep(delay)
+        end
+      end
+    end
+
+    def track_time
+      start = Time.now
+      yield
+      Time.now - start
+    end
+
+    def initial_import
+      @schemamap.create_schema(@sql.db, true)
+
+      start_ts = @mongo['local']['oplog.rs'].find_one({}, {:sort => [['$natural', -1]]})['ts']
+
+      want_dbs = @schemamap.all_mongo_dbs & @mongo.database_names
+      want_dbs.each do |dbname|
+        log.info("Importing for Mongo DB #{dbname}...")
+        db = @mongo.db(dbname)
+        want = Set.new(@schemamap.collections_for_mongo_db(dbname))
+        db.collections.select { |c| want.include?(c.name) }.each do |collection|
+          ns = "#{dbname}.#{collection.name}"
+          import_collection(ns, collection)
+          exit(0) if @done
+        end
+      end
+
+      tailer.write_timestamp(start_ts)
+    end
+
+    def import_collection(ns, collection)
+      log.info("Importing for #{ns}...")
+      count = 0
+      batch = []
+      table = @sql.table_for_ns(ns)
+      table.truncate
+
+      start = Time.now
+      sql_time = 0
+      collection.find(nil, :batch_size => BATCH) do |cursor|
+        with_retries do
+          cursor.each do |obj|
+            batch << @schemamap.transform(ns, obj)
+            count += 1
+
+            if batch.length >= BATCH
+              sql_time += track_time do
+                bulk_upsert(table, ns, batch)
+              end
+              elapsed = Time.now - start
+              log.info("Imported #{count} rows (#{elapsed}s, #{sql_time}s SQL)...")
+              batch.clear
+              exit(0) if @done
+            end
+          end
+        end
+      end
+
+      unless batch.empty?
+        bulk_upsert(table, ns, batch)
+      end
+    end
+
+    def optail
+      return if options[:skip_tail]
+
+      tailer.tail_from(options[:tail_from] ?
+                       BSON::Timestamp.new(options[:tail_from].to_i, 0) :
+                       nil)
+      until @done
+        tailer.stream(1000) do |op|
+          handle_op(op)
+        end
+      end
+    end
+
+    def sync_object(ns, _id)
+      obj = collection_for_ns(ns).find_one({:_id => _id})
+      if obj
+        @sql.upsert_ns(ns, obj)
+      else
+        @sql.table_for_ns(ns).where(:_id => _id).delete()
+      end
+    end
+
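+    # Apply a single oplog entry to SQL. The 'op' field encodes the
+    # type: 'i' = insert, 'u' = update, 'd' = delete, 'n' = no-op.
+    # Updates that use $-operators (e.g. $set) carry only the
+    # modification, not the whole document, so for those we re-fetch
+    # the object from Mongo (sync_object) instead of replaying the
+    # operator against SQL.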
+    def handle_op(op)
+      log.debug("processing op: #{op.inspect}")
+      unless op['ns'] && op['op']
+        log.warn("Weird op: #{op.inspect}")
+        return
+      end
+
+      unless @schemamap.find_ns(op['ns'])
+        log.debug("Skipping op for unknown ns #{op['ns']}...")
+        return
+      end
+
+      ns = op['ns']
+      dbname, collection_name = ns.split(".", 2)
+
+      case op['op']
+      when 'n'
+        log.debug("Skipping no-op #{op.inspect}")
+      when 'i'
+        if collection_name == 'system.indexes'
+          log.info("Skipping index update: #{op.inspect}")
+        else
+          @sql.upsert_ns(ns, op['o'])
+        end
+      when 'u'
+        selector = op['o2']
+        update = op['o']
+        if update.keys.any? { |k| k.start_with? '$' }
+          log.debug("resync #{ns}: #{selector['_id']} (update was: #{update.inspect})")
+          sync_object(ns, selector['_id'])
+        else
+          log.debug("upsert #{ns}: _id=#{update['_id']}")
+          @sql.upsert_ns(ns, update)
+        end
+      when 'd'
+        if options[:ignore_delete]
+          log.debug("Ignoring delete op on #{ns} as instructed.")
+        else
+          @sql.table_for_ns(ns).where(:_id => op['o']['_id']).delete
+        end
+      else
+        log.info("Skipping unknown op #{op.inspect}")
+      end
+    end
+  end
+end
diff --git a/lib/mosql/log.rb b/lib/mosql/log.rb
new file mode 100644
index 0000000..160116f
--- /dev/null
+++ b/lib/mosql/log.rb
@@ -0,0 +1,7 @@
+module MoSQL
+  module Logging
+    def log
+      @@logger ||= Log4r::Logger.new("Stripe::MoSQL")
+    end
+  end
+end
diff --git a/lib/mosql/schema.rb b/lib/mosql/schema.rb
new file mode 100644
index 0000000..baf209a
--- /dev/null
+++ b/lib/mosql/schema.rb
@@ -0,0 +1,149 @@
+module MoSQL
+  class SchemaError < StandardError; end
+
+  class Schema
+    include MoSQL::Logging
+
+    def to_ordered_hash(lst)
+      hash = BSON::OrderedHash.new
+      lst.each do |ent|
+        raise "Invalid ordered hash entry #{ent.inspect}" unless ent.is_a?(Hash) && ent.keys.length == 1
+        field, type = ent.first
+        hash[field] = type
+      end
+      hash
+    end
+
+    def parse_spec(spec)
+      out = spec.dup
+      out[:columns] = to_ordered_hash(spec[:columns])
+      out
+    end
+
+    def initialize(map)
+      @map = {}
+      map.each do |dbname, db|
+        @map[dbname] ||= {}
+        db.each do |cname, spec|
+          @map[dbname][cname] = parse_spec(spec)
+        end
+      end
+    end
+
+    def create_schema(db, clobber=false)
+      @map.values.map(&:values).flatten.each do |collection|
+        meta = collection[:meta]
+        log.info("Creating table '#{meta[:table]}'...")
+        db.send(clobber ? :create_table! : :create_table?, meta[:table]) do
+          collection[:columns].each do |field, type|
+            column field, type
+          end
+          if meta[:extra_props]
+            column '_extra_props', 'TEXT'
+          end
+          primary_key [:_id]
+        end
+      end
+    end
+
+    def find_ns(ns)
+      db, collection = ns.split(".")
+      schema = (@map[db] || {})[collection]
+      if schema.nil?
+        log.debug("No mapping for ns: #{ns}")
+        return nil
+      end
+      schema
+    end
+
+    def find_ns!(ns)
+      schema = find_ns(ns)
+      raise SchemaError.new("No mapping for namespace: #{ns}") if schema.nil?
+      schema
+    end
+
+    def transform(ns, obj, schema=nil)
+      schema ||= find_ns!(ns)
+
+      obj = obj.dup
+      row = []
+      schema[:columns].each do |name, type|
+        v = obj.delete(name)
+        case v
+        when BSON::Binary, BSON::ObjectId
+          v = v.to_s
+        end
+        row << v
+      end
+
+      if schema[:meta][:extra_props]
+        # Kludgily delete binary blobs from _extra_props -- they may
+        # contain invalid UTF-8, which to_json will not properly encode.
+        obj.each do |k,v|
+          obj.delete(k) if v.is_a?(BSON::Binary)
+        end
+        row << obj.to_json
+      end
+
+      log.debug { "Transformed: #{row.inspect}" }
+
+      row
+    end
+
+    def all_columns(schema)
+      cols = schema[:columns].keys
+      if schema[:meta][:extra_props]
+        cols << "_extra_props"
+      end
+      cols
+    end
+
+    def copy_data(db, ns, objs)
+      schema = find_ns!(ns)
+      db.synchronize do |pg|
+        sql = "COPY \"#{schema[:meta][:table]}\" " +
+          "(#{all_columns(schema).map {|c| "\"#{c}\""}.join(",")}) FROM STDIN"
+        pg.execute(sql)
+        objs.each do |o|
+          pg.put_copy_data(transform_to_copy(ns, o, schema) + "\n")
+        end
+        pg.put_copy_end
+        begin
+          pg.get_result.check
+        rescue PGError => e
+          db.send(:raise_error, e)
+        end
+      end
+    end
+
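+    # Escape a single value for PostgreSQL's COPY ... FROM STDIN text
+    # format: NULL becomes \N, booleans become t/f, and backslash,
+    # tab, newline, and carriage return are backslash-escaped so they
+    # survive inside a tab-delimited row.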
+    def quote_copy(val)
+      case val
+      when nil
+        "\\N"
+      when true
+        't'
+      when false
+        'f'
+      else
+        val.to_s.gsub(/([\\\t\n\r])/, '\\\\\\1')
+      end
+    end
+
+    def transform_to_copy(ns, row, schema=nil)
+      row.map { |c| quote_copy(c) }.join("\t")
+    end
+
+    def table_for_ns(ns)
+      find_ns!(ns)[:meta][:table]
+    end
+
+    def all_mongo_dbs
+      @map.keys
+    end
+
+    def collections_for_mongo_db(db)
+      (@map[db]||{}).keys
+    end
+  end
+end
diff --git a/lib/mosql/sql.rb b/lib/mosql/sql.rb
new file mode 100644
index 0000000..6bd6d92
--- /dev/null
+++ b/lib/mosql/sql.rb
@@ -0,0 +1,59 @@
+module MoSQL
+  class SQLAdapter
+    include MoSQL::Logging
+
+    attr_reader :db
+
+    def initialize(schema, uri, pgschema=nil)
+      @schema = schema
+      connect_db(uri, pgschema)
+    end
+
+    def connect_db(uri, pgschema)
+      @db = Sequel.connect(uri, :after_connect => proc do |conn|
+                             if pgschema
+                               begin
+                                 conn.execute("CREATE SCHEMA \"#{pgschema}\"")
+                               rescue PG::Error
+                               end
+                               conn.execute("SET search_path TO \"#{pgschema}\"")
+                             end
+                           end)
+    end
+
+    def table_for_ns(ns)
+      @db[@schema.table_for_ns(ns).intern]
+    end
+
+    def upsert_ns(ns, obj)
+      h = {}
+      cols = @schema.all_columns(@schema.find_ns(ns))
+      row = @schema.transform(ns, obj)
+      cols.zip(row).each { |k,v| h[k] = v }
+      upsert(table_for_ns(ns), h)
+    end
+
+    def upsert(table, item)
+      begin
+        upsert!(table, item)
+      rescue Sequel::DatabaseError => e
+        wrapped = e.wrapped_exception
+        if wrapped.result
+          log.warn("Ignoring row (_id=#{item['_id']}): #{e}")
+        else
+          raise e
+        end
+      end
+    end
+
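+    # Upsert as INSERT-then-UPDATE: try the INSERT first, and if it
+    # fails with a unique-constraint violation, retry as an UPDATE
+    # keyed on _id.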
+    def upsert!(table, item)
+      begin
+        table.insert(item)
+      rescue Sequel::DatabaseError => e
+        raise e unless e.message =~ /duplicate key value violates unique constraint/
+        table.where(:_id => item['_id']).update(item)
+      end
+    end
+  end
+end
+
diff --git a/lib/mosql/tailer.rb b/lib/mosql/tailer.rb
new file mode 100644
index 0000000..3df9716
--- /dev/null
+++ b/lib/mosql/tailer.rb
@@ -0,0 +1,36 @@
+module MoSQL
+  class Tailer < Mongoriver::AbstractPersistentTailer
+    def self.create_table(db, tablename)
+      db.create_table?(tablename) do
+        column :service, 'TEXT'
+        column :timestamp, 'INTEGER'
+        primary_key [:service]
+      end
+      db[tablename.to_sym]
+    end
+
+    def initialize(backends, type, table, opts)
+      super(backends, type, opts)
+      @table = table
+      @service = opts[:service] || "mosql"
+    end
+
+    def read_timestamp
+      row = @table.where(:service => @service).select([:timestamp]).first
+      if row
+        BSON::Timestamp.new(row[:timestamp], 0)
+      else
+        BSON::Timestamp.new(0, 0)
+      end
+    end
+
+    def write_timestamp(ts)
+      begin
+        @table.insert({:service => @service, :timestamp => ts.seconds})
+      rescue Sequel::DatabaseError => e
+        raise unless e.message =~ /duplicate key value violates unique constraint/
+        @table.where(:service => @service).update(:timestamp => ts.seconds)
+      end
+    end
+  end
+end
diff --git a/lib/mosql/version.rb b/lib/mosql/version.rb
new file mode 100644
index 0000000..9154edd
--- /dev/null
+++ b/lib/mosql/version.rb
@@ -0,0 +1,3 @@
+module MoSQL
+  VERSION = "0.0.1"
+end
diff --git a/mosql.gemspec b/mosql.gemspec
new file mode 100644
index 0000000..96515c3
--- /dev/null
+++ b/mosql.gemspec
@@ -0,0 +1,23 @@
+# -*- encoding: utf-8 -*-
+require File.expand_path('../lib/mosql/version', __FILE__)
+
+Gem::Specification.new do |gem|
+  gem.authors       = ["Nelson Elhage"]
+  gem.email         = ["nelhage@stripe.com"]
+  gem.description   = %q{A library for streaming MongoDB to SQL}
+  gem.summary       = %q{MongoDB -> SQL streaming bridge}
+  gem.homepage      = "https://github.com/stripe/mosql"
+
+  gem.files         = `git ls-files`.split($\)
+  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
+  gem.name          = "mosql"
+  gem.require_paths = ["lib"]
+  gem.version       = MoSQL::VERSION
+
+  %w[sequel pg mongo bson_ext rake log4r json
+     mongoriver].each { |dep| gem.add_runtime_dependency(dep) }
+
+  gem.add_development_dependency "minitest"
+  gem.add_development_dependency "mocha"
+end
diff --git a/test/_lib.rb b/test/_lib.rb
new file mode 100644
index 0000000..685fcf2
--- /dev/null
+++ b/test/_lib.rb
@@ -0,0 +1,18 @@
+require 'rubygems'
+require 'bundler/setup'
+
+require 'minitest/autorun'
+require 'minitest/spec'
+require 'mocha'
+
+$:.unshift(File.expand_path(File.join(File.dirname(__FILE__), '../lib')))
+
+require 'mosql'
+
+module MoSQL
+  class Test < ::MiniTest::Spec
+    def setup
+      # Put any stubs here that you want to apply globally
+    end
+  end
+end
diff --git a/test/functional/_lib.rb b/test/functional/_lib.rb
new file mode 100644
index 0000000..4ccba7a
--- /dev/null
+++ b/test/functional/_lib.rb
@@ -0,0 +1,65 @@
+require File.join(File.dirname(__FILE__), "../_lib")
+
+module MoSQL
+  class Test::Functional < MoSQL::Test
+    attr_reader :sequel, :mongo
+
+    def sql_test_uri
+      ENV['MONGOSQL_TEST_SQL'] || 'postgres:///test'
+    end
+    def mongo_test_uri
+      ENV['MONGOSQL_TEST_MONGO'] || 'mongodb://localhost'
+    end
+    def mongo_test_dbname
+      ENV['MONGOSQL_TEST_MONGO_DB'] || 'test'
+    end
+
+    def connect_sql
+      begin
+        conn = Sequel.connect(sql_test_uri)
+        conn.test_connection
+        conn
+      rescue Sequel::DatabaseConnectionError
+        $stderr.puts < "a", 'var' => 0},
+      {'_id' => "b", 'var' => 1},
+      {'_id' => "c"},
+      {'_id' => "d", 'other_var' => "hello"}
+    ]
+    @map.copy_data(@sequel, 'db.collection', objects.map { |o| @map.transform('db.collection', o) } )
+    assert_equal(4, table.count)
+    rows = table.select.sort_by { |r| r[:_id] }
+    assert_equal(%w[a b c d], rows.map { |r| r[:_id] })
+    assert_equal(nil, rows[2][:var])
+    assert_equal(nil, rows[3][:var])
+  end
+
+  it 'Can COPY BSON::ObjectIDs' do
+    o = {'_id' => BSON::ObjectId.new, 'var' => 0}
+    @map.copy_data(@sequel, 'db.collection', [ @map.transform('db.collection', o)] )
+    assert_equal(o['_id'].to_s, table.select.first[:_id])
+  end
+
+  it 'Can transform BSON::ObjectIDs' do
+    o = {'_id' => BSON::ObjectId.new, 'var' => 0}
+    row = @map.transform('db.collection', o)
+    table.insert(row)
+    assert_equal(o['_id'].to_s, table.select.first[:_id])
+  end
+end
diff --git a/test/functional/sql.rb b/test/functional/sql.rb
new file mode 100644
index 0000000..f8dd2fc
--- /dev/null
+++ b/test/functional/sql.rb
@@ -0,0 +1,38 @@
+require File.join(File.dirname(__FILE__), '_lib.rb')
+
+class MoSQL::Test::Functional::SQLTest < MoSQL::Test::Functional
+  before do
+    sequel.drop_table?(:test_upsert)
+    sequel.create_table?(:test_upsert) do
+      column :_id, 'INTEGER'
+      column :color, 'TEXT'
+      column :quantity, 'INTEGER'
+      primary_key [:_id]
+    end
+
+    @adapter = MoSQL::SQLAdapter.new(nil, sql_test_uri)
+    @table = sequel[:test_upsert]
+  end
+
+  describe 'upsert' do
+    it 'inserts new items' do
+      @adapter.upsert!(@table, {'_id' => 0, 'color' => 'red', 'quantity' => 10})
+      @adapter.upsert!(@table, {'_id' => 1, 'color' => 'blue', 'quantity' => 5})
+      assert_equal(2, @table.count)
+      assert_equal('red', @table[:_id => 0][:color])
+      assert_equal(10, @table[:_id => 0][:quantity])
+      assert_equal('blue', @table[:_id => 1][:color])
+      assert_equal(5, @table[:_id => 1][:quantity])
+    end
+
+    it 'updates items' do
+      @adapter.upsert!(@table, {'_id' => 0, 'color' => 'red', 'quantity' => 10})
+      assert_equal(1, @table.count)
+      assert_equal('red', @table[:_id => 0][:color])
+
+      @adapter.upsert!(@table, {'_id' => 0, 'color' => 'blue', 'quantity' => 5})
+      assert_equal(1, @table.count)
+      assert_equal('blue', @table[:_id => 0][:color])
+    end
+  end
+end
diff --git a/test/unit/lib/mongo-sql/schema.rb b/test/unit/lib/mongo-sql/schema.rb
new file mode 100644
index 0000000..69099f1
--- /dev/null
+++ b/test/unit/lib/mongo-sql/schema.rb
@@ -0,0 +1,102 @@
+require File.join(File.dirname(__FILE__), '../../../_lib.rb')
+
+class MoSQL::Test::SchemaTest < MoSQL::Test
+  TEST_MAP = <<EOF
+---
+db:
+  collection:
+    :meta:
+      :table: sqltable
+    :columns:
+      - _id: TEXT
+      - var: INTEGER
+  with_extra_props:
+    :meta:
+      :table: sqltable2
+      :extra_props: true
+    :columns:
+      - _id: INTEGER
+EOF
+
+  before do
+    @map = MoSQL::Schema.new(YAML.load(TEST_MAP))
+  end
+
+  describe 'when transforming' do
+    it 'Transforms rows' do
+      out = @map.transform('db.collection', {'_id' => "row 1", 'var' => 6})
+      assert_equal(["row 1", 6], out)
+    end
+
+    it 'Includes extra props' do
+      out = @map.transform('db.with_extra_props', {'_id' => 7, 'var' => 6, 'other var' => {'key' => 'value'}})
+      assert_equal(2, out.length)
+      assert_equal(7, out[0])
+      assert_equal({'var' => 6, 'other var' => {'key' => 'value'}}, JSON.parse(out[1]))
+    end
+
+    it 'gets all_columns right' do
+      assert_equal(['_id', 'var'], @map.all_columns(@map.find_ns('db.collection')))
+      assert_equal(['_id', '_extra_props'], @map.all_columns(@map.find_ns('db.with_extra_props')))
+    end
+  end
+
+  describe 'when copying data' do
+    it 'quotes special characters' do
+      assert_equal(%q{\\\\}, @map.quote_copy(%q{\\}))
+      assert_equal(%Q{\\\t}, @map.quote_copy( %Q{\t}))
+      assert_equal(%Q{\\\n}, @map.quote_copy( %Q{\n}))
+      assert_equal(%Q{some text}, @map.quote_copy(%Q{some text}))
+    end
+  end
+end