Skip to content

run with inserts is super super slow #965

Open
@pantchox

Description

@pantchox

Hi,

I am creating an "adapter" or a "plugin" to my data archive reader.
basically it iterates on big array on memory and for each object extract the data and then i call my "adapter" that uses sqlite and try to insert it into the database (mostly tweets)

I am having really super slow times for example

    db.run(query, params, function(err, res) {
    // the callback being called here takes so much time like 2/3 seconds
    })

I do a general insert with values.

here is my "adapter code"

var path = require('path');
var Adapter = require('../../Adapter');
var sqlite3 = require('sqlite3');
var async = require('async');

var queryTmpl = 'INSERT INTO twitter(entity, tweet, tweet_id, tweet_time, retweet, tweet_lang, user_id, user_name, user_lang) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)';

function SqliteAdapter(logger) {
    this._name = 'Sqlite Adapter';
    Adapter.apply(this, arguments);
}

// Extend the base adapter
util.inherits(SqliteAdapter, Adapter);

SqliteAdapter.prototype.init = function(cb) {
    var self = this;
    this._client = new sqlite3.Database(path.join(__dirname, 'tweets.db'), function(err) {
        if (err) {
            self.log('Sqlite database adapter could not open database!')
            cb(err);
        } else {
            self.log('Sqlite database adapter open datbase success!');
            cb();
        }
    });
}

SqliteAdapter.prototype.iterateEntity = function(entityList, tweet, tweet_text, suffix, key = 'text', callback) {
    if (!callback) {
        this.error('callback must be passed!');
        return;
    }
    var self = this;
    async.eachOfSeries(entityList, function(entity, idx, async_cb) {
        const sqlValues = [
            suffix + entity[key],
            tweet_text,
            tweet.id_str,
            tweet.timestamp_ms,
            tweet.retweeted_status ? true : false,
            tweet.lang,
            tweet.user.id_str,
            tweet.user.name,
            tweet.user.lang
        ];

        self._client.run(queryTmpl, sqlValues) ;//, function(err, res) {
            if (err) {
                self.error(err);
            } else {
                if (!this.lastID) {
                    self.error('Insert Sqlite failed - ' + tweet.id_str);
                } else {
                    self.log('Insert Sqlite success, entity: '+ suffix + entity[key]);
                }
            }
            // we are not passing error (if exists) since it will break the async each functionality, we just log it 
            async_cb();
        });
        // async_cb()
    }, function(error) {
        // finalize and call the origin 'push' method callback
        callback(null);
    });
}

SqliteAdapter.prototype.push = function(tweet) {
    var self = this;
    var text = tweet.text;
    var entities = tweet.entities;
    
    // checking if tweet is truncated then get the full tweet data
    if (tweet.extended_tweet) {
        text = tweet.extended_tweet.full_text;
        entities = tweet.extended_tweet.extended_entities;
    }
    async.series([
        function(callback) {
            // insert hashtags
            if (entities && entities.hashtags && entities.hashtags.length > 0) {
                self.iterateEntity(entities.hashtags, tweet, text, '#', 'text', callback);
            } else {
                callback();
            }
        },
        function(callback) {
            // insert mentions
            if (entities && entities.user_mentions && entities.user_mentions.length > 0) {
                self.iterateEntity(entities.user_mentions, tweet, text, '@', 'screen_name', callback);
            } else {
                callback();
            }
        },
        function(callback) {
            // insert symbols
            if (entities && entities.symbols && entities.symbols.length > 0) {
                self.iterateEntity(entities.symbols, tweet, text, '$', 'text', callback);
            } else {
                callback();
            }
        },
        function(callback) {
            // insert urls
            if (entities && entities.urls && entities.urls.length > 0) {
                self.iterateEntity(entities.urls, tweet, text, '*', 'expanded_url', callback);
            } else {
                callback();
            }
        }
    ], function(error) {
        if (error) {
            self.log(error);
        }
    });
}

SqliteAdapter.prototype.teardown = function(cb) {
    var self = this;
    var closeDB = function() {
        self._client.close(function(err) {
            if (err) {
                self.error('Sqlite could not close database');
                cb(err);
            } else {
                self.log('Sqlite database closed');
                cb();
            }
        });
    }
    // bind the teardown and pass the callback
    SqliteAdapter.super_.prototype.teardown.call(this, closeDB);
}

The adapter ".init" method is called first to open the DB then for each array object the ".push" is being called this is how i made it, it works but super super slow as i mentioned.

any ideas?

thanks!

Metadata

Metadata

Assignees

No one assigned

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions