Open
Description
Hi,
I am creating an "adapter" or a "plugin" to my data archive reader.
basically it iterates on big array on memory and for each object extract the data and then i call my "adapter" that uses sqlite and try to insert it into the database (mostly tweets)
I am having really super slow times for example
db.run(query, params, function(err, res) {
// the callback being called here takes so much time like 2/3 seconds
})
I do a general insert with values.
here is my "adapter code"
var path = require('path');
var Adapter = require('../../Adapter');
var sqlite3 = require('sqlite3');
var async = require('async');
var queryTmpl = 'INSERT INTO twitter(entity, tweet, tweet_id, tweet_time, retweet, tweet_lang, user_id, user_name, user_lang) VALUES(?, ?, ?, ?, ?, ?, ?, ?, ?)';
function SqliteAdapter(logger) {
this._name = 'Sqlite Adapter';
Adapter.apply(this, arguments);
}
// Extend the base adapter
util.inherits(SqliteAdapter, Adapter);
SqliteAdapter.prototype.init = function(cb) {
var self = this;
this._client = new sqlite3.Database(path.join(__dirname, 'tweets.db'), function(err) {
if (err) {
self.log('Sqlite database adapter could not open database!')
cb(err);
} else {
self.log('Sqlite database adapter open datbase success!');
cb();
}
});
}
SqliteAdapter.prototype.iterateEntity = function(entityList, tweet, tweet_text, suffix, key = 'text', callback) {
if (!callback) {
this.error('callback must be passed!');
return;
}
var self = this;
async.eachOfSeries(entityList, function(entity, idx, async_cb) {
const sqlValues = [
suffix + entity[key],
tweet_text,
tweet.id_str,
tweet.timestamp_ms,
tweet.retweeted_status ? true : false,
tweet.lang,
tweet.user.id_str,
tweet.user.name,
tweet.user.lang
];
self._client.run(queryTmpl, sqlValues) ;//, function(err, res) {
if (err) {
self.error(err);
} else {
if (!this.lastID) {
self.error('Insert Sqlite failed - ' + tweet.id_str);
} else {
self.log('Insert Sqlite success, entity: '+ suffix + entity[key]);
}
}
// we are not passing error (if exists) since it will break the async each functionality, we just log it
async_cb();
});
// async_cb()
}, function(error) {
// finalize and call the origin 'push' method callback
callback(null);
});
}
SqliteAdapter.prototype.push = function(tweet) {
var self = this;
var text = tweet.text;
var entities = tweet.entities;
// checking if tweet is truncated then get the full tweet data
if (tweet.extended_tweet) {
text = tweet.extended_tweet.full_text;
entities = tweet.extended_tweet.extended_entities;
}
async.series([
function(callback) {
// insert hashtags
if (entities && entities.hashtags && entities.hashtags.length > 0) {
self.iterateEntity(entities.hashtags, tweet, text, '#', 'text', callback);
} else {
callback();
}
},
function(callback) {
// insert mentions
if (entities && entities.user_mentions && entities.user_mentions.length > 0) {
self.iterateEntity(entities.user_mentions, tweet, text, '@', 'screen_name', callback);
} else {
callback();
}
},
function(callback) {
// insert symbols
if (entities && entities.symbols && entities.symbols.length > 0) {
self.iterateEntity(entities.symbols, tweet, text, '$', 'text', callback);
} else {
callback();
}
},
function(callback) {
// insert urls
if (entities && entities.urls && entities.urls.length > 0) {
self.iterateEntity(entities.urls, tweet, text, '*', 'expanded_url', callback);
} else {
callback();
}
}
], function(error) {
if (error) {
self.log(error);
}
});
}
SqliteAdapter.prototype.teardown = function(cb) {
var self = this;
var closeDB = function() {
self._client.close(function(err) {
if (err) {
self.error('Sqlite could not close database');
cb(err);
} else {
self.log('Sqlite database closed');
cb();
}
});
}
// bind the teardown and pass the callback
SqliteAdapter.super_.prototype.teardown.call(this, closeDB);
}
The adapter ".init" method is called first to open the DB then for each array object the ".push" is being called this is how i made it, it works but super super slow as i mentioned.
any ideas?
thanks!