Skip to content

Commit

Permalink
Attempt to normalize phone numbers with doubled prefix+countrycode (X…
Browse files Browse the repository at this point in the history
…MLdb). Fix emoji+special chars in threadname on searchpage.html. Shrink HTML searchidx. Small substr optimizations.
  • Loading branch information
bepaald committed Feb 9, 2025
1 parent e1e64ab commit fdd2b9d
Show file tree
Hide file tree
Showing 9 changed files with 81 additions and 28 deletions.
4 changes: 2 additions & 2 deletions arg/arg.h
Original file line number Diff line number Diff line change
Expand Up @@ -1072,8 +1072,8 @@ bool Arg::parsePair(std::string const &token, std::string const &delim, std::pai
return false;
}

std::string first = token.substr(0, pos);
std::string second = token.substr(pos + 1);
std::string first(token, 0, pos);
std::string second(token, pos + 1);
if (first.empty() || second.empty())
{
*error = "Empty field in pair.";
Expand Down
2 changes: 1 addition & 1 deletion autoversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,6 @@
#ifndef VERSION_H_
#define VERSION_H_

#define VERSIONDATE "20250207.154020"
#define VERSIONDATE "20250209.110102"

#endif
13 changes: 7 additions & 6 deletions common_be.h
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
Copyright (C) 2019-2024 Selwin van Dijk
Copyright (C) 2019-2025 Selwin van Dijk
This file is part of signalbackup-tools.
Expand Down Expand Up @@ -44,6 +44,7 @@
#else
#define DEBUGOUT2(...)
#endif

#define STRLEN( STR ) (bepaald::strlitLength(STR))

#if __cpp_lib_starts_ends_with >= 201711L
Expand Down Expand Up @@ -74,7 +75,7 @@ namespace bepaald
inline std::string toHexString(T const &num, typename std::enable_if<std::is_integral<T>::value>::type *dummy = nullptr);
inline std::string toString(double num);
inline constexpr int strlitLength(char const *str, int pos = 0);
inline int strlitLength(std::string const &str);
//inline int strlitLength(std::string const &str);
inline int numDigits(long long int num);
inline std::string toDateString(std::time_t epoch, std::string const &format);
inline std::string toLower(std::string s);
Expand Down Expand Up @@ -233,10 +234,10 @@ inline constexpr int bepaald::strlitLength(char const *str, int pos)
return str[pos] == '\0' ? 0 : 1 + strlitLength(str, pos + 1);
}

// Overload of strlitLength() for std::string arguments (the other overload
// takes a char const * and counts characters up to the terminating '\0').
//
// str:     the string whose length is requested.
// returns: str.size(), converted to int.
inline int bepaald::strlitLength(std::string const &str)
{
  // size() returns an unsigned size_type; spell out the narrowing to int
  // instead of relying on an implicit (warning-prone) conversion.
  return static_cast<int>(str.size());
}
// inline int bepaald::strlitLength(std::string const &str)
// {
// return str.size();
// }

inline int bepaald::numDigits(long long int num)
{
Expand Down
2 changes: 1 addition & 1 deletion desktopdatabase/desktopdatabase.h
Original file line number Diff line number Diff line change
Expand Up @@ -190,7 +190,7 @@ inline void DesktopDatabase::runQuery(std::string const &q, std::string const &m
if (!d_database.exec(q, &res))
return;

std::string q_comm(q.substr(0, STRLEN("DELETE"))); // delete, insert and update are same length...
std::string q_comm(q, 0, STRLEN("DELETE")); // delete, insert and update are same length...
std::for_each(q_comm.begin(), q_comm.end(), [] (char &ch) { ch = std::toupper(ch); });

if (q_comm == "DELETE" || q_comm == "INSERT" || q_comm == "UPDATE")
Expand Down
32 changes: 26 additions & 6 deletions signalbackup/exporthtml.cc
Original file line number Diff line number Diff line change
Expand Up @@ -132,6 +132,9 @@ bool SignalBackup::exportHtml(std::string const &directory, std::vector<long lon
SqliteDB::QueryResults search_idx_results;
std::ofstream searchidx;
bool searchidx_write_started = false;
long long int searchidx_page_idx = 0;
std::map<std::string, long long int> searchidx_page_idx_map;

// start search index page
if (searchpage)
{
Expand Down Expand Up @@ -396,7 +399,8 @@ bool SignalBackup::exportHtml(std::string const &directory, std::vector<long lon
// create output-file
std::string raw_base_filename = (is_note_to_self ? "Note to Self" : recipient_info[thread_recipient_id].display_name);
WIN_LIMIT_FILENAME_LENGTH(raw_base_filename);
std::string filename(sanitizeFilename(raw_base_filename) + (pagenumber > 0 ? "_" + bepaald::toString(pagenumber) : "") + ".html");
std::string base_filename(sanitizeFilename(raw_base_filename));// + (pagenumber > 0 ? "_" + bepaald::toString(pagenumber) : "") + ".html");
std::string filename(base_filename + (pagenumber > 0 ? "_" + bepaald::toString(pagenumber) : "") + ".html");
std::ofstream htmloutput(WIN_LONGPATH(directory + "/" + threaddir + "/" + filename), std::ios_base::binary);
if (!htmloutput.is_open())
{
Expand Down Expand Up @@ -626,8 +630,10 @@ bool SignalBackup::exportHtml(std::string const &directory, std::vector<long lon

if (searchpage && (!Types::isStatusMessage(msg_info.type) && !msg_info.body.empty()))
{
// all pages end in ".html", slice it off
std::string page(msg_info.threaddir + "/" + msg_info.filename, 0, msg_info.threaddir.size() + msg_info.filename.size() + 1 - 5);
if (auto it = searchidx_page_idx_map.find(msg_info.threaddir + "/" + base_filename); it != searchidx_page_idx_map.end())
searchidx_page_idx = it->second;
else
searchidx_page_idx_map.emplace(msg_info.threaddir + "/" + base_filename, ++searchidx_page_idx);

// because the body is already escaped for html at this point, we get it fresh from database (and have sqlite do the json formatting)
if (!d_database.exec("SELECT json_object("
Expand All @@ -639,15 +645,16 @@ bool SignalBackup::exportHtml(std::string const &directory, std::vector<long lon
"'d', (" + d_mms_table + ".date_received / 1000 - 1404165600), " // lose the last three digits (miliseconds, they are never displayed anyway).
// subtract "2014-07-01". Signal's initial release was 2014-07-29; negative
// numbers should work otherwise anyway.
"'p', ?) AS line,"
"'p', ?, "
"'n', ?) AS line,"
+ d_part_table + "._id AS rowid, " +
(d_database.tableContainsColumn(d_part_table, "unique_id") ?
d_part_table + ".unique_id AS uniqueid" : "-1 AS uniqueid") +
" FROM " + d_mms_table + " "
"LEFT JOIN thread ON thread._id IS " + d_mms_table + ".thread_id "
"LEFT JOIN " + d_part_table + " ON " + d_part_table + "." + d_part_mid + " IS " + d_mms_table + "._id AND " + d_part_table + "." + d_part_ct + " = 'text/x-signal-plain' AND " + d_part_table + ".quote = 0 "
"WHERE " + d_mms_table + "._id = ?",
{page, msg_info.msg_id}, &search_idx_results) ||
{searchidx_page_idx, pagenumber, msg_info.msg_id}, &search_idx_results) ||
search_idx_results.rows() < 1) [[unlikely]]
{
Logger::warning("Search_idx query failed or no results");
Expand Down Expand Up @@ -851,13 +858,26 @@ bool SignalBackup::exportHtml(std::string const &directory, std::vector<long lon
std::string line = d_database.getSingleResultAs<std::string>("SELECT json_object('_id', ?, 'display_name', ?)", {r->first, r->second.display_name}, std::string());
if (line.empty()) [[unlikely]]
continue;

searchidx << " " << line;
if (std::next(r) != recipient_info.end()) [[likely]]
searchidx << "," << std::endl;
else
searchidx << std::endl << "];" << std::endl;
}

// write page info:
searchidx << "page_idx = [" << std::endl;
for (auto pi = searchidx_page_idx_map.begin() ; pi != searchidx_page_idx_map.end(); ++pi)
{
std::string line = d_database.getSingleResultAs<std::string>("SELECT json_object('_id', ?, 'bn', ?)", {pi->second, pi->first}, std::string());
if (line.empty()) [[unlikely]]
continue;
searchidx << " " << line;
if (std::next(pi) != searchidx_page_idx_map.end()) [[likely]]
searchidx << "," << std::endl;
else
searchidx << std::endl << "];" << std::endl;
}
}

// write chat folders
Expand Down
16 changes: 11 additions & 5 deletions signalbackup/htmlwritesearchpage.cc
Original file line number Diff line number Diff line change
Expand Up @@ -764,13 +764,19 @@ body {
var index = recipient_idx.findIndex(function(item) {
return item._id === global_results[i].f;
});
var displayname = recipient_idx[index].display_name;
var displayname = recipient_idx[index].display_name.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
// get name of 'thread' id
var index = recipient_idx.findIndex(function(item){
return item._id === global_results[i].tr;
});
var threadname = recipient_idx[index].display_name;
var threadname = recipient_idx[index].display_name.replace(/&/g, '&amp;').replace(/</g, '&lt;').replace(/>/g, '&gt;');
// get base_filename of id
var index = page_idx.findIndex(function(item){
return item._id === global_results[i].p;
});
var base_filename = page_idx[index].bn;
// add searchresults
var elem = document.createElement('a');
Expand All @@ -780,7 +786,7 @@ body {
elem.classList.add("msg-outgoing");
else
elem.classList.add("msg-incoming");
elem.setAttribute('href', encodeURI(global_results[i].p + '.html#' + global_results[i].id));
elem.setAttribute('href', encodeURI(base_filename + (global_results[i].n > 0 ? '_' + global_results[i].n + '.html#' : '.html#') + global_results[i].id));
var linkdiv = document.createElement('div');
Expand All @@ -790,13 +796,13 @@ body {
fromspan.classList.add("msg-name-" + global_results[i].f);
if (global_results[i].o == 1)
fromspan.innerHTML = displayname + " (to <i>" + threadname + "</i>)";
fromspan.innerHTML = displayname + ' (to <span style="font-style: italic; font-synthesis: none;">' + threadname + '</span>)';
else
{
if (global_results[i].f === global_results[i].tr)
fromspan.innerHTML = displayname;
else
fromspan.innerHTML = displayname + " (in <i>" + threadname + "</i>)";
fromspan.innerHTML = displayname + ' (in <span style="font-style: italic; font-synthesis: none;">' + threadname + '</span>)';
}
linkdiv.append(fromspan);
Expand Down
2 changes: 1 addition & 1 deletion signalbackup/signalbackup.h
Original file line number Diff line number Diff line change
Expand Up @@ -778,7 +778,7 @@ inline void SignalBackup::runQuery(std::string const &q, std::string const &mode
if (!d_database.exec(q, &res))
return;

std::string q_comm = q.substr(0, STRLEN("DELETE")); // delete, insert and update are same length...
std::string q_comm(q, 0, STRLEN("DELETE")); // delete, insert and update are same length...
std::for_each(q_comm.begin(), q_comm.end(), [] (char &ch) { ch = std::toupper(ch); });

if (q_comm == "DELETE" || q_comm == "INSERT" || q_comm == "UPDATE")
Expand Down
25 changes: 19 additions & 6 deletions signalplaintextbackupdatabase/signalplaintextbackupdatabase.cc
Original file line number Diff line number Diff line change
Expand Up @@ -173,12 +173,12 @@ SignalPlaintextBackupDatabase::SignalPlaintextBackupDatabase(std::string const &
std::string body;
bool hasbody = false;
std::string sourceaddress;
for (auto const &sub : n)
for (auto const &childnode : n)
{
//std::cout << sub.name() << std::endl;
if (sub.name() == "parts")
//std::cout << childnode.name() << std::endl;
if (childnode.name() == "parts")
{
for (auto const &part : sub)
for (auto const &part : childnode)
{
if (part.hasAttribute("text"))
{
Expand Down Expand Up @@ -212,10 +212,10 @@ SignalPlaintextBackupDatabase::SignalPlaintextBackupDatabase(std::string const &
}
}
}
else if (sub.name() == "addrs")
else if (childnode.name() == "addrs")
{
int numaddresses = 0;
for (auto const &addr : sub)
for (auto const &addr : childnode)
{
++numaddresses;
//addr.print();
Expand Down Expand Up @@ -380,6 +380,19 @@ SignalPlaintextBackupDatabase::SignalPlaintextBackupDatabase(std::string const &
" )"
" WHERE smses.rowid IN (SELECT to_update.rowid FROM to_update)",
{all_names_res.value(i, "address"), old_addresses.value(j, "address")});
// alternative... looks better, not sure if it is better (will always change all rows?)
// d_database.exec("UPDATE smses SET targetaddresses = "
// "("
// " SELECT json_group_array("
// " CASE"
// " WHEN value = ? THEN ?"
// " ELSE value"
// " END"
// " )"
// " FROM json_each(smses.targetaddresses)"
// ") "
// "WHERE targetaddresses IS NOT NULL",
// {old_addresses.value(j, "address"), all_names_res.value(i, "address")});
//std::cout << "TgtAddr change: " << d_database.changed() << std::endl;
}
}
Expand Down
13 changes: 13 additions & 0 deletions signalplaintextbackupdatabase/signalplaintextbackupdatabase.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,19 @@ inline std::string SignalPlaintextBackupDatabase::normalizePhoneNumber(std::stri
else if (STRING_STARTS_WITH(result, "011"))
result = "+" + result.substr(STRLEN("011"));

// Special case to deal with numbers that start with _two_ international call prefixes + country codes:
// e.g. (with countrycode '1'): 01110019999999999
if (result.size() >= 15 && // we'll assume max number size of 15 (sources differ), the plus stands for (at least) 2 digits.
!d_countrycode.empty() &&
d_countrycode[0] == '+' &&
STRING_STARTS_WITH(result, d_countrycode) &&
(result.substr(d_countrycode.size(), (d_countrycode.size() - 1) + 2) == ("00" + d_countrycode.substr(1)) ||
result.substr(d_countrycode.size(), (d_countrycode.size() - 1) + 3) == ("011" + d_countrycode.substr(1)))) [[unlikely]]
{
Logger::warning("Detected doubled prefix and countrycode in phone number (", in, ")");
result = normalizePhoneNumber(result.substr(d_countrycode.size()), false);
}

if (result[0] != '+' && !d_countrycode.empty())
result = d_countrycode + (result[0] == '0' ? result.substr(1) : result);
}
Expand Down

0 comments on commit fdd2b9d

Please sign in to comment.