Skip to content

Commit

Permalink
Use proper extensions on attachments in HTML export (part of #232). Up…
Browse files Browse the repository at this point in the history
…date mimetypes. Use proper extension in dumpavatars. Add counter to dumpmedia.
  • Loading branch information
bepaald committed Aug 19, 2024
1 parent 1d14dcc commit 41f937e
Show file tree
Hide file tree
Showing 11 changed files with 1,095 additions and 1,033 deletions.
367 changes: 186 additions & 181 deletions BUILDSCRIPT.sh

Large diffs are not rendered by default.

2 changes: 2 additions & 0 deletions BUILDSCRIPT_MULTIPROC.bash44
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ SRC=("keyvalueframe/statics.cc"
"logger/supportsansi.cc"
"logger/statics.cc"
"logger/outputhead.cc"
"mimetypes/statics.cc"
"databaseversionframe/statics.cc"
"endframe/statics.cc"
"sqlcipherdecryptor/destructor.cc"
Expand Down Expand Up @@ -358,6 +359,7 @@ OBJ=("keyvalueframe/o/statics.o"
"logger/o/supportsansi.o"
"logger/o/statics.o"
"logger/o/outputhead.o"
"mimetypes/o/statics.o"
"databaseversionframe/o/statics.o"
"endframe/o/statics.o"
"sqlcipherdecryptor/o/destructor.o"
Expand Down
2 changes: 1 addition & 1 deletion autoversion.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,6 @@
#ifndef VERSION_H_
#define VERSION_H_

#define VERSIONDATE "20240818.193953"
#define VERSIONDATE "20240819.211304"

#endif
809 changes: 7 additions & 802 deletions mimetypes/mimetypes.h

Large diffs are not rendered by default.

820 changes: 820 additions & 0 deletions mimetypes/statics.cc

Large diffs are not rendered by default.

14 changes: 10 additions & 4 deletions signalbackup/dumpavatars.cc
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,16 @@ bool SignalBackup::dumpAvatars(std::string const &dir, std::vector<std::string>
std::find(contacts.begin(), contacts.end(), name) == contacts.end())
continue;

std::string filename = sanitizeFilename(name + ".jpg");
if (filename.empty() || filename == ".jpg") // filename was not set in database or was impossible
// to sanitize (eg reserved name in windows 'COM1')
filename = af->recipient() + ".jpg";
// get avatar data, to get extension
std::string extension;
unsigned char *avatardata = af->attachmentData();
uint64_t avatarsize = af->attachmentSize();
AttachmentMetadata amd = getAttachmentMetaData(std::string(), avatardata, avatarsize, true/*skiphash*/);
extension = "." + std::string(MimeTypes::getExtension(amd.filetype, "jpg"));
std::string filename = sanitizeFilename(name + extension);
if (filename.empty() || filename == extension) // filename was not set in database or was impossible
// to sanitize (eg reserved name in windows 'COM1')
filename = af->recipient() + extension;

// make filename unique
while (bepaald::fileOrDirExists(dir + "/" + filename))
Expand Down
5 changes: 2 additions & 3 deletions signalbackup/dumpmedia.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@ bool SignalBackup::dumpMedia(std::string const &dir, std::vector<std::string> co
if (!prepareOutputDirectory(dir, overwrite))
return false;

MimeTypes mimetypes;
std::pair<std::vector<int>, std::vector<std::string>> conversations; // links thread_id to thread title, if the
// folder already exists, but from another _id,
// it is a different thread with the same name
Expand Down Expand Up @@ -137,7 +136,7 @@ bool SignalBackup::dumpMedia(std::string const &dir, std::vector<std::string> co
for (auto const &aframe : d_attachments)
#endif
{
Logger::message_overwrite("Saving attachments... ", count); //, "/", results.rows());
Logger::message_overwrite("Saving attachments... ", count, "/", d_attachments.size());

AttachmentFrame *a = aframe.second.get();

Expand Down Expand Up @@ -198,7 +197,7 @@ bool SignalBackup::dumpMedia(std::string const &dir, std::vector<std::string> co

// get file ext
std::string mime = results.valueAsString(0, d_part_ct);
std::string ext = std::string(mimetypes.getExtension(mime));
std::string ext = std::string(MimeTypes::getExtension(mime));
if (ext.empty())
{
ext = "attach";
Expand Down
34 changes: 18 additions & 16 deletions signalbackup/getattachmentmetadata.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

#include "../base64/base64.h"

SignalBackup::AttachmentMetadata SignalBackup::getAttachmentMetaData(std::string const &file, unsigned char *data, long long int data_size) const
SignalBackup::AttachmentMetadata SignalBackup::getAttachmentMetaData(std::string const &file, unsigned char *data, long long int data_size, bool skiphash) const
{
//struct AttachmentMetadata
//{
Expand All @@ -48,22 +48,24 @@ SignalBackup::AttachmentMetadata SignalBackup::getAttachmentMetaData(std::string



// gethash
std::string hash;
unsigned char rawhash[SHA256_DIGEST_LENGTH];
std::unique_ptr<EVP_MD_CTX, decltype(&::EVP_MD_CTX_free)> sha256(EVP_MD_CTX_new(), &::EVP_MD_CTX_free);
if (!sha256 ||
EVP_DigestInit_ex(sha256.get(), EVP_sha256(), nullptr) != 1 ||
EVP_DigestUpdate(sha256.get(), data, data_size) != 1 ||
EVP_DigestFinal_ex(sha256.get(), rawhash, nullptr) != 1) [[unlikely]]
if (!skiphash)
{
Logger::warning("Failed to set hash");
hash = std::string();
// gethash
unsigned char rawhash[SHA256_DIGEST_LENGTH];
std::unique_ptr<EVP_MD_CTX, decltype(&::EVP_MD_CTX_free)> sha256(EVP_MD_CTX_new(), &::EVP_MD_CTX_free);
if (!sha256 ||
EVP_DigestInit_ex(sha256.get(), EVP_sha256(), nullptr) != 1 ||
EVP_DigestUpdate(sha256.get(), data, data_size) != 1 ||
EVP_DigestFinal_ex(sha256.get(), rawhash, nullptr) != 1) [[unlikely]]
{
Logger::warning("Failed to set hash");
hash = std::string();
}
hash = Base64::bytesToBase64String(rawhash, SHA256_DIGEST_LENGTH);
//std::cout << bepaald::bytesToHexString(rawhash, SHA256_DIGEST_LENGTH) << std::endl;
//std::cout << "GOT HASH: " << hash << std::endl;
}
hash = Base64::bytesToBase64String(rawhash, SHA256_DIGEST_LENGTH);
//std::cout << bepaald::bytesToHexString(rawhash, SHA256_DIGEST_LENGTH) << std::endl;
//std::cout << "GOT HASH: " << hash << std::endl;


// set buffer for file header
int bufsize = std::min(data_size, 30ll);
Expand Down Expand Up @@ -268,7 +270,7 @@ SignalBackup::AttachmentMetadata SignalBackup::getAttachmentMetaData(std::string
return AttachmentMetadata{-1, -1, std::string(), data_size, hash, file};
}

SignalBackup::AttachmentMetadata SignalBackup::getAttachmentMetaData(std::string const &file) const
SignalBackup::AttachmentMetadata SignalBackup::getAttachmentMetaData(std::string const &file, bool skiphash) const
{

//struct AttachmentMetadata
Expand Down Expand Up @@ -307,5 +309,5 @@ SignalBackup::AttachmentMetadata SignalBackup::getAttachmentMetaData(std::string
return AttachmentMetadata{-1, -1, std::string(), file_size, std::string(), file};
}

return getAttachmentMetaData(file, file_data.get(), file_size);
return getAttachmentMetaData(file, file_data.get(), file_size, skiphash);
}
32 changes: 17 additions & 15 deletions signalbackup/htmlwrite.cc
Original file line number Diff line number Diff line change
Expand Up @@ -1855,10 +1855,6 @@ void SignalBackup::HTMLwriteAttachmentDiv(std::ofstream &htmloutput, SqliteDB::Q
return;
}

// write the attachment data
if (!HTMLwriteAttachment(directory, threaddir, rowid, uniqueid, overwrite, append))
continue;

std::string content_type = attachment_results.valueAsString(a, d_part_ct);
std::string original_filename;
if (!attachment_results.isNull(a, "file_name") && !attachment_results(a, "file_name").empty())
Expand All @@ -1867,6 +1863,12 @@ void SignalBackup::HTMLwriteAttachmentDiv(std::ofstream &htmloutput, SqliteDB::Q
HTMLescapeString(&original_filename);
}

std::string extension(MimeTypes::getExtension(content_type, "bin"));

// write the attachment data
if (!HTMLwriteAttachment(directory, threaddir, rowid, uniqueid, extension, overwrite, append))
continue;

htmloutput << std::string(indent, ' ') << "<div class=\"attachment"
<< ((!STRING_STARTS_WITH(content_type, "image/") && !STRING_STARTS_WITH(content_type, "video/") && !STRING_STARTS_WITH(content_type, "audio/")) ?
" attachment-unknown-type" : "")
Expand All @@ -1878,7 +1880,7 @@ void SignalBackup::HTMLwriteAttachmentDiv(std::ofstream &htmloutput, SqliteDB::Q
htmloutput << std::string(indent, ' ') << " <input type=\"checkbox\" id=\"zoomCheck-" << rowid << "-" << uniqueid << "\">\n";
htmloutput << std::string(indent, ' ') << " <label for=\"zoomCheck-" << rowid << "-" << uniqueid << "\">\n";
htmloutput << std::string(indent, ' ') << " <img src=\"media/Attachment_" << rowid
<< "_" << uniqueid << ".bin\" alt=\"Image attachment\" loading=\"lazy\">\n";
<< "_" << uniqueid << "." << extension << "\" alt=\"Image attachment\" loading=\"lazy\">\n";
htmloutput << std::string(indent, ' ') << " </label>\n";
if (attachment_results.hasColumn("caption") &&
!attachment_results.isNull(a, "caption"))
Expand All @@ -1891,9 +1893,9 @@ void SignalBackup::HTMLwriteAttachmentDiv(std::ofstream &htmloutput, SqliteDB::Q
htmloutput << std::string(indent, ' ') << " <div class=\"msg-vid-container\">\n";
htmloutput << std::string(indent, ' ') << " <" << content_type.substr(0, 5) << " controls>\n";
htmloutput << std::string(indent, ' ') << " <source src=\"media/Attachment_" << rowid
<< "_" << uniqueid << ".bin\" type=\"" << content_type << "\">\n";
<< "_" << uniqueid << "." << extension << "\" type=\"" << content_type << "\">\n";
htmloutput << std::string(indent, ' ') << " Media of type " << content_type << "<span class=\"msg-dl-link\"><a href=\"media/Attachment_" << rowid
<< "_" << uniqueid << ".bin\" type=\"" << content_type << "\">&#129055;</a></span>\n";
<< "_" << uniqueid << "." << extension << "\" type=\"" << content_type << "\">&#129055;</a></span>\n";
htmloutput << std::string(indent, ' ') << " </" << content_type.substr(0, 5) << ">\n";
if (attachment_results.hasColumn("caption") &&
!attachment_results.isNull(a, "caption"))
Expand All @@ -1904,21 +1906,21 @@ void SignalBackup::HTMLwriteAttachmentDiv(std::ofstream &htmloutput, SqliteDB::Q
{
if (original_filename.empty())
htmloutput << std::string(indent, ' ') << " Attachment of unknown type <span class=\"msg-dl-link\"><a href=\"media/Attachment_" << rowid
<< "_" << uniqueid << ".bin\">&#129055;</a></span>\n";
<< "_" << uniqueid << "." << extension << "\">&#129055;</a></span>\n";
else
htmloutput << std::string(indent, ' ') << " Attachment '" << original_filename << "' <span class=\"msg-dl-link\"><a href=\"media/Attachment_" << rowid
//<< "_" << uniqueid << ".bin\" download=\"" << original_filename << "\">&#129055;</a></span>\n"; // does not work
<< "_" << uniqueid << ".bin\">&#129055;</a></span>\n";
<< "_" << uniqueid << "." << extension << "\">&#129055;</a></span>\n";
}
else // other
{
if (original_filename.empty())
htmloutput << std::string(indent, ' ') << " Attachment of type " << content_type << "<span class=\"msg-dl-link\"><a href=\"media/Attachment_" << rowid
<< "_" << uniqueid << ".bin\" type=\"" << content_type << "\">&#129055;</a></span>\n";
<< "_" << uniqueid << "." << extension << "\" type=\"" << content_type << "\">&#129055;</a></span>\n";
else
htmloutput << std::string(indent, ' ') << " Attachment '" << original_filename << "'<span class=\"msg-dl-link\"><a href=\"media/Attachment_" << rowid
//<< "_" << uniqueid << ".bin\" type=\"" << content_type << "\" download=\"" << original_filename << "\">&#129055;</a></span>\n"; // does not work
<< "_" << uniqueid << ".bin\" type=\"" << content_type << "\">&#129055;</a></span>\n";
<< "_" << uniqueid << "." << extension << "\" type=\"" << content_type << "\">&#129055;</a></span>\n";
}

htmloutput << std::string(indent, ' ') << "</div>\n";
Expand Down Expand Up @@ -1947,10 +1949,11 @@ void SignalBackup::HTMLwriteSharedContactDiv(std::ofstream &htmloutput, std::str

long long int rowid = sc.valueAsInt(0, "avatar_rowid", -1);
long long int uniqueid = sc.valueAsInt(0, "avatar_uniqueid", -1);
std::string extension("bin");
if (rowid >= 0 && uniqueid >= 0)
{
// write the attachment data
HTMLwriteAttachment(directory, threaddir, rowid, uniqueid, overwrite, append);
HTMLwriteAttachment(directory, threaddir, rowid, uniqueid, extension, overwrite, append);
}

// prefer phone number
Expand Down Expand Up @@ -2006,11 +2009,10 @@ void SignalBackup::HTMLwriteSharedContactDiv(std::ofstream &htmloutput, std::str
htmloutput << std::string(indent, ' ') << "<div class=\"shared-contact\">\n";
if (rowid > -1 && uniqueid > -1)
{
htmloutput << std::string(indent, ' ') << " <div class=\"shared-contact-avatar\" style=\"background-image: url('" << "media/Attachment_" << rowid << "_" << uniqueid << ".bin" << "');\">"
<< '\n';
htmloutput << std::string(indent, ' ') << " <div class=\"shared-contact-avatar\" style=\"background-image: url('" << "media/Attachment_" << rowid << "_" << uniqueid << "." << extension << "');\">\n";
htmloutput << std::string(indent, ' ') << " <input type=\"checkbox\" id=\"zoomCheck-" << rowid << "-" << uniqueid << "\">\n";
htmloutput << std::string(indent, ' ') << " <label for=\"zoomCheck-" << rowid << "-" << uniqueid << "\">\n";
htmloutput << std::string(indent, ' ') << " <img src=\"media/Attachment_" << rowid << "_" << uniqueid << ".bin\" alt=\"Shared avatar\" loading=\"lazy\">\n";
htmloutput << std::string(indent, ' ') << " <img src=\"media/Attachment_" << rowid << "_" << uniqueid << "." << extension << "\" alt=\"Shared avatar\" loading=\"lazy\">\n";
htmloutput << std::string(indent, ' ') << " </label>\n";
htmloutput << std::string(indent, ' ') << " </div>\n";
}
Expand Down
35 changes: 27 additions & 8 deletions signalbackup/htmlwriteattachment.cc
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
#include <cerrno>

bool SignalBackup::HTMLwriteAttachment(std::string const &directory, std::string const &threaddir,
long long int rowid, long long int uniqueid, bool overwrite,
bool append) const
long long int rowid, long long int uniqueid, std::string const &ext,
bool overwrite, bool append) const
{
if (!bepaald::contains(d_attachments, std::pair{rowid, uniqueid}))
return false;
Expand All @@ -45,31 +45,50 @@ bool SignalBackup::HTMLwriteAttachment(std::string const &directory, std::string

// check actual attachmentfile file
std::string attachment_filename = directory + "/" + threaddir +
"/media/Attachment_" + bepaald::toString(rowid) + "_" + bepaald::toString(uniqueid) + ".bin";
"/media/Attachment_" + bepaald::toString(rowid) + "_" + bepaald::toString(uniqueid) + "." + ext;
if (bepaald::fileOrDirExists(attachment_filename))
{
if (append) // file already exists, but we were asked to just use the existing file, so we're done
return true;

if (!overwrite) // file already exists, but we were not asked to overwrite -> error!
{
Logger::error("Attachment file exists. Not overwriting");
return false;
}
}

// write actual attachment:
AttachmentFrame *a = d_attachments.at({rowid, uniqueid}).get();

// migrate .bin files if they exist -> this could be temporary (2024-08-19)
std::string old_attachment_filename = directory + "/" + threaddir +
"/media/Attachment_" + bepaald::toString(rowid) + "_" + bepaald::toString(uniqueid) + ".bin";
std::error_code error;
if (bepaald::fileOrDirExists(old_attachment_filename) && append && !overwrite &&
a->attachmentSize() == std::filesystem::file_size(old_attachment_filename, error))
{
if (d_verbose) [[unlikely]]
Logger::message("Migrating file: ", old_attachment_filename, " -> ", attachment_filename);
std::filesystem::rename(old_attachment_filename, attachment_filename, error);
if (!error)
return true;
else [[unlikely]]
{
Logger::error("Failed to rename existing attachment (", old_attachment_filename, " -> ",
attachment_filename, ")");
return false;
}
}

// write actual attachment:
std::ofstream attachmentstream(attachment_filename, std::ios_base::binary);
if (!attachmentstream.is_open())
{
Logger::error("Failed to open file for writing: '", attachment_filename, "'",
" (errno: ", std::strerror(errno), ")"); // note: errno is not required to be set by std
// temporary !!
{
std::error_code ec;
std::filesystem::space_info const si = std::filesystem::space(directory, ec);
if (!ec)
std::filesystem::space_info const si = std::filesystem::space(directory, error);
if (!error)
{
Logger::message("Space available: ", static_cast<std::intmax_t>(si.available),
"\nAttachment size: ", a->attachmentSize());
Expand Down
8 changes: 5 additions & 3 deletions signalbackup/signalbackup.h
Original file line number Diff line number Diff line change
Expand Up @@ -360,8 +360,9 @@ class SignalBackup
void dtSetColumnNames(SqliteDB *ddb);
long long int scanSelf() const;
bool cleanAttachments();
AttachmentMetadata getAttachmentMetaData(std::string const &filename) const;
AttachmentMetadata getAttachmentMetaData(std::string const &filename, unsigned char *data, long long int data_size) const;
AttachmentMetadata getAttachmentMetaData(std::string const &filename, bool skiphash = false) const;
AttachmentMetadata getAttachmentMetaData(std::string const &filename, unsigned char *data,
long long int data_size, bool skiphash = false) const;
inline bool updatePartTableForReplace(AttachmentMetadata const &data, long long int id);
bool scrambleHelper(std::string const &table, std::vector<std::string> const &columns) const;
std::vector<long long int> getGroupUpdateRecipients(int thread = -1) const;
Expand Down Expand Up @@ -411,7 +412,7 @@ class SignalBackup
std::string const &directory, std::string const &threaddir,
bool overwrite, bool append) const;
bool HTMLwriteAttachment(std::string const &directory, std::string const &threaddir, long long int rowid,
long long int uniqueid, bool overwrite, bool append) const;
long long int uniqueid, std::string const &ext, bool overwrite, bool append) const;
bool HTMLprepMsgBody(std::string *body, std::vector<std::tuple<long long int, long long int, long long int>> const &mentions,
std::map<long long int, RecipientInfo> *recipients_info, bool incoming,
std::pair<std::shared_ptr<unsigned char []>, size_t> const &brdata, bool isquote) const;
Expand Down Expand Up @@ -982,6 +983,7 @@ inline std::string SignalBackup::utf8BytesToHexString(std::string const &data) c
return utf8BytesToHexString(reinterpret_cast<unsigned char const *>(data.data()), data.size());
}

// the const here is supposed to be temporary (2024-08-19)
inline void SignalBackup::warnOnce(std::string const &warning, bool error)
{
if (!bepaald::contains(d_warningsgiven, warning))
Expand Down

0 comments on commit 41f937e

Please sign in to comment.