From c2b1c0625e4242fe99743df67a924b0c2816cebc Mon Sep 17 00:00:00 2001 From: Fabian Schlenz Date: Fri, 20 Apr 2018 06:43:48 +0200 Subject: [PATCH] Playing around with exporters. Added a stupidly simple CSV exporter. --- .../telegram_backup/CommandLineController.kt | 11 +- .../fabianonline/telegram_backup/Database.kt | 36 ++++- .../telegram_backup/exporter/CSVExporter.kt | 111 ++++++++++++++ .../exporter/CSVLinkExporter.kt | 139 ++++++++++++++++++ src/main/resources/templates/csv/links.csv | 3 + src/main/resources/templates/csv/messages.csv | 3 + 6 files changed, 295 insertions(+), 8 deletions(-) create mode 100644 src/main/kotlin/de/fabianonline/telegram_backup/exporter/CSVExporter.kt create mode 100644 src/main/kotlin/de/fabianonline/telegram_backup/exporter/CSVLinkExporter.kt create mode 100644 src/main/resources/templates/csv/links.csv create mode 100644 src/main/resources/templates/csv/messages.csv diff --git a/src/main/kotlin/de/fabianonline/telegram_backup/CommandLineController.kt b/src/main/kotlin/de/fabianonline/telegram_backup/CommandLineController.kt index ffd860a..c73f832 100644 --- a/src/main/kotlin/de/fabianonline/telegram_backup/CommandLineController.kt +++ b/src/main/kotlin/de/fabianonline/telegram_backup/CommandLineController.kt @@ -16,7 +16,7 @@ package de.fabianonline.telegram_backup import de.fabianonline.telegram_backup.TelegramUpdateHandler -import de.fabianonline.telegram_backup.exporter.HTMLExporter +import de.fabianonline.telegram_backup.exporter.* import com.github.badoualy.telegram.api.Kotlogram import com.github.badoualy.telegram.api.TelegramApp import com.github.badoualy.telegram.api.TelegramClient @@ -131,9 +131,11 @@ class CommandLineController(val options: CommandLineOptions) { } val export = options.get("export")?.toLowerCase() - logger.debug("options.val_export: {}", export) - when (export) { - "html" -> { HTMLExporter(database, user_manager, settings=settings, file_base=file_base).export(); System.exit(0) } + logger.debug("options.export: {}", export) + when(export) { + "html" -> { HTMLExporter(database, user_manager, settings=settings, file_base=file_base).export() ; System.exit(0) } + "csv" -> { CSVExporter(database, file_base, settings).export(); System.exit(0) } + "csv_links" -> { CSVLinkExporter(database, file_base, settings).export() ; System.exit(0) } null -> { /* No export whished -> do nothing. */ } else -> show_error("Unknown export format '${export}'.") } @@ -278,6 +280,7 @@ class CommandLineController(val options: CommandLineOptions) { println(" --target Target directory for the files.") println(" --export Export the database. Valid formats are:") println(" html - Creates HTML files.") + println(" csv - Creates daily CSV files for the last 7 days. Set max_file_age to change the number of days.") println(" --license Displays the license of this program.") println(" --daemon Keep running after the backup and automatically save new messages.") println(" --anonymize (Try to) Remove all sensitive information from output. Useful for requesting support.") diff --git a/src/main/kotlin/de/fabianonline/telegram_backup/Database.kt b/src/main/kotlin/de/fabianonline/telegram_backup/Database.kt index a9b1fdf..d15803c 100644 --- a/src/main/kotlin/de/fabianonline/telegram_backup/Database.kt +++ b/src/main/kotlin/de/fabianonline/telegram_backup/Database.kt @@ -623,8 +623,36 @@ class Database constructor(val file_base: String, val user_manager: UserManager) rs.close() return result } - - fun getMessagesForExport(c: AbstractChat, limit: Int=-1, offset: Int=0): LinkedList> { + + fun getMessagesForCSVExport(start: Long, end: Long, method: (HashMap) -> Unit) { + var query = "SELECT text, time*1000, users.first_name as user_first_name, users.last_name as user_last_name, " + + "users.username as user_username, messages.json, source_type, source_id, text " + + "FROM messages " + + "LEFT JOIN users ON users.id=messages.sender_id " + + "WHERE time>=${start} AND time<${end} AND messages.api_layer=${Kotlogram.API_LAYER} " + + "ORDER BY messages.time" + val rs = stmt.executeQuery(query) + while (rs.next()) { + val map = HashMap() + map.put("text", rs.getString(1)) + map.put("time", rs.getTime(2)) + map.put("user_first_name", rs.getString(3)) + map.put("user_last_name", rs.getString(4)) + map.put("user_username", rs.getString(5)) + map.put("json", rs.getString(6)) + map.put("source_type", rs.getString(7)) + map.put("source_id", rs.getInt(8)) + map.put("message", rs.getString(9)) + method.invoke(map) + } + rs.close() + } + + fun getMessagesForExport(c: AbstractChat, limit: Int=-1, offset: Int=0, time_range: LongRange? = null): LinkedList> { + var conditions = "" + if (time_range != null) { + conditions = "AND time>=#{time_range.start} AND time<=#{time_range.endInclusive}" + } var query = "SELECT messages.message_id as message_id, text, time*1000 as time, has_media, " + "media_type, media_file, media_size, users.first_name as user_first_name, users.last_name as user_last_name, " + "users.username as user_username, users.id as user_id, " + @@ -632,9 +660,9 @@ class Database constructor(val file_base: String, val user_manager: UserManager) "FROM messages " + "LEFT JOIN users ON users.id=messages.sender_id " + "LEFT JOIN users AS users_fwd ON users_fwd.id=fwd_from_id WHERE " + - c.query + " " + + c.query + " " + conditions + " " + "ORDER BY messages.message_id" - + if ( limit != -1 ) { query = query + " LIMIT ${limit} OFFSET ${offset}" } diff --git a/src/main/kotlin/de/fabianonline/telegram_backup/exporter/CSVExporter.kt b/src/main/kotlin/de/fabianonline/telegram_backup/exporter/CSVExporter.kt new file mode 100644 index 0000000..146e9c1 --- /dev/null +++ b/src/main/kotlin/de/fabianonline/telegram_backup/exporter/CSVExporter.kt @@ -0,0 +1,111 @@ +/* Telegram_Backup + * Copyright (C) 2016 Fabian Schlenz + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . */ + +package de.fabianonline.telegram_backup.exporter + +import java.io.File +import java.io.PrintWriter +import java.io.OutputStreamWriter +import java.io.FileOutputStream +import java.nio.charset.Charset +import java.io.FileWriter +import java.io.IOException +import java.io.FileNotFoundException +import java.net.URL +import org.apache.commons.io.FileUtils +import java.util.LinkedList +import java.util.HashMap +import java.time.LocalDate +import java.time.LocalTime +import java.time.LocalDateTime +import java.time.ZoneOffset +import java.time.format.DateTimeFormatter +import java.sql.Time +import java.text.SimpleDateFormat + +import com.github.mustachejava.DefaultMustacheFactory +import com.github.mustachejava.Mustache +import com.github.mustachejava.MustacheFactory +import de.fabianonline.telegram_backup.* +import com.github.badoualy.telegram.tl.api.* +import com.google.gson.* +import com.github.salomonbrys.kotson.* + + +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +class CSVExporter(val db: Database, val file_base: String, val settings: Settings) { + val logger = LoggerFactory.getLogger(CSVExporter::class.java) + val mustache = DefaultMustacheFactory().compile("templates/csv/messages.csv") + val dialogs = db.getListOfDialogsForExport() + val chats = db.getListOfChatsForExport() + val datetime_format = SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + val base = file_base + "files" + File.separatorChar + + fun export() { + val today = LocalDateTime.of(LocalDate.now(), LocalTime.MIDNIGHT) + val timezone = ZoneOffset.systemDefault() + val days = if (settings.max_file_age==-1) 7 else settings.max_file_age + + // Create base dir + logger.debug("Creating base dir") + File(base).mkdirs() + + if (days > 0) { + for (dayOffset in days downTo 1) { + val day = today.minusDays(dayOffset.toLong()) + + val start = day.toEpochSecond(timezone.rules.getOffset(day)) + val end = start + 24 * 60 * 60 + val filename = base + "messages.${day.format(DateTimeFormatter.ISO_LOCAL_DATE)}.csv" + if (!File(file_base + filename).exists()) { + logger.debug("Range: {} to {}", start, end) + println("Processing messages for ${day}...") + exportToFile(start, end, filename) + } + } + } else { + println("Processing all messages...") + exportToFile(0, Long.MAX_VALUE, base + "messages.all.csv") + } + } + + fun exportToFile(start: Long, end: Long, filename: String) { + val list = mutableListOf>() + db.getMessagesForCSVExport(start, end) {data: HashMap -> + val scope = HashMap() + val timestamp = data["time"] as Time + scope.put("time", datetime_format.format(timestamp)) + scope.put("username", if (data["user_username"]!=null) data["user_username"] as String else null) + if (data["source_type"]=="dialog") { + scope.put("chat_name", "@" + (dialogs.firstOrNull{it.id==data["source_id"]}?.username ?: "")) + } else { + scope.put("chat_name", chats.firstOrNull{it.id==data["source_id"]}?.name) + } + scope.put("message", data["message"] as String) + list.add(scope) + } + val writer = getWriter(filename) + mustache.execute(writer, mapOf("messages" to list)) + writer.close() + } + + private fun getWriter(filename: String): OutputStreamWriter { + logger.trace("Creating writer for file {}", filename.anonymize()) + return OutputStreamWriter(FileOutputStream(filename), Charset.forName("UTF-8").newEncoder()) + } +} diff --git a/src/main/kotlin/de/fabianonline/telegram_backup/exporter/CSVLinkExporter.kt b/src/main/kotlin/de/fabianonline/telegram_backup/exporter/CSVLinkExporter.kt new file mode 100644 index 0000000..60fb8c2 --- /dev/null +++ b/src/main/kotlin/de/fabianonline/telegram_backup/exporter/CSVLinkExporter.kt @@ -0,0 +1,139 @@ +/* Telegram_Backup + * Copyright (C) 2016 Fabian Schlenz + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . */ + +package de.fabianonline.telegram_backup.exporter + +import java.io.File +import java.io.PrintWriter +import java.io.OutputStreamWriter +import java.io.FileOutputStream +import java.nio.charset.Charset +import java.io.FileWriter +import java.io.IOException +import java.io.FileNotFoundException +import java.net.URL +import org.apache.commons.io.FileUtils +import java.util.LinkedList +import java.util.HashMap +import java.time.LocalDate +import java.time.LocalTime +import java.time.LocalDateTime +import java.time.ZoneOffset +import java.time.format.DateTimeFormatter +import java.sql.Time +import java.text.SimpleDateFormat + +import com.github.mustachejava.DefaultMustacheFactory +import com.github.mustachejava.Mustache +import com.github.mustachejava.MustacheFactory +import de.fabianonline.telegram_backup.* +import com.github.badoualy.telegram.tl.api.* +import com.google.gson.* +import com.github.salomonbrys.kotson.* + + +import org.slf4j.Logger +import org.slf4j.LoggerFactory + +class CSVLinkExporter(val db: Database, val file_base: String, val settings: Settings) { + val logger = LoggerFactory.getLogger(CSVLinkExporter::class.java) + val mustache = DefaultMustacheFactory().compile("templates/csv/links.csv") + val dialogs = db.getListOfDialogsForExport() + val chats = db.getListOfChatsForExport() + val datetime_format = SimpleDateFormat("yyyy-MM-dd HH:mm:ss") + val base = file_base + "files" + File.separatorChar + + val invalid_entity_index = "[INVALID ENTITY INDEX]" + + fun export() { + val today = LocalDateTime.of(LocalDate.now(), LocalTime.MIDNIGHT) + val timezone = ZoneOffset.systemDefault() + val days = if (settings.max_file_age==-1) 7 else settings.max_file_age + + // Create base dir + logger.debug("Creating base dir") + File(base).mkdirs() + + if (days > 0) { + for (dayOffset in days downTo 1) { + val day = today.minusDays(dayOffset.toLong()) + + val start = day.toEpochSecond(timezone.rules.getOffset(day)) + val end = start + 24 * 60 * 60 + val filename = base + "links.${day.format(DateTimeFormatter.ISO_LOCAL_DATE)}.csv" + if (!File(file_base + filename).exists()) { + logger.debug("Range: {} to {}", start, end) + println("Processing messages for ${day}...") + exportToFile(start, end, filename) + } + } + } else { + println("Processing all messages...") + exportToFile(0, Long.MAX_VALUE, base + "links.all.csv") + } + } + + fun exportToFile(start: Long, end: Long, filename: String) { + + //val messages: List> = db.getMessagesForCSVExport(start, end) + val list = mutableListOf>() + val parser = JsonParser() + //logger.debug("Got {} messages", messages.size) + db.getMessagesForCSVExport(start, end) {data: HashMap -> + //val msg: TLMessage = data.get("message_object") as TLMessage + val json = parser.parse(data.get("json") as String).obj + if (!json.contains("entities")) return@getMessagesForCSVExport + + val urls: List? = json["entities"].array.filter{it.obj.isA("messageEntityTextUrl") || it.obj.isA("messageEntityUrl")}?.map { + var url: String + try { + url = if (it.obj.contains("url")) it["url"].string else json["message"].string.substring(it["offset"].int, it["offset"].int + it["length"].int) + if (!url.toLowerCase().startsWith("http:") && !url.toLowerCase().startsWith("https://")) url = "http://${url}" + } catch (e: StringIndexOutOfBoundsException) { + url = invalid_entity_index + } + url + } + + if (urls != null) for(url in urls) { + val scope = HashMap() + scope.put("url", url) + if (url == invalid_entity_index) { + scope.put("host", invalid_entity_index) + } else { + scope.put("host", URL(url).getHost()) + } + val timestamp = data["time"] as Time + scope.put("time", datetime_format.format(timestamp)) + scope.put("username", if (data["user_username"]!=null) data["user_username"] as String else null) + if (data["source_type"]=="dialog") { + scope.put("chat_name", "@" + (dialogs.firstOrNull{it.id==data["source_id"]}?.username ?: "")) + } else { + scope.put("chat_name", chats.firstOrNull{it.id==data["source_id"]}?.name) + } + list.add(scope) + } + } + val writer = getWriter(filename) + mustache.execute(writer, mapOf("links" to list)) + writer.close() + } + + private fun getWriter(filename: String): OutputStreamWriter { + logger.trace("Creating writer for file {}", filename.anonymize()) + return OutputStreamWriter(FileOutputStream(filename), Charset.forName("UTF-8").newEncoder()) + } +} diff --git a/src/main/resources/templates/csv/links.csv b/src/main/resources/templates/csv/links.csv new file mode 100644 index 0000000..063c2ed --- /dev/null +++ b/src/main/resources/templates/csv/links.csv @@ -0,0 +1,3 @@ +{{#links}} +"{{time}}","{{url}}","{{host}}","{{username}}","{{chat_name}}" +{{/links}} diff --git a/src/main/resources/templates/csv/messages.csv b/src/main/resources/templates/csv/messages.csv new file mode 100644 index 0000000..139bec7 --- /dev/null +++ b/src/main/resources/templates/csv/messages.csv @@ -0,0 +1,3 @@ +{{#messages}} +"{{time}}","{{username}}","{{chat_name}}","{{message}}" +{{/messages}}