mirror of
https://github.com/fabianonline/telegram_backup.git
synced 2024-11-22 08:46:15 +00:00
Playing around with exporters. Added a stupidly simple CSV exporter.
This commit is contained in:
parent
3c68e6d814
commit
c2b1c0625e
@ -16,7 +16,7 @@
|
|||||||
package de.fabianonline.telegram_backup
|
package de.fabianonline.telegram_backup
|
||||||
|
|
||||||
import de.fabianonline.telegram_backup.TelegramUpdateHandler
|
import de.fabianonline.telegram_backup.TelegramUpdateHandler
|
||||||
import de.fabianonline.telegram_backup.exporter.HTMLExporter
|
import de.fabianonline.telegram_backup.exporter.*
|
||||||
import com.github.badoualy.telegram.api.Kotlogram
|
import com.github.badoualy.telegram.api.Kotlogram
|
||||||
import com.github.badoualy.telegram.api.TelegramApp
|
import com.github.badoualy.telegram.api.TelegramApp
|
||||||
import com.github.badoualy.telegram.api.TelegramClient
|
import com.github.badoualy.telegram.api.TelegramClient
|
||||||
@ -131,9 +131,11 @@ class CommandLineController(val options: CommandLineOptions) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
val export = options.get("export")?.toLowerCase()
|
val export = options.get("export")?.toLowerCase()
|
||||||
logger.debug("options.val_export: {}", export)
|
logger.debug("options.export: {}", export)
|
||||||
when (export) {
|
when(export) {
|
||||||
"html" -> { HTMLExporter(database, user_manager, settings=settings, file_base=file_base).export(); System.exit(0) }
|
"html" -> { HTMLExporter(database, user_manager, settings=settings, file_base=file_base).export() ; System.exit(0) }
|
||||||
|
"csv" -> { CSVExporter(database, file_base, settings).export(); System.exit(0) }
|
||||||
|
"csv_links" -> { CSVLinkExporter(database, file_base, settings).export() ; System.exit(0) }
|
||||||
null -> { /* No export whished -> do nothing. */ }
|
null -> { /* No export whished -> do nothing. */ }
|
||||||
else -> show_error("Unknown export format '${export}'.")
|
else -> show_error("Unknown export format '${export}'.")
|
||||||
}
|
}
|
||||||
@ -278,6 +280,7 @@ class CommandLineController(val options: CommandLineOptions) {
|
|||||||
println(" --target <x> Target directory for the files.")
|
println(" --target <x> Target directory for the files.")
|
||||||
println(" --export <format> Export the database. Valid formats are:")
|
println(" --export <format> Export the database. Valid formats are:")
|
||||||
println(" html - Creates HTML files.")
|
println(" html - Creates HTML files.")
|
||||||
|
println(" csv - Creates daily CSV files for the last 7 days. Set max_file_age to change the number of days.")
|
||||||
println(" --license Displays the license of this program.")
|
println(" --license Displays the license of this program.")
|
||||||
println(" --daemon Keep running after the backup and automatically save new messages.")
|
println(" --daemon Keep running after the backup and automatically save new messages.")
|
||||||
println(" --anonymize (Try to) Remove all sensitive information from output. Useful for requesting support.")
|
println(" --anonymize (Try to) Remove all sensitive information from output. Useful for requesting support.")
|
||||||
|
@ -623,8 +623,36 @@ class Database constructor(val file_base: String, val user_manager: UserManager)
|
|||||||
rs.close()
|
rs.close()
|
||||||
return result
|
return result
|
||||||
}
|
}
|
||||||
|
|
||||||
fun getMessagesForExport(c: AbstractChat, limit: Int=-1, offset: Int=0): LinkedList<HashMap<String, Any>> {
|
/**
 * Streams messages to [method], one map per message, ordered by message time.
 *
 * Selected are all messages with `start <= time < end` whose `api_layer` equals
 * `Kotlogram.API_LAYER`.
 *
 * Keys put into every map: "text", "time" (read via `getTime` from `time*1000`),
 * "user_first_name", "user_last_name", "user_username", "json", "source_type",
 * "source_id" and "message" (the same value as "text").
 * NOTE(review): values are stored as returned by JDBC; callers in this file treat
 * them as possibly null - confirm for the columns you rely on.
 *
 * @param start lower bound (inclusive) compared against the `time` column
 * @param end upper bound (exclusive) compared against the `time` column
 * @param method callback invoked once per row with a freshly created map
 */
fun getMessagesForCSVExport(start: Long, end: Long, method: (HashMap<String, Any>) -> Unit) {
    // The message text is selected once and exposed under both "text" and "message".
    val query = "SELECT text, time*1000, users.first_name as user_first_name, users.last_name as user_last_name, " +
        "users.username as user_username, messages.json, source_type, source_id " +
        "FROM messages " +
        "LEFT JOIN users ON users.id=messages.sender_id " +
        "WHERE time>=${start} AND time<${end} AND messages.api_layer=${Kotlogram.API_LAYER} " +
        "ORDER BY messages.time"
    val rs = stmt.executeQuery(query)
    try {
        while (rs.next()) {
            val map = HashMap<String, Any>()
            val text = rs.getString(1)
            map.put("text", text)
            map.put("time", rs.getTime(2))
            map.put("user_first_name", rs.getString(3))
            map.put("user_last_name", rs.getString(4))
            map.put("user_username", rs.getString(5))
            map.put("json", rs.getString(6))
            map.put("source_type", rs.getString(7))
            map.put("source_id", rs.getInt(8))
            map.put("message", text)
            method.invoke(map)
        }
    } finally {
        // Release the result set even when the callback throws.
        rs.close()
    }
}
|
||||||
|
|
||||||
|
fun getMessagesForExport(c: AbstractChat, limit: Int=-1, offset: Int=0, time_range: LongRange? = null): LinkedList<HashMap<String, Any>> {
|
||||||
|
var conditions = ""
|
||||||
|
if (time_range != null) {
|
||||||
|
conditions = "AND time>=#{time_range.start} AND time<=#{time_range.endInclusive}"
|
||||||
|
}
|
||||||
var query = "SELECT messages.message_id as message_id, text, time*1000 as time, has_media, " +
|
var query = "SELECT messages.message_id as message_id, text, time*1000 as time, has_media, " +
|
||||||
"media_type, media_file, media_size, users.first_name as user_first_name, users.last_name as user_last_name, " +
|
"media_type, media_file, media_size, users.first_name as user_first_name, users.last_name as user_last_name, " +
|
||||||
"users.username as user_username, users.id as user_id, " +
|
"users.username as user_username, users.id as user_id, " +
|
||||||
@ -632,9 +660,9 @@ class Database constructor(val file_base: String, val user_manager: UserManager)
|
|||||||
"FROM messages " +
|
"FROM messages " +
|
||||||
"LEFT JOIN users ON users.id=messages.sender_id " +
|
"LEFT JOIN users ON users.id=messages.sender_id " +
|
||||||
"LEFT JOIN users AS users_fwd ON users_fwd.id=fwd_from_id WHERE " +
|
"LEFT JOIN users AS users_fwd ON users_fwd.id=fwd_from_id WHERE " +
|
||||||
c.query + " " +
|
c.query + " " + conditions + " " +
|
||||||
"ORDER BY messages.message_id"
|
"ORDER BY messages.message_id"
|
||||||
|
|
||||||
if ( limit != -1 ) {
|
if ( limit != -1 ) {
|
||||||
query = query + " LIMIT ${limit} OFFSET ${offset}"
|
query = query + " LIMIT ${limit} OFFSET ${offset}"
|
||||||
}
|
}
|
||||||
|
@ -0,0 +1,111 @@
|
|||||||
|
/* Telegram_Backup
|
||||||
|
* Copyright (C) 2016 Fabian Schlenz
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
package de.fabianonline.telegram_backup.exporter
|
||||||
|
|
||||||
|
import java.io.File
|
||||||
|
import java.io.PrintWriter
|
||||||
|
import java.io.OutputStreamWriter
|
||||||
|
import java.io.FileOutputStream
|
||||||
|
import java.nio.charset.Charset
|
||||||
|
import java.io.FileWriter
|
||||||
|
import java.io.IOException
|
||||||
|
import java.io.FileNotFoundException
|
||||||
|
import java.net.URL
|
||||||
|
import org.apache.commons.io.FileUtils
|
||||||
|
import java.util.LinkedList
|
||||||
|
import java.util.HashMap
|
||||||
|
import java.time.LocalDate
|
||||||
|
import java.time.LocalTime
|
||||||
|
import java.time.LocalDateTime
|
||||||
|
import java.time.ZoneOffset
|
||||||
|
import java.time.format.DateTimeFormatter
|
||||||
|
import java.sql.Time
|
||||||
|
import java.text.SimpleDateFormat
|
||||||
|
|
||||||
|
import com.github.mustachejava.DefaultMustacheFactory
|
||||||
|
import com.github.mustachejava.Mustache
|
||||||
|
import com.github.mustachejava.MustacheFactory
|
||||||
|
import de.fabianonline.telegram_backup.*
|
||||||
|
import com.github.badoualy.telegram.tl.api.*
|
||||||
|
import com.google.gson.*
|
||||||
|
import com.github.salomonbrys.kotson.*
|
||||||
|
|
||||||
|
|
||||||
|
import org.slf4j.Logger
|
||||||
|
import org.slf4j.LoggerFactory
|
||||||
|
|
||||||
|
/**
 * Writes the messages stored in [db] to CSV files rendered from the Mustache template
 * `templates/csv/messages.csv`. All files are created in `<file_base>files/`.
 */
class CSVExporter(val db: Database, val file_base: String, val settings: Settings) {
    val logger = LoggerFactory.getLogger(CSVExporter::class.java)
    val mustache = DefaultMustacheFactory().compile("templates/csv/messages.csv")
    val dialogs = db.getListOfDialogsForExport()
    val chats = db.getListOfChatsForExport()
    val datetime_format = SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    val base = file_base + "files" + File.separatorChar

    /**
     * Runs the export.
     *
     * `settings.max_file_age` selects the number of past days to export (-1 means 7).
     * For a positive number one file `messages.<yyyy-MM-dd>.csv` is written per day,
     * skipping days whose file already exists. For any other value a single
     * `messages.all.csv` containing every message is written.
     */
    fun export() {
        val today = LocalDateTime.of(LocalDate.now(), LocalTime.MIDNIGHT)
        val timezone = ZoneOffset.systemDefault()
        val days = if (settings.max_file_age==-1) 7 else settings.max_file_age

        // Create base dir
        logger.debug("Creating base dir")
        File(base).mkdirs()

        if (days > 0) {
            for (dayOffset in days downTo 1) {
                val day = today.minusDays(dayOffset.toLong())
                val nextDay = day.plusDays(1)

                val start = day.toEpochSecond(timezone.rules.getOffset(day))
                // Use the next local midnight instead of start + 86400: a local day is
                // 23 or 25 hours long when the UTC offset changes (DST).
                val end = nextDay.toEpochSecond(timezone.rules.getOffset(nextDay))
                val filename = base + "messages.${day.format(DateTimeFormatter.ISO_LOCAL_DATE)}.csv"
                // `filename` already starts with file_base (via `base`), so it must not be
                // prefixed a second time when checking for an existing export.
                if (!File(filename).exists()) {
                    logger.debug("Range: {} to {}", start, end)
                    println("Processing messages for ${day}...")
                    exportToFile(start, end, filename)
                }
            }
        } else {
            println("Processing all messages...")
            exportToFile(0, Long.MAX_VALUE, base + "messages.all.csv")
        }
    }

    /**
     * Renders all messages with `start <= time < end` into the file [filename].
     *
     * @param start lower time bound (inclusive), passed to [Database.getMessagesForCSVExport]
     * @param end upper time bound (exclusive), passed to [Database.getMessagesForCSVExport]
     * @param filename full path of the CSV file to (over)write
     */
    fun exportToFile(start: Long, end: Long, filename: String) {
        val list = mutableListOf<Map<String, String?>>()
        db.getMessagesForCSVExport(start, end) {data: HashMap<String, Any> ->
            val scope = HashMap<String, String?>()
            val timestamp = data["time"] as Time
            scope.put("time", datetime_format.format(timestamp))
            scope.put("username", data["user_username"] as String?)
            val sourceId = data["source_id"]
            if (data["source_type"]=="dialog") {
                scope.put("chat_name", "@" + (dialogs.firstOrNull{it.id==sourceId}?.username ?: ""))
            } else {
                scope.put("chat_name", chats.firstOrNull{it.id==sourceId}?.name)
            }
            // Nullable cast: a message without text must not abort the export.
            scope.put("message", data["message"] as String?)
            list.add(scope)
        }
        // `use` closes (and thereby flushes) the writer even if rendering fails.
        getWriter(filename).use { writer ->
            mustache.execute(writer, mapOf("messages" to list))
        }
    }

    /** Opens [filename] for writing as UTF-8, replacing an existing file. */
    private fun getWriter(filename: String): OutputStreamWriter {
        logger.trace("Creating writer for file {}", filename.anonymize())
        return OutputStreamWriter(FileOutputStream(filename), Charset.forName("UTF-8").newEncoder())
    }
}
|
@ -0,0 +1,139 @@
|
|||||||
|
/* Telegram_Backup
|
||||||
|
* Copyright (C) 2016 Fabian Schlenz
|
||||||
|
*
|
||||||
|
* This program is free software: you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation, either version 3 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* This program is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with this program. If not, see <http://www.gnu.org/licenses/>. */
|
||||||
|
|
||||||
|
package de.fabianonline.telegram_backup.exporter
|
||||||
|
|
||||||
|
import java.io.File
|
||||||
|
import java.io.PrintWriter
|
||||||
|
import java.io.OutputStreamWriter
|
||||||
|
import java.io.FileOutputStream
|
||||||
|
import java.nio.charset.Charset
|
||||||
|
import java.io.FileWriter
|
||||||
|
import java.io.IOException
|
||||||
|
import java.io.FileNotFoundException
|
||||||
|
import java.net.URL
|
||||||
|
import org.apache.commons.io.FileUtils
|
||||||
|
import java.util.LinkedList
|
||||||
|
import java.util.HashMap
|
||||||
|
import java.time.LocalDate
|
||||||
|
import java.time.LocalTime
|
||||||
|
import java.time.LocalDateTime
|
||||||
|
import java.time.ZoneOffset
|
||||||
|
import java.time.format.DateTimeFormatter
|
||||||
|
import java.sql.Time
|
||||||
|
import java.text.SimpleDateFormat
|
||||||
|
|
||||||
|
import com.github.mustachejava.DefaultMustacheFactory
|
||||||
|
import com.github.mustachejava.Mustache
|
||||||
|
import com.github.mustachejava.MustacheFactory
|
||||||
|
import de.fabianonline.telegram_backup.*
|
||||||
|
import com.github.badoualy.telegram.tl.api.*
|
||||||
|
import com.google.gson.*
|
||||||
|
import com.github.salomonbrys.kotson.*
|
||||||
|
|
||||||
|
|
||||||
|
import org.slf4j.Logger
|
||||||
|
import org.slf4j.LoggerFactory
|
||||||
|
|
||||||
|
/**
 * Writes the links found in the messages stored in [db] to CSV files rendered from the
 * Mustache template `templates/csv/links.csv`. All files are created in `<file_base>files/`.
 */
class CSVLinkExporter(val db: Database, val file_base: String, val settings: Settings) {
    val logger = LoggerFactory.getLogger(CSVLinkExporter::class.java)
    val mustache = DefaultMustacheFactory().compile("templates/csv/links.csv")
    val dialogs = db.getListOfDialogsForExport()
    val chats = db.getListOfChatsForExport()
    val datetime_format = SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    val base = file_base + "files" + File.separatorChar

    // Placeholder used as url and host when an entity's offset/length lies outside the message text.
    val invalid_entity_index = "[INVALID ENTITY INDEX]"

    /**
     * Runs the export.
     *
     * `settings.max_file_age` selects the number of past days to export (-1 means 7).
     * For a positive number one file `links.<yyyy-MM-dd>.csv` is written per day,
     * skipping days whose file already exists. For any other value a single
     * `links.all.csv` covering every message is written.
     */
    fun export() {
        val today = LocalDateTime.of(LocalDate.now(), LocalTime.MIDNIGHT)
        val timezone = ZoneOffset.systemDefault()
        val days = if (settings.max_file_age==-1) 7 else settings.max_file_age

        // Create base dir
        logger.debug("Creating base dir")
        File(base).mkdirs()

        if (days > 0) {
            for (dayOffset in days downTo 1) {
                val day = today.minusDays(dayOffset.toLong())
                val nextDay = day.plusDays(1)

                val start = day.toEpochSecond(timezone.rules.getOffset(day))
                // Use the next local midnight instead of start + 86400: a local day is
                // 23 or 25 hours long when the UTC offset changes (DST).
                val end = nextDay.toEpochSecond(timezone.rules.getOffset(nextDay))
                val filename = base + "links.${day.format(DateTimeFormatter.ISO_LOCAL_DATE)}.csv"
                // `filename` already starts with file_base (via `base`), so it must not be
                // prefixed a second time when checking for an existing export.
                if (!File(filename).exists()) {
                    logger.debug("Range: {} to {}", start, end)
                    println("Processing messages for ${day}...")
                    exportToFile(start, end, filename)
                }
            }
        } else {
            println("Processing all messages...")
            exportToFile(0, Long.MAX_VALUE, base + "links.all.csv")
        }
    }

    /**
     * Renders one CSV row per URL entity of every message with `start <= time < end`
     * into the file [filename]. Messages whose JSON has no "entities" member are skipped.
     *
     * @param start lower time bound (inclusive), passed to [Database.getMessagesForCSVExport]
     * @param end upper time bound (exclusive), passed to [Database.getMessagesForCSVExport]
     * @param filename full path of the CSV file to (over)write
     */
    fun exportToFile(start: Long, end: Long, filename: String) {
        val list = mutableListOf<Map<String, String?>>()
        val parser = JsonParser()
        db.getMessagesForCSVExport(start, end) {data: HashMap<String, Any> ->
            val json = parser.parse(data.get("json") as String).obj
            if (!json.contains("entities")) return@getMessagesForCSVExport

            val urls: List<String> = json["entities"].array.filter{it.obj.isA("messageEntityTextUrl") || it.obj.isA("messageEntityUrl")}.map {
                var url: String
                try {
                    // Text-URL entities carry the target in "url"; plain URL entities only mark a range of the message text.
                    url = if (it.obj.contains("url")) it["url"].string else json["message"].string.substring(it["offset"].int, it["offset"].int + it["length"].int)
                    if (!url.toLowerCase().startsWith("http:") && !url.toLowerCase().startsWith("https://")) url = "http://${url}"
                } catch (e: StringIndexOutOfBoundsException) {
                    url = invalid_entity_index
                }
                url
            }

            for (url in urls) {
                val scope = HashMap<String, String?>()
                scope.put("url", url)
                scope.put("host", hostOf(url))
                val timestamp = data["time"] as Time
                scope.put("time", datetime_format.format(timestamp))
                scope.put("username", data["user_username"] as String?)
                val sourceId = data["source_id"]
                if (data["source_type"]=="dialog") {
                    scope.put("chat_name", "@" + (dialogs.firstOrNull{it.id==sourceId}?.username ?: ""))
                } else {
                    scope.put("chat_name", chats.firstOrNull{it.id==sourceId}?.name)
                }
                list.add(scope)
            }
        }
        // `use` closes (and thereby flushes) the writer even if rendering fails.
        getWriter(filename).use { writer ->
            mustache.execute(writer, mapOf("links" to list))
        }
    }

    /**
     * Returns the host part of [url]. The invalid-entity placeholder is passed through
     * unchanged; a URL that cannot be parsed yields null instead of aborting the export.
     */
    private fun hostOf(url: String): String? {
        if (url == invalid_entity_index) return invalid_entity_index
        return try {
            URL(url).getHost()
        } catch (e: java.net.MalformedURLException) {
            logger.debug("Found a link whose host could not be parsed")
            null
        }
    }

    /** Opens [filename] for writing as UTF-8, replacing an existing file. */
    private fun getWriter(filename: String): OutputStreamWriter {
        logger.trace("Creating writer for file {}", filename.anonymize())
        return OutputStreamWriter(FileOutputStream(filename), Charset.forName("UTF-8").newEncoder())
    }
}
|
3
src/main/resources/templates/csv/links.csv
Normal file
3
src/main/resources/templates/csv/links.csv
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
{{#links}}
|
||||||
|
"{{time}}","{{url}}","{{host}}","{{username}}","{{chat_name}}"
|
||||||
|
{{/links}}
|
Can't render this file because it has a wrong number of fields in line 2.
|
3
src/main/resources/templates/csv/messages.csv
Normal file
3
src/main/resources/templates/csv/messages.csv
Normal file
@ -0,0 +1,3 @@
|
|||||||
|
{{#messages}}
|
||||||
|
"{{time}}","{{username}}","{{chat_name}}","{{message}}"
|
||||||
|
{{/messages}}
|
Can't render this file because it has a wrong number of fields in line 2.
|
Loading…
Reference in New Issue
Block a user