Playing around with exporters. Added a stupidly simple CSV exporter.

This commit is contained in:
Fabian Schlenz 2018-04-20 06:43:48 +02:00
parent 3c68e6d814
commit c2b1c0625e
6 changed files with 295 additions and 8 deletions

View File

@ -16,7 +16,7 @@
package de.fabianonline.telegram_backup
import de.fabianonline.telegram_backup.TelegramUpdateHandler
import de.fabianonline.telegram_backup.exporter.HTMLExporter
import de.fabianonline.telegram_backup.exporter.*
import com.github.badoualy.telegram.api.Kotlogram
import com.github.badoualy.telegram.api.TelegramApp
import com.github.badoualy.telegram.api.TelegramClient
@ -131,9 +131,11 @@ class CommandLineController(val options: CommandLineOptions) {
}
val export = options.get("export")?.toLowerCase()
logger.debug("options.val_export: {}", export)
when (export) {
"html" -> { HTMLExporter(database, user_manager, settings=settings, file_base=file_base).export(); System.exit(0) }
logger.debug("options.export: {}", export)
when(export) {
"html" -> { HTMLExporter(database, user_manager, settings=settings, file_base=file_base).export() ; System.exit(0) }
"csv" -> { CSVExporter(database, file_base, settings).export(); System.exit(0) }
"csv_links" -> { CSVLinkExporter(database, file_base, settings).export() ; System.exit(0) }
null -> { /* No export wished -> do nothing. */ }
else -> show_error("Unknown export format '${export}'.")
}
@ -278,6 +280,7 @@ class CommandLineController(val options: CommandLineOptions) {
println(" --target <x> Target directory for the files.")
println(" --export <format> Export the database. Valid formats are:")
println(" html - Creates HTML files.")
println(" csv - Creates daily CSV files for the last 7 days. Set max_file_age to change the number of days.")
println(" --license Displays the license of this program.")
println(" --daemon Keep running after the backup and automatically save new messages.")
println(" --anonymize (Try to) Remove all sensitive information from output. Useful for requesting support.")

View File

@ -623,8 +623,36 @@ class Database constructor(val file_base: String, val user_manager: UserManager)
rs.close()
return result
}
fun getMessagesForExport(c: AbstractChat, limit: Int=-1, offset: Int=0): LinkedList<HashMap<String, Any>> {
/**
 * Feeds every message with `start <= time < end` to [method], one row at a time.
 *
 * Only rows whose `messages.api_layer` equals [Kotlogram.API_LAYER] are selected, ordered by
 * `messages.time`. Each row is handed over as a map with these keys:
 * `text`, `time` (a [java.sql.Time] read from `time*1000`), `user_first_name`, `user_last_name`,
 * `user_username`, `json`, `source_type`, `source_id` and `message` (the same column as `text`).
 *
 * The values come straight from the JDBC getters, so entries for NULL columns may be null at
 * runtime even though the map is declared as `HashMap<String, Any>`.
 *
 * @param start inclusive lower bound for the `time` column
 * @param end exclusive upper bound for the `time` column
 * @param method callback invoked once per row, in query order
 */
fun getMessagesForCSVExport(start: Long, end: Long, method: (HashMap<String, Any>) -> Unit) {
    val query = "SELECT text, time*1000, users.first_name as user_first_name, users.last_name as user_last_name, " +
        "users.username as user_username, messages.json, source_type, source_id, text " +
        "FROM messages " +
        "LEFT JOIN users ON users.id=messages.sender_id " +
        "WHERE time>=${start} AND time<${end} AND messages.api_layer=${Kotlogram.API_LAYER} " +
        "ORDER BY messages.time"
    val rs = stmt.executeQuery(query)
    try {
        while (rs.next()) {
            val map = HashMap<String, Any>()
            map.put("text", rs.getString(1))
            map.put("time", rs.getTime(2))
            map.put("user_first_name", rs.getString(3))
            map.put("user_last_name", rs.getString(4))
            map.put("user_username", rs.getString(5))
            map.put("json", rs.getString(6))
            map.put("source_type", rs.getString(7))
            map.put("source_id", rs.getInt(8))
            map.put("message", rs.getString(9))
            method.invoke(map)
        }
    } finally {
        // Close the result set even when the callback (or a getter) throws.
        rs.close()
    }
}
fun getMessagesForExport(c: AbstractChat, limit: Int=-1, offset: Int=0, time_range: LongRange? = null): LinkedList<HashMap<String, Any>> {
// Optional extra WHERE condition limiting the result to the given (inclusive) time range.
var conditions = ""
if (time_range != null) {
    // Kotlin string templates are written ${...}; "#{...}" would end up literally in the SQL.
    conditions = "AND time>=${time_range.start} AND time<=${time_range.endInclusive}"
}
var query = "SELECT messages.message_id as message_id, text, time*1000 as time, has_media, " +
"media_type, media_file, media_size, users.first_name as user_first_name, users.last_name as user_last_name, " +
"users.username as user_username, users.id as user_id, " +
@ -632,9 +660,9 @@ class Database constructor(val file_base: String, val user_manager: UserManager)
"FROM messages " +
"LEFT JOIN users ON users.id=messages.sender_id " +
"LEFT JOIN users AS users_fwd ON users_fwd.id=fwd_from_id WHERE " +
c.query + " " +
c.query + " " + conditions + " " +
"ORDER BY messages.message_id"
if ( limit != -1 ) {
query = query + " LIMIT ${limit} OFFSET ${offset}"
}

View File

@ -0,0 +1,111 @@
/* Telegram_Backup
* Copyright (C) 2016 Fabian Schlenz
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>. */
package de.fabianonline.telegram_backup.exporter
import java.io.File
import java.io.PrintWriter
import java.io.OutputStreamWriter
import java.io.FileOutputStream
import java.nio.charset.Charset
import java.io.FileWriter
import java.io.IOException
import java.io.FileNotFoundException
import java.net.URL
import org.apache.commons.io.FileUtils
import java.util.LinkedList
import java.util.HashMap
import java.time.LocalDate
import java.time.LocalTime
import java.time.LocalDateTime
import java.time.ZoneOffset
import java.time.format.DateTimeFormatter
import java.sql.Time
import java.text.SimpleDateFormat
import com.github.mustachejava.DefaultMustacheFactory
import com.github.mustachejava.Mustache
import com.github.mustachejava.MustacheFactory
import de.fabianonline.telegram_backup.*
import com.github.badoualy.telegram.tl.api.*
import com.google.gson.*
import com.github.salomonbrys.kotson.*
import org.slf4j.Logger
import org.slf4j.LoggerFactory
/**
 * Exports messages from the database into CSV files rendered through the Mustache template
 * `templates/csv/messages.csv`.
 *
 * Files are written below [base], which is `file_base + "files" + File.separatorChar`
 * (this presumably expects `file_base` to end with a path separator - confirm with the caller).
 *
 * NOTE(review): this class passes message text to the template unchanged; it applies no CSV
 * quoting itself. Confirm that the template/factory escaping is adequate for messages that
 * contain quotes or line breaks.
 */
class CSVExporter(val db: Database, val file_base: String, val settings: Settings) {
    val logger = LoggerFactory.getLogger(CSVExporter::class.java)
    val mustache = DefaultMustacheFactory().compile("templates/csv/messages.csv")
    val dialogs = db.getListOfDialogsForExport()
    val chats = db.getListOfChatsForExport()
    val datetime_format = SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    val base = file_base + "files" + File.separatorChar

    /**
     * Runs the export.
     *
     * The number of days is `settings.max_file_age`, with `-1` mapped to 7. If that number is
     * positive, one file per day (`messages.<yyyy-MM-dd>.csv`) is written for each of the last
     * `days` days before today; days whose file already exists are skipped. Otherwise a single
     * `messages.all.csv` containing all messages is written.
     */
    fun export() {
        val today = LocalDateTime.of(LocalDate.now(), LocalTime.MIDNIGHT)
        val timezone = ZoneOffset.systemDefault()
        val days = if (settings.max_file_age == -1) 7 else settings.max_file_age

        // Create base dir
        logger.debug("Creating base dir")
        File(base).mkdirs()

        if (days > 0) {
            for (dayOffset in days downTo 1) {
                val day = today.minusDays(dayOffset.toLong())
                val nextDay = day.plusDays(1)
                val start = day.toEpochSecond(timezone.rules.getOffset(day))
                // Use the next local midnight as the end instead of start + 24h, so days that
                // are 23 or 25 hours long (DST changes) are covered exactly once.
                val end = nextDay.toEpochSecond(timezone.rules.getOffset(nextDay))
                val filename = base + "messages.${day.format(DateTimeFormatter.ISO_LOCAL_DATE)}.csv"
                // filename already starts with base (and therefore file_base); do not prefix it again.
                if (!File(filename).exists()) {
                    logger.debug("Range: {} to {}", start, end)
                    println("Processing messages for ${day}...")
                    exportToFile(start, end, filename)
                }
            }
        } else {
            println("Processing all messages...")
            exportToFile(0, Long.MAX_VALUE, base + "messages.all.csv")
        }
    }

    /**
     * Writes all messages with `start <= time < end` to [filename].
     *
     * All rows are collected in memory first and then rendered in one go under the template
     * key `messages`. Each row provides `time`, `username`, `chat_name` and `message`.
     *
     * @param start inclusive lower time bound passed to the database query
     * @param end exclusive upper time bound passed to the database query
     * @param filename full path of the file to (over)write
     */
    fun exportToFile(start: Long, end: Long, filename: String) {
        val list = mutableListOf<Map<String, String?>>()
        db.getMessagesForCSVExport(start, end) { data: HashMap<String, Any> ->
            val scope = HashMap<String, String?>()
            val timestamp = data["time"] as Time
            scope.put("time", datetime_format.format(timestamp))
            scope.put("username", data["user_username"] as String?)
            if (data["source_type"] == "dialog") {
                // Dialogs are labelled "@<username>"; an unknown dialog yields just "@".
                scope.put("chat_name", "@" + (dialogs.firstOrNull { it.id == data["source_id"] }?.username ?: ""))
            } else {
                scope.put("chat_name", chats.firstOrNull { it.id == data["source_id"] }?.name)
            }
            // The text column may be NULL; a nullable cast keeps such rows instead of throwing.
            scope.put("message", data["message"] as String?)
            list.add(scope)
        }

        // use {} closes (and thereby flushes) the writer even if rendering fails.
        getWriter(filename).use { writer ->
            mustache.execute(writer, mapOf("messages" to list))
        }
    }

    /** Opens [filename] for writing with UTF-8 encoding. */
    private fun getWriter(filename: String): OutputStreamWriter {
        logger.trace("Creating writer for file {}", filename.anonymize())
        return OutputStreamWriter(FileOutputStream(filename), Charset.forName("UTF-8").newEncoder())
    }
}

View File

@ -0,0 +1,139 @@
/* Telegram_Backup
* Copyright (C) 2016 Fabian Schlenz
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>. */
package de.fabianonline.telegram_backup.exporter
import java.io.File
import java.io.PrintWriter
import java.io.OutputStreamWriter
import java.io.FileOutputStream
import java.nio.charset.Charset
import java.io.FileWriter
import java.io.IOException
import java.io.FileNotFoundException
import java.net.URL
import org.apache.commons.io.FileUtils
import java.util.LinkedList
import java.util.HashMap
import java.time.LocalDate
import java.time.LocalTime
import java.time.LocalDateTime
import java.time.ZoneOffset
import java.time.format.DateTimeFormatter
import java.sql.Time
import java.text.SimpleDateFormat
import com.github.mustachejava.DefaultMustacheFactory
import com.github.mustachejava.Mustache
import com.github.mustachejava.MustacheFactory
import de.fabianonline.telegram_backup.*
import com.github.badoualy.telegram.tl.api.*
import com.google.gson.*
import com.github.salomonbrys.kotson.*
import org.slf4j.Logger
import org.slf4j.LoggerFactory
/**
 * Exports the links found in messages into CSV files rendered through the Mustache template
 * `templates/csv/links.csv`.
 *
 * Links are taken from the `entities` array of each message's stored JSON; only entities
 * matching `messageEntityTextUrl` or `messageEntityUrl` are considered.
 *
 * Files are written below [base], which is `file_base + "files" + File.separatorChar`
 * (this presumably expects `file_base` to end with a path separator - confirm with the caller).
 */
class CSVLinkExporter(val db: Database, val file_base: String, val settings: Settings) {
    val logger = LoggerFactory.getLogger(CSVLinkExporter::class.java)
    val mustache = DefaultMustacheFactory().compile("templates/csv/links.csv")
    val dialogs = db.getListOfDialogsForExport()
    val chats = db.getListOfChatsForExport()
    val datetime_format = SimpleDateFormat("yyyy-MM-dd HH:mm:ss")
    val base = file_base + "files" + File.separatorChar
    // Placeholder written as url and host when an entity's offset/length lies outside the message text.
    val invalid_entity_index = "[INVALID ENTITY INDEX]"

    /**
     * Runs the export.
     *
     * The number of days is `settings.max_file_age`, with `-1` mapped to 7. If that number is
     * positive, one file per day (`links.<yyyy-MM-dd>.csv`) is written for each of the last
     * `days` days before today; days whose file already exists are skipped. Otherwise a single
     * `links.all.csv` covering all messages is written.
     */
    fun export() {
        val today = LocalDateTime.of(LocalDate.now(), LocalTime.MIDNIGHT)
        val timezone = ZoneOffset.systemDefault()
        val days = if (settings.max_file_age == -1) 7 else settings.max_file_age

        // Create base dir
        logger.debug("Creating base dir")
        File(base).mkdirs()

        if (days > 0) {
            for (dayOffset in days downTo 1) {
                val day = today.minusDays(dayOffset.toLong())
                val nextDay = day.plusDays(1)
                val start = day.toEpochSecond(timezone.rules.getOffset(day))
                // Use the next local midnight as the end instead of start + 24h, so days that
                // are 23 or 25 hours long (DST changes) are covered exactly once.
                val end = nextDay.toEpochSecond(timezone.rules.getOffset(nextDay))
                val filename = base + "links.${day.format(DateTimeFormatter.ISO_LOCAL_DATE)}.csv"
                // filename already starts with base (and therefore file_base); do not prefix it again.
                if (!File(filename).exists()) {
                    logger.debug("Range: {} to {}", start, end)
                    println("Processing messages for ${day}...")
                    exportToFile(start, end, filename)
                }
            }
        } else {
            println("Processing all messages...")
            exportToFile(0, Long.MAX_VALUE, base + "links.all.csv")
        }
    }

    /**
     * Writes one row per link found in messages with `start <= time < end` to [filename].
     *
     * All rows are collected in memory first and then rendered in one go under the template
     * key `links`. Each row provides `url`, `host`, `time`, `username` and `chat_name`.
     *
     * @param start inclusive lower time bound passed to the database query
     * @param end exclusive upper time bound passed to the database query
     * @param filename full path of the file to (over)write
     */
    fun exportToFile(start: Long, end: Long, filename: String) {
        val list = mutableListOf<Map<String, String?>>()
        val parser = JsonParser()

        db.getMessagesForCSVExport(start, end) { data: HashMap<String, Any> ->
            // A row without stored JSON cannot carry entities, so there is nothing to export.
            val rawJson = data["json"] as? String ?: return@getMessagesForCSVExport
            val json = parser.parse(rawJson).obj
            if (!json.contains("entities")) return@getMessagesForCSVExport

            val urls: List<String> = json["entities"].array
                .filter { it.obj.isA("messageEntityTextUrl") || it.obj.isA("messageEntityUrl") }
                .map { entity -> extractUrl(json, entity) }
            if (urls.isEmpty()) return@getMessagesForCSVExport

            // These values are the same for every link of this message; compute them once.
            val time = datetime_format.format(data["time"] as Time)
            val username = data["user_username"] as String?
            val chatName = if (data["source_type"] == "dialog") {
                // Dialogs are labelled "@<username>"; an unknown dialog yields just "@".
                "@" + (dialogs.firstOrNull { it.id == data["source_id"] }?.username ?: "")
            } else {
                chats.firstOrNull { it.id == data["source_id"] }?.name
            }

            for (url in urls) {
                val scope = HashMap<String, String?>()
                scope.put("url", url)
                scope.put("host", hostOf(url))
                scope.put("time", time)
                scope.put("username", username)
                scope.put("chat_name", chatName)
                list.add(scope)
            }
        }

        // use {} closes (and thereby flushes) the writer even if rendering fails.
        getWriter(filename).use { writer ->
            mustache.execute(writer, mapOf("links" to list))
        }
    }

    /**
     * Returns the URL of a single link entity.
     *
     * Uses the entity's own `url` field if present, otherwise the part of the message text
     * described by the entity's `offset` and `length`. Results that start with neither `http:`
     * nor `https://` (case-insensitive) get `http://` prepended. If offset/length do not fit
     * the message text, [invalid_entity_index] is returned.
     */
    private fun extractUrl(json: JsonObject, entity: JsonElement): String {
        return try {
            val raw = if (entity.obj.contains("url")) {
                entity["url"].string
            } else {
                val offset = entity["offset"].int
                json["message"].string.substring(offset, offset + entity["length"].int)
            }
            val lower = raw.toLowerCase()
            if (!lower.startsWith("http:") && !lower.startsWith("https://")) "http://${raw}" else raw
        } catch (e: StringIndexOutOfBoundsException) {
            invalid_entity_index
        }
    }

    /**
     * Returns the host part of [url]. The placeholder [invalid_entity_index] is passed through
     * unchanged; a URL that cannot be parsed yields an empty host instead of aborting the export.
     */
    private fun hostOf(url: String): String {
        if (url == invalid_entity_index) return invalid_entity_index
        return try {
            URL(url).getHost()
        } catch (e: java.net.MalformedURLException) {
            // Link text comes from users; e.g. a non-numeric port makes URL() throw.
            logger.warn("Could not parse URL {}", url.anonymize())
            ""
        }
    }

    /** Opens [filename] for writing with UTF-8 encoding. */
    private fun getWriter(filename: String): OutputStreamWriter {
        logger.trace("Creating writer for file {}", filename.anonymize())
        return OutputStreamWriter(FileOutputStream(filename), Charset.forName("UTF-8").newEncoder())
    }
}

View File

@ -0,0 +1,3 @@
{{#links}}
"{{time}}","{{url}}","{{host}}","{{username}}","{{chat_name}}"
{{/links}}
Can't render this file because it has a wrong number of fields in line 2.

View File

@ -0,0 +1,3 @@
{{#messages}}
"{{time}}","{{username}}","{{chat_name}}","{{message}}"
{{/messages}}
Can't render this file because it has a wrong number of fields in line 2.