diff --git a/app/src/main/kotlin/net/h34t/filemure/ContentExtractor.kt b/app/src/main/kotlin/net/h34t/filemure/ContentExtractor.kt
new file mode 100644
index 0000000..9c14382
--- /dev/null
+++ b/app/src/main/kotlin/net/h34t/filemure/ContentExtractor.kt
@@ -0,0 +1,22 @@
+package net.h34t.filemure
+
+import org.apache.pdfbox.Loader
+import org.apache.pdfbox.text.PDFTextStripper
+
+/**
+ * Extracts plain text from uploaded file contents.
+ */
+class ContentExtractor {
+
+    /**
+     * Parses [pdfBytes] as a PDF and returns the text PDFBox extracts from it.
+     *
+     * The loaded document is closed before returning, even if extraction throws;
+     * load and extraction failures propagate to the caller.
+     */
+    fun extractPdf(pdfBytes: ByteArray): String =
+        Loader.loadPDF(pdfBytes).use { doc -> PDFTextStripper().getText(doc) }
+
+    /** Decodes [bytes] as UTF-8 text. */
+    fun extractPlain(bytes: ByteArray): String = bytes.toString(Charsets.UTF_8)
+}
diff --git a/app/src/main/kotlin/net/h34t/filemure/controller/UploadController.kt b/app/src/main/kotlin/net/h34t/filemure/controller/UploadController.kt
index 0e59e47..27f5ada 100644
--- a/app/src/main/kotlin/net/h34t/filemure/controller/UploadController.kt
+++ b/app/src/main/kotlin/net/h34t/filemure/controller/UploadController.kt
@@ -1,12 +1,15 @@
 package net.h34t.filemure.controller
 
 import io.javalin.http.Context
+import net.h34t.filemure.ContentExtractor
 import net.h34t.filemure.TemplateModifiers
 import net.h34t.filemure.repository.SqliteRepository
 import net.h34t.filemure.requireSession
 
 class UploadController(val modifiers: TemplateModifiers, val repository: SqliteRepository) {
 
+    private val contentExtractor = ContentExtractor()
+
     fun upload(ctx: Context) {
         val session = ctx.requireSession()
 
@@ -16,13 +19,32 @@ class UploadController(val modifiers: TemplateModifiers, val repository: SqliteR
 
         val files = ctx.uploadedFiles()
 
+        // Every upload lands in limbo first, whatever the target; its extracted text is stored
+        // alongside the raw bytes. Content types other than PDF and plain text get "".
+        val extIds = files.map {
+            it.contentAndClose { contentStream ->
+                val content = contentStream.readAllBytes()
+                val contentType = it.contentType()
+
+                val contentExtracted = when (contentType) {
+                    "application/pdf" -> contentExtractor.extractPdf(content)
+                    "text/plain" -> contentExtractor.extractPlain(content)
+                    else -> ""
+                }
+
+                repository.addFileToLimbo(
+                    accountid,
+                    it.filename(),
+                    contentType,
+                    it.size(),
+                    contentExtracted,
+                    content
+                ).extId
+            }
+        }
+
         when (target) {
             "document" -> {
-                val extIds = files.map {
-                    it.contentAndClose { content ->
-                        repository.addFileToLimbo(accountid, it.filename(), it.contentType(), it.size(), content).extId
-                    }
-                }
                 ctx.status(200)
                 ctx.json(
                     Result(
@@ -33,11 +55,6 @@ class UploadController(val modifiers: TemplateModifiers, val repository: SqliteR
             }
 
             "limbo" -> {
-                files.forEach {
-                    it.contentAndClose { content ->
-                        repository.addFileToLimbo(accountid, it.filename(), it.contentType(), it.size(), content)
-                    }
-                }
                 ctx.status(200)
                 ctx.json(
                     Result(
diff --git a/app/src/main/kotlin/net/h34t/filemure/repository/SqliteRepository.kt b/app/src/main/kotlin/net/h34t/filemure/repository/SqliteRepository.kt
index 9ac75ae..1c12a8c 100644
--- a/app/src/main/kotlin/net/h34t/filemure/repository/SqliteRepository.kt
+++ b/app/src/main/kotlin/net/h34t/filemure/repository/SqliteRepository.kt
@@ -122,7 +122,8 @@ class SqliteRepository(url: String) {
         filename: String,
         contentType: String?,
         size: Long,
-        content: InputStream
+        contentExtracted: String,
+        content: ByteArray
     ): IdPair = database.databaseQueries.transactionWithResult {
         val extId = generateExtId()
 
@@ -132,7 +133,8 @@ class SqliteRepository(url: String) {
             filename = filename,
             content_type = contentType,
             file_size = size,
-            content = content.readAllBytes()
+            content_extracted = contentExtracted,
+            content = content
         )
         IdPair(
             id = lastInsertedId(),
@@ -140,7 +142,6 @@ class SqliteRepository(url: String) {
         )
     }
 
-
     fun addNewFileToDocument(
         accountId: Long,
         documentId: Long,
diff --git a/app/src/main/sqldelight/net/h34t/filemure/db/Database.sq b/app/src/main/sqldelight/net/h34t/filemure/db/Database.sq
index 075e31d..4db4ef7 100644
--- a/app/src/main/sqldelight/net/h34t/filemure/db/Database.sq
+++ b/app/src/main/sqldelight/net/h34t/filemure/db/Database.sq
@@ -70,7 +70,7 @@ insertFileForDocument:
 INSERT INTO file (account_id, document_id, ext_id, filename, content_type, file_size, content) VALUES (?, ?,?,?,?,?,?);
 
 insertFileIntoLimbo:
-INSERT INTO file (account_id, ext_id, filename, content_type, file_size, content) VALUES (?,?,?,?,?,?);
+INSERT INTO file (account_id, ext_id, filename, content_type, file_size, content_extracted, content) VALUES (?,?,?,?,?,?, ?);
 
 getLimboFileCount:
 SELECT count(*) AS count FROM file WHERE account_id=? AND document_id IS NULL AND state=?;
@@ -221,21 +221,24 @@ UPDATE file SET state=? WHERE account_id=? AND ext_id IN ?;
 
+-- DISTINCT collapses the one-row-per-file result of the join back to one row per document.
 searchDocument:
-SELECT
-    id,
-    account_id,
-    ext_id,
-    title,
-    description,
-    tags,
-    created,
-    reference_date,
-    state
+SELECT DISTINCT
+    d.id,
+    d.account_id,
+    d.ext_id,
+    d.title,
+    d.description,
+    d.tags,
+    d.created,
+    d.reference_date,
+    d.state
 FROM
-    document
+    document d LEFT OUTER JOIN file f ON (f.document_id = d.id)
 WHERE
-    account_id = :account_id AND
-    state = :state AND
-    (title LIKE :query OR
-    description LIKE :query AND
-    tags LIKE :query);
+    d.account_id = :account_id AND
+    d.state = :state AND
+    (d.title LIKE :query OR
+    d.description LIKE :query OR
+    d.tags LIKE :query OR
+    f.filename LIKE :query OR
+    f.content_extracted LIKE :query);
 