diff --git a/app/build.gradle.kts b/app/build.gradle.kts index 5ede55b..11a23ff 100644 --- a/app/build.gradle.kts +++ b/app/build.gradle.kts @@ -11,6 +11,8 @@ dependencies { implementation("app.cash.sqldelight:sqlite-driver:2.0.2") implementation("com.fasterxml.jackson.core:jackson-databind:2.18.2") implementation("com.fasterxml.jackson.module:jackson-module-kotlin:2.18.+") + implementation("com.squareup.okhttp3:okhttp:4.12.0") + implementation(libs.slf4jsimple) implementation(libs.javalin) implementation(libs.commonsText) diff --git a/app/src/main/kotlin/net/h34t/filemure/ContentExtractor.kt b/app/src/main/kotlin/net/h34t/filemure/ContentExtractor.kt index 9c14382..70b4dbf 100644 --- a/app/src/main/kotlin/net/h34t/filemure/ContentExtractor.kt +++ b/app/src/main/kotlin/net/h34t/filemure/ContentExtractor.kt @@ -1,14 +1,46 @@ package net.h34t.filemure +import net.h34t.filemure.classification.AIClassifier import org.apache.pdfbox.Loader +import org.apache.pdfbox.rendering.PDFRenderer import org.apache.pdfbox.text.PDFTextStripper +import java.io.ByteArrayOutputStream +import javax.imageio.ImageIO -class ContentExtractor { +class ContentExtractor( + val aiClassifier: AIClassifier +) { + + fun extractImage(contentType: String, imageBytes: ByteArray): String { + val text = aiClassifier.classifyImage( + prompt = "Analyze the given image and concise summary of its contents, including the type of document depicted (invoice, certificate, doctor's note, ...).", + contentType = contentType, + data = imageBytes, + ) + + println("AI image classification:\n\"\"\"\n$text\n\"\"\"") + + return text + } fun extractPdf(pdfBytes: ByteArray): String { val doc = Loader.loadPDF(pdfBytes) + val text = PDFTextStripper().getText(doc) - return PDFTextStripper().getText(doc) + return if (text.isNotBlank()) { + text + } else { + val renderer = PDFRenderer(doc) + val bi = renderer.renderImage(0) + val baos = ByteArrayOutputStream() + + ImageIO.write(bi, "JPG", baos) + + // for debugging + // ImageIO.write(bi, "JPG", File.createTempFile("pdfimage-", ".jpg", File("."))) + + extractImage("image/jpeg", baos.toByteArray()) + } } fun extractPlain(bytes: ByteArray): String { diff --git a/app/src/main/kotlin/net/h34t/filemure/FilemureApp.kt b/app/src/main/kotlin/net/h34t/filemure/FilemureApp.kt index 8709755..0a787d0 100644 --- a/app/src/main/kotlin/net/h34t/filemure/FilemureApp.kt +++ b/app/src/main/kotlin/net/h34t/filemure/FilemureApp.kt @@ -2,12 +2,15 @@ package net.h34t.filemure import io.javalin.Javalin import io.javalin.http.UnauthorizedResponse +import net.h34t.filemure.classification.AIClassifier import net.h34t.filemure.controller.* import net.h34t.filemure.repository.SqliteRepository import net.h34t.filemure.tpl.Frame import net.h34t.filemure.tpl.Unauthorized -class FilemureApp(repository: SqliteRepository) { +class FilemureApp( + repository: SqliteRepository, + contentExtractor: ContentExtractor) { private val modifiers = TemplateModifiers() @@ -15,7 +18,7 @@ class FilemureApp(repository: SqliteRepository) { private val overviewController = OverviewController(modifiers, repository) private val limboController = LimboController(modifiers, repository) - private val uploadController = UploadController(modifiers, repository) + private val uploadController = UploadController(modifiers, repository, contentExtractor) private val documentController = DocumentController(modifiers, repository) private val searchController = SearchController(modifiers, repository) diff --git a/app/src/main/kotlin/net/h34t/filemure/Server.kt b/app/src/main/kotlin/net/h34t/filemure/Server.kt index a9c19f2..289bf3c 100644 --- a/app/src/main/kotlin/net/h34t/filemure/Server.kt +++ b/app/src/main/kotlin/net/h34t/filemure/Server.kt @@ -2,7 +2,9 @@ package net.h34t.app.net.h34t.filemure import io.javalin.Javalin import io.javalin.http.staticfiles.Location +import net.h34t.filemure.ContentExtractor import net.h34t.filemure.FilemureApp +import net.h34t.filemure.classification.AIClassifier import net.h34t.filemure.repository.SqliteRepository import org.eclipse.jetty.http.HttpCookie import org.eclipse.jetty.server.session.DefaultSessionCache @@ -21,7 +23,13 @@ fun main() { val sessionExpiry = System.getenv("session_expiry_sec")?.toInt() ?: (Duration.ofMinutes(30).toSeconds().toInt()) - val app = FilemureApp(SqliteRepository("jdbc:sqlite:$db")) + val aiClassifier = AIClassifier( + key = System.getenv("ai_key") + ) + + val contentExtractor = ContentExtractor(aiClassifier) + + val app = FilemureApp(SqliteRepository("jdbc:sqlite:$db"), contentExtractor) Javalin .create { config -> diff --git a/app/src/main/kotlin/net/h34t/filemure/Types.kt b/app/src/main/kotlin/net/h34t/filemure/Types.kt index 4ba61d2..5082963 100644 --- a/app/src/main/kotlin/net/h34t/filemure/Types.kt +++ b/app/src/main/kotlin/net/h34t/filemure/Types.kt @@ -50,7 +50,7 @@ value class Tag private constructor(val value: String) { } fun parse(text: String?) = - text?.split(splitter)?.map { of(it) } ?: emptyList() + text?.trim()?.split(splitter)?.map { of(it) } ?: emptyList() } } diff --git a/app/src/main/kotlin/net/h34t/filemure/classification/AIClassifier.kt b/app/src/main/kotlin/net/h34t/filemure/classification/AIClassifier.kt new file mode 100644 index 0000000..1f60fca --- /dev/null +++ b/app/src/main/kotlin/net/h34t/filemure/classification/AIClassifier.kt @@ -0,0 +1,171 @@ +package net.h34t.filemure.classification + +import com.fasterxml.jackson.annotation.JsonProperty +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.kotlin.readValue +import okhttp3.MediaType.Companion.toMediaType +import okhttp3.OkHttpClient +import okhttp3.Request +import okhttp3.RequestBody +import okhttp3.RequestBody.Companion.toRequestBody +import java.util.* + +/** + * examples for models: + * Vision: "meta-llama/llama-3.2-11b-vision-instruct" + * Text: "meta-llama/llama-3.2-3b-instruct" + */ +class AIClassifier( + val key: String, + val visionModel: String = "meta-llama/llama-3.2-90b-vision-instruct", + val textModel: String = "meta-llama/llama-3.3-70b-instruct", + val baseUrl: String = "https://openrouter.ai/api/v1/chat/completions" +) { + + data class OpenrouterRequest( + val model: String, + val messages: List + ) + + interface ContentType + + data class ReqMessage( + val role: String, + val content: List + ) + + data class TextContent( + val type: String = "text", + val text: String + ) : ContentType + + data class ImageContent( + val type: String = "image_url", + val image_url: ImageUrl + ) : ContentType + + data class ImageUrl( + val url: String + ) + + data class OpenrouterResponse( + @JsonProperty("id") + val id: String, + @JsonProperty("provider") + val provider: String, + @JsonProperty("model") + val model: String, + @JsonProperty("object") + val contentObject: String, + @JsonProperty("created") + val created: Long, + @JsonProperty("choices") + val choices: List, + @JsonProperty("system_fingerprint") + val systemFingerprint: String?, + @JsonProperty("usage") + val usage: Usage + ) + + data class Choice( + @JsonProperty("logprobs") + val logprobs: Unit?, + @JsonProperty("finish_reason") + val finishReason: String?, + @JsonProperty("native_finish_reason") + val nativeFinishReason: String?, + @JsonProperty("index") + val index: Int, + @JsonProperty("message") + val message: ResMessage + ) + + data class ResMessage( + @JsonProperty("role") + val role: String, + @JsonProperty("content") + val content: String, + @JsonProperty("refusal") + val refusal: String? + ) + + data class Usage( + @JsonProperty("prompt_tokens") + val prompt_tokens: Int, + @JsonProperty("completion_tokens") + val completion_tokens: Int, + @JsonProperty("total_tokens") + val total_tokens: Int + ) + + private val encoder = Base64.getEncoder() + + fun queryText( + query: String, + model: String = textModel, + ): String { + return query( + model = model, + content = listOf(TextContent(text = query)) + ) + } + + private fun query(content: List, model: String): String { + val orRequest = OpenrouterRequest( + model = model, + messages = listOf( + ReqMessage( + role = "user", + content = content + ) + ) + ) + + val mapper = ObjectMapper() + val bodyContent = mapper.writeValueAsString(orRequest) + + val client = OkHttpClient() + + val json = "application/json".toMediaType() + + val body: RequestBody = bodyContent.toRequestBody(json) + val req = Request.Builder() + .url(baseUrl) + .header("Authorization", "Bearer $key") + .header("Content-Type", "application/json") + .post(body) + .build() + + client.newCall(req).execute().use { res -> + val bodyString = res.body?.string()?.trim() + val responseBody = bodyString?.let { mapper.readValue(it) } + ?: throw Exception("body is null") + + return responseBody.choices.joinToString("\n") { it.message.content }.trim() + } + } + + fun classifyImage( + prompt: String, + contentType: String, + data: ByteArray, + model: String = visionModel + ): String { + val dataUrl = "data:$contentType;base64," + encoder.encodeToString(data) + + return query( + model = model, + content = listOf( + TextContent( + type = "text", + // text = "Extract the message text only in this file and repeat it word-for-word." + text = prompt + ), + ImageContent( + type = "image_url", + image_url = ImageUrl(url = dataUrl) + ) + ) + ) + } +} \ No newline at end of file diff --git a/app/src/main/kotlin/net/h34t/filemure/controller/DocumentController.kt b/app/src/main/kotlin/net/h34t/filemure/controller/DocumentController.kt index 0c21056..c4ba0f2 100644 --- a/app/src/main/kotlin/net/h34t/filemure/controller/DocumentController.kt +++ b/app/src/main/kotlin/net/h34t/filemure/controller/DocumentController.kt @@ -72,7 +72,8 @@ class DocumentController(val modifiers: TemplateModifiers, val repository: Sqlit val referenceDate = referenceDates.firstOrNull() ?: LocalDateTime.now() val tags = selectedFiles.map { File(it.filename).extension }.distinct().asSequence() - val description = "" + + val description = selectedFiles.mapNotNull { it.contentExtracted }.joinToString("\n\n") ctx.tempolin( Frame( diff --git a/app/src/main/kotlin/net/h34t/filemure/controller/UploadController.kt b/app/src/main/kotlin/net/h34t/filemure/controller/UploadController.kt index 27f5ada..4f570ff 100644 --- a/app/src/main/kotlin/net/h34t/filemure/controller/UploadController.kt +++ b/app/src/main/kotlin/net/h34t/filemure/controller/UploadController.kt @@ -6,9 +6,11 @@ import net.h34t.filemure.TemplateModifiers import net.h34t.filemure.repository.SqliteRepository import net.h34t.filemure.requireSession -class UploadController(val modifiers: TemplateModifiers, val repository: SqliteRepository) { - - private val pdfContentExtractor = ContentExtractor() +class UploadController( + private val modifiers: TemplateModifiers, + private val repository: SqliteRepository, + private val contentExtractor: ContentExtractor +) { fun upload(ctx: Context) { val session = ctx.requireSession() @@ -25,8 +27,8 @@ class UploadController(val modifiers: TemplateModifiers, val repository: SqliteR val contentType = it.contentType() val contentExtracted = when (contentType) { - "application/pdf" -> pdfContentExtractor.extractPdf(content) - "text/plain" -> pdfContentExtractor.extractPlain(content) + "application/pdf" -> contentExtractor.extractPdf(content) + "text/plain" -> contentExtractor.extractPlain(content) else -> "" }