diff --git a/.env b/.env new file mode 100644 index 0000000..816d5e5 --- /dev/null +++ b/.env @@ -0,0 +1,8 @@ +PINRY_TOKEN=XXXXXXXXXXXXX +CAMERA_INDEX=0 +UPLOAD=True +OCR_PROCESSOR=pyobjc +USE_OLLAMA_CAPTION=False +USE_OLLAMA_OCR=False +CHECK_MARKERS=True +UPLOAD_MARKED_IMAGE=False \ No newline at end of file diff --git a/.venv b/.venv new file mode 100644 index 0000000..7e0e6a7 --- /dev/null +++ b/.venv @@ -0,0 +1 @@ +machine_archivist-mftt diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..53420f1 --- /dev/null +++ b/LICENSE @@ -0,0 +1,3 @@ +Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License + +This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. To view a copy of this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/ diff --git a/Mobile Scan.app/Contents/Info.plist b/Mobile Scan.app/Contents/Info.plist new file mode 100644 index 0000000..f157474 --- /dev/null +++ b/Mobile Scan.app/Contents/Info.plist @@ -0,0 +1,109 @@ + + + + + AMIsApplet + + AMStayOpen + + BuildMachineOSBuild + 22A380021 + CFBundleAllowMixedLocalizations + + CFBundleDevelopmentRegion + English + CFBundleDocumentTypes + + + CFBundleTypeExtensions + + * + + CFBundleTypeName + Automator workflow file + CFBundleTypeOSTypes + + **** + + CFBundleTypeRole + Viewer + + + CFBundleExecutable + Automator Application Stub + CFBundleIconFile + ApplicationStub + CFBundleIdentifier + com.apple.automator.Mobile-Scan + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + Mobile Scan + CFBundlePackageType + APPL + CFBundleShortVersionString + 1.3 + CFBundleSignature + ???? + CFBundleSupportedPlatforms + + MacOSX + + CFBundleURLTypes + + CFBundleVersion + 527 + DTCompiler + com.apple.compilers.llvm.clang.1_0 + DTPlatformBuild + + DTPlatformName + macosx + DTPlatformVersion + 15.3 + DTSDKBuild + 24D39 + DTSDKName + macosx15.3.internal + DTXcode + 1600 + DTXcodeBuild + 16A6170g + LSMinimumSystemVersion + 10.9 + LSUIElement + + NSAppleEventsUsageDescription + This workflow needs to control other applications to run. + NSAppleMusicUsageDescription + This workflow needs access to your music to run. + NSAppleScriptEnabled + YES + NSCalendarsUsageDescription + This workflow needs access to your calendars to run. + NSCameraUsageDescription + This workflow needs access to your camera to run. + NSContactsUsageDescription + This workflow needs access to your contacts to run. + NSHomeKitUsageDescription + This workflow needs access to your HomeKit Home to run. + NSMicrophoneUsageDescription + This workflow needs access to your microphone to run. + NSPhotoLibraryUsageDescription + This workflow needs access to your photos to run. + NSPrincipalClass + NSApplication + NSRemindersUsageDescription + This workflow needs access to your reminders to run. + NSServices + + NSSiriUsageDescription + This workflow needs access to Siri to run. + NSSystemAdministrationUsageDescription + This workflow needs access to administer this system in order to run. 
+ UTExportedTypeDeclarations + + UTImportedTypeDeclarations + + + diff --git a/Mobile Scan.app/Contents/MacOS/Automator Application Stub b/Mobile Scan.app/Contents/MacOS/Automator Application Stub new file mode 100755 index 0000000..e7fbc9a Binary files /dev/null and b/Mobile Scan.app/Contents/MacOS/Automator Application Stub differ diff --git a/Mobile Scan.app/Contents/Resources/ApplicationStub.icns b/Mobile Scan.app/Contents/Resources/ApplicationStub.icns new file mode 100644 index 0000000..5c5f989 Binary files /dev/null and b/Mobile Scan.app/Contents/Resources/ApplicationStub.icns differ diff --git a/Mobile Scan.app/Contents/Resources/Assets.car b/Mobile Scan.app/Contents/Resources/Assets.car new file mode 100644 index 0000000..c2e5fa9 Binary files /dev/null and b/Mobile Scan.app/Contents/Resources/Assets.car differ diff --git a/Mobile Scan.app/Contents/Resources/InfoPlist.loctable b/Mobile Scan.app/Contents/Resources/InfoPlist.loctable new file mode 100644 index 0000000..b763cd0 Binary files /dev/null and b/Mobile Scan.app/Contents/Resources/InfoPlist.loctable differ diff --git a/Mobile Scan.app/Contents/_CodeSignature/CodeResources b/Mobile Scan.app/Contents/_CodeSignature/CodeResources new file mode 100644 index 0000000..056b575 --- /dev/null +++ b/Mobile Scan.app/Contents/_CodeSignature/CodeResources @@ -0,0 +1,171 @@ + + + + + files + + Resources/ApplicationStub.icns + + RYTqh+7iocnEIV8iTs9EgJjEkO4= + + Resources/Assets.car + + SJkzJQ0zpEu+KXeAJk45wvdOq4Q= + + Resources/InfoPlist.loctable + + KEQC0DFC9lrETWe0E5eVGGsPylc= + + + files2 + + Resources/ApplicationStub.icns + + hash + + RYTqh+7iocnEIV8iTs9EgJjEkO4= + + hash2 + + odOqeBevxysHIbR5V5qgZz11qTuV9cL5jKaIcUw3R0I= + + + Resources/Assets.car + + hash + + SJkzJQ0zpEu+KXeAJk45wvdOq4Q= + + hash2 + + HfxV2L1WKqSFn2ShFdQSMo39Xh4FPrSyJzz2hW4d+IQ= + + + Resources/InfoPlist.loctable + + hash + + KEQC0DFC9lrETWe0E5eVGGsPylc= + + hash2 + + 3cSIcj64rHY2k+pLrnrgd1Li6hmbquwgX94QcefajJ8= + + + document.wflow + + cdhash + + icthnS5QkIcGta2GiU+/saZ3dpk= + + requirement + cdhash H"53ddb51651bfcda8e1da3b39bd627aae0993b3cc" or cdhash H"89cb619d2e50908706b5ad86894fbfb1a6777699" + + + rules + + ^Resources/ + + ^Resources/.*\.lproj/ + + optional + + weight + 1000 + + ^Resources/.*\.lproj/locversion.plist$ + + omit + + weight + 1100 + + ^Resources/Base\.lproj/ + + weight + 1010 + + ^version.plist$ + + + rules2 + + .*\.dSYM($|/) + + weight + 11 + + ^(.*/)?\.DS_Store$ + + omit + + weight + 2000 + + ^(Frameworks|SharedFrameworks|PlugIns|Plug-ins|XPCServices|Helpers|MacOS|Library/(Automator|Spotlight|LoginItems))/ + + nested + + weight + 10 + + ^.* + + ^Info\.plist$ + + omit + + weight + 20 + + ^PkgInfo$ + + omit + + weight + 20 + + ^Resources/ + + weight + 20 + + ^Resources/.*\.lproj/ + + optional + + weight + 1000 + + ^Resources/.*\.lproj/locversion.plist$ + + omit + + weight + 1100 + + ^Resources/Base\.lproj/ + + weight + 1010 + + ^[^/]+$ + + nested + + weight + 10 + + ^embedded\.provisionprofile$ + + weight + 20 + + ^version\.plist$ + + weight + 20 + + + + diff --git a/Mobile Scan.app/Contents/document.wflow b/Mobile Scan.app/Contents/document.wflow new file mode 100644 index 0000000..096a60a --- /dev/null +++ b/Mobile Scan.app/Contents/document.wflow @@ -0,0 +1,124 @@ + + + + + AMApplicationBuild + 527 + AMApplicationVersion + 2.10 + AMDocumentVersion + 2 + actions + + + action + + AMAccepts + + Container + List + Optional + + Types + + com.apple.applescript.object + + + AMActionVersion + 1.0.2 + AMApplication + + Automator + + AMParameterProperties + 
+ source + + + AMProvides + + Container + List + Types + + com.apple.applescript.object + + + ActionBundlePath + /System/Library/Automator/Run AppleScript.action + ActionName + Run AppleScript + ActionParameters + + source + tell application "Terminal" -- Check if there are any open windows if (count of windows) is greater than 0 then -- Get the position of the first window set windowPos to position of window 1 -- Use the first window if it's open set existingWindow to window 1 do script "~/thesis/code/scan.sh 2 ; sleep 2; ~/thesis/code/publish.sh; exit" in existingWindow else -- If no windows are open, create a new window and set its position to the last window's position set windowPos to {200, 200} -- Default position if no window was open previously do script "~/thesis/code/scan.sh 2; sleep 2; ~/thesis/code/publish.sh; exit" set position of the front window to windowPos end if activate end tell delay 3 -- Wait for the scripts to finish executing tell application "Terminal" -- Close all Terminal windows set terminalWindows to every window repeat with aWindow in terminalWindows if busy of aWindow is false then close aWindow end if end repeat end tell + + BundleIdentifier + com.apple.Automator.RunScript + CFBundleVersion + 1.0.2 + CanShowSelectedItemsWhenRun + + CanShowWhenRun + + Category + + AMCategoryUtilities + + Class Name + RunScriptAction + InputUUID + 8C226B47-458F-4409-BF3A-8ABE247C1FF2 + Keywords + + Run + + OutputUUID + F5590A01-ECCB-4790-974A-DA4BC1310717 + UUID + 45370EDA-E9D5-465E-B975-C2D83FE6A8D4 + UnlocalizedApplications + + Automator + + arguments + + 0 + + default value + on run {input, parameters} + + (* Your script goes here *) + + return input +end run + name + source + required + 0 + type + 0 + uuid + 0 + + + isViewVisible + 1 + location + 631.500000:315.000000 + nibPath + /System/Library/Automator/Run AppleScript.action/Contents/Resources/Base.lproj/main.nib + + isViewVisible + 1 + + + connectors + + workflowMetaData + + workflowTypeIdentifier + com.apple.Automator.application + + +
diff --git a/test "b/Mobile Scan.app/Icon\r"
similarity index 100%
rename from test
rename to "Mobile Scan.app/Icon\r"
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..bdd9f90
--- /dev/null
+++ b/README.md
@@ -0,0 +1,13 @@
+# Machine Archivist
+
+This repository holds all Python and supplementary files needed to set up the Machine Archivist, an artistic documentation tool.
+For detailed setup and usage instructions, read RunInstructions.md.
+
+The code was created in the context of a master's thesis titled Human : Waste by Aron Petau.
+
+
+© 2025 **Aron Petau**. All rights reserved.
+
+![License](https://img.shields.io/badge/License-CC_BY--NC--SA_4.0-lightgrey)
+
+This project is licensed under the **Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License**. See the [LICENSE](./LICENSE) file for more details.
diff --git a/RunInstructions.md b/RunInstructions.md
new file mode 100644
index 0000000..bfaf19c
--- /dev/null
+++ b/RunInstructions.md
@@ -0,0 +1,92 @@
+To reproduce the archive, the following dependencies should be met:
+
+*This tutorial is for macOS only,*
+but you should be able to get it running anywhere that can run Ollama.
+
+**Currently, all paths are hard-coded, so the scripts will only work if the repository is cloned to the home directory.**
+You also have to adjust either the username or the entire path in the scripts.
+
+## Brew
+
+```bash
+/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
+```
+
+## git
+
+```bash
+brew install git
+```
+
+## Node.js and npm, as well as yarn
+
+```bash
+brew install node
+npm install --global yarn
+```
+
+## Babashka and Clojure
+
+```bash
+brew install borkdude/brew/babashka
+brew install clojure/tools/clojure
+```
+
+Both are prerequisites for publish-spa.
+
+## Publish-spa
+
+```bash
+git clone https://github.com/logseq/publish-spa
+cd publish-spa && yarn install
+yarn global add $PWD
+```
+
+This tool automatically publishes the Logseq archive.
+Everything works without it, but you will then have to export the archive manually.
+
+## The Logseq desktop app (optional)
+
+```bash
+brew install --cask logseq
+```
+
+Install this only if you want a convenient way to modify the archive.
+
+## Python
+
+```bash
+brew install python@3.13
+```
+
+## Ollama
+
+```bash
+brew install --cask ollama
+```
+
+I am using the cask here because it autostarts the server, but you can also use the CLI version if you prefer.
+Ollama handles all the LLM needs; if you don't need those, you can skip this step.
+
+# The Archive Repository
+
+```bash
+git clone https://github.com/arontaupe/thesis
+```
+
+# Make the scripts executable
+
+```bash
+cd thesis/code
+chmod +x publish.sh
+chmod +x scan.sh
+```
+
+Then either run the scripts individually or use the provided app, which should now work:
+
+```bash
+./scan.sh
+./publish.sh
+```
+
+The app is called `Scan to Archive` and should be in the thesis folder.
diff --git a/Scan to Archive.app/Contents/Info.plist b/Scan to Archive.app/Contents/Info.plist new file mode 100644 index 0000000..359ccb1 --- /dev/null +++ b/Scan to Archive.app/Contents/Info.plist @@ -0,0 +1,109 @@ + + + + + AMIsApplet + + AMStayOpen + + BuildMachineOSBuild + 22A380021 + CFBundleAllowMixedLocalizations + + CFBundleDevelopmentRegion + English + CFBundleDocumentTypes + + + CFBundleTypeExtensions + + * + + CFBundleTypeName + Automator workflow file + CFBundleTypeOSTypes + + **** + + CFBundleTypeRole + Viewer + + + CFBundleExecutable + Automator Application Stub + CFBundleIconFile + ApplicationStub + CFBundleIdentifier + com.apple.automator.Scan-to-Archive + CFBundleInfoDictionaryVersion + 6.0 + CFBundleName + Scan to Archive + CFBundlePackageType + APPL + CFBundleShortVersionString + 1.3 + CFBundleSignature + ???? + CFBundleSupportedPlatforms + + MacOSX + + CFBundleURLTypes + + CFBundleVersion + 527 + DTCompiler + com.apple.compilers.llvm.clang.1_0 + DTPlatformBuild + + DTPlatformName + macosx + DTPlatformVersion + 15.3 + DTSDKBuild + 24D39 + DTSDKName + macosx15.3.internal + DTXcode + 1600 + DTXcodeBuild + 16A6170g + LSMinimumSystemVersion + 10.9 + LSUIElement + + NSAppleEventsUsageDescription + This workflow needs to control other applications to run. + NSAppleMusicUsageDescription + This workflow needs access to your music to run. + NSAppleScriptEnabled + YES + NSCalendarsUsageDescription + This workflow needs access to your calendars to run.
+ NSCameraUsageDescription + This workflow needs access to your camera to run. + NSContactsUsageDescription + This workflow needs access to your contacts to run. + NSHomeKitUsageDescription + This workflow needs access to your HomeKit Home to run. + NSMicrophoneUsageDescription + This workflow needs access to your microphone to run. + NSPhotoLibraryUsageDescription + This workflow needs access to your photos to run. + NSPrincipalClass + NSApplication + NSRemindersUsageDescription + This workflow needs access to your reminders to run. + NSServices + + NSSiriUsageDescription + This workflow needs access to Siri to run. + NSSystemAdministrationUsageDescription + This workflow needs access to administer this system in order to run. + UTExportedTypeDeclarations + + UTImportedTypeDeclarations + + + diff --git a/Scan to Archive.app/Contents/MacOS/Automator Application Stub b/Scan to Archive.app/Contents/MacOS/Automator Application Stub new file mode 100755 index 0000000..1be55ec Binary files /dev/null and b/Scan to Archive.app/Contents/MacOS/Automator Application Stub differ diff --git a/Scan to Archive.app/Contents/Resources/ApplicationStub.icns b/Scan to Archive.app/Contents/Resources/ApplicationStub.icns new file mode 100644 index 0000000..5c5f989 Binary files /dev/null and b/Scan to Archive.app/Contents/Resources/ApplicationStub.icns differ diff --git a/Scan to Archive.app/Contents/Resources/Assets.car b/Scan to Archive.app/Contents/Resources/Assets.car new file mode 100644 index 0000000..c2e5fa9 Binary files /dev/null and b/Scan to Archive.app/Contents/Resources/Assets.car differ diff --git a/Scan to Archive.app/Contents/Resources/InfoPlist.loctable b/Scan to Archive.app/Contents/Resources/InfoPlist.loctable new file mode 100644 index 0000000..b763cd0 Binary files /dev/null and b/Scan to Archive.app/Contents/Resources/InfoPlist.loctable differ diff --git a/Scan to Archive.app/Contents/_CodeSignature/CodeResources b/Scan to Archive.app/Contents/_CodeSignature/CodeResources new file mode 100644 index 0000000..a7ccddc --- /dev/null +++ b/Scan to Archive.app/Contents/_CodeSignature/CodeResources @@ -0,0 +1,171 @@ + + + + + files + + Resources/ApplicationStub.icns + + RYTqh+7iocnEIV8iTs9EgJjEkO4= + + Resources/Assets.car + + SJkzJQ0zpEu+KXeAJk45wvdOq4Q= + + Resources/InfoPlist.loctable + + KEQC0DFC9lrETWe0E5eVGGsPylc= + + + files2 + + Resources/ApplicationStub.icns + + hash + + RYTqh+7iocnEIV8iTs9EgJjEkO4= + + hash2 + + odOqeBevxysHIbR5V5qgZz11qTuV9cL5jKaIcUw3R0I= + + + Resources/Assets.car + + hash + + SJkzJQ0zpEu+KXeAJk45wvdOq4Q= + + hash2 + + HfxV2L1WKqSFn2ShFdQSMo39Xh4FPrSyJzz2hW4d+IQ= + + + Resources/InfoPlist.loctable + + hash + + KEQC0DFC9lrETWe0E5eVGGsPylc= + + hash2 + + 3cSIcj64rHY2k+pLrnrgd1Li6hmbquwgX94QcefajJ8= + + + document.wflow + + cdhash + + ob3mamb5318KsN1K1DZXjWgrE04= + + requirement + cdhash H"65f45121e9098a26ec1a4451a04f2c5c571c9ef7" or cdhash H"a1bde66a66f9df5f0ab0dd4ad436578d682b134e" + + + rules + + ^Resources/ + + ^Resources/.*\.lproj/ + + optional + + weight + 1000 + + ^Resources/.*\.lproj/locversion.plist$ + + omit + + weight + 1100 + + ^Resources/Base\.lproj/ + + weight + 1010 + + ^version.plist$ + + + rules2 + + .*\.dSYM($|/) + + weight + 11 + + ^(.*/)?\.DS_Store$ + + omit + + weight + 2000 + + ^(Frameworks|SharedFrameworks|PlugIns|Plug-ins|XPCServices|Helpers|MacOS|Library/(Automator|Spotlight|LoginItems))/ + + nested + + weight + 10 + + ^.* + + ^Info\.plist$ + + omit + + weight + 20 + + ^PkgInfo$ + + omit + + weight + 20 + + ^Resources/ + + weight + 20 + + 
^Resources/.*\.lproj/ + + optional + + weight + 1000 + + ^Resources/.*\.lproj/locversion.plist$ + + omit + + weight + 1100 + + ^Resources/Base\.lproj/ + + weight + 1010 + + ^[^/]+$ + + nested + + weight + 10 + + ^embedded\.provisionprofile$ + + weight + 20 + + ^version\.plist$ + + weight + 20 + + + + diff --git a/Scan to Archive.app/Contents/document.wflow b/Scan to Archive.app/Contents/document.wflow new file mode 100644 index 0000000..899b88e --- /dev/null +++ b/Scan to Archive.app/Contents/document.wflow @@ -0,0 +1,124 @@ + + + + + AMApplicationBuild + 527 + AMApplicationVersion + 2.10 + AMDocumentVersion + 2 + actions + + + action + + AMAccepts + + Container + List + Optional + + Types + + com.apple.applescript.object + + + AMActionVersion + 1.0.2 + AMApplication + + Automator + + AMParameterProperties + + source + + + AMProvides + + Container + List + Types + + com.apple.applescript.object + + + ActionBundlePath + /System/Library/Automator/Run AppleScript.action + ActionName + Run AppleScript + ActionParameters + + source + tell application "Terminal" -- Check if there are any open windows if (count of windows) is greater than 0 then -- Get the position of the first window set windowPos to position of window 1 -- Use the first window if it's open set existingWindow to window 1 do script "~/thesis/code/scan.sh; sleep 2; ~/thesis/code/publish.sh; exit" in existingWindow else -- If no windows are open, create a new window and set its position to the last window's position set windowPos to {200, 200} -- Default position if no window was open previously do script "~/thesis/code/scan.sh; sleep 2; ~/thesis/code/publish.sh; exit" set position of the front window to windowPos end if activate end tell delay 3 -- Wait for the scripts to finish executing tell application "Terminal" -- Close all Terminal windows set terminalWindows to every window repeat with aWindow in terminalWindows if busy of aWindow is false then close aWindow end if end repeat end tell + + BundleIdentifier + com.apple.Automator.RunScript + CFBundleVersion + 1.0.2 + CanShowSelectedItemsWhenRun + + CanShowWhenRun + + Category + + AMCategoryUtilities + + Class Name + RunScriptAction + InputUUID + 8C226B47-458F-4409-BF3A-8ABE247C1FF2 + Keywords + + Run + + OutputUUID + F5590A01-ECCB-4790-974A-DA4BC1310717 + UUID + 45370EDA-E9D5-465E-B975-C2D83FE6A8D4 + UnlocalizedApplications + + Automator + + arguments + + 0 + + default value + on run {input, parameters} + + (* Your script goes here *) + + return input +end run + name + source + required + 0 + type + 0 + uuid + 0 + + + isViewVisible + 1 + location + 631.500000:680.000000 + nibPath + /System/Library/Automator/Run AppleScript.action/Contents/Resources/Base.lproj/main.nib + + isViewVisible + 1 + + + connectors + + workflowMetaData + + workflowTypeIdentifier + com.apple.Automator.application + + + diff --git "a/Scan to Archive.app/Icon\r" "b/Scan to Archive.app/Icon\r" new file mode 100644 index 0000000..e69de29 diff --git a/archive_pipeline.py b/archive_pipeline.py new file mode 100644 index 0000000..d463ce3 --- /dev/null +++ b/archive_pipeline.py @@ -0,0 +1,432 @@ +# This script captures an image from the webcam, extracts' text using Tesseract or EasyOCR, +# generates a description using Ollama Vision, and uploads the text and image to Logseq. +# It also detects markers in the image using SIFT feature matching. +# The extracted text is checked for readability using textstat, and only a meaningful text is uploaded to Logseq. +# The original image is uploaded to Logseq as an asset. 
+# The annotated image is saved in the assets folder of the Logseq directory.
+
+import locale
+import os  # used for env vars and path checks (previously only reachable via the wildcard import below)
+import urllib.parse  # 'import urllib' alone does not guarantee urllib.parse is loaded
+import pytesseract
+import easyocr
+import numpy as np
+import ollama
+import textstat
+from logseq_api import Logseq, add_to_last_block_as_child
+from ollama_ocr import OCRProcessor
+from marker_detect import *
+from camera_handler import scan
+from image_analysis_pyobjc import recognize_text_pyobjc
+import argparse
+from datetime import datetime
+from logger_config import logger
+from pinry_handler import upload_and_create_pin
+import time
+import sys
+from dotenv import load_dotenv
+from success_message import show_image_with_message
+
+import cv2
+from PIL import Image
+
+# Initialize OCR processors
+MODEL_NAME = 'llama3.2-vision:11b'
+ocr_processor = OCRProcessor(model_name=MODEL_NAME)
+
+# Set locale to German
+locale.setlocale(locale.LC_TIME, 'de_DE.UTF-8')
+
+
+def get_image_description_ollama(image_path):
+    """Uses an Ollama Vision model to generate a description of the image."""
+    try:
+        res = ollama.chat(
+            model=MODEL_NAME,
+            messages=[{'role': 'user', 'content': 'Describe this image.', 'images': [image_path]}]
+        )
+        return res['message']['content']
+    except Exception as e:
+        logger.error(f"❌ Ollama vision failed: {e}")
+        return ""
+
+
+def recognize_text_easyocr(image_path):
+    """Uses EasyOCR to extract text from the image."""
+    try:
+        reader = easyocr.Reader(['en', 'de'], gpu=True)
+        results = reader.readtext(image_path, detail=0, paragraph=True)
+        return ' '.join(results)
+    except Exception as e:
+        logger.error(f"❌ EasyOCR failed: {e}")
+        return ""
+
+
+def recognize_text_tesseract(image_path):
+    """Uses Tesseract OCR for text recognition."""
+    try:
+        img = Image.open(image_path)
+        gray = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2GRAY)
+        return pytesseract.image_to_string(gray, config='--oem 1 --psm 6')
+    except Exception as e:
+        logger.error(f"❌ Tesseract OCR failed: {e}")
+        return ""
+
+
+def is_meaningful_text(text):
+    """Checks if extracted text is meaningful using readability metrics."""
+    if not text or len(text.split()) < 3:
+        return False
+    return textstat.flesch_reading_ease(text) > 40 or textstat.text_standard(text, float_output=True) > 30
+
+
+def transfer_img_to_logseq(frame):
+    """Saves an image into the Logseq assets folder and returns the timestamped path."""
+    try:
+        now = datetime.now()
+        timestamp = now.strftime('%Y-%m-%dT%H-%M-%S-%f')[:-2]
+        asset_path = f'/Users/aron/thesis/logseq/assets/scans/{timestamp}.jpg'
+        cv2.imwrite(asset_path, frame)
+        return asset_path
+    except Exception as e:
+        logger.error(f"❌ Image saving failed: {e}")
+        return None
+
+
+def ollama_ocr(image_path):
+    """Performs OCR using Ollama's Llama Vision model."""
+    try:
+        return ocr_processor.process_image(image_path=image_path,
+                                           format_type="markdown",
+                                           preprocess=True,
+                                           )
+    except Exception as e:
+        logger.error(f"❌ Ollama OCR failed: {e}")
+        return ""
+
+
+def append_block_to_logseq_page(title, block_title, block_content, logseq):
+    """Appends a block to a Logseq page."""
+    try:
+        logseq.Editor.appendBlockInPage(title, block_title)
+        add_to_last_block_as_child(title, block_content, logseq)
+    except Exception as e:
+        logger.error(f"❌ Logseq block append failed: {e}")
+
+
+def upload_to_logseq(logseq,
+                     title,
+                     ocr_text,
+                     caption,
+                     image_path_list,
+                     hashtags=None,
+                     marker_list=None,
+                     ollama_ocr_text=None):
+    """Uploads extracted text and images to Logseq."""
+    try:
+        logseq.App.showMsg('Uploading to Logseq...')
+        logger.info(f" {image_path_list=}")
+
+        # Avoid mutable default arguments and guard against None
+        hashtags = hashtags or []
+        marker_list = list(marker_list) if marker_list else []
+
+        # insert markers as tags; add the hashtags (minus the leading #) to the marker list
+        marker_string = ""
+        for tag in hashtags:
+            marker_list.append(tag[1:])  # strip the initial # from the hashtag
+
+        if marker_list:
+            # append markers as tags to marker_string
+            marker_string = ', '.join(marker_list)
+            logseq.Editor.createPage(title, {'tags': f'{marker_string}, upload',
+                                             'public': 'true',
+                                             'author': 'Automated Archive',
+                                             'source': 'Camera',
+                                             'date': f'{datetime.now().strftime("%Y-%m-%d")}',
+                                             'timestamp': f'{datetime.now().strftime("%H:%M:%S")}',
+                                             })
+            # Markers are now page tags, so they are no longer written out as a block:
+            # logseq.Editor.appendBlockInPage(title, "## Detected Markers:")
+            # add_to_last_block_as_child(title, marker_string, logseq)
+        else:
+            logseq.Editor.createPage(title, {'tags': 'upload',
+                                             'public': 'true',
+                                             'author': 'Automated Archive',
+                                             'source': 'Camera',
+                                             'date': f'{datetime.now().strftime("%Y-%m-%d")}',
+                                             'timestamp': f'{datetime.now().strftime("%H:%M:%S")}',
+                                             })
+
+        # add classical ocr text
+        if ocr_text:
+            logseq.Editor.appendBlockInPage(title, "## OCR Text:")
+            add_to_last_block_as_child(title, ocr_text, logseq)
+        # add ollama ocr text
+        if ollama_ocr_text:
+            logseq.Editor.appendBlockInPage(title, "## Ollama OCR Text:")
+            add_to_last_block_as_child(title, ollama_ocr_text, logseq)
+        # upload images
+        if image_path_list:
+            logseq.Editor.appendBlockInPage(title, "## Scans:")
+            # convert to paths relative to the Logseq graph
+            relative_path_list = []
+            for path in image_path_list:
+                parts = path.split("assets/scans/", 1)  # Split at "assets/scans/"
+                if len(parts) > 1:
+                    relative_path_list.append("./assets/scans/" + parts[1])  # Add "./" at the start
+
+            for i, rel_image_path in enumerate(relative_path_list):
+                block_property = 'annotated:: false' if i == 0 else 'annotated:: true'
+                logseq.Editor.appendBlockInPage(title, f"### Scan {i + 1}\n{block_property}")
+                add_to_last_block_as_child(title, f"![{rel_image_path}]({rel_image_path})", logseq)
+
+        # add the caption generated by ollama
+        if caption:
+            logseq.Editor.appendBlockInPage(title, "Ollama Image Caption:")
+            add_to_last_block_as_child(title, caption, logseq)
+
+        logseq.App.showMsg('Upload complete!')
+    except Exception as e:
+        logger.error(f"❌ Logseq upload failed: {e}")
+
+
+def str_to_bool(value):
+    """Convert string values from .env to booleans."""
+    return value.lower() in ("true", "1", "yes")
+
+
+def parse_arguments():
+    """Parse and return command-line arguments."""
+    load_dotenv()  # Load environment variables
+
+    # Load environment variables with fallbacks
+    default_camera = int(os.getenv("CAMERA_INDEX", 0))
+    default_upload = str_to_bool(os.getenv("UPLOAD", "True"))
+    default_ocr_processor = os.getenv("OCR_PROCESSOR", "pyobjc")
+    default_use_ollama_caption = str_to_bool(os.getenv("USE_OLLAMA_CAPTION", "False"))
+    default_use_ollama_ocr = str_to_bool(os.getenv("USE_OLLAMA_OCR", "False"))
+    default_check_markers = str_to_bool(os.getenv("CHECK_MARKERS", "True"))
+    default_upload_marked_image = str_to_bool(os.getenv("UPLOAD_MARKED_IMAGE", "False"))
+
+    parser = argparse.ArgumentParser(
+        description="Scan an image using a specific camera index or provide an image file.")
+
+    parser.add_argument("--camera", type=int, default=default_camera,
+                        help=f"Set the camera index (integer value required) (default: {default_camera})")
+
+    parser.add_argument("--image", type=str, help="Optional image file path to use instead of capturing a new image")
+
+    parser.add_argument("--upload", action="store_true", default=default_upload,
+                        help=f"Enable Logseq upload (default: {default_upload})")
+
+    parser.add_argument("--ocr-processor", choices=["easyocr", "tesseract", "pyobjc"],
+                        default=default_ocr_processor,
+                        help=f"Choose OCR processor (default: '{default_ocr_processor}')")
+
+    parser.add_argument("--use-ollama-caption", action="store_true", default=default_use_ollama_caption,
+                        help=f"Use Ollama for image description (default: {default_use_ollama_caption})")
+
+    parser.add_argument("--use-ollama-ocr", action="store_true", default=default_use_ollama_ocr,
+                        help=f"Use Ollama for OCR (default: {default_use_ollama_ocr})")
+
+    parser.add_argument("--check-markers", action="store_true", default=default_check_markers,
+                        help=f"Check for markers in the image (default: {default_check_markers})")
+
+    parser.add_argument("--upload-marked-image", action="store_true", default=default_upload_marked_image,
+                        help=f"Upload marked image if markers detected (default: {default_upload_marked_image})")
+
+    return parser.parse_args()
+
+
+def log_configuration(args):
+    """Log configuration settings."""
+    logger.info("🔧 Configuration:")
+    for key, value in vars(args).items():
+        logger.info(f"  {key.capitalize()}: {value}")
+    logger.info("------------------------------------------------------\n")
+
+
+def capture_image(camera_index):
+    """Capture an image from the camera."""
+    snap_timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")[:-3]
+    image_path = f"./scans/snap_{snap_timestamp}.png"
+    frame, captured_image_path = scan(image_path=image_path, camera_index=camera_index, hold_preview=False,
+                                      preview_scale=0.5, timeout=30)
+    if frame is None or captured_image_path is None:
+        logger.error("❌ Image capture failed or timed out. Exiting...")
+        sys.exit(1)
+    return frame, captured_image_path
+
+
+def extract_text(image_path, ocr_processor):
+    """Extract text using the selected OCR processor."""
+    ocr_text = ""
+    hashtags = []
+    if ocr_processor == "easyocr":
+        ocr_text = recognize_text_easyocr(image_path)
+    elif ocr_processor == "tesseract":
+        ocr_text = recognize_text_tesseract(image_path)
+    elif ocr_processor == "pyobjc":
+        ocr_text, hashtags, barcode_info = recognize_text_pyobjc(image_path)
+        if barcode_info:
+            logger.info(f"🔍 Barcode/QR Code info: {barcode_info}")
+            ocr_text += f"\n\nBarcode/QR Code Info: {barcode_info}"
+    return ocr_text, hashtags
+
+
+def generate_image_description(image_path, use_ollama_caption, use_ollama_ocr):
+    """Generate image description and OCR text using Ollama if enabled."""
+    caption = get_image_description_ollama(image_path) if use_ollama_caption else ""
+    ollama_ocr_text = ollama_ocr(image_path) if use_ollama_ocr else ""
+    return caption, ollama_ocr_text
+
+
+def detect_and_upload_markers(image_path, check_markers, upload_marked_image):
+    """Detect markers in the image and upload the marked image if enabled."""
+    marker_list = []
+    # Initialize both paths so the return works even when marker checking is off
+    marked_image_path = None
+    keypoints_image_path = None
+
+    if check_markers:
+        markers, marked_image_path, keypoints_image_path = detect_markers(image_path, templates, min_area=500)
+        logger.info(f" {markers=}")
+        if markers:
+            logger.info(f"🎯 Detected markers: {markers}, #{len(markers)}")
+            marker_list.extend(markers)
+            if upload_marked_image:
+                marked_frame = cv2.imread(marked_image_path)
+                transfer_img_to_logseq(marked_frame)  # returns a single asset path
+        else:
+            logger.info("🛑 No markers detected.")
+
+    return marker_list, marked_image_path, keypoints_image_path
+
+
+def upload_to_logseq_if_needed(upload,
+                               ocr_text,
+                               caption,
+                               ollama_ocr_text,
+                               frame,
+                               marker_list,
+                               marked_image_path,
+                               hashtags=None):
+    """Upload to Logseq if required."""
+    if upload:
+        logger.info('🚀 Uploading to Logseq...')
+        logseq = Logseq(host="127.0.0.1", port=12315, token="vetinari")
+
+        asset_path_list = [transfer_img_to_logseq(frame)]
+        if marker_list:
+            marked_frame = cv2.imread(marked_image_path)
+            marked_asset_path = transfer_img_to_logseq(marked_frame)
+            asset_path_list.append(marked_asset_path)
+
+        pagetitle = f"Upload-{datetime.now().strftime('%Y-%m-%d_%H:%M:%S')}"
+        upload_to_logseq(logseq,
+                         title=pagetitle,
+                         ocr_text=ocr_text,
+                         caption=caption,
+                         image_path_list=asset_path_list,
+                         marker_list=marker_list,
+                         ollama_ocr_text=ollama_ocr_text,
+                         hashtags=hashtags)
+        logger.info("✅ Upload complete.")
+        return pagetitle
+    return None
+
+
+def main():
+    """Main function to execute the OCR workflow."""
+    try:
+        args = parse_arguments()
+        log_configuration(args)
+
+        start_time = time.time()
+        if args.image:
+            image_path = args.image
+            logger.info(f'📂 Using provided image file: {image_path}')
+            if not os.path.exists(image_path):
+                logger.error(f"Error: Provided image file '{image_path}' does not exist.")
+                return
+            frame = cv2.imread(image_path)
+            if frame is None:
+                logger.error(f"Error: Failed to load image from '{image_path}'")
+                return
+        else:
+            frame, image_path = capture_image(args.camera)
+
+        logger.info('🔍 Extracting text...')
+        ocr_text, hashtags = extract_text(image_path, args.ocr_processor)
+
+        # Discard meaningless text if the OCR processor is easyocr or tesseract
+        if args.ocr_processor in ["easyocr", "tesseract"] and not is_meaningful_text(ocr_text):
+            logger.warning('❌ OCR output was not meaningful. Discarding...')
+            ocr_text = ""
+
+        logger.info(f"✅ Text extraction complete in {time.time() - start_time:.2f} seconds.\n")
+
+        logger.info('🖼️ Getting image description...')
+        caption, ollama_ocr_text = generate_image_description(image_path, args.use_ollama_caption, args.use_ollama_ocr)
+
+        logger.info(f"✅ Image description generation complete in {time.time() - start_time:.2f} seconds.\n")
+
+        logger.info('🎯 Detecting markers...')
+        marker_list, marked_image_path, keypoints_image_path = detect_and_upload_markers(image_path,
+                                                                                         args.check_markers,
+                                                                                         args.upload_marked_image)
+
+        pagetitle = upload_to_logseq_if_needed(args.upload,
+                                               ocr_text,
+                                               caption,
+                                               ollama_ocr_text,
+                                               frame,
+                                               marker_list,
+                                               marked_image_path=marked_image_path,
+                                               hashtags=hashtags)
+        logseqpageurl = None
+        if pagetitle:
+            logger.info(f"✅ Upload complete. Page title: {pagetitle}")
+            # get the page URL from the title
+            baseurl = 'https://archive.petau.net/#/page/'
+            # make sure the page title is URL-safe
+            pagetitle_url = urllib.parse.quote(pagetitle, safe="/?#[]@&=")
+            logseqpageurl = f"{baseurl}{pagetitle_url}"
+            logger.info(f"Page URL: {logseqpageurl}")
+
+        pinry_tags = marker_list + hashtags
+        img_desc = caption if caption else None
+        # Call the wrapper function to upload and create the pin
+        pin_url = upload_and_create_pin(image_path, board_id=2, tags=pinry_tags, source_url=logseqpageurl,
+                                        description=img_desc)
+
+        if pin_url:
+            logger.info(f"✅ Pin URL: {pin_url}")
+            if pagetitle and args.upload:
+                logger.info('Adding Pin URL to Logseq...')
+                logseq = Logseq(host="127.0.0.1", port=12315, token="vetinari")
+                append_block_to_logseq_page(pagetitle, "## Pin URL:", f'[The Pinry URL for this upload]({pin_url})',
+                                            logseq)
+        else:
+            logger.error("❌ Pin creation failed.")
+
+        logger.info(f"🚀 Pipeline completed in {time.time() - start_time:.2f} seconds.")
+        logger.info("------------------------------------------------------\n")
+
+        success_message = "Scan Completed Successfully!"
+        url_to_open = "https://pinry.petau.net"  # URL to open after the timeout
+
+        # Show the keypoints image (or the raw scan if marker detection was off) and open the URL after 5 seconds
+        show_image_with_message(keypoints_image_path or image_path, success_message, timeout=5, url=url_to_open)
+
+    except KeyboardInterrupt:
+        logger.info("\nGracefully shutting down... Interrupt received (Ctrl+C).")
+        sys.exit(0)  # Exit gracefully with status code 0
+
+
+if __name__ == "__main__":
+    main()
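The `is_meaningful_text` gate above is what keeps OCR noise out of the archive: anything under three words is rejected outright, and everything else must clear a Flesch reading-ease score of 40 or a composite grade level of 30. A minimal, self-contained sketch of that behaviour (the thresholds are the ones hard-coded above; the sample strings are invented):

```python
# Hedged sketch of the readability gate used in archive_pipeline.py.
# Thresholds (40 / 30) mirror is_meaningful_text(); sample inputs are invented.
import textstat

def is_meaningful_text(text):
    if not text or len(text.split()) < 3:
        return False  # too short to judge
    return textstat.flesch_reading_ease(text) > 40 or \
        textstat.text_standard(text, float_output=True) > 30

print(is_meaningful_text(""))                         # False: empty input
print(is_meaningful_text("xq zv"))                    # False: fewer than three words
print(is_meaningful_text("The cat sat on the mat."))  # True: plain, readable sentence
```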
diff --git a/camera_debug.py b/camera_debug.py
new file mode 100644
index 0000000..6218126
--- /dev/null
+++ b/camera_debug.py
@@ -0,0 +1,23 @@
+import cv2
+
+camera = cv2.VideoCapture(0)  # Change index if needed
+
+if not camera.isOpened():
+    print("❌ Could not access the webcam")
+else:
+    # Try setting 4K resolution
+    camera.set(cv2.CAP_PROP_FRAME_WIDTH, 3840)
+    camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 2160)
+
+    # Read the actual resolution
+    actual_width = int(camera.get(cv2.CAP_PROP_FRAME_WIDTH))
+    actual_height = int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+    print(f"Requested: 3840x2160 → Camera Output: {actual_width}x{actual_height}")
+
+    ret, frame = camera.read()
+    if ret:
+        print(f"Captured frame size: {frame.shape[1]}x{frame.shape[0]}")
+        cv2.imwrite("./debug_img.png", frame)
+
+    camera.release()
diff --git a/camera_finder.py b/camera_finder.py
new file mode 100644
index 0000000..1c89225
--- /dev/null
+++ b/camera_finder.py
@@ -0,0 +1,57 @@
+import cv2
+import subprocess
+import re
+from logger_config import logger
+
+
+def get_camera_names():
+    """Gets a list of available camera names using ffmpeg (macOS only)."""
+    try:
+        result = subprocess.run(
+            ["ffmpeg", "-f", "avfoundation", "-list_devices", "true", "-i", ""],
+            stderr=subprocess.PIPE,
+            stdout=subprocess.PIPE,
+            text=True
+        )
+        output = result.stderr  # ffmpeg prints the device list to stderr
+
+        # Extract camera names from the ffmpeg output
+        camera_lines = re.findall(r'\[AVFoundation input device @ .*?] \".*\"', output)
+        cameras = [re.search(r'"(.*?)"', line).group(1) for line in camera_lines]
+        return cameras
+    except Exception as e:
+        logger.error(f"❌ Error getting camera names: {e}")
+        return []
+
+
+def list_available_cameras():
+    """Lists available camera indexes and their names, with a preview."""
+    logger.info("🔍 Scanning for available cameras...")
+
+    camera_names = get_camera_names()
+    found_cameras = []
+
+    for i in range(10):  # Check the first 10 indexes
+        cap = cv2.VideoCapture(i, cv2.CAP_AVFOUNDATION)
+        if cap.isOpened():
+            name = camera_names[i] if i < len(camera_names) else f"Unknown Camera {i}"
+            logger.info(f"✅ Camera {i}: {name}")
+            found_cameras.append((i, name))
+
+            ret, frame = cap.read()
+            if ret:
+                cv2.imshow(f"Camera {i}: {name}", frame)
+                cv2.waitKey(1000)  # Show preview for 1 second
+                cv2.destroyAllWindows()
+            cap.release()
+        else:
+            logger.warning(f"❌ No camera found at index {i}")
+
+    if found_cameras:
+        logger.info("\n🎥 Available Cameras:")
+        for index, name in found_cameras:
+            logger.info(f"  {index}: {name}")
+
+
+if __name__ == '__main__':
+    list_available_cameras()
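`get_camera_names()` returns devices in ffmpeg's avfoundation order, which `list_available_cameras` pairs with OpenCV indexes. Building on that same assumption, a small sketch for opening a camera by (partial) name instead of a bare index; the device name here is only illustrative:

```python
# Hedged sketch: open a camera by name fragment instead of a numeric index.
# Assumes ffmpeg's avfoundation order matches cv2.CAP_AVFOUNDATION indexing,
# the same assumption list_available_cameras() makes.
import cv2
from camera_finder import get_camera_names

def open_camera_by_name(name_fragment):
    for index, name in enumerate(get_camera_names()):
        if name_fragment.lower() in name.lower():
            return cv2.VideoCapture(index, cv2.CAP_AVFOUNDATION)
    return None  # no device matched

cap = open_camera_by_name("FaceTime")  # illustrative device name
if cap is not None and cap.isOpened():
    ok, frame = cap.read()
    print("Got a frame:", ok)
    cap.release()
```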
diff --git a/camera_handler.py b/camera_handler.py
new file mode 100644
index 0000000..8f5cb64
--- /dev/null
+++ b/camera_handler.py
@@ -0,0 +1,116 @@
+import tkinter as tk
+from PIL import Image, ImageTk, ImageEnhance
+import cv2
+import time
+import os
+
+
+def scan(image_path=None, camera_index=None, hold_preview=False, preview_scale=1.0, timeout=10):
+    """
+    Captures an image with a high-quality Tkinter preview.
+    Features a centered, large countdown before capture.
+    """
+    assert image_path is not None, "Image path not provided."
+
+    if camera_index is None:
+        camera_index = 0  # Default camera index
+
+    # Open camera
+    camera = cv2.VideoCapture(camera_index)
+    if not camera.isOpened():
+        print("❌ Error: Could not access the webcam")
+        return None, None
+
+    # Request an oversized resolution so the driver falls back to its maximum
+    camera.set(cv2.CAP_PROP_FRAME_WIDTH, 9999)
+    camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 9999)
+
+    max_width = int(camera.get(cv2.CAP_PROP_FRAME_WIDTH))
+    max_height = int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT))
+
+    print(f"🔍 Using resolution: {max_width}x{max_height}")
+
+    # Calculate preview dimensions
+    preview_width = int(max_width * preview_scale)
+    preview_height = int(max_height * preview_scale)
+
+    # Initialize Tkinter window
+    root = tk.Tk()
+    root.title("📸 Capture Preview")
+    root.geometry(f"{preview_width}x{preview_height}")  # Adjust to preview size
+    root.configure(bg="black")
+    root.overrideredirect(True)  # Borderless fullscreen effect
+
+    # Create canvas for overlay
+    canvas = tk.Canvas(root, width=preview_width, height=preview_height, highlightthickness=0)
+    canvas.pack()
+
+    countdown_label = tk.Label(root, text="4", font=("Helvetica", 100, "bold"), fg="white", bg="black")
+    countdown_label.place(x=preview_width // 2, y=preview_height // 2, anchor="center")  # Center the countdown
+
+    frame = None
+    countdown_start = time.time()
+
+    def update_preview():
+        """Update the camera preview & countdown."""
+        nonlocal frame
+
+        ret, frame = camera.read()
+        if not ret:
+            print("❌ Error: Failed to read from camera.")
+            root.destroy()
+            return
+
+        # Scale down only for the preview
+        frame_preview = cv2.resize(frame, (preview_width, preview_height))
+
+        frame_rgb = cv2.cvtColor(frame_preview, cv2.COLOR_BGR2RGB)
+        img = Image.fromarray(frame_rgb)
+
+        # Apply a slight dimming effect
+        enhancer = ImageEnhance.Brightness(img)
+        img = enhancer.enhance(0.6)
+
+        img_tk = ImageTk.PhotoImage(img)
+        canvas.create_image(0, 0, anchor=tk.NW, image=img_tk)
+        canvas.img_tk = img_tk  # Keep a reference so the image is not garbage-collected
+
+        # Update the countdown (shows 3, 2, 1, 0; never a negative number)
+        elapsed = int(time.time() - countdown_start)
+        remaining = max(0, 4 - elapsed)
+        countdown_label.config(text=str(max(0, remaining - 1)))
+
+        if remaining == 0:
+            capture()  # Capture automatically at 0
+
+        root.after(100, update_preview)
Exiting...") + root.quit() + root.after(1000, check_timeout) + + # Bind Enter key to capture function + root.bind("", capture) + + # Start preview loop + update_preview() + check_timeout() + root.mainloop() + + return frame, image_path if os.path.exists(image_path) else (None, None) + + +if __name__ == '__main__': + scan("./scans/debug_img.png", hold_preview=False, preview_scale=0.5, timeout=10) diff --git a/image_analysis_pyobjc.py b/image_analysis_pyobjc.py new file mode 100644 index 0000000..18f6a54 --- /dev/null +++ b/image_analysis_pyobjc.py @@ -0,0 +1,113 @@ +import Vision +import AppKit +import os +from logger_config import logger +import re # Regular expression module to find hashtags + + +def nsimage_to_cgimage(image): + """Converts NSImage to CGImage.""" + bitmap_rep = AppKit.NSBitmapImageRep.imageRepWithData_(image.TIFFRepresentation()) + if not bitmap_rep: + logger.error("❌ Failed to get bitmap representation.") + return None + + cg_image = bitmap_rep.CGImage() + if not cg_image: + logger.error("❌ Failed to convert to CGImage.") + return None + + return cg_image + + +def recognize_text_pyobjc(image_path, + recognition_level=Vision.VNRequestTextRecognitionLevelAccurate, + uses_language_correction=True): + """Uses macOS Vision framework to extract text and barcode/QR code data from an image.""" + + # Check if the file exists early + if not os.path.exists(image_path): + logger.error("❌ Image path does not exist: %s", image_path) + return "", [], "" + + try: + # Load the image using AppKit + image = AppKit.NSImage.alloc().initWithContentsOfFile_(image_path) + if not image: + logger.error("❌ Failed to load image.") + return "", [], "" + + # Log image size + size = image.size() + logger.info(f"Image size: {size.width} x {size.height}") + + # Convert NSImage to CGImage + cg_image = nsimage_to_cgimage(image) + if not cg_image: + return "", [], "" + + # Create a text recognition request + text_request = Vision.VNRecognizeTextRequest.alloc().init() + text_request.setRecognitionLevel_(recognition_level) + text_request.setUsesLanguageCorrection_(uses_language_correction) + + # Create a barcode detection request + barcode_request = Vision.VNDetectBarcodesRequest.alloc().init() + + # Create an image request handler + handler = Vision.VNImageRequestHandler.alloc().initWithCGImage_options_(cg_image, None) + + # Perform the requests + success, error = handler.performRequests_error_([text_request, barcode_request], None) + if not success: + logger.error(f"❌ Vision OCR failed: {error}") + return "", [], "" + + # Extract recognized text + text_results = text_request.results() + if not text_results: + logger.info("No text detected.") + extracted_text = "No text detected." 
diff --git a/image_analysis_pyobjc.py b/image_analysis_pyobjc.py
new file mode 100644
index 0000000..18f6a54
--- /dev/null
+++ b/image_analysis_pyobjc.py
@@ -0,0 +1,113 @@
+import Vision
+import AppKit
+import os
+from logger_config import logger
+import re  # Regular expression module to find hashtags
+
+
+def nsimage_to_cgimage(image):
+    """Converts NSImage to CGImage."""
+    bitmap_rep = AppKit.NSBitmapImageRep.imageRepWithData_(image.TIFFRepresentation())
+    if not bitmap_rep:
+        logger.error("❌ Failed to get bitmap representation.")
+        return None
+
+    cg_image = bitmap_rep.CGImage()
+    if not cg_image:
+        logger.error("❌ Failed to convert to CGImage.")
+        return None
+
+    return cg_image
+
+
+def recognize_text_pyobjc(image_path,
+                          recognition_level=Vision.VNRequestTextRecognitionLevelAccurate,
+                          uses_language_correction=True):
+    """Uses the macOS Vision framework to extract text and barcode/QR code data from an image."""
+
+    # Check if the file exists early
+    if not os.path.exists(image_path):
+        logger.error("❌ Image path does not exist: %s", image_path)
+        return "", [], ""
+
+    try:
+        # Load the image using AppKit
+        image = AppKit.NSImage.alloc().initWithContentsOfFile_(image_path)
+        if not image:
+            logger.error("❌ Failed to load image.")
+            return "", [], ""
+
+        # Log image size
+        size = image.size()
+        logger.info(f"Image size: {size.width} x {size.height}")
+
+        # Convert NSImage to CGImage
+        cg_image = nsimage_to_cgimage(image)
+        if not cg_image:
+            return "", [], ""
+
+        # Create a text recognition request
+        text_request = Vision.VNRecognizeTextRequest.alloc().init()
+        text_request.setRecognitionLevel_(recognition_level)
+        text_request.setUsesLanguageCorrection_(uses_language_correction)
+
+        # Create a barcode detection request
+        barcode_request = Vision.VNDetectBarcodesRequest.alloc().init()
+
+        # Create an image request handler
+        handler = Vision.VNImageRequestHandler.alloc().initWithCGImage_options_(cg_image, None)
+
+        # Perform the requests
+        success, error = handler.performRequests_error_([text_request, barcode_request], None)
+        if not success:
+            logger.error(f"❌ Vision OCR failed: {error}")
+            return "", [], ""
+
+        # Extract recognized text
+        text_results = text_request.results()
+        if not text_results:
+            logger.info("No text detected.")
+            extracted_text = "No text detected."
+        else:
+            extracted_text = "\n".join([result.text() for result in text_results])
+
+        # Extract hashtags
+        hashtags = extract_hashtags(extracted_text)
+
+        # Extract barcode/QR code information
+        barcode_info = extract_barcode_info(barcode_request.results())
+
+        return extracted_text, hashtags, barcode_info
+
+    except Exception as e:
+        logger.error(f"❌ Error: {e}")
+        return "", [], ""
+
+
+def extract_hashtags(text):
+    """Extracts hashtags from the given text."""
+    # Words starting with # followed by alphanumeric characters
+    hashtags = re.findall(r'#\w+', text)
+    return hashtags
+
+
+def extract_barcode_info(barcode_results):
+    """Extracts barcode or QR code information from the detection results."""
+    barcode_data = []
+    for barcode in barcode_results:
+        # Extract the string payload associated with the barcode
+        if hasattr(barcode, 'payloadString'):
+            data = barcode.payloadString()
+            if data:
+                barcode_data.append(data)
+    logger.info("Barcode/QR code data: %s", barcode_data)
+    return ', '.join(barcode_data) if barcode_data else ""
+
+
+if __name__ == '__main__':
+    image_path = 'scans/snap_2025-02-22_18-12-04-435736.png'
+    extracted_text, hashtags, barcode_info = recognize_text_pyobjc(image_path)
+
+    logger.info("Extracted Text:\n%s", extracted_text)
+    logger.info("Hashtags found: %s", hashtags)
+    logger.info("Barcode/QR code Info: %s", barcode_info)
diff --git a/logger_config.py b/logger_config.py
new file mode 100644
index 0000000..858b336
--- /dev/null
+++ b/logger_config.py
@@ -0,0 +1,29 @@
+import logging
+from logging.handlers import RotatingFileHandler
+
+# Create a logger
+logger = logging.getLogger("archive_logger")
+logger.setLevel(logging.INFO)  # Set the logging level
+
+# Create a rotating file handler
+log_file = 'archive.log'
+max_log_size = 5 * 1024 * 1024  # Max log file size (5 MB)
+backup_count = 3  # Keep up to 3 backup files
+
+rotating_handler = RotatingFileHandler(log_file, maxBytes=max_log_size, backupCount=backup_count)
+rotating_handler.setLevel(logging.DEBUG)
+
+# Create a stream handler (for console output)
+console_handler = logging.StreamHandler()
+console_handler.setLevel(logging.DEBUG)
+
+# Create a formatter
+formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
+
+# Add formatter to handlers
+rotating_handler.setFormatter(formatter)
+console_handler.setFormatter(formatter)
+
+# Add handlers to the logger
+logger.addHandler(rotating_handler)
+logger.addHandler(console_handler)  # Add console handler
diff --git a/logseq_api.py b/logseq_api.py
new file mode 100644
index 0000000..19346ea
--- /dev/null
+++ b/logseq_api.py
@@ -0,0 +1,186 @@
+import requests
+import json
+
+# Doc site: https://plugins-doc.logseq.com/ .
+# This list is auto-generated from https://github.com/logseq/plugins/tree/master/docs .
+apis = [ + 'logseq.settings', + 'logseq.updateSettings', + 'logseq.once', + 'logseq.toggleMainUI', + 'logseq.listeners', + 'logseq.ready', + 'logseq.connected', + 'logseq.removeListener', + 'logseq.showMainUI', + 'logseq.resolveResourceFullUrl', + 'logseq.provideStyle', + 'logseq.caller', + 'logseq.addListener', + 'logseq.hideSettingsUI', + 'logseq.provideUI', + 'logseq.setMainUIInlineStyle', + 'logseq.emit', + 'logseq.showSettingsUI', + 'logseq.listenerCount', + 'logseq.removeAllListeners', + 'logseq.onSettingsChanged', + 'logseq.provideTheme', + 'logseq.Experiments', + 'logseq.eventNames', + 'logseq.FileStorage', + 'logseq.provideModel', + 'logseq.baseInfo', + 'logseq.setMainUIAttrs', + 'logseq.useSettingsSchema', + 'logseq.hideMainUI', + 'logseq.isMainUIVisible', + 'logseq.beforeunload', + 'logseq.UI.showMsg', + 'logseq.UI.closeMsg', + 'logseq.App.registerPageMenuItem', + 'logseq.App.getUserInfo', + 'logseq.App.setRightSidebarVisible', + 'logseq.App.showMsg', + 'logseq.App.quit', + 'logseq.App.registerUIItem', + 'logseq.App.setFullScreen', + 'logseq.App.onMacroRendererSlotted', + 'logseq.App.getInfo', + 'logseq.App.onPageHeadActionsSlotted', + 'logseq.App.onCurrentGraphChanged', + 'logseq.App.registerCommandShortcut', + 'logseq.App.getStateFromStore', + 'logseq.App.onSidebarVisibleChanged', + 'logseq.App.registerCommand', + 'logseq.App.setLeftSidebarVisible', + 'logseq.App.replaceState', + 'logseq.App.setZoomFactor', + 'logseq.App.execGitCommand', + 'logseq.App.invokeExternalCommand', + 'logseq.App.queryElementById', + 'logseq.App.onThemeModeChanged', + 'logseq.App.openExternalLink', + 'logseq.App.pushState', + 'logseq.App.getCurrentGraph', + 'logseq.App.onRouteChanged', + 'logseq.App.queryElementRect', + 'logseq.App.registerCommandPalette', + 'logseq.App.relaunch', + 'logseq.App.getUserConfigs', + 'logseq.App.onBlockRendererSlotted', + 'logseq.DB.datascriptQuery', + 'logseq.DB.onChanged', + 'logseq.DB.q', + 'logseq.DB.onBlockChanged', + 'logseq.Assets.listFilesOfCurrentGraph', + 'logseq.Editor.insertBatchBlock', + 'logseq.Editor.getAllPages', + 'logseq.Editor.createPage', + 'logseq.Editor.getBlockProperty', + 'logseq.Editor.getBlockProperties', + 'logseq.Editor.insertAtEditingCursor', + 'logseq.Editor.getCurrentPage', + 'logseq.Editor.appendBlockInPage', + 'logseq.Editor.getSelectedBlocks', + 'logseq.Editor.insertBlock', + 'logseq.Editor.getPagesTreeFromNamespace', + 'logseq.Editor.onInputSelectionEnd', + 'logseq.Editor.scrollToBlockInPage', + 'logseq.Editor.moveBlock', + 'logseq.Editor.getPreviousSiblingBlock', + 'logseq.Editor.exitEditingMode', + 'logseq.Editor.getPagesFromNamespace', + 'logseq.Editor.getNextSiblingBlock', + 'logseq.Editor.getPage', + 'logseq.Editor.renamePage', + 'logseq.Editor.prependBlockInPage', + 'logseq.Editor.deletePage', + 'logseq.Editor.editBlock', + 'logseq.Editor.checkEditing', + 'logseq.Editor.getCurrentPageBlocksTree', + 'logseq.Editor.getCurrentBlock', + 'logseq.Editor.upsertBlockProperty', + 'logseq.Editor.registerSlashCommand', + 'logseq.Editor.getPageBlocksTree', + 'logseq.Editor.getPageLinkedReferences', + 'logseq.Editor.updateBlock', + 'logseq.Editor.registerBlockContextMenuItem', + 'logseq.Editor.removeBlock', + 'logseq.Editor.restoreEditingCursor', + 'logseq.Editor.removeBlockProperty', + 'logseq.Editor.getBlock', + 'logseq.Editor.openInRightSidebar', + 'logseq.Editor.setBlockCollapsed', + 'logseq.Editor.getEditingBlockContent', + 'logseq.Editor.getEditingCursorPosition', + 'logseq.Git.saveIgnoreFile', + 'logseq.Git.loadIgnoreFile', + 
'logseq.Git.execCommand']
+
+
+def raw_api_call(host, port, token, method, args):
+    resp = requests.post(f'http://{host}:{port}/api', json={
+        'method': method,
+        'args': args
+    }, headers={
+        'Authorization': 'Bearer ' + token
+    })
+    try:
+        return resp.json()
+    except json.JSONDecodeError:
+        return resp.text
+
+
+def _define_api(host, port, token, cls, method):
+    [_, *hier, name] = method.split('.')
+
+    @staticmethod
+    def _wrap(*args):
+        return raw_api_call(host, port, token, method, args)
+
+    # Walk (and create) the namespace classes, e.g. Logseq.Editor, then attach the call
+    if hier:
+        for ns in hier:
+            if not hasattr(cls, ns):
+                setattr(cls, ns, type(ns, (object,), {}))
+            cls = getattr(cls, ns)
+    setattr(cls, name, _wrap)
+
+
+def _create_class(host, port, token):
+    class Logseq: pass
+
+    for api in apis:
+        _define_api(host, port, token, Logseq, api)
+    return Logseq
+
+
+def Logseq(host, port, token):
+    return _create_class(host, port, token)()
+
+
+def add_to_last_block_as_child(page, block_content, logseq):
+    # Step 1: Get all blocks from the page
+    blocks = logseq.Editor.getPageBlocksTree(page)
+    assert blocks, "No blocks found in the page"
+
+    # Step 2: Find the last block
+    if blocks and isinstance(blocks, list) and len(blocks) > 0:
+        last_block = blocks[-1]  # Get the last block in the list
+        parent_block_uuid = last_block.get("uuid")
+        # print(f"Last block UUID: {parent_block_uuid}")
+
+        # Step 3: Insert a new block as a child of the last block
+        logseq.Editor.insertBlock(parent_block_uuid, block_content, {"children": "true"})
+
+
+# Usage:
+if __name__ == '__main__':
+    logseq = Logseq('127.0.0.1', '12315', 'vetinari')
+    logseq.Editor.deletePage('APITest')
+    logseq.Editor.createPage('APITest')
+    logseq.Editor.checkEditing()
+    logseq.Editor.appendBlockInPage('APITest', 'Parent block')
+    add_to_last_block_as_child('APITest', 'Child block', logseq)
+
+    logseq.App.showMsg('Testing API')
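The dynamically built wrapper above is only sugar over `raw_api_call`: attribute access reassembles the dotted method string that the Logseq HTTP server expects. The two calls in this sketch are therefore equivalent (host, port and token are the values hard-coded elsewhere in this repository):

```python
# Hedged sketch: the Logseq wrapper vs. the raw HTTP call underneath it.
# Host, port and token are the values used in archive_pipeline.py.
from logseq_api import Logseq, raw_api_call

logseq = Logseq('127.0.0.1', 12315, 'vetinari')
logseq.Editor.createPage('APITest')  # sends method 'logseq.Editor.createPage'

# The same request, made without the generated wrapper class:
raw_api_call('127.0.0.1', 12315, 'vetinari', 'logseq.Editor.createPage', ['APITest'])
```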
diff --git a/logseq_cleaning_script.py b/logseq_cleaning_script.py
new file mode 100644
index 0000000..4933c03
--- /dev/null
+++ b/logseq_cleaning_script.py
@@ -0,0 +1,30 @@
+import os
+import shutil
+# Execute with care: moves unreferenced assets out of the assets folder.
+
+assets_dir = './logseq/assets'  # was '/thesis/logseq/assets'; made relative like the paths below
+journal_dir = './logseq/journals'
+pages_dir = './logseq/pages'
+to_delete_dir = './logseq/to_delete'
+
+if not os.path.exists(to_delete_dir):
+    os.makedirs(to_delete_dir)
+
+assets_files = os.listdir(assets_dir)
+referenced_files = []
+
+# Collect every asset filename that is mentioned in a journal or page
+for dirname in [journal_dir, pages_dir]:
+    for filename in os.listdir(dirname):
+        if filename.endswith('.md'):
+            with open(os.path.join(dirname, filename)) as f:
+                for line in f:
+                    for asset in assets_files:
+                        if asset in line:
+                            referenced_files.append(asset)
+
+# Move everything that is never referenced (except .edn files) to the holding folder
+for asset in assets_files:
+    if asset not in referenced_files and not asset.endswith(".edn"):
+        print(asset)
+        shutil.move(os.path.join(assets_dir, asset), to_delete_dir)
\ No newline at end of file
diff --git a/marker_detect.py b/marker_detect.py
new file mode 100644
index 0000000..0361183
--- /dev/null
+++ b/marker_detect.py
@@ -0,0 +1,166 @@
+import os
+import cv2
+import numpy as np
+import time
+import cairosvg
+from logger_config import logger
+
+TEMPLATE_DIR = "./markers/"
+USE_ORB = False  # Set to True to use ORB, False to use SIFT
+
+
+# Load and convert SVG templates to grayscale images
+def load_template(filename):
+    """Load a template image, converting SVG to grayscale if necessary."""
+    template_path = os.path.join(TEMPLATE_DIR, filename)
+
+    if filename.endswith(".svg"):
+        # Convert SVG to PNG (grayscale)
+        png_data = cairosvg.svg2png(url=template_path)
+        np_arr = np.frombuffer(png_data, dtype=np.uint8)
+        template = cv2.imdecode(np_arr, cv2.IMREAD_GRAYSCALE)
+    else:
+        # Load JPG/PNG directly
+        template = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)
+
+    return template
+
+
+# Load all templates
+templates = {}
+logger.info("🔄 Loading templates...")
+start_time = time.time()
+
+for filename in os.listdir(TEMPLATE_DIR):
+    if filename.endswith((".jpg", ".png", ".svg")):
+        template_name = os.path.splitext(filename)[0]
+        template = load_template(filename)
+
+        if template is not None:
+            templates[template_name] = template
+        else:
+            logger.error(f"❌ Failed to load template: {filename}")
+
+logger.info(f"✅ Template loading complete in {time.time() - start_time:.2f} seconds.\n")
+
+# Log the loaded templates once all are loaded
+logger.info("Templates loaded: %s", ', '.join(templates.keys()))
+
+# Initialize feature detector (SIFT or ORB)
+if USE_ORB:
+    detector = cv2.ORB_create(nfeatures=500)
+    matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
+    ratio_test = 0.6  # ORB ratio test
+else:
+    detector = cv2.SIFT_create()
+    matcher = cv2.FlannBasedMatcher(dict(algorithm=1, trees=10), dict())
+    ratio_test = 0.6  # SIFT stricter ratio test
+
+
+def is_valid_aspect_ratio(bounding_box, expected_aspect_ratio=1.0, tolerance=0.2):
+    """Ensure detected bounding box is approximately square."""
+    x, y, w, h = bounding_box
+    aspect_ratio = w / float(h)
+    return (expected_aspect_ratio - tolerance) <= aspect_ratio <= (expected_aspect_ratio + tolerance)
+
+
+def detect_markers(image_path, templates, min_matches=15, min_area=500):
+    """Detects markers using feature matching and filters based on shape constraints."""
+    logger.info(f"🔄 Reading image: {image_path}")
+    image = cv2.imread(image_path)
+
+    if image is None:
+        logger.error("❌ Failed to load image")
+        return [], None, None  # keep the 3-tuple shape that callers unpack
+
+    gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
+    detected_markers = []
+
+    # Detect keypoints and descriptors in the input image
+    kp_image, des_image = detector.detectAndCompute(gray_image, None)
+    logger.info(f"🔍 Detected {len(kp_image)} keypoints in the input image.")
+
+    keypoints_image = cv2.drawKeypoints(image, kp_image, None, (0, 255, 0), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
+
+    logger.info("🔄 Starting template matching...")
+
+    for name, template in templates.items():
+        logger.debug(f"🔍 Processing template: {name}")
+
+        # Detect keypoints and descriptors in the template
+        kp_template, des_template = detector.detectAndCompute(template, None)
+
+        if des_template is None or des_image is None:
+            continue
+
+        logger.debug(f"🔍 Found {len(kp_template)} keypoints in template {name}.")
+
+        # Match descriptors
+        if USE_ORB:
+            matches = matcher.match(des_template, des_image)
+            good_matches = sorted(matches, key=lambda x: x.distance)[:min_matches]
+        else:
+            raw_matches = matcher.knnMatch(des_template, des_image, k=2)
+            good_matches = [m for m, n in raw_matches if m.distance < ratio_test * n.distance]
+
+        if len(good_matches) >= min_matches:
+            src_pts = np.float32([kp_template[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
+            dst_pts = np.float32([kp_image[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)
+
+            if USE_ORB:
+                x_min, y_min = np.min(dst_pts, axis=0)[0]
+                x_max, y_max = np.max(dst_pts, axis=0)[0]
+
+                bounding_box = (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min))
+                if is_valid_aspect_ratio(bounding_box) and bounding_box[2] * bounding_box[3] >= min_area:
+                    cv2.rectangle(image, (bounding_box[0], bounding_box[1]),
+                                  (bounding_box[0] + bounding_box[2], bounding_box[1] + bounding_box[3]),
+                                  (0, 255, 0), 2)
+                    cv2.putText(image, name, (bounding_box[0], bounding_box[1] - 10),
+                                cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
+                    detected_markers.append(name)
+                else:
+                    logger.warning(f"❌ {name} detected but doesn't meet square size constraints.")
+
+            else:
+                # SIFT: estimate a homography and project the template outline into the scan
+                M, _ = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
+                if M is not None:
+                    h, w = template.shape
+                    pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]).reshape(-1, 1, 2)
+                    dst = cv2.perspectiveTransform(pts, M)
+
+                    bounding_box = cv2.boundingRect(dst)
+                    if is_valid_aspect_ratio(bounding_box) and bounding_box[2] * bounding_box[3] >= min_area:
+                        hull = cv2.convexHull(dst)
+                        if len(hull) == 4:  # The projected outline must still be a convex quadrilateral
+                            detected_markers.append(name)
+                            image = cv2.polylines(image, [np.int32(hull)], True, (0, 255, 0), 3)
+                            x, y = dst[0][0]
+                            cv2.putText(image, name, (int(x), int(y) - 10),
+                                        cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
+                        else:
+                            logger.warning(f"❌ {name} detected but its outline is not a convex quadrilateral.")
+
+    debug_image_path = "./scans/detected_markers.png"
+    keypoints_image_path = "./scans/keypoints.png"
+
+    cv2.imwrite(debug_image_path, image)
+    cv2.imwrite(keypoints_image_path, keypoints_image)
+
+    logger.info(f"📸 Debug image saved to {debug_image_path}")
+    logger.info(f"📸 Keypoints image saved to {keypoints_image_path}")
+
+    return detected_markers, debug_image_path, keypoints_image_path
+
+
+if __name__ == '__main__':
+    image_path = "scans/snap_2025-02-23_17-10-06-836370.png"
+    logger.info(f"🔍 Detecting markers in image: {image_path}")
+    markers, debug_img, kp_img = detect_markers(image_path, templates)
+
+    logger.info(f"🔍 Detected markers: {markers}")
+    if markers:
+        logger.info(f"📍 Total markers detected: {len(markers)}")
+    else:
+        logger.warning("🛑 No markers detected.")
diff --git a/markers/HDPE.png b/markers/HDPE.png
new file mode 100644
index 0000000..3edec68
Binary files /dev/null and b/markers/HDPE.png differ
diff --git a/markers/LDPE.png b/markers/LDPE.png
new file mode 100644
index 0000000..7f43b1f
Binary files /dev/null and b/markers/LDPE.png differ
diff --git a/markers/O.png b/markers/O.png
new file mode 100644
index 0000000..0b1438d
Binary files /dev/null and b/markers/O.png differ
diff --git a/markers/PET.png b/markers/PET.png
new file mode 100644
index 0000000..1bc74e5
Binary files /dev/null and b/markers/PET.png differ
diff --git a/markers/PP.png b/markers/PP.png
new file mode 100644
index 0000000..f7b19e0
Binary files /dev/null and b/markers/PP.png differ
diff --git a/markers/PS.png b/markers/PS.png
new file mode 100644
index 0000000..fe2e314
Binary files /dev/null and b/markers/PS.png differ
diff --git a/markers/PVC.png b/markers/PVC.png
new file mode 100644
index 0000000..77bd77f
Binary files /dev/null and b/markers/PVC.png differ
diff --git a/markers/bond strength.png b/markers/bond strength.png
new file mode 100644
index 0000000..075577e
Binary files /dev/null and b/markers/bond strength.png differ
diff --git a/markers/cnc.png b/markers/cnc.png
new file mode 100644
index 0000000..6e2d897
Binary files /dev/null and b/markers/cnc.png differ
diff --git a/markers/color.png b/markers/color.png
new file mode 100644
index 0000000..a2f704f
Binary files /dev/null and b/markers/color.png differ
diff --git a/markers/composite.png b/markers/composite.png
new file mode 100644
index 0000000..79729e4
Binary files /dev/null and b/markers/composite.png differ
diff --git a/markers/density.png b/markers/density.png
new file mode 100644
index 0000000..d037b93
Binary files /dev/null and b/markers/density.png differ
diff --git a/markers/flexibility.png b/markers/flexibility.png
new file mode 100644
index 0000000..17d1fc4
Binary files /dev/null and b/markers/flexibility.png differ
diff --git a/markers/haptics.png b/markers/haptics.png
new file mode 100644
index 0000000..42660d8
Binary files /dev/null and b/markers/haptics.png differ
diff --git a/markers/hot air.png b/markers/hot air.png
new file mode 100644
index 0000000..9b80ccc
Binary files /dev/null and b/markers/hot air.png differ
diff --git a/markers/joinery.png b/markers/joinery.png
new file mode 100644
index 0000000..2cd45eb
Binary files /dev/null and b/markers/joinery.png differ
diff --git a/markers/observation.png b/markers/observation.png
new file mode 100644
index 0000000..a73bf4f
Binary files /dev/null and b/markers/observation.png differ
diff --git a/markers/opacity.png b/markers/opacity.png
new file mode 100644
index 0000000..001b2b7
Binary files /dev/null and b/markers/opacity.png differ
diff --git a/markers/press.png b/markers/press.png
new file mode 100644
index 0000000..7b23d32
Binary files /dev/null and b/markers/press.png differ
diff --git a/markers/process.png b/markers/process.png
new file mode 100644
index 0000000..c0feb05
Binary files /dev/null and b/markers/process.png differ
diff --git a/markers/shape.png b/markers/shape.png
new file mode 100644
index 0000000..ec48b8a
Binary files /dev/null and b/markers/shape.png differ
diff --git a/markers/shredder.png b/markers/shredder.png
new file mode 100644
index 0000000..3d18e97
Binary files /dev/null and b/markers/shredder.png differ
diff --git a/markers/smell.png b/markers/smell.png
new file mode 100644
index 0000000..e7180a5
Binary files /dev/null and b/markers/smell.png differ
diff --git a/markers/structural.png b/markers/structural.png
new file mode 100644
index 0000000..059fe24
Binary files /dev/null and b/markers/structural.png differ
diff --git a/markers/surface.png b/markers/surface.png
new file mode 100644
index 0000000..fc17de9
Binary files /dev/null and b/markers/surface.png differ
diff --git a/markers/thermals.png b/markers/thermals.png
new file mode 100644
index 0000000..eea0e99
Binary files /dev/null and b/markers/thermals.png differ
diff --git a/markers/tool.png b/markers/tool.png
new file mode 100644
index 0000000..cd0d137
Binary files /dev/null and b/markers/tool.png differ
diff --git a/poetry.toml b/poetry.toml
new file mode 100644
index 0000000..ab1033b
--- /dev/null
+++ b/poetry.toml
@@ -0,0 +1,2 @@
+[virtualenvs]
+in-project = true
diff --git a/publish.sh b/publish.sh
new file mode 100755
index 0000000..c23632c
--- /dev/null
+++ b/publish.sh
@@ -0,0 +1,15 @@
+#!/bin/bash
+echo "Trying to publish all changes to archive.petau.net"
+echo "Getting HTML from Logseq"
+
+cd ~/thesis/logseq || exit
+logseq-publish-spa ~/thesis/docs/ --theme-mode dark --accent-color orange --static-directory ~/logseq/static
+
+echo "Publishing to archive.petau.net"
+cd ~/thesis || exit
+git add .
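+# Commit and push so the updated export goes live on archive.petau.net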
+git commit -m "CI: Publish to archive.petau.net"
+git push
+echo "Publish Complete"
+exit 0
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 0000000..8af984d
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,22 @@
+[project]
+name = "code"
+version = "0.1.0"
+description = ""
+authors = [
+    {name = "arontaupe",email = "aron@petau.net"}
+]
+readme = "README.md"
+requires-python = ">=3.12"
+dependencies = [
+    "pytesseract (>=0.3.13,<0.4.0)",
+    "pillow (>=11.1.0,<11.2.0)",
+    "easyocr (>=1.7.2,<1.8.0)",
+    "textstat (>=0.7.4,<0.8.0)",
+    "opencv-python (>=4.11.0.86,<4.12.0.0)",
+    "requests (>=2.32.3,<2.33.0)"
+]
+
+
+[build-system]
+requires = ["poetry-core>=2.0.0,<3.0.0"]
+build-backend = "poetry.core.masonry.api"
diff --git a/render_mermaid.sh b/render_mermaid.sh
new file mode 100755
index 0000000..a6fcac0
--- /dev/null
+++ b/render_mermaid.sh
@@ -0,0 +1,26 @@
+#!/bin/bash
+
+# Ensure Mermaid CLI is installed
+if ! command -v mmdc &> /dev/null; then
+    echo "Mermaid CLI (mmdc) not found. Install it with: npm install -g @mermaid-js/mermaid-cli"
+    exit 1
+fi
+
+# Create output directory if it doesn't exist
+mkdir -p typst/assets/images
+
+# Loop through all .mmd files in the mermaid folder
+for file in typst/assets/mermaid/*.mmd; do
+    # Extract filename without extension
+    filename=$(basename -- "$file" .mmd)
+
+    # Convert to PNG, checking the exit status of mmdc directly
+    echo "Rendering $file -> typst/assets/images/$filename.png"
+    if mmdc -i "$file" -o "typst/assets/images/$filename.png" -t neutral -b transparent -s 3; then
+        echo "Successfully rendered: $filename.png"
+    else
+        echo "Failed to render: $filename.png"
+    fi
+done
+
+echo "All Mermaid diagrams processed!"
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..e30927c
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,15 @@
+requests~=2.32.3
+opencv-python~=4.11.0.86
+pytesseract~=0.3.13
+numpy~=2.2.2
+pillow~=11.1.0
+easyocr~=1.7.2
+ollama~=0.4.7
+textstat~=0.7.4
+ollama-ocr
+mpmath~=1.3.0
+CairoSVG~=2.7.1
+python-dotenv~=1.0.1
+ocrmac~=1.0.0
+pyobjc-framework-Vision~=11.0
+pyobjc-framework-Cocoa~=11.0
\ No newline at end of file
diff --git a/scan.sh b/scan.sh
new file mode 100755
index 0000000..21f8c88
--- /dev/null
+++ b/scan.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Check if the script is already running
+if pgrep -f "archive_pipeline.py" > /dev/null; then
+    echo "Scan already in progress. Ignoring new request."
+    exit 1
+fi
+
+# Check if Logseq is running
+if ! pgrep -x "Logseq" > /dev/null; then
+    echo "⚠️ Logseq is not running. Starting Logseq..."
+    open -a "Logseq"
+else
+    echo "✅ Logseq is running, continuing..."
+fi
+
+echo "Scanning to Archive"
+
+cd ~/thesis/code || exit
+
+# Activate virtual environment
+source ~/thesis/code/.venv/bin/activate
+
+# Default camera index is 0
+CAMERA_INDEX=0
+
+# Check if a camera index was provided as an argument
+if [ -n "$1" ]; then
+    CAMERA_INDEX=$1
+fi
+
+echo "Starting scan with camera index $CAMERA_INDEX..."
+~/thesis/code/.venv/bin/python ~/thesis/code/archive_pipeline.py --camera "$CAMERA_INDEX"
+
+echo "Scan Complete"
+
+echo "Type 'scan' and hit enter to trigger an upload. You can also type 'scan --help' for more options."
+echo "Start the *Scan to Archive* App if unsure."
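+# Exit with an explicit success status for whatever launched the script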
+exit 0
\ No newline at end of file
diff --git a/success_message.py b/success_message.py
new file mode 100644
index 0000000..795e9e0
--- /dev/null
+++ b/success_message.py
@@ -0,0 +1,48 @@
+import cv2
+import webbrowser
+
+
+def show_image_with_message(image_path, message, timeout=5, url=None):
+    """Display an image with a centered success message using OpenCV."""
+    # Read the image using OpenCV
+    img = cv2.imread(image_path)
+
+    if img is None:
+        print("Error: Unable to load image.")
+        return
+
+    # Get image dimensions (height, width)
+    height, width, _ = img.shape
+
+    # Add a text overlay with the success message
+    font = cv2.FONT_HERSHEY_SIMPLEX
+    font_scale = 1.5
+    font_thickness = 3  # Thicker text for a "bold" effect
+    text_size = cv2.getTextSize(message, font, font_scale, font_thickness)[0]
+
+    # Calculate the position to center the text
+    text_x = (width - text_size[0]) // 2
+    text_y = (height + text_size[1]) // 2
+
+    # Draw the message in black on top of the image
+    cv2.putText(img, message, (text_x, text_y), font, font_scale, (0, 0, 0), font_thickness, lineType=cv2.LINE_AA)
+
+    # Display the image with the message
+    cv2.imshow("Success", img)
+
+    # Wait for the specified timeout before closing the window
+    cv2.waitKey(timeout * 1000)  # waitKey expects milliseconds
+    cv2.destroyAllWindows()
+
+    # If a URL is provided, open it in the default browser
+    if url:
+        webbrowser.open(url)
+
+
+# Example usage
+if __name__ == "__main__":
+    image_path = "scans/detected_markers.png"  # Replace with your actual image path
+    success_message = "Scan Completed Successfully!"  # Hershey fonts are ASCII-only, so no emoji here
+    url_to_open = "https://pinry.petau.net"  # URL to open after timeout
+
+    # Show the image with the success message and open the URL after 5 seconds
+    show_image_with_message(image_path, success_message, timeout=5, url=url_to_open)
diff --git a/vision_test_ocrmac.py b/vision_test_ocrmac.py
new file mode 100644
index 0000000..9e916bb
--- /dev/null
+++ b/vision_test_ocrmac.py
@@ -0,0 +1,16 @@
+from ocrmac import ocrmac
+
+
+# OCR smoke test using Apple's Live Text framework via ocrmac
+def recognize_text_macos(image_path):
+    annotations = ocrmac.OCR(image_path,
+                             language_preference=['en-US'],
+                             framework="livetext"
+                             ).recognize()
+    print(annotations)
+    return annotations
+
+
+if __name__ == '__main__':
+    recognize_text_macos('scans/snap_2025-02-22_18-12-04-435736.png')
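+    # Each annotation is expected to be a (text, confidence, bounding box) tuple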