Anonymized

Aron Petau 2025-04-06 19:58:31 +02:00
parent 3cf084a50d
commit ceb5081a01
65 changed files with 2262 additions and 0 deletions

.env (new file)

@@ -0,0 +1,8 @@
PINRY_TOKEN=XXXXXXXXXXXXX
CAMERA_INDEX=0
UPLOAD=True
OCR_PROCESSOR=pyobjc
USE_OLLAMA_CAPTION=False
USE_OLLAMA_OCR=False
CHECK_MARKERS=True
UPLOAD_MARKED_IMAGE=False

.venv (new file)

@@ -0,0 +1 @@
machine_archivist-mftt

LICENSE (new file)

@@ -0,0 +1,3 @@
Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License
This work is licensed under the Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License. To view a copy of this license, visit https://creativecommons.org/licenses/by-nc-sa/4.0/

Mobile Scan.app/Contents/Info.plist (new file)

@@ -0,0 +1,109 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>AMIsApplet</key>
<true/>
<key>AMStayOpen</key>
<false/>
<key>BuildMachineOSBuild</key>
<string>22A380021</string>
<key>CFBundleAllowMixedLocalizations</key>
<true/>
<key>CFBundleDevelopmentRegion</key>
<string>English</string>
<key>CFBundleDocumentTypes</key>
<array>
<dict>
<key>CFBundleTypeExtensions</key>
<array>
<string>*</string>
</array>
<key>CFBundleTypeName</key>
<string>Automator workflow file</string>
<key>CFBundleTypeOSTypes</key>
<array>
<string>****</string>
</array>
<key>CFBundleTypeRole</key>
<string>Viewer</string>
</dict>
</array>
<key>CFBundleExecutable</key>
<string>Automator Application Stub</string>
<key>CFBundleIconFile</key>
<string>ApplicationStub</string>
<key>CFBundleIdentifier</key>
<string>com.apple.automator.Mobile-Scan</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>Mobile Scan</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>1.3</string>
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleSupportedPlatforms</key>
<array>
<string>MacOSX</string>
</array>
<key>CFBundleURLTypes</key>
<array/>
<key>CFBundleVersion</key>
<string>527</string>
<key>DTCompiler</key>
<string>com.apple.compilers.llvm.clang.1_0</string>
<key>DTPlatformBuild</key>
<string></string>
<key>DTPlatformName</key>
<string>macosx</string>
<key>DTPlatformVersion</key>
<string>15.3</string>
<key>DTSDKBuild</key>
<string>24D39</string>
<key>DTSDKName</key>
<string>macosx15.3.internal</string>
<key>DTXcode</key>
<string>1600</string>
<key>DTXcodeBuild</key>
<string>16A6170g</string>
<key>LSMinimumSystemVersion</key>
<string>10.9</string>
<key>LSUIElement</key>
<true/>
<key>NSAppleEventsUsageDescription</key>
<string>This workflow needs to control other applications to run.</string>
<key>NSAppleMusicUsageDescription</key>
<string>This workflow needs access to your music to run.</string>
<key>NSAppleScriptEnabled</key>
<string>YES</string>
<key>NSCalendarsUsageDescription</key>
<string>This workflow needs access to your calendars to run.</string>
<key>NSCameraUsageDescription</key>
<string>This workflow needs access to your camera to run.</string>
<key>NSContactsUsageDescription</key>
<string>This workflow needs access to your contacts to run.</string>
<key>NSHomeKitUsageDescription</key>
<string>This workflow needs access to your HomeKit Home to run.</string>
<key>NSMicrophoneUsageDescription</key>
<string>This workflow needs access to your microphone to run.</string>
<key>NSPhotoLibraryUsageDescription</key>
<string>This workflow needs access to your photos to run.</string>
<key>NSPrincipalClass</key>
<string>NSApplication</string>
<key>NSRemindersUsageDescription</key>
<string>This workflow needs access to your reminders to run.</string>
<key>NSServices</key>
<array/>
<key>NSSiriUsageDescription</key>
<string>This workflow needs access to Siri to run.</string>
<key>NSSystemAdministrationUsageDescription</key>
<string>This workflow needs access to administer this system in order to run.</string>
<key>UTExportedTypeDeclarations</key>
<array/>
<key>UTImportedTypeDeclarations</key>
<array/>
</dict>
</plist>

Mobile Scan.app/Contents/Resources/ApplicationStub.icns (binary file not shown)

Mobile Scan.app/Contents/Resources/Assets.car (binary file not shown)

Mobile Scan.app/Contents/Resources/InfoPlist.loctable (binary file not shown)

Mobile Scan.app/Contents/_CodeSignature/CodeResources (new file)

@@ -0,0 +1,171 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>files</key>
<dict>
<key>Resources/ApplicationStub.icns</key>
<data>
RYTqh+7iocnEIV8iTs9EgJjEkO4=
</data>
<key>Resources/Assets.car</key>
<data>
SJkzJQ0zpEu+KXeAJk45wvdOq4Q=
</data>
<key>Resources/InfoPlist.loctable</key>
<data>
KEQC0DFC9lrETWe0E5eVGGsPylc=
</data>
</dict>
<key>files2</key>
<dict>
<key>Resources/ApplicationStub.icns</key>
<dict>
<key>hash</key>
<data>
RYTqh+7iocnEIV8iTs9EgJjEkO4=
</data>
<key>hash2</key>
<data>
odOqeBevxysHIbR5V5qgZz11qTuV9cL5jKaIcUw3R0I=
</data>
</dict>
<key>Resources/Assets.car</key>
<dict>
<key>hash</key>
<data>
SJkzJQ0zpEu+KXeAJk45wvdOq4Q=
</data>
<key>hash2</key>
<data>
HfxV2L1WKqSFn2ShFdQSMo39Xh4FPrSyJzz2hW4d+IQ=
</data>
</dict>
<key>Resources/InfoPlist.loctable</key>
<dict>
<key>hash</key>
<data>
KEQC0DFC9lrETWe0E5eVGGsPylc=
</data>
<key>hash2</key>
<data>
3cSIcj64rHY2k+pLrnrgd1Li6hmbquwgX94QcefajJ8=
</data>
</dict>
<key>document.wflow</key>
<dict>
<key>cdhash</key>
<data>
icthnS5QkIcGta2GiU+/saZ3dpk=
</data>
<key>requirement</key>
<string>cdhash H"53ddb51651bfcda8e1da3b39bd627aae0993b3cc" or cdhash H"89cb619d2e50908706b5ad86894fbfb1a6777699"</string>
</dict>
</dict>
<key>rules</key>
<dict>
<key>^Resources/</key>
<true/>
<key>^Resources/.*\.lproj/</key>
<dict>
<key>optional</key>
<true/>
<key>weight</key>
<real>1000</real>
</dict>
<key>^Resources/.*\.lproj/locversion.plist$</key>
<dict>
<key>omit</key>
<true/>
<key>weight</key>
<real>1100</real>
</dict>
<key>^Resources/Base\.lproj/</key>
<dict>
<key>weight</key>
<real>1010</real>
</dict>
<key>^version.plist$</key>
<true/>
</dict>
<key>rules2</key>
<dict>
<key>.*\.dSYM($|/)</key>
<dict>
<key>weight</key>
<real>11</real>
</dict>
<key>^(.*/)?\.DS_Store$</key>
<dict>
<key>omit</key>
<true/>
<key>weight</key>
<real>2000</real>
</dict>
<key>^(Frameworks|SharedFrameworks|PlugIns|Plug-ins|XPCServices|Helpers|MacOS|Library/(Automator|Spotlight|LoginItems))/</key>
<dict>
<key>nested</key>
<true/>
<key>weight</key>
<real>10</real>
</dict>
<key>^.*</key>
<true/>
<key>^Info\.plist$</key>
<dict>
<key>omit</key>
<true/>
<key>weight</key>
<real>20</real>
</dict>
<key>^PkgInfo$</key>
<dict>
<key>omit</key>
<true/>
<key>weight</key>
<real>20</real>
</dict>
<key>^Resources/</key>
<dict>
<key>weight</key>
<real>20</real>
</dict>
<key>^Resources/.*\.lproj/</key>
<dict>
<key>optional</key>
<true/>
<key>weight</key>
<real>1000</real>
</dict>
<key>^Resources/.*\.lproj/locversion.plist$</key>
<dict>
<key>omit</key>
<true/>
<key>weight</key>
<real>1100</real>
</dict>
<key>^Resources/Base\.lproj/</key>
<dict>
<key>weight</key>
<real>1010</real>
</dict>
<key>^[^/]+$</key>
<dict>
<key>nested</key>
<true/>
<key>weight</key>
<real>10</real>
</dict>
<key>^embedded\.provisionprofile$</key>
<dict>
<key>weight</key>
<real>20</real>
</dict>
<key>^version\.plist$</key>
<dict>
<key>weight</key>
<real>20</real>
</dict>
</dict>
</dict>
</plist>

Mobile Scan.app/Contents/document.wflow (new file)

@@ -0,0 +1,124 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>AMApplicationBuild</key>
<string>527</string>
<key>AMApplicationVersion</key>
<string>2.10</string>
<key>AMDocumentVersion</key>
<string>2</string>
<key>actions</key>
<array>
<dict>
<key>action</key>
<dict>
<key>AMAccepts</key>
<dict>
<key>Container</key>
<string>List</string>
<key>Optional</key>
<true/>
<key>Types</key>
<array>
<string>com.apple.applescript.object</string>
</array>
</dict>
<key>AMActionVersion</key>
<string>1.0.2</string>
<key>AMApplication</key>
<array>
<string>Automator</string>
</array>
<key>AMParameterProperties</key>
<dict>
<key>source</key>
<dict/>
</dict>
<key>AMProvides</key>
<dict>
<key>Container</key>
<string>List</string>
<key>Types</key>
<array>
<string>com.apple.applescript.object</string>
</array>
</dict>
<key>ActionBundlePath</key>
<string>/System/Library/Automator/Run AppleScript.action</string>
<key>ActionName</key>
<string>Run AppleScript</string>
<key>ActionParameters</key>
<dict>
<key>source</key>
<string>tell application "Terminal"
	-- Check if there are any open windows
	if (count of windows) is greater than 0 then
		-- Get the position of the first window
		set windowPos to position of window 1
		-- Use the first window if it's open
		set existingWindow to window 1
		do script "~/thesis/code/scan.sh 2 ; sleep 2; ~/thesis/code/publish.sh; exit" in existingWindow
	else
		-- If no windows are open, create a new window and set its position to the last window's position
		set windowPos to {200, 200} -- Default position if no window was open previously
		do script "~/thesis/code/scan.sh 2; sleep 2; ~/thesis/code/publish.sh; exit"
		set position of the front window to windowPos
	end if
	activate
end tell
delay 3 -- Wait for the scripts to finish executing
tell application "Terminal"
	-- Close all Terminal windows
	set terminalWindows to every window
	repeat with aWindow in terminalWindows
		if busy of aWindow is false then
			close aWindow
		end if
	end repeat
end tell</string>
</dict>
<key>BundleIdentifier</key>
<string>com.apple.Automator.RunScript</string>
<key>CFBundleVersion</key>
<string>1.0.2</string>
<key>CanShowSelectedItemsWhenRun</key>
<false/>
<key>CanShowWhenRun</key>
<true/>
<key>Category</key>
<array>
<string>AMCategoryUtilities</string>
</array>
<key>Class Name</key>
<string>RunScriptAction</string>
<key>InputUUID</key>
<string>8C226B47-458F-4409-BF3A-8ABE247C1FF2</string>
<key>Keywords</key>
<array>
<string>Run</string>
</array>
<key>OutputUUID</key>
<string>F5590A01-ECCB-4790-974A-DA4BC1310717</string>
<key>UUID</key>
<string>45370EDA-E9D5-465E-B975-C2D83FE6A8D4</string>
<key>UnlocalizedApplications</key>
<array>
<string>Automator</string>
</array>
<key>arguments</key>
<dict>
<key>0</key>
<dict>
<key>default value</key>
<string>on run {input, parameters}
(* Your script goes here *)
return input
end run</string>
<key>name</key>
<string>source</string>
<key>required</key>
<string>0</string>
<key>type</key>
<string>0</string>
<key>uuid</key>
<string>0</string>
</dict>
</dict>
<key>isViewVisible</key>
<integer>1</integer>
<key>location</key>
<string>631.500000:315.000000</string>
<key>nibPath</key>
<string>/System/Library/Automator/Run AppleScript.action/Contents/Resources/Base.lproj/main.nib</string>
</dict>
<key>isViewVisible</key>
<integer>1</integer>
</dict>
</array>
<key>connectors</key>
<dict/>
<key>workflowMetaData</key>
<dict>
<key>workflowTypeIdentifier</key>
<string>com.apple.Automator.application</string>
</dict>
</dict>
</plist>

README.md (new file)

@@ -0,0 +1,13 @@
# Machine Archivist
This repository holds all Python and supplementary files needed to set up the Machine Archivist, an artistic documentation tool.
For detailed setup instructions, read RunInstructions.md.
The code was created in the context of a master's thesis titled Human : Waste by Aron Petau.
© 2025 **Aron Petau**. All rights reserved.
![License](https://img.shields.io/badge/License-CC_BY--NC--SA_4.0-lightgrey)
This project is licensed under the **Creative Commons Attribution-NonCommercial-ShareAlike 4.0 International License**. See the [LICENSE](./LICENSE) file for more details.

RunInstructions.md (new file)

@@ -0,0 +1,92 @@
To reproduce the archive, the following dependencies should be met.
*This tutorial is for macOS only,* but you should be able to get it running anywhere that can run Ollama.
**Currently, all paths are hard-coded, so the scripts will only work if the repository is cloned to the home directory.**
You also have to adjust either the username or the entire path in the scripts, as shown below.
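For example, assuming the hard-coded prefix is `/Users/aron` (as used in `archive_pipeline.py`) and your username is `jane` (a placeholder), a bulk rewrite could look like this:
```bash
# Rewrite the hard-coded home directory in all scripts (BSD sed, as shipped with macOS).
# "jane" is a hypothetical username; substitute your own.
grep -rl '/Users/aron' . | xargs sed -i '' 's|/Users/aron|/Users/jane|g'
```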
## Brew
```bash
/bin/bash -c "$(curl -fsSL https://raw.githubusercontent.com/Homebrew/install/HEAD/install.sh)"
```
## git
```bash
brew install git
```
## Node.js and npm, as well as yarn
```bash
brew install node
npm install --global yarn
```
## Babashka and Clojure
```bash
brew install borkdude/brew/babashka
brew install clojure/tools/clojure
```
Both are prerequisites for publish-spa.
## Publish-spa
```bash
git clone https://github.com/logseq/publish-spa
cd publish-spa && yarn install
yarn global add $PWD
```
This is the tool used to automatically publish the Logseq archive.
Everything works without it, but you will then have to export the archive manually.
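As a sketch of a manual export (assuming the graph lives at `~/thesis/logseq`, which the pipeline's asset paths suggest, and an arbitrary output directory; check `publish-spa --help` for the exact options of your install):
```bash
cd ~/thesis/logseq          # the Logseq graph directory (assumed location)
publish-spa ../logseq-out   # export the graph as a static site (hypothetical output dir)
```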
## The Logseq desktop app (optional)
```bash
brew install --cask logseq
```
Only needed if you want a convenient way to modify the archive.
## Python
```bash
brew install python@3.13
```
## Ollama
```bash
brew install --cask ollama
```
I am using the cask here because it autostarts the server, but you can also use the CLI version if you prefer.
Ollama handles all the LLM needs; if those are not needed, you can skip this step.
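The pipeline defaults to the `llama3.2-vision:11b` model (see `MODEL_NAME` in `archive_pipeline.py`), which you can pull ahead of time:
```bash
ollama pull llama3.2-vision:11b
```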
## The Archive Repository
```bash
git clone arontaupe/thesis
```
## Make the scripts executable
```bash
cd thesis/code
chmod +x publish.sh
chmod +x scan.sh
```
Then either run the scripts individually or use the provided app, which should now work:
```bash
./scan.sh
./publish.sh
```
The app is called `Scan to Archive` and should be in the thesis folder.

Scan to Archive.app/Contents/Info.plist (new file)

@@ -0,0 +1,109 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>AMIsApplet</key>
<true/>
<key>AMStayOpen</key>
<false/>
<key>BuildMachineOSBuild</key>
<string>22A380021</string>
<key>CFBundleAllowMixedLocalizations</key>
<true/>
<key>CFBundleDevelopmentRegion</key>
<string>English</string>
<key>CFBundleDocumentTypes</key>
<array>
<dict>
<key>CFBundleTypeExtensions</key>
<array>
<string>*</string>
</array>
<key>CFBundleTypeName</key>
<string>Automator workflow file</string>
<key>CFBundleTypeOSTypes</key>
<array>
<string>****</string>
</array>
<key>CFBundleTypeRole</key>
<string>Viewer</string>
</dict>
</array>
<key>CFBundleExecutable</key>
<string>Automator Application Stub</string>
<key>CFBundleIconFile</key>
<string>ApplicationStub</string>
<key>CFBundleIdentifier</key>
<string>com.apple.automator.Scan-to-Archive</string>
<key>CFBundleInfoDictionaryVersion</key>
<string>6.0</string>
<key>CFBundleName</key>
<string>Scan to Archive</string>
<key>CFBundlePackageType</key>
<string>APPL</string>
<key>CFBundleShortVersionString</key>
<string>1.3</string>
<key>CFBundleSignature</key>
<string>????</string>
<key>CFBundleSupportedPlatforms</key>
<array>
<string>MacOSX</string>
</array>
<key>CFBundleURLTypes</key>
<array/>
<key>CFBundleVersion</key>
<string>527</string>
<key>DTCompiler</key>
<string>com.apple.compilers.llvm.clang.1_0</string>
<key>DTPlatformBuild</key>
<string></string>
<key>DTPlatformName</key>
<string>macosx</string>
<key>DTPlatformVersion</key>
<string>15.3</string>
<key>DTSDKBuild</key>
<string>24D39</string>
<key>DTSDKName</key>
<string>macosx15.3.internal</string>
<key>DTXcode</key>
<string>1600</string>
<key>DTXcodeBuild</key>
<string>16A6170g</string>
<key>LSMinimumSystemVersion</key>
<string>10.9</string>
<key>LSUIElement</key>
<true/>
<key>NSAppleEventsUsageDescription</key>
<string>This workflow needs to control other applications to run.</string>
<key>NSAppleMusicUsageDescription</key>
<string>This workflow needs access to your music to run.</string>
<key>NSAppleScriptEnabled</key>
<string>YES</string>
<key>NSCalendarsUsageDescription</key>
<string>This workflow needs access to your calendars to run.</string>
<key>NSCameraUsageDescription</key>
<string>This workflow needs access to your camera to run.</string>
<key>NSContactsUsageDescription</key>
<string>This workflow needs access to your contacts to run.</string>
<key>NSHomeKitUsageDescription</key>
<string>This workflow needs access to your HomeKit Home to run.</string>
<key>NSMicrophoneUsageDescription</key>
<string>This workflow needs access to your microphone to run.</string>
<key>NSPhotoLibraryUsageDescription</key>
<string>This workflow needs access to your photos to run.</string>
<key>NSPrincipalClass</key>
<string>NSApplication</string>
<key>NSRemindersUsageDescription</key>
<string>This workflow needs access to your reminders to run.</string>
<key>NSServices</key>
<array/>
<key>NSSiriUsageDescription</key>
<string>This workflow needs access to Siri to run.</string>
<key>NSSystemAdministrationUsageDescription</key>
<string>This workflow needs access to administer this system in order to run.</string>
<key>UTExportedTypeDeclarations</key>
<array/>
<key>UTImportedTypeDeclarations</key>
<array/>
</dict>
</plist>

Binary file not shown.

Scan to Archive.app/Contents/_CodeSignature/CodeResources (new file)

@@ -0,0 +1,171 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>files</key>
<dict>
<key>Resources/ApplicationStub.icns</key>
<data>
RYTqh+7iocnEIV8iTs9EgJjEkO4=
</data>
<key>Resources/Assets.car</key>
<data>
SJkzJQ0zpEu+KXeAJk45wvdOq4Q=
</data>
<key>Resources/InfoPlist.loctable</key>
<data>
KEQC0DFC9lrETWe0E5eVGGsPylc=
</data>
</dict>
<key>files2</key>
<dict>
<key>Resources/ApplicationStub.icns</key>
<dict>
<key>hash</key>
<data>
RYTqh+7iocnEIV8iTs9EgJjEkO4=
</data>
<key>hash2</key>
<data>
odOqeBevxysHIbR5V5qgZz11qTuV9cL5jKaIcUw3R0I=
</data>
</dict>
<key>Resources/Assets.car</key>
<dict>
<key>hash</key>
<data>
SJkzJQ0zpEu+KXeAJk45wvdOq4Q=
</data>
<key>hash2</key>
<data>
HfxV2L1WKqSFn2ShFdQSMo39Xh4FPrSyJzz2hW4d+IQ=
</data>
</dict>
<key>Resources/InfoPlist.loctable</key>
<dict>
<key>hash</key>
<data>
KEQC0DFC9lrETWe0E5eVGGsPylc=
</data>
<key>hash2</key>
<data>
3cSIcj64rHY2k+pLrnrgd1Li6hmbquwgX94QcefajJ8=
</data>
</dict>
<key>document.wflow</key>
<dict>
<key>cdhash</key>
<data>
ob3mamb5318KsN1K1DZXjWgrE04=
</data>
<key>requirement</key>
<string>cdhash H"65f45121e9098a26ec1a4451a04f2c5c571c9ef7" or cdhash H"a1bde66a66f9df5f0ab0dd4ad436578d682b134e"</string>
</dict>
</dict>
<key>rules</key>
<dict>
<key>^Resources/</key>
<true/>
<key>^Resources/.*\.lproj/</key>
<dict>
<key>optional</key>
<true/>
<key>weight</key>
<real>1000</real>
</dict>
<key>^Resources/.*\.lproj/locversion.plist$</key>
<dict>
<key>omit</key>
<true/>
<key>weight</key>
<real>1100</real>
</dict>
<key>^Resources/Base\.lproj/</key>
<dict>
<key>weight</key>
<real>1010</real>
</dict>
<key>^version.plist$</key>
<true/>
</dict>
<key>rules2</key>
<dict>
<key>.*\.dSYM($|/)</key>
<dict>
<key>weight</key>
<real>11</real>
</dict>
<key>^(.*/)?\.DS_Store$</key>
<dict>
<key>omit</key>
<true/>
<key>weight</key>
<real>2000</real>
</dict>
<key>^(Frameworks|SharedFrameworks|PlugIns|Plug-ins|XPCServices|Helpers|MacOS|Library/(Automator|Spotlight|LoginItems))/</key>
<dict>
<key>nested</key>
<true/>
<key>weight</key>
<real>10</real>
</dict>
<key>^.*</key>
<true/>
<key>^Info\.plist$</key>
<dict>
<key>omit</key>
<true/>
<key>weight</key>
<real>20</real>
</dict>
<key>^PkgInfo$</key>
<dict>
<key>omit</key>
<true/>
<key>weight</key>
<real>20</real>
</dict>
<key>^Resources/</key>
<dict>
<key>weight</key>
<real>20</real>
</dict>
<key>^Resources/.*\.lproj/</key>
<dict>
<key>optional</key>
<true/>
<key>weight</key>
<real>1000</real>
</dict>
<key>^Resources/.*\.lproj/locversion.plist$</key>
<dict>
<key>omit</key>
<true/>
<key>weight</key>
<real>1100</real>
</dict>
<key>^Resources/Base\.lproj/</key>
<dict>
<key>weight</key>
<real>1010</real>
</dict>
<key>^[^/]+$</key>
<dict>
<key>nested</key>
<true/>
<key>weight</key>
<real>10</real>
</dict>
<key>^embedded\.provisionprofile$</key>
<dict>
<key>weight</key>
<real>20</real>
</dict>
<key>^version\.plist$</key>
<dict>
<key>weight</key>
<real>20</real>
</dict>
</dict>
</dict>
</plist>

Scan to Archive.app/Contents/document.wflow (new file)

@@ -0,0 +1,124 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>AMApplicationBuild</key>
<string>527</string>
<key>AMApplicationVersion</key>
<string>2.10</string>
<key>AMDocumentVersion</key>
<string>2</string>
<key>actions</key>
<array>
<dict>
<key>action</key>
<dict>
<key>AMAccepts</key>
<dict>
<key>Container</key>
<string>List</string>
<key>Optional</key>
<true/>
<key>Types</key>
<array>
<string>com.apple.applescript.object</string>
</array>
</dict>
<key>AMActionVersion</key>
<string>1.0.2</string>
<key>AMApplication</key>
<array>
<string>Automator</string>
</array>
<key>AMParameterProperties</key>
<dict>
<key>source</key>
<dict/>
</dict>
<key>AMProvides</key>
<dict>
<key>Container</key>
<string>List</string>
<key>Types</key>
<array>
<string>com.apple.applescript.object</string>
</array>
</dict>
<key>ActionBundlePath</key>
<string>/System/Library/Automator/Run AppleScript.action</string>
<key>ActionName</key>
<string>Run AppleScript</string>
<key>ActionParameters</key>
<dict>
<key>source</key>
<string>tell application "Terminal"
	-- Check if there are any open windows
	if (count of windows) is greater than 0 then
		-- Get the position of the first window
		set windowPos to position of window 1
		-- Use the first window if it's open
		set existingWindow to window 1
		do script "~/thesis/code/scan.sh; sleep 2; ~/thesis/code/publish.sh; exit" in existingWindow
	else
		-- If no windows are open, create a new window and set its position to the last window's position
		set windowPos to {200, 200} -- Default position if no window was open previously
		do script "~/thesis/code/scan.sh; sleep 2; ~/thesis/code/publish.sh; exit"
		set position of the front window to windowPos
	end if
	activate
end tell
delay 3 -- Wait for the scripts to finish executing
tell application "Terminal"
	-- Close all Terminal windows
	set terminalWindows to every window
	repeat with aWindow in terminalWindows
		if busy of aWindow is false then
			close aWindow
		end if
	end repeat
end tell</string>
</dict>
<key>BundleIdentifier</key>
<string>com.apple.Automator.RunScript</string>
<key>CFBundleVersion</key>
<string>1.0.2</string>
<key>CanShowSelectedItemsWhenRun</key>
<false/>
<key>CanShowWhenRun</key>
<true/>
<key>Category</key>
<array>
<string>AMCategoryUtilities</string>
</array>
<key>Class Name</key>
<string>RunScriptAction</string>
<key>InputUUID</key>
<string>8C226B47-458F-4409-BF3A-8ABE247C1FF2</string>
<key>Keywords</key>
<array>
<string>Run</string>
</array>
<key>OutputUUID</key>
<string>F5590A01-ECCB-4790-974A-DA4BC1310717</string>
<key>UUID</key>
<string>45370EDA-E9D5-465E-B975-C2D83FE6A8D4</string>
<key>UnlocalizedApplications</key>
<array>
<string>Automator</string>
</array>
<key>arguments</key>
<dict>
<key>0</key>
<dict>
<key>default value</key>
<string>on run {input, parameters}
(* Your script goes here *)
return input
end run</string>
<key>name</key>
<string>source</string>
<key>required</key>
<string>0</string>
<key>type</key>
<string>0</string>
<key>uuid</key>
<string>0</string>
</dict>
</dict>
<key>isViewVisible</key>
<integer>1</integer>
<key>location</key>
<string>631.500000:680.000000</string>
<key>nibPath</key>
<string>/System/Library/Automator/Run AppleScript.action/Contents/Resources/Base.lproj/main.nib</string>
</dict>
<key>isViewVisible</key>
<integer>1</integer>
</dict>
</array>
<key>connectors</key>
<dict/>
<key>workflowMetaData</key>
<dict>
<key>workflowTypeIdentifier</key>
<string>com.apple.Automator.application</string>
</dict>
</dict>
</plist>


archive_pipeline.py (new file)

@@ -0,0 +1,432 @@
# This script captures an image from the webcam, extracts text using Tesseract or EasyOCR,
# generates a description using Ollama Vision, and uploads the text and image to Logseq.
# It also detects markers in the image using SIFT feature matching.
# The extracted text is checked for readability using textstat, and only meaningful text is uploaded to Logseq.
# The original image is uploaded to Logseq as an asset.
# The annotated image is saved in the assets folder of the Logseq directory.
import locale
import os
import urllib.parse
import pytesseract
import easyocr
import ollama
import textstat
from logseq_api import Logseq, add_to_last_block_as_child
from ollama_ocr import OCRProcessor
from marker_detect import *
from camera_handler import scan
from image_analysis_pyobjc import recognize_text_pyobjc
import argparse
from datetime import datetime
from logger_config import logger
from pinry_handler import upload_and_create_pin
import time
import sys
from dotenv import load_dotenv
from success_message import show_image_with_message
import cv2
import numpy as np
from PIL import Image
# Initialize OCR processors
MODEL_NAME = 'llama3.2-vision:11b'
ocr_processor = OCRProcessor(model_name=MODEL_NAME)
# Set locale to German
locale.setlocale(locale.LC_TIME, 'de_DE.UTF-8')
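# (Presumably so generated dates match the graph's German formatting; note that
# setlocale raises locale.Error if de_DE.UTF-8 is not installed on the system.)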
def get_image_description_ollama(image_path):
"""Uses an Ollama Vision model to generate a description of the image."""
try:
res = ollama.chat(
model=MODEL_NAME,
messages=[{'role': 'user', 'content': 'Describe this image.', 'images': [image_path]}]
)
return res['message']['content']
except Exception as e:
logger.error(f"❌ Ollama vision failed: {e}")
return ""
def recognize_text_easyocr(image_path):
"""Uses EasyOCR to extract text from the image."""
try:
reader = easyocr.Reader(['en', 'de'], gpu=True)
results = reader.readtext(image_path, detail=0, paragraph=True)
return ' '.join(results)
except Exception as e:
logger.error(f"❌ EasyOCR failed: {e}")
return ""
def recognize_text_tesseract(image_path):
"""Uses Tesseract OCR for text recognition."""
try:
img = Image.open(image_path)
gray = cv2.cvtColor(np.array(img), cv2.COLOR_RGB2GRAY)
return pytesseract.image_to_string(gray, config='--oem 1 --psm 6')
except Exception as e:
logger.error(f"❌ Tesseract OCR failed: {e}")
return ""
def is_meaningful_text(text):
"""Checks if extracted text is meaningful using readability metrics."""
if not text or len(text.split()) < 3:
return False
return textstat.flesch_reading_ease(text) > 40 or textstat.text_standard(text, float_output=True) > 30
def transfer_img_to_logseq(frame):
"""Saves an image and generates a timestamped path."""
try:
now = datetime.now()
timestamp = now.strftime('%Y-%m-%dT%H-%M-%S-%f')[:-2]
asset_path = f'/Users/aron/thesis/logseq/assets/scans/{timestamp}.jpg'
cv2.imwrite(asset_path, frame)
return asset_path
except Exception as e:
logger.error(f"❌ Image saving failed: {e}")
return None
def ollama_ocr(image_path):
"""Performs OCR using Ollama's Llama Vision model."""
try:
return ocr_processor.process_image(image_path=image_path,
format_type="markdown",
preprocess=True,
)
except Exception as e:
logger.error(f"❌ Ollama OCR failed: {e}")
return ""
def append_block_to_logseq_page(title, block_title, block_content, logseq):
"""Appends a block to a Logseq page."""
try:
logseq.Editor.appendBlockInPage(title, block_title)
add_to_last_block_as_child(title, block_content, logseq)
except Exception as e:
logger.error(f"❌ Logseq block append failed: {e}")
def upload_to_logseq(logseq,
title,
ocr_text,
caption,
image_path_list,
hashtags=[],
marker_list=None,
ollama_ocr_text=None):
"""Uploads extracted text and images to Logseq."""
try:
logseq.App.showMsg('Uploading to Logseq...')
logger.info(f" {image_path_list=}")
# insert markers as tags
        marker_string = ""
        if marker_list is None:
            marker_list = []
        # add the elems of hashtags to the marker list, stripping the initial '#'
        for hash in hashtags:
            marker_list.append(hash[1:])
if marker_list:
# append markers as tags to marker_string
marker_string = ', '.join(marker_list)
logseq.Editor.createPage(title, {'tags': f'{marker_string}, upload',
'public': 'true',
'author': 'Automated Archive',
'source': 'Camera',
'date': f'{datetime.now().strftime("%Y-%m-%d")}',
'timestamp': f'{datetime.now().strftime("%H:%M:%S")}',
})
# Commented out because the markers are now tags
# logseq.Editor.appendBlockInPage(title, "## Detected Markers:")
# add_to_last_block_as_child(title, marker_string, logseq)
else:
logseq.Editor.createPage(title, {'tags': 'upload',
'public': 'true',
'author': 'Automated Archive',
'source': 'Camera',
'date': f'{datetime.now().strftime("%Y-%m-%d")}',
'timestamp': f'{datetime.now().strftime("%H:%M:%S")}',
})
# logseq.Editor.appendBlockInPage(title, "## Detected Markers:")
# add_to_last_block_as_child(title, "No markers detected.", logseq)
# add classical ocr text
if ocr_text:
logseq.Editor.appendBlockInPage(title, "## OCR Text:")
add_to_last_block_as_child(title, ocr_text, logseq)
# add ollama ocr text
if ollama_ocr_text:
logseq.Editor.appendBlockInPage(title, "## Ollama OCR Text:")
add_to_last_block_as_child(title, ollama_ocr_text, logseq)
# upload images
if image_path_list:
logseq.Editor.appendBlockInPage(title, "## Scans:")
# convert to relative paths
relative_path_list = []
for path in image_path_list:
                parts = path.split("assets/scans/", 1)  # Split at "assets/scans/"
if len(parts) > 1:
relative_path_list.append("./assets/scans/" + parts[1]) # Add "./" at the start
for i, rel_image_path in enumerate(relative_path_list):
block_property = 'annotated:: false' if i == 0 else 'annotated:: true'
logseq.Editor.appendBlockInPage(title, f"### Scan {i + 1}\n{block_property}")
add_to_last_block_as_child(title, f"![{rel_image_path}]({rel_image_path})", logseq)
# add extracted text from ollama
if caption:
logseq.Editor.appendBlockInPage(title, "Ollama Image Caption:")
add_to_last_block_as_child(title, caption, logseq)
logseq.App.showMsg('Upload complete!')
except Exception as e:
logger.error(f"❌ Logseq upload failed: {e}")
def str_to_bool(value):
"""Convert string values from .env to boolean."""
return value.lower() in ("true", "1", "yes")
def parse_arguments():
"""Parse and return command-line arguments."""
load_dotenv() # Load environment variables
# Load environment variables with fallbacks
default_camera = int(os.getenv("CAMERA_INDEX", 0))
default_upload = str_to_bool(os.getenv("UPLOAD", "True"))
default_ocr_processor = os.getenv("OCR_PROCESSOR", "pyobjc")
default_use_ollama_caption = str_to_bool(os.getenv("USE_OLLAMA_CAPTION", "False"))
default_use_ollama_ocr = str_to_bool(os.getenv("USE_OLLAMA_OCR", "False"))
default_check_markers = str_to_bool(os.getenv("CHECK_MARKERS", "True"))
default_upload_marked_image = str_to_bool(os.getenv("UPLOAD_MARKED_IMAGE", "False"))
parser = argparse.ArgumentParser(
description="Scan an image using a specific camera index or provide an image file.")
parser.add_argument("--camera", type=int, default=default_camera,
help=f"Set the camera index (integer value required) (default: {default_camera})")
parser.add_argument("--image", type=str, help="Optional image file path to use instead of capturing a new image")
parser.add_argument("--upload", action="store_true", default=default_upload,
help=f"Enable Logseq upload (default: {default_upload})")
parser.add_argument("--ocr-processor", choices=["easyocr", "tesseract", "pyobjc"],
default=default_ocr_processor,
help=f"Choose OCR processor (default: '{default_ocr_processor}')")
parser.add_argument("--use-ollama-caption", action="store_true", default=default_use_ollama_caption,
help=f"Use Ollama for image description (default: {default_use_ollama_caption})")
parser.add_argument("--use-ollama-ocr", action="store_true", default=default_use_ollama_ocr,
help=f"Use Ollama for OCR (default: {default_use_ollama_ocr})")
parser.add_argument("--check-markers", action="store_true", default=default_check_markers,
help=f"Check for markers in the image (default: {default_check_markers})")
parser.add_argument("--upload-marked-image", action="store_true", default=default_upload_marked_image,
help=f"Upload marked image if markers detected (default: {default_upload_marked_image})")
return parser.parse_args()
def log_configuration(args):
"""Log configuration settings."""
logger.info("🔧 Configuration:")
for key, value in vars(args).items():
logger.info(f" {key.capitalize()}: {value}")
logger.info("------------------------------------------------------\n")
def capture_image(camera_index):
"""Capture an image from the camera."""
snap_timestamp = datetime.now().strftime("%Y-%m-%d_%H-%M-%S-%f")[:-3]
image_path = f"./scans/snap_{snap_timestamp}.png"
frame, captured_image_path = scan(image_path=image_path, camera_index=camera_index, hold_preview=False,
preview_scale=0.5, timeout=30)
if frame is None or captured_image_path is None:
logger.error("❌ Image capture failed or timed out. Exiting...")
sys.exit(1)
return frame, captured_image_path
def extract_text(image_path, ocr_processor):
"""Extract text using the selected OCR processor."""
ocr_text = ""
hashtags = []
if ocr_processor == "easyocr":
ocr_text = recognize_text_easyocr(image_path)
elif ocr_processor == "tesseract":
ocr_text = recognize_text_tesseract(image_path)
elif ocr_processor == "pyobjc":
ocr_text, hashtags, barcode_info = recognize_text_pyobjc(image_path)
if barcode_info:
logger.info(f"🔍 Barcode/QR Code info: {barcode_info}")
ocr_text += f"\n\nBarcode/QR Code Info: {barcode_info}"
return ocr_text, hashtags
def generate_image_description(image_path, use_ollama_caption, use_ollama_ocr):
"""Generate image description and OCR text using Ollama if enabled."""
caption = get_image_description_ollama(image_path) if use_ollama_caption else ""
ollama_ocr_text = ollama_ocr(image_path) if use_ollama_ocr else ""
return caption, ollama_ocr_text
def detect_and_upload_markers(image_path, check_markers, upload_marked_image):
    """Detect markers in the image and upload the marked image if enabled."""
    marker_list = []
    marked_image_path = None
    keypoints_image_path = None
    if check_markers:
        markers, marked_image_path, keypoints_image_path = detect_markers(image_path, templates, min_area=500)
        logger.info(f" {markers=}")
        if markers:
            logger.info(f"🎯 Detected markers: {markers}, #{len(markers)}")
            marker_list.extend(markers)
            if upload_marked_image:
                marked_frame = cv2.imread(marked_image_path)
                marked_asset_path = transfer_img_to_logseq(marked_frame)
        else:
            logger.info("🛑 No markers detected.")
    return marker_list, marked_image_path, keypoints_image_path
def upload_to_logseq_if_needed(upload,
ocr_text,
caption,
ollama_ocr_text,
frame,
marker_list,
marked_image_path,
hashtags=[]):
"""Upload to Logseq if required."""
if upload:
logger.info('🚀 Uploading to Logseq...')
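        # The token ("vetinari" here) must match the authorization token configured
        # in Logseq's local HTTP API server settings.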
logseq = Logseq(host="127.0.0.1", port=12315, token="vetinari")
asset_path_list = [transfer_img_to_logseq(frame)]
if marker_list:
marked_frame = cv2.imread(marked_image_path)
marked_asset_path = transfer_img_to_logseq(marked_frame)
asset_path_list.append(marked_asset_path)
pagetitle = f"Upload-{datetime.now().strftime('%Y-%m-%d_%H:%M:%S')}"
upload_to_logseq(logseq,
title=pagetitle,
ocr_text=ocr_text,
caption=caption,
image_path_list=asset_path_list,
marker_list=marker_list,
ollama_ocr_text=ollama_ocr_text,
hashtags=hashtags)
logger.info(f"✅ Upload complete.")
return pagetitle
return None
def main():
"""Main function to execute OCR workflow."""
try:
args = parse_arguments()
log_configuration(args)
start_time = time.time()
if args.image:
image_path = args.image
            logger.info(f'📂 Using provided image file: {image_path}')
if not os.path.exists(image_path):
logger.error(f"Error: Provided image file '{image_path}' does not exist.")
return
frame = cv2.imread(image_path)
if frame is None:
print(f"Error: Failed to load image from '{image_path}'")
return
else:
frame, image_path = capture_image(args.camera)
logger.info('🔍 Extracting text...')
ocr_text, hashtags = extract_text(image_path, args.ocr_processor)
# Check for meaningless text if OCR processor is easyocr or tesseract
if args.ocr_processor in ["easyocr", "tesseract"] and not is_meaningful_text(ocr_text):
logger.warning('❌ OCR output was not meaningful. Discarding...')
ocr_text = ""
logger.info(f"✅ Text extraction complete in {time.time() - start_time:.2f} seconds.\n")
logger.info('🖼️ Get image description...')
caption, ollama_ocr_text = generate_image_description(image_path, args.use_ollama_caption, args.use_ollama_ocr)
logger.info(f"✅ Image description generation complete in {time.time() - start_time:.2f} seconds.\n")
logger.info('🎯 Detecting markers...')
marker_list, marked_image_path, keypoints_image_path = detect_and_upload_markers(image_path,
args.check_markers,
args.upload_marked_image)
pagetitle = upload_to_logseq_if_needed(args.upload,
ocr_text,
caption,
ollama_ocr_text,
frame,
marker_list,
marked_image_path=marked_image_path,
hashtags=hashtags)
logseqpageurl = None
if pagetitle:
logger.info(f"✅ Upload complete. Page title: {pagetitle}")
# get the page URL from the title
baseurl = 'https://archive.petau.net/#/page/'
            # make sure the pagetitle is URL-safe before appending it to the base URL
pagetitle_url = urllib.parse.quote(pagetitle, safe="/?#[]@&=")
logseqpageurl = f"{baseurl}{pagetitle_url}"
logger.info(f"Page URL: {logseqpageurl}")
pinry_tags = marker_list + hashtags
img_desc = caption if caption else None
# Call the wrapper function to upload and create the pin
pin_url = upload_and_create_pin(image_path, board_id=2, tags=pinry_tags, source_url=logseqpageurl,
description=img_desc)
if pin_url:
logger.info(f"✅ Pin URL: {pin_url}")
if pagetitle:
if args.upload:
logger.info('Adding Pin URL to Logseq...')
logseq = Logseq(host="127.0.0.1", port=12315, token="vetinari")
append_block_to_logseq_page(pagetitle, "## Pin URL:", f'[The Pinry URL for this upload]({pin_url})',
logseq)
else:
logger.error("❌ Pin creation failed.")
logger.info(f"🚀 Pipeline completed in {time.time() - start_time:.2f} seconds.")
logger.info("------------------------------------------------------\n")
success_message = "Scan Completed Successfully!"
url_to_open = "https://pinry.petau.net" # URL to open after timeout
# Show the image with the success message and open the URL after 5 seconds
show_image_with_message(keypoints_image_path, success_message, timeout=5, url=url_to_open)
except KeyboardInterrupt:
logger.info("\nGracefully shutting down... Interrupt received (Ctrl+C).")
sys.exit(0) # Exit gracefully with status code 0
if __name__ == "__main__":
main()

camera_debug.py (new file)

@@ -0,0 +1,23 @@
import cv2
camera = cv2.VideoCapture(0) # Change index if needed
if not camera.isOpened():
print("❌ Could not access the webcam")
else:
# Try setting 4K resolution
camera.set(cv2.CAP_PROP_FRAME_WIDTH, 3840)
camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 2160)
# Read the actual resolution
actual_width = int(camera.get(cv2.CAP_PROP_FRAME_WIDTH))
actual_height = int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT))
print(f"Requested: 3840x2160 → Camera Output: {actual_width}x{actual_height}")
ret, frame = camera.read()
if ret:
print(f"Captured frame size: {frame.shape[1]}x{frame.shape[0]}")
cv2.imwrite("./debug_img.png", frame)
camera.release()

camera_finder.py (new file)

@@ -0,0 +1,57 @@
import cv2
import subprocess
import re
from logger_config import logger
def get_camera_names():
"""Gets a list of available camera names using ffmpeg (macOS only)."""
try:
result = subprocess.run(
["ffmpeg", "-f", "avfoundation", "-list_devices", "true", "-i", ""],
stderr=subprocess.PIPE,
stdout=subprocess.PIPE,
text=True
)
output = result.stderr # ffmpeg prints device list to stderr
# Extract camera names from ffmpeg output
camera_lines = re.findall(r'\[AVFoundation input device @ .*?] \".*\"', output)
cameras = [re.search(r'"(.*?)"', line).group(1) for line in camera_lines]
return cameras
except Exception as e:
logger.error(f"❌ Error getting camera names: {e}")
return []
def list_available_cameras():
"""Lists available camera indexes and their names, with a preview."""
logger.info("🔍 Scanning for available cameras...")
camera_names = get_camera_names()
found_cameras = []
for i in range(10): # Check the first 10 indexes
cap = cv2.VideoCapture(i, cv2.CAP_AVFOUNDATION)
if cap.isOpened():
name = camera_names[i] if i < len(camera_names) else f"Unknown Camera {i}"
logger.info(f"✅ Camera {i}: {name}")
found_cameras.append((i, name))
ret, frame = cap.read()
if ret:
cv2.imshow(f"Camera {i}: {name}", frame)
cv2.waitKey(1000) # Show preview for 1 second
cv2.destroyAllWindows()
cap.release()
else:
logger.warning(f"❌ No camera found at index {i}")
if found_cameras:
logger.info("\n🎥 Available Cameras:")
for index, name in found_cameras:
logger.info(f" {index}: {name}")
if __name__ == '__main__':
list_available_cameras()

camera_handler.py (new file)

@@ -0,0 +1,116 @@
import tkinter as tk
from PIL import Image, ImageTk, ImageEnhance
import cv2
import time
import os
def scan(image_path=None, camera_index=None, hold_preview=False, preview_scale=1.0, timeout=10):
"""
Captures an image with a beautiful, high-quality Tkinter preview.
Features a centered, large countdown before capture.
"""
assert image_path is not None, "Image path not provided."
if camera_index is None:
camera_index = 0 # Default camera index
# Open camera
camera = cv2.VideoCapture(camera_index)
if not camera.isOpened():
print("❌ Error: Could not access the webcam")
return None, None
# Get highest available resolution
camera.set(cv2.CAP_PROP_FRAME_WIDTH, 9999)
camera.set(cv2.CAP_PROP_FRAME_HEIGHT, 9999)
max_width = int(camera.get(cv2.CAP_PROP_FRAME_WIDTH))
max_height = int(camera.get(cv2.CAP_PROP_FRAME_HEIGHT))
print(f"🔍 Using resolution: {max_width}x{max_height}")
# Calculate preview dimensions
preview_width = int(max_width * preview_scale)
preview_height = int(max_height * preview_scale)
# Initialize Tkinter window
root = tk.Tk()
root.title("📸 Capture Preview")
root.geometry(f"{preview_width}x{preview_height}") # Adjust to preview size
root.configure(bg="black")
root.overrideredirect(True) # Borderless fullscreen effect
# Create canvas for overlay
canvas = tk.Canvas(root, width=preview_width, height=preview_height, highlightthickness=0)
canvas.pack()
countdown_label = tk.Label(root, text="4", font=("Helvetica", 100, "bold"), fg="white", bg="black")
countdown_label.place(x=preview_width // 2, y=preview_height // 2, anchor="center") # Center countdown
frame = None
countdown_start = time.time()
def update_preview():
"""Update the camera preview & countdown."""
nonlocal frame
ret, frame = camera.read()
if not ret:
print("❌ Error: Failed to read from camera.")
root.destroy()
return
# Scale down only for preview
frame_preview = cv2.resize(frame, (preview_width, preview_height))
frame_rgb = cv2.cvtColor(frame_preview, cv2.COLOR_BGR2RGB)
img = Image.fromarray(frame_rgb)
# Apply slight dimming effect
enhancer = ImageEnhance.Brightness(img)
img = enhancer.enhance(0.6)
img_tk = ImageTk.PhotoImage(img)
canvas.create_image(0, 0, anchor=tk.NW, image=img_tk)
canvas.img_tk = img_tk # Keep reference
        # Update countdown timer
        elapsed = int(time.time() - countdown_start)
        remaining = max(0, 4 - elapsed)
        countdown_label.config(text=str(remaining))
if remaining == 0:
capture() # Capture automatically at 0
root.after(100, update_preview)
def capture(event=None):
"""Capture image and close window."""
nonlocal frame
if frame is not None:
cv2.imwrite(image_path, frame)
print(f"✅ Image saved: {image_path}")
camera.release()
root.quit()
def check_timeout():
"""Exit if hold_preview is enabled and timeout is reached."""
if hold_preview and time.time() - countdown_start > timeout:
print("⌛ Timeout reached. Exiting...")
root.quit()
root.after(1000, check_timeout)
# Bind Enter key to capture function
root.bind("<Return>", capture)
# Start preview loop
update_preview()
check_timeout()
root.mainloop()
    return (frame, image_path) if os.path.exists(image_path) else (None, None)
if __name__ == '__main__':
scan("./scans/debug_img.png", hold_preview=False, preview_scale=0.5, timeout=10)

image_analysis_pyobjc.py (new file)

@@ -0,0 +1,113 @@
import Vision
import AppKit
import os
from logger_config import logger
import re # Regular expression module to find hashtags
def nsimage_to_cgimage(image):
"""Converts NSImage to CGImage."""
bitmap_rep = AppKit.NSBitmapImageRep.imageRepWithData_(image.TIFFRepresentation())
if not bitmap_rep:
logger.error("❌ Failed to get bitmap representation.")
return None
cg_image = bitmap_rep.CGImage()
if not cg_image:
logger.error("❌ Failed to convert to CGImage.")
return None
return cg_image
def recognize_text_pyobjc(image_path,
recognition_level=Vision.VNRequestTextRecognitionLevelAccurate,
uses_language_correction=True):
"""Uses macOS Vision framework to extract text and barcode/QR code data from an image."""
# Check if the file exists early
if not os.path.exists(image_path):
logger.error("❌ Image path does not exist: %s", image_path)
return "", [], ""
try:
# Load the image using AppKit
image = AppKit.NSImage.alloc().initWithContentsOfFile_(image_path)
if not image:
logger.error("❌ Failed to load image.")
return "", [], ""
# Log image size
size = image.size()
logger.info(f"Image size: {size.width} x {size.height}")
# Convert NSImage to CGImage
cg_image = nsimage_to_cgimage(image)
if not cg_image:
return "", [], ""
# Create a text recognition request
text_request = Vision.VNRecognizeTextRequest.alloc().init()
text_request.setRecognitionLevel_(recognition_level)
text_request.setUsesLanguageCorrection_(uses_language_correction)
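        # The "accurate" recognition level trades speed for quality, and language
        # correction lets Vision fix common character confusions with a language model.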
# Create a barcode detection request
barcode_request = Vision.VNDetectBarcodesRequest.alloc().init()
# Create an image request handler
handler = Vision.VNImageRequestHandler.alloc().initWithCGImage_options_(cg_image, None)
# Perform the requests
success, error = handler.performRequests_error_([text_request, barcode_request], None)
if not success:
logger.error(f"❌ Vision OCR failed: {error}")
return "", [], ""
# Extract recognized text
text_results = text_request.results()
if not text_results:
logger.info("No text detected.")
extracted_text = "No text detected."
else:
extracted_text = "\n".join([result.text() for result in text_results])
# Extract hashtags
hashtags = extract_hashtags(extracted_text)
# Extract barcode/QR code information
barcode_info = extract_barcode_info(barcode_request.results())
return extracted_text, hashtags, barcode_info
except Exception as e:
logger.error(f"❌ Error: {e}")
return "", [], ""
def extract_hashtags(text):
"""Extracts hashtags from the given text."""
# Regular expression to find hashtags (words starting with # followed by alphanumeric characters)
hashtags = re.findall(r'#\w+', text)
return hashtags
def extract_barcode_info(barcode_results):
"""Extracts barcode or QR code information from the detection results."""
barcode_data = []
for barcode in barcode_results:
# Extract the string associated with the barcode
if hasattr(barcode, 'payloadString'):
data = barcode.payloadString()
if data:
barcode_data.append(data)
logger.info("Barcode/QR code data: %s", barcode_data)
return ', '.join(barcode_data) if barcode_data else ""
if __name__ == '__main__':
image_path = 'scans/snap_2025-02-22_18-12-04-435736.png'
extracted_text, hashtags, barcode_info = recognize_text_pyobjc(image_path)
logger.info("Extracted Text:\n%s", extracted_text)
logger.info("Hashtags found: %s", hashtags)
logger.info("Barcode/QR code Info: %s", barcode_info)

logger_config.py (new file)

@@ -0,0 +1,29 @@
import logging
from logging.handlers import RotatingFileHandler
# Create a logger
logger = logging.getLogger("archive_logger")
logger.setLevel(logging.INFO) # Set the logging level
# Create a rotating file handler
log_file = 'archive.log'
max_log_size = 5 * 1024 * 1024 # Max log file size (5 MB)
backup_count = 3 # Keep up to 3 backup files
rotating_handler = RotatingFileHandler(log_file, maxBytes=max_log_size, backupCount=backup_count)
rotating_handler.setLevel(logging.DEBUG)
# Create a stream handler (for console output)
console_handler = logging.StreamHandler()
console_handler.setLevel(logging.DEBUG)
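# Note: the logger itself is capped at INFO above, so DEBUG records are filtered
# out before they ever reach these DEBUG-level handlers.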
# Create a formatter
formatter = logging.Formatter('%(asctime)s - %(levelname)s - %(message)s')
# Add formatter to handlers
rotating_handler.setFormatter(formatter)
console_handler.setFormatter(formatter)
# Add handlers to the logger
logger.addHandler(rotating_handler)
logger.addHandler(console_handler) # Add console handler

logseq_api.py (new file)

@@ -0,0 +1,186 @@
import requests
import json
# Doc site: https://plugins-doc.logseq.com/ .
# This list is auto-generated from https://github.com/logseq/plugins/tree/master/docs .
apis = [
'logseq.settings',
'logseq.updateSettings',
'logseq.once',
'logseq.toggleMainUI',
'logseq.listeners',
'logseq.ready',
'logseq.connected',
'logseq.removeListener',
'logseq.showMainUI',
'logseq.resolveResourceFullUrl',
'logseq.provideStyle',
'logseq.caller',
'logseq.addListener',
'logseq.hideSettingsUI',
'logseq.provideUI',
'logseq.setMainUIInlineStyle',
'logseq.emit',
'logseq.showSettingsUI',
'logseq.listenerCount',
'logseq.removeAllListeners',
'logseq.onSettingsChanged',
'logseq.provideTheme',
'logseq.Experiments',
'logseq.eventNames',
'logseq.FileStorage',
'logseq.provideModel',
'logseq.baseInfo',
'logseq.setMainUIAttrs',
'logseq.useSettingsSchema',
'logseq.hideMainUI',
'logseq.isMainUIVisible',
'logseq.beforeunload',
'logseq.UI.showMsg',
'logseq.UI.closeMsg',
'logseq.App.registerPageMenuItem',
'logseq.App.getUserInfo',
'logseq.App.setRightSidebarVisible',
'logseq.App.showMsg',
'logseq.App.quit',
'logseq.App.registerUIItem',
'logseq.App.setFullScreen',
'logseq.App.onMacroRendererSlotted',
'logseq.App.getInfo',
'logseq.App.onPageHeadActionsSlotted',
'logseq.App.onCurrentGraphChanged',
'logseq.App.registerCommandShortcut',
'logseq.App.getStateFromStore',
'logseq.App.onSidebarVisibleChanged',
'logseq.App.registerCommand',
'logseq.App.setLeftSidebarVisible',
'logseq.App.replaceState',
'logseq.App.setZoomFactor',
'logseq.App.execGitCommand',
'logseq.App.invokeExternalCommand',
'logseq.App.queryElementById',
'logseq.App.onThemeModeChanged',
'logseq.App.openExternalLink',
'logseq.App.pushState',
'logseq.App.getCurrentGraph',
'logseq.App.onRouteChanged',
'logseq.App.queryElementRect',
'logseq.App.registerCommandPalette',
'logseq.App.relaunch',
'logseq.App.getUserConfigs',
'logseq.App.onBlockRendererSlotted',
'logseq.DB.datascriptQuery',
'logseq.DB.onChanged',
'logseq.DB.q',
'logseq.DB.onBlockChanged',
'logseq.Assets.listFilesOfCurrentGraph',
'logseq.Editor.insertBatchBlock',
'logseq.Editor.getAllPages',
'logseq.Editor.createPage',
'logseq.Editor.getBlockProperty',
'logseq.Editor.getBlockProperties',
'logseq.Editor.insertAtEditingCursor',
'logseq.Editor.getCurrentPage',
'logseq.Editor.appendBlockInPage',
'logseq.Editor.getSelectedBlocks',
'logseq.Editor.insertBlock',
'logseq.Editor.getPagesTreeFromNamespace',
'logseq.Editor.onInputSelectionEnd',
'logseq.Editor.scrollToBlockInPage',
'logseq.Editor.moveBlock',
'logseq.Editor.getPreviousSiblingBlock',
'logseq.Editor.exitEditingMode',
'logseq.Editor.getPagesFromNamespace',
'logseq.Editor.getNextSiblingBlock',
'logseq.Editor.getPage',
'logseq.Editor.renamePage',
'logseq.Editor.prependBlockInPage',
'logseq.Editor.deletePage',
'logseq.Editor.editBlock',
'logseq.Editor.checkEditing',
'logseq.Editor.getCurrentPageBlocksTree',
'logseq.Editor.getCurrentBlock',
'logseq.Editor.upsertBlockProperty',
'logseq.Editor.registerSlashCommand',
'logseq.Editor.getPageBlocksTree',
'logseq.Editor.getPageLinkedReferences',
'logseq.Editor.updateBlock',
'logseq.Editor.registerBlockContextMenuItem',
'logseq.Editor.removeBlock',
'logseq.Editor.restoreEditingCursor',
'logseq.Editor.removeBlockProperty',
'logseq.Editor.getBlock',
'logseq.Editor.openInRightSidebar',
'logseq.Editor.setBlockCollapsed',
'logseq.Editor.getEditingBlockContent',
'logseq.Editor.getEditingCursorPosition',
'logseq.Git.saveIgnoreFile',
'logseq.Git.loadIgnoreFile',
'logseq.Git.execCommand']
def raw_api_call(host, port, token, method, args):
resp = requests.post(f'http://{host}:{port}/api', json={
'method': method,
'args': args
}, headers={
'Authorization': 'Bearer ' + token
})
try:
return resp.json()
except json.JSONDecodeError:
return resp.text
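# The helpers below build the client class at runtime: each dotted API name
# (e.g. 'logseq.Editor.createPage') becomes a chain of nested namespace classes
# ending in a static method that forwards the call to raw_api_call.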
def _define_api(host, port, token, cls, method):
[_, *hier, name] = method.split('.')
@staticmethod
def _wrap(*args):
return raw_api_call(host, port, token, method, args)
if hier:
for ns in hier:
if not hasattr(cls, ns):
setattr(cls, ns, type(ns, (object,), {}))
cls = getattr(cls, ns)
setattr(cls, name, _wrap)
def _create_class(host, port, token):
class Logseq: pass
for api in apis:
_define_api(host, port, token, Logseq, api)
return Logseq
def Logseq(host, port, token):
return _create_class(host, port, token)()
def add_to_last_block_as_child(page, block_content, logseq):
# Step 1: Get all blocks from the page
blocks = logseq.Editor.getPageBlocksTree(page)
assert blocks, "No blocks found in the page"
# Step 2: Find the last block
if blocks and isinstance(blocks, list) and len(blocks) > 0:
last_block = blocks[-1] # Get the last block in the list
parent_block_uuid = last_block.get("uuid")
#print(f"Last block UUID: {parent_block_uuid}")
# Step 3: Insert a new block as a child of the last block
logseq.Editor.insertBlock(parent_block_uuid, block_content, {"children": "true"})
# Usage:
if __name__ == '__main__':
logseq = Logseq('127.0.0.1', '12315', 'vetinari')
logseq.Editor.deletePage('APITest')
logseq.Editor.createPage('APITest')
logseq.Editor.checkEditing()
logseq.Editor.appendBlockInPage('APITest', 'Parent block')
add_to_last_block_as_child('APITest', 'Child block', logseq)
logseq.App.showMsg('Testing API')

logseq_cleaning_script.py (new file)

@@ -0,0 +1,30 @@
import os
import shutil
# execute with care
assets_dir = './logseq/assets'
journal_dir = './logseq/journals'
pages_dir = './logseq/pages'
to_delete_dir = './logseq/to_delete'
if not os.path.exists(to_delete_dir):
os.makedirs(to_delete_dir)
assets_files = os.listdir(assets_dir)
referenced_files = []
for dirname in [journal_dir, pages_dir]:
for filename in os.listdir(dirname):
if filename.endswith('.md'):
with open(os.path.join(dirname, filename)) as f:
for line in f:
for asset in assets_files:
if asset in line:
referenced_files.append(asset)
for asset in assets_files:
if asset not in referenced_files and not asset.endswith(".edn"):
print(asset)
shutil.move(os.path.join(assets_dir, asset), to_delete_dir)

marker_detect.py (new file)

@@ -0,0 +1,166 @@
import os
import cv2
import numpy as np
import time
import cairosvg
from logger_config import logger
TEMPLATE_DIR = "./markers/"
USE_ORB = False # Set to True to use ORB, False to use SIFT
# Load and convert SVG templates to grayscale images
def load_template(filename):
"""Load a template image, converting SVG to grayscale if necessary."""
template_path = os.path.join(TEMPLATE_DIR, filename)
if filename.endswith(".svg"):
# Convert SVG to PNG (grayscale)
png_data = cairosvg.svg2png(url=template_path)
np_arr = np.frombuffer(png_data, dtype=np.uint8)
template = cv2.imdecode(np_arr, cv2.IMREAD_GRAYSCALE)
else:
# Load JPG/PNG directly
template = cv2.imread(template_path, cv2.IMREAD_GRAYSCALE)
return template
# Load all templates
templates = {}
logger.info("🔄 Loading templates...")
start_time = time.time()
for filename in os.listdir(TEMPLATE_DIR):
if filename.endswith((".jpg", ".png", ".svg")):
template_name = os.path.splitext(filename)[0]
template = load_template(filename)
if template is not None:
templates[template_name] = template
else:
logger.error(f"❌ Failed to load template: {filename}")
logger.info(f"✅ Template loading complete in {time.time() - start_time:.2f} seconds.\n")
# Log the loaded templates once all are loaded
logger.info("Templates loaded: %s", ', '.join(templates.keys()))
# Initialize feature detector (SIFT or ORB)
if USE_ORB:
detector = cv2.ORB_create(nfeatures=500)
matcher = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
ratio_test = 0.6 # ORB ratio test
else:
detector = cv2.SIFT_create()
matcher = cv2.FlannBasedMatcher(dict(algorithm=1, trees=10), dict())
ratio_test = 0.6 # SIFT stricter ratio test
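# SIFT yields float descriptors, hence FLANN (KD-tree) matching with Lowe's ratio
# test below; ORB yields binary descriptors, matched by Hamming distance with
# cross-checking and trimmed to the best matches instead of ratio-filtered.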
def is_valid_aspect_ratio(bounding_box, expected_aspect_ratio=1.0, tolerance=0.2):
"""Ensure detected bounding box is approximately square."""
x, y, w, h = bounding_box
aspect_ratio = w / float(h)
return (expected_aspect_ratio - tolerance) <= aspect_ratio <= (expected_aspect_ratio + tolerance)
def detect_markers(image_path, templates, min_matches=15, min_area=500):
"""Detects markers using feature matching and filters based on shape constraints."""
logger.info(f"🔄 Reading image: {image_path}")
image = cv2.imread(image_path)
    if image is None:
        logger.error("❌ Failed to load image")
        return [], None, None
gray_image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
detected_markers = []
# Detect keypoints and descriptors in the input image
kp_image, des_image = detector.detectAndCompute(gray_image, None)
logger.info(f"🔍 Detected {len(kp_image)} keypoints in the input image.")
keypoints_image = cv2.drawKeypoints(image, kp_image, None, (0, 255, 0), cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
logger.info("🔄 Starting template matching...")
for name, template in templates.items():
logger.debug(f"🔍 Processing template: {name}")
# Detect keypoints and descriptors in the template
kp_template, des_template = detector.detectAndCompute(template, None)
if des_template is None or des_image is None:
continue
logger.debug(f"🔍 Found {len(kp_template)} keypoints in template {name}.")
# Match descriptors
if USE_ORB:
matches = matcher.match(des_template, des_image)
good_matches = sorted(matches, key=lambda x: x.distance)[:min_matches]
else:
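            # Lowe's ratio test: accept a match only if its best distance is clearly smaller than the second-best.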
raw_matches = matcher.knnMatch(des_template, des_image, k=2)
good_matches = [m for m, n in raw_matches if m.distance < ratio_test * n.distance]
if len(good_matches) >= min_matches:
src_pts = np.float32([kp_template[m.queryIdx].pt for m in good_matches]).reshape(-1, 1, 2)
dst_pts = np.float32([kp_image[m.trainIdx].pt for m in good_matches]).reshape(-1, 1, 2)
if USE_ORB:
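                # ORB path: approximate the marker with an axis-aligned box around all matched points.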
x_min, y_min = np.min(dst_pts, axis=0)[0]
x_max, y_max = np.max(dst_pts, axis=0)[0]
bounding_box = (int(x_min), int(y_min), int(x_max - x_min), int(y_max - y_min))
if is_valid_aspect_ratio(bounding_box) and bounding_box[2] * bounding_box[3] >= min_area:
cv2.rectangle(image, (bounding_box[0], bounding_box[1]),
(bounding_box[0] + bounding_box[2], bounding_box[1] + bounding_box[3]),
(0, 255, 0), 2)
cv2.putText(image, name, (bounding_box[0], bounding_box[1] - 10),
cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0), 2)
detected_markers.append(name)
else:
logger.warning(f"{name} detected but doesn't meet square size constraints.")
else:
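                # SIFT path: estimate a homography (RANSAC, 5 px reprojection threshold) and project the template outline into the scene.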
M, mask = cv2.findHomography(src_pts, dst_pts, cv2.RANSAC, 5.0)
if M is not None:
h, w = template.shape
pts = np.float32([[0, 0], [0, h - 1], [w - 1, h - 1], [w - 1, 0]]).reshape(-1, 1, 2)
dst = cv2.perspectiveTransform(pts, M)
bounding_box = cv2.boundingRect(dst)
if is_valid_aspect_ratio(bounding_box) and bounding_box[2] * bounding_box[3] >= min_area:
hull = cv2.convexHull(dst)
if len(hull) == 4:
detected_markers.append(name)
image = cv2.polylines(image, [np.int32(hull)], True, (0, 255, 0), 3)
x, y = dst[0][0]
cv2.putText(image, name, (int(x), int(y) - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.6, (0, 255, 0),
2)
else:
logger.warning(f"{name} detected but not forming a perfect quadrilateral.")
debug_image_path = "./scans/detected_markers.png"
keypoints_image_path = "./scans/keypoints.png"
cv2.imwrite(debug_image_path, image)
cv2.imwrite(keypoints_image_path, keypoints_image)
logger.info(f"📸 Debug image saved to {debug_image_path}")
logger.info(f"📸 Keypoints image saved to {keypoints_image_path}")
return detected_markers, debug_image_path, keypoints_image_path
if __name__ == '__main__':
image_path = "scans/snap_2025-02-23_17-10-06-836370.png"
logger.info(f"🔍 Detecting markers in image: {image_path}")
markers, debug_img, kp_img = detect_markers(image_path, templates)
logger.info(f"🔍 Detected markers: {markers}")
if markers:
logger.info(f"📍 Total markers detected: {len(markers)}")
else:
logger.warning("🛑 No markers detected.")

BIN markers/HDPE.png Normal file (12 KiB)
BIN markers/LDPE.png Normal file (12 KiB)
BIN markers/O.png Normal file (11 KiB)
BIN markers/PET.png Normal file (10 KiB)
BIN markers/PP.png Normal file (12 KiB)
BIN markers/PS.png Normal file (14 KiB)
BIN markers/PVC.png Normal file (14 KiB)
BIN markers/bond strength.png Normal file (37 KiB)
BIN markers/cnc.png Normal file (15 KiB)
BIN markers/color.png Normal file (30 KiB)
BIN markers/composite.png Normal file (16 KiB)
BIN markers/density.png Normal file (37 KiB)
BIN markers/flexibility.png Normal file (35 KiB)
BIN markers/haptics.png Normal file (10 KiB)
BIN markers/hot air.png Normal file (24 KiB)
BIN markers/joinery.png Normal file (21 KiB)
BIN markers/observation.png Normal file (22 KiB)
BIN markers/opacity.png Normal file (24 KiB)
BIN markers/press.png Normal file (16 KiB)
BIN markers/process.png Normal file (14 KiB)
BIN markers/shape.png Normal file (18 KiB)
BIN markers/shredder.png Normal file (21 KiB)
BIN markers/smell.png Normal file (13 KiB)
BIN markers/structural.png Normal file (23 KiB)
BIN markers/surface.png Normal file (22 KiB)
BIN markers/thermals.png Normal file (26 KiB)
BIN markers/tool.png Normal file (13 KiB)
2
poetry.toml Normal file
View file

@ -0,0 +1,2 @@
[virtualenvs]
in-project = true

14
publish.sh Executable file
View file

@ -0,0 +1,14 @@
#!/bin/bash
echo "Trying to publish all changes to archive.petau.net"
echo "Getting HTML from Logseq"
cd ~/thesis/logseq || exit
logseq-publish-spa ~/thesis/docs/ --theme-mode dark --accent-color orange --static-directory ~/logseq/static
echo "Publishing to archive.petau.net"
cd ~/thesis || exit
git add .
git commit -m "CI: Publish to archive.petau.net"
git push
echo "Publish Complete"
exit 0

22
pyproject.toml Normal file
View file

@ -0,0 +1,22 @@
[project]
name = "code"
version = "0.1.0"
description = ""
authors = [
{name = "arontaupe",email = "aron@petau.net"}
]
readme = "README.md"
requires-python = ">=3.12"
dependencies = [
"pytesseract (>=0.3.13,<0.4.0)",
"pillow (>=11.1.0,<11.2.0)",
"easyocr (>=1.7.2,<1.8.0)",
"textstat (>=0.7.4,<0.8.0)",
"opencv-python (>=4.11.0.86,<4.12.0.0)",
"requests (>=2.32.3,<2.33.0)"
]
[build-system]
requires = ["poetry-core>=2.0.0,<3.0.0"]
build-backend = "poetry.core.masonry.api"

30
render_mermaid.sh Executable file
View file

@ -0,0 +1,30 @@
#!/bin/bash
# Ensure Mermaid CLI is installed
if ! command -v mmdc &> /dev/null
then
echo "Mermaid CLI (mmdc) not found. Install it with: npm install -g @mermaid-js/mermaid-cli"
exit 1
fi
# Create output directory if it doesn't exist
mkdir -p typst/assets/images
# Loop through all .mmd files in the mermaid folder
for file in typst/assets/mermaid/*.mmd; do
# Extract filename without extension
filename=$(basename -- "$file" .mmd)
# Convert to PNG
echo "Rendering $file -> typst/assets/images/$filename.png"
mmdc -i "$file" -o "typst/assets/images/$filename.png" -t neutral -b transparent -s 3
# Check if conversion was successful
if [ $? -eq 0 ]; then
echo "Successfully rendered: $filename.png"
else
echo "Failed to render: $filename.png"
fi
done
echo "All Mermaid diagrams processed!"

15
requirements.txt Normal file
View file

@ -0,0 +1,15 @@
requests~=2.32.3
opencv-python~=4.11.0.86
pytesseract~=0.3.13
numpy~=2.2.2
pillow~=11.1.0
easyocr~=1.7.2
ollama~=0.4.7
textstat~=0.7.4
ollama-ocr
mpmath~=1.3.0
CairoSVG~=2.7.1
python-dotenv~=1.0.1
ocrmac~=1.0.0
pyobjc-framework-Vision~=11.0
pyobjc-framework-Cocoa~=11.0

39
scan.sh Executable file
View file

@ -0,0 +1,39 @@
#!/bin/bash
# Check if the script is already running
if pgrep -f "archive_pipeline.py" > /dev/null; then
echo "Scan already in progress. Ignoring new request."
exit 1
fi
# Check if Logseq is running
if ! pgrep -x "Logseq" > /dev/null; then
echo "⚠️ Logseq is not running. Starting Logseq..."
open -a "Logseq"
else
echo "✅ Logseq is running, continuing..."
fi
echo "Scanning to Archive"
cd ~/thesis/code || exit
# Activate virtual environment
source ~/thesis/code/.venv/bin/activate
# Default camera index is 0
CAMERA_INDEX=0
# Check if a camera index was provided as an argument
if [ ! -z "$1" ]; then
CAMERA_INDEX=$1
fi
echo "Starting scan with camera index $CAMERA_INDEX..."
~/thesis/code/.venv/bin/python ~/thesis/code/archive_pipeline.py --camera "$CAMERA_INDEX"
echo "Scan Complete"
echo "Type 'scan' and hit enter to trigger an upload. You can also type 'scan --help' for more options."
echo "Start the *Scan to Archive* App if unsure."
exit 0

48
success_message.py Normal file
View file

@ -0,0 +1,48 @@
import cv2
import webbrowser
import time
def show_image_with_message(image_path, message, timeout=5, url=None):
"""Display image with a centered success message using OpenCV."""
# Read the image using OpenCV
img = cv2.imread(image_path)
if img is None:
print("Error: Unable to load image.")
return
# Get image dimensions (height, width)
height, width, _ = img.shape
# Add a text overlay with the success message
font = cv2.FONT_HERSHEY_SIMPLEX
font_scale = 1.5
font_thickness = 3 # Thicker text for a "bold" effect
text_size = cv2.getTextSize(message, font, font_scale, font_thickness)[0]
# Calculate the position to center the text
text_x = (width - text_size[0]) // 2
text_y = (height + text_size[1]) // 2
    # Draw the message in black. Note: OpenCV's Hershey fonts only cover ASCII,
    # so non-ASCII characters such as emoji are rendered as "?".
    cv2.putText(img, message, (text_x, text_y), font, font_scale, (0, 0, 0), font_thickness, lineType=cv2.LINE_AA)
# Display the image with the message
cv2.imshow("Success", img)
# Wait for the specified timeout (in milliseconds)
cv2.waitKey(timeout * 1000) # Convert seconds to milliseconds
cv2.destroyAllWindows()
# If a URL is provided, open it in the default browser
if url:
webbrowser.open(url)
# Example usage
if __name__ == "__main__":
image_path = "scans/detected_markers.png" # Replace with your actual image path
    success_message = "Scan Completed Successfully!"  # ASCII only: cv2.putText cannot draw the ✅ emoji
url_to_open = "https://pinry.petau.net" # URL to open after timeout
# Show the image with the success message and open the URL after 5 seconds
show_image_with_message(image_path, success_message, timeout=5, url=url_to_open)

15
vision_test_ocrmac.py Normal file
View file

@ -0,0 +1,15 @@
from ocrmac import ocrmac
# OCR test
def recognize_text_macos(image_path):
annotations = ocrmac.OCR(image_path,
language_preference=['en-US'],
framework="livetext"
).recognize()
print(annotations)
return annotations
if __name__ == '__main__':
recognize_text_macos('scans/snap_2025-02-22_18-12-04-435736.png')