From cc0e88c3f455df1c4816c5f9d884fd8949cace8d Mon Sep 17 00:00:00 2001 From: adam Date: Wed, 8 Nov 2023 11:01:17 -0500 Subject: [PATCH] generally works --- bash/cartographize.sh | 71 +++++++++++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 19 deletions(-) diff --git a/bash/cartographize.sh b/bash/cartographize.sh index 72b14b1..42acaf3 100755 --- a/bash/cartographize.sh +++ b/bash/cartographize.sh @@ -1,8 +1,8 @@ #!/bin/bash -# -# phase 1: take screenshots -# +echo "#" +echo "# phase 1: take screenshots" +echo "#" if [ ! -d "screenshots" ]; then mkdir screenshots @@ -13,12 +13,12 @@ do filename=$(basename -- "$file") #extension="${filename##*.}" filename="${filename%.*}" - ffmpeg -ss 0 -i $file -vf fps=0.5 ./screenshots/$filename%d.png + ffmpeg -ss 0 -i $file -vf fps=0.5 ./screenshots/$filename-%d.png done -# -# phase 2: img preprocess -# +echo "#" +echo "# phase 2: img preprocess" +echo "#" if [ ! -d "preproccessed" ]; then mkdir preproccessed @@ -28,12 +28,12 @@ do filename=$(basename -- "$file") extension="${filename##*.}" filename="${filename%.*}" - convert "$file" -gravity SouthWest -crop 560x60+0+0 -fuzz 12% +transparent "#edf1c9" "preproccessed/$filename.png" + convert "$file" -gravity SouthWest -crop 560x48+0+8 -fuzz 12% +transparent "#edf1c9" "preproccessed/$filename.png" done -# -# phase 3: ocr -# +echo "#" +echo "# phase 3: ocr" +echo "#" if [ ! -d "ocr" ]; then mkdir ocr @@ -43,14 +43,14 @@ do filename=$(basename -- "$file") extension="${filename##*.}" filename="${filename%.*}" - tesseract $file "ocr/$filename" + tesseract $file "ocr/$filename" -c tessedit_char_whitelist=" 0123456789KM/HNW." + sed -i 's/ //g' "ocr/$filename.txt" + done -# -# phase 4: prep for geotagging -# - - +echo "#" +echo "# phase 4: prep for geotagging" +echo "#" if [ ! -d "tagged" ]; then mkdir tagged fi @@ -60,5 +60,38 @@ do extension="${filename##*.}" filename="${filename%.*}" convert $file "tagged/$filename.jpg" - exiftool -overwrite_original -TagsFromFile ./knowngood.jpg -All:All "tagged/$filename.jpg" -done \ No newline at end of file +done +exiftool -overwrite_original -TagsFromFile ./knowngood.jpg -All:All "tagged/*.jpg" + +echo "#" +echo "# phase 5: if ocr was good, geotag. Otherwise, reject." +echo "#" +goodreads=0 +badreads=0 +if [ ! -d "badread" ]; then + mkdir badread +fi +find "ocr" -iname "*.txt" -print0 | while read -d $'\0' file +do + filename=$(basename -- "$file") + extension="${filename##*.}" + filename="${filename%.*}" + + n=$(grep -Poh "(?<=N)[0-9]{2}\\.[0-9]+" "$file") + w=$(grep -Poh "(?<=W)[0-9]{2}\\.[0-9]+" "$file") + + if [ -n "$n" ] && [ -n "$w" ]; then + goodreads=$(($goodreads+1)) + echo "$filename was OCR'd well; $goodreads good so far" + exiftool -overwrite_original -gpslatitude="$n" -gpslongitude="-$w" -GPSLatitudeRef="North" -GPSLongitudeRef="West" "tagged/$filename.jpg" + else + badreads=$(($badreads+1)) + echo "$filename is not perfect; $badreads bad so far" + mv "tagged/$filename.jpg" "badread/$filename.jpg" + mv "ocr/$filename.txt" "badread/$filename.txt" + mv "preproccessed/$filename.png" "badread/$filename.png" + fi +done + +echo "$goodreads good, $badreads bad. accuracy:" +bc <<< "scale=4; goodreads=$goodreads; badreads=$badreads; ratio=100*goodreads/(goodreads+badreads);ratio"