website/removeBadSymbols.sh

403 lines
No EOL
12 KiB
Bash
Executable file
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

#!/bin/bash
# The line above is the shebang. It guarantees that this script is executed with bash, not with the currently open shell.
# Problem with changing to sh: I can't get test to work with * wildcards :(
# TODO: also handle hidden files -- actually doable but not a good idea
# TODO: whenever something is changed, also adapt usage (doc)
# TODO: translate entire script into English
# options
# -h --help display help
# -a --noask --no-interactive do not prompt the user to confirm renaming
# -l --onlylog just logs what would have been done but not doing anything
# the following two were originally meant to be mandatory; the option defaults further down make them optional
# -o [file] --outputfile [file] specifies the output (log) file (standard is rn.log)
# -v [file] --voutputfile [file] specifies the verbose output log file (standard rnv.log)
# -rm removes files of the form AlbumArt_*, Folder.jpg, Thumbs.db, desktop.ini
# -c converts file names to lower case
#Constants
# sed scripts for characters that need shell quoting of their own.
# Each assignment is quoted so that the shell stores the script verbatim.
# apostrophe (') -> removed
STRICHSTR="s/'//g"
# double quote (") -> removed
ANFUERSTR='s/"//g'
# backtick (`) -> removed
ANDSTRSTR='s/`//g'
# ampersand (&) -> _und_
AMBERSSTR='s/&/_und_/g'
# asterisk (*) -> x
STERNXSTR='s/*/x/g'
#Functions
vLog()
# Appends the message $1 as one line to the verbose log file.
# Globals: VLOGFILE (read) - path of the verbose log file
# printf is used instead of echo so that a message such as "-n" or "-e"
# is written literally and not swallowed as an echo option.
{
  printf '%s\n' "$1" >> "$VLOGFILE"
}
log()
# Appends the message $1 as one line to both the short and the verbose log file.
# Globals: NVLOGFILE (read) - path of the short log file
#          VLOGFILE (read)  - path of the verbose log file
# printf is used instead of echo so that a message such as "-n" or "-e"
# is written literally and not swallowed as an echo option.
{
  printf '%s\n' "$1" >> "$NVLOGFILE"
  printf '%s\n' "$1" >> "$VLOGFILE"
}
rename()
# Logs the move of $1 to $2 and performs it when the global rn is 1.
# Arguments: $1 - current path, $2 - new path
# Globals:   rn (read) - 1 = really rename, anything else = only log
# Returns:   0 when only logging or when mv succeeded, 1 when mv failed
{
  log "rn \"$1\" to \"$2\""
  # quoted so that an empty or unset rn simply means "do not rename"
  if [[ "$rn" -eq 1 ]]; then
    if ! mv -- "$1" "$2"; then
      # without this entry the log would read as if the rename had happened
      log "rn failed: \"$1\" to \"$2\""
      return 1
    fi
  fi
}
remove()
# Deletes the regular file $1 via the external `trash` command when the global
# rn is 1; otherwise the deletion is only logged. Does nothing (and logs
# nothing) when $1 is not an existing regular file.
# Arguments: $1 - path of the file to delete
# Globals:   rn (read) - 1 = really delete, anything else = only log
# Returns:   0 when nothing had to be done or trash succeeded, 1 when trash failed
{
  if [[ -f "$1" ]]; then
    log "rm \"$1\""
    # quoted so that an empty or unset rn simply means "do not delete"
    if [[ "$rn" -eq 1 ]]; then
      if ! trash "$1"; then
        # e.g. trash is not installed; make the failure visible in the logs
        log "rm failed: \"$1\""
        return 1
      fi
    fi
  fi
}
usage()
# Prints the help text to stdout and exits the script with status 0.
# The here-document is unquoted so that $0 expands to the script name;
# the literal dollar sign in the replacement table is therefore escaped.
{
  cat <<EOF

help page for $0
---------------------------------
this script should not be called directly but by the Wrapper script WrapperRemoveBadSymbols.sh
Usage: bash $0 [OPTIONS] DIR
 $0 removes a bunch of symbols that should not be in file names in all subdirs and files of DIR:
 (with lyrics) -> remove
 lyrics -> remove
 blank -> _
 ' - ' -> '-'
 ' (Apostroph) -> remove
 " (Anfuerungsstriche) -> remove
 ´ -> remove
 & -> _und_
 * -> x
 # -> _
 | -> l (small L since it looks similar)
 ~ -> -
 : -> _
 , -> _
 (){}[] -> remove
 € -> EUR
 \$ -> USD
 ä, ö, ü, Ä, Ü, Ö, ß -> ae, ...
 remove various accents (not all, you can add missing ones)
 consecutive _ and - and _ around - -> remove

options
 -h, --help display this help
 -a, --noask --no-interactive do not prompt the user to confirm renaming (standard is asking for each file)
 -l, --onlylog just logs what would have been done but not doing anything
 -o [file], --outputfile [file] specifies the output (log) file (standard is rn.log)
 -v [file], --voutputfile [file] specifies the verbose output log file (standard rnv.log)
 -rm removes files of the form AlbumArt*.jpg and Folder.jpg, Thumbs.db and desktop.ini
 -c change everything to lowercase (doesn't work for foreign letters if diacritics remain)
 -- finishes the list of options, next argument is taken as the dir

error handling
 if DIR is no valid directory, the script exits with error code 1
EOF
  exit 0
}
folderloc()
# Prints $1 as an absolute path.
# Arguments: $1 - a path; printed unchanged when it starts with "/",
#                 otherwise prefixed with the current working directory
# Outputs:   the absolute path on stdout
# Note: a case arm ends at ";;" - no "break" is needed (outside a loop bash
# reports "break: only meaningful in a for, while, or until loop").
{
  case "$1" in
    /*) printf '%s\n' "$1" ;;
    *) printf '%s\n' "$(pwd)/$1" ;;
  esac
}
# considering options
# flag options seen so far, concatenated (originally meant to be handed on to
# recursive calls; nothing in the visible part of this file reads it)
ops=""
# prompt before each rename: 1 = ask (default), 0 = do not ask
ask=1
# really rename/delete: 1 = yes (default), 0 = only log what would be done
rn=1
# remove album pictures and similar files: 0 = no (default), 1 = yes
rmAlbumPics=0
# convert file names to lower case: 0 = no (default), 1 = yes
lowercase=0
# verbose log file (default: rnv.log in the parent of the current directory)
VLOGFILE="$(pwd)"/../rnv.log
# short log file (default: rn.log in the parent of the current directory)
NVLOGFILE="$(pwd)"/../rn.log
# directory to be processed (default: the current directory)
dir="$(pwd)"
while [[ -n "$1" ]]; do
  case "$1" in
    -h | --help)
      usage
      ;;
    -o | --outputfile)
      shift
      # without a value the log path would silently become a directory
      if [[ -z "$1" ]]; then
        echo "$0: option -o/--outputfile needs a file argument" >&2
        exit 1
      fi
      NVLOGFILE="$(folderloc "$1")"
      ;;
    -v | --voutputfile)
      shift
      if [[ -z "$1" ]]; then
        echo "$0: option -v/--voutputfile needs a file argument" >&2
        exit 1
      fi
      VLOGFILE="$(folderloc "$1")"
      ;;
    -a | --noask | --no-interactive)
      ops="$ops -a"
      ask=0
      ;;
    -l | --onlylog)
      ops="$ops -l"
      rn=0
      ;;
    -rm)
      ops="$ops -rm"
      rmAlbumPics=1
      ;;
    -c)
      lowercase=1
      ;;
    --)
      # end of options: the next argument is the directory
      shift
      dir="$(folderloc "$1")"
      break
      ;;
    *)
      # first non-option argument is the directory; parsing stops here
      dir="$(folderloc "$1")"
      break
      ;;
  esac
  shift # must be done in any case: the next argument becomes $1
done
# echo "options"
# echo "asking=$ask"
# echo "rename=$rn"
# echo "image=$image"
# echo "log file=$NVLOGFILE"
# echo "verbose log file=$VLOGFILE"
# echo "direc=$dir"
#for testing purposes:
# exit
improveNames()
# Does the work for the current working directory and then recurses into
# every (non-hidden) subdirectory. Assumes it is already in the right place.
# Step 1 (only when rmAlbumPics is 1): remove AlbumArt*.jpg, Folder.jpg,
#         Thumbs.db and desktop.ini.
# Step 2: compute a cleaned name for every ./* entry and rename it unless the
#         name is unchanged, the entry looks like a special python/java file,
#         the target already exists, or the user declines when asked.
# Step 3: cd into each subdirectory, recurse, and cd back.
# Globals: rmAlbumPics, ask (read); uses vLog, log, rename, remove
# Hidden entries are deliberately not touched (e.g. .git, config files).
# NOTE(review): symlinked directories are entered like real ones.
{
  # all loop variables are local because the function calls itself
  local alb fil new q sub
  vLog "start with folder: $(pwd)"
  printf '.'
  # delete unnecessary album pictures and other useless files
  if [[ "$rmAlbumPics" -eq 1 ]]; then
    for alb in AlbumArt*.jpg; do
      remove "$alb"
    done
    remove Folder.jpg
    remove Thumbs.db
    remove desktop.ini
  fi
  for fil in ./*; do
    # in an empty folder the glob stays the literal "./*": skip it
    [[ -e "$fil" ]] || continue
    new="$(_cleanName "$fil")"
    # $new always starts with "./", so the degenerate results are "./", "./."
    # and "./.."; an empty result means the clean-up itself failed
    if [[ -z "$new" || "$new" == "./" || "$new" == "./." || "$new" == "./.." || "$new" == "." || "$new" == ".." ]]; then
      log "dont rename bc would result in $new : $(pwd)/$fil"
    elif [[ "$fil" == "$new" ]]; then
      vLog "dont rename bc no change: $fil"
    elif [[ "$fil" == ./__*__.py || "$fil" == ./__*__.html || "$fil" == ./__*__.txt ]]; then
      vLog "dont rename bc probably special python file: $fil"
    elif [[ "$fil" == *.class ]]; then
      # should be unproblematic, but these names contain $ characters
      vLog "dont rename bc probably special java class file: $fil"
    elif [[ -e "$new" || -L "$new" ]]; then
      # never overwrite an existing entry (file, folder or dangling symlink)
      log "a file with name \"$new\" already exists, -> dont rename: $(pwd)/$fil"
    elif [[ -n "$ask" && "$ask" -eq 0 ]]; then
      rename "$fil" "$new"
    else
      # ask whether the entry should really be renamed
      echo "rename $fil to $new? enter, y, yes = yes"
      read -r q
      if [[ -z "$q" || "$q" == "y" || "$q" == "yes" ]]; then
        rename "$fil" "$new"
      fi
    fi
  done
  # renaming done, now iterate over the new names
  for sub in ./*; do
    [[ -d "$sub" ]] || continue
    # if the folder cannot be entered, recursing would re-process the current
    # folder and the following "cd .." would leave the tree
    if ! cd "$sub"; then
      log "cannot enter folder, skipped: $(pwd)/$sub"
      continue
    fi
    improveNames # work in the subfolder
    cd ..
    vLog "went back to folder $(pwd)"
  done
}
_cleanName()
# Private helper of improveNames: prints the cleaned-up form of the path $1.
# The replacements run in a fixed order in two sed passes, with the optional
# lower-casing in between (it has to come after the Ä -> Ae style rules and
# before the final collapsing of separators).
# Arguments: $1 - path as produced by the ./* glob
# Globals:   lowercase, STRICHSTR, ANDSTRSTR, ANFUERSTR, AMBERSSTR, STERNXSTR (read)
# Outputs:   the cleaned path on stdout (empty if sed fails)
{
  local name="$1" pair
  # accented letter=replacement. Applied as separate s commands instead of one
  # y command: y needs equally long strings, which fails outside UTF-8 locales.
  # Add further letters here when the need arises.
  local -a accents=(
    ã=a ā=a á=a ǎ=a à=a
    ē=e é=e ě=e è=e ȩ=e ê=e
    ī=i í=i ǐ=i ì=i ĩ=i ï=i
    ō=o ó=o ǒ=o ò=o
    ũ=u ū=u ú=u ǔ=u ù=u
    ǖ=ü ǘ=ü ǚ=ü ǜ=ü
    ş=s ļ=l Ļ=L ķ=k Ķ=K ḩ=h ģ=g Ģ=G Ḩ=H ņ=n Ņ=N ŗ=r Ŗ=R
    Ā=A Á=A Ǎ=A À=A
    Ē=E É=E Ě=E È=E
    Ī=I Í=I Ǐ=I Ì=I
    Ō=O Ó=O Ǒ=O Ò=O
    Ū=U Ú=U Ǔ=U Ù=U
    Ǖ=Ü Ǘ=Ü Ǚ=Ü Ǜ=Ü
  )
  # "(with lyrics)", "lyrics", blanks, quote characters, &, *, C#/c#, #, |,
  # comma, all kinds of brackets, ~ and :
  local -a script=(
    -e 's/(with lyrics)//g'
    -e 's/lyrics//g'
    -e 's/ /_/g'
    -e "$STRICHSTR"
    -e "$ANDSTRSTR"
    -e "$ANFUERSTR"
    -e 's/´//g'
    -e "$AMBERSSTR"
    -e "$STERNXSTR"
    -e 's/C#/C_sharp/g'
    -e 's/c#/c_sharp/g'
    -e 's/#/_/g'
    -e 's/|/l/g'
    -e 's/,/_/g'
    -e 's/{//g'
    -e 's/}//g'
    -e 's/(//g'
    -e 's/)//g'
    -e 's/\[//g'
    -e 's/\]//g'
    -e 's/~/-/g'
    -e 's/:/_/g'
  )
  for pair in "${accents[@]}"; do
    script+=(-e "s/${pair%%=*}/${pair#*=}/g")
  done
  # umlauts and sharp s, currency signs, jpeg extensions
  script+=(
    -e 's/ä/ae/g'
    -e 's/Ä/Ae/g'
    -e 's/ö/oe/g'
    -e 's/Ö/Oe/g'
    -e 's/ü/ue/g'
    -e 's/Ü/Ue/g'
    -e 's/ß/ss/g'
    -e 's/\$/USD/g'
    -e 's/€/EUR/g'
    -e 's/\.JPG/.jpg/g'
    -e 's/\.JPEG/.jpg/g'
    -e 's/\.jpeg/.jpg/g'
  )
  name="$(printf '%s\n' "$name" | sed "${script[@]}")"
  if [[ "$lowercase" -eq 1 ]]; then
    name="$(printf '%s\n' "$name" | tr '[:upper:]' '[:lower:]')"
  fi
  # for each of _ . - in this order: drop a _ directly before and after the
  # symbol, then collapse runs of the symbol into a single one
  printf '%s\n' "$name" | sed \
    -e 's/__*/_/g' \
    -e 's/_\././g' -e 's/\._/./g' -e 's/\.\.*/./g' \
    -e 's/_-/-/g' -e 's/-_/-/g' -e 's/--*/-/g'
}
#actual script
# check if the argument is correct:
if [[ ! -d "$dir" ]]; then
  log "exiting bc there is no such dir"
  #echo "exiting bc there is no such dir"
  exit 1
fi
# vLog "move to $dir"
# stop if the directory cannot be entered: improveNames works on the current
# working directory and would otherwise rename files in the wrong place
if ! cd "$dir"; then
  log "exiting bc cannot change into dir"
  exit 1
fi
# reset log files (each then starts with one empty line)
echo "" > "$NVLOGFILE"
echo "" > "$VLOGFILE"
improveNames
# note: inside the condition brackets [ ] of an if there must be a blank after "[" and before "]"!
# note: the available condition operators are documented in "man test"
# note: in assignments with = there must be no blank between the variable name and the =
# (otherwise the variable name is interpreted as a command)