A requirement for my business was to capture information for every song I had. The information required was the Composer (the person/s that wrote the song), the Year and the Record Label. I have over 13,000 songs and doing this on a manual basis would be an administration nightmare.
So I used my PowerShell skills to take a manual process that I had found to be very reliable and turn it into an automatic one.
The most reliable process that I can find to date is:
- To grab the Record Label information from iTunes and a site http://staff.australian-charts.com/ to be used as a backup in the event iTunes didn’t know about the particular song or have the Label information.
- To grab the Composer and Year information of the song from the same site http://staff.australian-charts.com/ which had about 95% of my music collection anyway.
A couple of points I will share: this is not 100% perfect; it’s about 95% reliable. Also, for any song where it can’t find the information for a particular category (e.g. Composer), it won’t write that information. In other words, this script will only write a particular piece of information if it can find it. It’s completely non-destructive.
The library you will need, and the one I used to write all the MP3 tagging information, is this library here: taglib-sharp. However, this library (.dll) allowed me to edit every piece of ID3 information related to the MP3 except for the Publisher information, so I had to modify the library from GitHub and then recompile it in Visual Studio to allow editing the publisher information for each song, as per this article.
The script is below, edit the top line ($mp3s) to tell it where your music lives, then edit the next line ($taglib) to tell it where to find “taglib-sharp.dll“, then kick it off, you’ll see the output on the screen as it works its way through each song. You might see some Red errors at some points, not to worry, this is where it can’t find particular information for these songs.
The other thing is that the script assumes the files in your music library are named in the format Artist – Title.mp3
By the way, the very small percentage of songs it can’t find the information for, you can manually fix these using a site like Wikipedia.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Specify the directory of the music that you want to update on a song-by-song basis.
# Only .mp3 files whose parent directory matches 'AnyDirectoryName' are selected.
$mp3s = Get-ChildItem -Path "C:\Music" -Recurse | ? {$_.Extension -eq '.mp3' -and $_.Directory -match 'AnyDirectoryName'}
# Location of taglib-sharp.dll; may be given relative to the current PowerShell location.
$taglib = "..\taglib-sharp.dll"
# Assembly.LoadFile only accepts absolute paths, so the configured path is resolved first.
# -ErrorAction Stop aborts here when the DLL is missing instead of failing later on every song.
$taglibPath = (Resolve-Path -LiteralPath $taglib -ErrorAction Stop).ProviderPath
# [void] keeps the returned Assembly object out of the script's output.
[void][System.Reflection.Assembly]::LoadFile($taglibPath)
Function CheckSong ($song, $against){
    <#
    .SYNOPSIS
    Score a list of candidate strings against a song string.
    .DESCRIPTION
    Pass 1 scores every candidate by the length of its longest common substring with $song
    (text in parentheses is removed from the candidate first). The 50 highest-scoring
    candidates are then re-scored with Get-FuzzyMatchScore.
    If the best fuzzy score is below 1600, the candidates are additionally checked against
    the first artist token and the title obtained by splitting $song on ' – '; the last
    candidate that matches both is returned on its own. Otherwise all re-scored candidates
    are returned so the caller can pick the highest DistanceNumber.
    .OUTPUTS
    Objects with the properties DistanceNumber and Song. Nothing when there are no candidates.
    #>
    $Results = @()

    # Piping $null into ForEach-Object would still run once, so drop null/empty candidates up front.
    $candidates = @($against | ? { $_ })
    if ($candidates.Count -eq 0) { return }

    # Pass 1: longest-common-substring length. Collecting loop output avoids repeated array copies from +=.
    $firstPass = @(foreach ($candidate in $candidates) {
        $h = new-object psObject | select DistanceNumber,Song
        $h.DistanceNumber = (Get-LongestCommonSubstring $song ($candidate -replace '\(.*\)','')).length
        $h.Song = $candidate
        $h
    })

    # Pass 2: fuzzy score for the 50 best candidates of pass 1.
    $secondPass = @(foreach ($candidate in ($firstPass | sort DistanceNumber | select -Last 50).Song) {
        $x = new-object psObject | select DistanceNumber,Song
        $x.DistanceNumber = Get-FuzzyMatchScore $song ($candidate -replace '\(.*\)','')
        $x.Song = $candidate
        $x
    })

    $best = $secondPass | sort -Descending DistanceNumber | select -First 1
    if ($best.DistanceNumber -lt 1600) {
        $songParts = $song -split ' – '
        # Only read $Matches when the match succeeded; otherwise it still holds an older, unrelated match.
        if ($songParts[0] -match '[0-9A-Za-z\x2d]+') {
            # Escape both values so they are compared literally and regex metacharacters cannot throw.
            # A missing title becomes the empty pattern, which matches anything (same as the unescaped $null).
            $artistPattern = [regex]::Escape($Matches[0])
            $titlePattern = [regex]::Escape([string]$songParts[1])
            foreach ($candidate in $secondPass) {
                $candidateParts = $candidate.Song -split ' – '
                if ($candidateParts[0] -match $artistPattern -and $candidateParts[1] -match $titlePattern) {
                    $Results = $candidate
                }
            }
        }
    }

    if (-not $Results) { $Results = $secondPass }
    $Results
}
Function Get-DamerauLevenshteinDistance {
    <#
    .SYNOPSIS
    Get the edit distance between two strings, counting adjacent transpositions as one edit.
    .DESCRIPTION
    Fills a (Length1 + 1) x (Length2 + 1) matrix in which cell [i, j] holds the number of
    insertions, deletions, substitutions and adjacent transpositions needed to turn the first
    i characters of String1 into the first j characters of String2, and returns the last cell.
    Characters are compared with -eq, exactly as the individual characters were compared before.
    .EXAMPLE
    Get-DamerauLevenshteinDistance 'ca' 'ac'
    Returns 1 (one transposition).
    .EXAMPLE
    Get-DamerauLevenshteinDistance 'kitten' 'sitting'
    Returns 3.
    #>
    param ($String1,$String2)

    # Parameters are untyped; normalise so that $null behaves like an empty string.
    $String1 = [string]$String1
    $String2 = [string]$String2
    $Length1 = $String1.Length
    $Length2 = $String2.Length

    # Local matrix (no global variable is created).
    $Matrix = New-Object 'int[,]' ($Length1 + 1), ($Length2 + 1)

    # Row 0 / column 0: distance from the empty prefix. -le so the final row and column are initialised too.
    for ($i = 0; $i -le $Length1; $i++) { $Matrix[$i,0] = $i }
    for ($j = 0; $j -le $Length2; $j++) { $Matrix[0,$j] = $j }

    # The loops must run through Length1/Length2 inclusive, otherwise the last character of each string is ignored.
    for ($i = 1; $i -le $Length1; $i++) {
        for ($j = 1; $j -le $Length2; $j++) {
            if ($String1[$i - 1] -eq $String2[$j - 1]) { $Cost = 0 } else { $Cost = 1 }

            $Deletion = $Matrix[($i - 1),$j] + 1
            $Insertion = $Matrix[$i,($j - 1)] + 1
            $Substitution = $Matrix[($i - 1),($j - 1)] + $Cost
            $Best = [Math]::Min([Math]::Min($Deletion, $Insertion), $Substitution)

            # Adjacent transposition: the last two characters of both prefixes are swapped.
            if (($i -gt 1) -and ($j -gt 1) -and ($String1[$i - 1] -eq $String2[$j - 2]) -and ($String1[$i - 2] -eq $String2[$j - 1])) {
                $Best = [Math]::Min($Best, $Matrix[($i - 2),($j - 2)] + $Cost)
            }

            $Matrix[$i,$j] = $Best
        }
    }

    $Matrix[$Length1,$Length2]
}
function Get-FuzzyMatchScore {
    <#
    .SYNOPSIS
    Calculate a similarity score for $String with respect to $Search (higher is better).
    .DESCRIPTION
    Starting from 100, the score is reduced by the Levenshtein distance, multiplied by the
    length of the longest common substring, reduced by the length and start index of the
    shortest in-order occurrence of the characters of $Search inside $String, and finally
    increased by the length of the common prefix of both strings.
    .PARAMETER Search
    The text that is being looked for.
    .PARAMETER String
    The candidate text that is scored.
    #>
    [CmdletBinding()]
    param (
        [Parameter(Position = 0)]
        [string] $Search,
        [Parameter(Position = 1)]
        [string] $String
    )
    $score = 100

    # Use approximate string matching to get some values needed to calculate the score of the result
    $longestCommonSubstring = Get-LongestCommonSubstring -String1 $String -String2 $Search
    $levenshteinDistance = Get-LevenshteinDistance -String1 $String -String2 $Search

    # Get-CommonPrefix rejects empty strings at parameter binding, so only call it with real input.
    $commonPrefix = $null
    if ($String -and $Search) {
        $commonPrefix = Get-CommonPrefix -String1 $String -String2 $Search
    }

    # By running the result through this regex pattern we get the length of the match as well as
    # the index of where the match starts. The shorter the match length and the index, the more
    # score will be added for the match.
    $matchLength = $null
    $matchIndex = $null
    if ($Search) {
        # Every character is escaped so that titles containing ( ) ? + [ etc. are matched literally
        # instead of producing an invalid or unintended regular expression.
        $regexMatchFilter = ($Search.ToCharArray() | ForEach-Object { [regex]::Escape([string]$_) }) -join '.*?'
        $match = Select-String -InputObject $String -Pattern $regexMatchFilter -AllMatches
        $shortestMatch = $match.Matches | Sort-Object Length | Select-Object -First 1
        if ($shortestMatch) {
            $matchLength = $shortestMatch.Value.Length
            $matchIndex = $shortestMatch.Index
        }
    }

    # Calculate score ($null values leave the score unchanged when nothing matched)
    $score = $score - $levenshteinDistance
    $score = $score * $longestCommonSubstring.Length
    $score = $score - $matchLength
    $score = $score - $matchIndex
    if ($commonPrefix) {
        $score = $score + $commonPrefix.Length
    }
    Write-Output $score
}
function Get-HammingDistance {
    <#
    .SYNOPSIS
    Get the Hamming Distance between two strings or two positive integers.
    .DESCRIPTION
    For two strings, the result is the number of positions (within the length of the shorter
    string) at which the characters differ, plus the difference in length between the two
    strings. The classic Hamming distance is only defined for strings of equal length; adding
    the length difference is what lets this function accept strings of unequal length.
    For two integers, the result is the number of bits that differ between their binary
    representations.
    .EXAMPLE
    Get-HammingDistance 'karolin' 'kathrin'
    Calculate the Hamming distance between the two strings. The result is 3.
    .EXAMPLE
    Get-HammingDistance 'karolin' 'kathrin' -NormalizeOutput
    Calculate the normalized Hamming distance between the two strings. The result is 0.571428571428571.
    .EXAMPLE
    Get-HammingDistance -Int1 61 -Int2 15
    Calculate the hamming distance between 61 and 15. The result is 3.
    .LINK
    http://en.wikipedia.org/wiki/Hamming_distance
    https://communary.wordpress.com/
    https://github.com/gravejester/Communary.PASM
    .NOTES
    Author: Øyvind Kallstad
    Date: 03.11.2014
    Version: 1.0
    #>
    [CmdletBinding(DefaultParameterSetName = 'String')]
    param (
        [Parameter(Position = 0, Mandatory = $true, ParameterSetName = 'String')]
        [ValidateNotNullOrEmpty()]
        [string] $String1,
        [Parameter(Position = 1, Mandatory = $true, ParameterSetName = 'String')]
        [ValidateNotNullOrEmpty()]
        [string] $String2,
        [Parameter(Position = 0, Mandatory = $true, ParameterSetName = 'Integer')]
        [ValidateNotNullOrEmpty()]
        [uint32] $Int1,
        [Parameter(Position = 1, Mandatory = $true, ParameterSetName = 'Integer')]
        [ValidateNotNullOrEmpty()]
        [uint32] $Int2,
        # Makes matches case-sensitive. By default, matches are not case-sensitive.
        [Parameter(ParameterSetName = 'String')]
        [switch] $CaseSensitive,
        # Normalize the output value. When the output is not normalized the maximum value is the length of the longest string, and the minimum value is 0,
        # meaning that a value of 0 is a 100% match. When the output is normalized you get a value between 0 and 1, where 1 indicates a 100% match.
        [Parameter(ParameterSetName = 'String')]
        [switch] $NormalizeOutput
    )
    try {
        if ($PSCmdlet.ParameterSetName -ne 'String') {
            # Integer mode: count the set bits of the XOR of both values.
            $bitCount = 0
            $differingBits = $Int1 -bxor $Int2
            while ($differingBits -ne 0) {
                $bitCount++
                # Clears the lowest set bit on every pass.
                $differingBits = $differingBits -band ($differingBits - 1)
            }
            Write-Output $bitCount
            return
        }

        # String mode. Fold case unless a case-sensitive comparison was requested.
        if (-not $CaseSensitive) {
            $String1 = $String1.ToLowerInvariant()
            $String2 = $String2.ToLowerInvariant()
        }

        $longer = [Math]::Max($String1.Length, $String2.Length)
        $shorter = [Math]::Min($String1.Length, $String2.Length)

        # Every surplus character of the longer string counts as one difference.
        $mismatches = $longer - $shorter

        # Compare position by position over the part both strings share.
        for ($pos = 0; $pos -lt $shorter; $pos++) {
            if ($String1[$pos] -cne $String2[$pos]) {
                $mismatches++
            }
        }

        if ($NormalizeOutput) {
            Write-Output (1 - ($mismatches / $longer))
        }
        else {
            Write-Output $mismatches
        }
    }
    catch {
        Write-Warning $_.Exception.Message
    }
}
function Get-LevenshteinDistance {
    <#
    .SYNOPSIS
    Get the Levenshtein distance between two strings.
    .DESCRIPTION
    The Levenshtein Distance is a way of quantifying how dissimilar two strings (e.g., words) are to one another by counting the minimum number of operations (insertions, deletions, substitutions) required to transform one string into the other.
    .EXAMPLE
    Get-LevenshteinDistance 'kitten' 'sitting'
    Returns 3.
    .EXAMPLE
    Get-LevenshteinDistance 'abcd' 'abcf' -NormalizeOutput
    Returns 0.75 (1 - distance / length of the longer string).
    .LINK
    http://en.wikibooks.org/wiki/Algorithm_Implementation/Strings/Levenshtein_distance#C.23
    http://en.wikipedia.org/wiki/Edit_distance
    https://communary.wordpress.com/
    https://github.com/gravejester/Communary.PASM
    .NOTES
    Author: Øyvind Kallstad
    Date: 07.11.2014
    Version: 1.0
    #>
    [CmdletBinding()]
    param(
        [Parameter(Position = 0)]
        [string]$String1,
        [Parameter(Position = 1)]
        [string]$String2,
        # Makes matches case-sensitive. By default, matches are not case-sensitive.
        [Parameter()]
        [switch] $CaseSensitive,
        # A normalized output is 1 - (distance / length of the longer string): 1 is a perfect match, 0 is no match.
        [Parameter()]
        [switch] $NormalizeOutput
    )
    if (-not($CaseSensitive)) {
        $String1 = $String1.ToLowerInvariant()
        $String2 = $String2.ToLowerInvariant()
    }

    $longestLength = [Math]::Max($String1.Length, $String2.Length)

    # Two empty strings are identical. Answer directly: the normalization below would divide by zero.
    if ($longestLength -eq 0) {
        if ($NormalizeOutput) { Write-Output 1 } else { Write-Output 0 }
        return
    }

    # $d[i, j] = distance between the first i characters of String1 and the first j characters of String2.
    $d = New-Object 'Int[,]' ($String1.Length + 1), ($String2.Length + 1)
    try {
        for ($i = 0; $i -le $String1.Length; $i++) {
            $d[$i,0] = $i
        }
        for ($j = 0; $j -le $String2.Length; $j++) {
            $d[0,$j] = $j
        }
        for ($i = 1; $i -le $String1.Length; $i++) {
            for ($j = 1; $j -le $String2.Length; $j++) {
                if ($String1[$i - 1] -ceq $String2[$j - 1]) { $cost = 0 } else { $cost = 1 }
                $deletion = $d[($i - 1),$j] + 1
                $insertion = $d[$i,($j - 1)] + 1
                $substitution = $d[($i - 1),($j - 1)] + $cost
                $d[$i,$j] = [Math]::Min([Math]::Min($deletion, $insertion), $substitution)
            }
        }
        $distance = $d[$String1.Length,$String2.Length]
        if ($NormalizeOutput) {
            Write-Output (1 - ($distance / $longestLength))
        }
        else {
            Write-Output $distance
        }
    }
    catch {
        Write-Warning $_.Exception.Message
    }
}
function Get-LongestCommonSubstring {
    <#
    .SYNOPSIS
    Get the longest common substring of two strings.
    .DESCRIPTION
    Returns the longest run of consecutive characters that occurs in both strings. When
    several runs share the greatest length, the one found first while scanning String1 from
    left to right (and String2 from left to right for each position) is returned. The
    comparison is case-insensitive unless -CaseSensitive is given; in the case-insensitive
    mode the result is returned in lower case.
    .EXAMPLE
    Get-LongestCommonSubstring 'Karolin' 'kathrin' -CaseSensitive
    Returns 'in'.
    .LINK
    https://fuzzystring.codeplex.com/
    http://en.wikipedia.org/wiki/Longest_common_substring_problem
    https://communary.wordpress.com/
    https://github.com/gravejester/Communary.PASM
    .NOTES
    Adapted to PowerShell from code by Kevin Jones (https://fuzzystring.codeplex.com/)
    Author: Øyvind Kallstad
    Date: 03.11.2014
    Version: 1.0
    #>
    [CmdletBinding()]
    param (
        [Parameter(Position = 0)]
        [string] $String1,
        [Parameter(Position = 1)]
        [string] $String2,
        [Parameter()]
        [switch] $CaseSensitive
    )
    if (-not $CaseSensitive) {
        $String1 = $String1.ToLowerInvariant()
        $String2 = $String2.ToLowerInvariant()
    }

    $rowCount = $String1.Length
    $columnCount = $String2.Length

    # $runLengths[r, c] = length of the common run that ends at String1[r] and String2[c].
    # Cells start at 0, so only matching positions need to be written.
    $runLengths = New-Object 'int[,]' $rowCount, $columnCount

    # Remember where the best run starts in String1 and how long it is; cut it out once at the end.
    $bestLength = 0
    $bestStart = 0

    for ($row = 0; $row -lt $rowCount; $row++) {
        for ($column = 0; $column -lt $columnCount; $column++) {
            if ($String1[$row] -cne $String2[$column]) { continue }

            if (($row -eq 0) -or ($column -eq 0)) {
                $run = 1
            }
            else {
                # Extend the run that ended on the previous character of both strings.
                $run = 1 + $runLengths[($row - 1),($column - 1)]
            }
            $runLengths[$row,$column] = $run

            # Strictly greater: the first run of a given length wins.
            if ($run -gt $bestLength) {
                $bestLength = $run
                $bestStart = $row - $run + 1
            }
        }
    }

    Write-Output ($String1.Substring($bestStart, $bestLength))
}
function Get-CommonPrefix {
    <#
    .SYNOPSIS
    Find the common prefix of two strings.
    .DESCRIPTION
    Returns the characters that both strings share, counted from the first character up to
    the first position at which they differ. The comparison is case-insensitive unless
    -CaseSensitive is given; in the case-insensitive mode the prefix is returned in lower case.
    .EXAMPLE
    Get-CommonPrefix 'Card' 'Cartoon'
    Will get the common prefix of both strings. Outputs 'car'.
    .LINK
    https://communary.wordpress.com/
    https://github.com/gravejester/Communary.PASM
    .INPUTS
    System.String
    .OUTPUTS
    System.String
    .NOTES
    Author: Øyvind Kallstad
    Date: 03.11.2014
    Version 1.1
    Dependencies: none
    #>
    [CmdletBinding()]
    param(
        [Parameter(Mandatory = $true, Position = 0)]
        [ValidateNotNullOrEmpty()]
        [string]$String1,
        [Parameter(Mandatory = $true, Position = 1)]
        [ValidateNotNullOrEmpty()]
        [string]$String2,
        # Maximum length of the returned prefix.
        [Parameter()]
        [int]$MaxPrefixLength,
        # Makes matches case-sensitive. By default, matches are not case-sensitive.
        [Parameter()]
        [switch] $CaseSensitive
    )
    if (-not $CaseSensitive) {
        $String1 = $String1.ToLowerInvariant()
        $String2 = $String2.ToLowerInvariant()
    }

    # The prefix can never be longer than the shorter string; an explicit, non-zero
    # MaxPrefixLength that does not exceed that length caps it further.
    $limit = [Math]::Min($String1.Length, $String2.Length)
    if (($MaxPrefixLength -ne 0) -and ($MaxPrefixLength -le $limit)) {
        $limit = $MaxPrefixLength
    }

    # Advance while both strings agree, then cut the prefix out of String1 in one go.
    $matched = 0
    while (($matched -lt $limit) -and ($String1[$matched] -ceq $String2[$matched])) {
        $matched++
    }

    Write-Output ($String1.Substring(0, $matched))
}
# Main loop: for every selected file whose path contains ' – ', look up Label (iTunes first,
# australian-charts as fallback), Composer and Year (australian-charts) and write whatever was found
# into the file's tags. Raise the -Skip value to resume a previous run part-way through the list.
foreach($mp3 in ($mp3s | ? {$_.FullName -match ' – '} | select -Skip 0)){
    $song = $mp3.BaseName
    Write-Host $song
    #$song = "Billy Idol – Flesh For Fanasy"
    #$song = "All Saints – Never ever"
    #$song = 'Armand Van Helden – My My My'

    # iTunes term: drop parentheses, then URL-encode (spaces as '+') so characters such as '&' cannot break the query string.
    $iTunesSearchSong = [uri]::EscapeDataString(($song -replace '\(' -replace '\)' -replace ' – ', ' ')) -replace '%20', '+'
    $SearchSong = $song[0..49] -join "" -replace '\(' -replace '\)' -replace ' – ', ' ' -replace '\s','+' -replace "'", "%27"
    $MatchSong = $song -replace ' – ', '&titel='

    # Reset every per-song value so that a failed lookup can never reuse data from the previous song.
    $Composer = $null;$year = $null;$Label = $null
    $iTunesResults = $null;$PossibleResults = $null;$ResultCells = $null;$Element = $null;$pageText = $null
    $x = $null;$y = $null;$media = $null

    try {
        $media = [TagLib.File]::Create($mp3.FullName)
    }
    catch {
        Write-Warning "Cannot open '$($mp3.FullName)': $($_.Exception.Message)"
        continue
    }

    try {
        #Check iTunes for music Label information
        $uri = "https://itunes.apple.com/search?term=$iTunesSearchSong&country=au&entity=song"
        try {
            $x = Invoke-WebRequest -Uri $uri
            $iTunesResults = @(($x.Content | ConvertFrom-Json).results)
        }
        catch {
            Write-Warning "iTunes search failed for '$song': $($_.Exception.Message)"
        }
        if($iTunesResults)
        {
            try {
                $y = Invoke-WebRequest -Uri $iTunesResults[0].trackViewUrl
                $iTunesSongCopyright = (($y.ParsedHtml.getElementsByTagName('li') | ? {$_.ClassName -eq 'copyright'}).innerText | select -First 1) -replace '℗ '
                # Only read $Matches after a successful match; otherwise it still holds an older match.
                if ($iTunesSongCopyright -match '(?<!\d)[a-z|A-Z].*') { $Label = $Matches[0] }
            }
            catch {
                Write-Warning "iTunes track page failed for '$song': $($_.Exception.Message)"
            }
        }

        #Then check australian-charts for Composer & Year information
        $domain = 'http://staff.australian-charts.com/'
        $uri = $domain + "search.asp?cat=s&search=$SearchSong"
        $x = $null
        try {
            $x = Invoke-WebRequest -Uri $uri
        }
        catch {
            Write-Warning "Chart search failed for '$song': $($_.Exception.Message)"
        }
        if ($x)
        {
            # Check all possible results - generally starts at 462. The element window is taken once and reused below.
            $ResultCells = $x.AllElements | select -Skip 400 -First 122
            $PossibleResults = $ResultCells | ? {$_.class -eq 'text' -and $_.tagname -eq 'td' -and $_.outerText -match '[a-z|A-Z]{1,}'}
        }
        if($PossibleResults)
        {
            # Match the search results
            $MatchedResults = CheckSong $Matchsong ($PossibleResults.innerHTML | sort -Descending)
            # Find the best search result
            $BestResult = ($MatchedResults | sort DistanceNumber -Descending | select -First 1).song
            # Best Element (first one only, so that -match below works on a single string and fills $Matches)
            $BestElement = $ResultCells | ? {$_.class -eq 'text' -and $_.tagname -eq 'td' -and $_.innerHTML -eq $BestResult} | select -First 1
            # Find the correct URL
            if ($BestElement -and ($BestElement.outerHTML -match 'A.href="([^"]*)"'))
            {
                # The href is taken from HTML source, so entities such as &amp; and &quot; must be decoded.
                $resultURL = [System.Net.WebUtility]::HtmlDecode($domain + $Matches[1])
                try {
                    $y = Invoke-WebRequest -Uri $resultURL
                    $Element = ($y.AllElements | ? {$_.tagName -eq 'HTML'}) | select -First 1
                }
                catch {
                    Write-Warning "Chart page failed for '$song': $($_.Exception.Message)"
                }
            }
            if ($Element) { $pageText = [string]$Element.innerText }

            if($pageText -match 'Music\/Lyrics:(.*)')
            {
                $startpos = $pageText.IndexOf("Lyrics:") + 7
                # The composer list ends at the next "Producer:" label or, failing that, at "World wide:".
                # The 3 characters in front of the label are dropped as before
                # (presumably a line break plus padding - TODO confirm against a live page).
                $endpos = -1
                foreach ($marker in 'Producer:', 'World wide:')
                {
                    $markerPos = $pageText.IndexOf($marker, $startpos, [System.StringComparison]::Ordinal)
                    if ($markerPos -ge 0) { $endpos = $markerPos - 3; break }
                }
                if ($endpos -gt $startpos)
                {
                    $composer = $pageText.Substring($startpos, ($endpos - $startpos)) -replace '\n', ', '
                }
                if($composer){$media.tag.Composers = $composer;Write-Host $Composer}
            }

            # Capture exactly four digits so that trailing line-break characters never reach the numeric Year tag.
            if($pageText -match 'Year:\s*(\d{4})')
            {
                $Year = $Matches[1]
                $media.tag.year = $Year;Write-Host $Year
            }

            # Fall back to the chart site whenever iTunes produced no label (no results OR no label text).
            if((-not $Label) -and ($pageText -match 'Label:(.*)'))
            {
                $Label = $Matches[1].Trim()
            }
        }

        if($Label)
        {
            try {
                # Publisher is only available in the modified taglib-sharp build described in the article.
                $media.tag.Publisher = $Label;Write-Host $Label`n
            }
            catch {
                Write-Warning "Cannot write Publisher for '$song': $($_.Exception.Message)"
            }
        }
        $media.Save()
    }
    catch {
        # Unexpected failure: report it and leave the file unsaved rather than writing partial data.
        Write-Warning "Failed to tag '$song': $($_.Exception.Message)"
    }
    finally {
        # Release the file handle held by TagLib (when the loaded build supports it).
        if ($media -is [System.IDisposable]) { $media.Dispose() }
    }
    Start-Sleep -Seconds 1
}