From 531a4676e93989320235bca3459e83f4f96480f4 Mon Sep 17 00:00:00 2001 From: ProfessionalUwU Date: Tue, 27 Jun 2023 22:31:24 +0200 Subject: [PATCH 01/10] First implementation of parallel checksumming --- src/Chksum/chksum.cs | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) diff --git a/src/Chksum/chksum.cs b/src/Chksum/chksum.cs index 8f0f1a6..e6af881 100644 --- a/src/Chksum/chksum.cs +++ b/src/Chksum/chksum.cs @@ -1,4 +1,6 @@ using System.Reflection; +using System.Security.Cryptography; +using System.Threading.Tasks; using Microsoft.Data.Sqlite; namespace Chksum.Utils; public class ChksumUtils { @@ -62,13 +64,19 @@ public class ChksumUtils { } } - private string CalculateMD5(string filename) { - using (var md5 = System.Security.Cryptography.MD5.Create()) { - using (var stream = File.OpenRead(filename)) { - var hash = md5.ComputeHash(stream); - return BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant(); + private string[] CalculateMD5(string[] filenames) { + string[] checksums = new string[filenames.Length]; + + Parallel.ForEach(filenames, (filename, state, index) => { + using (var md5 = MD5.Create()) { + using (var stream = File.OpenRead(filename)) { + var hash = md5.ComputeHash(stream); + checksums[index] = BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant(); + } } - } + }); + + return checksums; } public void doTheThing() { @@ -76,14 +84,22 @@ public class ChksumUtils { using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) { Directory.SetCurrentDirectory(directory); // Set new root if (getFileCount() >= 1) { - DirectoryInfo dir = new DirectoryInfo(Directory.GetCurrentDirectory()); - FileInfo[] files = dir.GetFiles(); - foreach (FileInfo file in files) { - string fileName = file.Name; - string absolutePathToFile = Path.GetFullPath(fileName); + string[] filenames = Directory.GetFiles(directory); + string[] fileHashes = 
CalculateMD5(filenames); + //DirectoryInfo dir = new DirectoryInfo(Directory.GetCurrentDirectory()); + //FileInfo[] files = dir.GetFiles(); + int index = 0; + foreach (string file in filenames) { + index++; + string absolutePathToFile = file; string pathToFile = Path.GetRelativePath(DatabaseRoot, absolutePathToFile); - string fileHash = CalculateMD5(fileName); - + string fileHash = ""; + if (index < fileHashes.Length) { + //Console.WriteLine("Index at: " + index); + fileHash = fileHashes.GetValue(index).ToString(); + } + + string fileName = Path.GetFileName(absolutePathToFile); if (checkIfFileMovedAndUpdatePathToFile(fileHash, fileName, pathToFile) == false && checkIfFileAlreadyExistsInDatabase(fileHash, fileName) == false) { connection.Open(); From 763cde4e2df68a5d538d95177bea9bd76ebbbb8e Mon Sep 17 00:00:00 2001 From: ProfessionalUwU Date: Tue, 27 Jun 2023 22:58:07 +0200 Subject: [PATCH 02/10] Use dictionary --- src/Chksum/chksum.cs | 32 +++++++++++++++++++++++--------- 1 file changed, 23 insertions(+), 9 deletions(-) diff --git a/src/Chksum/chksum.cs b/src/Chksum/chksum.cs index e6af881..647f146 100644 --- a/src/Chksum/chksum.cs +++ b/src/Chksum/chksum.cs @@ -79,27 +79,41 @@ public class ChksumUtils { return checksums; } + private Dictionary CalculateChecksums(string[] filenames) { + Dictionary checksums = new Dictionary(); + + Parallel.ForEach(filenames, (filename, state) => { + using (var md5 = MD5.Create()) { + using (var stream = File.OpenRead(filename)) { + var hash = md5.ComputeHash(stream); + var checksum = BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant(); + + lock (checksums) { + checksums.Add(filename, checksum); + } + } + } + }); + + return checksums; + } + public void doTheThing() { foreach (var directory in Directory.GetDirectories(Directory.GetCurrentDirectory())) using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) { Directory.SetCurrentDirectory(directory); // Set new root if 
(getFileCount() >= 1) { string[] filenames = Directory.GetFiles(directory); - string[] fileHashes = CalculateMD5(filenames); + Dictionary fileHashes = CalculateChecksums(filenames); //DirectoryInfo dir = new DirectoryInfo(Directory.GetCurrentDirectory()); //FileInfo[] files = dir.GetFiles(); int index = 0; - foreach (string file in filenames) { - index++; - string absolutePathToFile = file; + foreach (var file in fileHashes) { + string absolutePathToFile = file.Key; + string fileName = Path.GetFileName(absolutePathToFile); string pathToFile = Path.GetRelativePath(DatabaseRoot, absolutePathToFile); string fileHash = ""; - if (index < fileHashes.Length) { - //Console.WriteLine("Index at: " + index); - fileHash = fileHashes.GetValue(index).ToString(); - } - string fileName = Path.GetFileName(absolutePathToFile); if (checkIfFileMovedAndUpdatePathToFile(fileHash, fileName, pathToFile) == false && checkIfFileAlreadyExistsInDatabase(fileHash, fileName) == false) { connection.Open(); From e12117fba889b4b65458b82119a4720c4e807d96 Mon Sep 17 00:00:00 2001 From: ProfessionalUwU Date: Tue, 27 Jun 2023 23:12:50 +0200 Subject: [PATCH 03/10] Small fix --- src/Chksum/chksum.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Chksum/chksum.cs b/src/Chksum/chksum.cs index 647f146..0e6cdc8 100644 --- a/src/Chksum/chksum.cs +++ b/src/Chksum/chksum.cs @@ -112,7 +112,7 @@ public class ChksumUtils { string absolutePathToFile = file.Key; string fileName = Path.GetFileName(absolutePathToFile); string pathToFile = Path.GetRelativePath(DatabaseRoot, absolutePathToFile); - string fileHash = ""; + string fileHash = file.Value; if (checkIfFileMovedAndUpdatePathToFile(fileHash, fileName, pathToFile) == false && checkIfFileAlreadyExistsInDatabase(fileHash, fileName) == false) { connection.Open(); From 7f6f4c5253fd2d6c2edfe77dea201133161958db Mon Sep 17 00:00:00 2001 From: ProfessionalUwU Date: Tue, 27 Jun 2023 23:56:43 +0200 Subject: [PATCH 04/10] Streamline code --- 
src/Chksum/chksum.cs | 40 ++++++++++++---------------------------- 1 file changed, 12 insertions(+), 28 deletions(-) diff --git a/src/Chksum/chksum.cs b/src/Chksum/chksum.cs index 0e6cdc8..afff74b 100644 --- a/src/Chksum/chksum.cs +++ b/src/Chksum/chksum.cs @@ -1,13 +1,19 @@ using System.Reflection; using System.Security.Cryptography; -using System.Threading.Tasks; using Microsoft.Data.Sqlite; namespace Chksum.Utils; public class ChksumUtils { - private int getFileCount() { - int fileCount = Directory.GetFiles(Directory.GetCurrentDirectory()).Length; // Get file count in current directory - return fileCount; + private int getTotalFileCount() { + int totalFileCount = Directory.GetFiles(Directory.GetCurrentDirectory(), "*", SearchOption.AllDirectories).Length; + return totalFileCount - 3; // Remove the program, datbase and library from the totalFileCount + } + + private string[] indexFiles() { + string[] indexedFiles = Directory.GetFiles(Directory.GetCurrentDirectory(), "*", SearchOption.AllDirectories); + string[] filesToExclude = { "Chksum", "chksum.db", "libe_sqlite3.so" }; + indexedFiles = indexedFiles.Where(file => !filesToExclude.Contains(Path.GetFileName(file))).ToArray(); + return indexedFiles; } public string DatabaseRoot { get; set; } = string.Empty; @@ -64,21 +70,6 @@ public class ChksumUtils { } } - private string[] CalculateMD5(string[] filenames) { - string[] checksums = new string[filenames.Length]; - - Parallel.ForEach(filenames, (filename, state, index) => { - using (var md5 = MD5.Create()) { - using (var stream = File.OpenRead(filename)) { - var hash = md5.ComputeHash(stream); - checksums[index] = BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant(); - } - } - }); - - return checksums; - } - private Dictionary CalculateChecksums(string[] filenames) { Dictionary checksums = new Dictionary(); @@ -99,15 +90,9 @@ public class ChksumUtils { } public void doTheThing() { - foreach (var directory in 
Directory.GetDirectories(Directory.GetCurrentDirectory())) using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) { - Directory.SetCurrentDirectory(directory); // Set new root - if (getFileCount() >= 1) { - string[] filenames = Directory.GetFiles(directory); - Dictionary fileHashes = CalculateChecksums(filenames); - //DirectoryInfo dir = new DirectoryInfo(Directory.GetCurrentDirectory()); - //FileInfo[] files = dir.GetFiles(); - int index = 0; + if (getTotalFileCount() >= 1) { + Dictionary fileHashes = CalculateChecksums(indexFiles()); foreach (var file in fileHashes) { string absolutePathToFile = file.Key; string fileName = Path.GetFileName(absolutePathToFile); @@ -130,7 +115,6 @@ public class ChksumUtils { } } } - doTheThing(); } } From be8180a60d278c617706c8e01bae6edf288ea63a Mon Sep 17 00:00:00 2001 From: AustrianToast Date: Wed, 28 Jun 2023 00:41:38 +0200 Subject: [PATCH 05/10] refactor doTheThing --- src/Chksum/chksum.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Chksum/chksum.cs b/src/Chksum/chksum.cs index afff74b..c8a271f 100644 --- a/src/Chksum/chksum.cs +++ b/src/Chksum/chksum.cs @@ -92,6 +92,7 @@ public class ChksumUtils { public void doTheThing() { using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) { if (getTotalFileCount() >= 1) { + connection.Open(); Dictionary fileHashes = CalculateChecksums(indexFiles()); foreach (var file in fileHashes) { string absolutePathToFile = file.Key; @@ -100,8 +101,6 @@ public class ChksumUtils { string fileHash = file.Value; if (checkIfFileMovedAndUpdatePathToFile(fileHash, fileName, pathToFile) == false && checkIfFileAlreadyExistsInDatabase(fileHash, fileName) == false) { - connection.Open(); - var command = connection.CreateCommand(); command.CommandText = @" From 2b4019d7cfc85c2876b9b4c84d6ab7b1723fa823 Mon Sep 17 00:00:00 2001 From: AustrianToast Date: Wed, 28 Jun 2023 00:48:14 +0200 
Subject: [PATCH 06/10] move return outside of using --- src/Chksum/chksum.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Chksum/chksum.cs b/src/Chksum/chksum.cs index c8a271f..d70148d 100644 --- a/src/Chksum/chksum.cs +++ b/src/Chksum/chksum.cs @@ -183,8 +183,8 @@ public class ChksumUtils { Console.WriteLine($"\tto \t{pathtofile}\n"); wasMoved = true; } - return wasMoved; } + return wasMoved; } public void checkIfFileWasDeleted() { From 7d7e9bac6c8cced2e60630e614e642e84618d8a2 Mon Sep 17 00:00:00 2001 From: AustrianToast Date: Wed, 28 Jun 2023 00:54:30 +0200 Subject: [PATCH 07/10] refactor checkIfFileWasDeleted --- src/Chksum/chksum.cs | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/Chksum/chksum.cs b/src/Chksum/chksum.cs index d70148d..eb2c050 100644 --- a/src/Chksum/chksum.cs +++ b/src/Chksum/chksum.cs @@ -203,19 +203,20 @@ public class ChksumUtils { while (reader.Read()) { pathToFile = reader.GetString(0); - if (!File.Exists(pathToFile)) { - var deleteCommand = connection.CreateCommand(); - deleteCommand.CommandText = - @" - DELETE FROM file - WHERE pathtofile = $pathtofile - "; - deleteCommand.Parameters.AddWithValue("$pathtofile", pathToFile); - deleteCommand.ExecuteNonQuery(); - - Console.WriteLine("File deleted:"); - Console.WriteLine($"\t{pathToFile}\n"); + if (File.Exists(pathToFile)) { + continue; } + var deleteCommand = connection.CreateCommand(); + deleteCommand.CommandText = + @" + DELETE FROM file + WHERE pathtofile = $pathtofile + "; + deleteCommand.Parameters.AddWithValue("$pathtofile", pathToFile); + deleteCommand.ExecuteNonQuery(); + + Console.WriteLine("File deleted:"); + Console.WriteLine($"\t{pathToFile}\n"); } } } From 787721381d33cbf456e7e794fdfbd2466ec3ac6a Mon Sep 17 00:00:00 2001 From: AustrianToast Date: Wed, 28 Jun 2023 00:54:55 +0200 Subject: [PATCH 08/10] refactor doTheThing --- src/Chksum/chksum.cs | 44 +++++++++++++++++++++++--------------------- 1 file 
changed, 23 insertions(+), 21 deletions(-) diff --git a/src/Chksum/chksum.cs b/src/Chksum/chksum.cs index eb2c050..d0254e7 100644 --- a/src/Chksum/chksum.cs +++ b/src/Chksum/chksum.cs @@ -91,27 +91,29 @@ public class ChksumUtils { public void doTheThing() { using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) { - if (getTotalFileCount() >= 1) { - connection.Open(); - Dictionary fileHashes = CalculateChecksums(indexFiles()); - foreach (var file in fileHashes) { - string absolutePathToFile = file.Key; - string fileName = Path.GetFileName(absolutePathToFile); - string pathToFile = Path.GetRelativePath(DatabaseRoot, absolutePathToFile); - string fileHash = file.Value; - - if (checkIfFileMovedAndUpdatePathToFile(fileHash, fileName, pathToFile) == false && checkIfFileAlreadyExistsInDatabase(fileHash, fileName) == false) { - var command = connection.CreateCommand(); - command.CommandText = - @" - INSERT INTO file (filehash, filename, pathtofile) - VALUES ($filehash, $filename, $pathtofile) - "; - command.Parameters.AddWithValue("$filehash", fileHash); - command.Parameters.AddWithValue("$filename", fileName); - command.Parameters.AddWithValue("$pathtofile", pathToFile); - command.ExecuteNonQuery(); - } + if (getTotalFileCount() < 1) { + return; + } + connection.Open(); + Dictionary fileHashes = CalculateChecksums(indexFiles()); + + foreach (var file in fileHashes) { + string absolutePathToFile = file.Key; + string fileName = Path.GetFileName(absolutePathToFile); + string pathToFile = Path.GetRelativePath(DatabaseRoot, absolutePathToFile); + string fileHash = file.Value; + + if (checkIfFileMovedAndUpdatePathToFile(fileHash, fileName, pathToFile) == false && checkIfFileAlreadyExistsInDatabase(fileHash, fileName) == false) { + var command = connection.CreateCommand(); + command.CommandText = + @" + INSERT INTO file (filehash, filename, pathtofile) + VALUES ($filehash, $filename, $pathtofile) + "; + 
command.Parameters.AddWithValue("$filehash", fileHash); + command.Parameters.AddWithValue("$filename", fileName); + command.Parameters.AddWithValue("$pathtofile", pathToFile); + command.ExecuteNonQuery(); } } } From 42320ebf8d2f2a274ded46a6b507e4af04e57300 Mon Sep 17 00:00:00 2001 From: ProfessionalUwU Date: Wed, 28 Jun 2023 13:18:33 +0200 Subject: [PATCH 09/10] Use ConcurrentDictionary --- src/Chksum/chksum.cs | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/Chksum/chksum.cs b/src/Chksum/chksum.cs index d0254e7..ca6e32a 100644 --- a/src/Chksum/chksum.cs +++ b/src/Chksum/chksum.cs @@ -1,3 +1,4 @@ +using System.Collections.Concurrent; using System.Reflection; using System.Security.Cryptography; using Microsoft.Data.Sqlite; @@ -71,7 +72,7 @@ public class ChksumUtils { } private Dictionary CalculateChecksums(string[] filenames) { - Dictionary checksums = new Dictionary(); + ConcurrentDictionary checksums = new ConcurrentDictionary(); Parallel.ForEach(filenames, (filename, state) => { using (var md5 = MD5.Create()) { @@ -80,13 +81,13 @@ public class ChksumUtils { var checksum = BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant(); lock (checksums) { - checksums.Add(filename, checksum); + checksums.TryAdd(filename, checksum); } } } }); - return checksums; + return new Dictionary(checksums); } public void doTheThing() { From f403b7786445adfa176cad95c1e8489386b343af Mon Sep 17 00:00:00 2001 From: ProfessionalUwU Date: Thu, 29 Jun 2023 02:03:41 +0200 Subject: [PATCH 10/10] Code cleanup --- src/Chksum/chksum.cs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/Chksum/chksum.cs b/src/Chksum/chksum.cs index ca6e32a..40a0694 100644 --- a/src/Chksum/chksum.cs +++ b/src/Chksum/chksum.cs @@ -181,7 +181,7 @@ public class ChksumUtils { command2.Parameters.AddWithValue("$filehash", fileHash); command2.ExecuteNonQuery(); - Console.WriteLine("File moved:"); + Console.WriteLine("File moved or is a duplicate:"); 
Console.WriteLine($"\tfrom\t{pathToFile}"); Console.WriteLine($"\tto \t{pathtofile}\n"); wasMoved = true; @@ -252,7 +252,6 @@ public class ChksumUtils { public void compareDatabases(string filePathToOtherDatabase) { List filesThatDoNotExistsInTheRemote = getFilehashesFromDatabase("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadOnly").Except(getFilehashesFromDatabase("Data Source=" + filePathToOtherDatabase + ";Mode=ReadOnly")).ToList(); - //List filesThatDoNotExistsInTheOrigin = filehashesOfRemoteDatabase.Except(filehashesOfOriginDatabase).ToList(); foreach (string file in filesThatDoNotExistsInTheRemote) { using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadOnly")) {