From f65108425cd8fb3817a4887ab3fd2a035fcee721 Mon Sep 17 00:00:00 2001 From: ProfessionalUwU Date: Thu, 29 Jun 2023 19:53:00 +0200 Subject: [PATCH 1/7] New hashing algorithm --- src/Chksum/Chksum.csproj | 1 + src/Chksum/chksum.cs | 43 +++++++++++++++++++++++++++++++++++----- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/src/Chksum/Chksum.csproj b/src/Chksum/Chksum.csproj index c583bd8..8fbf809 100644 --- a/src/Chksum/Chksum.csproj +++ b/src/Chksum/Chksum.csproj @@ -18,6 +18,7 @@ + diff --git a/src/Chksum/chksum.cs b/src/Chksum/chksum.cs index 928e160..1a9847f 100644 --- a/src/Chksum/chksum.cs +++ b/src/Chksum/chksum.cs @@ -4,6 +4,7 @@ using System.Security.Cryptography; using Microsoft.Data.Sqlite; using Serilog; using Serilog.Events; +using MurmurHash.Net; namespace Chksum.Utils; public class ChksumUtils { @@ -107,6 +108,38 @@ public class ChksumUtils { return new Dictionary(checksums); } + private Dictionary CalculateChecksumsWithMurmur(string[] filenames) { + ConcurrentDictionary checksums = new ConcurrentDictionary(); + + Parallel.ForEach(filenames, (filename, state) => { + using (var stream = File.OpenRead(filename)) { + var hash = CalculateMurmurHash32(stream); + lock (checksums) { + checksums.TryAdd(filename, hash); + } + } + }); + + logger.Debug("All files were checksummed"); + return new Dictionary(checksums); + } + + private uint CalculateMurmurHash32(Stream stream) { + const int bufferSize = 4096; // Adjust the buffer size as needed + const uint seed = 123456U; // Adjust the seed value as needed + + var buffer = new byte[bufferSize]; + uint hash = seed; + + int bytesRead; + + while ((bytesRead = stream.Read(buffer, 0, bufferSize)) > 0) { + hash = MurmurHash3.Hash32(buffer, seed: 123456U); + } + return hash; + } + + public void doTheThing() { using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) { if (getTotalFileCount() < 1) { @@ -114,13 +147,13 @@ public class ChksumUtils { return; } 
connection.Open(); - Dictionary fileHashes = CalculateChecksums(indexFiles()); + Dictionary fileHashes = CalculateChecksumsWithMurmur(indexFiles()); foreach (var file in fileHashes) { string absolutePathToFile = file.Key; string fileName = Path.GetFileName(absolutePathToFile); string pathToFile = Path.GetRelativePath(DatabaseRoot, absolutePathToFile); - string fileHash = file.Value; + var fileHash = file.Value; if (checkIfFileMovedAndUpdatePathToFile(fileHash, fileName, pathToFile) == false && checkIfFileAlreadyExistsInDatabase(fileHash, fileName) == false) { var command = connection.CreateCommand(); @@ -140,7 +173,7 @@ public class ChksumUtils { } } - private bool checkIfFileAlreadyExistsInDatabase(string fileHash, string pathToFile) { + private bool checkIfFileAlreadyExistsInDatabase(uint fileHash, string pathToFile) { string filehash = string.Empty; string pathtofile = string.Empty; bool doesExist = false; @@ -164,14 +197,14 @@ public class ChksumUtils { logger.Verbose("{pathToFile} with the hash {fileHash} was successfully loaded", pathToFile, fileHash); } - if (fileHash == filehash) { + if (fileHash.ToString() == filehash) { logger.Verbose("File with filehash {filehash} already exists in the database", filehash); doesExist = true; } return doesExist; } - private bool checkIfFileMovedAndUpdatePathToFile(string fileHash, string fileName, string pathToFile) { + private bool checkIfFileMovedAndUpdatePathToFile(uint fileHash, string fileName, string pathToFile) { string pathtofile = string.Empty; bool wasMoved = false; From 9d6b1385c8c3961380c2c1363e6a48e7db28dac1 Mon Sep 17 00:00:00 2001 From: ProfessionalUwU Date: Sun, 2 Jul 2023 15:20:13 +0200 Subject: [PATCH 2/7] Add new hashing algorithms Add XxHash Code cleanup --- src/Chksum/Chksum.csproj | 1 + src/Chksum/Program.cs | 1 + src/Chksum/chksum.cs | 93 +++++++++++++++++++++++++++++++--------- 3 files changed, 75 insertions(+), 20 deletions(-) diff --git a/src/Chksum/Chksum.csproj b/src/Chksum/Chksum.csproj index 
8fbf809..cfc5d7f 100644 --- a/src/Chksum/Chksum.csproj +++ b/src/Chksum/Chksum.csproj @@ -22,6 +22,7 @@ + diff --git a/src/Chksum/Program.cs b/src/Chksum/Program.cs index 613c886..3969926 100644 --- a/src/Chksum/Program.cs +++ b/src/Chksum/Program.cs @@ -58,6 +58,7 @@ public class Program { static void PrintAvailableOptions() { String[] options = { "checksum", + "compareDatabases", "compareChecksums", "createDB", "checkIfFileWasDeleted", diff --git a/src/Chksum/chksum.cs b/src/Chksum/chksum.cs index 1a9847f..eafbd03 100644 --- a/src/Chksum/chksum.cs +++ b/src/Chksum/chksum.cs @@ -5,6 +5,7 @@ using Microsoft.Data.Sqlite; using Serilog; using Serilog.Events; using MurmurHash.Net; +using Standart.Hash.xxHash; namespace Chksum.Utils; public class ChksumUtils { @@ -17,7 +18,7 @@ public class ChksumUtils { private int getTotalFileCount() { int totalFileCount = Directory.GetFiles(Directory.GetCurrentDirectory(), "*", SearchOption.AllDirectories).Length; logger.Debug("Total file count is {totalFileCount}", totalFileCount); - return totalFileCount - 3; // Remove the program, datbase and library from the totalFileCount + return totalFileCount - 4; // Remove the program, datbase, log and library from the totalFileCount } private string[] indexFiles() { @@ -125,29 +126,81 @@ public class ChksumUtils { } private uint CalculateMurmurHash32(Stream stream) { - const int bufferSize = 4096; // Adjust the buffer size as needed - const uint seed = 123456U; // Adjust the seed value as needed + const int bufferSize = 4096; + const uint seed = 123456U; var buffer = new byte[bufferSize]; uint hash = seed; int bytesRead; + ReadOnlySpan span = buffer; while ((bytesRead = stream.Read(buffer, 0, bufferSize)) > 0) { - hash = MurmurHash3.Hash32(buffer, seed: 123456U); + hash = MurmurHash3.Hash32(bytes: span, seed: 123456U); } return hash; } + private Dictionary CalculateChecksumsWithXxHash3(string[] filenames) { + ConcurrentDictionary checksums = new ConcurrentDictionary(); - public void 
doTheThing() { + Parallel.ForEach(filenames, (filename, state) => { + using (var stream = File.OpenRead(filename)) { + var hash = CalculateXxHash3(stream); + checksums.TryAdd(filename, hash); + } + }); + + return new Dictionary(checksums); + } + + private ulong CalculateXxHash3(Stream stream) { + const int bufferSize = 4096; + const ulong seed = 123456U; + + var buffer = new byte[bufferSize]; + ulong hash = seed; + + int bytesRead; + + while ((bytesRead = stream.Read(buffer, 0, buffer.Length)) > 0) { + hash = xxHash3.ComputeHash(buffer, buffer.Length); + } + + return hash; + } + + + public void doTheThing(string hashalgo, int bufferSize) { using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) { if (getTotalFileCount() < 1) { logger.Information("There were no files to checksum"); return; } connection.Open(); - Dictionary fileHashes = CalculateChecksumsWithMurmur(indexFiles()); + + Dictionary fileHashes; + Dictionary fileHashesXxHash3; + Dictionary fileHashesMurmur; + Dictionary fileHashesMD5; + + switch (hashalgo) { + case "MD5": + fileHashesMD5 = CalculateChecksums(indexFiles()); + fileHashes = fileHashesMD5.ToDictionary(kv => kv.Key, kv => (object)kv.Value); + break; + case "Murmur": + fileHashesMurmur = CalculateChecksumsWithMurmur(indexFiles()); + fileHashes = fileHashesMurmur.ToDictionary(kv => kv.Key, kv => (object)kv.Value); + break; + case "XxHash": + fileHashesXxHash3 = CalculateChecksumsWithXxHash3(indexFiles()); + fileHashes = fileHashesXxHash3.ToDictionary(kv => kv.Key, kv => (object)kv.Value); + break; + default: + logger.Error("No valid hash algorithm was selected"); + throw new Exception($"{hashalgo} is not a valid option. 
Valid options are MD5, Murmur and XxHash"); + } foreach (var file in fileHashes) { string absolutePathToFile = file.Key; @@ -173,7 +226,7 @@ public class ChksumUtils { } } - private bool checkIfFileAlreadyExistsInDatabase(uint fileHash, string pathToFile) { + private bool checkIfFileAlreadyExistsInDatabase(object fileHash, string pathToFile) { string filehash = string.Empty; string pathtofile = string.Empty; bool doesExist = false; @@ -186,7 +239,7 @@ public class ChksumUtils { @" SELECT filehash, pathtofile FROM file WHERE filehash = $filehash "; - command.Parameters.AddWithValue("$filehash", fileHash); + command.Parameters.AddWithValue("$filehash", fileHash.ToString()); using (var reader = command.ExecuteReader()) { while (reader.Read()) { @@ -194,7 +247,7 @@ public class ChksumUtils { pathtofile = reader.GetString(1); } } - logger.Verbose("{pathToFile} with the hash {fileHash} was successfully loaded", pathToFile, fileHash); + logger.Verbose("{pathToFile} with the hash {fileHash} was successfully loaded", pathToFile, fileHash.ToString()); } if (fileHash.ToString() == filehash) { @@ -204,7 +257,7 @@ public class ChksumUtils { return doesExist; } - private bool checkIfFileMovedAndUpdatePathToFile(uint fileHash, string fileName, string pathToFile) { + private bool checkIfFileMovedAndUpdatePathToFile(object fileHash, string fileName, string pathToFile) { string pathtofile = string.Empty; bool wasMoved = false; @@ -216,7 +269,7 @@ public class ChksumUtils { @" SELECT pathtofile FROM file WHERE filehash = $filehash "; - command.Parameters.AddWithValue("$filehash", fileHash); + command.Parameters.AddWithValue("$filehash", fileHash.ToString()); using (var reader = command.ExecuteReader()) { while (reader.Read()) { @@ -233,15 +286,15 @@ public class ChksumUtils { WHERE filehash = $filehash "; command2.Parameters.AddWithValue("$newpathtofile", pathToFile); - command2.Parameters.AddWithValue("$filehash", fileHash); + command2.Parameters.AddWithValue("$filehash", 
fileHash.ToString()); command2.ExecuteNonQuery(); - Console.WriteLine("File moved or is a duplicate:"); - Console.WriteLine($"\tfrom\t{pathToFile}"); - Console.WriteLine($"\tto \t{pathtofile}\n"); + //Console.WriteLine("File moved or is a duplicate:"); + //Console.WriteLine($"\tfrom\t{pathToFile}"); + //Console.WriteLine($"\tto \t{pathtofile}\n"); wasMoved = true; } - logger.Verbose("{fileName} which is located at {pathToFile} relative to the database with the hash {fileHash} was successfully checked", fileName, pathToFile, fileHash); + logger.Verbose("{fileName} which is located at {pathToFile} relative to the database with the hash {fileHash} was successfully checked", fileName, pathToFile, fileHash.ToString()); } return wasMoved; } @@ -275,9 +328,9 @@ public class ChksumUtils { deleteCommand.Parameters.AddWithValue("$pathtofile", pathToFile); deleteCommand.ExecuteNonQuery(); - Console.WriteLine("File deleted:"); - Console.WriteLine($"\t{pathToFile}\n"); - logger.Verbose("File deleted: {pathToFile}", pathToFile); + //Console.WriteLine("File deleted:"); + //Console.WriteLine($"\t{pathToFile}\n"); + logger.Information("File deleted: {pathToFile}", pathToFile); } } logger.Information("All deleted files were successfully removed from the database"); @@ -337,7 +390,7 @@ public class ChksumUtils { Console.WriteLine("File not found in remote:"); Console.WriteLine($"\t{filename}\n"); - logger.Verbose("{filename} could not be found in the remote database", filename); + logger.Information("{filename} could not be found in the remote database", filename); } } } From d80a5f5e6b4326455945b9fa16aa75e9c4f74fe8 Mon Sep 17 00:00:00 2001 From: ProfessionalUwU Date: Sun, 2 Jul 2023 15:32:46 +0200 Subject: [PATCH 3/7] Add option for choosing hash algo --- src/Chksum/Program.cs | 14 +++++++++++--- src/Chksum/chksum.cs | 20 ++++++++++---------- 2 files changed, 21 insertions(+), 13 deletions(-) diff --git a/src/Chksum/Program.cs b/src/Chksum/Program.cs index 3969926..43a118a 100644 
--- a/src/Chksum/Program.cs +++ b/src/Chksum/Program.cs @@ -8,7 +8,7 @@ public class Program { Console.WriteLine("Please specify an option."); PrintAvailableOptions(); return; - } else if (args.Length > 1 && args[0] != "compareDatabases") { + } else if (args.Length > 3) { Console.WriteLine("Too many options."); return; } @@ -25,7 +25,15 @@ public class Program { Console.WriteLine("Starting the checksum process."); Console.ResetColor(); - utils.doTheThing(); + try { + int bufferSize = int.Parse(args[2]); + utils.doTheThing(args[1], bufferSize); + } + catch (FormatException) { + Console.ForegroundColor = ConsoleColor.Red; + Console.WriteLine("Buffer was not a valid integer value. Please specify a valid integer value for the buffer size"); + Console.ResetColor(); + } Console.ForegroundColor = ConsoleColor.Green; Console.WriteLine("Checksum process finished"); @@ -57,7 +65,7 @@ public class Program { static void PrintAvailableOptions() { String[] options = { - "checksum", + "checksum - MD5, Murmur and XxHash", "compareDatabases", "compareChecksums", "createDB", diff --git a/src/Chksum/chksum.cs b/src/Chksum/chksum.cs index eafbd03..5ce92c7 100644 --- a/src/Chksum/chksum.cs +++ b/src/Chksum/chksum.cs @@ -109,12 +109,12 @@ public class ChksumUtils { return new Dictionary(checksums); } - private Dictionary CalculateChecksumsWithMurmur(string[] filenames) { + private Dictionary CalculateChecksumsWithMurmur(string[] filenames, int userDefinedBufferSize) { ConcurrentDictionary checksums = new ConcurrentDictionary(); Parallel.ForEach(filenames, (filename, state) => { using (var stream = File.OpenRead(filename)) { - var hash = CalculateMurmurHash32(stream); + var hash = CalculateMurmurHash32(stream, userDefinedBufferSize); lock (checksums) { checksums.TryAdd(filename, hash); } @@ -125,8 +125,8 @@ public class ChksumUtils { return new Dictionary(checksums); } - private uint CalculateMurmurHash32(Stream stream) { - const int bufferSize = 4096; + private uint 
CalculateMurmurHash32(Stream stream, int userDefinedBufferSize) { + int bufferSize = userDefinedBufferSize; const uint seed = 123456U; var buffer = new byte[bufferSize]; @@ -141,12 +141,12 @@ public class ChksumUtils { return hash; } - private Dictionary CalculateChecksumsWithXxHash3(string[] filenames) { + private Dictionary CalculateChecksumsWithXxHash3(string[] filenames, int userDefinedBufferSize) { ConcurrentDictionary checksums = new ConcurrentDictionary(); Parallel.ForEach(filenames, (filename, state) => { using (var stream = File.OpenRead(filename)) { - var hash = CalculateXxHash3(stream); + var hash = CalculateXxHash3(stream, userDefinedBufferSize); checksums.TryAdd(filename, hash); } }); @@ -154,8 +154,8 @@ public class ChksumUtils { return new Dictionary(checksums); } - private ulong CalculateXxHash3(Stream stream) { - const int bufferSize = 4096; + private ulong CalculateXxHash3(Stream stream, int userDefinedBufferSize) { + int bufferSize = userDefinedBufferSize; const ulong seed = 123456U; var buffer = new byte[bufferSize]; @@ -190,11 +190,11 @@ public class ChksumUtils { fileHashes = fileHashesMD5.ToDictionary(kv => kv.Key, kv => (object)kv.Value); break; case "Murmur": - fileHashesMurmur = CalculateChecksumsWithMurmur(indexFiles()); + fileHashesMurmur = CalculateChecksumsWithMurmur(indexFiles(), bufferSize); fileHashes = fileHashesMurmur.ToDictionary(kv => kv.Key, kv => (object)kv.Value); break; case "XxHash": - fileHashesXxHash3 = CalculateChecksumsWithXxHash3(indexFiles()); + fileHashesXxHash3 = CalculateChecksumsWithXxHash3(indexFiles(), bufferSize); fileHashes = fileHashesXxHash3.ToDictionary(kv => kv.Key, kv => (object)kv.Value); break; default: From 02a0bddd7e270554748c7fff8decdf390fdcfa79 Mon Sep 17 00:00:00 2001 From: ProfessionalUwU Date: Mon, 3 Jul 2023 00:59:50 +0200 Subject: [PATCH 4/7] Major advancments New options Switched to wal (Write ahead log) Now using redis to cache filehashes New option to dump redis data into sqlite database --- 
src/Chksum/Chksum.csproj | 1 + src/Chksum/Program.cs | 9 +-- src/Chksum/chksum.cs | 151 +++++++++++++++++++++++++-------------- 3 files changed, 105 insertions(+), 56 deletions(-) diff --git a/src/Chksum/Chksum.csproj b/src/Chksum/Chksum.csproj index cfc5d7f..1c180c7 100644 --- a/src/Chksum/Chksum.csproj +++ b/src/Chksum/Chksum.csproj @@ -22,6 +22,7 @@ + diff --git a/src/Chksum/Program.cs b/src/Chksum/Program.cs index 43a118a..adcaf54 100644 --- a/src/Chksum/Program.cs +++ b/src/Chksum/Program.cs @@ -38,14 +38,15 @@ public class Program { Console.ForegroundColor = ConsoleColor.Green; Console.WriteLine("Checksum process finished"); break; + case "saveToSqlite": + Console.ResetColor(); + utils.saveToSqlite(); + break; case "compareDatabases": Console.ResetColor(); utils.compareDatabases(args[1]); break; - case "createDB": - utils.initializeDB(); - break; case "checkIfFileWasDeleted": Console.ResetColor(); utils.checkIfFileWasDeleted(); @@ -68,7 +69,7 @@ public class Program { "checksum - MD5, Murmur and XxHash", "compareDatabases", "compareChecksums", - "createDB", + "saveToSqlite", "checkIfFileWasDeleted", "help" }; diff --git a/src/Chksum/chksum.cs b/src/Chksum/chksum.cs index 5ce92c7..7212865 100644 --- a/src/Chksum/chksum.cs +++ b/src/Chksum/chksum.cs @@ -6,6 +6,7 @@ using Serilog; using Serilog.Events; using MurmurHash.Net; using Standart.Hash.xxHash; +using StackExchange.Redis; namespace Chksum.Utils; public class ChksumUtils { @@ -52,7 +53,7 @@ public class ChksumUtils { } } - public void initializeDB() { + private void initializeDB() { if (File.Exists("chksum.db")) { logger.Information("A database already exits"); return; @@ -73,6 +74,14 @@ public class ChksumUtils { ); "; command.ExecuteNonQuery(); + + var walCommand = connection.CreateCommand(); + walCommand.CommandText = + @" + PRAGMA journal_mode = 'wal' + "; + walCommand.ExecuteNonQuery(); + logger.Information("Database was successfully created"); } } @@ -105,7 +114,6 @@ public class ChksumUtils { } }); 
- logger.Debug("All files were checksummed"); return new Dictionary(checksums); } @@ -121,7 +129,6 @@ public class ChksumUtils { } }); - logger.Debug("All files were checksummed"); return new Dictionary(checksums); } @@ -172,61 +179,95 @@ public class ChksumUtils { public void doTheThing(string hashalgo, int bufferSize) { + + ConnectionMultiplexer redis = ConnectionMultiplexer.Connect("localhost"); + IDatabase db = redis.GetDatabase(); + + + if (getTotalFileCount() < 1) { + logger.Information("There were no files to checksum"); + return; + } + + Dictionary fileHashes; + Dictionary fileHashesXxHash3; + Dictionary fileHashesMurmur; + Dictionary fileHashesMD5; + + switch (hashalgo) { + case "MD5": + fileHashesMD5 = CalculateChecksums(indexFiles()); + fileHashes = fileHashesMD5.ToDictionary(kv => kv.Key, kv => (object)kv.Value); + break; + case "Murmur": + fileHashesMurmur = CalculateChecksumsWithMurmur(indexFiles(), bufferSize); + fileHashes = fileHashesMurmur.ToDictionary(kv => kv.Key, kv => (object)kv.Value); + break; + case "XxHash": + fileHashesXxHash3 = CalculateChecksumsWithXxHash3(indexFiles(), bufferSize); + fileHashes = fileHashesXxHash3.ToDictionary(kv => kv.Key, kv => (object)kv.Value); + break; + default: + logger.Error("No valid hash algorithm was selected"); + throw new Exception($"{hashalgo} is not a valid option. 
Valid options are MD5, Murmur and XxHash"); + } + + logger.Information("All files were checksummed"); + + HashEntry[] hashEntries = fileHashes.Select(kv => new HashEntry(kv.Key, kv.Value.ToString())).ToArray(); + string hashKey = "fileHashes"; + db.HashSet(hashKey, hashEntries); + logger.Information("Dictionary inserted into Redis."); + } + + public void saveToSqlite() { + + initializeDB(); + + ConnectionMultiplexer redis = ConnectionMultiplexer.Connect("localhost"); + IDatabase db = redis.GetDatabase(); + + HashEntry[] fileHashes = db.HashGetAll("fileHashes"); + logger.Information("Retrived all values from redis"); + using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) { - if (getTotalFileCount() < 1) { - logger.Information("There were no files to checksum"); - return; - } - connection.Open(); - - Dictionary fileHashes; - Dictionary fileHashesXxHash3; - Dictionary fileHashesMurmur; - Dictionary fileHashesMD5; - - switch (hashalgo) { - case "MD5": - fileHashesMD5 = CalculateChecksums(indexFiles()); - fileHashes = fileHashesMD5.ToDictionary(kv => kv.Key, kv => (object)kv.Value); - break; - case "Murmur": - fileHashesMurmur = CalculateChecksumsWithMurmur(indexFiles(), bufferSize); - fileHashes = fileHashesMurmur.ToDictionary(kv => kv.Key, kv => (object)kv.Value); - break; - case "XxHash": - fileHashesXxHash3 = CalculateChecksumsWithXxHash3(indexFiles(), bufferSize); - fileHashes = fileHashesXxHash3.ToDictionary(kv => kv.Key, kv => (object)kv.Value); - break; - default: - logger.Error("No valid hash algorithm was selected"); - throw new Exception($"{hashalgo} is not a valid option. 
Valid options are MD5, Murmur and XxHash"); - } - foreach (var file in fileHashes) { - string absolutePathToFile = file.Key; - string fileName = Path.GetFileName(absolutePathToFile); - string pathToFile = Path.GetRelativePath(DatabaseRoot, absolutePathToFile); - var fileHash = file.Value; - if (checkIfFileMovedAndUpdatePathToFile(fileHash, fileName, pathToFile) == false && checkIfFileAlreadyExistsInDatabase(fileHash, fileName) == false) { - var command = connection.CreateCommand(); - command.CommandText = + var absolutePathToFile = file.Name.ToString(); + string fileName = Path.GetFileName(absolutePathToFile.ToString()); + string pathToFile = Path.GetRelativePath(DatabaseRoot, absolutePathToFile.ToString()); + var fileHash = file.Value.ToString(); + + if (!checkIfFileMovedAndUpdatePathToFile(fileHash, fileName, pathToFile) && !checkIfFileAlreadyExistsInDatabase(fileHash, fileName)) { + connection.Open(); + var InsertCommand = connection.CreateCommand(); + InsertCommand.CommandText = @" INSERT INTO file (filehash, filename, pathtofile) VALUES ($filehash, $filename, $pathtofile) "; - command.Parameters.AddWithValue("$filehash", fileHash); - command.Parameters.AddWithValue("$filename", fileName); - command.Parameters.AddWithValue("$pathtofile", pathToFile); - command.ExecuteNonQuery(); + InsertCommand.Parameters.AddWithValue("$filehash", fileHash); + InsertCommand.Parameters.AddWithValue("$filename", fileName); + InsertCommand.Parameters.AddWithValue("$pathtofile", pathToFile); + InsertCommand.ExecuteNonQuery(); logger.Verbose("{fileName} which is located at {pathToFile} relative to the database with the hash {fileHash} was successfully inserted into the database", fileName, pathToFile, fileHash); } } - logger.Information("All files were successfully written to the database"); } + logger.Information("All filehashes were successfully inserted into the database"); + + var keys = db.Execute("KEYS", "*"); + if (keys == null) { + logger.Error("No values found in redis"); + 
return; + } + foreach (var key in (RedisValue[])keys) { + db.KeyDelete((RedisKey)key.ToString()); + } + logger.Information("Redis was successfully cleared of any remaining data"); } - private bool checkIfFileAlreadyExistsInDatabase(object fileHash, string pathToFile) { + private bool checkIfFileAlreadyExistsInDatabase(string fileHash, string pathToFile) { string filehash = string.Empty; string pathtofile = string.Empty; bool doesExist = false; @@ -239,7 +280,7 @@ public class ChksumUtils { @" SELECT filehash, pathtofile FROM file WHERE filehash = $filehash "; - command.Parameters.AddWithValue("$filehash", fileHash.ToString()); + command.Parameters.AddWithValue("$filehash", fileHash); using (var reader = command.ExecuteReader()) { while (reader.Read()) { @@ -247,17 +288,17 @@ public class ChksumUtils { pathtofile = reader.GetString(1); } } - logger.Verbose("{pathToFile} with the hash {fileHash} was successfully loaded", pathToFile, fileHash.ToString()); + logger.Verbose("{pathToFile} with the hash {fileHash} was successfully loaded", pathToFile, fileHash); } - if (fileHash.ToString() == filehash) { + if (fileHash == filehash) { logger.Verbose("File with filehash {filehash} already exists in the database", filehash); doesExist = true; } return doesExist; } - private bool checkIfFileMovedAndUpdatePathToFile(object fileHash, string fileName, string pathToFile) { + private bool checkIfFileMovedAndUpdatePathToFile(string fileHash, string fileName, string pathToFile) { string pathtofile = string.Empty; bool wasMoved = false; @@ -269,7 +310,7 @@ public class ChksumUtils { @" SELECT pathtofile FROM file WHERE filehash = $filehash "; - command.Parameters.AddWithValue("$filehash", fileHash.ToString()); + command.Parameters.AddWithValue("$filehash", fileHash); using (var reader = command.ExecuteReader()) { while (reader.Read()) { @@ -286,7 +327,7 @@ public class ChksumUtils { WHERE filehash = $filehash "; command2.Parameters.AddWithValue("$newpathtofile", pathToFile); - 
command2.Parameters.AddWithValue("$filehash", fileHash.ToString()); + command2.Parameters.AddWithValue("$filehash", fileHash); command2.ExecuteNonQuery(); //Console.WriteLine("File moved or is a duplicate:"); @@ -294,12 +335,15 @@ public class ChksumUtils { //Console.WriteLine($"\tto \t{pathtofile}\n"); wasMoved = true; } - logger.Verbose("{fileName} which is located at {pathToFile} relative to the database with the hash {fileHash} was successfully checked", fileName, pathToFile, fileHash.ToString()); + logger.Verbose("{fileName} which is located at {pathToFile} relative to the database with the hash {fileHash} was successfully checked", fileName, pathToFile, fileHash); } return wasMoved; } public void checkIfFileWasDeleted() { + + saveToSqlite(); + string pathToFile = string.Empty; using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) { @@ -364,6 +408,9 @@ public class ChksumUtils { } public void compareDatabases(string filePathToOtherDatabase) { + + saveToSqlite(); + if (!File.Exists(filePathToOtherDatabase)) { logger.Error("No database could be found at {filePathToOtherDatabase}", filePathToOtherDatabase); throw new Exception("No database could be found at " + filePathToOtherDatabase); From b011f081720707b4d0195b59d5a7613d4ec7c88c Mon Sep 17 00:00:00 2001 From: ProfessionalUwU Date: Mon, 3 Jul 2023 21:50:38 +0000 Subject: [PATCH 5/7] feature/progressBar (#4) # Add progress bar Added a progress bar so that one can see how far along the program is. 
Reviewed-on: https://gitea.hopeless-cloud.xyz/ProfessionalUwU/chksum/pulls/4 Co-authored-by: ProfessionalUwU Co-committed-by: ProfessionalUwU --- src/Chksum/Program.cs | 5 ++++- src/Chksum/chksum.cs | 29 ++++++++++++++++++++++++----- 2 files changed, 28 insertions(+), 6 deletions(-) diff --git a/src/Chksum/Program.cs b/src/Chksum/Program.cs index adcaf54..8ce3406 100644 --- a/src/Chksum/Program.cs +++ b/src/Chksum/Program.cs @@ -26,6 +26,9 @@ public class Program { Console.ResetColor(); try { + if (args[1] == "MD5") { + utils.doTheThing(args[1]); + } int bufferSize = int.Parse(args[2]); utils.doTheThing(args[1], bufferSize); } @@ -66,7 +69,7 @@ public class Program { static void PrintAvailableOptions() { String[] options = { - "checksum - MD5, Murmur and XxHash", + "checksum - MD5, Murmur and XxHash - Default buffer size is 4096", "compareDatabases", "compareChecksums", "saveToSqlite", diff --git a/src/Chksum/chksum.cs b/src/Chksum/chksum.cs index 7212865..d024ec1 100644 --- a/src/Chksum/chksum.cs +++ b/src/Chksum/chksum.cs @@ -98,9 +98,19 @@ public class ChksumUtils { } } + private void UpdateProgressBar(int current, int total) { + int progress = (int)((double)current / total * 100); + string progressText = $"Progress: {progress}% [{current}/{total}]"; + + Console.Write("\r" + progressText.PadRight(Console.WindowWidth)); + } + private Dictionary CalculateChecksums(string[] filenames) { ConcurrentDictionary checksums = new ConcurrentDictionary(); + int totalFiles = filenames.Length; + int processedFiles = 0; + Parallel.ForEach(filenames, (filename, state) => { using (var md5 = MD5.Create()) { using (var stream = File.OpenRead(filename)) { @@ -111,6 +121,8 @@ public class ChksumUtils { checksums.TryAdd(filename, checksum); } } + Interlocked.Increment(ref processedFiles); + UpdateProgressBar(processedFiles, totalFiles); } }); @@ -120,12 +132,17 @@ public class ChksumUtils { private Dictionary CalculateChecksumsWithMurmur(string[] filenames, int userDefinedBufferSize) 
{ ConcurrentDictionary checksums = new ConcurrentDictionary(); + int totalFiles = filenames.Length; + int processedFiles = 0; + Parallel.ForEach(filenames, (filename, state) => { using (var stream = File.OpenRead(filename)) { var hash = CalculateMurmurHash32(stream, userDefinedBufferSize); lock (checksums) { checksums.TryAdd(filename, hash); } + Interlocked.Increment(ref processedFiles); + UpdateProgressBar(processedFiles, totalFiles); } }); @@ -151,11 +168,16 @@ public class ChksumUtils { private Dictionary CalculateChecksumsWithXxHash3(string[] filenames, int userDefinedBufferSize) { ConcurrentDictionary checksums = new ConcurrentDictionary(); + int totalFiles = filenames.Length; + int processedFiles = 0; + Parallel.ForEach(filenames, (filename, state) => { using (var stream = File.OpenRead(filename)) { var hash = CalculateXxHash3(stream, userDefinedBufferSize); checksums.TryAdd(filename, hash); } + Interlocked.Increment(ref processedFiles); + UpdateProgressBar(processedFiles, totalFiles); }); return new Dictionary(checksums); @@ -177,8 +199,7 @@ public class ChksumUtils { return hash; } - - public void doTheThing(string hashalgo, int bufferSize) { + public void doTheThing(string hashalgo, int bufferSize = 4096) { ConnectionMultiplexer redis = ConnectionMultiplexer.Connect("localhost"); IDatabase db = redis.GetDatabase(); @@ -372,9 +393,7 @@ public class ChksumUtils { deleteCommand.Parameters.AddWithValue("$pathtofile", pathToFile); deleteCommand.ExecuteNonQuery(); - //Console.WriteLine("File deleted:"); - //Console.WriteLine($"\t{pathToFile}\n"); - logger.Information("File deleted: {pathToFile}", pathToFile); + logger.Information("File deleted:\n\t{pathToFile}", pathToFile); } } logger.Information("All deleted files were successfully removed from the database"); From 336eb5b73d8220d630823e588a585e35f023deba Mon Sep 17 00:00:00 2001 From: ProfessionalUwU Date: Mon, 3 Jul 2023 23:56:27 +0200 Subject: [PATCH 6/7] Fix logging --- src/Chksum/chksum.cs | 7 ++----- 1 
file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/Chksum/chksum.cs b/src/Chksum/chksum.cs index d024ec1..b3279ca 100644 --- a/src/Chksum/chksum.cs +++ b/src/Chksum/chksum.cs @@ -204,7 +204,6 @@ public class ChksumUtils { ConnectionMultiplexer redis = ConnectionMultiplexer.Connect("localhost"); IDatabase db = redis.GetDatabase(); - if (getTotalFileCount() < 1) { logger.Information("There were no files to checksum"); return; @@ -351,9 +350,7 @@ public class ChksumUtils { command2.Parameters.AddWithValue("$filehash", fileHash); command2.ExecuteNonQuery(); - //Console.WriteLine("File moved or is a duplicate:"); - //Console.WriteLine($"\tfrom\t{pathToFile}"); - //Console.WriteLine($"\tto \t{pathtofile}\n"); + logger.Verbose("File moved or is a duplicate:\n\tfrom\t{pathToFile}\n\tto \t{pathtofile}\n", pathToFile, pathtofile); wasMoved = true; } logger.Verbose("{fileName} which is located at {pathToFile} relative to the database with the hash {fileHash} was successfully checked", fileName, pathToFile, fileHash); @@ -466,6 +463,6 @@ public class ChksumUtils { public void cleanup() { File.Delete(libraryPath); - logger.Debug("Successfully deleted libe_sqlite3.so"); + logger.Information("Successfully deleted libe_sqlite3.so"); } } \ No newline at end of file From d7aa7a6d986de2f2472c59d173118c641765f75b Mon Sep 17 00:00:00 2001 From: ProfessionalUwU Date: Tue, 4 Jul 2023 00:18:09 +0200 Subject: [PATCH 7/7] Runtime dependency --- README.md | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/README.md b/README.md index b06788a..c3352db 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,17 @@ Checksums every file under the current directory +## Runtime dependency + +If you use version 2.0.0 or above, you will need Redis! + +```bash +pacman -S redis && systemctl start redis +``` + +With this command, Redis will be installed and started. +Don't forget to enable the service if you don't want to start it every time you run the program. + ## Run Locally Clone the project