From 9d6b1385c8c3961380c2c1363e6a48e7db28dac1 Mon Sep 17 00:00:00 2001 From: ProfessionalUwU Date: Sun, 2 Jul 2023 15:20:13 +0200 Subject: [PATCH] Add new hashing algorithms Add XxHash Code cleanup --- src/Chksum/Chksum.csproj | 1 + src/Chksum/Program.cs | 1 + src/Chksum/chksum.cs | 93 +++++++++++++++++++++++++++++++--------- 3 files changed, 75 insertions(+), 20 deletions(-) diff --git a/src/Chksum/Chksum.csproj b/src/Chksum/Chksum.csproj index 8fbf809..cfc5d7f 100644 --- a/src/Chksum/Chksum.csproj +++ b/src/Chksum/Chksum.csproj @@ -22,6 +22,7 @@ + diff --git a/src/Chksum/Program.cs b/src/Chksum/Program.cs index 613c886..3969926 100644 --- a/src/Chksum/Program.cs +++ b/src/Chksum/Program.cs @@ -58,6 +58,7 @@ public class Program { static void PrintAvailableOptions() { String[] options = { "checksum", + "compareDatabases", "compareChecksums", "createDB", "checkIfFileWasDeleted", diff --git a/src/Chksum/chksum.cs b/src/Chksum/chksum.cs index 1a9847f..eafbd03 100644 --- a/src/Chksum/chksum.cs +++ b/src/Chksum/chksum.cs @@ -5,6 +5,7 @@ using Microsoft.Data.Sqlite; using Serilog; using Serilog.Events; using MurmurHash.Net; +using Standart.Hash.xxHash; namespace Chksum.Utils; public class ChksumUtils { @@ -17,7 +18,7 @@ public class ChksumUtils { private int getTotalFileCount() { int totalFileCount = Directory.GetFiles(Directory.GetCurrentDirectory(), "*", SearchOption.AllDirectories).Length; logger.Debug("Total file count is {totalFileCount}", totalFileCount); - return totalFileCount - 3; // Remove the program, datbase and library from the totalFileCount + return totalFileCount - 4; // Remove the program, datbase, log and library from the totalFileCount } private string[] indexFiles() { @@ -125,29 +126,81 @@ public class ChksumUtils { } private uint CalculateMurmurHash32(Stream stream) { - const int bufferSize = 4096; // Adjust the buffer size as needed - const uint seed = 123456U; // Adjust the seed value as needed + const int bufferSize = 4096; + const uint seed = 123456U; var buffer = new byte[bufferSize]; uint hash = seed; int bytesRead; + ReadOnlySpan span = buffer; while ((bytesRead = stream.Read(buffer, 0, bufferSize)) > 0) { - hash = MurmurHash3.Hash32(buffer, seed: 123456U); + hash = MurmurHash3.Hash32(bytes: span, seed: 123456U); } return hash; } + private Dictionary CalculateChecksumsWithXxHash3(string[] filenames) { + ConcurrentDictionary checksums = new ConcurrentDictionary(); - public void doTheThing() { + Parallel.ForEach(filenames, (filename, state) => { + using (var stream = File.OpenRead(filename)) { + var hash = CalculateXxHash3(stream); + checksums.TryAdd(filename, hash); + } + }); + + return new Dictionary(checksums); + } + + private ulong CalculateXxHash3(Stream stream) { + const int bufferSize = 4096; + const ulong seed = 123456U; + + var buffer = new byte[bufferSize]; + ulong hash = seed; + + int bytesRead; + + while ((bytesRead = stream.Read(buffer, 0, buffer.Length)) > 0) { + hash = xxHash3.ComputeHash(buffer, buffer.Length); + } + + return hash; + } + + + public void doTheThing(string hashalgo, int bufferSize) { using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) { if (getTotalFileCount() < 1) { logger.Information("There were no files to checksum"); return; } connection.Open(); - Dictionary fileHashes = CalculateChecksumsWithMurmur(indexFiles()); + + Dictionary fileHashes; + Dictionary fileHashesXxHash3; + Dictionary fileHashesMurmur; + Dictionary fileHashesMD5; + + switch (hashalgo) { + case "MD5": + fileHashesMD5 = CalculateChecksums(indexFiles()); + fileHashes = fileHashesMD5.ToDictionary(kv => kv.Key, kv => (object)kv.Value); + break; + case "Murmur": + fileHashesMurmur = CalculateChecksumsWithMurmur(indexFiles()); + fileHashes = fileHashesMurmur.ToDictionary(kv => kv.Key, kv => (object)kv.Value); + break; + case "XxHash": + fileHashesXxHash3 = CalculateChecksumsWithXxHash3(indexFiles()); + fileHashes = fileHashesXxHash3.ToDictionary(kv => kv.Key, kv => (object)kv.Value); + break; + default: + logger.Error("No valid hash algorithm was selected"); + throw new Exception($"{hashalgo} is not a valid option. Valid options are MD5, Murmur and XxHash"); + } foreach (var file in fileHashes) { string absolutePathToFile = file.Key; @@ -173,7 +226,7 @@ public class ChksumUtils { } } - private bool checkIfFileAlreadyExistsInDatabase(uint fileHash, string pathToFile) { + private bool checkIfFileAlreadyExistsInDatabase(object fileHash, string pathToFile) { string filehash = string.Empty; string pathtofile = string.Empty; bool doesExist = false; @@ -186,7 +239,7 @@ public class ChksumUtils { @" SELECT filehash, pathtofile FROM file WHERE filehash = $filehash "; - command.Parameters.AddWithValue("$filehash", fileHash); + command.Parameters.AddWithValue("$filehash", fileHash.ToString()); using (var reader = command.ExecuteReader()) { while (reader.Read()) { @@ -194,7 +247,7 @@ public class ChksumUtils { pathtofile = reader.GetString(1); } } - logger.Verbose("{pathToFile} with the hash {fileHash} was successfully loaded", pathToFile, fileHash); + logger.Verbose("{pathToFile} with the hash {fileHash} was successfully loaded", pathToFile, fileHash.ToString()); } if (fileHash.ToString() == filehash) { @@ -204,7 +257,7 @@ public class ChksumUtils { return doesExist; } - private bool checkIfFileMovedAndUpdatePathToFile(uint fileHash, string fileName, string pathToFile) { + private bool checkIfFileMovedAndUpdatePathToFile(object fileHash, string fileName, string pathToFile) { string pathtofile = string.Empty; bool wasMoved = false; @@ -216,7 +269,7 @@ public class ChksumUtils { @" SELECT pathtofile FROM file WHERE filehash = $filehash "; - command.Parameters.AddWithValue("$filehash", fileHash); + command.Parameters.AddWithValue("$filehash", fileHash.ToString()); using (var reader = command.ExecuteReader()) { while (reader.Read()) { @@ -233,15 +286,15 @@ public class ChksumUtils { WHERE filehash = $filehash "; command2.Parameters.AddWithValue("$newpathtofile", pathToFile); - command2.Parameters.AddWithValue("$filehash", fileHash); + command2.Parameters.AddWithValue("$filehash", fileHash.ToString()); command2.ExecuteNonQuery(); - Console.WriteLine("File moved or is a duplicate:"); - Console.WriteLine($"\tfrom\t{pathToFile}"); - Console.WriteLine($"\tto \t{pathtofile}\n"); + //Console.WriteLine("File moved or is a duplicate:"); + //Console.WriteLine($"\tfrom\t{pathToFile}"); + //Console.WriteLine($"\tto \t{pathtofile}\n"); wasMoved = true; } - logger.Verbose("{fileName} which is located at {pathToFile} relative to the database with the hash {fileHash} was successfully checked", fileName, pathToFile, fileHash); + logger.Verbose("{fileName} which is located at {pathToFile} relative to the database with the hash {fileHash} was successfully checked", fileName, pathToFile, fileHash.ToString()); } return wasMoved; } @@ -275,9 +328,9 @@ public class ChksumUtils { deleteCommand.Parameters.AddWithValue("$pathtofile", pathToFile); deleteCommand.ExecuteNonQuery(); - Console.WriteLine("File deleted:"); - Console.WriteLine($"\t{pathToFile}\n"); - logger.Verbose("File deleted: {pathToFile}", pathToFile); + //Console.WriteLine("File deleted:"); + //Console.WriteLine($"\t{pathToFile}\n"); + logger.Information("File deleted: {pathToFile}", pathToFile); } } logger.Information("All deleted files were successfully removed from the database"); @@ -337,7 +390,7 @@ public class ChksumUtils { Console.WriteLine("File not found in remote:"); Console.WriteLine($"\t{filename}\n"); - logger.Verbose("{filename} could not be found in the remote database", filename); + logger.Information("{filename} could not be found in the remote database", filename); } } }