Compare commits

...

2 Commits

Author SHA1 Message Date
9d6b1385c8
Add new hashing algorithms
Add XxHash
Code cleanup
2023-07-02 15:20:13 +02:00
f65108425c
New hashing algorithm 2023-06-29 19:53:00 +02:00
3 changed files with 108 additions and 19 deletions

View File

@ -18,9 +18,11 @@
<ItemGroup> <ItemGroup>
<PackageReference Include="Microsoft.Data.Sqlite" Version="7.0.8" /> <PackageReference Include="Microsoft.Data.Sqlite" Version="7.0.8" />
<PackageReference Include="MurmurHash.Net" Version="0.0.2" />
<PackageReference Include="Serilog" Version="3.0.1" /> <PackageReference Include="Serilog" Version="3.0.1" />
<PackageReference Include="Serilog.Sinks.Console" Version="4.1.0" /> <PackageReference Include="Serilog.Sinks.Console" Version="4.1.0" />
<PackageReference Include="Serilog.Sinks.File" Version="5.0.0" /> <PackageReference Include="Serilog.Sinks.File" Version="5.0.0" />
<PackageReference Include="Standart.Hash.xxHash" Version="4.0.5" />
</ItemGroup> </ItemGroup>
</Project> </Project>

View File

@ -58,6 +58,7 @@ public class Program {
static void PrintAvailableOptions() { static void PrintAvailableOptions() {
String[] options = { String[] options = {
"checksum", "checksum",
"compareDatabases",
"compareChecksums", "compareChecksums",
"createDB", "createDB",
"checkIfFileWasDeleted", "checkIfFileWasDeleted",

View File

@ -4,6 +4,8 @@ using System.Security.Cryptography;
using Microsoft.Data.Sqlite; using Microsoft.Data.Sqlite;
using Serilog; using Serilog;
using Serilog.Events; using Serilog.Events;
using MurmurHash.Net;
using Standart.Hash.xxHash;
namespace Chksum.Utils; namespace Chksum.Utils;
public class ChksumUtils { public class ChksumUtils {
@ -16,7 +18,7 @@ public class ChksumUtils {
private int getTotalFileCount() { private int getTotalFileCount() {
int totalFileCount = Directory.GetFiles(Directory.GetCurrentDirectory(), "*", SearchOption.AllDirectories).Length; int totalFileCount = Directory.GetFiles(Directory.GetCurrentDirectory(), "*", SearchOption.AllDirectories).Length;
logger.Debug("Total file count is {totalFileCount}", totalFileCount); logger.Debug("Total file count is {totalFileCount}", totalFileCount);
return totalFileCount - 3; // Remove the program, datbase and library from the totalFileCount return totalFileCount - 4; // Remove the program, datbase, log and library from the totalFileCount
} }
private string[] indexFiles() { private string[] indexFiles() {
@ -107,20 +109,104 @@ public class ChksumUtils {
return new Dictionary<string, string>(checksums); return new Dictionary<string, string>(checksums);
} }
/// <summary>
/// Computes a 32-bit Murmur-based checksum for every file in <paramref name="filenames"/>.
/// Files are opened and hashed in parallel via <c>Parallel.ForEach</c>.
/// </summary>
/// <param name="filenames">Paths of the files to checksum; each path becomes a key of the result.</param>
/// <returns>A dictionary mapping each file path to the value returned by <c>CalculateMurmurHash32</c>.</returns>
private Dictionary<string, uint> CalculateChecksumsWithMurmur(string[] filenames) {
    ConcurrentDictionary<string, uint> checksums = new ConcurrentDictionary<string, uint>();

    Parallel.ForEach(filenames, (filename, state) => {
        using (var stream = File.OpenRead(filename)) {
            var hash = CalculateMurmurHash32(stream);
            // ConcurrentDictionary.TryAdd is already thread-safe, so no extra lock is
            // taken here; locking would only serialise the parallel workers.
            checksums.TryAdd(filename, hash);
        }
    });

    logger.Debug("All files were checksummed");

    // Hand callers a plain Dictionary snapshot rather than the concurrent collection.
    return new Dictionary<string, uint>(checksums);
}
/// <summary>
/// Computes a 32-bit MurmurHash3-based digest that covers the whole content of
/// <paramref name="stream"/>.
/// </summary>
/// <remarks>
/// <c>MurmurHash3.Hash32</c> is called once per chunk. The stream is read in
/// 4096-byte chunks and the digest of each chunk is used as the seed for the next one,
/// so every byte of the stream influences the result. Each chunk is filled completely
/// (except the last one), which keeps the result independent of how many bytes a single
/// <c>Stream.Read</c> call happens to return. The value is therefore a chained digest,
/// not the plain MurmurHash3 of the full content.
/// </remarks>
/// <param name="stream">Readable stream positioned at the start of the data to hash.</param>
/// <returns>The chained digest, or the initial seed (123456) when the stream is empty.</returns>
private uint CalculateMurmurHash32(Stream stream) {
    const int bufferSize = 4096;
    const uint seed = 123456U;

    var buffer = new byte[bufferSize];
    uint hash = seed;

    while (true) {
        // Keep reading until the chunk is full or the stream is exhausted.
        int filled = 0;
        int bytesRead;
        while (filled < bufferSize && (bytesRead = stream.Read(buffer, filled, bufferSize - filled)) > 0) {
            filled += bytesRead;
        }

        if (filled == 0) {
            break;
        }

        // Hash only the bytes actually read and feed the running digest in as the seed.
        hash = MurmurHash3.Hash32(bytes: new ReadOnlySpan<byte>(buffer, 0, filled), seed: hash);

        if (filled < bufferSize) {
            // A partially filled chunk means the end of the stream was reached.
            break;
        }
    }

    return hash;
}
/// <summary>
/// Builds a map from file path to 64-bit checksum by running <c>CalculateXxHash3</c>
/// on every entry of <paramref name="filenames"/> in parallel.
/// </summary>
/// <param name="filenames">Paths of the files to open and checksum.</param>
/// <returns>A dictionary keyed by file path holding the checksum computed for that file.</returns>
private Dictionary<string, ulong> CalculateChecksumsWithXxHash3(string[] filenames) {
    var hashesByFile = new ConcurrentDictionary<string, ulong>();

    Parallel.ForEach(filenames, path => {
        // The stream is disposed when the lambda returns, i.e. after the entry is stored.
        using var input = File.OpenRead(path);
        hashesByFile.TryAdd(path, CalculateXxHash3(input));
    });

    // Copy into a regular Dictionary for the caller.
    return new Dictionary<string, ulong>(hashesByFile);
}
/// <summary>
/// Computes a 64-bit xxHash3-based digest that covers the whole content of
/// <paramref name="stream"/>.
/// </summary>
/// <remarks>
/// The stream is read in 4096-byte chunks. Each chunk is hashed with
/// <c>xxHash3.ComputeHash</c>, using the digest of the previous chunk as the seed, so every
/// byte of the stream influences the result. Each chunk is filled completely (except the
/// last one), which keeps the result independent of how many bytes a single
/// <c>Stream.Read</c> call happens to return. The value is therefore a chained digest,
/// not the plain xxHash3 of the full content.
/// </remarks>
/// <param name="stream">Readable stream positioned at the start of the data to hash.</param>
/// <returns>The chained digest, or the initial seed (123456) when the stream is empty.</returns>
private ulong CalculateXxHash3(Stream stream) {
    const int bufferSize = 4096;
    const ulong seed = 123456U;

    var buffer = new byte[bufferSize];
    ulong hash = seed;

    while (true) {
        // Keep reading until the chunk is full or the stream is exhausted.
        int filled = 0;
        int bytesRead;
        while (filled < bufferSize && (bytesRead = stream.Read(buffer, filled, bufferSize - filled)) > 0) {
            filled += bytesRead;
        }

        if (filled == 0) {
            break;
        }

        // Hash only the bytes actually read and feed the running digest in as the seed.
        hash = xxHash3.ComputeHash(buffer, filled, hash);

        if (filled < bufferSize) {
            // A partially filled chunk means the end of the stream was reached.
            break;
        }
    }

    return hash;
}
public void doTheThing(string hashalgo, int bufferSize) {
using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) { using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) {
if (getTotalFileCount() < 1) { if (getTotalFileCount() < 1) {
logger.Information("There were no files to checksum"); logger.Information("There were no files to checksum");
return; return;
} }
connection.Open(); connection.Open();
Dictionary<string, string> fileHashes = CalculateChecksums(indexFiles());
Dictionary<string, object> fileHashes;
Dictionary<string, ulong> fileHashesXxHash3;
Dictionary<string, uint> fileHashesMurmur;
Dictionary<string, string> fileHashesMD5;
switch (hashalgo) {
case "MD5":
fileHashesMD5 = CalculateChecksums(indexFiles());
fileHashes = fileHashesMD5.ToDictionary(kv => kv.Key, kv => (object)kv.Value);
break;
case "Murmur":
fileHashesMurmur = CalculateChecksumsWithMurmur(indexFiles());
fileHashes = fileHashesMurmur.ToDictionary(kv => kv.Key, kv => (object)kv.Value);
break;
case "XxHash":
fileHashesXxHash3 = CalculateChecksumsWithXxHash3(indexFiles());
fileHashes = fileHashesXxHash3.ToDictionary(kv => kv.Key, kv => (object)kv.Value);
break;
default:
logger.Error("No valid hash algorithm was selected");
throw new Exception($"{hashalgo} is not a valid option. Valid options are MD5, Murmur and XxHash");
}
foreach (var file in fileHashes) { foreach (var file in fileHashes) {
string absolutePathToFile = file.Key; string absolutePathToFile = file.Key;
string fileName = Path.GetFileName(absolutePathToFile); string fileName = Path.GetFileName(absolutePathToFile);
string pathToFile = Path.GetRelativePath(DatabaseRoot, absolutePathToFile); string pathToFile = Path.GetRelativePath(DatabaseRoot, absolutePathToFile);
string fileHash = file.Value; var fileHash = file.Value;
if (checkIfFileMovedAndUpdatePathToFile(fileHash, fileName, pathToFile) == false && checkIfFileAlreadyExistsInDatabase(fileHash, fileName) == false) { if (checkIfFileMovedAndUpdatePathToFile(fileHash, fileName, pathToFile) == false && checkIfFileAlreadyExistsInDatabase(fileHash, fileName) == false) {
var command = connection.CreateCommand(); var command = connection.CreateCommand();
@ -140,7 +226,7 @@ public class ChksumUtils {
} }
} }
private bool checkIfFileAlreadyExistsInDatabase(string fileHash, string pathToFile) { private bool checkIfFileAlreadyExistsInDatabase(object fileHash, string pathToFile) {
string filehash = string.Empty; string filehash = string.Empty;
string pathtofile = string.Empty; string pathtofile = string.Empty;
bool doesExist = false; bool doesExist = false;
@ -153,7 +239,7 @@ public class ChksumUtils {
@" @"
SELECT filehash, pathtofile FROM file WHERE filehash = $filehash SELECT filehash, pathtofile FROM file WHERE filehash = $filehash
"; ";
command.Parameters.AddWithValue("$filehash", fileHash); command.Parameters.AddWithValue("$filehash", fileHash.ToString());
using (var reader = command.ExecuteReader()) { using (var reader = command.ExecuteReader()) {
while (reader.Read()) { while (reader.Read()) {
@ -161,17 +247,17 @@ public class ChksumUtils {
pathtofile = reader.GetString(1); pathtofile = reader.GetString(1);
} }
} }
logger.Verbose("{pathToFile} with the hash {fileHash} was successfully loaded", pathToFile, fileHash); logger.Verbose("{pathToFile} with the hash {fileHash} was successfully loaded", pathToFile, fileHash.ToString());
} }
if (fileHash == filehash) { if (fileHash.ToString() == filehash) {
logger.Verbose("File with filehash {filehash} already exists in the database", filehash); logger.Verbose("File with filehash {filehash} already exists in the database", filehash);
doesExist = true; doesExist = true;
} }
return doesExist; return doesExist;
} }
private bool checkIfFileMovedAndUpdatePathToFile(string fileHash, string fileName, string pathToFile) { private bool checkIfFileMovedAndUpdatePathToFile(object fileHash, string fileName, string pathToFile) {
string pathtofile = string.Empty; string pathtofile = string.Empty;
bool wasMoved = false; bool wasMoved = false;
@ -183,7 +269,7 @@ public class ChksumUtils {
@" @"
SELECT pathtofile FROM file WHERE filehash = $filehash SELECT pathtofile FROM file WHERE filehash = $filehash
"; ";
command.Parameters.AddWithValue("$filehash", fileHash); command.Parameters.AddWithValue("$filehash", fileHash.ToString());
using (var reader = command.ExecuteReader()) { using (var reader = command.ExecuteReader()) {
while (reader.Read()) { while (reader.Read()) {
@ -200,15 +286,15 @@ public class ChksumUtils {
WHERE filehash = $filehash WHERE filehash = $filehash
"; ";
command2.Parameters.AddWithValue("$newpathtofile", pathToFile); command2.Parameters.AddWithValue("$newpathtofile", pathToFile);
command2.Parameters.AddWithValue("$filehash", fileHash); command2.Parameters.AddWithValue("$filehash", fileHash.ToString());
command2.ExecuteNonQuery(); command2.ExecuteNonQuery();
Console.WriteLine("File moved or is a duplicate:"); //Console.WriteLine("File moved or is a duplicate:");
Console.WriteLine($"\tfrom\t{pathToFile}"); //Console.WriteLine($"\tfrom\t{pathToFile}");
Console.WriteLine($"\tto \t{pathtofile}\n"); //Console.WriteLine($"\tto \t{pathtofile}\n");
wasMoved = true; wasMoved = true;
} }
logger.Verbose("{fileName} which is located at {pathToFile} relative to the database with the hash {fileHash} was successfully checked", fileName, pathToFile, fileHash); logger.Verbose("{fileName} which is located at {pathToFile} relative to the database with the hash {fileHash} was successfully checked", fileName, pathToFile, fileHash.ToString());
} }
return wasMoved; return wasMoved;
} }
@ -242,9 +328,9 @@ public class ChksumUtils {
deleteCommand.Parameters.AddWithValue("$pathtofile", pathToFile); deleteCommand.Parameters.AddWithValue("$pathtofile", pathToFile);
deleteCommand.ExecuteNonQuery(); deleteCommand.ExecuteNonQuery();
Console.WriteLine("File deleted:"); //Console.WriteLine("File deleted:");
Console.WriteLine($"\t{pathToFile}\n"); //Console.WriteLine($"\t{pathToFile}\n");
logger.Verbose("File deleted: {pathToFile}", pathToFile); logger.Information("File deleted: {pathToFile}", pathToFile);
} }
} }
logger.Information("All deleted files were successfully removed from the database"); logger.Information("All deleted files were successfully removed from the database");
@ -304,7 +390,7 @@ public class ChksumUtils {
Console.WriteLine("File not found in remote:"); Console.WriteLine("File not found in remote:");
Console.WriteLine($"\t{filename}\n"); Console.WriteLine($"\t{filename}\n");
logger.Verbose("{filename} could not be found in the remote database", filename); logger.Information("{filename} could not be found in the remote database", filename);
} }
} }
} }