23 Commits

Author SHA1 Message Date
7b00a87620 Merge pull request 'feature/NewHashingAlgorithms' (#6) from feature/NewHashingAlgorithms into main
Reviewed-on: #6
2023-07-03 22:34:15 +00:00
86111732c2 Merge pull request 'Runtime dependency' (#5) from feature/BackwardsCompatibility into feature/NewHashingAlgorithms
Reviewed-on: #5
2023-07-03 22:20:25 +00:00
d7aa7a6d98 Runtime dependency 2023-07-04 00:18:09 +02:00
336eb5b73d Fix logging 2023-07-03 23:56:27 +02:00
b011f08172 feature/progressBar (#4)
# Add progress bar
Added a progress bar so that one can see how far along the program is.

Reviewed-on: #4
Co-authored-by: ProfessionalUwU <andre.fuhry@hopeless-cloud.xyz>
Co-committed-by: ProfessionalUwU <andre.fuhry@hopeless-cloud.xyz>
2023-07-03 21:50:38 +00:00
02a0bddd7e Major advancements
New options
Switched to wal (Write ahead log)
Now using redis to cache filehashes
New option to dump redis data into sqlite database
2023-07-03 00:59:50 +02:00
d80a5f5e6b Add option for choosing hash algo 2023-07-02 15:32:46 +02:00
9d6b1385c8 Add new hashing algorithms
Add XxHash
Code cleanup
2023-07-02 15:20:13 +02:00
f65108425c New hashing algorithm 2023-06-29 19:53:00 +02:00
f9cdac5f92 Merge pull request 'feature/logging' (#3) from feature/logging into main
Reviewed-on: #3
2023-06-29 17:01:40 +00:00
e99ca810f6 Add instructions for verbose output 2023-06-29 03:58:28 +02:00
184e525adb Add logging 2023-06-29 03:49:13 +02:00
1ee5114dab Merge pull request 'feature/multi-threading' (#2) from feature/multi-threading into main
Reviewed-on: #2
2023-06-29 00:15:11 +00:00
f403b77864 Code cleanup 2023-06-29 02:03:41 +02:00
42320ebf8d Use ConcurrentDictionary 2023-06-28 13:18:33 +02:00
787721381d refactor doTheThing 2023-06-28 00:54:55 +02:00
7d7e9bac6c refactor checkIfFileWasDeleted 2023-06-28 00:54:30 +02:00
2b4019d7cf move return outside of using 2023-06-28 00:48:14 +02:00
be8180a60d refactor doTheThing 2023-06-28 00:41:38 +02:00
7f6f4c5253 Streamline code 2023-06-27 23:56:43 +02:00
e12117fba8 Small fix 2023-06-27 23:12:50 +02:00
763cde4e2d Use dictionary 2023-06-27 22:58:07 +02:00
531a4676e9 First implementation of parallel checksumming 2023-06-27 22:31:24 +02:00
5 changed files with 307 additions and 62 deletions

View File

@ -2,6 +2,17 @@
Checksums every file under the current directory
## Runtime dependency
If you use version 2.0.0 or above, you will need Redis!
```bash
pacman -S redis && systemctl start redis
```
This installs Redis and starts the service.
Don't forget to enable the service if you don't want to start it manually every time you run the program.
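For context, the program reaches Redis through StackExchange.Redis. Below is a minimal sketch of that connection, mirroring the `ConnectionMultiplexer.Connect("localhost")` call further down in the utils class (a local instance on the default port with no authentication is assumed):
```cs
using StackExchange.Redis;

// Connect to the local Redis instance and grab the default logical database.
ConnectionMultiplexer redis = ConnectionMultiplexer.Connect("localhost");
IDatabase db = redis.GetDatabase();
```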
## Run Locally
Clone the project
@ -44,3 +55,18 @@ Run executable
```bash
./Chksum
```
## Enabling verbose output for troubleshooting
1. Open the file called chksum.cs with your editor of choice.
2. At the top you will find the logger configuration, which you can change. It should look like this:
```cs
private ILogger logger = new LoggerConfiguration()
.MinimumLevel.Debug()
.WriteTo.Console(restrictedToMinimumLevel: LogEventLevel.Error)
.WriteTo.File("chksum.log")
.CreateLogger();
```
3. Change the minimum level of the logger to Verbose (see the sketch after this list).
4. Compile the program.
5. Profit. Now you will be able to see in detail what the program is doing.
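After step 3 the configuration would look roughly like this (a sketch; only the minimum level changes, the sinks stay as they are):
```cs
private ILogger logger = new LoggerConfiguration()
    .MinimumLevel.Verbose()
    .WriteTo.Console(restrictedToMinimumLevel: LogEventLevel.Error)
    .WriteTo.File("chksum.log")
    .CreateLogger();
```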

View File

@ -18,6 +18,12 @@
<ItemGroup>
<PackageReference Include="Microsoft.Data.Sqlite" Version="7.0.8" />
<PackageReference Include="MurmurHash.Net" Version="0.0.2" />
<PackageReference Include="Serilog" Version="3.0.1" />
<PackageReference Include="Serilog.Sinks.Console" Version="4.1.0" />
<PackageReference Include="Serilog.Sinks.File" Version="5.0.0" />
<PackageReference Include="StackExchange.Redis" Version="2.6.116" />
<PackageReference Include="Standart.Hash.xxHash" Version="4.0.5" />
</ItemGroup>
</Project>

View File

@ -8,7 +8,7 @@ public class Program {
Console.WriteLine("Please specify an option.");
PrintAvailableOptions();
return;
} else if (args.Length > 1 && args[0] != "compareDatabases") {
} else if (args.Length > 3) {
Console.WriteLine("Too many options.");
return;
}
@ -25,19 +25,31 @@ public class Program {
Console.WriteLine("Starting the checksum process.");
Console.ResetColor();
utils.doTheThing();
try {
if (args[1] == "MD5") {
utils.doTheThing(args[1]);
}
int bufferSize = int.Parse(args[2]);
utils.doTheThing(args[1], bufferSize);
}
catch (FormatException) {
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine("Buffer was not a valid integer value. Please specify a valid integer value for the buffer size");
Console.ResetColor();
}
Console.ForegroundColor = ConsoleColor.Green;
Console.WriteLine("Checksum process finished");
break;
case "saveToSqlite":
Console.ResetColor();
utils.saveToSqlite();
break;
case "compareDatabases":
Console.ResetColor();
utils.compareDatabases(args[1]);
break;
case "createDB":
utils.initializeDB();
break;
case "checkIfFileWasDeleted":
Console.ResetColor();
utils.checkIfFileWasDeleted();
@ -57,9 +69,10 @@ public class Program {
static void PrintAvailableOptions() {
String[] options = {
"checksum",
"checksum - MD5, Murmur and XxHash - Default buffer size is 4096",
"compareDatabases",
"compareChecksums",
"createDB",
"saveToSqlite",
"checkIfFileWasDeleted",
"help"
};

View File

@ -1,16 +1,39 @@
using System.Collections.Concurrent;
using System.Reflection;
using System.Security.Cryptography;
using Microsoft.Data.Sqlite;
using Serilog;
using Serilog.Events;
using MurmurHash.Net;
using Standart.Hash.xxHash;
using StackExchange.Redis;
namespace Chksum.Utils;
public class ChksumUtils {
private ILogger logger = new LoggerConfiguration()
.MinimumLevel.Debug()
.WriteTo.Console(restrictedToMinimumLevel: LogEventLevel.Error)
.WriteTo.File("chksum.log")
.CreateLogger();
private int getFileCount() {
int fileCount = Directory.GetFiles(Directory.GetCurrentDirectory()).Length; // Get file count in current directory
return fileCount;
private int getTotalFileCount() {
int totalFileCount = Directory.GetFiles(Directory.GetCurrentDirectory(), "*", SearchOption.AllDirectories).Length;
logger.Debug("Total file count is {totalFileCount}", totalFileCount);
return totalFileCount - 4; // Remove the program, database, log and library from the totalFileCount
}
private string[] indexFiles() {
string[] indexedFiles = Directory.GetFiles(Directory.GetCurrentDirectory(), "*", SearchOption.AllDirectories);
string[] filesToExclude = { "Chksum", "chksum.db", "libe_sqlite3.so" };
indexedFiles = indexedFiles.Where(file => !filesToExclude.Contains(Path.GetFileName(file))).ToArray();
logger.Information("All files were indexed");
return indexedFiles;
}
public string DatabaseRoot { get; set; } = string.Empty;
public void getBaseDir() {
DatabaseRoot = AppDomain.CurrentDomain.BaseDirectory;
logger.Debug("DatabaseRoot is {DatabaseRoot}", DatabaseRoot);
}
public string libraryPath { get; set; } = string.Empty;
@ -22,14 +45,17 @@ public class ChksumUtils {
byte[] buffer = new byte[resourceStream.Length];
resourceStream.Read(buffer, 0, buffer.Length);
File.WriteAllBytes(libraryPath, buffer);
logger.Debug("libe_sqlite3.so was successfully created");
} else {
logger.Error("libe_sqlite3.so could not be loaded");
throw new Exception(libraryPath + " could not be loaded");
}
}
}
public void initializeDB() {
private void initializeDB() {
if (File.Exists("chksum.db")) {
logger.Information("A database already exits");
return;
}
@ -48,6 +74,15 @@ public class ChksumUtils {
);
";
command.ExecuteNonQuery();
var walCommand = connection.CreateCommand();
walCommand.CommandText =
@"
PRAGMA journal_mode = 'wal'
";
walCommand.ExecuteNonQuery();
logger.Information("Database was successfully created");
}
}
@ -59,49 +94,197 @@ public class ChksumUtils {
vacuum;
";
command.ExecuteNonQuery();
logger.Debug("Database was successfully vacuumed");
}
}
private string CalculateMD5(string filename) {
using (var md5 = System.Security.Cryptography.MD5.Create()) {
private void UpdateProgressBar(int current, int total) {
int progress = (int)((double)current / total * 100);
string progressText = $"Progress: {progress}% [{current}/{total}]";
Console.Write("\r" + progressText.PadRight(Console.WindowWidth));
}
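// Hashes every file in parallel with MD5; results go into a thread-safe ConcurrentDictionary and the progress bar is updated after each file.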
private Dictionary<string, string> CalculateChecksums(string[] filenames) {
ConcurrentDictionary<string, string> checksums = new ConcurrentDictionary<string, string>();
int totalFiles = filenames.Length;
int processedFiles = 0;
Parallel.ForEach(filenames, (filename, state) => {
using (var md5 = MD5.Create()) {
using (var stream = File.OpenRead(filename)) {
var hash = md5.ComputeHash(stream);
return BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant();
var checksum = BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant();
lock (checksums) {
checksums.TryAdd(filename, checksum);
}
}
Interlocked.Increment(ref processedFiles);
UpdateProgressBar(processedFiles, totalFiles);
}
});
return new Dictionary<string, string>(checksums);
}
public void doTheThing() {
foreach (var directory in Directory.GetDirectories(Directory.GetCurrentDirectory()))
private Dictionary<string, uint> CalculateChecksumsWithMurmur(string[] filenames, int userDefinedBufferSize) {
ConcurrentDictionary<string, uint> checksums = new ConcurrentDictionary<string, uint>();
int totalFiles = filenames.Length;
int processedFiles = 0;
Parallel.ForEach(filenames, (filename, state) => {
using (var stream = File.OpenRead(filename)) {
var hash = CalculateMurmurHash32(stream, userDefinedBufferSize);
lock (checksums) {
checksums.TryAdd(filename, hash);
}
Interlocked.Increment(ref processedFiles);
UpdateProgressBar(processedFiles, totalFiles);
}
});
return new Dictionary<string, uint>(checksums);
}
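// Streams the file in chunks of userDefinedBufferSize bytes and hashes each chunk with 32-bit MurmurHash3; the hash computed for the last chunk read is the value that gets returned.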
private uint CalculateMurmurHash32(Stream stream, int userDefinedBufferSize) {
int bufferSize = userDefinedBufferSize;
const uint seed = 123456U;
var buffer = new byte[bufferSize];
uint hash = seed;
int bytesRead;
ReadOnlySpan<byte> span = buffer;
while ((bytesRead = stream.Read(buffer, 0, bufferSize)) > 0) {
hash = MurmurHash3.Hash32(bytes: span, seed: 123456U);
}
return hash;
}
private Dictionary<string, ulong> CalculateChecksumsWithXxHash3(string[] filenames, int userDefinedBufferSize) {
ConcurrentDictionary<string, ulong> checksums = new ConcurrentDictionary<string, ulong>();
int totalFiles = filenames.Length;
int processedFiles = 0;
Parallel.ForEach(filenames, (filename, state) => {
using (var stream = File.OpenRead(filename)) {
var hash = CalculateXxHash3(stream, userDefinedBufferSize);
checksums.TryAdd(filename, hash);
}
Interlocked.Increment(ref processedFiles);
UpdateProgressBar(processedFiles, totalFiles);
});
return new Dictionary<string, ulong>(checksums);
}
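// Same chunked approach, but with 64-bit xxHash3; again, only the hash of the last chunk read is returned.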
private ulong CalculateXxHash3(Stream stream, int userDefinedBufferSize) {
int bufferSize = userDefinedBufferSize;
const ulong seed = 123456U;
var buffer = new byte[bufferSize];
ulong hash = seed;
int bytesRead;
while ((bytesRead = stream.Read(buffer, 0, buffer.Length)) > 0) {
hash = xxHash3.ComputeHash(buffer, buffer.Length);
}
return hash;
}
public void doTheThing(string hashalgo, int bufferSize = 4096) {
ConnectionMultiplexer redis = ConnectionMultiplexer.Connect("localhost");
IDatabase db = redis.GetDatabase();
if (getTotalFileCount() < 1) {
logger.Information("There were no files to checksum");
return;
}
Dictionary<string, object> fileHashes;
Dictionary<string, ulong> fileHashesXxHash3;
Dictionary<string, uint> fileHashesMurmur;
Dictionary<string, string> fileHashesMD5;
switch (hashalgo) {
case "MD5":
fileHashesMD5 = CalculateChecksums(indexFiles());
fileHashes = fileHashesMD5.ToDictionary(kv => kv.Key, kv => (object)kv.Value);
break;
case "Murmur":
fileHashesMurmur = CalculateChecksumsWithMurmur(indexFiles(), bufferSize);
fileHashes = fileHashesMurmur.ToDictionary(kv => kv.Key, kv => (object)kv.Value);
break;
case "XxHash":
fileHashesXxHash3 = CalculateChecksumsWithXxHash3(indexFiles(), bufferSize);
fileHashes = fileHashesXxHash3.ToDictionary(kv => kv.Key, kv => (object)kv.Value);
break;
default:
logger.Error("No valid hash algorithm was selected");
throw new Exception($"{hashalgo} is not a valid option. Valid options are MD5, Murmur and XxHash");
}
logger.Information("All files were checksummed");
HashEntry[] hashEntries = fileHashes.Select(kv => new HashEntry(kv.Key, kv.Value.ToString())).ToArray();
string hashKey = "fileHashes";
db.HashSet(hashKey, hashEntries);
logger.Information("Dictionary inserted into Redis.");
}
public void saveToSqlite() {
initializeDB();
ConnectionMultiplexer redis = ConnectionMultiplexer.Connect("localhost");
IDatabase db = redis.GetDatabase();
HashEntry[] fileHashes = db.HashGetAll("fileHashes");
logger.Information("Retrived all values from redis");
using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) {
Directory.SetCurrentDirectory(directory); // Set new root
if (getFileCount() >= 1) {
DirectoryInfo dir = new DirectoryInfo(Directory.GetCurrentDirectory());
FileInfo[] files = dir.GetFiles();
foreach (FileInfo file in files) {
string fileName = file.Name;
string absolutePathToFile = Path.GetFullPath(fileName);
string pathToFile = Path.GetRelativePath(DatabaseRoot, absolutePathToFile);
string fileHash = CalculateMD5(fileName);
foreach (var file in fileHashes) {
if (checkIfFileMovedAndUpdatePathToFile(fileHash, fileName, pathToFile) == false && checkIfFileAlreadyExistsInDatabase(fileHash, fileName) == false) {
var absolutePathToFile = file.Name.ToString();
string fileName = Path.GetFileName(absolutePathToFile.ToString());
string pathToFile = Path.GetRelativePath(DatabaseRoot, absolutePathToFile.ToString());
var fileHash = file.Value.ToString();
if (!checkIfFileMovedAndUpdatePathToFile(fileHash, fileName, pathToFile) && !checkIfFileAlreadyExistsInDatabase(fileHash, fileName)) {
connection.Open();
var command = connection.CreateCommand();
command.CommandText =
var InsertCommand = connection.CreateCommand();
InsertCommand.CommandText =
@"
INSERT INTO file (filehash, filename, pathtofile)
VALUES ($filehash, $filename, $pathtofile)
";
command.Parameters.AddWithValue("$filehash", fileHash);
command.Parameters.AddWithValue("$filename", fileName);
command.Parameters.AddWithValue("$pathtofile", pathToFile);
command.ExecuteNonQuery();
InsertCommand.Parameters.AddWithValue("$filehash", fileHash);
InsertCommand.Parameters.AddWithValue("$filename", fileName);
InsertCommand.Parameters.AddWithValue("$pathtofile", pathToFile);
InsertCommand.ExecuteNonQuery();
logger.Verbose("{fileName} which is located at {pathToFile} relative to the database with the hash {fileHash} was successfully inserted into the database", fileName, pathToFile, fileHash);
}
}
}
doTheThing();
logger.Information("All filehashes were successfully inserted into the database");
var keys = db.Execute("KEYS", "*");
if (keys == null) {
logger.Error("No values found in redis");
return;
}
foreach (var key in (RedisValue[])keys) {
db.KeyDelete((RedisKey)key.ToString());
}
logger.Information("Redis was successfully cleared of any remaining data");
}
private bool checkIfFileAlreadyExistsInDatabase(string fileHash, string pathToFile) {
@ -125,9 +308,11 @@ public class ChksumUtils {
pathtofile = reader.GetString(1);
}
}
logger.Verbose("{pathToFile} with the hash {fileHash} was successfully loaded", pathToFile, fileHash);
}
if (fileHash == filehash) {
logger.Verbose("File with filehash {filehash} already exists in the database", filehash);
doesExist = true;
}
return doesExist;
@ -165,16 +350,18 @@ public class ChksumUtils {
command2.Parameters.AddWithValue("$filehash", fileHash);
command2.ExecuteNonQuery();
Console.WriteLine("File moved:");
Console.WriteLine($"\tfrom\t{pathToFile}");
Console.WriteLine($"\tto \t{pathtofile}\n");
logger.Verbose("File moved or is a duplicate:\n\tfrom\t{pathToFile}\n\tto \t{pathtofile}\n", pathToFile, pathtofile);
wasMoved = true;
}
logger.Verbose("{fileName} which is located at {pathToFile} relative to the database with the hash {fileHash} was successfully checked", fileName, pathToFile, fileHash);
}
return wasMoved;
}
}
public void checkIfFileWasDeleted() {
saveToSqlite();
string pathToFile = string.Empty;
using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) {
@ -190,7 +377,10 @@ public class ChksumUtils {
while (reader.Read()) {
pathToFile = reader.GetString(0);
if (!File.Exists(pathToFile)) {
if (File.Exists(pathToFile)) {
logger.Verbose("{pathToFile} exists", pathToFile);
continue;
}
var deleteCommand = connection.CreateCommand();
deleteCommand.CommandText =
@"
@ -200,11 +390,10 @@ public class ChksumUtils {
deleteCommand.Parameters.AddWithValue("$pathtofile", pathToFile);
deleteCommand.ExecuteNonQuery();
Console.WriteLine("File deleted:");
Console.WriteLine($"\t{pathToFile}\n");
}
logger.Information("File deleted:\n\t{pathToFile}", pathToFile);
}
}
logger.Information("All deleted files were successfully removed from the database");
}
}
@ -230,12 +419,20 @@ public class ChksumUtils {
}
}
logger.Debug("All filehashes were successfully retrived from the database");
return filehashesFromDatabase;
}
public void compareDatabases(string filePathToOtherDatabase) {
saveToSqlite();
if (!File.Exists(filePathToOtherDatabase)) {
logger.Error("No database could be found at {filePathToOtherDatabase}", filePathToOtherDatabase);
throw new Exception("No database could be found at " + filePathToOtherDatabase);
}
List<string> filesThatDoNotExistsInTheRemote = getFilehashesFromDatabase("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadOnly").Except(getFilehashesFromDatabase("Data Source=" + filePathToOtherDatabase + ";Mode=ReadOnly")).ToList();
//List<string> filesThatDoNotExistsInTheOrigin = filehashesOfRemoteDatabase.Except(filehashesOfOriginDatabase).ToList();
foreach (string file in filesThatDoNotExistsInTheRemote) {
using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadOnly")) {
@ -256,13 +453,16 @@ public class ChksumUtils {
Console.WriteLine("File not found in remote:");
Console.WriteLine($"\t{filename}\n");
logger.Information("{filename} could not be found in the remote database", filename);
}
}
}
}
logger.Information("Compared both databases successfully");
}
public void cleanup() {
File.Delete(libraryPath);
logger.Information("Successfully deleted libe_sqlite3.so");
}
}