Compare commits


25 Commits
v1.0.0 ... main

Author SHA1 Message Date
61e25685e5
Add build badge
Signed-off-by: Gitea <gitea@hopeless-cloud.xyz>
2023-07-06 17:17:17 +00:00
90706e7796
code cleanup
made garbage code less garbage
2023-07-04 22:39:59 +02:00
7b00a87620
Merge pull request 'feature/NewHashingAlgorithms' (#6) from feature/NewHashingAlgorithms into main
Reviewed-on: #6
2023-07-03 22:34:15 +00:00
86111732c2
Merge pull request 'Runtime dependency' (#5) from feature/BackwardsCompatibility into feature/NewHashingAlgorithms
Reviewed-on: #5
2023-07-03 22:20:25 +00:00
d7aa7a6d98
Runtime dependency 2023-07-04 00:18:09 +02:00
336eb5b73d
Fix logging 2023-07-03 23:56:27 +02:00
b011f08172
feature/progressBar (#4)
# Add progress bar
Added a progress bar so that one can see how far along the program is.

Reviewed-on: #4
Co-authored-by: ProfessionalUwU <andre.fuhry@hopeless-cloud.xyz>
Co-committed-by: ProfessionalUwU <andre.fuhry@hopeless-cloud.xyz>
2023-07-03 21:50:38 +00:00
02a0bddd7e
Major advancements
New options
Switched to wal (Write ahead log)
Now using redis to cache filehashes
New option to dump redis data into sqlite database
2023-07-03 00:59:50 +02:00
d80a5f5e6b
Add option for choosing hash algo 2023-07-02 15:32:46 +02:00
9d6b1385c8
Add new hashing algorithms
Add XxHash
Code cleanup
2023-07-02 15:20:13 +02:00
f65108425c
New hashing algorithm 2023-06-29 19:53:00 +02:00
f9cdac5f92
Merge pull request 'feature/logging' (#3) from feature/logging into main
Reviewed-on: #3
2023-06-29 17:01:40 +00:00
e99ca810f6
Add instructions for verbose output 2023-06-29 03:58:28 +02:00
184e525adb
Add logging 2023-06-29 03:49:13 +02:00
1ee5114dab
Merge pull request 'feature/multi-threading' (#2) from feature/multi-threading into main
Reviewed-on: #2
2023-06-29 00:15:11 +00:00
f403b77864
Code cleanup 2023-06-29 02:03:41 +02:00
42320ebf8d
Use ConcurrentDictionary 2023-06-28 13:18:33 +02:00
787721381d
refactor doTheThing 2023-06-28 00:54:55 +02:00
7d7e9bac6c
refactor checkIfFileWasDeleted 2023-06-28 00:54:30 +02:00
2b4019d7cf
move return outside of using 2023-06-28 00:48:14 +02:00
be8180a60d
refactor doTheThing 2023-06-28 00:41:38 +02:00
7f6f4c5253
Streamline code 2023-06-27 23:56:43 +02:00
e12117fba8
Small fix 2023-06-27 23:12:50 +02:00
763cde4e2d
Use dictionary 2023-06-27 22:58:07 +02:00
531a4676e9
First implementation of parallel checksumming 2023-06-27 22:31:24 +02:00
5 changed files with 319 additions and 68 deletions

View File

@@ -1,7 +1,20 @@
[![status-badge](https://woodpecker.hopeless-cloud.xyz/api/badges/ProfessionalUwU/chksum/status.svg)](https://woodpecker.hopeless-cloud.xyz/ProfessionalUwU/chksum)
# chksum
Checksums every file under the current directory
## Runtime dependency
If you use version 2.0.0 or above, you will need Redis!
```bash
pacman -S redis && systemctl start redis
```
This installs Redis and starts the service.
Don't forget to enable the service if you don't want to start it manually every time you run the program.
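chksum talks to Redis through the StackExchange.Redis package (see the project file further down). If you want to confirm that Redis is reachable before running a checksum, a minimal sketch like the following can help (a hypothetical helper, assuming Redis on localhost with its default port; it is not part of chksum itself).
```cs
using StackExchange.Redis;

class RedisCheck {
    static void Main() {
        // Connect to the local Redis instance, the same way chksum does.
        ConnectionMultiplexer redis = ConnectionMultiplexer.Connect("localhost");
        IDatabase db = redis.GetDatabase();

        // Write and read back a throwaway hash entry to prove the round trip works.
        db.HashSet("connectivityTest", new HashEntry[] { new HashEntry("ok", "1") });
        HashEntry[] entries = db.HashGetAll("connectivityTest");
        System.Console.WriteLine($"Redis answered with {entries.Length} entry.");

        // Clean up the test key again.
        db.KeyDelete("connectivityTest");
    }
}
```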
## Run Locally
Clone the project
@@ -43,4 +56,19 @@ Run executable
```bash
./Chksum
```
```
## Enabling verbose output for troubleshooting
1. Open the file called chksum.cs with your editor of choice.
2. At the top you will find the logger configuration, which you can change. It should look like this:
```cs
private ILogger logger = new LoggerConfiguration()
.MinimumLevel.Debug()
.WriteTo.Console(restrictedToMinimumLevel: LogEventLevel.Error)
.WriteTo.File("chksum.log")
.CreateLogger();
```
3. Change the minimum level of the logger to Verbose (see the sketch after this list).
4. Compile the program.
5. Profit. Now you will be able to see what the program is doing in detail.
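For reference, here is a sketch of what the configuration from step 3 could look like once the minimum level is switched to Verbose (assuming the sinks stay exactly as shown above):
```cs
private ILogger logger = new LoggerConfiguration()
    .MinimumLevel.Verbose()
    .WriteTo.Console(restrictedToMinimumLevel: LogEventLevel.Error)
    .WriteTo.File("chksum.log")
    .CreateLogger();
```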

View File

@@ -18,6 +18,12 @@
<ItemGroup>
<PackageReference Include="Microsoft.Data.Sqlite" Version="7.0.8" />
<PackageReference Include="MurmurHash.Net" Version="0.0.2" />
<PackageReference Include="Serilog" Version="3.0.1" />
<PackageReference Include="Serilog.Sinks.Console" Version="4.1.0" />
<PackageReference Include="Serilog.Sinks.File" Version="5.0.0" />
<PackageReference Include="StackExchange.Redis" Version="2.6.116" />
<PackageReference Include="Standart.Hash.xxHash" Version="4.0.5" />
</ItemGroup>
</Project>

View File

@@ -8,7 +8,7 @@ public class Program {
Console.WriteLine("Please specify an option.");
PrintAvailableOptions();
return;
} else if (args.Length > 1 && args[0] != "compareDatabases") {
} else if (args.Length > 3) {
Console.WriteLine("Too many options.");
return;
}
@@ -19,26 +19,41 @@ public class Program {
utils.ExtractEmbeddedLibrary();
var option = args[0].ToLower();
Console.ForegroundColor = ConsoleColor.Green;
switch (args[0]) {
switch (option) {
case "checksum":
Console.WriteLine("Starting the checksum process.");
Console.ResetColor();
utils.doTheThing();
var hashAlgo = args[1].ToLower();
if (hashAlgo.Equals("md5")) {
utils.doTheThing(hashAlgo);
break;
}
try {
var bufferSize = int.Parse(args[2]);
utils.doTheThing(hashAlgo, bufferSize);
} catch (FormatException) {
Console.ForegroundColor = ConsoleColor.Red;
Console.WriteLine("Buffer was not a valid integer value. Please specify a valid integer value for the buffer size");
Console.ResetColor();
}
Console.ForegroundColor = ConsoleColor.Green;
Console.WriteLine("Checksum process finished");
break;
case "compareDatabases":
case "savetosqlite":
Console.ResetColor();
utils.saveToSqlite();
break;
case "comparedatabases":
Console.ResetColor();
utils.compareDatabases(args[1]);
break;
case "createDB":
utils.initializeDB();
break;
case "checkIfFileWasDeleted":
case "checkiffilewasdeleted":
Console.ResetColor();
utils.checkIfFileWasDeleted();
break;
@@ -57,16 +72,16 @@ public class Program {
static void PrintAvailableOptions() {
String[] options = {
"checksum",
"compareChecksums",
"createDB",
"checksum - MD5, Murmur and XxHash - Default buffer size is 4096",
"compareDatabases",
"saveToSqlite",
"checkIfFileWasDeleted",
"help"
};
Console.ResetColor();
Console.WriteLine("usage: chksum [option] \nHere is a list of all available options:");
foreach (String option in options) {
foreach (var option in options) {
Console.WriteLine("\t" + option);
}
}

View File

@@ -1,16 +1,39 @@
using System.Collections.Concurrent;
using System.Reflection;
using System.Security.Cryptography;
using Microsoft.Data.Sqlite;
using Serilog;
using Serilog.Events;
using MurmurHash.Net;
using Standart.Hash.xxHash;
using StackExchange.Redis;
namespace Chksum.Utils;
public class ChksumUtils {
private ILogger logger = new LoggerConfiguration()
.MinimumLevel.Debug()
.WriteTo.Console(restrictedToMinimumLevel: LogEventLevel.Error)
.WriteTo.File("chksum.log")
.CreateLogger();
private int getFileCount() {
int fileCount = Directory.GetFiles(Directory.GetCurrentDirectory()).Length; // Get file count in current directory
return fileCount;
private int getTotalFileCount() {
int totalFileCount = Directory.GetFiles(Directory.GetCurrentDirectory(), "*", SearchOption.AllDirectories).Length;
logger.Debug("Total file count is {totalFileCount}", totalFileCount);
return totalFileCount - 4; // Remove the program, database, log and library from the totalFileCount
}
private string[] indexFiles() {
string[] indexedFiles = Directory.GetFiles(Directory.GetCurrentDirectory(), "*", SearchOption.AllDirectories);
string[] filesToExclude = { "Chksum", "chksum.db", "libe_sqlite3.so" };
indexedFiles = indexedFiles.Where(file => !filesToExclude.Contains(Path.GetFileName(file))).ToArray();
logger.Information("All files were indexed");
return indexedFiles;
}
public string DatabaseRoot { get; set; } = string.Empty;
public void getBaseDir() {
DatabaseRoot = AppDomain.CurrentDomain.BaseDirectory;
logger.Debug("DatabaseRoot is {DatabaseRoot}", DatabaseRoot);
}
public string libraryPath { get; set; } = string.Empty;
@@ -22,14 +45,17 @@ public class ChksumUtils {
byte[] buffer = new byte[resourceStream.Length];
resourceStream.Read(buffer, 0, buffer.Length);
File.WriteAllBytes(libraryPath, buffer);
logger.Debug("libe_sqlite3.so was successfully created");
} else {
logger.Error("libe_sqlite3.so could not be loaded");
throw new Exception(libraryPath + " could not be loaded");
}
}
}
public void initializeDB() {
private void initializeDB() {
if (File.Exists("chksum.db")) {
logger.Information("A database already exits");
return;
}
@@ -48,6 +74,15 @@ public class ChksumUtils {
);
";
command.ExecuteNonQuery();
var walCommand = connection.CreateCommand();
walCommand.CommandText =
@"
PRAGMA journal_mode = 'wal'
";
walCommand.ExecuteNonQuery();
logger.Information("Database was successfully created");
}
}
@@ -59,49 +94,198 @@ public class ChksumUtils {
vacuum;
";
command.ExecuteNonQuery();
logger.Debug("Database was successfully vacuumed");
}
}
private string CalculateMD5(string filename) {
using (var md5 = System.Security.Cryptography.MD5.Create()) {
using (var stream = File.OpenRead(filename)) {
var hash = md5.ComputeHash(stream);
return BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant();
}
}
private void UpdateProgressBar(int current, int total) {
var progress = (int)((double)current / total * 100);
string progressText = $"Progress: {progress}% [{current}/{total}]";
Console.Write("\r" + progressText.PadRight(Console.WindowWidth));
}
public void doTheThing() {
foreach (var directory in Directory.GetDirectories(Directory.GetCurrentDirectory()))
using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) {
Directory.SetCurrentDirectory(directory); // Set new root
if (getFileCount() >= 1) {
DirectoryInfo dir = new DirectoryInfo(Directory.GetCurrentDirectory());
FileInfo[] files = dir.GetFiles();
foreach (FileInfo file in files) {
string fileName = file.Name;
string absolutePathToFile = Path.GetFullPath(fileName);
string pathToFile = Path.GetRelativePath(DatabaseRoot, absolutePathToFile);
string fileHash = CalculateMD5(fileName);
private Dictionary<string, string> CalculateChecksums(string[] filenames) {
ConcurrentDictionary<string, string> checksums = new ConcurrentDictionary<string, string>();
if (checkIfFileMovedAndUpdatePathToFile(fileHash, fileName, pathToFile) == false && checkIfFileAlreadyExistsInDatabase(fileHash, fileName) == false) {
connection.Open();
int totalFiles = filenames.Length;
int processedFiles = 0;
var command = connection.CreateCommand();
command.CommandText =
@"
INSERT INTO file (filehash, filename, pathtofile)
VALUES ($filehash, $filename, $pathtofile)
";
command.Parameters.AddWithValue("$filehash", fileHash);
command.Parameters.AddWithValue("$filename", fileName);
command.Parameters.AddWithValue("$pathtofile", pathToFile);
command.ExecuteNonQuery();
Parallel.ForEach(filenames, (filename, state) => {
using (var md5 = MD5.Create()) {
using (var stream = File.OpenRead(filename)) {
var hash = md5.ComputeHash(stream);
var checksum = BitConverter.ToString(hash).Replace("-", "").ToLowerInvariant();
lock (checksums) {
checksums.TryAdd(filename, checksum);
}
}
Interlocked.Increment(ref processedFiles);
UpdateProgressBar(processedFiles, totalFiles);
}
doTheThing();
});
return new Dictionary<string, string>(checksums);
}
private Dictionary<string, uint> CalculateChecksumsWithMurmur(string[] filenames, int userDefinedBufferSize) {
ConcurrentDictionary<string, uint> checksums = new ConcurrentDictionary<string, uint>();
int totalFiles = filenames.Length;
int processedFiles = 0;
Parallel.ForEach(filenames, (filename, state) => {
using (var stream = File.OpenRead(filename)) {
var hash = CalculateMurmurHash32(stream, userDefinedBufferSize);
lock (checksums) {
checksums.TryAdd(filename, hash);
}
Interlocked.Increment(ref processedFiles);
UpdateProgressBar(processedFiles, totalFiles);
}
});
return new Dictionary<string, uint>(checksums);
}
private uint CalculateMurmurHash32(Stream stream, int userDefinedBufferSize) {
int bufferSize = userDefinedBufferSize;
const uint seed = 123456U;
var buffer = new byte[bufferSize];
uint hash = seed;
int bytesRead;
ReadOnlySpan<byte> span = buffer;
while ((bytesRead = stream.Read(buffer, 0, bufferSize)) > 0) {
hash = MurmurHash3.Hash32(bytes: span, seed: 123456U);
}
return hash;
}
private Dictionary<string, ulong> CalculateChecksumsWithXxHash3(string[] filenames, int userDefinedBufferSize) {
ConcurrentDictionary<string, ulong> checksums = new ConcurrentDictionary<string, ulong>();
int totalFiles = filenames.Length;
int processedFiles = 0;
Parallel.ForEach(filenames, (filename, state) => {
using (var stream = File.OpenRead(filename)) {
var hash = CalculateXxHash3(stream, userDefinedBufferSize);
checksums.TryAdd(filename, hash);
}
Interlocked.Increment(ref processedFiles);
UpdateProgressBar(processedFiles, totalFiles);
});
return new Dictionary<string, ulong>(checksums);
}
private ulong CalculateXxHash3(Stream stream, int userDefinedBufferSize) {
int bufferSize = userDefinedBufferSize;
const ulong seed = 123456U;
var buffer = new byte[bufferSize];
ulong hash = seed;
int bytesRead;
while ((bytesRead = stream.Read(buffer, 0, buffer.Length)) > 0) {
hash = xxHash3.ComputeHash(buffer, buffer.Length);
}
return hash;
}
public void doTheThing(string hashAlgo, int bufferSize = 4096) {
ConnectionMultiplexer redis = ConnectionMultiplexer.Connect("localhost");
IDatabase db = redis.GetDatabase();
if (getTotalFileCount() < 1) {
logger.Information("There were no files to checksum");
return;
}
Dictionary<string, object> fileHashes;
Dictionary<string, ulong> fileHashesXxHash3;
Dictionary<string, uint> fileHashesMurmur;
Dictionary<string, string> fileHashesMD5;
switch (hashAlgo) {
case "md5":
fileHashesMD5 = CalculateChecksums(indexFiles());
fileHashes = fileHashesMD5.ToDictionary(kv => kv.Key, kv => (object)kv.Value);
break;
case "murmur":
fileHashesMurmur = CalculateChecksumsWithMurmur(indexFiles(), bufferSize);
fileHashes = fileHashesMurmur.ToDictionary(kv => kv.Key, kv => (object)kv.Value);
break;
case "xxhash":
fileHashesXxHash3 = CalculateChecksumsWithXxHash3(indexFiles(), bufferSize);
fileHashes = fileHashesXxHash3.ToDictionary(kv => kv.Key, kv => (object)kv.Value);
break;
default:
logger.Error("No valid hash algorithm was selected");
throw new Exception($"{hashAlgo} is not a valid option. Valid options are MD5, Murmur and XxHash");
}
logger.Information("All files were checksummed");
HashEntry[] hashEntries = fileHashes.Select(kv => new HashEntry(kv.Key, kv.Value.ToString())).ToArray();
string hashKey = "fileHashes";
db.HashSet(hashKey, hashEntries);
logger.Information("Dictionary inserted into Redis.");
}
public void saveToSqlite() {
initializeDB();
ConnectionMultiplexer redis = ConnectionMultiplexer.Connect("localhost");
IDatabase db = redis.GetDatabase();
HashEntry[] fileHashes = db.HashGetAll("fileHashes");
logger.Information("Retrived all values from redis");
using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) {
connection.Open();
foreach (var file in fileHashes) {
var absolutePathToFile = file.Name.ToString();
string fileName = Path.GetFileName(absolutePathToFile.ToString());
string pathToFile = Path.GetRelativePath(DatabaseRoot, absolutePathToFile.ToString());
var fileHash = file.Value.ToString();
if (checkIfFileMovedAndUpdatePathToFile(fileHash, fileName, pathToFile) || checkIfFileAlreadyExistsInDatabase(fileHash, fileName)) {
continue;
}
var InsertCommand = connection.CreateCommand();
InsertCommand.CommandText =
@"
INSERT INTO file (filehash, filename, pathtofile)
VALUES ($filehash, $filename, $pathtofile)
";
InsertCommand.Parameters.AddWithValue("$filehash", fileHash);
InsertCommand.Parameters.AddWithValue("$filename", fileName);
InsertCommand.Parameters.AddWithValue("$pathtofile", pathToFile);
InsertCommand.ExecuteNonQuery();
logger.Verbose("{fileName} which is located at {pathToFile} relative to the database with the hash {fileHash} was successfully inserted into the database", fileName, pathToFile, fileHash);
}
}
logger.Information("All filehashes were successfully inserted into the database");
var keys = db.Execute("KEYS", "*");
if (keys == null) {
logger.Error("No values found in redis");
return;
}
foreach (var key in (RedisValue[])keys) {
db.KeyDelete((RedisKey)key.ToString());
}
logger.Information("Redis was successfully cleared of any remaining data");
}
private bool checkIfFileAlreadyExistsInDatabase(string fileHash, string pathToFile) {
@@ -125,9 +309,11 @@ public class ChksumUtils {
pathtofile = reader.GetString(1);
}
}
logger.Verbose("{pathToFile} with the hash {fileHash} was successfully loaded", pathToFile, fileHash);
}
if (fileHash == filehash) {
logger.Verbose("File with filehash {filehash} already exists in the database", filehash);
doesExist = true;
}
return doesExist;
@@ -165,16 +351,18 @@ public class ChksumUtils {
command2.Parameters.AddWithValue("$filehash", fileHash);
command2.ExecuteNonQuery();
Console.WriteLine("File moved:");
Console.WriteLine($"\tfrom\t{pathToFile}");
Console.WriteLine($"\tto \t{pathtofile}\n");
logger.Verbose("File moved or is a duplicate:\n\tfrom\t{pathToFile}\n\tto \t{pathtofile}\n", pathToFile, pathtofile);
wasMoved = true;
}
return wasMoved;
logger.Verbose("{fileName} which is located at {pathToFile} relative to the database with the hash {fileHash} was successfully checked", fileName, pathToFile, fileHash);
}
return wasMoved;
}
public void checkIfFileWasDeleted() {
saveToSqlite();
string pathToFile = string.Empty;
using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) {
@@ -190,27 +378,29 @@ public class ChksumUtils {
while (reader.Read()) {
pathToFile = reader.GetString(0);
if (!File.Exists(pathToFile)) {
var deleteCommand = connection.CreateCommand();
deleteCommand.CommandText =
@"
DELETE FROM file
WHERE pathtofile = $pathtofile
";
deleteCommand.Parameters.AddWithValue("$pathtofile", pathToFile);
deleteCommand.ExecuteNonQuery();
Console.WriteLine("File deleted:");
Console.WriteLine($"\t{pathToFile}\n");
if (File.Exists(pathToFile)) {
logger.Verbose("{pathToFile} exists", pathToFile);
continue;
}
var deleteCommand = connection.CreateCommand();
deleteCommand.CommandText =
@"
DELETE FROM file
WHERE pathtofile = $pathtofile
";
deleteCommand.Parameters.AddWithValue("$pathtofile", pathToFile);
deleteCommand.ExecuteNonQuery();
logger.Information("File deleted:\n\t{pathToFile}", pathToFile);
}
}
logger.Information("All deleted files were successfully removed from the database");
}
}
private List<string> getFilehashesFromDatabase(string connectionString) {
List<string> filehashesFromDatabase = new List<string>();
using (var connection = new SqliteConnection(connectionString)) {
string filehash = string.Empty;
@@ -230,12 +420,20 @@ public class ChksumUtils {
}
}
logger.Debug("All filehashes were successfully retrived from the database");
return filehashesFromDatabase;
}
public void compareDatabases(string filePathToOtherDatabase) {
saveToSqlite();
if (!File.Exists(filePathToOtherDatabase)) {
logger.Error("No database could be found at {filePathToOtherDatabase}", filePathToOtherDatabase);
throw new Exception("No database could be found at " + filePathToOtherDatabase);
}
List<string> filesThatDoNotExistsInTheRemote = getFilehashesFromDatabase("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadOnly").Except(getFilehashesFromDatabase("Data Source=" + filePathToOtherDatabase + ";Mode=ReadOnly")).ToList();
//List<string> filesThatDoNotExistsInTheOrigin = filehashesOfRemoteDatabase.Except(filehashesOfOriginDatabase).ToList();
foreach (string file in filesThatDoNotExistsInTheRemote) {
using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadOnly")) {
@@ -256,13 +454,17 @@ public class ChksumUtils {
Console.WriteLine("File not found in remote:");
Console.WriteLine($"\t{filename}\n");
logger.Information("{filename} could not be found in the remote database", filename);
}
}
}
}
logger.Information("Compared both databases successfully");
}
public void cleanup() {
File.Delete(libraryPath);
logger.Information("Successfully deleted libe_sqlite3.so");
Console.ResetColor();
}
}
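As a closing illustration of the hashing hunks above: a self-contained sketch that hashes a single file with MurmurHash.Net, using the same Hash32 call and seed that appear in the diff (a hypothetical example program, assuming the file fits comfortably in memory; the buffered loop in ChksumUtils reads larger files chunk by chunk instead).
```cs
using System;
using System.IO;
using MurmurHash.Net;

class MurmurExample {
    static void Main(string[] args) {
        // Read the whole file at once; ChksumUtils instead reads it in
        // buffer-sized chunks to keep memory usage flat.
        byte[] bytes = File.ReadAllBytes(args[0]);
        ReadOnlySpan<byte> span = bytes;
        uint hash = MurmurHash3.Hash32(bytes: span, seed: 123456U);
        Console.WriteLine($"{args[0]}: {hash}");
    }
}
```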