9 Commits

Author SHA1 Message Date
61e25685e5 Add build badge
Signed-off-by: Gitea <gitea@hopeless-cloud.xyz>
2023-07-06 17:17:17 +00:00
90706e7796 code cleanup
made garbage code less garbage
2023-07-04 22:39:59 +02:00
7b00a87620 Merge pull request 'feature/NewHashingAlgorithms' (#6) from feature/NewHashingAlgorithms into main
Reviewed-on: #6
2023-07-03 22:34:15 +00:00
86111732c2 Merge pull request 'Runtime dependency' (#5) from feature/BackwardsCompatibility into feature/NewHashingAlgorithms
Reviewed-on: #5
2023-07-03 22:20:25 +00:00
d7aa7a6d98 Runtime dependency 2023-07-04 00:18:09 +02:00
336eb5b73d Fix logging 2023-07-03 23:56:27 +02:00
b011f08172 feature/progressBar (#4)
# Add progress bar
Added a progress bar so that one can see how far along the program is.

Reviewed-on: #4
Co-authored-by: ProfessionalUwU <andre.fuhry@hopeless-cloud.xyz>
Co-committed-by: ProfessionalUwU <andre.fuhry@hopeless-cloud.xyz>
2023-07-03 21:50:38 +00:00
02a0bddd7e Major advancements
New options
Switched to WAL (write-ahead log)
Now using redis to cache filehashes
New option to dump redis data into sqlite database
2023-07-03 00:59:50 +02:00
d80a5f5e6b Add option for choosing hash algo 2023-07-02 15:32:46 +02:00
4 changed files with 183 additions and 90 deletions

View File

@@ -1,7 +1,20 @@
+[![status-badge](https://woodpecker.hopeless-cloud.xyz/api/badges/ProfessionalUwU/chksum/status.svg)](https://woodpecker.hopeless-cloud.xyz/ProfessionalUwU/chksum)
+
 # chksum
 
 Checksums every file under the current directory
 
+## Runtime dependency
+If you use version 2.0.0 or above you will need redis!
+```bash
+pacman -S redis && systemctl start redis
+```
+With this redis will be downloaded and started.
+Don't forget to enable the service if you don't want to start it every time you run the program.
+
 ## Run Locally
 
 Clone the project
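
The README addition above tells the reader to enable the Redis service but stops short of showing how. On a systemd-based distribution such as the Arch setup implied by `pacman`, that would typically be done as follows (shown for convenience, not part of the diff; the unit name `redis` is an assumption):

```bash
# Enable the Redis service at boot and start it immediately (assumes the unit is named "redis")
sudo systemctl enable --now redis
```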

View File

@@ -22,6 +22,7 @@
     <PackageReference Include="Serilog" Version="3.0.1" />
     <PackageReference Include="Serilog.Sinks.Console" Version="4.1.0" />
     <PackageReference Include="Serilog.Sinks.File" Version="5.0.0" />
+    <PackageReference Include="StackExchange.Redis" Version="2.6.116" />
     <PackageReference Include="Standart.Hash.xxHash" Version="4.0.5" />
   </ItemGroup>

View File

@@ -8,7 +8,7 @@ public class Program {
             Console.WriteLine("Please specify an option.");
             PrintAvailableOptions();
             return;
-        } else if (args.Length > 1 && args[0] != "compareDatabases") {
+        } else if (args.Length > 3) {
             Console.WriteLine("Too many options.");
             return;
         }
@@ -19,26 +19,41 @@ public class Program {
         utils.ExtractEmbeddedLibrary();
 
+        var option = args[0].ToLower();
+
         Console.ForegroundColor = ConsoleColor.Green;
-        switch (args[0]) {
+        switch (option) {
             case "checksum":
                 Console.WriteLine("Starting the checksum process.");
                 Console.ResetColor();
-                utils.doTheThing();
+                var hashAlgo = args[1].ToLower();
+
+                if (hashAlgo.Equals("md5")) {
+                    utils.doTheThing(hashAlgo);
+                    break;
+                }
+
+                try {
+                    var bufferSize = int.Parse(args[2]);
+                    utils.doTheThing(hashAlgo, bufferSize);
+                } catch (FormatException) {
+                    Console.ForegroundColor = ConsoleColor.Red;
+                    Console.WriteLine("Buffer was not a valid integer value. Please specify a valid integer value for the buffer size");
+                    Console.ResetColor();
+                }
+
                 Console.ForegroundColor = ConsoleColor.Green;
                 Console.WriteLine("Checksum process finished");
                 break;
-            case "compareDatabases":
+            case "savetosqlite":
+                Console.ResetColor();
+                utils.saveToSqlite();
+                break;
+            case "comparedatabases":
                 Console.ResetColor();
                 utils.compareDatabases(args[1]);
                 break;
-            case "createDB":
-                utils.initializeDB();
-                break;
-            case "checkIfFileWasDeleted":
+            case "checkiffilewasdeleted":
                 Console.ResetColor();
                 utils.checkIfFileWasDeleted();
                 break;
@@ -57,17 +72,16 @@ public class Program {
     static void PrintAvailableOptions() {
         String[] options = {
-            "checksum",
+            "checksum - MD5, Murmur and XxHash - Default buffer size is 4096",
             "compareDatabases",
-            "compareChecksums",
-            "createDB",
+            "saveToSqlite",
             "checkIfFileWasDeleted",
             "help"
         };
         Console.ResetColor();
         Console.WriteLine("usage: chksum [option] \nHere is a list of all available options:");
-        foreach (String option in options) {
+        foreach (var option in options) {
             Console.WriteLine("\t" + option);
         }
     }
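
With the new argument handling, the CLI reads the option from `args[0]`, the hash algorithm from `args[1]`, and an optional buffer size from `args[2]` (options are lower-cased before matching). A sketch of the resulting invocations, inferred from the switch above and the `chksum` usage string rather than spelled out by the PR:

```bash
# Inferred usage: chksum <option> [hashAlgorithm] [bufferSize]
chksum checksum md5                               # MD5 takes no buffer size
chksum checksum xxhash 8192                       # XxHash or Murmur with a custom buffer size
chksum savetosqlite                               # dump the Redis cache into chksum.db
chksum comparedatabases /path/to/other/chksum.db  # path argument is hypothetical
chksum checkiffilewasdeleted                      # drop rows for files that no longer exist
```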

View File

@@ -6,6 +6,7 @@ using Serilog;
 using Serilog.Events;
 using MurmurHash.Net;
 using Standart.Hash.xxHash;
+using StackExchange.Redis;
 
 namespace Chksum.Utils;
 public class ChksumUtils {
@@ -52,7 +53,7 @@ public class ChksumUtils {
         }
     }
 
-    public void initializeDB() {
+    private void initializeDB() {
         if (File.Exists("chksum.db")) {
             logger.Information("A database already exits");
             return;
@@ -73,6 +74,14 @@ public class ChksumUtils {
                 );
             ";
             command.ExecuteNonQuery();
+
+            var walCommand = connection.CreateCommand();
+            walCommand.CommandText =
+            @"
+                PRAGMA journal_mode = 'wal'
+            ";
+            walCommand.ExecuteNonQuery();
+
             logger.Information("Database was successfully created");
         }
     }
@@ -89,9 +98,19 @@ public class ChksumUtils {
         }
     }
 
+    private void UpdateProgressBar(int current, int total) {
+        var progress = (int)((double)current / total * 100);
+        string progressText = $"Progress: {progress}% [{current}/{total}]";
+        Console.Write("\r" + progressText.PadRight(Console.WindowWidth));
+    }
+
     private Dictionary<string, string> CalculateChecksums(string[] filenames) {
         ConcurrentDictionary<string, string> checksums = new ConcurrentDictionary<string, string>();
+        int totalFiles = filenames.Length;
+        int processedFiles = 0;
 
         Parallel.ForEach(filenames, (filename, state) => {
             using (var md5 = MD5.Create()) {
                 using (var stream = File.OpenRead(filename)) {
@@ -102,31 +121,36 @@ public class ChksumUtils {
                         checksums.TryAdd(filename, checksum);
                     }
                 }
+                Interlocked.Increment(ref processedFiles);
+                UpdateProgressBar(processedFiles, totalFiles);
             }
         });
-        logger.Debug("All files were checksummed");
 
         return new Dictionary<string, string>(checksums);
     }
 
-    private Dictionary<string, uint> CalculateChecksumsWithMurmur(string[] filenames) {
+    private Dictionary<string, uint> CalculateChecksumsWithMurmur(string[] filenames, int userDefinedBufferSize) {
         ConcurrentDictionary<string, uint> checksums = new ConcurrentDictionary<string, uint>();
+        int totalFiles = filenames.Length;
+        int processedFiles = 0;
 
         Parallel.ForEach(filenames, (filename, state) => {
             using (var stream = File.OpenRead(filename)) {
-                var hash = CalculateMurmurHash32(stream);
+                var hash = CalculateMurmurHash32(stream, userDefinedBufferSize);
                 lock (checksums) {
                     checksums.TryAdd(filename, hash);
                 }
+                Interlocked.Increment(ref processedFiles);
+                UpdateProgressBar(processedFiles, totalFiles);
             }
         });
-        logger.Debug("All files were checksummed");
 
         return new Dictionary<string, uint>(checksums);
     }
 
-    private uint CalculateMurmurHash32(Stream stream) {
-        const int bufferSize = 4096;
+    private uint CalculateMurmurHash32(Stream stream, int userDefinedBufferSize) {
+        int bufferSize = userDefinedBufferSize;
         const uint seed = 123456U;
 
         var buffer = new byte[bufferSize];
@@ -141,21 +165,26 @@ public class ChksumUtils {
         return hash;
     }
 
-    private Dictionary<string, ulong> CalculateChecksumsWithXxHash3(string[] filenames) {
+    private Dictionary<string, ulong> CalculateChecksumsWithXxHash3(string[] filenames, int userDefinedBufferSize) {
         ConcurrentDictionary<string, ulong> checksums = new ConcurrentDictionary<string, ulong>();
+        int totalFiles = filenames.Length;
+        int processedFiles = 0;
 
         Parallel.ForEach(filenames, (filename, state) => {
             using (var stream = File.OpenRead(filename)) {
-                var hash = CalculateXxHash3(stream);
+                var hash = CalculateXxHash3(stream, userDefinedBufferSize);
                 checksums.TryAdd(filename, hash);
             }
+            Interlocked.Increment(ref processedFiles);
+            UpdateProgressBar(processedFiles, totalFiles);
         });
 
         return new Dictionary<string, ulong>(checksums);
     }
 
-    private ulong CalculateXxHash3(Stream stream) {
-        const int bufferSize = 4096;
+    private ulong CalculateXxHash3(Stream stream, int userDefinedBufferSize) {
+        int bufferSize = userDefinedBufferSize;
         const ulong seed = 123456U;
 
         var buffer = new byte[bufferSize];
@@ -170,63 +199,96 @@ public class ChksumUtils {
         return hash;
     }
 
-    public void doTheThing(string hashalgo, int bufferSize) {
-        using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) {
-            if (getTotalFileCount() < 1) {
-                logger.Information("There were no files to checksum");
-                return;
-            }
-            connection.Open();
-
-            Dictionary<string, object> fileHashes;
-            Dictionary<string, ulong> fileHashesXxHash3;
-            Dictionary<string, uint> fileHashesMurmur;
-            Dictionary<string, string> fileHashesMD5;
-
-            switch (hashalgo) {
-                case "MD5":
-                    fileHashesMD5 = CalculateChecksums(indexFiles());
-                    fileHashes = fileHashesMD5.ToDictionary(kv => kv.Key, kv => (object)kv.Value);
-                    break;
-                case "Murmur":
-                    fileHashesMurmur = CalculateChecksumsWithMurmur(indexFiles());
-                    fileHashes = fileHashesMurmur.ToDictionary(kv => kv.Key, kv => (object)kv.Value);
-                    break;
-                case "XxHash":
-                    fileHashesXxHash3 = CalculateChecksumsWithXxHash3(indexFiles());
-                    fileHashes = fileHashesXxHash3.ToDictionary(kv => kv.Key, kv => (object)kv.Value);
-                    break;
-                default:
-                    logger.Error("No valid hash algorithm was selected");
-                    throw new Exception($"{hashalgo} is not a valid option. Valid options are MD5, Murmur and XxHash");
-            }
-
-            foreach (var file in fileHashes) {
-                string absolutePathToFile = file.Key;
-                string fileName = Path.GetFileName(absolutePathToFile);
-                string pathToFile = Path.GetRelativePath(DatabaseRoot, absolutePathToFile);
-                var fileHash = file.Value;
-
-                if (checkIfFileMovedAndUpdatePathToFile(fileHash, fileName, pathToFile) == false && checkIfFileAlreadyExistsInDatabase(fileHash, fileName) == false) {
-                    var command = connection.CreateCommand();
-                    command.CommandText =
-                    @"
-                        INSERT INTO file (filehash, filename, pathtofile)
-                        VALUES ($filehash, $filename, $pathtofile)
-                    ";
-                    command.Parameters.AddWithValue("$filehash", fileHash);
-                    command.Parameters.AddWithValue("$filename", fileName);
-                    command.Parameters.AddWithValue("$pathtofile", pathToFile);
-                    command.ExecuteNonQuery();
-
-                    logger.Verbose("{fileName} which is located at {pathToFile} relative to the database with the hash {fileHash} was successfully inserted into the database", fileName, pathToFile, fileHash);
-                }
-            }
-            logger.Information("All files were successfully written to the database");
-        }
-    }
-
-    private bool checkIfFileAlreadyExistsInDatabase(object fileHash, string pathToFile) {
+    public void doTheThing(string hashAlgo, int bufferSize = 4096) {
+        ConnectionMultiplexer redis = ConnectionMultiplexer.Connect("localhost");
+        IDatabase db = redis.GetDatabase();
+
+        if (getTotalFileCount() < 1) {
+            logger.Information("There were no files to checksum");
+            return;
+        }
+
+        Dictionary<string, object> fileHashes;
+        Dictionary<string, ulong> fileHashesXxHash3;
+        Dictionary<string, uint> fileHashesMurmur;
+        Dictionary<string, string> fileHashesMD5;
+
+        switch (hashAlgo) {
+            case "md5":
+                fileHashesMD5 = CalculateChecksums(indexFiles());
+                fileHashes = fileHashesMD5.ToDictionary(kv => kv.Key, kv => (object)kv.Value);
+                break;
+            case "murmur":
+                fileHashesMurmur = CalculateChecksumsWithMurmur(indexFiles(), bufferSize);
+                fileHashes = fileHashesMurmur.ToDictionary(kv => kv.Key, kv => (object)kv.Value);
+                break;
+            case "xxhash":
+                fileHashesXxHash3 = CalculateChecksumsWithXxHash3(indexFiles(), bufferSize);
+                fileHashes = fileHashesXxHash3.ToDictionary(kv => kv.Key, kv => (object)kv.Value);
+                break;
+            default:
+                logger.Error("No valid hash algorithm was selected");
+                throw new Exception($"{hashAlgo} is not a valid option. Valid options are MD5, Murmur and XxHash");
+        }
+        logger.Information("All files were checksummed");
+
+        HashEntry[] hashEntries = fileHashes.Select(kv => new HashEntry(kv.Key, kv.Value.ToString())).ToArray();
+        string hashKey = "fileHashes";
+        db.HashSet(hashKey, hashEntries);
+        logger.Information("Dictionary inserted into Redis.");
+    }
+
+    public void saveToSqlite() {
+        initializeDB();
+
+        ConnectionMultiplexer redis = ConnectionMultiplexer.Connect("localhost");
+        IDatabase db = redis.GetDatabase();
+        HashEntry[] fileHashes = db.HashGetAll("fileHashes");
+        logger.Information("Retrived all values from redis");
+
+        using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) {
+            connection.Open();
+
+            foreach (var file in fileHashes) {
+                var absolutePathToFile = file.Name.ToString();
+                string fileName = Path.GetFileName(absolutePathToFile.ToString());
+                string pathToFile = Path.GetRelativePath(DatabaseRoot, absolutePathToFile.ToString());
+                var fileHash = file.Value.ToString();
+
+                if (checkIfFileMovedAndUpdatePathToFile(fileHash, fileName, pathToFile) || checkIfFileAlreadyExistsInDatabase(fileHash, fileName)) {
+                    continue;
+                }
+
+                var InsertCommand = connection.CreateCommand();
+                InsertCommand.CommandText =
+                @"
+                    INSERT INTO file (filehash, filename, pathtofile)
+                    VALUES ($filehash, $filename, $pathtofile)
+                ";
+                InsertCommand.Parameters.AddWithValue("$filehash", fileHash);
+                InsertCommand.Parameters.AddWithValue("$filename", fileName);
+                InsertCommand.Parameters.AddWithValue("$pathtofile", pathToFile);
+                InsertCommand.ExecuteNonQuery();
+
+                logger.Verbose("{fileName} which is located at {pathToFile} relative to the database with the hash {fileHash} was successfully inserted into the database", fileName, pathToFile, fileHash);
+            }
+        }
+        logger.Information("All filehashes were successfully inserted into the database");
+
+        var keys = db.Execute("KEYS", "*");
+        if (keys == null) {
+            logger.Error("No values found in redis");
+            return;
+        }
+        foreach (var key in (RedisValue[])keys) {
+            db.KeyDelete((RedisKey)key.ToString());
+        }
+        logger.Information("Redis was successfully cleared of any remaining data");
+    }
+
+    private bool checkIfFileAlreadyExistsInDatabase(string fileHash, string pathToFile) {
         string filehash = string.Empty;
         string pathtofile = string.Empty;
         bool doesExist = false;
@@ -239,7 +301,7 @@ public class ChksumUtils {
             @"
                 SELECT filehash, pathtofile FROM file WHERE filehash = $filehash
             ";
-            command.Parameters.AddWithValue("$filehash", fileHash.ToString());
+            command.Parameters.AddWithValue("$filehash", fileHash);
 
             using (var reader = command.ExecuteReader()) {
                 while (reader.Read()) {
@@ -247,17 +309,17 @@ public class ChksumUtils {
                     pathtofile = reader.GetString(1);
                 }
             }
-            logger.Verbose("{pathToFile} with the hash {fileHash} was successfully loaded", pathToFile, fileHash.ToString());
+            logger.Verbose("{pathToFile} with the hash {fileHash} was successfully loaded", pathToFile, fileHash);
         }
 
-        if (fileHash.ToString() == filehash) {
+        if (fileHash == filehash) {
             logger.Verbose("File with filehash {filehash} already exists in the database", filehash);
             doesExist = true;
         }
         return doesExist;
     }
 
-    private bool checkIfFileMovedAndUpdatePathToFile(object fileHash, string fileName, string pathToFile) {
+    private bool checkIfFileMovedAndUpdatePathToFile(string fileHash, string fileName, string pathToFile) {
         string pathtofile = string.Empty;
         bool wasMoved = false;
@@ -269,7 +331,7 @@ public class ChksumUtils {
             @"
                 SELECT pathtofile FROM file WHERE filehash = $filehash
             ";
-            command.Parameters.AddWithValue("$filehash", fileHash.ToString());
+            command.Parameters.AddWithValue("$filehash", fileHash);
 
             using (var reader = command.ExecuteReader()) {
                 while (reader.Read()) {
@@ -286,20 +348,21 @@ public class ChksumUtils {
                     WHERE filehash = $filehash
                 ";
                 command2.Parameters.AddWithValue("$newpathtofile", pathToFile);
-                command2.Parameters.AddWithValue("$filehash", fileHash.ToString());
+                command2.Parameters.AddWithValue("$filehash", fileHash);
                 command2.ExecuteNonQuery();
 
-                //Console.WriteLine("File moved or is a duplicate:");
-                //Console.WriteLine($"\tfrom\t{pathToFile}");
-                //Console.WriteLine($"\tto \t{pathtofile}\n");
+                logger.Verbose("File moved or is a duplicate:\n\tfrom\t{pathToFile}\n\tto \t{pathtofile}\n", pathToFile, pathtofile);
 
                 wasMoved = true;
             }
-            logger.Verbose("{fileName} which is located at {pathToFile} relative to the database with the hash {fileHash} was successfully checked", fileName, pathToFile, fileHash.ToString());
+            logger.Verbose("{fileName} which is located at {pathToFile} relative to the database with the hash {fileHash} was successfully checked", fileName, pathToFile, fileHash);
         }
         return wasMoved;
     }
 
     public void checkIfFileWasDeleted() {
+        saveToSqlite();
+
         string pathToFile = string.Empty;
 
         using (var connection = new SqliteConnection("Data Source=" + DatabaseRoot + "chksum.db;Mode=ReadWrite")) {
@@ -328,9 +391,7 @@ public class ChksumUtils {
                 deleteCommand.Parameters.AddWithValue("$pathtofile", pathToFile);
                 deleteCommand.ExecuteNonQuery();
 
-                //Console.WriteLine("File deleted:");
-                //Console.WriteLine($"\t{pathToFile}\n");
-                logger.Information("File deleted: {pathToFile}", pathToFile);
+                logger.Information("File deleted:\n\t{pathToFile}", pathToFile);
             }
         }
         logger.Information("All deleted files were successfully removed from the database");
@@ -364,6 +425,9 @@ public class ChksumUtils {
     }
 
     public void compareDatabases(string filePathToOtherDatabase) {
+        saveToSqlite();
+
         if (!File.Exists(filePathToOtherDatabase)) {
             logger.Error("No database could be found at {filePathToOtherDatabase}", filePathToOtherDatabase);
             throw new Exception("No database could be found at " + filePathToOtherDatabase);
@@ -400,6 +464,7 @@ public class ChksumUtils {
 
     public void cleanup() {
         File.Delete(libraryPath);
-        logger.Debug("Successfully deleted libe_sqlite3.so");
+        logger.Information("Successfully deleted libe_sqlite3.so");
+        Console.ResetColor();
     }
 }
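
Since `doTheThing()` now stages every checksum in Redis under the `fileHashes` hash and `saveToSqlite()` later flushes it into `chksum.db`, the intermediate cache can be inspected from the shell. A quick sanity check, assuming the same local Redis instance the code connects to:

```bash
# Peek at the checksums cached by doTheThing() before they are written to SQLite
redis-cli HLEN fileHashes      # number of files hashed so far
redis-cli HGETALL fileHashes   # file path -> hash value pairs
```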