Skip to content

Commit

Permalink
Merge pull request #413 from openzim/no_crash_on_corrupted
Browse files Browse the repository at this point in the history
[CHECK] Do not continue with other checks if integrity check fails.
  • Loading branch information
veloman-yunkan authored Jun 14, 2024
2 parents d6c6a1b + 875e09a commit 1b2e64c
Show file tree
Hide file tree
Showing 4 changed files with 78 additions and 56 deletions.
3 changes: 2 additions & 1 deletion src/zimcheck/checks.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ void test_checksum(zim::Archive& archive, ErrorLogger& reporter) {
}
}

void test_integrity(const std::string& filename, ErrorLogger& reporter) {
bool test_integrity(const std::string& filename, ErrorLogger& reporter) {
reporter.infoMsg("[INFO] Verifying ZIM-archive structure integrity...");
zim::IntegrityCheckList checks;
checks.set(); // enable all checks (including checksum)
Expand All @@ -249,6 +249,7 @@ void test_integrity(const std::string& filename, ErrorLogger& reporter) {
if (!result) {
reporter.infoMsg(" [ERROR] ZIM file's low level structure is invalid");
}
return result;
}


Expand Down
2 changes: 1 addition & 1 deletion src/zimcheck/checks.h
Original file line number Diff line number Diff line change
Expand Up @@ -109,7 +109,7 @@ class ErrorLogger {


void test_checksum(zim::Archive& archive, ErrorLogger& reporter);
void test_integrity(const std::string& filename, ErrorLogger& reporter);
bool test_integrity(const std::string& filename, ErrorLogger& reporter);
void test_metadata(const zim::Archive& archive, ErrorLogger& reporter);
void test_favicon(const zim::Archive& archive, ErrorLogger& reporter);
void test_mainpage(const zim::Archive& archive, ErrorLogger& reporter);
Expand Down
113 changes: 59 additions & 54 deletions src/zimcheck/zimcheck.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -266,70 +266,75 @@ int zimcheck (const std::vector<const char*>& args)
error.infoMsg("[INFO] Zimcheck version is " + std::string(VERSION));

//Test 0: Low-level ZIM-file structure integrity checks
if(enabled_tests.isEnabled(TestType::INTEGRITY))
test_integrity(filename, error);
bool should_run_full_test = true;
if(enabled_tests.isEnabled(TestType::INTEGRITY)) {
should_run_full_test = test_integrity(filename, error);
} else {
error.infoMsg("[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.");
}


// Does it make sense to do the other checks if the integrity
// check fails?
zim::Archive archive( filename );
error.addInfo("file_uuid", stringify(archive.getUuid()));
if (should_run_full_test) {
zim::Archive archive( filename );
error.addInfo("file_uuid", stringify(archive.getUuid()));

//Test 1: Internal Checksum
if(enabled_tests.isEnabled(TestType::CHECKSUM)) {
if ( enabled_tests.isEnabled(TestType::INTEGRITY) ) {
error.infoMsg(
"[INFO] Avoiding redundant checksum test"
" (already performed by the integrity check)."
);
} else {
test_checksum(archive, error);
//Test 1: Internal Checksum
if(enabled_tests.isEnabled(TestType::CHECKSUM)) {
if ( enabled_tests.isEnabled(TestType::INTEGRITY) ) {
error.infoMsg(
"[INFO] Avoiding redundant checksum test"
" (already performed by the integrity check)."
);
} else {
test_checksum(archive, error);
}
}
}

//Test 2: Metadata Entries:
//The file is searched for the compulsory metadata entries.
if(enabled_tests.isEnabled(TestType::METADATA))
test_metadata(archive, error);
//Test 2: Metadata Entries:
//The file is searched for the compulsory metadata entries.
if(enabled_tests.isEnabled(TestType::METADATA))
test_metadata(archive, error);

//Test 3: Test for Favicon.
if(enabled_tests.isEnabled(TestType::FAVICON))
test_favicon(archive, error);
//Test 3: Test for Favicon.
if(enabled_tests.isEnabled(TestType::FAVICON))
test_favicon(archive, error);


//Test 4: Main Page Entry
if(enabled_tests.isEnabled(TestType::MAIN_PAGE))
test_mainpage(archive, error);
//Test 4: Main Page Entry
if(enabled_tests.isEnabled(TestType::MAIN_PAGE))
test_mainpage(archive, error);

/* From here on we loop over the articles rather than over the tests.
*
* If we looped over the tests, we would have:
*
* for (test: tests) {
* for(article: articles) {
* data = article->getData();
* ...
* }
* }
*
* That way each article's data would be fetched (and hence decompressed) once per test.
* By looping over the articles first, we instead have:
*
* for (article: articles) {
* data = article->getData();
* for (test: tests) {
* ...
* }
* }
*/
/* From here on we loop over the articles rather than over the tests.
*
* If we looped over the tests, we would have:
*
* for (test: tests) {
* for(article: articles) {
* data = article->getData();
* ...
* }
* }
*
* That way each article's data would be fetched (and hence decompressed) once per test.
* By looping over the articles first, we instead have:
*
* for (article: articles) {
* data = article->getData();
* for (test: tests) {
* ...
* }
* }
*/

if ( enabled_tests.isEnabled(TestType::URL_INTERNAL) ||
enabled_tests.isEnabled(TestType::URL_EXTERNAL) ||
enabled_tests.isEnabled(TestType::REDUNDANT) ||
enabled_tests.isEnabled(TestType::EMPTY) )
test_articles(archive, error, progress, enabled_tests, thread_count);
if ( enabled_tests.isEnabled(TestType::URL_INTERNAL) ||
enabled_tests.isEnabled(TestType::URL_EXTERNAL) ||
enabled_tests.isEnabled(TestType::REDUNDANT) ||
enabled_tests.isEnabled(TestType::EMPTY) )
test_articles(archive, error, progress, enabled_tests, thread_count);

if ( enabled_tests.isEnabled(TestType::REDIRECT))
test_redirect_loop(archive, error);
if ( enabled_tests.isEnabled(TestType::REDIRECT))
test_redirect_loop(archive, error);
}

const bool overallStatus = error.overallStatus();
error.addInfo("status", overallStatus);
Expand Down
16 changes: 16 additions & 0 deletions test/zimcheck-test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -274,6 +274,7 @@ TEST(zimcheck, checksum_goodzimfile)
const std::string expected_output(
"[INFO] Checking zim file data/zimfiles/good.zim" "\n"
"[INFO] Zimcheck version is " VERSION "\n"
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
"[INFO] Verifying Internal Checksum..." "\n"
"[INFO] Overall Test Status: Pass" "\n"
"[INFO] Total time taken by zimcheck: <3 seconds." "\n"
Expand All @@ -293,6 +294,7 @@ TEST(zimcheck, metadata_goodzimfile)
const std::string expected_output(
"[INFO] Checking zim file data/zimfiles/good.zim" "\n"
"[INFO] Zimcheck version is " VERSION "\n"
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
"[INFO] Checking metadata..." "\n"
"[INFO] Overall Test Status: Pass" "\n"
"[INFO] Total time taken by zimcheck: <3 seconds." "\n"
Expand All @@ -312,6 +314,7 @@ TEST(zimcheck, favicon_goodzimfile)
const std::string expected_output(
"[INFO] Checking zim file data/zimfiles/good.zim" "\n"
"[INFO] Zimcheck version is " VERSION "\n"
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
"[INFO] Searching for Favicon..." "\n"
"[INFO] Overall Test Status: Pass" "\n"
"[INFO] Total time taken by zimcheck: <3 seconds." "\n"
Expand All @@ -331,6 +334,7 @@ TEST(zimcheck, mainpage_goodzimfile)
const std::string expected_output(
"[INFO] Checking zim file data/zimfiles/good.zim" "\n"
"[INFO] Zimcheck version is " VERSION "\n"
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
"[INFO] Searching for main page..." "\n"
"[INFO] Overall Test Status: Pass" "\n"
"[INFO] Total time taken by zimcheck: <3 seconds." "\n"
Expand All @@ -350,6 +354,7 @@ TEST(zimcheck, article_content_goodzimfile)
const std::string expected_output(
"[INFO] Checking zim file data/zimfiles/good.zim" "\n"
"[INFO] Zimcheck version is " VERSION "\n"
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
"[INFO] Verifying Articles' content..." "\n"
"[INFO] Overall Test Status: Pass" "\n"
"[INFO] Total time taken by zimcheck: <3 seconds." "\n"
Expand All @@ -373,6 +378,7 @@ TEST(zimcheck, redundant_articles_goodzimfile)
const std::string expected_output(
"[INFO] Checking zim file data/zimfiles/good.zim" "\n"
"[INFO] Zimcheck version is " VERSION "\n"
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
"[INFO] Verifying Articles' content..." "\n"
"[INFO] Searching for redundant articles..." "\n"
" Verifying Similar Articles for redundancies..." "\n"
Expand All @@ -394,6 +400,7 @@ TEST(zimcheck, redirect_loop_goodzimfile)
const std::string expected_output(
"[INFO] Checking zim file data/zimfiles/good.zim" "\n"
"[INFO] Zimcheck version is " VERSION "\n"
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
"[INFO] Checking for redirect loops..." "\n"
"[INFO] Overall Test Status: Pass" "\n"
"[INFO] Total time taken by zimcheck: <3 seconds." "\n"
Expand Down Expand Up @@ -505,6 +512,7 @@ TEST(zimcheck, bad_checksum)
const std::string expected_output(
"[INFO] Checking zim file data/zimfiles/bad_checksum.zim" "\n"
"[INFO] Zimcheck version is " VERSION "\n"
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
"[INFO] Verifying Internal Checksum..." "\n"
" [ERROR] Wrong Checksum in ZIM archive" "\n"
"[ERROR] Invalid checksum:" "\n"
Expand All @@ -528,6 +536,7 @@ TEST(zimcheck, metadata_poorzimfile)
const std::string expected_stdout(
"[INFO] Checking zim file data/zimfiles/poor.zim" "\n"
"[INFO] Zimcheck version is " VERSION "\n"
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
"[INFO] Checking metadata..." "\n"
"[ERROR] Metadata errors:" "\n"
" Missing mandatory metadata: Title" "\n"
Expand All @@ -553,6 +562,7 @@ TEST(zimcheck, favicon_poorzimfile)
const std::string expected_stdout(
"[INFO] Checking zim file data/zimfiles/poor.zim" "\n"
"[INFO] Zimcheck version is " VERSION "\n"
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
"[INFO] Searching for Favicon..." "\n"
"[ERROR] Favicon:" "\n"
" Favicon is missing" "\n"
Expand All @@ -574,6 +584,7 @@ TEST(zimcheck, mainpage_poorzimfile)
const std::string expected_stdout(
"[INFO] Checking zim file data/zimfiles/poor.zim" "\n"
"[INFO] Zimcheck version is " VERSION "\n"
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
"[INFO] Searching for main page..." "\n"
"[ERROR] Missing mainpage:" "\n"
" Main Page Index stored in Archive Header: 4294967295" "\n"
Expand All @@ -595,6 +606,7 @@ TEST(zimcheck, empty_items_poorzimfile)
const std::string expected_stdout(
"[INFO] Checking zim file data/zimfiles/poor.zim" "\n"
"[INFO] Zimcheck version is " VERSION "\n"
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
"[INFO] Verifying Articles' content..." "\n"
"[ERROR] Empty articles:" "\n"
" Entry empty.html is empty" "\n"
Expand All @@ -616,6 +628,7 @@ TEST(zimcheck, internal_url_check_poorzimfile)
const std::string expected_stdout(
"[INFO] Checking zim file data/zimfiles/poor.zim" "\n"
"[INFO] Zimcheck version is " VERSION "\n"
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
"[INFO] Verifying Articles' content..." "\n"
"[ERROR] Invalid internal links found:" "\n"
" The following links:" "\n"
Expand All @@ -642,6 +655,7 @@ TEST(zimcheck, external_url_check_poorzimfile)
const std::string expected_stdout(
"[INFO] Checking zim file data/zimfiles/poor.zim" "\n"
"[INFO] Zimcheck version is " VERSION "\n"
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
"[INFO] Verifying Articles' content..." "\n"
"[ERROR] Invalid external links found:" "\n"
" http://a.io/pic.png is an external dependence in article external_image_http.html" "\n"
Expand All @@ -665,6 +679,7 @@ TEST(zimcheck, redundant_poorzimfile)
const std::string expected_stdout(
"[INFO] Checking zim file data/zimfiles/poor.zim" "\n"
"[INFO] Zimcheck version is " VERSION "\n"
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
"[INFO] Verifying Articles' content..." "\n"
"[INFO] Searching for redundant articles..." "\n"
" Verifying Similar Articles for redundancies..." "\n"
Expand All @@ -688,6 +703,7 @@ TEST(zimcheck, redirect_loop_poorzimfile)
const std::string expected_output(
"[INFO] Checking zim file data/zimfiles/poor.zim" "\n"
"[INFO] Zimcheck version is " VERSION "\n"
"[WARNING] Integrity check is skipped. Any detected errors may in fact be due to corrupted/invalid data.\n"
"[INFO] Checking for redirect loops..." "\n"
"[ERROR] Redirect loop(s) exist:" "\n"
" Redirect loop exists from entry redirect_loop.html" "\n"
Expand Down

0 comments on commit 1b2e64c

Please sign in to comment.