Skip to content

Commit

Permalink
[CREATE] Explorer Python Script
Browse files Browse the repository at this point in the history
  • Loading branch information
FMCalisto committed Sep 23, 2024
1 parent 011c5c5 commit a98eda0
Show file tree
Hide file tree
Showing 5 changed files with 433 additions and 65 deletions.
1 change: 0 additions & 1 deletion scripts/clean_tests.sh
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ directories=(
"$dataset_multimodal_breast/tests/test005/"
"$dicom_images_breast/data/meta/pre/"
"$dicom_images_breast/data/meta/post/"
"$dicom_images_breast/data/mapping/"
"$dicom_images_breast/data/logs/"
)

Expand Down
100 changes: 100 additions & 0 deletions scripts/move_back_to_unexplored.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
#!/bin/bash
#
# Author: Francisco Maria Calisto
# Maintainer: Francisco Maria Calisto
# Email: francisco.calisto@tecnico.ulisboa.pt
# License: ACADEMIC & COMMERCIAL
# Created Date: 2024-09-22
# Revised Date: 2024-09-22 # Updated to reflect file limit via variable
# Version: 1.2 # Incremented version to reflect changes for variable-based file limit
# Status: Development
# Credits:
# - Carlos Santiago
# - Catarina Barata
# - Jacinto C. Nascimento
# - Diogo Araújo
# Usage: ./move_back_to_unexplored.sh
# Example: ./scripts/move_back_to_unexplored.sh
# Description: This script moves a limited number of DICOM files from the "checking" folder
# to the "unexplored" folder in the dataset-multimodal-breast repository. It includes error
# handling, logging to a timestamped log file, and a configurable limit on the number of files moved per run.

# Exit script on any command failure to ensure safe execution
set -e

# Define the home directory using the system's HOME environment variable
home="$HOME"

# Define the file limit for the number of DICOM files to move.
# Defaults to 10; may be overridden from the environment, e.g.
#   FILE_LIMIT=50 ./scripts/move_back_to_unexplored.sh
FILE_LIMIT="${FILE_LIMIT:-10}"

# Root of the curation tree that holds the checking/unexplored/logs folders
curation_dir="$home/Git/dataset-multimodal-breast/data/curation"

# Ensure the logs directory exists BEFORE resolving it: some realpath
# implementations fail on paths that do not exist yet, and under `set -e`
# that failure would abort the script before the directory could be created.
mkdir -p "$curation_dir/logs"
LOG_DIR="$(realpath "$curation_dir/logs")"

# Define source and destination directories (using realpath for absolute paths).
# If a directory is missing, fall back to the unresolved path instead of letting
# `set -e` abort here, so that the later directory validation can report the
# problem with a proper logged error message.
SRC_DIR="$(realpath "$curation_dir/checking" 2>/dev/null)" || SRC_DIR="$curation_dir/checking"
DEST_DIR="$(realpath "$curation_dir/unexplored" 2>/dev/null)" || DEST_DIR="$curation_dir/unexplored"

# Create a timestamp for the log file to avoid overwriting previous logs
LOG_FILE="$LOG_DIR/move_back_$(date +'%Y%m%d_%H%M%S').log"

# Function to log messages to both the terminal and log file
# Globals:
#   LOG_FILE (read): path of the log file the message is appended to
# Arguments:
#   $1: The message to print and log
# Outputs:
#   Writes the message, followed by a newline, to stdout and appends it to LOG_FILE
log_message() {
  # printf instead of echo: echo would swallow or reinterpret messages that
  # look like its own options (e.g. "-n" or "-e").
  printf '%s\n' "$1" | tee -a "$LOG_FILE"
}

# Function to make sure a required directory is present
# Arguments:
#   $1: Directory path to validate
#   $2: Directory name for logging purposes
# Behavior:
#   Returns 0 when the directory exists; otherwise logs an error through
#   log_message and terminates the script with exit status 1.
validate_directory() {
  local path_to_check="$1"
  local label="$2"

  # Guard clause: nothing to do when the directory is there
  if [[ -d "$path_to_check" ]]; then
    return 0
  fi

  log_message "Error: $label directory $path_to_check does not exist. Exiting."
  exit 1
}

# Function to move DICOM files from source to destination, respecting the file limit
# Arguments:
#   $1: Source directory (searched recursively for regular files named *.dcm)
#   $2: Destination directory (files are moved directly into it)
#   $3: File limit (non-negative integer; maximum number of files to move)
# Outputs:
#   Progress, warnings and a final summary via log_message
# Returns:
#   0 on success; 1 if the limit is not a non-negative integer or a move fails
move_files() {
  local src="$1"
  local dest="$2"
  local limit="$3"
  local count=0
  local file target

  # Reject anything that is not a plain non-negative integer before using it
  # in arithmetic.
  if [[ ! "$limit" =~ ^[0-9]+$ ]]; then
    log_message "Error: file limit '$limit' is not a non-negative integer."
    return 1
  fi
  # Force base 10 so a value such as "010" is not interpreted as octal
  limit=$((10#$limit))

  log_message "Moving up to $limit DICOM files from $src to $dest..."

  # Find and move only DICOM files (.dcm extension), limit to the specified number.
  # - The loop reads from process substitution rather than a pipeline, so it runs
  #   in the current shell and $count is still valid for the summary below.
  # - The limit is checked before each read, so the loop stops as soon as it is reached.
  # - NUL-delimited names keep paths with spaces or newlines intact.
  while (( count < limit )) && IFS= read -r -d '' file; do
    target="$dest/${file##*/}"

    # Never overwrite a file already present in the destination (files with the
    # same name can come from different sub-directories of the source).
    if [[ -e "$target" ]]; then
      log_message "Warning: $target already exists; skipping $file"
      continue
    fi

    if ! mv -- "$file" "$dest/"; then
      log_message "Error: failed to move $file to $dest."
      return 1
    fi

    log_message "$(date): Moved $file"
    # Plain assignment instead of ((count++)): the latter returns a non-zero
    # status when count is 0, which aborts the script under `set -e`.
    count=$((count + 1))
  done < <(find "$src" -type f -name "*.dcm" -print0)

  log_message "Moved $count DICOM files from $src to $dest."
}

# Main execution begins here

# Entry point: runs the validation, the move and the final log message in order.
main() {
  # Both the "checking" (source) and "unexplored" (destination) folders must exist
  validate_directory "$SRC_DIR" "Checking"
  validate_directory "$DEST_DIR" "Unexplored"

  # Move DICOM files back from "checking" to "unexplored", capped at FILE_LIMIT
  move_files "$SRC_DIR" "$DEST_DIR" "$FILE_LIMIT"

  # Tell the user where the log of this run was written
  log_message "File move operation completed successfully. Logs are saved to $LOG_FILE."
}

main "$@"

# End of script
94 changes: 56 additions & 38 deletions scripts/move_files.sh
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
# Email: francisco.calisto@tecnico.ulisboa.pt
# License: ACADEMIC & COMMERCIAL
# Created Date: 2024-09-21
# Revised Date: 2024-09-22 # Updated to reflect optimizations and improvements
# Version: 1.5 # Incremented version to reflect new logging location
# Revised Date: 2024-09-23 # Updated to reflect optimizations and improvements
# Version: 1.6 # Incremented version to reflect additional logging and optimizations
# Status: Development
# Credits:
# - Carlos Santiago
Expand All @@ -19,59 +19,75 @@
# inside the dataset-multimodal-breast repository. It handles large datasets by processing files in batches,
# offers parallelism for speed, checks disk space, and logs errors in the curation/logs folder.

# Exit script on any command failure
# Exit script on any command failure to ensure safe execution
set -e

# Define home directory using the system's HOME environment variable
# Define the home directory using the system's HOME environment variable
home="$HOME"

# Resolve the absolute paths for source, destination, and log directories
SRC_DIR="$(realpath "$home/Git/dataset-multimodal-breast/data/curation/unexplored")"
DEST_DIR="$(realpath "$home/Git/dataset-multimodal-breast/data/curation/checking")"
# Log file with timestamp to prevent overwriting previous logs
timestamp=$(date +"%Y%m%d_%H%M%S")
LOG_DIR="$(realpath "$home/Git/dataset-multimodal-breast/data/curation/logs")"
LOG_FILE="$LOG_DIR/move_files_$timestamp.log"

# Ensure the logs directory exists, create if it doesn't
if [ ! -d "$LOG_DIR" ]; then
mkdir -p "$LOG_DIR"
fi

# Log file to capture details of file moves and errors
LOG_FILE="$LOG_DIR/move_files.log"
# Log the beginning of the script execution
echo "$(date): Starting move_files.sh script" >> "$LOG_FILE"

# Define the absolute paths for source and destination directories
SRC_DIR="$(realpath "$home/Git/dataset-multimodal-breast/data/curation/unexplored")"
DEST_DIR="$(realpath "$home/Git/dataset-multimodal-breast/data/curation/checking")"

# Log the source and destination directories
echo "$(date): Source directory: $SRC_DIR" >> "$LOG_FILE"
echo "$(date): Destination directory: $DEST_DIR" >> "$LOG_FILE"

# Function to log both errors and standard messages
log_message() {
echo "$1"
echo "$(date): $1" >> "$LOG_FILE"
}

# Function to print error messages to stderr and log them
# Function to log errors, print to stderr, and log to the log file
# Arguments:
# $1: The error message to display and log
function print_error {
print_error() {
echo "$1" >&2 # Print the error message to stderr
echo "$(date): $1" >> "$LOG_FILE" # Append the error message to the log file
echo "$(date): ERROR: $1" >> "$LOG_FILE" # Log the error message to the log file
}

# Function to validate that a directory exists
# Function to validate the existence of a directory
# Arguments:
# $1: The path of the directory to validate
# $2: A friendly name for the directory (e.g., "Source", "Destination")
# $1: The directory path
# $2: Friendly name for the directory (e.g., "Source", "Destination")
validate_directory() {
local dir_path="$1" # Directory to check
local dir_name="$2" # Friendly name for logging and messages

# Check if the directory exists and is a valid directory
local dir_path="$1"
local dir_name="$2"
if [ ! -d "$dir_path" ]; then
print_error "$dir_name directory $dir_path does not exist. Exiting."
exit 1 # Exit the script if the directory doesn't exist
exit 1
else
log_message "$dir_name directory exists: $dir_path"
fi
}

# Function to check disk space before moving files
# Function to check if sufficient disk space is available before moving files
# Arguments:
# $1: The required minimum free space in kilobytes (e.g., 10485760 for 10GB)
check_disk_space() {
local required_space="$1"
local available_space=$(df "$DEST_DIR" | awk 'NR==2 {print $4}')

# Check if there is enough available space
# Check if the available disk space is sufficient
if (( available_space < required_space )); then
print_error "Not enough disk space. Available: ${available_space}KB, Required: ${required_space}KB. Exiting."
exit 1
else
log_message "Sufficient disk space available: ${available_space}KB"
fi
}

Expand All @@ -83,51 +99,53 @@ move_files_in_batches() {
local src="$1"
local dest="$2"
local count=0
local BATCH_SIZE=100 # Customize the batch size for optimal performance

echo "Moving files from $src to $dest in batches of $BATCH_SIZE..."
log_message "Moving files from $src to $dest in batches of $BATCH_SIZE..."

# Find all files in the source directory and move them in batches
find "$src" -type f | while IFS= read -r file; do
if [ -f "$file" ]; then
mv "$file" "$dest" # Move file to the destination
echo "$(date): Moved $file" >> "$LOG_FILE"
mv "$file" "$dest"
log_message "Moved file: $file"
((count++))
# Check if we've reached the batch size

# Log progress every batch
if (( count % BATCH_SIZE == 0 )); then
echo "Moved $count files so far..."
sleep 1 # Optional: Add a pause between batches to reduce system load
log_message "Moved $count files so far..."
sleep 1 # Add a short delay between batches to reduce system load
fi
fi
done
log_message "Finished moving files. Total files moved: $count"
}

# Function to check if the last operation (moving files) was successful
# Function to check if the move operation was successful
check_move_success() {
# $? holds the exit status of the last command (mv in this case)
if [ $? -eq 0 ]; then
echo "Files moved successfully."
log_message "File move operation completed successfully."
else
print_error "An error occurred while moving files."
exit 1 # Exit with an error status if something went wrong
print_error "An error occurred during the file move operation."
exit 1
fi
}

# Main script execution begins here

# Validate the existence of the source and destination directories
# Validate the source and destination directories
validate_directory "$SRC_DIR" "Source"
validate_directory "$DEST_DIR" "Destination"

# Check if there is enough disk space (assuming 10GB minimum required space)
# Check for sufficient disk space (assuming a minimum of 10GB required space)
check_disk_space 10485760 # 10GB in kilobytes

# Move files in batches from the source to the destination
# Move the files from the source to the destination in batches
move_files_in_batches "$SRC_DIR" "$DEST_DIR"

# Check if the move operation was successful
check_move_success

# Print a final message indicating that the script has completed successfully
echo "Operation complete. Logs can be found in $LOG_FILE."
# Final log message indicating that the script has completed
log_message "Operation complete. Logs saved in $LOG_FILE."

# End of script
Loading

0 comments on commit a98eda0

Please sign in to comment.