Skip to content

Commit

Permalink
wip
Browse files Browse the repository at this point in the history
  • Loading branch information
PaulHax committed Mar 27, 2024
1 parent 2553931 commit 690d3fe
Show file tree
Hide file tree
Showing 5 changed files with 190 additions and 106 deletions.
265 changes: 174 additions & 91 deletions packages/dicom/gdcm/image-sets-normalization.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ const std::set<std::string> PATIENT_TAGS = {
};

const std::set<std::string> STUDY_TAGS = {
"0020|000D", // Study Instance UID
"0008|0020", // Study Date
"0008|0030", // Study Time
"0008|1030", // Study Description
Expand All @@ -56,6 +57,7 @@ const std::set<std::string> NON_INSTANCE_TAGS = {
"0010|1010", // Patient's Age
"0010|1030", // Patient's Weight
"0010|21b0", // Additional Patient's History
"0020|000D", // Study Instance UID
"0008|0020", // Study Date
"0008|0030", // Study Time
"0008|1030", // Study Description
Expand All @@ -65,8 +67,14 @@ const std::set<std::string> NON_INSTANCE_TAGS = {
"0008|0060", // Modality
};

const std::string STUDY_INSTANCE_UID = "0020|000D";
const std::string SERIES_INSTANCE_UID = "0020|000e";

rapidjson::Value mapToJsonObj(const itk::DICOMTagReader::TagMapType &tags, rapidjson::Document::AllocatorType &allocator)
using File = std::string;
using TagMap = itk::DICOMTagReader::TagMapType;
using FileToTags = std::map<File, TagMap>;

rapidjson::Value mapToJsonObj(const TagMap &tags, rapidjson::Document::AllocatorType &allocator)
{
rapidjson::Value json(rapidjson::kObjectType);
for (const auto &[tag, value] : tags)
Expand Down Expand Up @@ -100,126 +108,181 @@ rapidjson::Value jsonFromTags(const itk::DICOMTagReader::TagMapType &tags, const
return mapToJsonObj(filteredTags, allocator);
}

int main(int argc, char *argv[])
// Reads all DICOM tags of every input file.
//
// files: paths of the DICOM files to read.
// Returns a map keyed by file name whose value is the tag map read from that file.
// Throws std::runtime_error (after logging to stderr) as soon as the tag reader
// reports that it cannot read one of the files.
FileToTags readTags(const std::vector<File> &files)
{
  FileToTags fileToTags;
  itk::DICOMTagReader tagReader;
  for (const File &fileName : files)
  {
    if (!tagReader.CanReadFile(fileName))
    {
      std::cerr << "Could not read the input DICOM file: " << fileName << std::endl;
      throw std::runtime_error("Could not read the input DICOM file: " + fileName);
    }
    tagReader.SetFileName(fileName);
    // Assign the reader's result directly so no intermediate const copy of the map is made.
    fileToTags[fileName] = tagReader.ReadAllTags();
  }
  return fileToTags;
}

gdcm::DiscriminateVolume dv;
dv.ProcessIntoVolume(s);
using Volume = std::vector<const File>;
using Volumes = std::vector<Volume>;
using ImageSets = std::vector<Volumes>;

std::vector<gdcm::Directory::FilenamesType> sorted = dv.GetSortedFiles();
for (gdcm::Directory::FilenamesType &volume : sorted)
// Decides whether a file belongs to an existing volume.
//
// volumeTags: tags of a file already in the volume.
// fileTags: tags of the candidate file.
// Returns true only when both maps contain SERIES_INSTANCE_UID and the two values
// are equal; a missing UID on either side never matches.
bool isSameVolume(const TagMap &volumeTags, const TagMap &fileTags)
{
  const auto volumeSeries = volumeTags.find(SERIES_INSTANCE_UID);
  const auto fileSeries = fileTags.find(SERIES_INSTANCE_UID);
  if (volumeSeries == volumeTags.end() || fileSeries == fileTags.end())
  {
    return false;
  }
  return volumeSeries->second == fileSeries->second;
}

std::vector<gdcm::Directory::FilenamesType> unsorted = dv.GetUnsortedFiles();
for (gdcm::Directory::FilenamesType fileGroups : unsorted)
// Partitions the files into volumes using isSameVolume.
//
// fileToTags: tags of every input file, keyed by file name.
// Returns the volumes in order of first appearance while iterating fileToTags
// (a std::map, so files are visited in sorted key order). A file that matches no
// existing volume starts a new one.
Volumes groupByVolume(const FileToTags &fileToTags)
{
  Volumes volumes;
  for (const auto &[file, tags] : fileToTags)
  {
    Volume *matchingVolume = nullptr;
    for (Volume &volume : volumes)
    {
      // Every volume is created with one file, so its first file is always a valid
      // representative. Bind by reference: copying a whole tag map per comparison is wasteful.
      const TagMap &volumeTags = fileToTags.at(volume.front());
      if (isSameVolume(volumeTags, tags))
      {
        matchingVolume = &volume;
        break;
      }
    }
    if (matchingVolume)
    {
      matchingVolume->push_back(file);
    }
    else
    {
      volumes.push_back(Volume{file});
    }
  }
  return volumes;
}

rapidjson::Document imageSetsJson(rapidjson::kArrayType);
rapidjson::Document::AllocatorType &allocator = imageSetsJson.GetAllocator();

itk::DICOMTagReader tagReader;
// Decides whether a volume belongs to an existing image set.
//
// imageSetTags: tags of a file already in the image set.
// volumeTags: tags of a file from the candidate volume.
// Returns true only when both maps contain STUDY_INSTANCE_UID and the values are equal.
//
// NOTE(review): STUDY_INSTANCE_UID is spelled with an uppercase hex digit ("0020|000D")
// while SERIES_INSTANCE_UID is lowercase ("0020|000e"). Confirm the key casing the tag
// reader produces; if keys are lowercase this lookup never succeeds.
bool isSameImageSet(const TagMap &imageSetTags, const TagMap &volumeTags)
{
  const auto setStudy = imageSetTags.find(STUDY_INSTANCE_UID);
  const auto volumeStudy = volumeTags.find(STUDY_INSTANCE_UID);
  const bool bothPresent = setStudy != imageSetTags.end() && volumeStudy != volumeTags.end();
  return bothPresent && setStudy->second == volumeStudy->second;
}

// read all tags for file
for (const auto &fileNames : volumes)
// Partitions volumes into image sets using isSameImageSet.
//
// volumes: the volumes to group.
// fileToTags: tags of every file referenced by the volumes, keyed by file name.
// Returns the image sets in order of first appearance; a volume that matches no
// existing image set starts a new one. Empty volumes are skipped because they have
// no file to take tags from.
// Throws std::out_of_range (from std::map::at) if a volume references a file that is
// not present in fileToTags.
ImageSets groupByImageSet(const Volumes &volumes, const FileToTags &fileToTags)
{
  ImageSets imageSets;
  for (const Volume &volume : volumes)
  {
    if (volume.empty())
    {
      continue;
    }
    // The first file of the volume stands in for the whole volume.
    const TagMap &volumeTags = fileToTags.at(volume.front());
    Volumes *matchingImageSet = nullptr;
    for (Volumes &imageSet : imageSets)
    {
      // Image sets are only ever created with one non-empty volume, so the first file of
      // the first volume is a valid representative.
      const TagMap &imageSetTags = fileToTags.at(imageSet.front().front());
      if (isSameImageSet(imageSetTags, volumeTags))
      {
        matchingImageSet = &imageSet;
        break;
      }
    }
    if (matchingImageSet)
    {
      matchingImageSet->push_back(volume);
    }
    else
    {
      imageSets.push_back(Volumes{volume});
    }
  }
  return imageSets;
}

// filter out patient, study, series tags
itk::DICOMTagReader::TagMapType instanceTags;
for (const auto &[tag, value] : dicomTags)
rapidjson::Document toJson(const ImageSets &imageSets, const FileToTags &fileToTags)
{
rapidjson::Document imageSetsJson(rapidjson::kArrayType);
rapidjson::Document::AllocatorType &allocator = imageSetsJson.GetAllocator();
TagMap dicomTags;
for (const Volumes &volumes : imageSets)
{
rapidjson::Value seriesById(rapidjson::kObjectType);
for (const Volume &volume : volumes)
{
rapidjson::Value instances(rapidjson::kObjectType);
for (const File &file : volume)
{
if (NON_INSTANCE_TAGS.find(tag) == NON_INSTANCE_TAGS.end())
dicomTags = fileToTags.at(file);
// filter out patient, study, series tags
itk::DICOMTagReader::TagMapType instanceTags;
for (const auto &[tag, value] : dicomTags)
{
instanceTags[tag] = value;
if (NON_INSTANCE_TAGS.find(tag) == NON_INSTANCE_TAGS.end())
{
instanceTags[tag] = value;
}
}
rapidjson::Value instanceTagsJson = mapToJsonObj(instanceTags, allocator);
rapidjson::Value instance(rapidjson::kObjectType);
instance.AddMember("DICOM", instanceTagsJson, allocator);

rapidjson::Value fileNameValue;
fileNameValue.SetString(file.c_str(), file.size(), allocator);
rapidjson::Value imageFrame(rapidjson::kObjectType);
imageFrame.AddMember("ID", fileNameValue, allocator);
rapidjson::Value imageFrames(rapidjson::kArrayType);
imageFrames.PushBack(imageFrame, allocator);
instance.AddMember("ImageFrames", imageFrames, allocator);

// instance by UID under instances
TagMap::iterator it = dicomTags.find("0008|0018");
if (it == dicomTags.end())
{
std::cerr << "Instance UID not found in dicomTags" << std::endl;
throw std::runtime_error("Instance UID not found in dicomTags");
}
const auto tag = it->second;
rapidjson::Value instanceId;
instanceId.SetString(tag.c_str(), tag.size(), allocator);
instances.AddMember(instanceId, instance, allocator);
}
rapidjson::Value instanceTagsJson = mapToJsonObj(instanceTags, allocator);
rapidjson::Value instance(rapidjson::kObjectType);
instance.AddMember("DICOM", instanceTagsJson, allocator);
rapidjson::Value fileNameValue;
fileNameValue.SetString(fileName.c_str(), fileName.size(), allocator);
instance.AddMember("FileName", fileNameValue, allocator);

// instance by UID under instances
itk::DICOMTagReader::TagMapType::iterator it = dicomTags.find("0008|0018");
if (it == dicomTags.end())
{
std::cerr << "Instance UID not found in dicomTags" << std::endl;
return EXIT_FAILURE;
}
const auto tag = it->second;
rapidjson::Value instanceId;
instanceId.SetString(tag.c_str(), tag.size(), allocator);
instances.AddMember(instanceId, instance, allocator);
}

rapidjson::Value seriesTags = jsonFromTags(dicomTags, SERIES_TAGS, allocator);
rapidjson::Value series(rapidjson::kObjectType);
series.AddMember("DICOM", seriesTags, allocator);
series.AddMember("Instances", instances, allocator);
// series by ID object
itk::DICOMTagReader::TagMapType::iterator it = dicomTags.find("0020|000e");
if (it == dicomTags.end())
{
std::cerr << "Series UID not found in dicomTags" << std::endl;
return EXIT_FAILURE;
// Series
rapidjson::Value seriesTags = jsonFromTags(dicomTags, SERIES_TAGS, allocator);
rapidjson::Value series(rapidjson::kObjectType);
series.AddMember("DICOM", seriesTags, allocator);
series.AddMember("Instances", instances, allocator);

int volumeIndex = std::distance(volumes.begin(), std::find(volumes.begin(), volumes.end(), volume));
const std::string seriesId = dicomTags.at(SERIES_INSTANCE_UID) + '.' + std::to_string(volumeIndex);
rapidjson::Value seriesIdJson;
seriesIdJson.SetString(seriesId.c_str(), seriesId.size(), allocator);
seriesById.AddMember(seriesIdJson, series, allocator);
}
const auto tag = it->second;
rapidjson::Value seriesId;
seriesId.SetString(tag.c_str(), tag.size(), allocator);
rapidjson::Value seriesById(rapidjson::kObjectType);
seriesById.AddMember(seriesId, series, allocator);

rapidjson::Value imageSet(rapidjson::kObjectType);

// Patient
rapidjson::Value patient(rapidjson::kObjectType);
rapidjson::Value patientTags = jsonFromTags(dicomTags, PATIENT_TAGS, allocator);
patient.AddMember("DICOM", patientTags, allocator);
imageSet.AddMember("Patient", patient, allocator);

// Study
rapidjson::Value study(rapidjson::kObjectType);
rapidjson::Value studyTagsJson = jsonFromTags(dicomTags, STUDY_TAGS, allocator);
study.AddMember("DICOM", studyTagsJson, allocator);
Expand All @@ -228,11 +291,31 @@ int main(int argc, char *argv[])

imageSetsJson.PushBack(imageSet, allocator);
}
return imageSetsJson;
}

// Pipeline entry point: reads the tags of the given DICOM files, groups the files into
// volumes and image sets, and writes the result as JSON to the "image-sets" output.
// Returns EXIT_SUCCESS on success and EXIT_FAILURE when any processing step throws.
int main(int argc, char *argv[])
{
  itk::wasm::Pipeline pipeline("image-sets-normalization", "Group DICOM files into image sets", argc, argv);

  std::vector<std::string> files;
  pipeline.add_option("--files", files, "DICOM files")->required()->check(CLI::ExistingFile)->type_size(1, -1)->type_name("INPUT_BINARY_FILE");

  itk::wasm::OutputTextStream imageSetsOutput;
  pipeline.add_option("image-sets", imageSetsOutput, "Image sets JSON")->required()->type_name("OUTPUT_JSON");

  ITK_WASM_PARSE(pipeline);

  try
  {
    const FileToTags fileToTags = readTags(files);
    const Volumes volumes = groupByVolume(fileToTags);
    const ImageSets imageSets = groupByImageSet(volumes, fileToTags);

    rapidjson::Document imageSetsJson = toJson(imageSets, fileToTags);

    rapidjson::StringBuffer stringBuffer;
    rapidjson::Writer<rapidjson::StringBuffer> writer(stringBuffer);
    imageSetsJson.Accept(writer);
    imageSetsOutput.Get() << stringBuffer.GetString();
  }
  catch (const std::exception &error)
  {
    // The helpers signal failure by throwing; turn that into a failure exit code
    // instead of letting the exception escape main.
    std::cerr << "image-sets-normalization failed: " << error.what() << std::endl;
    return EXIT_FAILURE;
  }

  return EXIT_SUCCESS;
}

Large diffs are not rendered by default.

Binary file not shown.
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ def test_one_series():
orientation_series[0],
]
image_sets = image_sets_normalization(out_of_order)
print(image_sets)
assert image_sets
instances = list(image_sets[0]["Study"]["Series"].values())[0]["Instances"].values()
sorted_files = [instance["FileName"] for instance in instances]
Expand All @@ -34,20 +35,20 @@ def test_one_series():
)


def test_two_series():
    """Files from two series, passed out of order, should produce two image sets."""
    orientation_files = [orientation_series[index] for index in (1, 2, 0)]
    mr_files = [mr_series[index] for index in (3, 0, 4, 2, 1)]
    files = orientation_files + mr_files
    assert files[0].exists()
    image_sets = image_sets_normalization(files)
    assert len(image_sets) == 2
# def test_two_series():
# files = [
# orientation_series[1],
# orientation_series[2],
# orientation_series[0],
# mr_series[3],
# mr_series[0],
# mr_series[4],
# mr_series[2],
# mr_series[1],
# ]
# assert files[0].exists()
# image_sets = image_sets_normalization(files)
# assert len(image_sets) == 2


# def test_strange_ct():
Expand Down
Empty file.

0 comments on commit 690d3fe

Please sign in to comment.