// Orthanc/OrthancFramework/Sources/DicomFormat/DicomStreamReader.cpp
// 2025-06-23 19:07:37 +05:30
//
// 744 lines
// 22 KiB
// C++

/**
* Orthanc - A Lightweight, RESTful DICOM Store
* Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics
* Department, University Hospital of Liege, Belgium
* Copyright (C) 2017-2023 Osimis S.A., Belgium
* Copyright (C) 2024-2025 Orthanc Team SRL, Belgium
* Copyright (C) 2021-2025 Sebastien Jodogne, ICTEAM UCLouvain, Belgium
*
* This program is free software: you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public License
* as published by the Free Software Foundation, either version 3 of
* the License, or (at your option) any later version.
*
* This program is distributed in the hope that it will be useful, but
* WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with this program. If not, see
* <http://www.gnu.org/licenses/>.
**/
#include "../PrecompiledHeaders.h"
#include "DicomStreamReader.h"
#include "../OrthancException.h"
#include <cassert>
#include <sstream>
#include <boost/iostreams/device/array.hpp>
#include <boost/iostreams/stream.hpp>
#include <iostream>
namespace Orthanc
{
/**
 * Tell whether the last character of "source" is a padding character
 * that has to be removed. This is the case if, and only if, the
 * string is non-empty, ends with a space or a null byte, and "vr" is
 * one of the string-based value representations listed below.
 **/
static bool IsNormalizationNeeded(const std::string& source,
                                  ValueRepresentation vr)
{
  if (source.empty())
  {
    return false;
  }

  const char last = source[source.size() - 1];

  if (last != ' ' &&
      last != '\0')
  {
    return false;  // No trailing padding
  }

  // Normalization only applies to string-based VR
  switch (vr)
  {
    case ValueRepresentation_ApplicationEntity:
    case ValueRepresentation_AgeString:
    case ValueRepresentation_CodeString:
    case ValueRepresentation_DecimalString:
    case ValueRepresentation_IntegerString:
    case ValueRepresentation_LongString:
    case ValueRepresentation_LongText:
    case ValueRepresentation_PersonName:
    case ValueRepresentation_ShortString:
    case ValueRepresentation_ShortText:
    case ValueRepresentation_UniqueIdentifier:
    case ValueRepresentation_UnlimitedText:
      return true;

    default:
      return false;
  }
}
/**
 * Remove, in place, the trailing padding character of "inplace" if
 * IsNormalizationNeeded() reports that one is present for this value
 * representation. At most one character is removed.
 **/
static void NormalizeValue(std::string& inplace,
                           ValueRepresentation vr)
{
  if (!IsNormalizationNeeded(inplace, vr))
  {
    return;  // Nothing to strip
  }

  assert(!inplace.empty());
  inplace.erase(inplace.size() - 1);
}
/**
 * Decode the 16-bit unsigned integer stored in the first 2 bytes
 * pointed to by "dicom". If "littleEndian" is true, the first byte
 * is the least significant one; otherwise it is the most significant
 * one. The caller must make sure that 2 bytes are readable.
 **/
static uint16_t ReadUnsignedInteger16(const char* dicom,
                                      bool littleEndian)
{
  // Go through "uint8_t" to avoid sign extension of "char"
  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(dicom);

  const uint8_t low = (littleEndian ? bytes[0] : bytes[1]);
  const uint8_t high = (littleEndian ? bytes[1] : bytes[0]);

  return static_cast<uint16_t>(static_cast<uint16_t>(low) |
                               (static_cast<uint16_t>(high) << 8));
}
/**
 * Decode the 32-bit unsigned integer stored in the first 4 bytes
 * pointed to by "dicom". If "littleEndian" is true, the first byte
 * is the least significant one; otherwise it is the most significant
 * one. The caller must make sure that 4 bytes are readable.
 **/
static uint32_t ReadUnsignedInteger32(const char* dicom,
                                      bool littleEndian)
{
  // Go through "uint8_t" to avoid sign extension of "char"
  const uint8_t* bytes = reinterpret_cast<const uint8_t*>(dicom);

  // Accumulate the bytes from the most significant to the least
  // significant one
  uint32_t value = 0;

  for (unsigned int i = 0; i < 4; i++)
  {
    const unsigned int index = (littleEndian ? 3 - i : i);
    value = (value << 8) | static_cast<uint32_t>(bytes[index]);
  }

  return value;
}
/**
 * Decode a DICOM tag from the first 4 bytes pointed to by "dicom":
 * the group comes first (2 bytes), followed by the element (2
 * bytes), both using the given endianness.
 **/
static DicomTag ReadTag(const char* dicom,
                        bool littleEndian)
{
  const uint16_t group = ReadUnsignedInteger16(dicom, littleEndian);
  const uint16_t element = ReadUnsignedInteger16(dicom + 2, littleEndian);
  return DicomTag(group, element);
}
/**
 * Tell whether "vr" is one of the value representations whose
 * explicit encoding stores the length of the value as a 16-bit
 * integer right after the 2 characters of the VR.
 *
 * Are we in the case of Table 7.1-2? "Data Element with
 * Explicit VR of AE, AS, AT, CS, DA, DS, DT, FL, FD, IS, LO,
 * LT, PN, SH, SL, SS, ST, TM, UI, UL and US"
 * http://dicom.nema.org/medical/dicom/current/output/chtml/part05/chapter_7.html#sect_7.1.2
 **/
static bool IsShortExplicitTag(ValueRepresentation vr)
{
  switch (vr)
  {
    case ValueRepresentation_ApplicationEntity:    // AE
    case ValueRepresentation_AgeString:            // AS
    case ValueRepresentation_AttributeTag:         // AT
    case ValueRepresentation_CodeString:           // CS
    case ValueRepresentation_Date:                 // DA
    case ValueRepresentation_DecimalString:        // DS
    case ValueRepresentation_DateTime:             // DT
    case ValueRepresentation_FloatingPointSingle:  // FL
    case ValueRepresentation_FloatingPointDouble:  // FD
    case ValueRepresentation_IntegerString:        // IS
    case ValueRepresentation_LongString:           // LO
    case ValueRepresentation_LongText:             // LT
    case ValueRepresentation_PersonName:           // PN
    case ValueRepresentation_ShortString:          // SH
    case ValueRepresentation_SignedLong:           // SL
    case ValueRepresentation_SignedShort:          // SS
    case ValueRepresentation_ShortText:            // ST
    case ValueRepresentation_Time:                 // TM
    case ValueRepresentation_UniqueIdentifier:     // UI
    case ValueRepresentation_UnsignedLong:         // UL
    case ValueRepresentation_UnsignedShort:        // US
      return true;

    default:
      return false;
  }
}
/**
 * Tell whether the integers of the dataset are little endian, which
 * holds for every transfer syntax except "big endian explicit".
 **/
bool DicomStreamReader::IsLittleEndian() const
{
  const bool isBigEndian = (transferSyntax_ == DicomTransferSyntax_BigEndianExplicit);
  return !isBigEndian;
}
/**
 * Validate the first 144 bytes of the stream: the 128-byte preamble,
 * the "DICM" magic value, then the (0x0002, 0x0000) tag encoded as
 * an "UL" value of length 4. The value of this tag is then used as
 * the number of bytes to schedule for the meta-header. Throws
 * ErrorCode_BadFileFormat if any of the checks fails. The "visitor"
 * argument is not used by this step.
 **/
void DicomStreamReader::HandlePreamble(IVisitor& visitor,
                                       const std::string& block)
{
  assert(block.size() == 144u);
  assert(reader_.GetProcessedBytes() == 144u);

  /**
   * The "DICOM file meta information" is always encoded using
   * "Explicit VR Little Endian Transfer Syntax"
   * http://dicom.nema.org/medical/dicom/current/output/chtml/part10/chapter_7.html
   **/
  const char* const header = block.c_str();

  const bool hasMagic = (header[128] == 'D' &&
                         header[129] == 'I' &&
                         header[130] == 'C' &&
                         header[131] == 'M');

  const bool hasGroupLengthTag = (ReadTag(header + 132, true) == DicomTag(0x0002, 0x0000));

  const bool hasUnsignedLongVR = (header[136] == 'U' &&
                                  header[137] == 'L');

  const bool hasExpectedLength = (ReadUnsignedInteger16(header + 138, true) == 4);

  if (!(hasMagic &&
        hasGroupLengthTag &&
        hasUnsignedLongVR &&
        hasExpectedLength))
  {
    throw OrthancException(ErrorCode_BadFileFormat);
  }

  // The value of (0x0002, 0x0000) is the size of the meta-header
  const uint32_t metaHeaderSize = ReadUnsignedInteger32(header + 140, true);

  reader_.Schedule(metaHeaderSize);
  state_ = State_MetaHeader;
}
/**
 * Parse the full content of the meta-header, which is provided in
 * "block" and which is read as explicit VR, little endian. Each tag
 * whose group is 0x0002 is reported to the visitor through
 * "VisitMetaHeaderTag()". The transfer syntax UID is decoded into
 * "transferSyntax_" and reported through "VisitTransferSyntax()",
 * then the reading of the first tag of the dataset is scheduled.
 *
 * The length of each element is checked against the number of bytes
 * that are still available in "block" BEFORE the value is copied, so
 * that a corrupted or malicious length cannot trigger a read past
 * the end of the buffer.
 *
 * Throws ErrorCode_BadFileFormat if the block cannot be parsed or if
 * it contains no transfer syntax UID, and ErrorCode_NotImplemented
 * if the transfer syntax UID is not recognized.
 **/
void DicomStreamReader::HandleMetaHeader(IVisitor& visitor,
                                         const std::string& block)
{
  size_t pos = 0;
  const char* p = block.c_str();
  bool hasTransferSyntax = false;

  while (pos + 8 <= block.size())
  {
    DicomTag tag = ReadTag(p + pos, true);
    ValueRepresentation vr = StringToValueRepresentation(std::string(p + pos + 4, 2), true);

    if (IsShortExplicitTag(vr))
    {
      // Layout: tag (4 bytes), VR (2 bytes), length (2 bytes), value
      const size_t length = ReadUnsignedInteger16(p + pos + 6, true);

      // The loop condition guarantees that "pos + 8 <= block.size()",
      // so the subtraction below cannot underflow
      if (length > block.size() - (pos + 8))
      {
        throw OrthancException(ErrorCode_BadFileFormat, "Invalid DICOM File: Unable to parse Meta Header");
      }

      std::string value;
      value.assign(p + pos + 8, length);
      NormalizeValue(value, vr);

      if (tag.GetGroup() == 0x0002)
      {
        visitor.VisitMetaHeaderTag(tag, vr, value);
      }

      if (tag == DICOM_TAG_TRANSFER_SYNTAX_UID)
      {
        if (LookupTransferSyntax(transferSyntax_, value))
        {
          hasTransferSyntax = true;
        }
        else
        {
          throw OrthancException(ErrorCode_NotImplemented, "Unsupported transfer syntax: " + value);
        }
      }

      pos += length + 8;
    }
    else if (pos + 12 <= block.size())
    {
      // Layout: tag (4 bytes), VR (2 bytes), reserved (2 bytes),
      // length (4 bytes), value
      const uint16_t reserved = ReadUnsignedInteger16(p + pos + 6, true);

      if (reserved != 0)
      {
        throw OrthancException(ErrorCode_BadFileFormat);
      }

      const uint32_t length = ReadUnsignedInteger32(p + pos + 8, true);

      // "pos + 12 <= block.size()" holds in this branch, so the
      // subtraction below cannot underflow
      if (length > block.size() - (pos + 12))
      {
        throw OrthancException(ErrorCode_BadFileFormat, "Invalid DICOM File: Unable to parse Meta Header");
      }

      if (tag.GetGroup() == 0x0002)
      {
        std::string value;
        value.assign(p + pos + 12, length);
        NormalizeValue(value, vr);
        visitor.VisitMetaHeaderTag(tag, vr, value);
      }

      pos += static_cast<size_t>(length) + 12;
    }
    else
    {
      throw OrthancException(ErrorCode_BadFileFormat, "Invalid DICOM File: Unable to parse Meta Header");
    }
  }

  // Trailing bytes that are too short to hold a tag are an error
  if (pos != block.size())
  {
    throw OrthancException(ErrorCode_BadFileFormat);
  }

  if (!hasTransferSyntax)
  {
    throw OrthancException(ErrorCode_BadFileFormat, "DICOM file meta-header without transfer syntax UID");
  }

  visitor.VisitTransferSyntax(transferSyntax_);

  reader_.Schedule(8);
  state_ = State_DatasetTag;
}
void DicomStreamReader::HandleDatasetTag(const std::string& block,
const DicomTag& untilTag)
{
static const DicomTag DICOM_TAG_SEQUENCE_ITEM(0xfffe, 0xe000);
static const DicomTag DICOM_TAG_SEQUENCE_DELIMITATION_ITEM(0xfffe, 0xe00d);
static const DicomTag DICOM_TAG_SEQUENCE_DELIMITATION_SEQUENCE(0xfffe, 0xe0dd);
assert(block.size() == 8u);
const bool littleEndian = IsLittleEndian();
DicomTag tag = ReadTag(block.c_str(), littleEndian);
if (sequenceDepth_ == 0 &&
tag >= untilTag)
{
state_ = State_Done;
return;
}
if (tag == DICOM_TAG_SEQUENCE_ITEM ||
tag == DICOM_TAG_SEQUENCE_DELIMITATION_ITEM ||
tag == DICOM_TAG_SEQUENCE_DELIMITATION_SEQUENCE)
{
// The special sequence items are encoded like "Implicit VR"
uint32_t length = ReadUnsignedInteger32(block.c_str() + 4, littleEndian);
if (tag == DICOM_TAG_SEQUENCE_ITEM)
{
if (length == 0xffffffffu)
{
// Undefined length: Need to loop over the tags of the nested dataset
reader_.Schedule(8);
state_ = State_DatasetTag;
}
else
{
// Explicit length: Can skip the full sequence at once
reader_.Schedule(length);
state_ = State_DatasetValue;
}
}
else if (tag == DICOM_TAG_SEQUENCE_DELIMITATION_ITEM ||
tag == DICOM_TAG_SEQUENCE_DELIMITATION_SEQUENCE)
{
if (length != 0 ||
sequenceDepth_ == 0)
{
throw OrthancException(ErrorCode_BadFileFormat);
}
if (tag == DICOM_TAG_SEQUENCE_DELIMITATION_SEQUENCE)
{
sequenceDepth_ --;
}
reader_.Schedule(8);
state_ = State_DatasetTag;
}
else
{
throw OrthancException(ErrorCode_InternalError);
}
}
else
{
assert(reader_.GetProcessedBytes() >= block.size());
const uint64_t tagOffset = reader_.GetProcessedBytes() - block.size();
ValueRepresentation vr = ValueRepresentation_Unknown;
if (transferSyntax_ == DicomTransferSyntax_LittleEndianImplicit)
{
if (sequenceDepth_ == 0)
{
danglingTag_ = tag;
danglingVR_ = vr;
danglingOffset_ = tagOffset;
}
uint32_t length = ReadUnsignedInteger32(block.c_str() + 4, true /* little endian */);
HandleDatasetExplicitLength(length);
}
else
{
// This in an explicit transfer syntax
vr = StringToValueRepresentation(
std::string(block.c_str() + 4, 2), false /* ignore unknown VR */);
if (vr == ValueRepresentation_Sequence)
{
sequenceDepth_ ++;
reader_.Schedule(4);
state_ = State_SequenceExplicitLength;
}
else if (IsShortExplicitTag(vr))
{
uint16_t length = ReadUnsignedInteger16(block.c_str() + 6, littleEndian);
reader_.Schedule(length);
state_ = State_DatasetValue;
}
else
{
uint16_t reserved = ReadUnsignedInteger16(block.c_str() + 6, littleEndian);
if (reserved != 0)
{
throw OrthancException(ErrorCode_BadFileFormat);
}
reader_.Schedule(4);
state_ = State_DatasetExplicitLength;
}
if (sequenceDepth_ == 0)
{
danglingTag_ = tag;
danglingVR_ = vr;
danglingOffset_ = tagOffset;
}
}
}
}
/**
 * Select the next step once the 32-bit length of a data element is
 * known: either read the value at once (defined length), or enter a
 * nested dataset whose tags must be read one by one (undefined
 * length, i.e. 0xffffffff).
 **/
void DicomStreamReader::HandleDatasetExplicitLength(uint32_t length)
{
  if (length != 0xffffffffu)
  {
    // Defined length: The whole value can be read as one block
    reader_.Schedule(length);
    state_ = State_DatasetValue;
    return;
  }

  /**
   * This is the case of pixel data with compressed transfer
   * syntaxes. Schedule the reading of the first tag of the
   * nested dataset.
   * http://dicom.nema.org/medical/dicom/current/output/chtml/part05/sect_7.5.html
   **/
  state_ = State_DatasetTag;
  reader_.Schedule(8);
  sequenceDepth_ ++;
}
/**
 * Handle the 4 bytes that store the length of a data element in an
 * explicit transfer syntax. The next read is first scheduled
 * according to this length, then the visitor is called with the
 * "dangling" tag and an empty value, that is, before the actual
 * value is read. Note that this call is done whatever the current
 * sequence depth. The parsing stops if the visitor returns "false".
 **/
void DicomStreamReader::HandleDatasetExplicitLength(IVisitor& visitor,
                                                    const std::string& block)
{
  assert(block.size() == 4);

  const bool littleEndian = IsLittleEndian();
  HandleDatasetExplicitLength(ReadUnsignedInteger32(block.c_str(), littleEndian));

  const std::string noValue;
  const bool proceed = visitor.VisitDatasetTag(
    danglingTag_, danglingVR_, noValue, littleEndian, danglingOffset_);

  if (!proceed)
  {
    state_ = State_Done;
  }
}
/**
 * Handle the 4 bytes that store the length of a sequence in an
 * explicit transfer syntax. If the length is defined, the whole
 * sequence is scheduled as one single block; otherwise (0xffffffff),
 * the tags of the sequence are read one by one.
 **/
void DicomStreamReader::HandleSequenceExplicitLength(const std::string& block)
{
  assert(block.size() == 4);

  const uint32_t sequenceLength = ReadUnsignedInteger32(block.c_str(), IsLittleEndian());

  if (sequenceLength != 0xffffffffu)
  {
    // Defined length: Read the full sequence at once
    reader_.Schedule(sequenceLength);
    state_ = State_SequenceExplicitValue;
    return;
  }

  // Undefined length: Loop over the content of the sequence
  state_ = State_DatasetTag;
  reader_.Schedule(8);
}
/**
 * Called once the block holding a sequence of defined length has
 * been read: leave the sequence, and schedule the reading of the
 * next tag. Throws ErrorCode_InternalError if no sequence is
 * currently open.
 **/
void DicomStreamReader::HandleSequenceExplicitValue()
{
  const bool insideSequence = (sequenceDepth_ != 0);

  if (!insideSequence)
  {
    throw OrthancException(ErrorCode_InternalError);
  }

  sequenceDepth_ --;

  state_ = State_DatasetTag;
  reader_.Schedule(8);
}
/**
 * Handle the value of a data element. If the element is not nested
 * in a sequence, the visitor is called with the "dangling" tag and
 * with the value, after removal of its trailing padding character if
 * the VR asks for it. The parsing stops if the visitor returns
 * "false"; otherwise, the reading of the next tag is scheduled.
 **/
void DicomStreamReader::HandleDatasetValue(IVisitor& visitor,
                                           const std::string& block)
{
  if (sequenceDepth_ == 0)
  {
    bool proceed;

    if (IsNormalizationNeeded(block, danglingVR_))
    {
      // Drop the last character (the block is necessarily non-empty here)
      const std::string trimmed = block.substr(0, block.size() - 1);
      proceed = visitor.VisitDatasetTag(danglingTag_, danglingVR_, trimmed, IsLittleEndian(), danglingOffset_);
    }
    else
    {
      // Forward the block as such, without copying it
      proceed = visitor.VisitDatasetTag(danglingTag_, danglingVR_, block, IsLittleEndian(), danglingOffset_);
    }

    if (!proceed)
    {
      state_ = State_Done;
      return;
    }
  }

  reader_.Schedule(8);
  state_ = State_DatasetTag;
}
/**
 * Create a reader over "stream", and schedule the reading of the
 * 144 bytes that are expected by HandlePreamble(). The members
 * marked as "Dummy" are only given a placeholder value.
 **/
DicomStreamReader::DicomStreamReader(std::istream& stream) :
  reader_(stream),
  state_(State_Preamble),
  transferSyntax_(DicomTransferSyntax_LittleEndianImplicit),  // Dummy
  danglingTag_(0x0000, 0x0000),  // Dummy
  danglingVR_(ValueRepresentation_Unknown),  // Dummy
  danglingOffset_(0),  // Dummy
  sequenceDepth_(0)
{
  const size_t emptyHeaderSize = 128;   // empty header
  const size_t magicSize = 4;           // "DICM" magic value
  const size_t groupLengthTagSize = 4;  // (0x0002, 0x0000) tag
  const size_t groupLengthVRSize = 2;   // value representation of (0x0002, 0x0000) == "UL"
  const size_t groupLengthSize = 2;     // length of "UL" value == 4
  const size_t metaHeaderSizeSize = 4;  // actual length of the meta-header

  reader_.Schedule(emptyHeaderSize +
                   magicSize +
                   groupLengthTagSize +
                   groupLengthVRSize +
                   groupLengthSize +
                   metaHeaderSizeSize);
}
/**
 * Run the state machine as long as the stream can provide the
 * scheduled blocks, until the "done" state is reached. Each block is
 * dispatched to the handler that is associated with the current
 * state. The method returns without error if the stream cannot
 * provide the next block: it can be called again later. Throws
 * ErrorCode_InternalError if the current state has no handler.
 **/
void DicomStreamReader::Consume(IVisitor& visitor,
                                const DicomTag& untilTag)
{
  while (state_ != State_Done)
  {
    std::string block;

    if (!reader_.Read(block))
    {
      return;  // No more data in the stream
    }

    switch (state_)
    {
      case State_Preamble:
        HandlePreamble(visitor, block);
        break;

      case State_MetaHeader:
        HandleMetaHeader(visitor, block);
        break;

      case State_DatasetTag:
        HandleDatasetTag(block, untilTag);
        break;

      case State_DatasetExplicitLength:
        HandleDatasetExplicitLength(visitor, block);
        break;

      case State_SequenceExplicitLength:
        HandleSequenceExplicitLength(block);
        break;

      case State_SequenceExplicitValue:
        HandleSequenceExplicitValue();
        break;

      case State_DatasetValue:
        HandleDatasetValue(visitor, block);
        break;

      default:
        throw OrthancException(ErrorCode_InternalError);
    }
  }
}
/**
 * Same as the other overload, using (0xffff, 0xffff) as the tag at
 * which the parsing must stop.
 **/
void DicomStreamReader::Consume(IVisitor& visitor)
{
  Consume(visitor, DicomTag(0xffff, 0xffff));
}
/**
 * Tell whether the state machine has reached its "done" state.
 **/
bool DicomStreamReader::IsDone() const
{
  const bool done = (state_ == State_Done);
  return done;
}
/**
 * Return the number of processed bytes, as reported by the
 * underlying block reader.
 **/
uint64_t DicomStreamReader::GetProcessedBytes() const
{
  const uint64_t processed = reader_.GetProcessedBytes();
  return processed;
}
/**
 * Visitor that records the offset of the pixel data tag in the
 * stream, together with the value representation to be associated
 * with the pixel data. It asks the reader to stop as soon as a tag
 * that is not strictly below DICOM_TAG_PIXEL_DATA is visited.
 **/
class DicomStreamReader::PixelDataVisitor : public DicomStreamReader::IVisitor
{
private:
  bool                 hasPixelData_;
  uint64_t             pixelDataOffset_;
  ValueRepresentation  pixelDataVR_;
  DicomTransferSyntax  transferSyntax_;

public:
  PixelDataVisitor() :
    hasPixelData_(false),
    pixelDataOffset_(0),
    pixelDataVR_(ValueRepresentation_Unknown),
    transferSyntax_(DicomTransferSyntax_LittleEndianImplicit)  // Default DICOM transfer syntax
  {
  }

  // The tags of the meta-header are of no interest to this visitor
  virtual void VisitMetaHeaderTag(const DicomTag& tag,
                                  const ValueRepresentation& vr,
                                  const std::string& value) ORTHANC_OVERRIDE
  {
  }

  // Remember the transfer syntax, which drives the VR of the pixel data
  virtual void VisitTransferSyntax(DicomTransferSyntax transferSyntax) ORTHANC_OVERRIDE
  {
    transferSyntax_ = transferSyntax;
  }

  virtual bool VisitDatasetTag(const DicomTag& tag,
                               const ValueRepresentation& vr,
                               const std::string& value,
                               bool isLittleEndian,
                               uint64_t fileOffset) ORTHANC_OVERRIDE
  {
    if (tag == DICOM_TAG_PIXEL_DATA)
    {
      hasPixelData_ = true;
      pixelDataOffset_ = fileOffset;

      switch (transferSyntax_)
      {
        case DicomTransferSyntax_LittleEndianImplicit:
          // Implicit Little Endian has always "OW" VR for pixel data
          // https://dicom.nema.org/medical/dicom/current/output/chtml/part05/chapter_A.html
          pixelDataVR_ = ValueRepresentation_OtherWord;
          break;

        case DicomTransferSyntax_LittleEndianExplicit:
        case DicomTransferSyntax_BigEndianExplicit:
          // Uncompressed explicit transfer syntaxes: Trust the VR of the file
          pixelDataVR_ = vr;
          break;

        default:
          // Compressed transfer syntaxes must always be OB
          pixelDataVR_ = ValueRepresentation_OtherByte;
          break;
      }
    }

    // Stop processing once pixel data has been passed
    return (tag < DICOM_TAG_PIXEL_DATA);
  }

  bool HasPixelData() const
  {
    return hasPixelData_;
  }

  uint64_t GetPixelDataOffset() const
  {
    return pixelDataOffset_;
  }

  ValueRepresentation GetPixelDataVR() const
  {
    return pixelDataVR_;
  }

  /**
   * Parse "stream" to locate the pixel data. On success, "true" is
   * returned, and "offset" and "vr" are filled. Otherwise, "false"
   * is returned and the output arguments are left untouched: this
   * happens if the parsing throws an OrthancException, if no pixel
   * data tag was visited, or if the 4 bytes at the reported offset
   * are not the encoding of the (0x7fe0, 0x0010) tag.
   **/
  static bool LookupPixelDataOffset(uint64_t& offset,
                                    ValueRepresentation& vr,
                                    std::istream& stream)
  {
    PixelDataVisitor visitor;
    bool isLittleEndian;

    {
      DicomStreamReader reader(stream);

      try
      {
        reader.Consume(visitor);
        isLittleEndian = reader.IsLittleEndian();
      }
      catch (OrthancException&)
      {
        // Invalid DICOM file
        return false;
      }
    }

    if (!visitor.HasPixelData())
    {
      return false;
    }

    // Sanity check if we face an unsupported DICOM file: Make
    // sure that we can read DICOM_TAG_PIXEL_DATA at the reported
    // position in the stream
    stream.seekg(visitor.GetPixelDataOffset(), stream.beg);

    char header[4] = { 0, 0, 0, 0 };
    stream.read(header, sizeof(header));

    if (stream.gcount() != static_cast<std::streamsize>(sizeof(header)))
    {
      return false;
    }

    // Encoding of the (0x7fe0, 0x0010) tag. The bytes of the group
    // and of the element are swapped if reading a file whose transfer
    // syntax is 1.2.840.10008.1.2.2 (big endian explicit)
    static const char LITTLE_ENDIAN_TAG[4] = {
      static_cast<char>(0xe0), static_cast<char>(0x7f),
      static_cast<char>(0x10), static_cast<char>(0x00)
    };

    static const char BIG_ENDIAN_TAG[4] = {
      static_cast<char>(0x7f), static_cast<char>(0xe0),
      static_cast<char>(0x00), static_cast<char>(0x10)
    };

    const char* const expected = (isLittleEndian ? LITTLE_ENDIAN_TAG : BIG_ENDIAN_TAG);

    for (unsigned int i = 0; i < 4; i++)
    {
      if (header[i] != expected[i])
      {
        return false;
      }
    }

    offset = visitor.GetPixelDataOffset();
    vr = visitor.GetPixelDataVR();
    return true;
  }
};
bool DicomStreamReader::LookupPixelDataOffset(uint64_t& offset,
ValueRepresentation& vr,
const std::string& dicom)
{
std::stringstream stream(dicom);
return PixelDataVisitor::LookupPixelDataOffset(offset, vr, stream);
}
bool DicomStreamReader::LookupPixelDataOffset(uint64_t& offset,
ValueRepresentation& vr,
const void* buffer,
size_t size)
{
boost::iostreams::array_source source(reinterpret_cast<const char*>(buffer), size);
boost::iostreams::stream<boost::iostreams::array_source> stream(source);
return PixelDataVisitor::LookupPixelDataOffset(offset, vr, stream);
}
}