470 lines
12 KiB
C++
470 lines
12 KiB
C++
/**
|
|
* Orthanc - A Lightweight, RESTful DICOM Store
|
|
* Copyright (C) 2012-2016 Sebastien Jodogne, Medical Physics
|
|
* Department, University Hospital of Liege, Belgium
|
|
* Copyright (C) 2017-2023 Osimis S.A., Belgium
|
|
* Copyright (C) 2024-2025 Orthanc Team SRL, Belgium
|
|
* Copyright (C) 2021-2025 Sebastien Jodogne, ICTEAM UCLouvain, Belgium
|
|
*
|
|
* This program is free software: you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public License
|
|
* as published by the Free Software Foundation, either version 3 of
|
|
* the License, or (at your option) any later version.
|
|
*
|
|
* This program is distributed in the hope that it will be useful, but
|
|
* WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this program. If not, see
|
|
* <http://www.gnu.org/licenses/>.
|
|
**/
|
|
|
|
|
|
#include "../PrecompiledHeaders.h"
|
|
#include "MultipartStreamReader.h"
|
|
|
|
#include "../Logging.h"
|
|
#include "../OrthancException.h"
|
|
#include "../Toolbox.h"
|
|
|
|
#include <boost/algorithm/string/predicate.hpp>
|
|
#include <boost/lexical_cast.hpp>
|
|
|
|
#if defined(_MSC_VER)
|
|
# include <BaseTsd.h> // Definition of ssize_t
|
|
#endif
|
|
|
|
namespace Orthanc
|
|
{
|
|
static void ParseHeaders(MultipartStreamReader::HttpHeaders& headers,
|
|
const char* start,
|
|
const char* end /* exclusive */)
|
|
{
|
|
assert(start <= end);
|
|
std::string tmp(start, end - start);
|
|
|
|
std::vector<std::string> lines;
|
|
Toolbox::TokenizeString(lines, tmp, '\n');
|
|
|
|
headers.clear();
|
|
|
|
for (size_t i = 0; i < lines.size(); i++)
|
|
{
|
|
size_t separator = lines[i].find(':');
|
|
if (separator != std::string::npos)
|
|
{
|
|
std::string key = Toolbox::StripSpaces(lines[i].substr(0, separator));
|
|
std::string value = Toolbox::StripSpaces(lines[i].substr(separator + 1));
|
|
|
|
Toolbox::ToLowerCase(key);
|
|
headers[key] = value;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
static bool LookupHeaderSizeValue(size_t& target,
|
|
const MultipartStreamReader::HttpHeaders& headers,
|
|
const std::string& key)
|
|
{
|
|
MultipartStreamReader::HttpHeaders::const_iterator it = headers.find(key);
|
|
if (it == headers.end())
|
|
{
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
int64_t value;
|
|
|
|
try
|
|
{
|
|
value = boost::lexical_cast<int64_t>(it->second);
|
|
}
|
|
catch (boost::bad_lexical_cast&)
|
|
{
|
|
throw OrthancException(ErrorCode_ParameterOutOfRange);
|
|
}
|
|
|
|
if (value < 0)
|
|
{
|
|
throw OrthancException(ErrorCode_ParameterOutOfRange);
|
|
}
|
|
else
|
|
{
|
|
target = static_cast<size_t>(value);
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
void MultipartStreamReader::ParseBlock(const void* data,
|
|
size_t size)
|
|
{
|
|
if (handler_ == NULL ||
|
|
state_ == State_Done ||
|
|
size == 0)
|
|
{
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
const char* current = reinterpret_cast<const char*>(data);
|
|
const char* corpusEnd = current + size;
|
|
|
|
if (state_ == State_UnusedArea)
|
|
{
|
|
/**
|
|
* "Before the first boundary is an area that is ignored by
|
|
* MIME-compliant clients. This area is generally used to put
|
|
* a message to users of old non-MIME clients."
|
|
* https://en.wikipedia.org/wiki/MIME#Multipart_messages
|
|
**/
|
|
|
|
if (boundaryMatcher_.Apply(current, corpusEnd))
|
|
{
|
|
current = boundaryMatcher_.GetMatchBegin();
|
|
state_ = State_Content;
|
|
}
|
|
else
|
|
{
|
|
// We have not seen the end of the unused area yet
|
|
assert(current <= corpusEnd);
|
|
buffer_.AddChunk(current, corpusEnd - current);
|
|
return;
|
|
}
|
|
}
|
|
|
|
for (;;)
|
|
{
|
|
assert(current <= corpusEnd);
|
|
|
|
size_t patternSize = boundaryMatcher_.GetPattern().size();
|
|
size_t remainingSize = corpusEnd - current;
|
|
if (remainingSize < patternSize + 2)
|
|
{
|
|
break; // Not enough data available
|
|
}
|
|
|
|
std::string boundary(current, current + patternSize + 2);
|
|
if (boundary == boundaryMatcher_.GetPattern() + "--")
|
|
{
|
|
state_ = State_Done;
|
|
return;
|
|
}
|
|
|
|
if (boundary != boundaryMatcher_.GetPattern() + "\r\n")
|
|
{
|
|
throw OrthancException(ErrorCode_NetworkProtocol,
|
|
"Garbage between two items in a multipart stream");
|
|
}
|
|
|
|
const char* start = current + patternSize + 2;
|
|
|
|
if (!headersMatcher_.Apply(start, corpusEnd))
|
|
{
|
|
break; // Not enough data available
|
|
}
|
|
|
|
HttpHeaders headers;
|
|
ParseHeaders(headers, start, headersMatcher_.GetMatchBegin());
|
|
|
|
size_t contentLength = 0;
|
|
if (!LookupHeaderSizeValue(contentLength, headers, "content-length"))
|
|
{
|
|
if (boundaryMatcher_.Apply(headersMatcher_.GetMatchEnd(), corpusEnd))
|
|
{
|
|
assert(headersMatcher_.GetMatchEnd() <= boundaryMatcher_.GetMatchBegin());
|
|
size_t d = boundaryMatcher_.GetMatchBegin() - headersMatcher_.GetMatchEnd();
|
|
if (d <= 1)
|
|
{
|
|
throw OrthancException(ErrorCode_NetworkProtocol);
|
|
}
|
|
else
|
|
{
|
|
contentLength = d - 2;
|
|
}
|
|
}
|
|
else
|
|
{
|
|
break; // Not enough data available to have a full part
|
|
}
|
|
}
|
|
|
|
// "static_cast<>" to avoid warning about signed vs. unsigned comparison
|
|
assert(headersMatcher_.GetMatchEnd() <= corpusEnd);
|
|
if (contentLength + 2 > static_cast<size_t>(corpusEnd - headersMatcher_.GetMatchEnd()))
|
|
{
|
|
break; // Not enough data available to have a full part
|
|
}
|
|
|
|
const char* p = headersMatcher_.GetMatchEnd() + contentLength;
|
|
if (p[0] != '\r' ||
|
|
p[1] != '\n')
|
|
{
|
|
throw OrthancException(ErrorCode_NetworkProtocol,
|
|
"No endline at the end of a part");
|
|
}
|
|
|
|
handler_->HandlePart(headers, headersMatcher_.GetMatchEnd(), contentLength);
|
|
current = headersMatcher_.GetMatchEnd() + contentLength + 2;
|
|
}
|
|
|
|
if (current != corpusEnd)
|
|
{
|
|
assert(current < corpusEnd);
|
|
buffer_.AddChunk(current, corpusEnd - current);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
void MultipartStreamReader::ParseStream()
|
|
{
|
|
if (handler_ == NULL ||
|
|
state_ == State_Done)
|
|
{
|
|
return;
|
|
}
|
|
else
|
|
{
|
|
std::string corpus;
|
|
buffer_.Flatten(corpus);
|
|
|
|
if (!corpus.empty())
|
|
{
|
|
ParseBlock(corpus.c_str(), corpus.size());
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
MultipartStreamReader::MultipartStreamReader(const std::string& boundary) :
|
|
state_(State_UnusedArea),
|
|
handler_(NULL),
|
|
headersMatcher_("\r\n\r\n"),
|
|
boundaryMatcher_("--" + boundary),
|
|
blockSize_(10 * 1024 * 1024)
|
|
{
|
|
}
|
|
|
|
|
|
void MultipartStreamReader::SetBlockSize(size_t size)
|
|
{
|
|
if (size == 0)
|
|
{
|
|
throw OrthancException(ErrorCode_ParameterOutOfRange);
|
|
}
|
|
else
|
|
{
|
|
blockSize_ = size;
|
|
}
|
|
}
|
|
|
|
size_t MultipartStreamReader::GetBlockSize() const
|
|
{
|
|
return blockSize_;
|
|
}
|
|
|
|
void MultipartStreamReader::SetHandler(MultipartStreamReader::IHandler &handler)
|
|
{
|
|
handler_ = &handler;
|
|
}
|
|
|
|
|
|
void MultipartStreamReader::AddChunk(const void* chunk,
|
|
size_t size)
|
|
{
|
|
if (state_ != State_Done &&
|
|
size != 0)
|
|
{
|
|
size_t oldSize = buffer_.GetNumBytes();
|
|
if (oldSize == 0)
|
|
{
|
|
/**
|
|
* Optimization in Orthanc 1.9.3: Directly parse the input
|
|
* buffer instead of going through the ChunkedBuffer if the
|
|
* latter is still empty. This notably avoids one memcpy() in
|
|
* STOW-RS server if chunked transfers is disabled.
|
|
**/
|
|
ParseBlock(chunk, size);
|
|
}
|
|
else
|
|
{
|
|
buffer_.AddChunk(chunk, size);
|
|
|
|
if (oldSize / blockSize_ != buffer_.GetNumBytes() / blockSize_)
|
|
{
|
|
ParseStream();
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
void MultipartStreamReader::AddChunk(const std::string& chunk)
|
|
{
|
|
if (!chunk.empty())
|
|
{
|
|
AddChunk(chunk.c_str(), chunk.size());
|
|
}
|
|
}
|
|
|
|
|
|
void MultipartStreamReader::CloseStream()
|
|
{
|
|
if (buffer_.GetNumBytes() != 0)
|
|
{
|
|
ParseStream();
|
|
}
|
|
}
|
|
|
|
|
|
bool MultipartStreamReader::GetMainContentType(std::string& contentType,
|
|
const HttpHeaders& headers)
|
|
{
|
|
HttpHeaders::const_iterator it = headers.find("content-type");
|
|
|
|
if (it == headers.end())
|
|
{
|
|
return false;
|
|
}
|
|
else
|
|
{
|
|
contentType = it->second;
|
|
return true;
|
|
}
|
|
}
|
|
|
|
|
|
static void RemoveSurroundingQuotes(std::string& value)
|
|
{
|
|
if (value.size() >= 2 &&
|
|
value[0] == '"' &&
|
|
value[value.size() - 1] == '"')
|
|
{
|
|
value = value.substr(1, value.size() - 2);
|
|
}
|
|
}
|
|
|
|
|
|
bool MultipartStreamReader::ParseMultipartContentType(std::string& contentType,
|
|
std::string& subType,
|
|
std::string& boundary,
|
|
const std::string& contentTypeHeader)
|
|
{
|
|
std::vector<std::string> tokens;
|
|
Toolbox::TokenizeString(tokens, contentTypeHeader, ';');
|
|
|
|
if (tokens.empty())
|
|
{
|
|
return false;
|
|
}
|
|
|
|
contentType = Toolbox::StripSpaces(tokens[0]);
|
|
Toolbox::ToLowerCase(contentType);
|
|
|
|
if (contentType.empty())
|
|
{
|
|
return false;
|
|
}
|
|
|
|
bool valid = false;
|
|
subType.clear();
|
|
|
|
for (size_t i = 1; i < tokens.size(); i++)
|
|
{
|
|
std::vector<std::string> items;
|
|
Toolbox::TokenizeString(items, tokens[i], '=');
|
|
|
|
if (items.size() == 2)
|
|
{
|
|
if (boost::iequals("boundary", Toolbox::StripSpaces(items[0])))
|
|
{
|
|
boundary = Toolbox::StripSpaces(items[1]);
|
|
|
|
// https://orthanc.uclouvain.be/bugs/show_bug.cgi?id=190
|
|
RemoveSurroundingQuotes(boundary);
|
|
|
|
valid = !boundary.empty();
|
|
}
|
|
else if (boost::iequals("type", Toolbox::StripSpaces(items[0])))
|
|
{
|
|
subType = Toolbox::StripSpaces(items[1]);
|
|
Toolbox::ToLowerCase(subType);
|
|
|
|
// https://orthanc.uclouvain.be/bugs/show_bug.cgi?id=54
|
|
// https://tools.ietf.org/html/rfc7231#section-3.1.1.1
|
|
RemoveSurroundingQuotes(subType);
|
|
}
|
|
}
|
|
}
|
|
|
|
return valid;
|
|
}
|
|
|
|
|
|
bool MultipartStreamReader::ParseHeaderArguments(std::string& main,
|
|
std::map<std::string, std::string>& arguments,
|
|
const std::string& header)
|
|
{
|
|
std::vector<std::string> tokens;
|
|
Toolbox::TokenizeString(tokens, header, ';');
|
|
|
|
if (tokens.empty())
|
|
{
|
|
return false;
|
|
}
|
|
|
|
main = Toolbox::StripSpaces(tokens[0]);
|
|
Toolbox::ToLowerCase(main);
|
|
if (main.empty())
|
|
{
|
|
return false;
|
|
}
|
|
|
|
arguments.clear();
|
|
|
|
for (size_t i = 1; i < tokens.size(); i++)
|
|
{
|
|
std::vector<std::string> items;
|
|
Toolbox::TokenizeString(items, tokens[i], '=');
|
|
|
|
if (items.size() > 2)
|
|
{
|
|
return false;
|
|
}
|
|
else if (!items.empty())
|
|
{
|
|
std::string key = Toolbox::StripSpaces(items[0]);
|
|
Toolbox::ToLowerCase(key);
|
|
|
|
if (arguments.find(key) != arguments.end())
|
|
{
|
|
LOG(ERROR) << "The same argument was provided twice in an HTTP header: \""
|
|
<< key << "\" in \"" << header << "\"";
|
|
return false;
|
|
}
|
|
else if (!key.empty())
|
|
{
|
|
if (items.size() == 1)
|
|
{
|
|
arguments[key] = "";
|
|
}
|
|
else
|
|
{
|
|
assert(items.size() == 2);
|
|
std::string value = Toolbox::StripSpaces(items[1]);
|
|
RemoveSurroundingQuotes(value);
|
|
arguments[key] = value;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
}
|