From 524da0e004de4c32c02ab113155da649099ebf7a Mon Sep 17 00:00:00 2001 From: Peter LaFosse Date: Fri, 5 Jun 2026 21:42:35 -0400 Subject: [PATCH 1/4] Refactor MSVC demangler finalization Parse MSVC symbols into structured type nodes before finalization. This keeps thunk suffixes, calling conventions, member pointers, and implicit this parameters dependent on the final platform/view context. --- demangle.cpp | 15 +- demangler/gnu3/demangled_type_node.cpp | 1309 +++++++++-- demangler/gnu3/demangled_type_node.h | 278 ++- demangler/msvc/CMakeLists.txt | 4 +- demangler/msvc/demangle_msvc.cpp | 2934 +++++++++++++++--------- demangler/msvc/demangle_msvc.h | 353 ++- plugins/pdb-ng/src/symbol_parser.rs | 4 +- plugins/rtti/rtti.cpp | 27 +- rust/src/demangle.rs | 40 + view/pe/coffview.cpp | 2 +- view/pe/peview.cpp | 2 +- 11 files changed, 3567 insertions(+), 1401 deletions(-) diff --git a/demangle.cpp b/demangle.cpp index 8f13c2132e..bb7eb1326b 100644 --- a/demangle.cpp +++ b/demangle.cpp @@ -47,8 +47,19 @@ namespace BinaryNinja { bool DemangleMS(Architecture* arch, const std::string& mangledName, Ref& outType, QualifiedName& outVarName, BinaryView* view) { - const bool simplify = Settings::Instance()->Get("analysis.types.templateSimplifier", view); - return DemangleMS(arch, mangledName, outType, outVarName, simplify); + BNType* localType = nullptr; + char** localVarName = nullptr; + size_t localSize = 0; + if (!BNDemangleMSWithOptions(arch->GetObject(), mangledName.c_str(), &localType, &localVarName, &localSize, + view ? view->GetObject() : nullptr)) + return false; + outType = localType ? 
new Type(localType) : nullptr; + for (size_t i = 0; i < localSize; i++) + { + outVarName.push_back(localVarName[i]); + } + BNFreeDemangledName(&localVarName, localSize); + return true; } bool DemangleMS(Architecture* arch, const std::string& mangledName, Ref& outType, QualifiedName& outVarName, diff --git a/demangler/gnu3/demangled_type_node.cpp b/demangler/gnu3/demangled_type_node.cpp index a52f9ef8fc..afbebf457a 100644 --- a/demangler/gnu3/demangled_type_node.cpp +++ b/demangler/gnu3/demangled_type_node.cpp @@ -13,7 +13,13 @@ // limitations under the License. #include "demangled_type_node.h" +#ifdef BINARYNINJACORE_LIBRARY +#include "binaryview.h" +#endif +#include +#include #include +#include #ifdef BINARYNINJACORE_LIBRARY using namespace BinaryNinjaCore; @@ -23,22 +29,316 @@ using namespace BinaryNinja; using namespace std; #endif +namespace +{ + static constexpr uint8_t DemangledPtr64Bit = 1u << 0; + static constexpr uint8_t DemangledUnalignedBit = 1u << 1; + static constexpr uint8_t DemangledRestrictBit = 1u << 2; + static constexpr uint8_t DemangledReferenceBit = 1u << 3; + static constexpr uint8_t DemangledLvalueBit = 1u << 4; + + static void AppendPointerSuffixToken(string& out, const char* token) + { + if (!out.empty() && out.back() != ' ') + out += ' '; + out += token; + } + + static string JoinNameList(const StringList& name) + { + if (name.empty()) + return {}; + if (name.size() == 1) + return name[0]; + + size_t total = (name.size() - 1) * 2; + for (const auto& segment : name) + total += segment.size(); + + string out; + out.reserve(total); + out += name[0]; + for (size_t i = 1; i < name.size(); i++) + { + out += "::"; + out += name[i]; + } + return out; + } + + static void AppendTemplateArgumentList(string& out, const vector& args, + bool spaceAfterComma, Platform* platform) + { + if (args.empty()) + return; + + out += '<'; + for (size_t i = 0; i < args.size(); i++) + { + if (i > 0) + out += spaceAfterComma ? 
", " : ","; + if (args[i].type) + { + if (spaceAfterComma) + { + string arg; + args[i].type->AppendString(arg, platform); + out += arg; + } + else + args[i].type->AppendString(out, platform); + } + } + if (out.back() == '>') + out += ' '; + out += '>'; + } + + static DemangledQualifiedName ConvertNameSegments(StringList nameSegments) + { + DemangledQualifiedName result; + result.reserve(nameSegments.size()); + for (auto& segment: nameSegments) + result.emplace_back(std::move(segment)); + return result; + } + + static const DemangledQualifiedName& EmptyDemangledQualifiedName() + { + static const DemangledQualifiedName empty; + return empty; + } + + static size_t ResolveAddressWidth(Platform* platform) + { + if (platform) + return platform->GetAddressSize(); + return 8; + } + + static size_t ResolveDefaultIntegerWidth(Platform* platform) + { + if (platform) + { +#ifdef BINARYNINJACORE_LIBRARY + Architecture* platformArch = platform->GetArchitecture(); +#else + Ref platformArch = platform->GetArchitecture(); +#endif + if (platformArch) + return platformArch->GetDefaultIntegerSize(); + } + return 4; + } + + static Ref ResolveCallingConvention(BNCallingConventionName cc, Platform* platform) + { +#ifndef BINARYNINJACORE_LIBRARY + Ref platformArch; +#endif + Architecture* arch = nullptr; + if (platform) + { +#ifdef BINARYNINJACORE_LIBRARY + arch = platform->GetArchitecture(); +#else + platformArch = platform->GetArchitecture(); + arch = platformArch.GetPtr(); +#endif + } + + switch (cc) + { + case CdeclCallingConvention: + if (platform) + { + auto platformCC = platform->GetCdeclCallingConvention(); + if (platformCC) + return platformCC; + } + if (arch) + { + auto archCC = arch->GetCdeclCallingConvention(); + if (archCC) + return archCC; + } + return arch ? 
arch->GetCallingConventionByName("cdecl") : nullptr; + case STDCallCallingConvention: + if (platform) + { + auto platformCC = platform->GetStdcallCallingConvention(); + if (platformCC) + return platformCC; + } + if (arch) + { + auto archCC = arch->GetStdcallCallingConvention(); + if (archCC) + return archCC; + } + return arch ? arch->GetCallingConventionByName("stdcall") : nullptr; + case FastcallCallingConvention: + if (platform) + { + auto platformCC = platform->GetFastcallCallingConvention(); + if (platformCC) + return platformCC; + } + if (arch) + { + auto archCC = arch->GetFastcallCallingConvention(); + if (archCC) + return archCC; + } + return arch ? arch->GetCallingConventionByName("fastcall") : nullptr; + case ThisCallCallingConvention: + if (arch) + return arch->GetCallingConventionByName("thiscall"); + return nullptr; + default: + return nullptr; + } + } + +} + +#define HAS_POINTER_SUFFIX(bit) ((m_pointerSuffixBits & (bit)) != 0) + +static const char* CallingConventionString[] = +{ + "", + "__cdecl", + "__pascal", + "__thiscall", + "__stdcall", + "__fastcall", + "__clrcall", + "__eabi", + "__vectorcall", + "__swiftcall", + "__swiftasync" +}; + + +DemangledNamePart::DemangledNamePart(): m_hasTemplateArgs(false), m_spaceAfterTemplateComma(false) +{ +} + + +DemangledNamePart::DemangledNamePart(string base): + m_base(std::move(base)), m_hasTemplateArgs(false), m_spaceAfterTemplateComma(false) +{ +} + + +DemangledNamePart::DemangledNamePart(string base, std::shared_ptr baseTypeSuffix): + m_base(std::move(base)), m_baseTypeSuffix(std::move(baseTypeSuffix)), m_hasTemplateArgs(false), + m_spaceAfterTemplateComma(false) +{ +} + + +DemangledNamePart::DemangledNamePart( + string base, vector templateArgs, bool spaceAfterComma): + m_base(std::move(base)), m_templateArgs(std::move(templateArgs)), m_hasTemplateArgs(true), + m_spaceAfterTemplateComma(spaceAfterComma) +{ +} + + +void DemangledNamePart::SetTemplateArguments(vector args, bool spaceAfterComma) +{ + 
m_templateArgs = std::move(args); + m_hasTemplateArgs = true; + m_spaceAfterTemplateComma = spaceAfterComma; +} + + +void DemangledNamePart::AppendString(string& out, Platform* platform) const +{ + out += m_base; + if (m_baseTypeSuffix) + m_baseTypeSuffix->AppendString(out, platform); + if (m_templateArgs.empty() && m_hasTemplateArgs) + { + out += "<>"; + return; + } + AppendTemplateArgumentList(out, m_templateArgs, m_spaceAfterTemplateComma, platform); +} + + +string DemangledNamePart::GetString(Platform* platform) const +{ + string out; + AppendString(out, platform); + return out; +} + + +bool DemangledNamePart::IsStructurallyEqual(const DemangledNamePart& other) const +{ + if (m_base != other.m_base || m_hasTemplateArgs != other.m_hasTemplateArgs || + m_spaceAfterTemplateComma != other.m_spaceAfterTemplateComma) + return false; + if (m_baseTypeSuffix != other.m_baseTypeSuffix) + { + if (!m_baseTypeSuffix || !other.m_baseTypeSuffix || + !m_baseTypeSuffix->IsStructurallyEqual(*other.m_baseTypeSuffix)) + return false; + } + if (m_templateArgs.size() != other.m_templateArgs.size()) + return false; + for (size_t i = 0; i < m_templateArgs.size(); i++) + { + const auto& a = m_templateArgs[i]; + const auto& b = other.m_templateArgs[i]; + if (a.name != b.name) + return false; + if (a.type == b.type) + continue; + if (!a.type || !b.type || !a.type->IsStructurallyEqual(*b.type)) + return false; + } + return true; +} + + +DemangledNamePart::Ref DemangledNamePart::CreateShared(DemangledNamePart part) +{ + return std::make_shared(std::move(part)); +} + + +DemangledNamePart::Ref DemangledNamePart::CreateSharedCopy(const DemangledNamePart& part) +{ + return std::make_shared(part); +} + DemangledTypeNode::DemangledTypeNode() - : m_typeClass(VoidTypeClass), m_width(0), m_alignment(0), - m_const(false), m_volatile(false), m_signed(false), - m_hasTemplateArgs(false), m_nameType(NoNameType), - m_ntrClass(UnknownNamedTypeClass), - m_pointerReference(PointerReferenceType), 
m_elements(0), - m_returnTypeConfidence(BN_DEFAULT_CONFIDENCE) + : m_nameType(NoNameType), m_pointerSuffixBits(0), m_returnTypeConfidence(BN_FULL_CONFIDENCE), + m_const(false), m_volatile(false), m_payload(VoidPayload{}) +{ +} + + +DemangledTypeNode::NodeRef DemangledTypeNode::CreateShared(DemangledTypeNode node) +{ + return std::make_shared(std::move(node)); +} + + +DemangledTypeNode::NodeRef DemangledTypeNode::CreateSharedCopy(const DemangledTypeNode& node) { + return std::make_shared(node); } DemangledTypeNode DemangledTypeNode::VoidType() { DemangledTypeNode n; - n.m_typeClass = VoidTypeClass; + n.m_payload = VoidPayload{}; return n; } @@ -46,8 +346,7 @@ DemangledTypeNode DemangledTypeNode::VoidType() DemangledTypeNode DemangledTypeNode::BoolType() { DemangledTypeNode n; - n.m_typeClass = BoolTypeClass; - n.m_width = 1; + n.m_payload = BoolPayload{}; return n; } @@ -55,10 +354,26 @@ DemangledTypeNode DemangledTypeNode::BoolType() DemangledTypeNode DemangledTypeNode::IntegerType(size_t width, bool isSigned, const string& altName) { DemangledTypeNode n; - n.m_typeClass = IntegerTypeClass; - n.m_width = width; - n.m_signed = isSigned; - n.m_altName = altName; + if (altName == "char16_t" || altName == "char32_t" || altName == "wchar_t") + { + n.m_payload = WideCharPayload{width, altName}; + return n; + } + IntegerPayload payload; + payload.width = width; + payload.isSigned = isSigned; + if (!(width == 1 && isSigned && altName == "char")) + payload.altName = altName; + n.m_payload = std::move(payload); + return n; +} + + +DemangledTypeNode DemangledTypeNode::AddressSizedIntegerType(bool isSigned, const string& altName) +{ + DemangledTypeNode n = IntegerType(0, isSigned, altName); + if (auto payload = std::get_if(&n.m_payload)) + payload->widthKind = AddressWidth; return n; } @@ -66,9 +381,15 @@ DemangledTypeNode DemangledTypeNode::IntegerType(size_t width, bool isSigned, co DemangledTypeNode DemangledTypeNode::FloatType(size_t width, const string& altName) { 
DemangledTypeNode n; - n.m_typeClass = FloatTypeClass; - n.m_width = width; - n.m_altName = altName; + n.m_payload = FloatPayload{width, altName}; + return n; +} + + +DemangledTypeNode DemangledTypeNode::WideCharType(size_t width, const string& altName) +{ + DemangledTypeNode n; + n.m_payload = WideCharPayload{width, altName}; return n; } @@ -76,21 +397,49 @@ DemangledTypeNode DemangledTypeNode::FloatType(size_t width, const string& altNa DemangledTypeNode DemangledTypeNode::VarArgsType() { DemangledTypeNode n; - n.m_typeClass = VarArgsTypeClass; + n.m_payload = VarArgsPayload{}; + return n; +} + + +DemangledTypeNode DemangledTypeNode::PointerType(DemangledTypeNode child, bool cnst, bool vltl, BNReferenceType refType) +{ + DemangledTypeNode n; + n.m_const = cnst; + n.m_volatile = vltl; + n.m_payload = PointerPayload{CreateShared(std::move(child)), refType}; + return n; +} + + +DemangledTypeNode DemangledTypeNode::PointerType(NodeRef child, bool cnst, bool vltl, BNReferenceType refType) +{ + DemangledTypeNode n; + n.m_const = cnst; + n.m_volatile = vltl; + n.m_payload = PointerPayload{std::move(child), refType}; + return n; +} + + +DemangledTypeNode DemangledTypeNode::MemberPointerType( + DemangledTypeNode child, DemangledQualifiedName ownerName, bool cnst, bool vltl) +{ + DemangledTypeNode n; + n.m_const = cnst; + n.m_volatile = vltl; + n.m_payload = MemberPointerPayload{CreateShared(std::move(child)), std::move(ownerName), false}; return n; } -DemangledTypeNode DemangledTypeNode::PointerType(Architecture* arch, DemangledTypeNode child, - bool cnst, bool vltl, BNReferenceType refType) +DemangledTypeNode DemangledTypeNode::MemberPointerType( + NodeRef child, DemangledQualifiedName ownerName, bool cnst, bool vltl) { DemangledTypeNode n; - n.m_typeClass = PointerTypeClass; - n.m_width = arch->GetAddressSize(); - n.m_childType = std::make_shared(std::move(child)); n.m_const = cnst; n.m_volatile = vltl; - n.m_pointerReference = refType; + n.m_payload = 
MemberPointerPayload{std::move(child), std::move(ownerName), false}; return n; } @@ -98,9 +447,15 @@ DemangledTypeNode DemangledTypeNode::PointerType(Architecture* arch, DemangledTy DemangledTypeNode DemangledTypeNode::ArrayType(DemangledTypeNode child, uint64_t count) { DemangledTypeNode n; - n.m_typeClass = ArrayTypeClass; - n.m_childType = std::make_shared(std::move(child)); - n.m_elements = count; + n.m_payload = ArrayPayload{CreateShared(std::move(child)), count}; + return n; +} + + +DemangledTypeNode DemangledTypeNode::ArrayType(NodeRef child, uint64_t count) +{ + DemangledTypeNode n; + n.m_payload = ArrayPayload{std::move(child), count}; return n; } @@ -109,196 +464,571 @@ DemangledTypeNode DemangledTypeNode::FunctionType(DemangledTypeNode retType, std::nullptr_t, vector params) { DemangledTypeNode n; - n.m_typeClass = FunctionTypeClass; - n.m_childType = std::make_shared(std::move(retType)); - n.m_params = std::move(params); + FunctionPayload payload; + payload.returnType = CreateShared(std::move(retType)); + payload.params = std::move(params); + n.m_payload = std::move(payload); return n; } -DemangledTypeNode DemangledTypeNode::NamedType(BNNamedTypeReferenceClass cls, - vector nameSegments, size_t width, size_t align) +DemangledTypeNode DemangledTypeNode::FunctionType(NodeRef retType, + std::nullptr_t, vector params) { DemangledTypeNode n; - n.m_typeClass = NamedTypeReferenceClass; - n.m_ntrClass = cls; - n.m_nameSegments = std::make_shared>(std::move(nameSegments)); - n.m_width = width; - n.m_alignment = align; + FunctionPayload payload; + payload.returnType = std::move(retType); + payload.params = std::move(params); + n.m_payload = std::move(payload); return n; } +void DemangledTypeNode::SetImplicitThisParameter(DemangledTypeNode type) +{ + if (auto payload = std::get_if(&m_payload)) + { + payload->implicitThisParameterType = CreateShared(std::move(type)); + return; + } + assert(false && "SetImplicitThisParameter called for non-function demangled type"); 
+} + + +DemangledTypeNode DemangledTypeNode::NamedType(BNNamedTypeReferenceClass cls, + StringList nameSegments, size_t width, bool isSigned) +{ + DemangledTypeNode n; + n.m_payload = NamedTypePayload{cls, ConvertNameSegments(std::move(nameSegments)), width, FixedWidth, isSigned}; + return n; +} + DemangledTypeNode DemangledTypeNode::NamedType(BNNamedTypeReferenceClass cls, - const QualifiedName& name, size_t width, size_t align) + DemangledQualifiedName nameSegments, size_t width, bool isSigned) +{ + DemangledTypeNode n; + n.m_payload = NamedTypePayload{cls, std::move(nameSegments), width, FixedWidth, isSigned}; + return n; +} + +DemangledTypeNode DemangledTypeNode::NamedTypeWithDefaultIntegerWidth(BNNamedTypeReferenceClass cls, + StringList nameSegments, bool isSigned) +{ + DemangledTypeNode n = NamedType(cls, std::move(nameSegments), 0, isSigned); + if (auto payload = std::get_if(&n.m_payload)) + payload->widthKind = DefaultIntegerWidth; + return n; +} + + +DemangledTypeNode DemangledTypeNode::PostfixType(NodeRef child, string suffix) { - return NamedType(cls, vector(name.begin(), name.end()), width, align); + DemangledTypeNode n; + n.m_payload = PostfixPayload{std::move(child), std::move(suffix), nullptr}; + return n; } -void DemangledTypeNode::SetNTR(BNNamedTypeReferenceClass cls, vector nameSegments) +DemangledTypeNode DemangledTypeNode::PostfixType(NodeRef child, string separator, NodeRef suffixType) { - m_ntrClass = cls; - m_nameSegments = std::make_shared>(std::move(nameSegments)); + DemangledTypeNode n = PostfixType(child, std::move(separator)); + if (auto payload = std::get_if(&n.m_payload)) + payload->suffixType = std::move(suffixType); + return n; } -void DemangledTypeNode::SetNTR(BNNamedTypeReferenceClass cls, const QualifiedName& name) +uint8_t DemangledTypeNode::PointerSuffixBit(BNPointerSuffix ps) { - SetNTR(cls, vector(name.begin(), name.end())); + switch (ps) + { + case Ptr64Suffix: + return DemangledPtr64Bit; + case UnalignedSuffix: + return 
DemangledUnalignedBit; + case RestrictSuffix: + return DemangledRestrictBit; + case ReferenceSuffix: + return DemangledReferenceBit; + case LvalueSuffix: + return DemangledLvalueBit; + default: + return 0; + } } -string DemangledTypeNode::GetTypeNameString() const +size_t DemangledTypeNode::ResolveWidth(size_t width, WidthKind widthKind, Platform* platform) { - if (!m_nameSegments) - return {}; - const auto& segs = *m_nameSegments; - size_t n = segs.size(); - if (n == 0) - return {}; - if (n == 1) - return segs[0]; + switch (widthKind) + { + case AddressWidth: + return ResolveAddressWidth(platform); + case DefaultIntegerWidth: + return ResolveDefaultIntegerWidth(platform); + case FixedWidth: + default: + return width; + } +} - // Pre-reserve: sum of segments + (n-1) * 2 for "::" separators - size_t total = (n - 1) * 2; - for (const auto& s : segs) - total += s.size(); - string result; - result.reserve(total); - result += segs[0]; - for (size_t i = 1; i < n; i++) +BNTypeClass DemangledTypeNode::GetPayloadClass() const +{ + switch (m_payload.index()) { - result += "::"; - result += segs[i]; + case 0: return VoidTypeClass; + case 1: return BoolTypeClass; + case 2: return IntegerTypeClass; + case 3: return FloatTypeClass; + case 4: return WideCharTypeClass; + case 5: return VarArgsTypeClass; + case 6: + case 7: + // PointerPayload and MemberPointerPayload both preserve the public pointer type class. + return PointerTypeClass; + case 8: return ArrayTypeClass; + case 9: return FunctionTypeClass; + case 10: + case 11: + // PostfixPayload is an internal named-type rendering form, so it reports as a named type. 
+ return NamedTypeReferenceClass; + default: + return VoidTypeClass; } +} + + +DemangledTypeNode::NodeRef DemangledTypeNode::GetPrimaryChild() const +{ + if (auto payload = std::get_if(&m_payload)) + return payload->childType; + if (auto payload = std::get_if(&m_payload)) + return payload->childType; + if (auto payload = std::get_if(&m_payload)) + return payload->childType; + if (auto payload = std::get_if(&m_payload)) + return payload->returnType; + if (auto payload = std::get_if(&m_payload)) + return payload->childType; + return nullptr; +} + + +bool DemangledTypeNode::AddQualifiersToPointerChild(bool cnst, bool vltl) +{ + NodeRef* childType = nullptr; + if (auto payload = std::get_if(&m_payload)) + childType = &payload->childType; + else if (auto payload = std::get_if(&m_payload)) + childType = &payload->childType; + else + return false; + + if (!*childType) + return true; + if ((*childType).use_count() > 1) + *childType = CreateSharedCopy(**childType); + if (cnst) + (*childType)->SetConst(true); + if (vltl) + (*childType)->SetVolatile(true); + return true; +} + + +const DemangledQualifiedName& DemangledTypeNode::GetName() const +{ + if (auto payload = std::get_if(&m_payload)) + return payload->name; + return EmptyDemangledQualifiedName(); +} + + +DemangledQualifiedName& DemangledTypeNode::GetMutableName() +{ + if (auto payload = std::get_if(&m_payload)) + return payload->name; + assert(false && "GetMutableName called for non-named demangled type"); + static thread_local DemangledQualifiedName empty; + empty.clear(); + return empty; +} + + +void DemangledTypeNode::SetName(DemangledQualifiedName name) +{ + if (auto payload = std::get_if(&m_payload)) + { + payload->name = std::move(name); + return; + } + assert(false && "SetName called for non-named demangled type"); +} + + +BNNamedTypeReferenceClass DemangledTypeNode::GetNTRClass() const +{ + if (auto payload = std::get_if(&m_payload)) + return payload->ntrClass; + return UnknownNamedTypeClass; +} + + +void 
DemangledTypeNode::SetNTRType(BNNamedTypeReferenceClass cls) +{ + if (auto payload = std::get_if(&m_payload)) + { + payload->ntrClass = cls; + return; + } + assert(false && "SetNTRType called for non-named demangled type"); +} + + +void DemangledTypeNode::SetParenthesizedMemberPointer(bool parenthesized) +{ + if (auto payload = std::get_if(&m_payload)) + { + payload->parenthesized = parenthesized; + return; + } + assert(false && "SetParenthesizedMemberPointer called for non-member-pointer demangled type"); +} + + +void DemangledTypeNode::SetCallingConventionName(BNCallingConventionName cc) +{ + if (auto payload = std::get_if(&m_payload)) + { + payload->callingConventionName = cc; + return; + } + assert(false && "SetCallingConventionName called for non-function demangled type"); +} + + +bool DemangledTypeNode::HasTemplateArguments() const +{ + const auto* payload = std::get_if(&m_payload); + if (!payload) + return false; + for (const auto& segment: payload->name) + if (segment.HasTemplateArguments()) + return true; + return false; +} + + +bool DemangledTypeNode::IsStructurallyEqual(const DemangledTypeNode& other) const +{ + if (m_nameType != other.m_nameType || m_pointerSuffixBits != other.m_pointerSuffixBits || + m_returnTypeConfidence != other.m_returnTypeConfidence || + m_const != other.m_const || m_volatile != other.m_volatile || + m_payload.index() != other.m_payload.index()) + return false; + + auto typePtrsEqual = [](const NodeRef& a, const NodeRef& b) { + if (a == b) + return true; + if (!a || !b) + return false; + return a->IsStructurallyEqual(*b); + }; + + auto namePartsEqual = [](const DemangledQualifiedName& a, const DemangledQualifiedName& b) { + if (a.size() != b.size()) + return false; + for (size_t i = 0; i < a.size(); i++) + { + if (!a[i].IsStructurallyEqual(b[i])) + return false; + } + return true; + }; + + auto paramsEqual = [&typePtrsEqual](const vector& a, const vector& b) { + if (a.size() != b.size()) + return false; + for (size_t i = 0; i < 
a.size(); i++) + { + if (a[i].name != b[i].name || !typePtrsEqual(a[i].type, b[i].type)) + return false; + } + return true; + }; + + if (auto payload = std::get_if(&m_payload)) + return payload && std::get_if(&other.m_payload); + if (auto payload = std::get_if(&m_payload)) + return payload && std::get_if(&other.m_payload); + if (auto payload = std::get_if(&m_payload)) + return payload && std::get_if(&other.m_payload); + if (auto payload = std::get_if(&m_payload)) + { + auto otherPayload = std::get_if(&other.m_payload); + return otherPayload && payload->width == otherPayload->width && + payload->widthKind == otherPayload->widthKind && + payload->isSigned == otherPayload->isSigned && payload->altName == otherPayload->altName; + } + if (auto payload = std::get_if(&m_payload)) + { + auto otherPayload = std::get_if(&other.m_payload); + return otherPayload && payload->width == otherPayload->width && payload->altName == otherPayload->altName; + } + if (auto payload = std::get_if(&m_payload)) + { + auto otherPayload = std::get_if(&other.m_payload); + return otherPayload && payload->width == otherPayload->width && payload->altName == otherPayload->altName; + } + if (auto payload = std::get_if(&m_payload)) + { + auto otherPayload = std::get_if(&other.m_payload); + return otherPayload && payload->referenceType == otherPayload->referenceType && + typePtrsEqual(payload->childType, otherPayload->childType); + } + if (auto payload = std::get_if(&m_payload)) + { + auto otherPayload = std::get_if(&other.m_payload); + return otherPayload && payload->parenthesized == otherPayload->parenthesized && + typePtrsEqual(payload->childType, otherPayload->childType) && + namePartsEqual(payload->ownerName, otherPayload->ownerName); + } + if (auto payload = std::get_if(&m_payload)) + { + auto otherPayload = std::get_if(&other.m_payload); + return otherPayload && payload->elements == otherPayload->elements && + typePtrsEqual(payload->childType, otherPayload->childType); + } + if (auto payload = 
std::get_if(&m_payload)) + { + auto otherPayload = std::get_if(&other.m_payload); + return otherPayload && payload->callingConventionName == otherPayload->callingConventionName && + typePtrsEqual(payload->returnType, otherPayload->returnType) && + typePtrsEqual(payload->implicitThisParameterType, otherPayload->implicitThisParameterType) && + paramsEqual(payload->params, otherPayload->params); + } + if (auto payload = std::get_if(&m_payload)) + { + auto otherPayload = std::get_if(&other.m_payload); + return otherPayload && payload->ntrClass == otherPayload->ntrClass && + payload->width == otherPayload->width && payload->widthKind == otherPayload->widthKind && + payload->isSigned == otherPayload->isSigned && + namePartsEqual(payload->name, otherPayload->name); + } + if (auto payload = std::get_if(&m_payload)) + { + auto otherPayload = std::get_if(&other.m_payload); + return otherPayload && payload->suffix == otherPayload->suffix && + typePtrsEqual(payload->childType, otherPayload->childType) && + typePtrsEqual(payload->suffixType, otherPayload->suffixType); + } + + return false; +} + + +StringList DemangledTypeNode::RenderTypeNameSegments(Platform* platform) const +{ + StringList result; + if (auto payload = std::get_if(&m_payload)) + { + result.push_back(GetString(platform)); + return result; + } + auto payload = std::get_if(&m_payload); + if (!payload) + return result; + result.reserve(payload->name.size()); + for (const auto& segment: payload->name) + result.push_back(segment.GetString(platform)); return result; } -size_t DemangledTypeNode::NameStringSize() const +void DemangledTypeNode::AddPointerSuffixes(TypeBuilder& tb, bool omitPtr64) const { - if (!m_nameSegments) - return 0; - size_t total = 0; - for (const auto& s : *m_nameSegments) - total += s.size(); - return total; + if (HAS_POINTER_SUFFIX(DemangledPtr64Bit) && !omitPtr64) + tb.AddPointerSuffix(Ptr64Suffix); + if (HAS_POINTER_SUFFIX(DemangledUnalignedBit)) + tb.AddPointerSuffix(UnalignedSuffix); + if 
(HAS_POINTER_SUFFIX(DemangledRestrictBit)) + tb.AddPointerSuffix(RestrictSuffix); + if (HAS_POINTER_SUFFIX(DemangledReferenceBit)) + tb.AddPointerSuffix(ReferenceSuffix); + if (HAS_POINTER_SUFFIX(DemangledLvalueBit)) + tb.AddPointerSuffix(LvalueSuffix); } -string DemangledTypeNode::GetModifierString() const +bool DemangledTypeNode::HasPostfixType() const +{ + return std::holds_alternative(m_payload); +} + + +void DemangledTypeNode::AppendPostfixType(string& out, Platform* platform) const +{ + const auto* payload = std::get_if(&m_payload); + if (!payload) + return; + if (payload->childType) + payload->childType->AppendString(out, platform); + out += payload->suffix; + if (payload->suffixType) + payload->suffixType->AppendString(out, platform); +} + + +void DemangledTypeNode::AppendModifiers(string& out) const { if (m_const && m_volatile) - return "const volatile"; - if (m_const) - return "const"; - if (m_volatile) - return "volatile"; - return ""; + out += " const volatile"; + else if (m_const) + out += " const"; + else if (m_volatile) + out += " volatile"; } -string DemangledTypeNode::GetPointerSuffixString() const +void DemangledTypeNode::AppendPointerSuffix(string& out) const { - static const char* suffixStrings[] = { - "__ptr64", - "__unaligned", - "__restrict", - "&", - "&&" - }; + if (HAS_POINTER_SUFFIX(DemangledUnalignedBit)) + AppendPointerSuffixToken(out, "__unaligned"); + if (HAS_POINTER_SUFFIX(DemangledRestrictBit)) + AppendPointerSuffixToken(out, "__restrict"); + if (HAS_POINTER_SUFFIX(DemangledReferenceBit)) + AppendPointerSuffixToken(out, "&"); + if (HAS_POINTER_SUFFIX(DemangledLvalueBit)) + AppendPointerSuffixToken(out, "&&"); +} - string out; - for (auto& s : m_pointerSuffix) + +void DemangledTypeNode::AppendNamePartList( + string& out, const DemangledQualifiedName& name, Platform* platform) +{ + if (name.empty()) + return; + name[0].AppendString(out, platform); + for (size_t i = 1; i < name.size(); i++) { - if (!out.empty() && out.back() != ' ') - 
out += ' '; - out += suffixStrings[s]; + out += "::"; + name[i].AppendString(out, platform); } - return out; } -string DemangledTypeNode::GetStringBeforeName() const +void DemangledTypeNode::AppendTypeName(string& out, Platform* platform) const +{ + if (auto payload = std::get_if(&m_payload)) + AppendNamePartList(out, payload->name, platform); +} + + +string DemangledTypeNode::GetStringBeforeName(Platform* platform) const { string out; - AppendBeforeName(out); + AppendBeforeName(out, nullptr, platform); return out; } -string DemangledTypeNode::GetStringAfterName() const +string DemangledTypeNode::GetStringAfterName(Platform* platform) const { string out; - AppendAfterName(out); + AppendAfterName(out, nullptr, platform); return out; } -void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* parentType) const +void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* parentType, Platform* platform) const { - string modifiers = GetModifierString(); - string ptrSuffix = GetPointerSuffixString(); - - switch (m_typeClass) + switch (GetPayloadClass()) { case FunctionTypeClass: + { + const auto& payload = std::get(m_payload); // Return type before name - if (m_childType) + if (payload.returnType) { - if (!out.empty() && out.back() != ' ') - out += " "; - m_childType->AppendBeforeName(out, this); + if (!out.empty() && out.back() != ' ' && out.back() != '(') + out += ' '; + payload.returnType->AppendBeforeName(out, this, platform); } // If parent is a pointer, add "(" for function pointer syntax - if (parentType && parentType->m_typeClass == PointerTypeClass) + if (parentType && parentType->GetPayloadClass() == PointerTypeClass) + { + const auto* parentMemberPointer = std::get_if(&parentType->m_payload); + if (!out.empty() && out.back() != ' ' && + !(parentMemberPointer && parentMemberPointer->parenthesized)) + out += ' '; + out += '('; + } + if (static_cast(payload.callingConventionName) < (sizeof(CallingConventionString) / 
sizeof(CallingConventionString[0]))) { - if (!out.empty() && out.back() != ' ') - out += " "; - out += "("; + const char* callingConvention = CallingConventionString[static_cast(payload.callingConventionName)]; + if (callingConvention[0] != 0) + { + if (!out.empty() && out.back() != ' ' && out.back() != '(') + out += ' '; + out += callingConvention; + } } break; + } case IntegerTypeClass: - if (!m_altName.empty()) - out += m_altName; - else if (m_signed && m_width == 1) + { + const auto& payload = std::get(m_payload); + const size_t width = ResolveWidth(payload.width, payload.widthKind, platform); + if (!payload.altName.empty()) + out += payload.altName; + else if (payload.isSigned && width == 1) out += "char"; - else if (m_signed) - out += "int" + to_string(m_width * 8) + "_t"; + else if (payload.isSigned) + { + out += "int"; + out += to_string(width * 8); + out += "_t"; + } else - out += "uint" + to_string(m_width * 8) + "_t"; - if (!modifiers.empty()) - out += " " + modifiers; + { + out += "uint"; + out += to_string(width * 8); + out += "_t"; + } + AppendModifiers(out); break; + } case FloatTypeClass: - if (!m_altName.empty()) - out += m_altName; - else switch (m_width) + { + const auto& payload = std::get(m_payload); + if (!payload.altName.empty()) + out += payload.altName; + else switch (payload.width) { case 2: out += "float16"; break; case 4: out += "float"; break; case 8: out += "double"; break; case 10: out += "long double"; break; - default: out += "float" + to_string(m_width * 8); break; + default: + out += "float"; + out += to_string(payload.width * 8); + break; } - if (!modifiers.empty()) - out += " " + modifiers; + AppendModifiers(out); break; + } case BoolTypeClass: out += "bool"; - if (!modifiers.empty()) - out += " " + modifiers; + AppendModifiers(out); break; case VoidTypeClass: out += "void"; - if (!modifiers.empty()) - out += " " + modifiers; + AppendModifiers(out); break; case VarArgsTypeClass: @@ -306,32 +1036,65 @@ void 
DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* p break; case PointerTypeClass: - if (m_childType) - m_childType->AppendBeforeName(out, this); - switch (m_pointerReference) + if (auto payload = std::get_if(&m_payload)) { - case ReferenceReferenceType: out += "&"; break; - case PointerReferenceType: out += "*"; break; - case RValueReferenceType: out += "&&"; break; - default: break; + if (payload->childType) + payload->childType->AppendBeforeName(out, this, platform); + if (payload->parenthesized) + { + if (out.empty() || out.back() != '(') + out += '('; + } + else if (!out.empty() && out.back() != ' ' && out.back() != '(') + out += ' '; + if (!payload->ownerName.empty()) + AppendNamePartList(out, payload->ownerName, platform); + out += "::*"; + } + else if (auto payload = std::get_if(&m_payload)) + { + if (payload->childType) + payload->childType->AppendBeforeName(out, this, platform); + switch (payload->referenceType) + { + case ReferenceReferenceType: out += '&'; break; + case PointerReferenceType: out += '*'; break; + case RValueReferenceType: out += "&&"; break; + default: break; + } } - if (!ptrSuffix.empty()) - out += " " + ptrSuffix; - if (!modifiers.empty()) - out += " " + modifiers; + if ((m_pointerSuffixBits & (DemangledUnalignedBit | DemangledRestrictBit | + DemangledReferenceBit | DemangledLvalueBit)) != 0) + { + out += ' '; + AppendPointerSuffix(out); + } + AppendModifiers(out); break; case ArrayTypeClass: - if (m_childType) - m_childType->AppendBeforeName(out, this); - if (parentType && parentType->m_typeClass == PointerTypeClass) + { + const auto& payload = std::get(m_payload); + if (payload.childType) + payload.childType->AppendBeforeName(out, this, platform); + if (parentType && parentType->GetPayloadClass() == PointerTypeClass) { - out += " ("; + const auto* parentMemberPointer = std::get_if(&parentType->m_payload); + out += (parentMemberPointer && parentMemberPointer->parenthesized) ? 
"(" : " ("; } break; + } case NamedTypeReferenceClass: - switch (m_ntrClass) + if (HasPostfixType()) + { + AppendPostfixType(out, platform); + AppendModifiers(out); + break; + } + { + const auto& payload = std::get(m_payload); + switch (payload.ntrClass) { case ClassNamedTypeClass: out += "class "; break; case StructNamedTypeClass: out += "struct "; break; @@ -339,10 +1102,21 @@ void DemangledTypeNode::AppendBeforeName(string& out, const DemangledTypeNode* p case EnumNamedTypeClass: out += "enum "; break; default: break; } - out += GetTypeNameString(); - if (!modifiers.empty()) - out += " " + modifiers; + AppendTypeName(out, platform); + AppendModifiers(out); + break; + } + + case WideCharTypeClass: + { + const auto& payload = std::get(m_payload); + if (!payload.altName.empty()) + out += payload.altName; + else + out += "wchar_t"; + AppendModifiers(out); break; + } default: break; @@ -356,73 +1130,116 @@ static string FormatArrayCount(uint64_t elements) } -void DemangledTypeNode::AppendAfterName(string& out, const DemangledTypeNode* parentType) const +void DemangledTypeNode::AppendAfterName(string& out, const DemangledTypeNode* parentType, Platform* platform) const { - string modifiers = GetModifierString(); - string ptrSuffix = GetPointerSuffixString(); - - switch (m_typeClass) + switch (GetPayloadClass()) { case FunctionTypeClass: { + const auto& payload = std::get(m_payload); // Close the "(" from before-name if parent is pointer - if (parentType && parentType->m_typeClass == PointerTypeClass) - out += ")"; + if (parentType && parentType->GetPayloadClass() == PointerTypeClass) + out += ')'; - out += "("; - for (size_t i = 0; i < m_params.size(); i++) + out += '('; + for (size_t i = 0; i < payload.params.size(); i++) { if (i != 0) out += ", "; - if (m_params[i].type) - out += m_params[i].type->GetString(); + if (payload.params[i].type) + payload.params[i].type->AppendString(out, platform); } - out += ")"; - if (!modifiers.empty()) - out += " " + modifiers; - if 
(!ptrSuffix.empty()) - out += ptrSuffix; + out += ')'; + AppendModifiers(out); + if ((m_pointerSuffixBits & (DemangledUnalignedBit | DemangledRestrictBit | + DemangledReferenceBit | DemangledLvalueBit)) != 0) + AppendPointerSuffix(out); // Return type's after-name tokens - if (m_childType) - m_childType->AppendAfterName(out, this); + if (payload.returnType) + payload.returnType->AppendAfterName(out, this, platform); break; } case PointerTypeClass: - if (m_childType) - m_childType->AppendAfterName(out, this); + if (auto payload = std::get_if(&m_payload)) + { + if (payload->childType) + payload->childType->AppendAfterName(out, this, platform); + const BNTypeClass childClass = payload->childType ? payload->childType->GetPayloadClass() : VoidTypeClass; + if (payload->parenthesized && (!payload->childType || + (childClass != FunctionTypeClass && childClass != ArrayTypeClass))) + out += ')'; + } + else if (auto payload = std::get_if(&m_payload)) + { + if (payload->childType) + payload->childType->AppendAfterName(out, this, platform); + } break; case ArrayTypeClass: - if (parentType && parentType->m_typeClass == PointerTypeClass) + { + const auto& payload = std::get(m_payload); + if (parentType && parentType->GetPayloadClass() == PointerTypeClass) out += ")"; - out += "[" + FormatArrayCount(m_elements) + "]"; - if (m_childType) - m_childType->AppendAfterName(out, this); + out += "[" + FormatArrayCount(payload.elements) + "]"; + if (payload.childType) + payload.childType->AppendAfterName(out, this, platform); break; + } default: break; } } -string DemangledTypeNode::GetString() const +void DemangledTypeNode::AppendString(string& out, Platform* platform) const { - const string before = GetStringBeforeName(); - const string after = GetStringAfterName(); - if (!before.empty() && !after.empty() && before.back() != ' ' && before.back() != '*' - && before.back() != '&' && after.front() != ' ' && after.front() != '[' - && m_childType && m_childType->m_typeClass != 
FunctionTypeClass) + size_t beforeEnd = out.size(); + AppendBeforeName(out, nullptr, platform); + beforeEnd = out.size(); // track where "before" ends + + string after; + AppendAfterName(after, nullptr, platform); + + if (!after.empty() && beforeEnd > 0) { - return before + " " + after; + char lastBefore = out[beforeEnd - 1]; + NodeRef child = GetPrimaryChild(); + if (lastBefore != ' ' && lastBefore != '*' && lastBefore != '&' + && after.front() != ' ' && after.front() != '[' + && child && child->GetPayloadClass() != FunctionTypeClass) + { + out += ' '; + } } - return before + after; + out += after; +} + + +string DemangledTypeNode::GetString() const +{ + return GetString(nullptr); +} + + +string DemangledTypeNode::GetString(Platform* platform) const +{ + string out; + AppendString(out, platform); + return out; +} + + +string DemangledTypeNode::GetTypeAndName(const StringList& name) const +{ + return GetTypeAndName(name, nullptr); } -string DemangledTypeNode::GetTypeAndName(const QualifiedName& name) const +string DemangledTypeNode::GetTypeAndName(const StringList& name, Platform* platform) const { - const string before = GetStringBeforeName(); - const string qName = name.GetString(); - const string after = GetStringAfterName(); + const string before = GetStringBeforeName(platform); + const string qName = JoinNameList(name); + const string after = GetStringAfterName(platform); if ((!before.empty() && !qName.empty() && before.back() != ' ' && qName.front() != ' ') || (!before.empty() && !after.empty() && before.back() != ' ' && after.front() != ' ')) return before + " " + qName + after; @@ -430,9 +1247,27 @@ string DemangledTypeNode::GetTypeAndName(const QualifiedName& name) const } -Ref DemangledTypeNode::Finalize() const +bool DemangledTypeNode::HasUndeterminedTopLevelSize() const +{ + if (auto payload = std::get_if(&m_payload)) + return payload->widthKind == FixedWidth && payload->width == 0; + if (std::holds_alternative(m_payload)) + return true; + if (auto 
payload = std::get_if(&m_payload)) + return payload->childType && payload->childType->HasUndeterminedTopLevelSize(); + return false; +} + + +uint8_t DemangledTypeNode::GetValueConfidence() const +{ + return HasUndeterminedTopLevelSize() ? BN_DEFAULT_CONFIDENCE : BN_FULL_CONFIDENCE; +} + + +Ref DemangledTypeNode::Finalize(Platform* platform) const { - switch (m_typeClass) + switch (GetPayloadClass()) { case VoidTypeClass: { @@ -456,9 +1291,11 @@ Ref DemangledTypeNode::Finalize() const case IntegerTypeClass: { + const auto& payload = std::get(m_payload); + const size_t width = ResolveWidth(payload.width, payload.widthKind, platform); if (!m_const && !m_volatile) - return Type::IntegerType(m_width, m_signed, m_altName); - TypeBuilder tb = TypeBuilder::IntegerType(m_width, m_signed, m_altName); + return Type::IntegerType(width, payload.isSigned, payload.altName); + TypeBuilder tb = TypeBuilder::IntegerType(width, payload.isSigned, payload.altName); tb.SetConst(m_const); tb.SetVolatile(m_volatile); return tb.Finalize(); @@ -466,9 +1303,10 @@ Ref DemangledTypeNode::Finalize() const case FloatTypeClass: { + const auto& payload = std::get(m_payload); if (!m_const && !m_volatile) - return Type::FloatType(m_width, m_altName); - TypeBuilder tb = TypeBuilder::FloatType(m_width, m_altName); + return Type::FloatType(payload.width, payload.altName); + TypeBuilder tb = TypeBuilder::FloatType(payload.width, payload.altName); tb.SetConst(m_const); tb.SetVolatile(m_volatile); return tb.Finalize(); @@ -477,16 +1315,43 @@ Ref DemangledTypeNode::Finalize() const case VarArgsTypeClass: return TypeBuilder::VarArgsType().Finalize(); + case WideCharTypeClass: + { + const auto& payload = std::get(m_payload); + if (!m_const && !m_volatile) + return Type::WideCharType(payload.width, payload.altName); + TypeBuilder tb = TypeBuilder::WideCharType(payload.width, payload.altName); + tb.SetConst(m_const); + tb.SetVolatile(m_volatile); + return tb.Finalize(); + } + case PointerTypeClass: { - Ref 
child = m_childType ? m_childType->Finalize() : Ref(Type::VoidType()); - return TypeBuilder::PointerType(m_width, child, m_const, m_volatile, m_pointerReference).Finalize(); + if (auto payload = std::get_if(&m_payload)) + { + Ref child = payload->childType ? payload->childType->Finalize(platform) : Ref(Type::VoidType()); + TypeBuilder tb = TypeBuilder::PointerType( + ResolveWidth(0, AddressWidth, platform), child, m_const, m_volatile, PointerReferenceType); + AddPointerSuffixes(tb, true); + Ref normalized = tb.Finalize(); + return Type::NamedType(QualifiedName({GetString(platform)}), normalized.GetPtr()); + } + + const auto& payload = std::get(m_payload); + Ref child = payload.childType ? payload.childType->Finalize(platform) : Ref(Type::VoidType()); + TypeBuilder tb = TypeBuilder::PointerType( + ResolveWidth(0, AddressWidth, platform), child, m_const, m_volatile, payload.referenceType); + AddPointerSuffixes(tb, true); + Ref normalized = tb.Finalize(); + return normalized; } case ArrayTypeClass: { - Ref child = m_childType ? m_childType->Finalize() : Ref(Type::VoidType()); - TypeBuilder tb = TypeBuilder::ArrayType(child, m_elements); + const auto& payload = std::get(m_payload); + Ref child = payload.childType ? payload.childType->Finalize(platform) : Ref(Type::VoidType()); + TypeBuilder tb = TypeBuilder::ArrayType(child, payload.elements); if (m_const) tb.SetConst(m_const); if (m_volatile) @@ -496,35 +1361,68 @@ Ref DemangledTypeNode::Finalize() const case FunctionTypeClass: { - Ref retType = m_childType ? m_childType->Finalize() : Ref(Type::VoidType()); + const auto& payload = std::get(m_payload); + Ref retType = payload.returnType ? payload.returnType->Finalize(platform) : Ref(Type::VoidType()); + uint8_t retTypeConfidence = payload.returnType ? 
payload.returnType->GetValueConfidence() : BN_FULL_CONFIDENCE; + retTypeConfidence = std::min(retTypeConfidence, m_returnTypeConfidence); + vector finalParams; - finalParams.reserve(m_params.size()); - for (auto& p : m_params) + finalParams.reserve(payload.params.size() + (payload.implicitThisParameterType ? 1 : 0)); + if (payload.implicitThisParameterType) + { + Ref thisType = payload.implicitThisParameterType->Finalize(platform); + finalParams.push_back({"this", thisType->WithConfidence(payload.implicitThisParameterType->GetValueConfidence()), + DefaultLocationSource, Variable()}); + } + for (auto& p : payload.params) + { + Ref pType = p.type ? p.type->Finalize(platform) : Ref(Type::VoidType()); + uint8_t pTypeConfidence = p.type ? p.type->GetValueConfidence() : BN_FULL_CONFIDENCE; + finalParams.push_back({p.name, pType->WithConfidence(pTypeConfidence), DefaultLocationSource, Variable()}); + } + Confidence> callingConvention; + if (payload.callingConventionName != NoCallingConvention) { - Ref pType = p.type ? 
p.type->Finalize() : Ref(Type::VoidType()); - finalParams.push_back({p.name, pType, DefaultLocationSource, Variable()}); + if (auto resolvedCallingConvention = ResolveCallingConvention(payload.callingConventionName, platform)) + callingConvention = Confidence>(resolvedCallingConvention, BN_FULL_CONFIDENCE); } - TypeBuilder tb = TypeBuilder::FunctionType(retType->WithConfidence(m_returnTypeConfidence), nullptr, finalParams); + TypeBuilder tb = TypeBuilder::FunctionType( + retType->WithConfidence(retTypeConfidence), callingConvention, finalParams, + Confidence(false, 0)); tb.SetConst(m_const); tb.SetVolatile(m_volatile); - for (auto ps : m_pointerSuffix) - tb.AddPointerSuffix(ps); + AddPointerSuffixes(tb); tb.SetNameType(m_nameType); + if (payload.callingConventionName != NoCallingConvention) + tb.SetCallingConventionName(payload.callingConventionName); return tb.Finalize(); } case NamedTypeReferenceClass: { + if (auto payload = std::get_if(&m_payload)) + { + QualifiedName name(RenderTypeNameSegments(platform)); + TypeBuilder tb = TypeBuilder::NamedType( + NamedTypeReference::GenerateAutoDemangledTypeReference(UnknownNamedTypeClass, name), 0, 1); + tb.SetConst(m_const); + tb.SetVolatile(m_volatile); + AddPointerSuffixes(tb); + tb.SetNameType(m_nameType); + tb.SetHasTemplateArguments(false); + return tb.Finalize(); + } + + const auto& payload = std::get(m_payload); + QualifiedName name(RenderTypeNameSegments(platform)); TypeBuilder tb = TypeBuilder::NamedType( - NamedTypeReference::GenerateAutoDemangledTypeReference( - m_ntrClass, QualifiedName(m_nameSegments ? *m_nameSegments : vector{})), - m_width, m_alignment > 0 ? 
m_alignment : 1); + NamedTypeReference::GenerateAutoDemangledTypeReference(payload.ntrClass, name), + ResolveWidth(payload.width, payload.widthKind, platform), 1); tb.SetConst(m_const); tb.SetVolatile(m_volatile); - for (auto ps : m_pointerSuffix) - tb.AddPointerSuffix(ps); + AddPointerSuffixes(tb); tb.SetNameType(m_nameType); - tb.SetHasTemplateArguments(m_hasTemplateArgs); + tb.SetHasTemplateArguments(HasTemplateArguments()); return tb.Finalize(); } @@ -532,3 +1430,6 @@ Ref DemangledTypeNode::Finalize() const return Type::VoidType(); } } + +#undef HAS_POINTER_SUFFIX +#undef GetClass diff --git a/demangler/gnu3/demangled_type_node.h b/demangler/gnu3/demangled_type_node.h index 62ad9004a5..39573d94a0 100644 --- a/demangler/gnu3/demangled_type_node.h +++ b/demangler/gnu3/demangled_type_node.h @@ -40,26 +40,76 @@ #endif #endif +#include #include +#include + #ifdef BINARYNINJACORE_LIBRARY -#include "binaryninjacore_global.h" -#define _STD_SET BinaryNinjaCore::set +namespace BinaryNinjaCore { class Platform; } #else -#include -#define _STD_SET std::set +namespace BinaryNinja { class Platform; } #endif -// Lightweight type representation for the GNU3 demangler. 
+using StringList = _STD_VECTOR<_STD_STRING>; + +class DemangledTypeNode; + +struct DemangledTypeNodeParam +{ + _STD_STRING name; + std::shared_ptr type = nullptr; +}; + +class DemangledNamePart +{ +public: + using Ref = std::shared_ptr; + + DemangledNamePart(); + explicit DemangledNamePart(_STD_STRING base); + DemangledNamePart(_STD_STRING base, std::shared_ptr baseTypeSuffix); + DemangledNamePart(_STD_STRING base, _STD_VECTOR templateArgs, + bool spaceAfterComma = false); + + const _STD_STRING& GetBase() const { return m_base; } + void SetBase(_STD_STRING base) { m_base = std::move(base); } + void AppendBase(const _STD_STRING& suffix) { m_base += suffix; } + bool HasTemplateArguments() const { return m_hasTemplateArgs || !m_templateArgs.empty(); } + _STD_VECTOR& GetMutableTemplateArguments() { return m_templateArgs; } + void SetTemplateArguments(_STD_VECTOR args, bool spaceAfterComma = false); + + void AppendString(_STD_STRING& out, BN::Platform* platform) const; + _STD_STRING GetString(BN::Platform* platform = nullptr) const; + bool IsStructurallyEqual(const DemangledNamePart& other) const; + + static Ref CreateShared(DemangledNamePart part); + static Ref CreateSharedCopy(const DemangledNamePart& part); + +private: + _STD_STRING m_base; + std::shared_ptr m_baseTypeSuffix; + _STD_VECTOR m_templateArgs; + bool m_hasTemplateArgs; + bool m_spaceAfterTemplateComma; +}; + +using DemangledQualifiedName = _STD_VECTOR; + +// Lightweight type representation for demanglers (GNU3 and MSVC). // This object serves as an abstraction layer between C++'s type system and our own. // It also removes a source of a lot of reallocation of NamedTypeReference BinaryNinja::Type objects // and only creates real Type objects when Finalize() is called. 
class DemangledTypeNode { public: - struct Param + using NodeRef = std::shared_ptr; + using Param = DemangledTypeNodeParam; + + enum WidthKind : uint8_t { - _STD_STRING name; - std::shared_ptr type; + FixedWidth, + AddressWidth, + DefaultIntegerWidth }; DemangledTypeNode(); @@ -72,100 +122,174 @@ class DemangledTypeNode static DemangledTypeNode VoidType(); static DemangledTypeNode BoolType(); static DemangledTypeNode IntegerType(size_t width, bool isSigned, const _STD_STRING& altName = ""); + static DemangledTypeNode AddressSizedIntegerType(bool isSigned, const _STD_STRING& altName = ""); static DemangledTypeNode FloatType(size_t width, const _STD_STRING& altName = ""); + static DemangledTypeNode WideCharType(size_t width, const _STD_STRING& altName = ""); static DemangledTypeNode VarArgsType(); - static DemangledTypeNode PointerType(BN::Architecture* arch, DemangledTypeNode child, - bool cnst, bool vltl, BNReferenceType refType); + static DemangledTypeNode PointerType(DemangledTypeNode child, bool cnst, bool vltl, BNReferenceType refType); + static DemangledTypeNode PointerType(NodeRef child, bool cnst, bool vltl, BNReferenceType refType); + static DemangledTypeNode MemberPointerType(DemangledTypeNode child, DemangledQualifiedName ownerName, + bool cnst, bool vltl); + static DemangledTypeNode MemberPointerType(NodeRef child, DemangledQualifiedName ownerName, + bool cnst, bool vltl); static DemangledTypeNode ArrayType(DemangledTypeNode child, uint64_t count); + static DemangledTypeNode ArrayType(NodeRef child, uint64_t count); static DemangledTypeNode FunctionType(DemangledTypeNode retType, std::nullptr_t, _STD_VECTOR params); + static DemangledTypeNode FunctionType(NodeRef retType, + std::nullptr_t, _STD_VECTOR params); static DemangledTypeNode NamedType(BNNamedTypeReferenceClass cls, - _STD_VECTOR<_STD_STRING> nameSegments, size_t width = 0, size_t align = 0); + StringList nameSegments, size_t width = 0, bool isSigned = false); static DemangledTypeNode 
NamedType(BNNamedTypeReferenceClass cls, - const BN::QualifiedName& name, size_t width = 0, size_t align = 0); + DemangledQualifiedName nameSegments, size_t width = 0, bool isSigned = false); + static DemangledTypeNode NamedTypeWithDefaultIntegerWidth(BNNamedTypeReferenceClass cls, + StringList nameSegments, bool isSigned = false); + static DemangledTypeNode PostfixType(NodeRef child, _STD_STRING suffix); + static DemangledTypeNode PostfixType(NodeRef child, _STD_STRING separator, NodeRef suffixType); + static NodeRef CreateShared(DemangledTypeNode node); + static NodeRef CreateSharedCopy(const DemangledTypeNode& node); - // Getters - BNTypeClass GetClass() const { return m_typeClass; } -#ifdef BINARYNINJACORE_LIBRARY - BNTypeClass GetTypeClass() const { return m_typeClass; } -#endif - const _STD_VECTOR<_STD_STRING>& GetTypeName() const - { - if (!m_nameSegments) - { - static const _STD_VECTOR<_STD_STRING> empty; - return empty; - } - return *m_nameSegments; - } - _STD_VECTOR<_STD_STRING>& GetMutableTypeName() - { - if (!m_nameSegments) - m_nameSegments = std::make_shared<_STD_VECTOR<_STD_STRING>>(); - else if (m_nameSegments.use_count() > 1) - m_nameSegments = std::make_shared<_STD_VECTOR<_STD_STRING>>(*m_nameSegments); - return *m_nameSegments; - } - _STD_STRING GetTypeNameString() const; - size_t NameStringSize() const; + BNTypeClass GetClass() const { return GetPayloadClass(); } + const DemangledQualifiedName& GetName() const; + DemangledQualifiedName& GetMutableName(); bool IsConst() const { return m_const; } bool IsVolatile() const { return m_volatile; } BNNameType GetNameType() const { return m_nameType; } - bool HasTemplateArguments() const { return m_hasTemplateArgs; } - const _STD_SET& GetPointerSuffix() const { return m_pointerSuffix; } - BNNamedTypeReferenceClass GetNTRClass() const { return m_ntrClass; } + bool HasTemplateArguments() const; + uint8_t GetPointerSuffixBits() const { return m_pointerSuffixBits; } + BNNamedTypeReferenceClass GetNTRClass() 
const; + void SetParenthesizedMemberPointer(bool parenthesized); + StringList RenderTypeNameSegments(BN::Platform* platform = nullptr) const; + bool IsStructurallyEqual(const DemangledTypeNode& other) const; - // Setters - void SetTypeName(_STD_VECTOR<_STD_STRING> name) { m_nameSegments = std::make_shared<_STD_VECTOR<_STD_STRING>>(std::move(name)); } + void SetName(DemangledQualifiedName name); void SetConst(bool c) { m_const = c; } void SetVolatile(bool v) { m_volatile = v; } void SetNameType(BNNameType nt) { m_nameType = nt; } - void SetHasTemplateArguments(bool t) { m_hasTemplateArgs = t; } - void SetPointerSuffix(const _STD_SET& s) { m_pointerSuffix = s; } - void AddPointerSuffix(BNPointerSuffix ps) { m_pointerSuffix.insert(ps); } + void SetPointerSuffixBits(uint8_t bits) { m_pointerSuffixBits = bits; } + void AddPointerSuffixBits(uint8_t bits) { m_pointerSuffixBits |= bits; } + void AddPointerSuffix(BNPointerSuffix ps) { m_pointerSuffixBits |= PointerSuffixBit(ps); } + bool AddQualifiersToPointerChild(bool cnst, bool vltl); void SetReturnTypeConfidence(uint8_t c) { m_returnTypeConfidence = c; } + void SetCallingConventionName(BNCallingConventionName cc); + void SetNTRType(BNNamedTypeReferenceClass cls); + void SetImplicitThisParameter(DemangledTypeNode type); - // Named type reference operations - void SetNTR(BNNamedTypeReferenceClass cls, _STD_VECTOR<_STD_STRING> nameSegments); - void SetNTR(BNNamedTypeReferenceClass cls, const BN::QualifiedName& name); - - // String formatting + void AppendString(_STD_STRING& out, BN::Platform* platform) const; _STD_STRING GetString() const; - _STD_STRING GetStringBeforeName() const; - _STD_STRING GetStringAfterName() const; - _STD_STRING GetTypeAndName(const BN::QualifiedName& name) const; + _STD_STRING GetString(BN::Platform* platform) const; + _STD_STRING GetStringBeforeName(BN::Platform* platform) const; + _STD_STRING GetStringAfterName(BN::Platform* platform) const; + _STD_STRING GetTypeAndName(const StringList& name) 
const; + _STD_STRING GetTypeAndName(const StringList& name, BN::Platform* platform) const; - // Conversion to real Type - BN::Ref Finalize() const; + BN::Ref Finalize(BN::Platform* platform = nullptr) const; private: - BNTypeClass m_typeClass; - size_t m_width; - size_t m_alignment; - bool m_const; - bool m_volatile; - bool m_signed; - bool m_hasTemplateArgs; - BNNameType m_nameType; - _STD_SET m_pointerSuffix; - _STD_STRING m_altName; + struct VoidPayload {}; + struct BoolPayload {}; + struct VarArgsPayload {}; - // Named type ref data - BNNamedTypeReferenceClass m_ntrClass; - std::shared_ptr<_STD_VECTOR<_STD_STRING>> m_nameSegments; + struct IntegerPayload + { + size_t width = 0; + WidthKind widthKind = FixedWidth; + bool isSigned = false; + _STD_STRING altName; + }; + + struct FloatPayload + { + size_t width = 0; + _STD_STRING altName; + }; - // Child type (for pointer/array/function return) - std::shared_ptr m_childType; - BNReferenceType m_pointerReference; - uint64_t m_elements; + struct WideCharPayload + { + size_t width = 0; + _STD_STRING altName; + }; + + struct PointerPayload + { + NodeRef childType; + BNReferenceType referenceType = PointerReferenceType; + }; + + struct MemberPointerPayload + { + NodeRef childType; + DemangledQualifiedName ownerName; + bool parenthesized = false; + }; + + struct ArrayPayload + { + NodeRef childType; + uint64_t elements = 0; + }; - // Function params - _STD_VECTOR m_params; + struct FunctionPayload + { + NodeRef returnType; + _STD_VECTOR params; + NodeRef implicitThisParameterType; + BNCallingConventionName callingConventionName = NoCallingConvention; + }; + + struct NamedTypePayload + { + BNNamedTypeReferenceClass ntrClass = UnknownNamedTypeClass; + DemangledQualifiedName name; + size_t width = 0; + WidthKind widthKind = FixedWidth; + bool isSigned = false; + }; + + struct PostfixPayload + { + NodeRef childType; + _STD_STRING suffix; + NodeRef suffixType; + }; + + using Payload = std::variant< + VoidPayload, + 
BoolPayload, + IntegerPayload, + FloatPayload, + WideCharPayload, + VarArgsPayload, + PointerPayload, + MemberPointerPayload, + ArrayPayload, + FunctionPayload, + NamedTypePayload, + PostfixPayload>; + + bool HasUndeterminedTopLevelSize() const; + uint8_t GetValueConfidence() const; + BNTypeClass GetPayloadClass() const; + NodeRef GetPrimaryChild() const; + static size_t ResolveWidth(size_t width, WidthKind widthKind, BN::Platform* platform = nullptr); + + BNNameType m_nameType; + uint8_t m_pointerSuffixBits; uint8_t m_returnTypeConfidence; + bool m_const; + bool m_volatile; + Payload m_payload; // Helpers for string formatting - _STD_STRING GetModifierString() const; - _STD_STRING GetPointerSuffixString() const; - void AppendBeforeName(_STD_STRING& out, const DemangledTypeNode* parentType = nullptr) const; - void AppendAfterName(_STD_STRING& out, const DemangledTypeNode* parentType = nullptr) const; + static uint8_t PointerSuffixBit(BNPointerSuffix ps); + void AddPointerSuffixes(BN::TypeBuilder& tb, bool omitPtr64 = true) const; + bool HasPostfixType() const; + void AppendPostfixType(_STD_STRING& out, BN::Platform* platform) const; + void AppendModifiers(_STD_STRING& out) const; + void AppendPointerSuffix(_STD_STRING& out) const; + static void AppendNamePartList(_STD_STRING& out, const DemangledQualifiedName& name, + BN::Platform* platform); + void AppendTypeName(_STD_STRING& out, BN::Platform* platform) const; + void AppendBeforeName(_STD_STRING& out, const DemangledTypeNode* parentType, BN::Platform* platform) const; + void AppendAfterName(_STD_STRING& out, const DemangledTypeNode* parentType, BN::Platform* platform) const; }; diff --git a/demangler/msvc/CMakeLists.txt b/demangler/msvc/CMakeLists.txt index b125599168..3536c899ab 100644 --- a/demangler/msvc/CMakeLists.txt +++ b/demangler/msvc/CMakeLists.txt @@ -5,7 +5,9 @@ project(demangle_msvc) file(GLOB SOURCES CONFIGURE_DEPENDS *.cpp *.c - *.h) + *.h + ../gnu3/demangled_type_node.cpp + 
../gnu3/demangled_type_node.h) if(DEMO) add_library(${PROJECT_NAME} STATIC ${SOURCES}) diff --git a/demangler/msvc/demangle_msvc.cpp b/demangler/msvc/demangle_msvc.cpp index 412ed96080..64956fd6d8 100644 --- a/demangler/msvc/demangle_msvc.cpp +++ b/demangler/msvc/demangle_msvc.cpp @@ -16,195 +16,391 @@ // See https://llvm.org/LICENSE.txt for license information. #include "demangle_msvc.h" +#include "unicode.h" +#include #include +#include +#include #ifdef BINARYNINJACORE_LIBRARY using namespace BinaryNinjaCore; -#define GetClass GetTypeClass #else using namespace BinaryNinja; using namespace std; #endif -#define MAX_DEMANGLE_LENGTH 4096 +// The largest observed depth in a real-world corpus of roughly 200k MSVC symbols was 54. +static constexpr size_t MAX_DEMANGLE_NESTING_DEPTH = 256; +static constexpr size_t MAX_ENCODED_NUMBER_HEX_DIGITS = 16; +static constexpr size_t MAX_BACKREFS = 10; -Demangle::Reader::Reader(string data) +static int64_t EncodedNumberToInt64(uint64_t magnitude, bool negative) { - m_data = data; - //Check for non-ascii characters - for (auto a : m_data) + constexpr auto int64Max = static_cast(std::numeric_limits::max()); + constexpr auto int64MinMagnitude = int64Max + 1; + + if (!negative) { - if (a < 0x20 || a > 0x7e) - throw DemangleException(); + if (magnitude > int64Max) + throw DemangleException("Invalid encoded number"); + return static_cast(magnitude); } + + if (magnitude > int64MinMagnitude) + throw DemangleException("Invalid encoded number"); + if (magnitude == int64MinMagnitude) + return std::numeric_limits::min(); + return -static_cast(magnitude); +} + +static _STD_STRING FormatEncodedNumberLiteral(uint64_t magnitude, bool negative) +{ + if (negative) + return "-" + to_string(magnitude); + return to_string(magnitude); } +// Define MSVC_DEMANGLE_DEBUG to enable trace logging +#ifdef MSVC_DEMANGLE_DEBUG +#define MSVC_TRACE(...) LogTraceF(__VA_ARGS__) +#else +#define MSVC_TRACE(...) 
do {} while(0) +#endif -string Demangle::Reader::PeekString(size_t count) +_STD_STRING Demangle::Reader::ReadString(size_t count) { if (count > Length()) throw DemangleException(); - return m_data.substr(0, count); + _STD_STRING out(m_ptr, count); + m_ptr += count; + return out; } -char Demangle::Reader::Peek() +_STD_STRING Demangle::Reader::ReadUntil(char sentinel) { - if (1 > Length()) + const char* found = static_cast(memchr(m_ptr, sentinel, m_end - m_ptr)); + if (!found) throw DemangleException(); - return (char)m_data[0]; + size_t count = found - m_ptr; + _STD_STRING out = ReadString(count); + Consume(); // sentinel + return out; } -const char* Demangle::Reader::GetRaw() +DemangledTypeNode::NodeRef Demangle::BackrefList::GetTypeBackrefRef(size_t reference) { - return m_data.c_str(); + if (reference < typeList.size() && typeList[reference]) + return typeList[reference]; + throw DemangleException(_STD_STRING("Backref too large " + std::to_string(reference))); } -char Demangle::Reader::Read() +DemangledNamePart::Ref Demangle::BackrefList::GetNameBackrefRef(size_t reference) { - if (1 > Length()) - throw DemangleException(); - char out = m_data[0]; - m_data = m_data.substr(1); - return out; + if (reference < nameList.size() && nameList[reference]) + return nameList[reference]; + MSVC_TRACE("type: {} - Backref too large: {}/{}", fmt::ptr(this), nameList.size(), reference); + throw DemangleException(_STD_STRING("Backref too large " + std::to_string(reference))); } -string Demangle::Reader::ReadString(size_t count) +const DemangledTypeNode& Demangle::BackrefList::GetTypeBackref(size_t reference) { - if (count > Length()) - throw DemangleException(); - string out = m_data.substr(0, count); - m_data = m_data.substr(count + 1); - return out; + return *GetTypeBackrefRef(reference); } -string Demangle::Reader::ReadUntil(char sentinal) +const DemangledNamePart& Demangle::BackrefList::GetNameBackref(size_t reference) { - size_t pos = m_data.find_first_of(sentinal); - if 
(pos == string::npos) - throw DemangleException(); - return ReadString(pos); + return *GetNameBackrefRef(reference); } -void Demangle::Reader::Consume(size_t count) +DemangledTypeNode::NodeRef Demangle::BackrefList::PushTypeBackref(DemangledTypeNode::NodeRef t) { - if (count > Length()) - throw DemangleException(); - m_data = m_data.substr(count); + if (!t) + return nullptr; + if (typeList.size() >= MAX_BACKREFS) + return nullptr; + typeList.push_back(t); + return t; } -size_t Demangle::Reader::Length() +DemangledTypeNode::NodeRef Demangle::BackrefList::PushTypeBackref(const DemangledTypeNode& t) { - return m_data.length(); + if (typeList.size() < MAX_BACKREFS) + return PushTypeBackref(DemangledTypeNode::CreateSharedCopy(t)); + return nullptr; } -const TypeBuilder& Demangle::BackrefList::GetTypeBackref(size_t reference) +DemangledTypeNode::NodeRef Demangle::BackrefList::PushTypeBackref(DemangledTypeNode&& t) { - if (reference < typeList.size()) - return typeList[reference]; - // LogDebug("type: %llx - : %d/%d\n", this, typeList.size(), reference); - throw DemangleException(string("Backref too large " + std::to_string(reference))); + if (typeList.size() < MAX_BACKREFS) + return PushTypeBackref(DemangledTypeNode::CreateShared(std::move(t))); + return nullptr; } -string Demangle::BackrefList::GetStringBackref(size_t reference) +DemangledNamePart::Ref Demangle::BackrefList::PushNameBackref(DemangledNamePart::Ref t) { - // LogDebug("type: %llx - ref: %d\n", this, reference); - if (reference < nameList.size()) - return nameList[reference]; - LogDebug("type: %p - Backref too large: %zu/%zu\n", this, nameList.size(), reference); - throw DemangleException(string("Backref too large " + std::to_string(reference))); + if (!t) + return nullptr; + MSVC_TRACE("this: {} - Backref: {}", fmt::ptr(this), nameList.size()); + for (const auto& name : nameList) + if (name && ((name == t) || name->IsStructurallyEqual(*t))) + return name; + if (nameList.size() < MAX_BACKREFS) + { + 
nameList.push_back(t); + return t; + } + return nullptr; } -void Demangle::BackrefList::PushTypeBackref(TypeBuilder t) +DemangledNamePart::Ref Demangle::BackrefList::PushNameBackref(const DemangledNamePart& t) { - // LogDebug("this: %llx - TypeBackref: %lld %s\n", this, nameList.size(), t.GetString().c_str()); - if (typeList.size() <= 9) - typeList.push_back(t); + MSVC_TRACE("this: {} - Backref: {}", fmt::ptr(this), nameList.size()); + for (const auto& name : nameList) + if (name && name->IsStructurallyEqual(t)) + return name; + if (nameList.size() < MAX_BACKREFS) + { + auto ref = DemangledNamePart::CreateSharedCopy(t); + nameList.push_back(ref); + return ref; + } + return nullptr; } -void Demangle::BackrefList::PushStringBackref(string& s) +DemangledNamePart::Ref Demangle::BackrefList::PushNameBackref(DemangledNamePart&& t) { - if (s.size() > MAX_DEMANGLE_LENGTH) - throw DemangleException(); - LogDebug("this: %p - Backref: %zu - %s\n", this, nameList.size(), s.c_str()); + MSVC_TRACE("this: {} - Backref: {}", fmt::ptr(this), nameList.size()); for (const auto& name : nameList) - if (name == s) - return; - nameList.push_back(s); + if (name && name->IsStructurallyEqual(t)) + return name; + if (nameList.size() < MAX_BACKREFS) + { + auto ref = DemangledNamePart::CreateShared(std::move(t)); + nameList.push_back(ref); + return ref; + } + return nullptr; } -void Demangle::BackrefList::PushFrontStringBackref(string& s) +DemangledNamePart::Ref Demangle::BackrefList::PushTemplateSpecialization(DemangledNamePart::Ref t) { - if (s.size() > MAX_DEMANGLE_LENGTH) - throw DemangleException(); - // LogDebug("this: %llx - F-Backref: %lld - %s\n", this, nameList.size(), s.c_str()); - nameList.insert(nameList.begin(), s); + if (!t) + return nullptr; + templateList.push_back(t); + return t; +} + + +DemangledNamePart::Ref Demangle::BackrefList::PushTemplateSpecialization(const DemangledNamePart& t) +{ + return PushTemplateSpecialization(DemangledNamePart::CreateSharedCopy(t)); +} + + 
+DemangledNamePart::Ref Demangle::BackrefList::PushTemplateSpecialization(DemangledNamePart&& t) +{ + return PushTemplateSpecialization(DemangledNamePart::CreateShared(std::move(t))); +} + + +Demangle::BackrefContextSwitch::BackrefContextSwitch(BackrefList& active): active(active) +{ + Swap(active, saved); +} + + +Demangle::BackrefContextSwitch::~BackrefContextSwitch() +{ + Swap(active, saved); +} + + +void Demangle::BackrefContextSwitch::Swap(BackrefList& left, BackrefList& right) +{ + std::swap(left.typeList, right.typeList); + std::swap(left.nameList, right.nameList); + std::swap(left.templateList, right.templateList); } -Demangle::Demangle(Architecture* arch, string mangledName) : - reader(mangledName), + +Demangle::Demangle(Architecture* arch, _STD_STRING mangledName) : + m_mangledName(std::move(mangledName)), + m_reader(m_mangledName), m_arch(arch), m_platform(nullptr), m_view(nullptr) { - m_logger = LogRegistry::CreateLogger("MSVCDemangle"); - //m_logger->ResetIndent(); } -Demangle::Demangle(Ref platform, string mangledName) : - reader(mangledName), - m_arch(platform->GetArchitecture()), - m_platform(platform), +Demangle::Demangle(Ref platform, _STD_STRING mangledName) : + m_mangledName(std::move(mangledName)), + m_reader(m_mangledName), + m_arch(nullptr), + m_platform(std::move(platform)), m_view(nullptr) { - m_logger = LogRegistry::CreateLogger("MSVCDemangle"); - //m_logger->ResetIndent(); } -Demangle::Demangle(Ref view, string mangledName) : - reader(mangledName), - m_view(view) +Demangle::Demangle(Ref view, _STD_STRING mangledName) : + m_mangledName(std::move(mangledName)), + m_reader(m_mangledName), + m_arch(nullptr), + m_platform(nullptr), + m_view(std::move(view)) { - m_platform = view->GetDefaultPlatform(); - if (!m_platform) - throw DemangleException(); - m_arch = m_platform->GetArchitecture(); - m_logger = LogRegistry::CreateLogger("MSVCDemangle"); - //m_logger->ResetIndent(); } -TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool 
isReturn, QualifiedName& name) +Demangle::NestingGuard::NestingGuard(Demangle& demangler) : m_demangler(demangler) +{ + m_demangler.m_nestingDepth++; + if (m_demangler.m_nestingDepth > MAX_DEMANGLE_NESTING_DEPTH) + { + m_demangler.m_nestingDepth--; + throw DemangleException("Detected adversarial mangled string"); + } +} + + +Demangle::NestingGuard::~NestingGuard() +{ + m_demangler.m_nestingDepth--; +} + + +void Demangle::Reset(Architecture* arch, const _STD_STRING& mangledName) +{ + m_mangledName = mangledName; + m_reader.Reset(m_mangledName); + m_backrefList.Clear(); + m_arch = arch; + m_platform = nullptr; + m_view = nullptr; + m_templateParamDepth = 0; + m_nestingDepth = 0; +} + + +void Demangle::RewriteTemplateBackrefName(NameList& typeName, const BackrefList& nameBackrefList) +{ + if (typeName.empty()) + return; + + DemangledNamePart& baseName = typeName.back(); + if (baseName.HasTemplateArguments()) + return; + _STD_STRING base = baseName.GetBase(); + + for (const auto & it : std::views::reverse(nameBackrefList.templateList)) + { + if (!it) + continue; + const DemangledNamePart& candidate = *it; + if (!candidate.HasTemplateArguments()) + continue; + if (candidate.GetBase() != base) + continue; + baseName = candidate; + return; + } +} + +_STD_STRING Demangle::FormatTypeAndName(const DemangledTypeNode& type, const NameList& name) const +{ + StringList nameSegments = FinalizeNameList(name); + if (type.GetNameType() == OperatorReturnTypeNameType) + { + Ref finalizedType = type.Finalize(m_platform.GetPtr()); + if (finalizedType) + return finalizedType->GetTypeAndName(QualifiedName(nameSegments)); + } + return type.GetTypeAndName(nameSegments); +} + +DemangledTypeNode Demangle::DemangleReferencedSymbolValue(BackrefList& varList) +{ + // Match LLVM's TemplateParameterReferenceNode parsing: referenced-symbol + // non-type template arguments are parsed in the active backref context, so + // later template arguments may refer to names/types introduced inside the + // 
referenced symbol. + BackrefList symbolBackrefs = varList; + + auto context = DemangleSymbol(symbolBackrefs); + varList = symbolBackrefs; + _STD_STRING value = "&" + FormatTypeAndName(context.type, context.name); + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{value}); +} + + +DemangledTypeNode Demangle::DemangleAutoNonTypeTemplateParam(BackrefList& varList) +{ + if (m_reader.ConsumeIf('0')) + { + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{DecodeEncodedNumberLiteral()}); + } + if (m_reader.ConsumeIf('1')) + { + return DemangleReferencedSymbolValue(varList); + } + throw DemangleException(); +} + + +DemangledTypeNode Demangle::DemangleVarType(BackrefList& varList, bool isReturn, + bool includeImplicitThis, DemangledTypeNode::NodeRef* outTypeBackref, TypeBackrefMode typeBackrefMode) { - m_logger->LogDebug("%s: '%s' - %lu\n", __FUNCTION__, reader.GetRaw(), varList.nameList.size()); - TypeBuilder newType; - bool _const = false, _volatile = false, isMember = false; //TODO: use this info, _signed = false; - BNReferenceType refType; + NestingGuard nestingGuard(*this); + MSVC_TRACE("{}: '{}' - {}", __FUNCTION__, m_reader.GetRaw(), varList.nameList.size()); + if (outTypeBackref) + *outTypeBackref = nullptr; + auto recordTypeBackref = [&](const DemangledTypeNode& type) -> DemangledTypeNode::NodeRef { + if (isReturn || typeBackrefMode == TypeBackrefMode::SuppressTopLevel) + return nullptr; + auto ref = varList.PushTypeBackref(type); + if (outTypeBackref) + *outTypeBackref = ref; + return ref; + }; + DemangledTypeNode newType; + bool _const = false, _volatile = false; + BNReferenceType refType = PointerReferenceType; BNTypeClass typeClass = IntegerTypeClass; - BNStructureVariant structType; - QualifiedName varName; - QualifiedName typeName; + BNStructureVariant structType = StructStructureType; + NameList typeName; BNNameType classFunctionType; - - size_t width; - char elm = reader.Read(); - switch (elm) + size_t width = 0; + 
bool _enumSigned = false; + auto demangleArrayExtents = [this]() -> _STD_VECTOR { + uint64_t dimensionCount = DecodeEncodedUnsignedNumber(); + if (dimensionCount > static_cast(m_reader.Length())) + throw DemangleException("Array dimension count is too large"); + + _STD_VECTOR elementList; + for (uint64_t i = 0; i < dimensionCount; i++) + { + uint64_t element = DecodeEncodedUnsignedNumber(); + elementList.push_back(element); + } + return elementList; + }; + switch (char elm = m_reader.Read()) { case 'A': typeClass = PointerTypeClass; @@ -218,18 +414,18 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali _const = false; _volatile = true; break; - case 'C': return TypeBuilder::IntegerType(1, true); - case 'D': return TypeBuilder::IntegerType(1, true); - case 'E': return TypeBuilder::IntegerType(1, false); - case 'F': return TypeBuilder::IntegerType(2, true); - case 'G': return TypeBuilder::IntegerType(2, false); - case 'H': return TypeBuilder::IntegerType(4, true); - case 'I': return TypeBuilder::IntegerType(4, false); - case 'J': return TypeBuilder::IntegerType(4, true, "long"); - case 'K': return TypeBuilder::IntegerType(4, false, "unsigned long"); - case 'M': return TypeBuilder::FloatType(4); - case 'N': return TypeBuilder::FloatType(8); - case 'O': return TypeBuilder::FloatType(10, "long double"); + case 'C': return DemangledTypeNode::IntegerType(1, true, "signed char"); + case 'D': return DemangledTypeNode::IntegerType(1, true); + case 'E': return DemangledTypeNode::IntegerType(1, false); + case 'F': return DemangledTypeNode::IntegerType(2, true); + case 'G': return DemangledTypeNode::IntegerType(2, false); + case 'H': return DemangledTypeNode::IntegerType(4, true); + case 'I': return DemangledTypeNode::IntegerType(4, false); + case 'J': return DemangledTypeNode::IntegerType(4, true, "long"); + case 'K': return DemangledTypeNode::IntegerType(4, false, "unsigned long"); + case 'M': return DemangledTypeNode::FloatType(4); + case 'N': 
return DemangledTypeNode::FloatType(8); + case 'O': return DemangledTypeNode::FloatType(10, "long double"); case 'P': // * typeClass = PointerTypeClass; refType = PointerReferenceType; @@ -259,111 +455,205 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali case 'V': typeClass = StructureTypeClass; structType = ClassStructureType; break; case 'W': typeClass = EnumerationTypeClass; - switch (reader.Read()) + switch (m_reader.Read()) { - case '0': width = 1; /* TODO: use these _signed = true; */ break; - case '1': width = 1; /* TODO: use these _signed = false; */ break; - case '2': width = 2; /* TODO: use these _signed = true; */ break; - case '3': width = 2; /* TODO: use these _signed = false; */ break; - case '4': width = 4; /* TODO: use these _signed = true; */ break; - case '5': width = 4; /* TODO: use these _signed = false; */ break; - case '6': width = 4; /* TODO: use these _signed = true; */ break; - case '7': width = 4; /* TODO: use these _signed = false; */ break; + case '0': width = 1; _enumSigned = true; break; + case '1': width = 1; _enumSigned = false; break; + case '2': width = 2; _enumSigned = true; break; + case '3': width = 2; _enumSigned = false; break; + case '4': width = 4; _enumSigned = true; break; + case '5': width = 4; _enumSigned = false; break; + case '6': width = 4; _enumSigned = true; break; + case '7': width = 4; _enumSigned = false; break; default: throw DemangleException(); } break; - case 'X': return TypeBuilder::VoidType(); break; + case 'X': return DemangledTypeNode::VoidType(); break; case 'Y': - throw DemangleException(); //TODO: handle cointerfaces - case 'Z': return TypeBuilder::VarArgsType(); break; + { + // Multi-dimensional array type: Y...@ + _STD_VECTOR elementList = demangleArrayExtents(); + newType = DemangleVarType(varList, false); + for (uint64_t i : std::views::reverse(elementList)) + { + newType = DemangledTypeNode::ArrayType(std::move(newType), i); + } + recordTypeBackref(newType); + 
return newType; + } + case 'Z': return DemangledTypeNode::VarArgsType(); + case '?': + { + char next = m_reader.PeekOr(); + if (next >= '0' && next <= '9') + { + size_t reference = m_reader.Read() - '0'; + if (reference < varList.typeList.size() && varList.typeList[reference]) + { + auto ref = varList.typeList[reference]; + if (outTypeBackref) + *outTypeBackref = ref; + return *ref; + } + // Legacy fallback: old generated symbols used `?2` here for + // a deduced-auto placeholder before clang/MSVC settled on + // the explicit `?@` spelling handled below. + if (reference == 2) + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{"auto"}); + throw DemangleException(_STD_STRING("Backref too large " + std::to_string(reference))); + } + if (next != '<') + throw DemangleException(); + + _STD_STRING placeholder = m_reader.ReadUntil('@'); + m_reader.ConsumeIf('@'); + if (placeholder == "") + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{"auto"}); + if (placeholder == "") + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{"decltype(auto)"}); + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{placeholder}); + } case '_': - switch (reader.Read()) + switch (m_reader.Read()) { - case 'D': newType = TypeBuilder::IntegerType(1, true); break; - case 'E': newType = TypeBuilder::IntegerType(1, false); break; - case 'F': newType = TypeBuilder::IntegerType(2, true); break; - case 'G': newType = TypeBuilder::IntegerType(2, false); break; - case 'H': newType = TypeBuilder::IntegerType(4, true); break; - case 'I': newType = TypeBuilder::IntegerType(4, false); break; - case 'J': newType = TypeBuilder::IntegerType(8, true); break; - case 'K': newType = TypeBuilder::IntegerType(8, false); break; - case 'L': newType = TypeBuilder::IntegerType(16, true); break; - case 'M': newType = TypeBuilder::IntegerType(16, false); break; - case 'N': newType = TypeBuilder::BoolType(); break; + case 'D': newType = 
DemangledTypeNode::IntegerType(1, true); break; + case 'E': newType = DemangledTypeNode::IntegerType(1, false); break; + case 'F': newType = DemangledTypeNode::IntegerType(2, true); break; + case 'G': newType = DemangledTypeNode::IntegerType(2, false); break; + case 'H': newType = DemangledTypeNode::IntegerType(4, true); break; + case 'I': newType = DemangledTypeNode::IntegerType(4, false); break; + case 'J': newType = DemangledTypeNode::IntegerType(8, true); break; + case 'K': newType = DemangledTypeNode::IntegerType(8, false); break; + case 'L': newType = DemangledTypeNode::IntegerType(16, true); break; + case 'M': newType = DemangledTypeNode::IntegerType(16, false); break; + case 'N': newType = DemangledTypeNode::BoolType(); break; case 'O': { - QualifiedName name; - //m_logger->Indent(); - auto childType = DemangleVarType(varList, false, name); - //m_logger->Dedent(); - newType = TypeBuilder::ArrayType(childType.Finalize(), 0); + auto childType = DemangleVarType(varList, false); + newType = DemangledTypeNode::ArrayType(std::move(childType), 0); break; } - case 'S': newType = TypeBuilder::IntegerType(2, true, "char16_t"); break; - case 'U': newType = TypeBuilder::IntegerType(4, true, "char32_t"); break; - case 'W': newType = TypeBuilder::IntegerType(2, false, "wchar_t"); break; - case 'X': typeClass = StructureTypeClass; structType = ClassStructureType; break; //Coclass - case 'Y': typeClass = StructureTypeClass; structType = ClassStructureType; break; //Cointerface + case 'S': newType = DemangledTypeNode::WideCharType(2, "char16_t"); break; + case 'U': newType = DemangledTypeNode::WideCharType(4, "char32_t"); break; + case 'W': newType = DemangledTypeNode::WideCharType(2, "wchar_t"); break; + // `_P` (auto) and `_T` (decltype(auto)) are placeholder return-type + // encodings. 
For normal source code they are deduced at the function + // definition and mangled as the deduced type — you will not see `_P` + // or `_T` from something like `auto foo() { return 0; }` (that becomes + // `?foo@@YAHXZ`). They do appear in compiler-emitted symbols for + // function templates whose declared return type is literally `auto` + // or `decltype(auto)` and which are mangled before/without deduction + // settling on a concrete type — e.g. `??$seq@HX@llvm@@YA?A_PH@Z` + // (llvm::seq) or `??$_Get_unwrapped@...@std@@YA?A_T...@Z`. Handle + // them as named-type placeholders so downstream type consumers get + // something sensible (rather than a failed demangle) even though + // the underlying type is not expressible as a Binary Ninja Type. + case 'P': newType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{"auto"}); break; + case 'Q': newType = DemangledTypeNode::IntegerType(1, true, "char8_t"); break; // C++20 char8_t + case 'T': newType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{"decltype(auto)"}); break; + // NOTE: `_X` and `_Y` were previously mapped to coclass/cointerface + // here, but those encodings are not emitted by any real toolchain. + // Neither LLVM's MicrosoftDemangle / MicrosoftMangle nor Wine's + // undname reimplementation recognizes `_X` or `_Y` as type + // codes. Real cointerface is plain `Y@@` (no underscore) at + // the top-level type switch, grouped with T/U/V; coclass has no + // dedicated mangling and is emitted as `V@@` (class). Let + // `_X` / `_Y` fall through to the `default: throw` so malformed + // input is rejected instead of producing a bogus class type.
default: throw DemangleException(); } break; case '$': - if (reader.PeekString(2) == "$Q") // && + if (m_reader.ConsumeIf("$Q")) // && { - reader.Consume(2); typeClass = PointerTypeClass; refType = RValueReferenceType; _const = false; _volatile = false; } - else if (reader.PeekString(2) == "$R") // && volatile + else if (m_reader.ConsumeIf("$R")) // && volatile { - reader.Consume(2); typeClass = PointerTypeClass; refType = RValueReferenceType; _const = false; _volatile = true; } - else if (reader.PeekString(2) == "$A") + else if (m_reader.ConsumeIf("$A")) { - reader.Consume(2); - char num = reader.Read(); - if (num == 8) - return DemangleFunction(NoNameType, true, varList); - if (num == '6' || num == '7') - return DemangleFunction(NoNameType, false, varList); + char num = m_reader.Read(); + if (num >= '6' && num <= '9') + { + // For member function types (8/9), skip the class scope marker @@ + if (num == '8' || num == '9') + m_reader.ConsumeIf("@@"); + return DemangleFunction(NoNameType, num >= '7', varList).type; + } throw DemangleException(); } - else if (reader.PeekString(2) == "$C") + else if (m_reader.ConsumeIf("$C")) { - reader.Consume(2); + bool isMember = false; DemangleModifiers(_const, _volatile, isMember); - QualifiedName name; - //m_logger->Indent(); - newType = DemangleVarType(varList, false, name); - //m_logger->Dedent(); + newType = DemangleVarType(varList, isReturn, includeImplicitThis, nullptr, + TypeBackrefMode::SuppressTopLevel); newType.SetConst(_const); newType.SetVolatile(_volatile); + recordTypeBackref(newType); return newType; } - else if (reader.PeekString(2) == "$T") + else if (m_reader.ConsumeIf("$T")) + { + auto t = DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{"std::nullptr_t"}); + recordTypeBackref(t); + return t; + } + else if (m_reader.ConsumeIf("$B")) + { + // $$B is a type modifier (managed/const) - strip and parse underlying type + return DemangleVarType(varList, isReturn, includeImplicitThis, outTypeBackref, 
typeBackrefMode); + } + else if (m_reader.ConsumeIf('0')) + { + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{DecodeEncodedNumberLiteral()}); + } + else if (m_reader.ConsumeIf('D')) + { + // $D - template type alias / anonymous type parameter + return DemangleVarType(varList, isReturn, includeImplicitThis, outTypeBackref, typeBackrefMode); + } + else if (m_reader.ConsumeIf('M')) { - reader.Consume(2); - return TypeBuilder::ValueType("std::nullptr"); + // $M - C++17 `auto` non-type template parameter. + // The encoded type is the deduced type for the following bare + // non-type payload and is not itself printed as a template arg. + DemangleVarType(varList, false); + return DemangleAutoNonTypeTemplateParam(varList); } - else if (reader.Peek() == '0') + else if (char next = m_reader.PeekOr(); next == 'H' || next == 'I' || next == 'J') { - reader.Consume(); - int64_t value; - DemangleNumber(value); - return TypeBuilder::ValueType(to_string(value)); + // $H/$I/$J - member function pointer value as a non-type template + // parameter. Format: $H@; + // $I has two adjustment numbers, $J has three. + char kind = m_reader.Read(); + BackrefList symbolBackrefs = varList; + auto context = DemangleSymbol(symbolBackrefs); + varList = symbolBackrefs; + _STD_STRING value = "{" + FormatTypeAndName(context.type, context.name); + + // Read adjustment number(s) — NOT $-prefixed, just raw numbers. + int adjustments = (kind == 'H') ? 1 : (kind == 'I') ? 
2 : 3; + for (int i = 0; i < adjustments; i++) + { + int64_t adj = DecodeEncodedSignedNumber(); + value += "," + to_string(adj); + } + value += "}"; + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{value}); } - else if (reader.Peek() == '1') + else if (m_reader.ConsumeIf('1')) { - reader.Consume(); - auto context = DemangleSymbol(); - return TypeBuilder::PointerType(m_arch, context.type.Finalize()); + return DemangleReferencedSymbolValue(varList); } else throw DemangleException(); @@ -378,9 +668,13 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali case '7': case '8': case '9': - //Make a copy of the item in the backref list. Exit early since we don't want this added to the backref list. - m_logger->LogDebug("Backref %u %lu", elm - '0', varList.typeList.size()); - return varList.GetTypeBackref(elm - '0'); + { + MSVC_TRACE("Backref {} {}", elm - '0', varList.typeList.size()); + auto ref = varList.GetTypeBackrefRef(elm - '0'); + if (outTypeBackref) + *outTypeBackref = ref; + return *ref; + } default: throw DemangleException(); } @@ -389,7 +683,28 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali { case PointerTypeClass: { - switch (reader.Peek()) + if (m_reader.ConsumeIf('6')) + { + auto childType = DemangleFunction(NoNameType, false, varList).type; + newType = DemangledTypeNode::PointerType(std::move(childType), + _const, + _volatile, + refType); + break; + } + if (m_reader.ConsumeIf('8')) + { + NameList ownerName; + DemangleName(ownerName, classFunctionType, varList, true); + RewriteTemplateBackrefName(ownerName, varList); + auto childType = DemangleFunction(NoNameType, true, varList).type; + newType = DemangledTypeNode::MemberPointerType(std::move(childType), + std::move(ownerName), + _const, + _volatile); + break; + } + switch (m_reader.PeekOr()) { case '0': case '1': @@ -397,209 +712,166 @@ TypeBuilder Demangle::DemangleVarType(BackrefList& varList, bool isReturn, Quali 
case '3': case '4': case '5': + case '7': + case '9': throw DemangleException(); - case '6': - { - if (refType != PointerReferenceType) //No references to functions - { - throw DemangleException(); - } - reader.Consume(); - auto childType = DemangleFunction(NoNameType, false, varList); - newType = TypeBuilder::PointerType(m_arch, - childType.Finalize(), - _const, - _volatile, - refType); - break; - } - case '7': //Function pointer - case '9': //Class Function pointer - { - if (refType != PointerReferenceType) //No references to functions - { - throw DemangleException(); - } - reader.Consume(); - auto childType = DemangleFunction(NoNameType, true, varList); - newType = TypeBuilder::PointerType(m_arch, - childType.Finalize(), - _const, - _volatile, - refType); - break; - } - case '8': //Named class function pointer - { - if (refType != PointerReferenceType) //No references to functions - { - throw DemangleException(); - } - reader.Consume(); - DemangleName(name, classFunctionType, varList); - name.push_back(""); - auto childType = DemangleFunction(NoNameType, true, varList); - newType = TypeBuilder::PointerType(m_arch, - childType.Finalize(), - _const, - _volatile, - refType); - break; - } default: // Non-numeric { - m_logger->LogDebug("Demangle pointer subtype: '%s'\n", reader.GetRaw()); - TypeBuilder child; - bool _const2 = false, _volatile2 = false, isMember = false; + MSVC_TRACE("Demangle pointer subtype: '{}'", m_reader.GetRaw()); + DemangledTypeNode child; + bool _const2 = false, _volatile2 = false, localIsMember = false; + NameList ownerName; auto suffix = DemanglePointerSuffix(); - DemangleModifiers(_const2, _volatile2, isMember); - if (reader.Peek() == 'Y') //Multi-dimentional array + ConsumeExtendedModifierPrefix(); + DemangleModifiers(_const2, _volatile2, localIsMember); + if (localIsMember) { - m_logger->LogDebug("Demangle multi-dimentional array"); - int64_t nDimentions; - reader.Consume(); - DemangleNumber(nDimentions); - vector elementList; - while 
(nDimentions--) - { - int64_t element = 0; - DemangleNumber(element); - elementList.push_back(element); - } - QualifiedName name; - //m_logger->Indent(); - child = DemangleVarType(varList, false, name); - //m_logger->Dedent(); + DemangleName(ownerName, classFunctionType, varList, true); + RewriteTemplateBackrefName(ownerName, varList); + } + if (m_reader.ConsumeIf('Y')) //Multi-dimensions array + { + MSVC_TRACE("Demangle multi-dimensions array"); + _STD_VECTOR elementList = demangleArrayExtents(); + child = DemangleVarType(varList, false); - for (auto i = elementList.rbegin(); i != elementList.rend(); i++) + for (uint64_t i : std::views::reverse(elementList)) { - child = TypeBuilder::ArrayType(child.Finalize(), *i); + child = DemangledTypeNode::ArrayType(std::move(child), i); } } else { - QualifiedName name; - //m_logger->Indent(); - child = DemangleVarType(varList, true, name); - //m_logger->Dedent(); + child = DemangleVarType(varList, true, includeImplicitThis && !localIsMember); } child.SetConst(_const2); child.SetVolatile(_volatile2); - newType = TypeBuilder::PointerType(m_arch, - child.Finalize(), - _const, - _volatile, - refType); - - newType.SetPointerSuffix(suffix); - m_logger->LogDebug("Name: %s\n", newType.GetString().c_str()); + if (localIsMember) + { + newType = DemangledTypeNode::MemberPointerType( + std::move(child), std::move(ownerName), _const, _volatile); + } + else + { + newType = DemangledTypeNode::PointerType(std::move(child), + _const, + _volatile, + refType); + } + + newType.SetPointerSuffixBits(suffix); + MSVC_TRACE("Name: {}", newType.GetString()); break; } } break; } case EnumerationTypeClass: - m_logger->LogDebug("Demangle enumeration\n"); - //m_logger->Indent(); - DemangleName(typeName, classFunctionType, varList); - //m_logger->Dedent(); - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference(EnumNamedTypeClass, typeName), - width, width); + MSVC_TRACE("Demangle enumeration"); + DemangleName(typeName, 
classFunctionType, varList, true); + newType = DemangledTypeNode::NamedType(EnumNamedTypeClass, typeName, width, _enumSigned); break; case StructureTypeClass: - m_logger->LogDebug("Demangle structure\n"); - //m_logger->Indent(); - DemangleName(typeName, classFunctionType, varList); - //m_logger->Dedent(); + MSVC_TRACE("Demangle structure"); + DemangleName(typeName, classFunctionType, varList, true); + RewriteTemplateBackrefName(typeName, varList); switch (structType) { case ClassStructureType: - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - ClassNamedTypeClass, typeName)); + newType = DemangledTypeNode::NamedType(ClassNamedTypeClass, typeName); break; case StructStructureType: - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - StructNamedTypeClass, typeName)); + newType = DemangledTypeNode::NamedType(StructNamedTypeClass, typeName); break; case UnionStructureType: - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - UnionNamedTypeClass, typeName)); + newType = DemangledTypeNode::NamedType(UnionNamedTypeClass, typeName); break; default: - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - UnknownNamedTypeClass, typeName)); + newType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, typeName); break; } break; default: break; } - if (!isReturn) - { - varList.PushTypeBackref(newType); - } + recordTypeBackref(newType); return newType; } - -void Demangle::DemangleNumber(int64_t& num) +Demangle::EncodedNumber Demangle::DecodeEncodedNumber() { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - num = 0; - int mult = 1; - if (reader.Peek() == '?') - { - mult = -1; - reader.Consume(); - } + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); + bool negative = m_reader.ConsumeIf('?'); + if (m_reader.Length() == 0) + throw DemangleException("Invalid encoded number"); - //The number is 
decimal 1-10 - if (reader.Peek() >= '0' && reader.Peek() <= '9') + char next = m_reader.PeekOr(); + if (next >= '0' && next <= '9') { - num = mult * (reader.Read() + 1 - '0'); - return; + uint64_t magnitude = static_cast(m_reader.Read() + 1 - '0'); + return {magnitude, negative}; } - else + + uint64_t magnitude = 0; + size_t digitCount = 0; + while (!m_reader.ConsumeIf('@')) { - //The number is hexidecimal - string strnum = reader.ReadUntil('@'); - for (auto a : strnum) - { - num *= 16; - if (a >= 'A' && a <= 'P') - num += a - 'A'; - else - throw DemangleException(); - } - num *= mult; - return; + char ch = m_reader.Read(); + if (ch < 'A' || ch > 'P') + throw DemangleException("Invalid encoded number"); + if (digitCount >= MAX_ENCODED_NUMBER_HEX_DIGITS) + throw DemangleException("Invalid encoded number"); + magnitude = (magnitude << 4) | static_cast(ch - 'A'); + digitCount++; } + + return {magnitude, negative}; +} + +int64_t Demangle::DecodeEncodedSignedNumber() +{ + EncodedNumber number = DecodeEncodedNumber(); + return EncodedNumberToInt64(number.magnitude, number.negative); +} + +uint64_t Demangle::DecodeEncodedUnsignedNumber() +{ + EncodedNumber number = DecodeEncodedNumber(); + if (number.negative) + throw DemangleException("Invalid encoded number"); + return number.magnitude; +} + +int32_t Demangle::DecodeEncodedSignedInt32() +{ + uint32_t lowBits = static_cast(DecodeEncodedUnsignedNumber()); + if ((lowBits & 0x80000000U) != 0) + return static_cast(static_cast(lowBits) - 0x100000000LL); + return static_cast(lowBits); } +_STD_STRING Demangle::DecodeEncodedNumberLiteral() +{ + EncodedNumber number = DecodeEncodedNumber(); + return FormatEncodedNumberLiteral(number.magnitude, number.negative); +} -void Demangle::DemangleChar(char& ch) + +char Demangle::DemangleChar() { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); // Basic char is just the char - if (reader.Peek() != '?') - { - ch = 
reader.Peek(); - reader.Consume(); - return; - } - reader.Consume(); + if (!m_reader.ConsumeIf('?')) + return m_reader.Read(); // Hex char is ?$XX for 2 hex digits XX - if (reader.Peek() == '$') + if (m_reader.ConsumeIf('$')) { - m_logger->LogDebug("%s: Hex digit '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: Hex digit '{}'", __FUNCTION__, m_reader.GetRaw()); - reader.Consume(); - char c1 = reader.Peek(); - reader.Consume(); - char c2 = reader.Peek(); - reader.Consume(); + char c1 = m_reader.Read(); + char c2 = m_reader.Read(); if (c1 < 'A' || c1 > 'P') throw DemangleException("Invalid character"); @@ -609,224 +881,256 @@ void Demangle::DemangleChar(char& ch) uint8_t b1 = c1 - 'A'; uint8_t b2 = c2 - 'A'; - ch = (char)((b1 << 4) | b2); - return; + return static_cast((b1 << 4) | b2); } - m_logger->LogDebug("%s: Table lookup '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: Table lookup '{}'", __FUNCTION__, m_reader.GetRaw()); // Otherwise it's a lookup based on some big table // Thanks, LLVM! 
- switch (reader.Peek()) - { - case '0': ch = ','; reader.Consume(); return; - case '1': ch = '/'; reader.Consume(); return; - case '2': ch = '\\'; reader.Consume(); return; - case '3': ch = ':'; reader.Consume(); return; - case '4': ch = '.'; reader.Consume(); return; - case '5': ch = ' '; reader.Consume(); return; - case '6': ch = '\n'; reader.Consume(); return; - case '7': ch = '\t'; reader.Consume(); return; - case '8': ch = '\''; reader.Consume(); return; - case '9': ch = '-'; reader.Consume(); return; - case 'a': ch = '\xE1'; reader.Consume(); return; - case 'b': ch = '\xE2'; reader.Consume(); return; - case 'c': ch = '\xE3'; reader.Consume(); return; - case 'd': ch = '\xE4'; reader.Consume(); return; - case 'e': ch = '\xE5'; reader.Consume(); return; - case 'f': ch = '\xE6'; reader.Consume(); return; - case 'g': ch = '\xE7'; reader.Consume(); return; - case 'h': ch = '\xE8'; reader.Consume(); return; - case 'i': ch = '\xE9'; reader.Consume(); return; - case 'j': ch = '\xEA'; reader.Consume(); return; - case 'k': ch = '\xEB'; reader.Consume(); return; - case 'l': ch = '\xEC'; reader.Consume(); return; - case 'm': ch = '\xED'; reader.Consume(); return; - case 'n': ch = '\xEE'; reader.Consume(); return; - case 'o': ch = '\xEF'; reader.Consume(); return; - case 'p': ch = '\xF0'; reader.Consume(); return; - case 'q': ch = '\xF1'; reader.Consume(); return; - case 'r': ch = '\xF2'; reader.Consume(); return; - case 's': ch = '\xF3'; reader.Consume(); return; - case 't': ch = '\xF4'; reader.Consume(); return; - case 'u': ch = '\xF5'; reader.Consume(); return; - case 'v': ch = '\xF6'; reader.Consume(); return; - case 'w': ch = '\xF7'; reader.Consume(); return; - case 'x': ch = '\xF8'; reader.Consume(); return; - case 'y': ch = '\xF9'; reader.Consume(); return; - case 'z': ch = '\xFA'; reader.Consume(); return; - case 'A': ch = '\xC1'; reader.Consume(); return; - case 'B': ch = '\xC2'; reader.Consume(); return; - case 'C': ch = '\xC3'; reader.Consume(); return; - case 
'D': ch = '\xC4'; reader.Consume(); return; - case 'E': ch = '\xC5'; reader.Consume(); return; - case 'F': ch = '\xC6'; reader.Consume(); return; - case 'G': ch = '\xC7'; reader.Consume(); return; - case 'H': ch = '\xC8'; reader.Consume(); return; - case 'I': ch = '\xC9'; reader.Consume(); return; - case 'J': ch = '\xCA'; reader.Consume(); return; - case 'K': ch = '\xCB'; reader.Consume(); return; - case 'L': ch = '\xCC'; reader.Consume(); return; - case 'M': ch = '\xCD'; reader.Consume(); return; - case 'N': ch = '\xCE'; reader.Consume(); return; - case 'O': ch = '\xCF'; reader.Consume(); return; - case 'P': ch = '\xD0'; reader.Consume(); return; - case 'Q': ch = '\xD1'; reader.Consume(); return; - case 'R': ch = '\xD2'; reader.Consume(); return; - case 'S': ch = '\xD3'; reader.Consume(); return; - case 'T': ch = '\xD4'; reader.Consume(); return; - case 'U': ch = '\xD5'; reader.Consume(); return; - case 'V': ch = '\xD6'; reader.Consume(); return; - case 'W': ch = '\xD7'; reader.Consume(); return; - case 'X': ch = '\xD8'; reader.Consume(); return; - case 'Y': ch = '\xD9'; reader.Consume(); return; - case 'Z': ch = '\xDA'; reader.Consume(); return; + switch (m_reader.Read()) + { + case '0': return ','; + case '1': return '/'; + case '2': return '\\'; + case '3': return ':'; + case '4': return '.'; + case '5': return ' '; + case '6': return '\n'; + case '7': return '\t'; + case '8': return '\''; + case '9': return '-'; + case 'a': return '\xE1'; + case 'b': return '\xE2'; + case 'c': return '\xE3'; + case 'd': return '\xE4'; + case 'e': return '\xE5'; + case 'f': return '\xE6'; + case 'g': return '\xE7'; + case 'h': return '\xE8'; + case 'i': return '\xE9'; + case 'j': return '\xEA'; + case 'k': return '\xEB'; + case 'l': return '\xEC'; + case 'm': return '\xED'; + case 'n': return '\xEE'; + case 'o': return '\xEF'; + case 'p': return '\xF0'; + case 'q': return '\xF1'; + case 'r': return '\xF2'; + case 's': return '\xF3'; + case 't': return '\xF4'; + case 'u': return 
'\xF5'; + case 'v': return '\xF6'; + case 'w': return '\xF7'; + case 'x': return '\xF8'; + case 'y': return '\xF9'; + case 'z': return '\xFA'; + case 'A': return '\xC1'; + case 'B': return '\xC2'; + case 'C': return '\xC3'; + case 'D': return '\xC4'; + case 'E': return '\xC5'; + case 'F': return '\xC6'; + case 'G': return '\xC7'; + case 'H': return '\xC8'; + case 'I': return '\xC9'; + case 'J': return '\xCA'; + case 'K': return '\xCB'; + case 'L': return '\xCC'; + case 'M': return '\xCD'; + case 'N': return '\xCE'; + case 'O': return '\xCF'; + case 'P': return '\xD0'; + case 'Q': return '\xD1'; + case 'R': return '\xD2'; + case 'S': return '\xD3'; + case 'T': return '\xD4'; + case 'U': return '\xD5'; + case 'V': return '\xD6'; + case 'W': return '\xD7'; + case 'X': return '\xD8'; + case 'Y': return '\xD9'; + case 'Z': return '\xDA'; default: throw DemangleException("Unknown character"); } } -void Demangle::DemangleWideChar(uint16_t& wch) +void Demangle::DemangleVariableList(_STD_VECTOR& paramList, BackrefList& varList, bool typeBackrefs) { - char c1, c2; - DemangleChar(c1); - DemangleChar(c2); - - wch = (uint16_t)(((uint16_t)c1 << 8) | (uint16_t)c2); -} - - -void Demangle::DemangleVariableList(vector& paramList, BackrefList& varList) -{ - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; - set suffix; - for (size_t i = 0; reader.Peek() != 'Z'; i++) + uint8_t suffix = 0; + for (;;) { bool hasModifiers = false; - if (reader.Peek() == '@') + if (m_reader.PeekOr() == 'Z') + { + if (m_reader.PeekMatch("ZZ", 2)) + { + paramList.push_back({"", DemangledTypeNode::CreateShared(DemangledTypeNode::VarArgsType())}); + m_reader.Consume(); + continue; + } + break; + } + if (m_reader.ConsumeIf('@')) { - reader.Consume(); break; } - else if (reader.Peek() == '?') + else if (m_reader.ConsumeIf("$$$V")) + { + // $$$V = empty expanded type / 
template-template pack (post-MSVC2015 mangling). + // See clang/lib/AST/MicrosoftMangle.cpp: for MSVC2015-compat this emits $$V, + // otherwise $$$V. + continue; + } + else if (m_reader.ConsumeIf("$$V") || m_reader.ConsumeIf("$$Z")) + { + // $$V = empty expanded type / template-template pack (MSVC2015-compat mangling). + // $$Z = separator between two consecutive packs (emitted between non-empty packs, + // not as a lone template argument). LLVM's demangler leniently skips it in + // any position; we follow suit. + // NB: $$S is NOT emitted by any known toolchain - only $S (single $) is a real + // token, handled below. + continue; + } + else if (m_reader.ConsumeIf("$S")) + { + // $S = empty expanded non-type template pack + // (e.g. `template` or `template` instantiated with zero args). + continue; + } + else if (m_reader.ConsumeIf('?')) { - reader.Consume(); suffix = DemanglePointerSuffix(); + ConsumeExtendedModifierPrefix(); DemangleModifiers(_const, _volatile, isMember); hasModifiers = true; } - FunctionParameter vt; - QualifiedName name; - m_logger->LogDebug("Argument %d: %s", i, reader.GetRaw()); - //m_logger->Indent(); - TypeBuilder type = DemangleVarType(varList, false, name); - //m_logger->Dedent(); + MSVC_TRACE("Argument {}: {}", paramList.size(), m_reader.GetRaw()); + DemangledTypeNode::NodeRef parsedType; + DemangledTypeNode type = DemangleVarType(varList, false, true, &parsedType, + typeBackrefs ? 
TypeBackrefMode::RecordTopLevel : TypeBackrefMode::SuppressTopLevel); if (hasModifiers) { type.SetConst(_const); type.SetVolatile(_volatile); - type.SetPointerSuffix(suffix); + type.SetPointerSuffixBits(suffix); } - vt.name = name.GetString(); - vt.type = type.Finalize(); - vt.locationSource = DefaultLocationSource; - paramList.push_back(vt); - m_logger->LogDebug("Argument %zu: '%s' - '%s'\n", i, vt.type->GetString().c_str(), reader.GetRaw()); + DemangledTypeNode::Param vt; + if (hasModifiers || !parsedType) + vt.type = DemangledTypeNode::CreateShared(std::move(type)); + else + vt.type = parsedType; + paramList.push_back(std::move(vt)); + MSVC_TRACE("Argument {}: '{}' - '{}'", paramList.size() - 1, paramList.back().type->GetString(), m_reader.GetRaw()); } - if (reader.Peek() == 'Z') - reader.Consume(); - m_logger->LogDebug("%s: done '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: done '{}'", __FUNCTION__, m_reader.GetRaw()); +} + + +void Demangle::DemangleNameTypeString(_STD_STRING& out) +{ + out = m_reader.ReadUntil('@'); +} + + +static bool IsWinRTEscapedScopeNameChar(char ch) +{ + return (ch >= '0' && ch <= '9') || (ch >= 'A' && ch <= 'Z') + || (ch >= 'a' && ch <= 'z') || (ch == '_') || (ch == '$'); } -Demangle::NameType Demangle::GetNameType() +bool Demangle::TryDemangleWinRTEscapedScopeName(NameList& nameList, BackrefList& nameBackrefList) { - if (reader.Peek() == '?') + // LLVM's Microsoft demangler rejects these WinRT interface-scope spellings: + // ?get@?QIXamlType@Markup@Xaml@UI@Windows@@Outer@@... + // We accept them for compatibility with existing BN test cases. At entry, + // DemangleName has consumed the leading '?' and `m_reader` points at the first + // simple scope component. The escaped chain ends at its inner '@@'; the + // normal outer qualified-name '@' is intentionally left for the DemangleName + // loop to consume. 
+ const char* start = m_reader.GetRaw(); + if (m_reader.Length() < 4) + return false; + + char prefix = start[0]; + if (!((prefix >= 'A' && prefix <= 'Z') || (prefix == '_'))) + return false; + if (start[1] == '@' || start[1] == '?') + return false; + + const char* limit = start + m_reader.Length(); + const char* end = nullptr; + for (const char* cur = start + 1; (cur + 1) < limit; cur++) { - reader.Consume(); - if (reader.Peek()== '?') - { - reader.Consume(); - return GetNameType(); - } - else if (reader.Peek() == '$') + if ((cur[0] == '@') && (cur[1] == '@')) { - reader.Consume(); - return NameTemplate; - } - else if (reader.Peek() == '0') - { - reader.Consume(); - return NameConstructor; - } - else if (reader.Peek() == '1') - { - reader.Consume(); - return NameDestructor; - } - else if (reader.Peek() == 'B') - { - reader.Consume(); - return NameReturn; - } - else if (reader.PeekString(2) == "_R") - { - reader.Consume(2); - return NameRtti; + end = cur; + break; } - // else if (reader.PeekString(3) == "__E") - // { - // reader.Consume(2); - // return NameDynamicInitializer; - // } - else + } + if (!end) + return false; + + _STD_VECTOR<_STD_STRING> scopeNames; + const char* componentStart = start; + while (componentStart < end) + { + const char* componentEnd = componentStart; + while ((componentEnd < end) && (*componentEnd != '@')) { - return NameLookup; + if (!IsWinRTEscapedScopeNameChar(*componentEnd)) + return false; + componentEnd++; } + if (componentEnd == componentStart) + return false; + + scopeNames.emplace_back(componentStart, componentEnd - componentStart); + componentStart = componentEnd + 1; } - else if (reader.Peek() >= '0' && reader.Peek() <= '9') + + for (const auto& scopeName: scopeNames) { - return NameBackref; + DemangledNamePart scope = MakeNameSegment(scopeName); + nameList.insert(nameList.begin(), scope); + nameBackrefList.PushNameBackref(std::move(scope)); } - return NameString; -} - - -void Demangle::DemangleNameTypeString(string& out) -{ - 
out = reader.ReadUntil('@'); + m_reader.SetRaw(end + 2); + return true; } void Demangle::DemangleNameTypeRtti(BNNameType& classFunctionType, BackrefList& nameBackrefList, - string& out) + _STD_STRING& out) { - TypeBuilder rtti; - switch (reader.Read()) + switch (m_reader.Read()) { case '0': { - if (reader.Peek() != '?') - throw DemangleException(); - reader.Consume(); - - bool _const = false, _volatile = false, isMember = false; - auto suffix = DemanglePointerSuffix(); - DemangleModifiers(_const, _volatile, isMember); + bool _const = false, _volatile = false; + uint8_t suffix = 0; + if (m_reader.ConsumeIf('?')) + { + bool isMember = false; + suffix = DemanglePointerSuffix(); + ConsumeExtendedModifierPrefix(); + DemangleModifiers(_const, _volatile, isMember); + } - QualifiedName name; - //m_logger->Indent(); - rtti = DemangleVarType(nameBackrefList, false, name); - //m_logger->Dedent(); + DemangledTypeNode rtti = DemangleVarType(nameBackrefList, false); rtti.SetConst(_const); rtti.SetVolatile(_volatile); - rtti.SetPointerSuffix(suffix); - out = rtti.GetString() + " `RTTI Type Descriptor' "; + rtti.SetPointerSuffixBits(suffix); + out = rtti.GetString() + " `RTTI Type Descriptor'"; classFunctionType = RttiTypeDescriptor; break; } @@ -834,11 +1138,10 @@ void Demangle::DemangleNameTypeRtti(BNNameType& classFunctionType, out = "`RTTI Base Class Descriptor at ("; for (int i = 0; i < 4; i++) { - int64_t num = 0; - DemangleNumber(num); + int64_t num = DecodeEncodedSignedNumber(); if (i > 0) { - out += ","; + out += ", "; } out += to_string(num); } @@ -862,12 +1165,15 @@ void Demangle::DemangleNameTypeRtti(BNNameType& classFunctionType, } -void Demangle::DemangleTypeNameLookup(string& out, BNNameType& functionType) +void Demangle::DemangleTypeNameLookup(_STD_STRING& out, BNNameType& functionType) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - switch (reader.Read()) + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); + switch (m_reader.Read()) { 
case '?': functionType = NoNameType; break; + case '0': functionType = ConstructorNameType; break; + case '1': functionType = ConstructorNameType; out = "~"; break; // destructor + case 'B': functionType = OperatorReturnTypeNameType; out = "operator"; break; // conversion operator case '2': functionType = OperatorNewNameType; break; case '3': functionType = OperatorDeleteNameType; break; case '4': functionType = OperatorAssignNameType; break; @@ -903,8 +1209,8 @@ void Demangle::DemangleTypeNameLookup(string& out, BNNameType& functionType) case 'Z': functionType = OperatorMinusEqualNameType; break; case '_': { - m_logger->LogDebug(" %s: '%s'\n", __FUNCTION__, reader.GetRaw()); - switch (reader.Read()) + MSVC_TRACE(" {}: '{}'", __FUNCTION__, m_reader.GetRaw()); + switch (m_reader.Read()) { case '0': functionType = OperatorDivideEqualNameType; break; case '1': functionType = OperatorModulusEqualNameType; break; @@ -942,23 +1248,46 @@ void Demangle::DemangleTypeNameLookup(string& out, BNNameType& functionType) case 'W': // Fallthrough case 'Z': functionType = NoNameType; break; case '_': - m_logger->LogDebug(" %s: '%s'\n", __FUNCTION__, reader.GetRaw()); - switch (reader.Read()) + { + MSVC_TRACE(" {}: '{}'", __FUNCTION__, m_reader.GetRaw()); + switch (const char extendedNameType = m_reader.Read()) { case 'A': functionType = ManagedVectorConstructorIteratorNameType; break; case 'B': functionType = ManagedVectorDestructorIteratorNameType; break; case 'C': functionType = EHVectorCopyConstructorIteratorNameType; break; - case 'D': functionType = EHVectorVBaseConstructorIteratorNameType; break; - case 'E': functionType = DynamicInitializerNameType; break; - case 'F': functionType = DynamicAtExitDestructorNameType; break; + // ??__D is the *copy* variant per LLVM (MicrosoftDemangle.cpp:701). + // Previously routed to EHVectorVBaseConstructorIteratorNameType + // (the non-copy enum used by ??_O), which dropped the "copy" word. 
+ case 'D': functionType = EHVectorVBaseCopyConstructorIteratorNameType; break; + // ??__E and ??__F are not reached here — they're handled at the + // top level in DemangleSymbol, matching LLVM's special-intrinsic + // dispatch. See DemangleDynamicInitFini. + case 'E': // fall through — unreachable in practice + case 'F': functionType = (extendedNameType == 'E') ? DynamicInitializerNameType : DynamicAtExitDestructorNameType; break; case 'G': functionType = VectorCopyConstructorIteratorNameType; break; case 'H': functionType = VectorVBaseCopyConstructorIteratorNameType; break; case 'I': functionType = ManagedVectorCopyConstructorIteratorNameType; break; - case 'J': functionType = LocalStaticGuardNameType; break; - case 'K': functionType = UserDefinedLiteralOperatorNameType; break; + case 'J': functionType = LocalStaticThreadGuardNameType; break; + case 'K': + { + // User-defined literal operator: ??__K@ + // LLVM's demangleLiteralOperatorIdentifier consumes a simple + // string terminated by '@' as the literal suffix and renders it + // as `operator ""`. The outer DemangleName loop then + // picks up any enclosing scope chain as a normal prefix. 
+ functionType = UserDefinedLiteralOperatorNameType; + _STD_STRING suffix = m_reader.ReadUntil('@'); + if (suffix.empty()) + throw DemangleException("??__K requires a non-empty literal suffix"); + out = "operator \"\"" + suffix; + break; + } + case 'L': functionType = NoNameType; out = "operator co_await"; break; + case 'M': functionType = NoNameType; out = "operator<=>"; break; // spaceship operator default: throw DemangleException("Demangle Lookup Failed"); // fall through } break; + } default: throw DemangleException("Demangle Lookup Failed"); } @@ -966,108 +1295,145 @@ void Demangle::DemangleTypeNameLookup(string& out, BNNameType& functionType) } default: throw DemangleException("Demangle Lookup Failed"); } - out = Type::GetNameTypeString(functionType); + if (out.empty()) + out = Type::GetNameTypeString(functionType); } -string Demangle::DemangleTemplateInstantiationName(BackrefList& nameBackrefList) +DemangledNamePart Demangle::DemangleTemplateInstantiationName(BackrefList& nameBackrefList) { - string out; - BackrefList templateBackref; - reader.Consume(2); - m_logger->LogDebug("DemangleTemplateInstantiationName: '%s'\n", reader.GetRaw()); - if (reader.Peek() >= '0' && reader.Peek() <= '9') + DemangledNamePart out; + MSVC_TRACE("DemangleTemplateInstantiationName: '{}'", m_reader.GetRaw()); + if (!m_reader.ConsumeIf("?$")) + throw DemangleException(); + char next = m_reader.PeekOr(); + if (next >= '0' && next <= '9') { - out = nameBackrefList.GetStringBackref(reader.Read() - '0'); + out = nameBackrefList.GetNameBackref(m_reader.Read() - '0'); } else { - DemangleNameTypeString(out); + _STD_STRING name; + DemangleNameTypeString(name); + out = MakeNameSegment(name); } - nameBackrefList.PushStringBackref(out); + nameBackrefList.PushNameBackref(out); return out; } -string Demangle::DemangleTemplateParams(vector& params, BackrefList& nameBackrefList, string& out) +DemangledNamePart Demangle::DemangleTemplateInstantiationNameInLocalContext(BackrefList& 
nameBackrefList) { - //m_logger->Indent(); - DemangleVariableList(params, nameBackrefList); - //m_logger->Dedent(); - m_logger->LogDebug("VariableList done\n"); - out += "<"; - for (size_t i = 0; i < params.size(); i++) + DemangledNamePart out; + BNNameType dummyFunctionType = NoNameType; + MSVC_TRACE("DemangleTemplateInstantiationNameInLocalContext: '{}'", m_reader.GetRaw()); + { - if (i == 0) - { - out += params[i].type->GetString(); - } - else - { - out += "," + params[i].type->GetString(); - } + _STD_VECTOR params; + bool backrefEligible = true; + BackrefContextSwitch localContext(nameBackrefList); + if (!m_reader.ConsumeIf("?$")) + throw DemangleException(); + out = DemangleUnqualifiedSymbolName(nameBackrefList, dummyFunctionType, backrefEligible); + if (backrefEligible && dummyFunctionType == NoNameType) + nameBackrefList.PushNameBackref(out); + DemangleTemplateParams(params, nameBackrefList, out); } - if (out[out.size()-1] == '>') - out += " "; //Be c++03 compliant where we can - out += ">"; - nameBackrefList.PushStringBackref(out); + // DemangleTemplateParams pushed into the temporary local context above. + // Record the completed specialization again after BackrefContextSwitch + // restores the enclosing context. 
+ nameBackrefList.PushTemplateSpecialization(out); + nameBackrefList.PushNameBackref(out); return out; } -// void Demangle::DemangleInitFiniStub(bool destructor, QualifiedName& nameList, BackrefList& nameBackrefList, BNNameType& classFunctionType) -// { -// bool isStatic = false; -// if (reader.Peek() == '?') -// { -// reader.Consume(); -// isStatic = true; -// } -// string out = DemangleUnqualifiedSymbolName(nameList, nameBackrefList, classFunctionType); -// } - -string Demangle::DemangleUnqualifiedSymbolName(QualifiedName& nameList, BackrefList& nameBackrefList, BNNameType& classFunctionType) +void Demangle::DemangleTemplateParams(_STD_VECTOR& params, BackrefList& nameBackrefList, DemangledNamePart& out) { - string out; - if (reader.PeekString(2) == "?$") + NestingGuard nestingGuard(*this); + params.clear(); + const bool nestedTemplateContext = (m_templateParamDepth > 0); + struct NameBackrefScopeGuard + { + BackrefList& backrefs; + size_t typeCount; + size_t nameCount; + ~NameBackrefScopeGuard() + { + backrefs.typeList.resize(typeCount); + backrefs.nameList.resize(nameCount); + } + }; + struct TemplateDepthGuard + { + size_t& depth; + TemplateDepthGuard(size_t& depth): depth(depth) { depth++; } + ~TemplateDepthGuard() { depth--; } + }; + { - reader.Consume(2); - out = DemangleTemplateInstantiationName(nameBackrefList); - nameList.insert(nameList.begin(), out); + TemplateDepthGuard depthGuard(m_templateParamDepth); + NameBackrefScopeGuard scopeGuard { + nameBackrefList, + nameBackrefList.typeList.size(), + nameBackrefList.nameList.size() + }; + + DemangleVariableList(params, nameBackrefList, false); } - else if (reader.Peek() == '?') + + out.SetTemplateArguments(params); + nameBackrefList.PushTemplateSpecialization(out); + if (nestedTemplateContext) + nameBackrefList.PushNameBackref(out); +} + + +DemangledNamePart Demangle::DemangleUnqualifiedSymbolName(BackrefList& nameBackrefList, BNNameType& classFunctionType, + bool& backrefEligible) +{ + backrefEligible = 
true; + DemangledNamePart out; + _STD_STRING text; + if (m_reader.ConsumeIf('?')) { - reader.Consume(); - DemangleTypeNameLookup(out, classFunctionType); + text.clear(); + DemangleTypeNameLookup(text, classFunctionType); + out = MakeNameSegment(text); + // Lookup-based operator names are not normal identifier components and + // should not satisfy later scope backrefs such as strong_ordering@0@. + backrefEligible = false; } - else if (reader.Peek() >= '0' && reader.Peek() <= '9') + else if (char next = m_reader.PeekOr(); next >= '0' && next <= '9') { - out = nameBackrefList.GetStringBackref(reader.Read() - '0'); + out = nameBackrefList.GetNameBackref(m_reader.Read() - '0'); } else { - DemangleNameTypeString(out); + DemangleNameTypeString(text); + out = MakeNameSegment(text); } return out; } -TypeBuilder Demangle::DemangleString() +DemangledTypeNode Demangle::DemangleString(NameList& symbolName) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); // ??_C@_@ - if (reader.Peek() != '_') + if (!m_reader.ConsumeIf('_')) { throw DemangleException("Invalid mangled string name"); } - reader.Consume(); // Wide char flag (1 yes / 0 no) bool isWideChar = false; - switch (reader.Peek()) + switch (m_reader.Read()) { case '1': + case '2': // UTF-16/UTF-32 encoding variants + case '3': isWideChar = true; break; case '0': @@ -1075,66 +1441,67 @@ TypeBuilder Demangle::DemangleString() default: throw DemangleException("Invalid mangled string name"); } - reader.Consume(); // Length is just a number - int64_t lengthRaw; - DemangleNumber(lengthRaw); - if (lengthRaw < 0) - { - throw DemangleException("Invalid mangled string name"); - } - uint64_t length = (uint64_t)lengthRaw; + uint64_t length = DecodeEncodedUnsignedNumber(); - m_logger->LogDebug("%s: Before CRC32 '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: Before CRC32 '{}'", __FUNCTION__, m_reader.GetRaw()); // CRC32 (ignored) - while 
(reader.Peek() != '@') + while (m_reader.Peek() != '@') { // Usually 8 bytes but I've seen it be 7 for some ungodly reason - reader.Consume(); + m_reader.Consume(); } - reader.Consume(); + m_reader.Consume(); bool truncated = false; - string name = ""; - TypeBuilder type; + _STD_STRING name; + _STD_STRING literalPrefix; + DemangledTypeNode type; // String bytes if (isWideChar) { - m_logger->LogDebug("%s: Wide string '%s'\n", __FUNCTION__, reader.GetRaw()); - string utf8name; - truncated = (length > 64); - while (reader.Peek() != '@') + MSVC_TRACE("{}: Wide string '{}'", __FUNCTION__, m_reader.GetRaw()); + _STD_STRING utf8name; + literalPrefix = "L"; + // Track the last wide char so we can detect missing null terminator. + bool lastWideCharWasNull = false; + size_t wcharCount = 0; + while (m_reader.Peek() != '@') { - uint16_t wch; - DemangleWideChar(wch); - - uint8_t chs[2]; - chs[0] = wch & 0xFF; - chs[1] = wch >> 8; + char highByte = DemangleChar(); + char lowByte = DemangleChar(); + uint8_t chs[2] = {static_cast(lowByte), static_cast(highByte)}; + lastWideCharWasNull = (chs[0] == 0) && (chs[1] == 0); + wcharCount++; // TODO: This is actually UCS2 but we don't have an easy decoder for that utf8name += Unicode::UTF16ToUTF8(&chs[0], 2); } - reader.Consume(); + m_reader.Consume(); + + // MSVC string literals always mangle their trailing null. A payload + // that doesn't end in a wide null means the original was too long to + // fit in the mangling and was truncated. Matches LLVM's demangler. 
+ if (wcharCount == 0 || !lastWideCharWasNull) + truncated = true; name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); - type = Type::ArrayType(Type::WideCharType(2), length / 2); + type = DemangledTypeNode::ArrayType(DemangledTypeNode::WideCharType(2), length / 2); } else { - m_logger->LogDebug("%s: Non-wide string '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: Non-wide string '{}'", __FUNCTION__, m_reader.GetRaw()); uint64_t numNulls = 0; size_t endNulls = 0; - vector chars; - while (reader.Peek() != '@') + _STD_VECTOR chars; + while (m_reader.Peek() != '@') { - char ch; - DemangleChar(ch); + char ch = DemangleChar(); if (ch == 0) { numNulls++; @@ -1146,233 +1513,435 @@ TypeBuilder Demangle::DemangleString() } chars.push_back(ch); } - reader.Consume(); + m_reader.Consume(); - if (length > (uint64_t)chars.size() + 1) + if (length > static_cast(chars.size()) + 1) { truncated = true; } + // MSVC includes the trailing '\0' in the mangled payload. If the last + // byte isn't a null, the original string was truncated to fit the + // encoding's size limit — LLVM signals this with a `...` suffix. + if (!chars.empty() && chars.back() != 0) + truncated = true; - // Now time to guess encoding - if (chars.size() % 1 != 0) + // Now time to guess encoding. Only take a wide-character guess if both + // the decoded byte payload and declared array length are aligned for it. 
+ const size_t payloadBytes = chars.size() - endNulls; + if ((payloadBytes % 4 == 0) && (length % 4 == 0) && numNulls > length * 2 / 3) { - m_logger->LogDebug("%s: Looks like UTF8 '%s'\n", __FUNCTION__, reader.GetRaw()); - name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, chars.data(), chars.size() - endNulls); - type = Type::ArrayType(Type::IntegerType(1, true), length); - } - else - { - if (chars.size() % 4 == 0 && numNulls > length * 2 / 3) + MSVC_TRACE("{}: Looks like UTF32 '{}'", __FUNCTION__, m_reader.GetRaw()); + _STD_STRING utf8name; + for (size_t i = 0; i < payloadBytes; i += 4) { - m_logger->LogDebug("%s: Looks like UTF32 '%s'\n", __FUNCTION__, reader.GetRaw()); - string utf8name; - for (size_t i = 0; i < chars.size() - endNulls; i += 4) - { - utf8name += Unicode::UTF32ToUTF8(chars.data() + i); - } - name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); - type = Type::ArrayType(Type::WideCharType(4), length / 4); + utf8name += Unicode::UTF32ToUTF8(chars.data() + i); } - else if (numNulls > length / 3) + name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); + literalPrefix = "U"; + type = DemangledTypeNode::ArrayType(DemangledTypeNode::WideCharType(4), length / 4); + } + else if ((payloadBytes % 2 == 0) && (length % 2 == 0) && numNulls > length / 3) + { + MSVC_TRACE("{}: Looks like UTF16 '{}'", __FUNCTION__, m_reader.GetRaw()); + _STD_STRING utf8name; + for (size_t i = 0; i < payloadBytes; i += 2) { - m_logger->LogDebug("%s: Looks like UTF16 '%s'\n", __FUNCTION__, reader.GetRaw()); - string utf8name; - for (size_t i = 0; i < chars.size() - endNulls; i += 2) - { - utf8name += Unicode::UTF16ToUTF8(chars.data() + i, 2); - } - name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); - type = Type::ArrayType(Type::WideCharType(2), length / 2); + utf8name += Unicode::UTF16ToUTF8(chars.data() 
+ i, 2); } - else - { - m_logger->LogDebug("%s: Looks like UTF8 '%s'\n", __FUNCTION__, reader.GetRaw()); + name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, utf8name.data(), utf8name.size()); + literalPrefix = "L"; + type = DemangledTypeNode::ArrayType(DemangledTypeNode::WideCharType(2), length / 2); + } + else + { + MSVC_TRACE("{}: Looks like UTF8 '{}'", __FUNCTION__, m_reader.GetRaw()); - name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, chars.data(), chars.size() - endNulls); - type = Type::ArrayType(Type::IntegerType(1, true), length); - } + name = Unicode::ToEscapedString(Unicode::GetBlocksForNames({}), false, chars.data(), chars.size() - endNulls); + type = DemangledTypeNode::ArrayType(DemangledTypeNode::IntegerType(1, true), length); } } - if (truncated) - { - name += "..."; - } - m_varName.push_back(name); + symbolName.clear(); + symbolName.push_back(MakeNameSegment(fmt::bnformat("{}\"{}\"{}", literalPrefix, name, truncated ? "..." : ""))); return type; } -TypeBuilder Demangle::DemangleTypeInfoName() +DemangledTypeNode Demangle::DemangleTypeInfoName(NameList& symbolName) { - if (reader.Read() != '?') + if (m_reader.Read() != '?') throw DemangleException("Unknown raw name type"); bool _const = false; bool _volatile = false; bool isMember = false; DemangleModifiers(_const, _volatile, isMember); - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); - QualifiedName name; - TypeBuilder type = DemangleVarType(m_backrefList, false, name); + DemangledTypeNode type = DemangleVarType(m_backrefList, false); type.SetConst(_const); type.SetVolatile(_volatile); switch (type.GetClass()) { case NamedTypeReferenceClass: - m_varName = type.GetNamedTypeReference()->GetName(); - return type; + { + // Match LLVM's demangler: a raw type-info name (.?A...) renders as + // ` `RTTI Type Descriptor Name''`. 
Bake the type + // keyword + name into the symbol's qualified name, then return a + // fresh NamedType marked RttiTypeDescriptor so BN's core type + // formatter skips its own class/struct prefix - this mirrors the + // treatment of ??_R0 in DemangleNameTypeRtti case '0'. + _STD_STRING rendered = type.GetString() + " `RTTI Type Descriptor Name'"; + symbolName = { MakeNameSegment(rendered) }; + NameList rttiTypeName = type.GetName(); + if (rttiTypeName.empty()) + for (const auto& segment: type.RenderTypeNameSegments()) + rttiTypeName.push_back(MakeNameSegment(segment)); + DemangledTypeNode newType = DemangledTypeNode::NamedType(StructNamedTypeClass, std::move(rttiTypeName)); + newType.SetNameType(RttiTypeDescriptor); + return newType; + } default: throw DemangleException("Unexpected type of RTTI Type Name"); } } -void Demangle::DemangleName(QualifiedName& nameList, +void Demangle::PrependNameComponent(NameList& nameList, DemangledNamePart name) +{ + nameList.insert(nameList.begin(), std::move(name)); +} + + +void Demangle::AppendStringName(NameList& nameList, BackrefList& nameBackrefList) +{ + _STD_STRING text; + DemangleNameTypeString(text); + DemangledNamePart name = MakeNameSegment(text); + PrependNameComponent(nameList, name); + nameBackrefList.PushNameBackref(std::move(name)); +} + + +void Demangle::FinalizeConstructorTemplateName(NameList& nameList, size_t nameListSizeAtEntry, bool pending) +{ + if (!pending) + return; + + if (nameList.size() <= nameListSizeAtEntry + 1) + throw DemangleException("Constructor template missing class scope"); + + DemangledNamePart& constructorTemplateName = nameList.back(); + if (!constructorTemplateName.HasTemplateArguments()) + throw DemangleException("Invalid constructor template name"); + + // `??$?0...@Class@@` is a templated constructor. 
LLVM models `?0` as a + // structor identifier and attaches the parsed enclosing class to it after + // the qualified name is complete; Wine's undname does the same as a string + // post-process. Keep the parsed template args and only fill in the + // constructor's base name here: + // `?0` becomes `Class`. + constructorTemplateName.SetBase(nameList[nameList.size() - 2].GetString() + + constructorTemplateName.GetBase()); +} + + +bool Demangle::FunctionTypeHasPointerSuffix(char functionType) +{ + return functionType != 'C' && functionType != 'D' && functionType != 'K' && functionType != 'L' + && functionType != 'S' && functionType != 'T' && functionType != 'Y' && functionType != 'Z'; +} + + +_STD_STRING Demangle::FormatFunctionScopeSignature(const DemangledTypeNode& type, const NameList& scopeName) +{ + _STD_STRING out = type.GetTypeAndName(FinalizeNameList(scopeName)); + while (!out.empty() && out.back() == ' ') + out.pop_back(); + return out; +} + + +void Demangle::AppendLocalScope(NameList& nameList, BackrefList& nameBackrefList, uint64_t scopeOrdinal, + bool typeNameContext) +{ + NameList scopeName; + BNNameType scopeFunctionType = NoNameType; + DemangleName(scopeName, scopeFunctionType, nameBackrefList, typeNameContext); + + if (m_reader.Length() == 0) + throw DemangleException("Missing local scope function encoding"); + + char ft = m_reader.Read(); + if (ft == '9' && m_reader.PeekOr() == '@') + { + PrependNameComponent(nameList, MakeNameSegment("`" + to_string(scopeOrdinal) + "'")); + nameList.insert(nameList.begin(), scopeName.begin(), scopeName.end()); + return; + } + if (ft < 'A' || ft > 'Z') + throw DemangleException("Invalid local scope function encoding"); + + DemangledTypeNode scopeType = DemangleFunction( + scopeFunctionType, FunctionTypeHasPointerSuffix(ft), nameBackrefList).type; + + PrependNameComponent(nameList, MakeNameSegment("`" + to_string(scopeOrdinal) + "'")); + PrependNameComponent(nameList, MakeNameSegment("`" + 
FormatFunctionScopeSignature(scopeType, scopeName) + "'")); +} + + +bool Demangle::TryAppendLocalScopeAt(NameList& nameList, BackrefList& nameBackrefList, + const char* encodedNumberStart, bool typeNameContext) +{ + struct LocalScopeParseCheckpoint + { + Demangle& demangler; + BackrefList& backrefs; + NameList& nameList; + const char* reader; + NameList savedNameList; + size_t typeBackrefs; + size_t nameBackrefs; + size_t templateBackrefs; + + LocalScopeParseCheckpoint(Demangle& demangler, NameList& nameList, BackrefList& backrefs) : + demangler(demangler), + backrefs(backrefs), + nameList(nameList), + reader(demangler.m_reader.GetRaw()), + savedNameList(nameList), + typeBackrefs(backrefs.typeList.size()), + nameBackrefs(backrefs.nameList.size()), + templateBackrefs(backrefs.templateList.size()) + { + } + + void Restore() + { + demangler.m_reader.SetRaw(reader); + nameList = savedNameList; + backrefs.typeList.resize(typeBackrefs); + backrefs.nameList.resize(nameBackrefs); + backrefs.templateList.resize(templateBackrefs); + } + }; + + LocalScopeParseCheckpoint checkpoint(*this, nameList, nameBackrefList); + + m_reader.SetRaw(encodedNumberStart); + uint64_t scopeOrdinal = 0; + try + { + scopeOrdinal = DecodeEncodedUnsignedNumber(); + } + catch (DemangleException&) + { + checkpoint.Restore(); + return false; + } + + if (m_reader.PeekMatch("??", 2)) + { + AppendLocalScope(nameList, nameBackrefList, scopeOrdinal, typeNameContext); + return true; + } + + checkpoint.Restore(); + return false; +} + + +void Demangle::DemangleName(NameList& nameList, BNNameType& classFunctionType, - BackrefList& nameBackrefList) + BackrefList& nameBackrefList, + bool typeNameContext) { - string out; - BNNameType functionType; - BNNameType dummyFunctionType; - vector params; - while(1) + NestingGuard nestingGuard(*this); + // NameList is stored outermost-first for QualifiedName, but MSVC encodes + // names leaf-first. 
Ordinary parsed components are prepended; constructor + // and destructor branches recurse to parse the class scope, then append the + // synthesized leaf intentionally. + size_t nameListSizeAtEntry = nameList.size(); + bool pendingConstructorTemplateName = false; + + DemangledNamePart out; + _STD_STRING outText; + BNNameType functionType = NoNameType; + BNNameType dummyFunctionType = NoNameType; + _STD_VECTOR params; + + size_t strippedNestedNamePrefixes = 0; + while(true) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - switch (GetNameType()) + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); + if (m_reader.ConsumeIf("??@")) { - case NameString: - m_logger->LogDebug("Demangle String\n"); - DemangleNameTypeString(out); - nameList.insert(nameList.begin(), out); - m_logger->LogDebug("Pushing backref NameString %s", out.c_str()); - nameBackrefList.PushStringBackref(out); - m_logger->LogDebug("nameList.front(): %s\n", nameList.front().c_str()); - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - break; - case NameLookup: - m_logger->LogDebug("Demangle Lookup\n"); - DemangleTypeNameLookup(out, functionType); - classFunctionType = functionType; - nameList.insert(nameList.begin(), out); - break; - case NameBackref: - m_logger->LogDebug("Demangle Backref"); - out = nameBackrefList.GetStringBackref(reader.Read() - '0'); - m_logger->LogDebug("Demangle Backref: %s", out.c_str()); - nameList.insert(nameList.begin(), out); - break; - case NameTemplate: + AppendStringName(nameList, nameBackrefList); + } + else if (m_reader.ConsumeIf("??")) { - m_logger->LogDebug("Demangle Template: '%s'\n", reader.GetRaw()); - BackrefList templateBackref; - out = DemangleUnqualifiedSymbolName(nameList, templateBackref, functionType); - m_logger->LogDebug("Pushing backref NameTemplate %s", out.c_str()); - templateBackref.PushStringBackref(out); - m_logger->LogDebug("Demangling Template variables %s\n", reader.GetRaw()); - 
DemangleTemplateParams(params, templateBackref, out); - nameList.insert(nameList.begin(), out); - nameBackrefList.PushStringBackref(out); - break; + if (m_nestingDepth + strippedNestedNamePrefixes >= MAX_DEMANGLE_NESTING_DEPTH) + throw DemangleException("Demangle nesting depth exceeded"); + strippedNestedNamePrefixes++; + continue; } - case NameConstructor: - m_logger->LogDebug("NameConstructor\n"); - classFunctionType = ConstructorNameType; - DemangleName(nameList, dummyFunctionType, nameBackrefList); - if (nameList.size() == 0) - throw DemangleException(); - nameList.push_back(nameList[nameList.size()-1]); - return; - case NameDestructor: - classFunctionType = ConstructorNameType; - m_logger->LogDebug("NameDestructor\n"); - DemangleName(nameList, dummyFunctionType, nameBackrefList); - if (nameList.size() == 0) - throw DemangleException(); - nameList.push_back("~" + nameList[nameList.size()-1]); - return; - case NameRtti: - m_logger->LogDebug("NameRtti\n"); - DemangleNameTypeRtti(classFunctionType, nameBackrefList, out); - nameList.insert(nameList.begin(), out); - break; - // case NameDynamicInitializer: - // m_logger->LogDebug("NameDynamicInitializer\n"); - // DemangleInitFiniStub(false); - // break; - // case NameDynamicAtExitDestructor: - // m_logger->LogDebug("NameDynamicAtExitDestructor\n"); - // DemangleInitFiniStub(false); - // break; - case NameReturn: - m_logger->LogDebug("NameReturn\n"); - classFunctionType = OperatorReturnTypeNameType; - if (reader.PeekString(2) == "?$") + else if (m_reader.PeekMatch("?$", 2)) + { + MSVC_TRACE("Demangle Template: '{}'", m_reader.GetRaw()); + if (typeNameContext || (m_templateParamDepth > 0) || (nameList.size() > nameListSizeAtEntry)) { - out = DemangleTemplateInstantiationName(nameBackrefList); + out = DemangleTemplateInstantiationNameInLocalContext(nameBackrefList); + } + else + { + if (!m_reader.ConsumeIf("?$")) + throw DemangleException(); + BNNameType localFunctionType = NoNameType; + bool backrefEligible = true; + 
out = DemangleUnqualifiedSymbolName(nameBackrefList, localFunctionType, backrefEligible); + if (backrefEligible && localFunctionType == NoNameType) + { + MSVC_TRACE("Pushing backref NameTemplate {}", out.GetString()); + nameBackrefList.PushNameBackref(out); + } + MSVC_TRACE("Demangling Template variables {}", m_reader.GetRaw()); DemangleTemplateParams(params, nameBackrefList, out); + if (localFunctionType == ConstructorNameType) + { + classFunctionType = ConstructorNameType; + pendingConstructorTemplateName = true; + } + } + PrependNameComponent(nameList, out); + } + else if (char next = m_reader.PeekOr(); next >= '0' && next <= '9') + { + MSVC_TRACE("Demangle Backref"); + out = nameBackrefList.GetNameBackref(m_reader.Read() - '0'); + MSVC_TRACE("Demangle Backref: {}", out.GetString()); + PrependNameComponent(nameList, out); + } + else if (m_reader.ConsumeIf('?')) + { + if (char next = m_reader.PeekOr(); next >= 'a' && next <= 'z') + { + // Lowercase after ? indicates a non-standard extension name + // (e.g., ??null$initializer$ for thread-safe static init guards). 
+ AppendStringName(nameList, nameBackrefList); + } + else if (m_reader.PeekMatch("A0x", 3)) + { + m_reader.Consume(); + DemangleNameTypeString(outText); // discard compiler-generated hash + out = MakeNameSegment("`anonymous namespace'"); + PrependNameComponent(nameList, out); + nameBackrefList.PushNameBackref(std::move(out)); + } + else if (m_reader.ConsumeIf("_R")) + { + MSVC_TRACE("NameRtti"); + DemangleNameTypeRtti(classFunctionType, nameBackrefList, outText); + out = MakeNameSegment(outText); + PrependNameComponent(nameList, out); } else { - DemangleNameTypeString(out); - nameBackrefList.PushStringBackref(out); + bool parsedScopePrefix = false; + if (nameList.size() > nameListSizeAtEntry) + { + parsedScopePrefix = TryAppendLocalScopeAt(nameList, nameBackrefList, m_reader.GetRaw(), typeNameContext) || + TryDemangleWinRTEscapedScopeName(nameList, nameBackrefList); + } + + if (!parsedScopePrefix) + { + if (m_reader.ConsumeIf('0')) + { + MSVC_TRACE("NameConstructor"); + classFunctionType = ConstructorNameType; + DemangleName(nameList, dummyFunctionType, nameBackrefList, typeNameContext); + if (nameList.empty()) + throw DemangleException(); + nameList.push_back(nameList[nameList.size()-1]); + return; + } + if (m_reader.ConsumeIf('1')) + { + MSVC_TRACE("NameDestructor"); + classFunctionType = ConstructorNameType; + DemangleName(nameList, dummyFunctionType, nameBackrefList, typeNameContext); + if (nameList.empty()) + throw DemangleException(); + nameList.push_back(MakeNameSegment("~" + nameList[nameList.size()-1].GetString())); + return; + } + if (m_reader.ConsumeIf('B')) + { + MSVC_TRACE("NameReturn"); + classFunctionType = OperatorReturnTypeNameType; + if (m_reader.PeekMatch("?$", 2)) + { + if (m_templateParamDepth > 0) + { + out = DemangleTemplateInstantiationNameInLocalContext(nameBackrefList); + } + else + { + out = DemangleTemplateInstantiationName(nameBackrefList); + DemangleTemplateParams(params, nameBackrefList, out); + } + } + else + { + 
DemangleNameTypeString(outText); + out = MakeNameSegment(outText); + nameBackrefList.PushNameBackref(out); + } + PrependNameComponent(nameList, out); + } + else + { + MSVC_TRACE("Demangle Lookup"); + outText.clear(); + DemangleTypeNameLookup(outText, functionType); + out = MakeNameSegment(outText); + classFunctionType = functionType; + PrependNameComponent(nameList, out); + // Check if this is a scope specifier. Scope specifiers are ? + // followed by either @?? or directly ?? (for digit scopes like ?3??func@...) + // When nameList has prior components, the operator name is actually a scope index + // Also handle dynamic init/dtor wrapping ??@ (MD5 hash) + if (m_reader.ConsumeIf("??@")) + { + _STD_STRING hash = m_reader.ReadUntil('@'); + PrependNameComponent(nameList, MakeNameSegment("??@" + hash + "@")); + // Consume the trailing @ (name terminator) — the ??@hash@ pattern + // is followed by @@ (end of scoped name) before the function type + if (m_reader.Length() > 0) + m_reader.ConsumeIf('@'); + } + } + } } - nameList.insert(nameList.begin(), out); - break; - default: - throw DemangleException(); } - if (nameList.StringSize() > MAX_DEMANGLE_LENGTH) - throw DemangleException(); - if (reader.Peek() == '@') + else + { + AppendStringName(nameList, nameBackrefList); + } + if (m_reader.ConsumeIf('@')) { - reader.Consume(); + FinalizeConstructorTemplateName(nameList, nameListSizeAtEntry, pendingConstructorTemplateName); return; } } } -Ref Demangle::GetCallingConventionForType(BNCallingConventionName ccName) -{ - string name; - switch (ccName) - { - case NoCallingConvention: name = ""; break; - case CdeclCallingConvention: name = "cdecl"; break; - case PascalCallingConvention: name = "pascal"; break; - case ThisCallCallingConvention: name = "thiscall"; break; - case STDCallCallingConvention: name = "stdcall"; break; - case FastcallCallingConvention: name = "fastcall"; break; - case CLRCallCallingConvention: name = "clrcall"; break; - case EabiCallCallingConvention: name 
= "eabi"; break; - case VectorCallCallingConvention: name = "vectorcall"; break; - case SwiftCallingConvention: name = "swiftcall"; break; - case SwiftAsyncCallingConvention: name = "swiftasync"; break; - default: break; - } - - if (m_platform) - { - for (const auto& cc : m_platform->GetCallingConventions()) - { - if (cc->GetName() == name) - return cc; - } - } - for (const auto& cc : m_arch->GetCallingConventions()) - { - if (cc->GetName() == name) - return cc; - } - return nullptr; -} BNCallingConventionName Demangle::DemangleCallingConvention() { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - switch (reader.Read()) + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); + switch (m_reader.Read()) { case 'A': //Exported function case 'B': return CdeclCallingConvention; @@ -1397,298 +1966,323 @@ BNCallingConventionName Demangle::DemangleCallingConvention() } } -set Demangle::DemanglePointerSuffix() + +void Demangle::ConsumeExtendedModifierPrefix() { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - set suffix; - if (reader.Peek() == '@') + while (m_reader.ConsumeIf("$A")) + { + } +} + + +uint8_t Demangle::DemanglePointerSuffix() +{ + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); + uint8_t suffix = 0; + if (m_reader.PeekOr() == '@') return suffix; - char elm = reader.Peek(); - for (int i = 0; i < 5; i++, elm = reader.Peek()) + char elm = m_reader.PeekOr(); + for (int i = 0; i < 5; i++, elm = m_reader.PeekOr()) { if (elm == 'E') - suffix.insert(suffix.end(), Ptr64Suffix); + suffix |= (1u << Ptr64Suffix); else if (elm == 'F') - suffix.insert(suffix.end(), UnalignedSuffix); + suffix |= (1u << UnalignedSuffix); else if (elm == 'G') - suffix.insert(suffix.end(), ReferenceSuffix); + suffix |= (1u << ReferenceSuffix); else if (elm == 'H') - suffix.insert(suffix.end(), LvalueSuffix); + suffix |= (1u << LvalueSuffix); else if (elm == 'I') - suffix.insert(suffix.end(), RestrictSuffix); + suffix |= (1u << RestrictSuffix); 
else break; - reader.Consume(1); + m_reader.Consume(); } return suffix; } void Demangle::DemangleModifiers(bool& _const, bool& _volatile, bool &isMember) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - if (reader.Peek() == '@') - return; - + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); + // Always write the out params, even when `@` marks the no-modifiers case. _const = false; _volatile = false; isMember = false; - char elm = reader.Read(); - switch (elm) + if (m_reader.PeekOr() == '@') + return; + + switch (m_reader.Read()) { case 'A': break; - case 'B': _const = true; break; + case 'B': //fall through case 'J': _const = true; break; - case 'C': _volatile = true; break; - case 'G': _volatile = true; break; + case 'C': //fall through + case 'G': //fall through case 'K': _volatile = true; break; - case 'D': _const = true; _volatile = true; break; - case 'H': _const = true; _volatile = true; break; + case 'D': //fall through + case 'H': //fall through case 'L': _const = true; _volatile = true; break; - case '6': break; - case '7': break; - case 'M': break; + case '6': //fall through + case '7': //fall through + case 'M': //fall through case 'N': break; case 'O': _volatile = true; break; case 'P': _volatile = true; _const = true; break; case 'Q': isMember = true; break; - case 'U': break; + case 'U': //fall through case 'Y': break; case 'R': _const = true; isMember = true; break; - case 'V': _const = true; break; + case 'V': //fall through case 'Z': _const = true; break; case 'S': _volatile = true; isMember = true; break; - case 'W': _volatile = true; break; + case 'W': //fall through case '0': _volatile = true; break; case 'T': _const = true; _volatile = true; isMember = true; break; - case 'X': _const = true; _volatile = true; break; + case 'X': //fall through case '1': _const = true; _volatile = true; break; - case '8': break; - case '9': break; + case '8': //fall through + case '9': //fall through case '2': break; case '3': 
_const = true; break; case '4': _volatile = true; break; case '5': _const = true; _volatile = true; break; case '_': - elm = reader.Read(); - if (elm == 'A' || elm == 'B') + switch (m_reader.Read()) { - //For unhandled "member" and "based" parameters + case 'A': + case 'B': + case 'C': + case 'D': + // Accepted but not currently modeled. break; - } - else if (elm == 'C' || elm == 'D') - { - //For unhandled "member" and "based" parameters - break; - } - else - { + default: throw DemangleException(); } break; default: throw DemangleException(); } - return; } -TypeBuilder Demangle::DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& nameBackrefList, int funcClass) +bool Demangle::FunctionClassNeedsImplicitThis(int funcClass) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - bool _const = false, _volatile = false, isMember = false; - set suffix; - TypeBuilder returnType; + return funcClass != NoneFunctionClass + && (funcClass & StaticFunctionClass) != StaticFunctionClass + && (funcClass & GlobalFunctionClass) != GlobalFunctionClass; +} + + +void Demangle::AppendThunkAdjustorToName(NameList& nameList, const ThunkAdjustor& adjustor) +{ + switch (adjustor.kind) + { + case ThunkAdjustorKind::Static: + AppendToLastNameSegment(nameList, "`adjustor{" + to_string(adjustor.adjustor) + "}'"); + return; + case ThunkAdjustorKind::Vtordisp: + AppendToLastNameSegment(nameList, "`vtordisp{" + to_string(adjustor.vtorDispOffset) + ", " + + to_string(adjustor.staticOffset) + "}'"); + return; + case ThunkAdjustorKind::Vtordispex: + AppendToLastNameSegment(nameList, "`vtordispex{" + to_string(adjustor.vbptrOffset) + ", " + + to_string(adjustor.vbOffsetOffset) + ", " + to_string(adjustor.vtorDispOffset) + ", " + + to_string(adjustor.staticOffset) + "}'"); + return; + } +} + + +void Demangle::SetImplicitThisParameter(DemangledTypeNode& type, BNNameType classFunctionType, const NameList& enclosingName) +{ + NameList thisName = enclosingName; + 
if (classFunctionType != OperatorReturnTypeNameType && !thisName.empty()) + thisName.pop_back(); + auto thisNamedType = DemangledTypeNode::NamedType(TypedefNamedTypeClass, std::move(thisName)); + type.SetImplicitThisParameter(DemangledTypeNode::PointerType( + std::move(thisNamedType), false, false, PointerReferenceType)); +} + + +void Demangle::ApplySymbolFunctionContext(DemangledFunction& function, NameList& symbolName, + BNNameType classFunctionType, int funcClass) +{ + if (function.thunkAdjustor) + AppendThunkAdjustorToName(symbolName, *function.thunkAdjustor); + if (FunctionClassNeedsImplicitThis(funcClass)) + SetImplicitThisParameter(function.type, classFunctionType, symbolName); +} + + +Demangle::DemangledFunction Demangle::DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, + BackrefList& nameBackrefList, int funcClass) +{ + NestingGuard nestingGuard(*this); + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); + bool _const = false, _volatile = false; + uint8_t suffix = 0; + DemangledTypeNode returnType; BNCallingConventionName cc; + std::optional thunkAdjustor; - //Demangle adjustor which we don't do anything with for now + // Thunk adjustors are part of the function grammar, but the symbol parser + // owns the name that displays them. 
if ((funcClass & StaticThunkFunctionClass) == StaticThunkFunctionClass) { - int64_t adjustor; - DemangleNumber(adjustor); - m_varName.back() += "`adjustor{" + to_string(adjustor) + "}'"; + ThunkAdjustor adjustor {}; + adjustor.kind = ThunkAdjustorKind::Static; + adjustor.adjustor = DecodeEncodedUnsignedNumber(); + thunkAdjustor = adjustor; } else if ((funcClass & VirtualThunkFunctionClass) == VirtualThunkFunctionClass) { if ((funcClass & VirtualThunkExFunctionClass) == VirtualThunkExFunctionClass) { - int64_t vbptrOffset; - int64_t vbOffsetOffset; - int64_t vtorDispOffset; - int64_t staticOffset; - DemangleNumber(vbptrOffset); - DemangleNumber(vbOffsetOffset); - DemangleNumber(vtorDispOffset); - DemangleNumber(staticOffset); - m_varName.back() += "`vtordispex{" + to_string(vbptrOffset) + ", " + to_string(vbOffsetOffset) + ", " + to_string(vtorDispOffset) + ", " + to_string(staticOffset) + "}'"; + ThunkAdjustor adjustor {}; + adjustor.kind = ThunkAdjustorKind::Vtordispex; + adjustor.vbptrOffset = DecodeEncodedSignedInt32(); + adjustor.vbOffsetOffset = DecodeEncodedSignedInt32(); + adjustor.vtorDispOffset = DecodeEncodedSignedInt32(); + adjustor.staticOffset = DecodeEncodedUnsignedNumber(); + thunkAdjustor = adjustor; } else { - int64_t vtorDispOffset; - int64_t staticOffset; - DemangleNumber(vtorDispOffset); - DemangleNumber(staticOffset); - m_varName.back() += "`vtordisp{" + to_string(vtorDispOffset) + ", " + to_string(staticOffset) + "}'"; + ThunkAdjustor adjustor {}; + adjustor.kind = ThunkAdjustorKind::Vtordisp; + adjustor.vtorDispOffset = DecodeEncodedSignedInt32(); + adjustor.staticOffset = DecodeEncodedUnsignedNumber(); + thunkAdjustor = adjustor; } } if (pointerSuffix) { + bool isMember = false; suffix = DemanglePointerSuffix(); + ConsumeExtendedModifierPrefix(); DemangleModifiers(_const, _volatile, isMember); } - if (reader.Peek() == '?') - reader.Consume(); + m_reader.ConsumeIf('?'); cc = DemangleCallingConvention(); bool shouldHaveReturnType = true; - if 
(reader.Peek() == '@') + if (m_reader.ConsumeIf('@')) { //No return type shouldHaveReturnType = false; - reader.Consume(); - m_logger->LogDebug("Function has no return type %s", reader.GetRaw()); + MSVC_TRACE("Function has no return type {}", m_reader.GetRaw()); } else { //Demangle function return type - bool return_const = false, return_volatile = false, isMember = false; - set return_suffix; + bool return_const = false, return_volatile = false; + uint8_t return_suffix = 0; bool hasModifiers = false; //Check for modifiers before return type - if (reader.Peek() == '?') + if (m_reader.ConsumeIf('?')) { - reader.Consume(1); + bool localIsMember = false; return_suffix = DemanglePointerSuffix(); - DemangleModifiers(return_const, return_volatile, isMember); + DemangleModifiers(return_const, return_volatile, localIsMember); hasModifiers = true; } - QualifiedName name; - m_logger->LogDebug("Demangle function return type %s", reader.GetRaw()); - //m_logger->Indent(); - returnType = DemangleVarType(nameBackrefList, true, name); - m_logger->LogDebug("Return type: %s", returnType.GetString().c_str()); - //m_logger->Dedent(); + MSVC_TRACE("Demangle function return type {}", m_reader.GetRaw()); + returnType = DemangleVarType(nameBackrefList, true); + MSVC_TRACE("Return type: {}", returnType.GetString()); + // '...' (varargs) is only legal as the trailing parameter marker, + // never as a return type. Reject so we don't build a bogus type. 
+ if (returnType.GetClass() == VarArgsTypeClass) + throw DemangleException("Varargs ('Z') is not a valid function return type"); if (hasModifiers) { returnType.SetConst(return_const); returnType.SetVolatile(return_volatile); - returnType.SetPointerSuffix(return_suffix); + returnType.SetPointerSuffixBits(return_suffix); } } - if (reader.Peek() == '@') - reader.Consume(); + m_reader.ConsumeIf('@'); - m_logger->LogDebug("\tDemangle Function Parameters %s", reader.GetRaw()); - vector params; - bool needsThisPtr = false; - if (cc == ThisCallCallingConvention) - { - needsThisPtr = true; - } - if (funcClass != NoneFunctionClass) - { - if ((funcClass & VirtualFunctionClass) == VirtualFunctionClass - || (funcClass & StaticThunkFunctionClass) == StaticThunkFunctionClass - || (funcClass & VirtualThunkFunctionClass) == VirtualThunkFunctionClass) - { - needsThisPtr = true; - } - else if ((funcClass & StaticFunctionClass) != StaticFunctionClass - && (funcClass & GlobalFunctionClass) != GlobalFunctionClass) - { - needsThisPtr = true; - } - } - - if (needsThisPtr) - { - // Insert implicit "this" parameter for thiscall - // TODO: Replace this with calling convention / platform callbacks to insert thisptr (ask rss) - QualifiedName thisName = m_varName; - if (thisName.size() > 0) - thisName.erase(thisName.end() - 1); - params.push_back(FunctionParameter("this", Type::PointerType(m_arch, Type::NamedType(thisName, Type::VoidType())), DefaultLocationSource, {})); - } + MSVC_TRACE("\tDemangle Function Parameters {}", m_reader.GetRaw()); + _STD_VECTOR params; - DemangleVariableList(params, m_backrefList); + DemangleVariableList(params, nameBackrefList); + m_reader.ConsumeIf('Z'); - if (params.size() >= 1 && params.back().type->GetClass() == VoidTypeClass) + if (!params.empty() && params.back().type && params.back().type->GetClass() == VoidTypeClass) params.pop_back(); - // TODO: fix calling convention - Ref returnTypeObj; - if (shouldHaveReturnType) - returnTypeObj = 
returnType.Finalize(); - else - returnTypeObj = Type::VoidType(); - TypeBuilder newType = TypeBuilder::FunctionType(returnTypeObj, nullptr, params); + if (!shouldHaveReturnType) + returnType = DemangledTypeNode::VoidType(); + DemangledTypeNode newType = DemangledTypeNode::FunctionType(std::move(returnType), nullptr, std::move(params)); newType.SetConst(_const); newType.SetVolatile(_volatile); - newType.SetPointerSuffix(suffix); + newType.SetPointerSuffixBits(suffix); newType.SetNameType(classFunctionType); newType.SetCallingConventionName(cc); - auto convention = GetCallingConventionForType(cc); - if (convention) - newType.SetCallingConvention(convention); - m_logger->LogDebug("Successfully Created Function Type!\n"); - return newType; + MSVC_TRACE("Successfully Created Function Type!"); + return {std::move(newType), std::move(thunkAdjustor)}; } -TypeBuilder Demangle::DemangleData() +DemangledTypeNode Demangle::DemangleData(BackrefList& varList) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; - QualifiedName name; - //m_logger->Indent(); - TypeBuilder newType = DemangleVarType(m_backrefList, false, name); - //m_logger->Dedent(); + DemangledTypeNode newType = DemangleVarType(varList, false); auto suffix = DemanglePointerSuffix(); DemangleModifiers(_const, _volatile, isMember); - newType.SetConst(_const); - newType.SetVolatile(_volatile); - newType.SetPointerSuffix(suffix); + if (newType.GetClass() == PointerTypeClass) + { + newType.AddPointerSuffixBits(suffix); + newType.AddQualifiersToPointerChild(_const, _volatile); + } + else + { + newType.SetConst(_const); + newType.SetVolatile(_volatile); + newType.SetPointerSuffixBits(suffix); + } return newType; } -TypeBuilder Demangle::DemanagleRTTI(BNNameType nameType) +DemangledTypeNode Demangle::DemangleRTTI(BNNameType nameType, const NameList& symbolName) { - m_logger->LogDebug("%s: 
'%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; - if (reader.Length() > 0) + if (m_reader.Length() > 0) DemangleModifiers(_const, _volatile, isMember); - QualifiedName typeName = m_varName; - m_logger->LogDebug("new struct type\n"); - TypeBuilder newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - StructNamedTypeClass, typeName)); + NameList typeName = symbolName; + MSVC_TRACE("new struct type"); + DemangledTypeNode newType = DemangledTypeNode::NamedType(StructNamedTypeClass, typeName); newType.SetNameType(nameType); newType.SetConst(_const); newType.SetVolatile(_volatile); - m_logger->LogDebug("log: %s\n", newType.GetString().c_str()); + MSVC_TRACE("log: {}", newType.GetString()); return newType; } -TypeBuilder Demangle::DemangleVTable() +DemangledTypeNode Demangle::DemangleVTable(BackrefList& nameBackrefList, NameList& symbolName) { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); bool _const = false, _volatile = false, isMember = false; DemangleModifiers(_const, _volatile, isMember); - TypeBuilder newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - StructNamedTypeClass, m_varName)); - if (reader.Peek() != '@') + DemangledTypeNode newType = DemangledTypeNode::NamedType(StructNamedTypeClass, symbolName); + if (m_reader.PeekOr() != '@') { - QualifiedName typeName; + NameList typeName; BNNameType classFunctionType = NoNameType; - DemangleName(typeName, classFunctionType, m_backrefList); - string suffix = m_varName.back(); - m_varName.back() += "{for `" + typeName.GetString() + "'}"; + DemangleName(typeName, classFunctionType, nameBackrefList, true); + if (symbolName.empty()) + throw DemangleException("VTable name missing suffix"); + DemangledNamePart suffix = symbolName.back(); + 
AppendToLastNameSegment(symbolName, "{for `" + JoinNameList(typeName) + "'}"); typeName.push_back(suffix); - newType = TypeBuilder::NamedType(NamedTypeReference::GenerateAutoDemangledTypeReference( - StructNamedTypeClass, typeName)); + newType = DemangledTypeNode::NamedType(StructNamedTypeClass, typeName); } newType.SetConst(_const); newType.SetVolatile(_volatile); @@ -1697,164 +2291,456 @@ TypeBuilder Demangle::DemangleVTable() } +// ??__E (dynamic initializer) / ??__F (dynamic atexit destructor). +// +// LLVM dispatches these at the top level via demangleSpecialIntrinsic --> +// demangleInitFiniStub. The mangling wraps another symbol (either a variable +// or a function) and emits a new function stub that initializes/destroys it: +// +// ??__E function form, e.g. ??__Efoo@@YAXXZ +// ??__E?@@ variable form, e.g. ??__E?foo@@3HA@@YAXXZ +// +// LLVM's output places the descriptor (`dynamic initializer for ''`) +// at file scope — not as a member of the target's enclosing class — and +// interpolates the target name inside backticks/quotes. For the variable +// form, it additionally renders the variable's type inside the inner +// backtick pair: `dynamic initializer for `int foo''. +Demangle::DemangleContext Demangle::DemangleDynamicInitFini(bool isDtor, BackrefList& backrefList) +{ + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); + + // /d2FH4 may replace a long wrapped target with an MD5 name (??@@). + // Parse it before the optional '?' marker below; otherwise the first '?' + // of the hash spelling is mistaken for IsKnownStaticDataMember. + NameList innerNameList; + BNNameType innerClassFunctionType = NoNameType; + bool isMD5Name = false; + if (m_reader.ConsumeIf("??@")) + { + _STD_STRING hash = m_reader.ReadUntil('@'); + innerNameList.push_back(MakeNameSegment("??@" + hash + "@")); + isMD5Name = true; + } + + // Optional leading '?' flags the "known static data member" form. 
LLVM + // calls this IsKnownStaticDataMember — when present, the mangling is + // required to carry two trailing '@' before the outer function encoding + // rather than one. + bool isKnownStaticDataMember = false; + if (!isMD5Name && m_reader.ConsumeIf('?')) + { + isKnownStaticDataMember = true; + } + + // Parse the inner symbol's qualified name exactly as any other symbol + // would. DemangleName handles locally-scoped pieces, anonymous namespaces, + // templates, etc. so a target like + // instance@?1??Get@Globals@@SAAEAU1@XZ@ + // resolves correctly. + if (!isMD5Name) + DemangleName(innerNameList, innerClassFunctionType, backrefList); + + const char* prefix = isDtor + ? "`dynamic atexit destructor for " + : "`dynamic initializer for "; + BNNameType classFunctionType = isDtor + ? DynamicAtExitDestructorNameType + : DynamicInitializerNameType; + + _STD_STRING descriptor; + + if (m_reader.Length() == 0) + throw DemangleException("Truncated ??__E/??__F"); + + char next = m_reader.Peek(); + if (next >= '0' && next <= '4') + { + // Variable form: <@-terminators> + // . We don't attach the storage class to + // anything — it exists only to disambiguate variable-vs-function + // inside the wrapper and to match the mangling grammar. + m_reader.Consume(); // storage class + DemangledTypeNode varType = DemangleData(backrefList); + _STD_STRING varTypeStr = varType.GetString(); + _STD_STRING innerJoined = JoinNameList(innerNameList); + descriptor = _STD_STRING(prefix) + "`" + varTypeStr + " " + innerJoined + "''"; + + // Consume the @-terminators between the inner variable encoding and + // the outer function encoding. LLVM requires two when the optional + // leading '?' was present, one otherwise. + int atCount = isKnownStaticDataMember ? 
2 : 1; + for (int i = 0; i < atCount; i++) + { + if (m_reader.Length() == 0 || m_reader.Read() != '@') + throw DemangleException("Expected '@' terminator in ??__E/??__F variable form"); + } + } + else + { + // Function form: the inner symbol's function encoding follows + // directly. The outer stub reuses that encoding (there's no separate + // outer signature). + if (isKnownStaticDataMember) + throw DemangleException("??__E/??__F with leading '?' but no variable form"); + if (isMD5Name) + { + while (m_reader.ConsumeIf('@')) + { + } + } + _STD_STRING innerJoined = JoinNameList(innerNameList); + descriptor = _STD_STRING(prefix) + "'" + innerJoined + "''"; + } + + // Replace the symbol's qualified name with just the descriptor — this is + // what puts the output at file scope with no enclosing class prefix. + NameList descriptorName = { MakeNameSegment(descriptor) }; + + auto parseOuterFunction = [&](bool pointerSuffix, int funcClass, BNMemberAccess access, BNMemberScope scope) { + DemangledFunction function = DemangleFunction(classFunctionType, pointerSuffix, backrefList, funcClass); + ApplySymbolFunctionContext(function, descriptorName, classFunctionType, funcClass); + return DemangleContext{std::move(descriptorName), std::move(function.type), access, scope}; + }; + + // Parse the outer function encoding. MSVC emits a global cdecl stub + // ('Y'/'Z') in practice but we dispatch through the full table for + // robustness (private/public/static/etc.). 
+ if (m_reader.Length() == 0) + throw DemangleException("Truncated ??__E/??__F outer function encoding"); + switch (char funcType = m_reader.Read()) + { + case 'A': //fall through + case 'B': return parseOuterFunction(true, PrivateFunctionClass, PrivateAccess, NoScope ); + case 'C': //fall through + case 'D': return parseOuterFunction(false, PrivateFunctionClass | StaticFunctionClass, PrivateAccess, StaticScope); + case 'I': //fall through + case 'J': return parseOuterFunction(true, ProtectedFunctionClass, ProtectedAccess, NoScope ); + case 'K': //fall through + case 'L': return parseOuterFunction(false, ProtectedFunctionClass | StaticFunctionClass, ProtectedAccess, StaticScope); + case 'Q': //fall through + case 'R': return parseOuterFunction(true, PublicFunctionClass, PublicAccess, NoScope ); + case 'S': //fall through + case 'T': return parseOuterFunction(false, PublicFunctionClass | StaticFunctionClass, PublicAccess, StaticScope); + case 'Y': //fall through + case 'Z': return parseOuterFunction(false, GlobalFunctionClass, NoAccess, NoScope ); + default: + throw DemangleException(_STD_STRING("Unexpected outer function type '") + funcType + "' in ??__E/??__F"); + } +} + Demangle::DemangleContext Demangle::DemangleSymbol() { - m_logger->LogDebug("%s: '%s'\n", __FUNCTION__, reader.GetRaw()); - //m_logger->Indent(); + return DemangleSymbol(m_backrefList); +} + + +Demangle::DemangleContext Demangle::DemangleSymbol(BackrefList& backrefList) +{ + NestingGuard nestingGuard(*this); + MSVC_TRACE("{}: '{}'", __FUNCTION__, m_reader.GetRaw()); BNNameType classFunctionType = NoNameType; - QualifiedName varName; + NameList varName; - if (reader.Peek() == '.') + if (m_reader.ConsumeIf('.')) { - reader.Consume(); - - return { DemangleTypeInfoName(), NoAccess, NoScope }; + NameList typeInfoName; + DemangledTypeNode type = DemangleTypeInfoName(typeInfoName); + return { std::move(typeInfoName), std::move(type), NoAccess, NoScope }; } - if (reader.Read() != '?') + if 
(m_reader.Read() != '?') { throw DemangleException(); } - DemangleName(varName, classFunctionType, m_backrefList); - m_logger->LogDebug("Done demangling Name: '%s' - '%s'", varName.GetString().c_str(), reader.GetRaw()); - m_varName = varName; + // MD5-hashed names: ??@<32hex>@ + if (m_reader.ConsumeIf("?@")) + { + _STD_STRING hash = m_reader.ReadUntil('@'); + NameList md5Name = { MakeNameSegment("??@" + hash + "@") }; + return { std::move(md5Name), DemangledTypeNode::VoidType(), NoAccess, NoScope }; + } + + // Special intrinsics dispatched at the top level (matches LLVM's + // demangleSpecialIntrinsic). ??__E/??__F have a non-uniform grammar + // that the normal DemangleName scope-chain loop can't express — the + // bytes after the code are a wrapped inner symbol, not scope prefixes. + if (m_reader.ConsumeIf("?__E")) + return DemangleDynamicInitFini(false, backrefList); + if (m_reader.ConsumeIf("?__F")) + return DemangleDynamicInitFini(true, backrefList); + + DemangleName(varName, classFunctionType, backrefList); + MSVC_TRACE("Done demangling Name: '{}' - '{}'", JoinNameList(varName), m_reader.GetRaw()); DemangleContext context; + auto setContext = [&](DemangledTypeNode type, BNMemberAccess access, BNMemberScope scope) { + context.type = std::move(type); + context.access = access; + context.scope = scope; + }; + auto finishContext = [&]() { + context.name = std::move(varName); + return std::move(context); + }; if (classFunctionType == StringNameType) { - context = { DemangleString(), NoAccess, NoScope }; - return context; - } - - char funcType = reader.Read(); - switch(funcType) - { - case '0': context = {DemangleData(), PrivateAccess, StaticScope }; break; - case '1': context = {DemangleData(), ProtectedAccess, StaticScope }; break; - case '2': context = {DemangleData(), PublicAccess, StaticScope }; break; - case '3': context = {DemangleData(), NoAccess, NoScope }; break; - case '4': context = {DemangleData(), NoAccess, NoScope }; break; - case '5': context = 
{DemangleVTable(), NoAccess, NoScope }; break; - case '6': context = {DemangleVTable(), NoAccess, NoScope }; break; - case '7': context = {DemangleVTable(), NoAccess, NoScope }; break; - case '8': context = {DemanagleRTTI(classFunctionType), NoAccess, NoScope }; break; - case '9': context = {DemanagleRTTI(classFunctionType), NoAccess, NoScope }; break; - case 'A': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass), PrivateAccess, NoScope }; break; - case 'B': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass), PrivateAccess, NoScope }; break; - case 'C': context = {DemangleFunction(classFunctionType, false, m_backrefList, PrivateFunctionClass | StaticFunctionClass), PrivateAccess, StaticScope }; break; - case 'D': context = {DemangleFunction(classFunctionType, false, m_backrefList, PrivateFunctionClass | StaticFunctionClass), PrivateAccess, StaticScope }; break; - case 'E': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass | VirtualFunctionClass), PrivateAccess, VirtualScope}; break; - case 'F': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass | VirtualFunctionClass), PrivateAccess, VirtualScope}; break; - case 'G': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass | StaticThunkFunctionClass), PrivateAccess, ThunkScope }; break; - case 'H': context = {DemangleFunction(classFunctionType, true, m_backrefList, PrivateFunctionClass | StaticThunkFunctionClass), PrivateAccess, ThunkScope }; break; - case 'I': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass), ProtectedAccess, NoScope }; break; - case 'J': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass), ProtectedAccess, NoScope }; break; - case 'K': context = {DemangleFunction(classFunctionType, false, m_backrefList, ProtectedFunctionClass | 
StaticFunctionClass), ProtectedAccess, StaticScope }; break; - case 'L': context = {DemangleFunction(classFunctionType, false, m_backrefList, ProtectedFunctionClass | StaticFunctionClass), ProtectedAccess, StaticScope }; break; - case 'M': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass | VirtualFunctionClass), ProtectedAccess, VirtualScope}; break; - case 'N': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass | VirtualFunctionClass), ProtectedAccess, VirtualScope}; break; - case 'O': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass | StaticThunkFunctionClass), ProtectedAccess, ThunkScope }; break; - case 'P': context = {DemangleFunction(classFunctionType, true, m_backrefList, ProtectedFunctionClass | StaticThunkFunctionClass), ProtectedAccess, ThunkScope }; break; - case 'Q': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass), PublicAccess, NoScope }; break; - case 'R': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass), PublicAccess, NoScope }; break; - case 'S': context = {DemangleFunction(classFunctionType, false, m_backrefList, PublicFunctionClass | StaticFunctionClass), PublicAccess, StaticScope }; break; - case 'T': context = {DemangleFunction(classFunctionType, false, m_backrefList, PublicFunctionClass | StaticFunctionClass), PublicAccess, StaticScope }; break; - case 'U': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass | VirtualFunctionClass), PublicAccess, VirtualScope}; break; - case 'V': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass | VirtualFunctionClass), PublicAccess, VirtualScope}; break; - case 'W': context = {DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass | StaticThunkFunctionClass), PublicAccess, ThunkScope }; break; - case 'X': context = 
{DemangleFunction(classFunctionType, true, m_backrefList, PublicFunctionClass | StaticThunkFunctionClass), PublicAccess, ThunkScope }; break; - case 'Y': context = {DemangleFunction(classFunctionType, false, m_backrefList, GlobalFunctionClass), NoAccess, NoScope }; break; - case 'Z': context = {DemangleFunction(classFunctionType, false, m_backrefList, GlobalFunctionClass), NoAccess, NoScope }; break; + setContext(DemangleString(varName), NoAccess, NoScope); + return finishContext(); + } + + // ??__J (local static thread guard) and local-scope ??_B guards are + // variables, not functions. The storage marker is '4' (not visible) or '5' + // (visible). Some local guard names then carry a one-digit local ordinal + // instead of a type encoding, e.g. `...@51` -> `{2}`. + char nextSymbolByte = m_reader.PeekOr(); + if ((classFunctionType == LocalStaticThreadGuardNameType) + || (classFunctionType == LocalStaticGuardNameType && m_reader.Length() >= 2 + && (nextSymbolByte == '4' || nextSymbolByte == '5') + && m_reader.PeekAt(1) >= '0' && m_reader.PeekAt(1) <= '9')) + { + if (m_reader.Length() == 0) + throw DemangleException("Truncated local static guard"); + char next = m_reader.Read(); + if (next != '4' && next != '5') + throw DemangleException("local static guard requires variable storage class ('4' or '5'), got '" + _STD_STRING(1, next) + "'"); + if (char next = m_reader.PeekOr(); next >= '0' && next <= '9') + { + int64_t guardOrdinal = m_reader.Read() - '0' + 1; + AppendToLastNameSegment(varName, "{" + to_string(guardOrdinal) + "}"); + setContext(DemangledTypeNode::IntegerType(4, false), NoAccess, NoScope); + return finishContext(); + } + setContext(DemangleData(backrefList), NoAccess, NoScope); + return finishContext(); + } + + auto setDataContext = [&](BNMemberAccess access, BNMemberScope scope) { + setContext(DemangleData(backrefList), access, scope); + }; + auto setFunctionContext = [&](bool pointerSuffix, int funcClass, BNMemberAccess access, BNMemberScope scope) { 
+ DemangledFunction function = DemangleFunction(classFunctionType, pointerSuffix, backrefList, funcClass); + ApplySymbolFunctionContext(function, varName, classFunctionType, funcClass); + setContext(std::move(function.type), access, scope); + }; + + switch(char funcType = m_reader.Read()) + { + case '0': setDataContext(PrivateAccess, StaticScope); break; + case '1': setDataContext(ProtectedAccess, StaticScope); break; + case '2': setDataContext(PublicAccess, StaticScope); break; + case '3': //fall through + case '4': setDataContext(NoAccess, NoScope ); break; + case '5': //fall through + case '6': //fall through + case '7': + setContext(DemangleVTable(backrefList, varName), NoAccess, NoScope); + break; + case '8': //fall through + case '9': + setContext(DemangleRTTI(classFunctionType, varName), NoAccess, NoScope); + break; + case 'A': //fall through + case 'B': setFunctionContext(true, PrivateFunctionClass, PrivateAccess, NoScope ); break; + case 'C': //fall through + case 'D': setFunctionContext(false, PrivateFunctionClass | StaticFunctionClass, PrivateAccess, StaticScope ); break; + case 'E': //fall through + case 'F': setFunctionContext(true, PrivateFunctionClass | VirtualFunctionClass, PrivateAccess, VirtualScope); break; + case 'G': //fall through + case 'H': setFunctionContext(true, PrivateFunctionClass | StaticThunkFunctionClass, PrivateAccess, ThunkScope ); break; + case 'I': //fall through + case 'J': setFunctionContext(true, ProtectedFunctionClass, ProtectedAccess, NoScope ); break; + case 'K': //fall through + case 'L': setFunctionContext(false, ProtectedFunctionClass | StaticFunctionClass, ProtectedAccess, StaticScope ); break; + case 'M': //fall through + case 'N': setFunctionContext(true, ProtectedFunctionClass | VirtualFunctionClass, ProtectedAccess, VirtualScope); break; + case 'O': //fall through + case 'P': setFunctionContext(true, ProtectedFunctionClass | StaticThunkFunctionClass, ProtectedAccess, ThunkScope ); break; + case 'Q': //fall through + 
case 'R': setFunctionContext(true, PublicFunctionClass, PublicAccess, NoScope ); break; + case 'S': //fall through + case 'T': setFunctionContext(false, PublicFunctionClass | StaticFunctionClass, PublicAccess, StaticScope ); break; + case 'U': //fall through + case 'V': setFunctionContext(true, PublicFunctionClass | VirtualFunctionClass, PublicAccess, VirtualScope); break; + case 'W': //fall through + case 'X': setFunctionContext(true, PublicFunctionClass | StaticThunkFunctionClass, PublicAccess, ThunkScope ); break; + case 'Y': //fall through + case 'Z': setFunctionContext(false, GlobalFunctionClass, NoAccess, NoScope ); break; case '$': { + if (m_reader.ConsumeIf('B')) + { + // Vcall thunk: $B + uint64_t offset = DecodeEncodedUnsignedNumber(); + if (varName.empty()) + throw DemangleException("Vcall thunk missing name"); + varName.back() = MakeNameSegment("`vcall'{" + to_string(offset) + ", {flat}}'"); + // Consume calling convention char + this-type flag char + if (m_reader.Length() >= 1) + m_reader.Consume(); // calling convention (A=cdecl, etc.) 
+ char next = m_reader.PeekOr(); + if (next != '\0' && next != '@') + m_reader.Consume(); // this-type flag + setContext(DemangledTypeNode::VoidType(), NoAccess, NoScope); + break; + } int funcClass = VirtualThunkFunctionClass; - if (reader.Peek() == 'R') + if (m_reader.ConsumeIf('R')) { - reader.Consume(); funcClass |= VirtualThunkExFunctionClass; } - char thunkType = reader.Read(); - switch (thunkType) + switch (char thunkType = m_reader.Read()) { - case '0': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | PrivateFunctionClass), PrivateAccess, ThunkScope}; break; - case '1': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | PrivateFunctionClass), PrivateAccess, ThunkScope}; break; - case '2': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | ProtectedFunctionClass), ProtectedAccess, ThunkScope}; break; - case '3': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | ProtectedFunctionClass), ProtectedAccess, ThunkScope}; break; - case '4': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | PublicFunctionClass), PublicAccess, ThunkScope}; break; - case '5': context = {DemangleFunction(classFunctionType, true, m_backrefList, funcClass | VirtualFunctionClass | PublicFunctionClass), PublicAccess, ThunkScope}; break; - default: throw DemangleException("Unknown virtual thunk type " + string(1, thunkType)); + case '0': //fall through + case '1': setFunctionContext(true, funcClass | VirtualFunctionClass | PrivateFunctionClass, PrivateAccess, ThunkScope); break; + case '2': //fall through + case '3': setFunctionContext(true, funcClass | VirtualFunctionClass | ProtectedFunctionClass, ProtectedAccess, ThunkScope); break; + case '4': //fall through + case '5': setFunctionContext(true, funcClass | VirtualFunctionClass 
| PublicFunctionClass, PublicAccess, ThunkScope); break; + default: throw DemangleException("Unknown virtual thunk type " + _STD_STRING(1, thunkType)); } break; } - default: throw DemangleException("Unknown function type " + string(1, funcType)); + default: throw DemangleException("Unknown function type " + _STD_STRING(1, funcType)); } - return context; + return finishContext(); } -bool Demangle::DemangleMS(Architecture* arch, const string& mangledName, Ref& outType, - QualifiedName& outVarName, const Ref& view) +std::pair, QualifiedName> Demangle::Finalize(BinaryView* view) { - outType = nullptr; - if (mangledName.empty() || (mangledName[0] != '?' && mangledName[0] != '.')) - return false; - return DemangleMS(arch, mangledName, outType, outVarName); + DemangleContext context = DemangleSymbol(); + if (m_reader.Length() != 0) + LogDebugF("Demangling Succeeded with trailing characters '{}' in '{}'", m_reader.GetRaw(), m_mangledName); + + Ref platform = m_platform; + if (!platform && view) + platform = view->GetDefaultPlatform(); + + Architecture* arch = m_arch; +#ifdef BINARYNINJACORE_LIBRARY + if (!arch && platform) + arch = platform->GetArchitecture(); + if (!arch && view) + arch = view->GetDefaultArchitecture(); +#else + Ref viewArch; + Ref platformArch; + if (!arch && platform) + { + platformArch = platform->GetArchitecture(); + arch = platformArch.GetPtr(); + } + if (!arch && view) + { + viewArch = view->GetDefaultArchitecture(); + arch = viewArch.GetPtr(); + } +#endif + if (!arch) + throw DemangleException(); + + if (!platform) + platform = arch->GetStandalonePlatform(); + + return {context.type.Finalize(platform.GetPtr()), QualifiedName(FinalizeNameList(context.name))}; } -bool Demangle::DemangleMS(Architecture* arch, const string& mangledName, Ref& outType, - QualifiedName& outVarName, BinaryView* view) +std::pair, QualifiedName> Demangle::Finalize() { - outType = nullptr; - if (mangledName.empty() || (mangledName[0] != '?' 
&& mangledName[0] != '.')) - return false; - return DemangleMS(arch, mangledName, outType, outVarName); + return Finalize(m_view.GetPtr()); } -bool Demangle::DemangleMS(Architecture* arch, const string& mangledName, Ref& outType, - QualifiedName& outVarName) +template +static bool DemangleMSImpl(const _STD_STRING& mangledName, Ref& outType, QualifiedName& outVarName, + DemangleBody&& demangleBody) { outType = nullptr; if (mangledName.empty() || (mangledName[0] != '?' && mangledName[0] != '.')) return false; + try { - Demangle demangle(arch, mangledName); - // For now we're throwing away MemberScope and MemberAccess - outType = demangle.DemangleSymbol().type.Finalize(); - outVarName = demangle.GetVarName(); - + auto result = demangleBody(); + outType = std::move(result.first); + outVarName = std::move(result.second); + return true; } - catch (DemangleException &e) + catch (DemangleException& e) { - LogDebugForException(e, "Demangling Failed '%s' '%s;", mangledName.c_str(), e.what()); + LogDebugF("Demangling Failed '{}' '{}'", mangledName, e.what()); + return false; + } + catch (std::exception& e) + { + LogDebugF("Demangling Failed '{}' '{}'", mangledName, e.what()); return false; } - return true; } - -bool Demangle::DemangleMS(const string& mangledName, Ref& outType, +bool Demangle::DemangleMS(Architecture* arch, const _STD_STRING& mangledName, Ref& outType, QualifiedName& outVarName, const Ref& view) +{ + if (view) + { + return DemangleMSImpl(mangledName, outType, outVarName, [&]() { + Demangle demangle(arch, mangledName); + return demangle.Finalize(view.GetPtr()); + }); + } + return DemangleMS(arch, mangledName, outType, outVarName); +} + +bool Demangle::DemangleMS(Architecture* arch, const _STD_STRING& mangledName, Ref& outType, + QualifiedName& outVarName, BinaryView* view) +{ + if (view) + return DemangleMS(arch, mangledName, outType, outVarName, Ref(view)); + return DemangleMS(arch, mangledName, outType, outVarName); +} + +bool Demangle::DemangleMS(Platform* 
platform, const _STD_STRING& mangledName, Ref& outType, + QualifiedName& outVarName) { outType = nullptr; - if (mangledName.empty() || (mangledName[0] != '?' && mangledName[0] != '.')) + if (!platform) return false; - try - { + + return DemangleMSImpl(mangledName, outType, outVarName, [&]() { + Demangle demangle(Ref(platform), mangledName); + return demangle.Finalize(); + }); +} + +bool Demangle::DemangleMS(Architecture* arch, const _STD_STRING& mangledName, Ref& outType, + QualifiedName& outVarName) +{ + return DemangleMSImpl(mangledName, outType, outVarName, [&]() { + thread_local Demangle demangle(arch, mangledName); + demangle.Reset(arch, mangledName); + return demangle.Finalize(); + }); +} + + +bool Demangle::DemangleMS(const _STD_STRING& mangledName, Ref& outType, + QualifiedName& outVarName, const Ref& view) +{ + return DemangleMSImpl(mangledName, outType, outVarName, [&]() { + // Can't use thread_local here — BinaryView overload needs platform/view state Demangle demangle(view, mangledName); - // For now we're throwing away MemberScope and MemberAccess - outType = demangle.DemangleSymbol().type.Finalize(); - outVarName = demangle.GetVarName(); + return demangle.Finalize(); + }); +} - } - catch (DemangleException &e) - { - LogDebugForException(e, "Demangling Failed '%s' '%s;", mangledName.c_str(), e.what()); +bool Demangle::DemangleMS(const _STD_STRING& mangledName, Ref& outType, + QualifiedName& outVarName, BinaryView* view) +{ + outType = nullptr; + if (!view) return false; - } - return true; + return DemangleMS(mangledName, outType, outVarName, Ref(view)); } @@ -1864,18 +2750,18 @@ class MSDemangler: public Demangler MSDemangler(): Demangler("MS") { } - ~MSDemangler() override {} + ~MSDemangler() override = default; - virtual bool IsMangledString(const string& name) override + bool IsMangledString(const _STD_STRING& name) override { - return name[0] == '?'; + return !name.empty() && (name[0] == '?' 
|| name[0] == '.'); } #ifdef BINARYNINJACORE_LIBRARY - virtual bool Demangle(Architecture* arch, const string& name, Ref& outType, QualifiedName& outVarName, + bool Demangle(Architecture* arch, const _STD_STRING& name, Ref& outType, QualifiedName& outVarName, BinaryView* view) override #else - virtual bool Demangle(Ref arch, const string& name, Ref& outType, QualifiedName& outVarName, + virtual bool Demangle(Ref arch, const _STD_STRING& name, Ref& outType, QualifiedName& outVarName, Ref view) override #endif { @@ -1899,7 +2785,7 @@ extern "C" BINARYNINJAPLUGIN bool CorePluginInit() #endif { - static MSDemangler* demangler = new MSDemangler(); + static auto demangler = new MSDemangler(); Demangler::Register(demangler); return true; } diff --git a/demangler/msvc/demangle_msvc.h b/demangler/msvc/demangle_msvc.h index c2eeb79f61..168aa6de73 100644 --- a/demangler/msvc/demangle_msvc.h +++ b/demangler/msvc/demangle_msvc.h @@ -13,8 +13,9 @@ // limitations under the License. #pragma once -#include #include +#include +#include // XXX: Compiled directly into the core for performance reasons // Will still work fine compiled independently, just at about a @@ -25,47 +26,33 @@ #include "architecture.h" #include "binaryview.h" #include "demangle.h" -#include "unicode.h" #define BN BinaryNinjaCore #define _STD_STRING BinaryNinjaCore::string #define _STD_VECTOR BinaryNinjaCore::vector -#define _STD_SET BinaryNinjaCore::set #else #include "binaryninjaapi.h" #define BN BinaryNinja #define _STD_STRING std::string #define _STD_VECTOR std::vector -#define _STD_SET std::set +#endif + +#ifdef BINARYNINJACORE_LIBRARY +#include "demangler/gnu3/demangled_type_node.h" +#else +#include "../gnu3/demangled_type_node.h" #endif class DemangleException: public std::exception { _STD_STRING m_message; public: - DemangleException(_STD_STRING msg="Attempt to read beyond bounds or missing expected character"): m_message(msg){} - virtual const char* what() const noexcept { return m_message.c_str(); } + 
DemangleException(_STD_STRING msg="Attempt to read beyond bounds or missing expected character"): m_message(std::move(msg)){} + [[nodiscard]] const char* what() const noexcept override { return m_message.c_str(); } }; class Demangle { - enum NameType - { - NameEmpty, - NameString, - NameLookup, - NameBackref, - NameTemplate, - NameConstructor, - NameDestructor, - NameRtti, - NameReturn, - NameDynamicInitializer, - NameDynamicAtExitDestructor, - NameLocalStaticThreadGuard, - NameLocalVftable - }; - enum FunctionClass { NoneFunctionClass = 0, @@ -81,93 +68,302 @@ class Demangle VirtualThunkExFunctionClass = 1 << 9, }; +public: + struct DemangleContext + { + DemangledQualifiedName name; + DemangledTypeNode type; + BNMemberAccess access; + BNMemberScope scope; + }; + +private: class Reader { public: - Reader(_STD_STRING data); - _STD_STRING PeekString(size_t count=1); - char Peek(); - const char* GetRaw(); - char Read(); - _STD_STRING ReadString(size_t count=1); - _STD_STRING ReadUntil(char sentinal); - void Consume(size_t count=1); - size_t Length(); + Reader(const _STD_STRING& data) + { + Reset(data); + } + void Reset(const _STD_STRING& data) + { + m_ptr = data.c_str(); + m_end = data.c_str() + data.size(); + ValidatePrintableAscii(); + } + bool PeekMatch(const char* str, size_t len) const + { + if (len > Length()) + return false; + return memcmp(m_ptr, str, len) == 0; + } + [[nodiscard]] char PeekAt(size_t offset) const + { + if (offset >= Length()) + throw DemangleException(); + return m_ptr[offset]; + } + [[nodiscard]] char Peek() const + { + if (m_ptr >= m_end) + throw DemangleException(); + return *m_ptr; + } + [[nodiscard]] char PeekOr(char fallback = '\0') const + { + if (Length() == 0) + return fallback; + return *m_ptr; + } + [[nodiscard]] const char* GetRaw() const { return m_ptr; } + void SetRaw(const char* p) { m_ptr = p; } + [[nodiscard]] char Read() + { + if (m_ptr >= m_end) + throw DemangleException(); + return *m_ptr++; + } + bool ConsumeIf(char ch) + 
{ + if (PeekOr() != ch) + return false; + Consume(); + return true; + } + bool ConsumeIf(const char* str, size_t len) + { + if (!PeekMatch(str, len)) + return false; + Consume(len); + return true; + } + template + bool ConsumeIf(const char (&str)[N]) + { + return ConsumeIf(str, N - 1); + } + void Consume(size_t count = 1) + { + if (count > Length()) + throw DemangleException(); + m_ptr += count; + } + [[nodiscard]] size_t Length() const { return static_cast(m_end - m_ptr); } + _STD_STRING ReadString(size_t count); + _STD_STRING ReadUntil(char sentinel); private: - _STD_STRING m_data; + void ValidatePrintableAscii() const + { + for (const char* p = m_ptr; p < m_end; p++) + if (*p < 0x20 || *p > 0x7e) + throw DemangleException(); + } + const char* m_ptr; + const char* m_end; }; class BackrefList { public: - _STD_VECTOR typeList; - _STD_VECTOR<_STD_STRING> nameList; - const BN::TypeBuilder& GetTypeBackref(size_t reference); - _STD_STRING GetStringBackref(size_t reference); - void PushTypeBackref(BN::TypeBuilder t); - void PushStringBackref(_STD_STRING& s); - void PushFrontStringBackref(_STD_STRING& s); + _STD_VECTOR typeList; + _STD_VECTOR nameList; + _STD_VECTOR templateList; + void Clear() { typeList.clear(); nameList.clear(); templateList.clear(); } + DemangledTypeNode::NodeRef GetTypeBackrefRef(size_t reference); + DemangledNamePart::Ref GetNameBackrefRef(size_t reference); + const DemangledTypeNode& GetTypeBackref(size_t reference); + const DemangledNamePart& GetNameBackref(size_t reference); + DemangledTypeNode::NodeRef PushTypeBackref(DemangledTypeNode::NodeRef t); + DemangledTypeNode::NodeRef PushTypeBackref(const DemangledTypeNode& t); + DemangledTypeNode::NodeRef PushTypeBackref(DemangledTypeNode&& t); + DemangledNamePart::Ref PushNameBackref(DemangledNamePart::Ref t); + DemangledNamePart::Ref PushNameBackref(const DemangledNamePart& t); + DemangledNamePart::Ref PushNameBackref(DemangledNamePart&& t); + DemangledNamePart::Ref 
PushTemplateSpecialization(DemangledNamePart::Ref t); + DemangledNamePart::Ref PushTemplateSpecialization(const DemangledNamePart& t); + DemangledNamePart::Ref PushTemplateSpecialization(DemangledNamePart&& t); }; - Reader reader; + struct BackrefContextSwitch + { + BackrefList& active; + BackrefList saved; + + BackrefContextSwitch(BackrefList& active); + BackrefContextSwitch(const BackrefContextSwitch&) = delete; + BackrefContextSwitch& operator=(const BackrefContextSwitch&) = delete; + ~BackrefContextSwitch(); + + static void Swap(BackrefList& left, BackrefList& right); + }; + + // Internal name list type - keeps template names structured during parsing. + using NameList = _STD_VECTOR; + + static DemangledNamePart MakeNameSegment(const _STD_STRING& s) + { + return DemangledNamePart(s); + } + + static void AppendToLastNameSegment(NameList& nl, const _STD_STRING& suffix) + { + if (nl.empty()) + throw DemangleException(); + nl.back() = MakeNameSegment(nl.back().GetString() + suffix); + } + + static _STD_STRING JoinNameList(const NameList& nl) + { + if (nl.empty()) return {}; + if (nl.size() == 1) return nl[0].GetString(); + + size_t size = 2 * (nl.size() - 1); + for (const auto& name : nl) + size += name.GetString().size(); + + _STD_STRING out; + out.reserve(size); + out = nl[0].GetString(); + for (size_t i = 1; i < nl.size(); i++) + { + out += ':'; + out += ':'; + out += nl[i].GetString(); + } + return out; + } + + static StringList FinalizeNameList(const NameList& nl) + { + StringList out; + out.reserve(nl.size()); + for (const auto& n: nl) + out.push_back(n.GetString()); + return out; + } + + _STD_STRING m_mangledName; // Owns the string; Reader points into it + Reader m_reader; BackrefList m_backrefList; BN::Architecture* m_arch; BN::Ref m_platform; BN::Ref m_view; - BN::QualifiedName m_varName; - BN::Ref m_logger; - - NameType GetNameType(); - BN::TypeBuilder DemangleVarType(BackrefList& varList, bool isReturn, BN::QualifiedName& name); - void 
DemangleNumber(int64_t& num); - void DemangleChar(char& ch); - void DemangleWideChar(uint16_t& wch); + size_t m_templateParamDepth = 0; + size_t m_nestingDepth = 0; + class NestingGuard + { + Demangle& m_demangler; + public: + NestingGuard(Demangle& demangler); + ~NestingGuard(); + }; + + static void RewriteTemplateBackrefName(NameList& typeName, const BackrefList& nameBackrefList); + static void PrependNameComponent(NameList& nameList, DemangledNamePart name); + void AppendStringName(NameList& nameList, BackrefList& nameBackrefList); + static void FinalizeConstructorTemplateName(NameList& nameList, size_t nameListSizeAtEntry, bool pending); + static bool FunctionTypeHasPointerSuffix(char functionType); + static _STD_STRING FormatFunctionScopeSignature(const DemangledTypeNode& type, const NameList& scopeName); + void AppendLocalScope(NameList& nameList, BackrefList& nameBackrefList, uint64_t scopeOrdinal, bool typeNameContext); + bool TryAppendLocalScopeAt(NameList& nameList, BackrefList& nameBackrefList, const char* encodedNumberStart, + bool typeNameContext); + _STD_STRING FormatTypeAndName(const DemangledTypeNode& type, const NameList& name) const; + enum class TypeBackrefMode + { + RecordTopLevel, + SuppressTopLevel, + }; + struct EncodedNumber + { + uint64_t magnitude; + bool negative; + }; + enum class ThunkAdjustorKind + { + Static, + Vtordisp, + Vtordispex, + }; + struct ThunkAdjustor + { + ThunkAdjustorKind kind = ThunkAdjustorKind::Static; + uint64_t adjustor = 0; + int32_t vbptrOffset = 0; + int32_t vbOffsetOffset = 0; + int32_t vtorDispOffset = 0; + uint64_t staticOffset = 0; + }; + struct DemangledFunction + { + DemangledTypeNode type; + std::optional thunkAdjustor; + }; + static bool FunctionClassNeedsImplicitThis(int funcClass); + static void AppendThunkAdjustorToName(NameList& nameList, const ThunkAdjustor& adjustor); + static void SetImplicitThisParameter(DemangledTypeNode& type, BNNameType classFunctionType, const NameList& enclosingName); + 
static void ApplySymbolFunctionContext(DemangledFunction& function, NameList& symbolName, + BNNameType classFunctionType, int funcClass); + DemangledTypeNode DemangleReferencedSymbolValue(BackrefList& varList); + DemangledTypeNode DemangleAutoNonTypeTemplateParam(BackrefList& varList); + DemangledTypeNode DemangleVarType(BackrefList& varList, bool isReturn, + bool includeImplicitThis = true, DemangledTypeNode::NodeRef* outTypeBackref = nullptr, + TypeBackrefMode typeBackrefMode = TypeBackrefMode::RecordTopLevel); + EncodedNumber DecodeEncodedNumber(); + int64_t DecodeEncodedSignedNumber(); + uint64_t DecodeEncodedUnsignedNumber(); + int32_t DecodeEncodedSignedInt32(); + _STD_STRING DecodeEncodedNumberLiteral(); + char DemangleChar(); void DemangleModifiers(bool& _const, bool& _volatile, bool& isMember); - _STD_SET DemanglePointerSuffix(); - void DemangleVariableList(_STD_VECTOR& paramList, BackrefList& varList); - void DemangleNameTypeRtti(BNNameType& classFunctionType, - BackrefList& nameBackrefList, - _STD_STRING& out, - _STD_STRING& rttiTypeName); + uint8_t DemanglePointerSuffix(); + void DemangleVariableList(_STD_VECTOR& paramList, BackrefList& varList, bool typeBackrefs = true); void DemangleTypeNameLookup(_STD_STRING& out, BNNameType& functionType); + bool TryDemangleWinRTEscapedScopeName(NameList& nameList, BackrefList& nameBackrefList); void DemangleNameTypeString(_STD_STRING& out); - void DemangleNameTypeBackref(_STD_STRING& out, const _STD_VECTOR<_STD_STRING>& backrefList); - void DemangleName(BN::QualifiedName& nameList, + void DemangleName(NameList& nameList, BNNameType& classFunctionType, - BackrefList& nameBackrefList); - BN::Ref GetCallingConventionForType(BNCallingConventionName ccName); + BackrefList& nameBackrefList, + bool typeNameContext = false); BNCallingConventionName DemangleCallingConvention(); - BN::TypeBuilder DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& varList, int funcClass = NoneFunctionClass); - 
BN::TypeBuilder DemangleData(); + void ConsumeExtendedModifierPrefix(); + DemangledFunction DemangleFunction(BNNameType classFunctionType, bool pointerSuffix, BackrefList& varList, + int funcClass = NoneFunctionClass); + DemangledTypeNode DemangleData(BackrefList& varList); void DemangleNameTypeRtti(BNNameType& classFunctionType, BackrefList& nameBackrefList, _STD_STRING& out); - BN::TypeBuilder DemangleVTable(); - BN::TypeBuilder DemanagleRTTI(BNNameType classFunctionType); - _STD_STRING DemangleTemplateInstantiationName(BackrefList& nameBackrefList); - _STD_STRING DemangleTemplateParams(_STD_VECTOR& params, BackrefList& nameBackrefList, _STD_STRING& out); - _STD_STRING DemangleUnqualifiedSymbolName(BN::QualifiedName& nameList, BackrefList& nameBackrefList, BNNameType& classFunctionType); - BN::TypeBuilder DemangleString(); - BN::TypeBuilder DemangleTypeInfoName(); + DemangledTypeNode DemangleVTable(BackrefList& nameBackrefList, NameList& symbolName); + DemangledTypeNode DemangleRTTI(BNNameType classFunctionType, const NameList& symbolName); + DemangledNamePart DemangleTemplateInstantiationNameInLocalContext(BackrefList& nameBackrefList); + DemangledNamePart DemangleTemplateInstantiationName(BackrefList& nameBackrefList); + void DemangleTemplateParams(_STD_VECTOR& params, BackrefList& nameBackrefList, DemangledNamePart& out); + DemangledNamePart DemangleUnqualifiedSymbolName(BackrefList& nameBackrefList, BNNameType& classFunctionType, + bool& backrefEligible); + DemangledTypeNode DemangleString(NameList& symbolName); + DemangledTypeNode DemangleTypeInfoName(NameList& symbolName); + DemangleContext DemangleDynamicInitFini(bool isDtor, BackrefList& backrefList); + DemangleContext DemangleSymbol(BackrefList& backrefList); + std::pair, BN::QualifiedName> Finalize(BN::BinaryView* view); public: - struct DemangleContext - { - BN::TypeBuilder type; - BNMemberAccess access; - BNMemberScope scope; - }; - Demangle(BN::Architecture* arch, _STD_STRING mangledName); - 
Demangle(BN::Ref view, _STD_STRING mangledName); - Demangle(BN::Ref platform, _STD_STRING mangledName); + Demangle(BN::Architecture* arch, _STD_STRING mangledName); + Demangle(BN::Ref view, _STD_STRING mangledName); + Demangle(BN::Ref platform, _STD_STRING mangledName); + Demangle(const Demangle&) = delete; + Demangle(Demangle&&) = delete; + Demangle& operator=(const Demangle&) = delete; + Demangle& operator=(Demangle&&) = delete; + void Reset(BN::Architecture* arch, const _STD_STRING& mangledName); DemangleContext DemangleSymbol(); - BN::QualifiedName GetVarName() const { return m_varName; } + std::pair, BN::QualifiedName> Finalize(); // Be careful not to accidentally implicitly cast a BinaryView* to a bool static bool DemangleMS(BN::Architecture* arch, const _STD_STRING& mangledName, BN::Ref& outType, BN::QualifiedName& outVarName, const BN::Ref& view); static bool DemangleMS(BN::Architecture* arch, const _STD_STRING& mangledName, BN::Ref& outType, BN::QualifiedName& outVarName, BN::BinaryView* view); + static bool DemangleMS(BN::Platform* platform, const _STD_STRING& mangledName, BN::Ref& outType, + BN::QualifiedName& outVarName); static bool DemangleMS(BN::Architecture* arch, const _STD_STRING& mangledName, BN::Ref& outType, BN::QualifiedName& outVarName); @@ -176,4 +372,3 @@ class Demangle static bool DemangleMS(const _STD_STRING& mangledName, BN::Ref& outType, BN::QualifiedName& outVarName, BN::BinaryView* view); }; - diff --git a/plugins/pdb-ng/src/symbol_parser.rs b/plugins/pdb-ng/src/symbol_parser.rs index 125d7c88f7..c15566deab 100644 --- a/plugins/pdb-ng/src/symbol_parser.rs +++ b/plugins/pdb-ng/src/symbol_parser.rs @@ -39,7 +39,7 @@ use crate::PDBParserInstance; use binaryninja::architecture::{Architecture, ArchitectureExt, Register, RegisterId}; use binaryninja::binary_view::BinaryViewBase; use binaryninja::confidence::{Conf, MAX_CONFIDENCE, MIN_CONFIDENCE}; -use binaryninja::demangle::demangle_ms; +use binaryninja::demangle::demangle_ms_with_view; use 
binaryninja::rc::Ref; use binaryninja::types::{FunctionParameter, QualifiedName, StructureBuilder, Type, TypeClass}; use binaryninja::variable::{Variable, VariableSourceType}; @@ -1813,7 +1813,7 @@ impl<'a, S: Source<'a> + 'a> PDBParserInstance<'a, S> { raw_name: &String, rva: Rva, ) -> Result<(Option>>, Option)> { - let (mut t, mut name) = match demangle_ms(&self.arch, raw_name, true) { + let (mut t, mut name) = match demangle_ms_with_view(&self.arch, raw_name, Some(self.bv)) { Some((name, Some(t))) => (Some(Conf::new(t, DEMANGLE_CONFIDENCE)), name), Some((name, _)) => (None, name), _ => (None, QualifiedName::new(vec![raw_name.clone()])), diff --git a/plugins/rtti/rtti.cpp b/plugins/rtti/rtti.cpp index fde5a6ab43..f713aef77d 100644 --- a/plugins/rtti/rtti.cpp +++ b/plugins/rtti/rtti.cpp @@ -3,6 +3,20 @@ using namespace BinaryNinja; using namespace BinaryNinja::RTTI; +namespace +{ + std::string NormalizeRTTIClassName(std::string name) + { + size_t beginFind = name.find_first_of(' '); + if (beginFind != std::string::npos) + name.erase(0, beginFind + 1); + size_t endFind = name.find(" `RTTI Type Descriptor Name'"); + if (endFind != std::string::npos) + name.erase(endFind, name.length()); + return name; + } +} + Ref RTTI::GetRealSymbol(BinaryView *view, uint64_t relocAddr, uint64_t symAddr) { @@ -24,9 +38,9 @@ std::optional RTTI::DemangleNameMS(BinaryView* view, bool allowMang { QualifiedName demangledName = {}; Ref outType = {}; - if (!DemangleMS(view->GetDefaultArchitecture(), mangledName, outType, demangledName, true)) + if (!DemangleMS(view->GetDefaultArchitecture(), mangledName, outType, demangledName, view)) return DemangleNameLLVM(allowMangled, mangledName); - return demangledName.GetString(); + return NormalizeRTTIClassName(demangledName.GetString()); } @@ -90,14 +104,7 @@ std::optional RTTI::DemangleNameLLVM(bool allowMangled, const std:: Ref outType = {}; if (!DemangleLLVM(mangledName, demangledName, true)) return allowMangled ? 
std::optional(mangledName) : std::nullopt; - auto demangledNameStr = demangledName.GetString(); - size_t beginFind = demangledNameStr.find_first_of(' '); - if (beginFind != std::string::npos) - demangledNameStr.erase(0, beginFind + 1); - size_t endFind = demangledNameStr.find(" `RTTI Type Descriptor Name'"); - if (endFind != std::string::npos) - demangledNameStr.erase(endFind, demangledNameStr.length()); - return demangledNameStr; + return NormalizeRTTIClassName(demangledName.GetString()); } diff --git a/rust/src/demangle.rs b/rust/src/demangle.rs index 1f9f8941cc..d6aad25cee 100644 --- a/rust/src/demangle.rs +++ b/rust/src/demangle.rs @@ -165,6 +165,46 @@ pub fn demangle_ms( } } +pub fn demangle_ms_with_view( + arch: &CoreArchitecture, + mangled_name: &str, + view: Option<&BinaryView>, +) -> Option<(QualifiedName, Option>)> { + let mangled_name = mangled_name.to_cstr(); + let mut out_type: *mut BNType = std::ptr::null_mut(); + let mut out_name: *mut *mut std::os::raw::c_char = std::ptr::null_mut(); + let mut out_size: usize = 0; + let res = unsafe { + BNDemangleMSWithOptions( + arch.handle, + mangled_name.as_ptr(), + &mut out_type, + &mut out_name, + &mut out_size, + view.map(|v| v.handle).unwrap_or(std::ptr::null_mut()), + ) + }; + + match res { + true => { + assert!(!out_name.is_null()); + let names: Vec<_> = unsafe { ArrayGuard::::new(out_name, out_size, ()) } + .iter() + .map(str::to_string) + .collect(); + unsafe { BNFreeDemangledName(&mut out_name, out_size) }; + + let out_type = match out_type.is_null() { + true => None, + false => Some(unsafe { Type::ref_from_raw(out_type) }), + }; + + Some((names.into(), out_type)) + } + false => None, + } +} + #[derive(PartialEq, Eq, Hash)] pub struct Demangler { pub(crate) handle: *mut BNDemangler, diff --git a/view/pe/coffview.cpp b/view/pe/coffview.cpp index e009b91db3..a36aff64a3 100644 --- a/view/pe/coffview.cpp +++ b/view/pe/coffview.cpp @@ -1531,7 +1531,7 @@ void COFFView::AddCOFFSymbol(BNSymbolType type, const 
string& dll, const string& { QualifiedName demangledName; Ref demangledType; - if (DemangleGeneric(m_arch, rawName, demangledType, demangledName, nullptr, m_simplifyTemplates)) + if (DemangleGeneric(m_arch, rawName, demangledType, demangledName, this, m_simplifyTemplates)) { shortName = demangledName.GetString(); fullName = shortName; diff --git a/view/pe/peview.cpp b/view/pe/peview.cpp index a793eeb3a3..6af6283eaf 100644 --- a/view/pe/peview.cpp +++ b/view/pe/peview.cpp @@ -3567,7 +3567,7 @@ void PEView::AddPESymbol(BNSymbolType type, const string& dll, const string& nam { QualifiedName demangledName; Ref demangledType; - if (DemangleGeneric(m_arch, rawName, demangledType, demangledName, nullptr, m_simplifyTemplates)) + if (DemangleGeneric(m_arch, rawName, demangledType, demangledName, this, m_simplifyTemplates)) { shortName = demangledName.GetString(); fullName = shortName; From 3c09292c93a572bf5b14b580e8e8793afccc1f4b Mon Sep 17 00:00:00 2001 From: Peter LaFosse Date: Fri, 5 Jun 2026 21:42:54 -0400 Subject: [PATCH 2/4] Fix GNU3 template and backref handling Use shared demangler type nodes for substitutions and nested names. This preserves structure for template arguments, expression arguments, argument packs, and lambda auto parameters instead of relying on stale formatted strings. 
--- demangler/gnu3/demangle_gnu3.cpp | 1104 ++++++++++++++++++------------ demangler/gnu3/demangle_gnu3.h | 71 +- 2 files changed, 733 insertions(+), 442 deletions(-) diff --git a/demangler/gnu3/demangle_gnu3.cpp b/demangler/gnu3/demangle_gnu3.cpp index f2130a4e1a..2c0ef8d8ac 100644 --- a/demangler/gnu3/demangle_gnu3.cpp +++ b/demangler/gnu3/demangle_gnu3.cpp @@ -23,14 +23,23 @@ #ifdef BINARYNINJACORE_LIBRARY using namespace BinaryNinjaCore; -#define GetClass GetTypeClass #else using namespace BinaryNinja; using namespace std; #endif -#define MAX_DEMANGLE_LENGTH 262144 +static constexpr size_t MAX_DEMANGLE_NESTING_DEPTH = 1024; + +static BNTypeClass GetFinalizedTypeClass(const Ref& type) +{ +#ifdef BINARYNINJACORE_LIBRARY + return type->GetTypeClass(); +#else + return type->GetClass(); +#endif +} + #define hash(x,y) (64 * x + y) #undef GNUDEMANGLE_DEBUG @@ -52,13 +61,7 @@ void MyLogDebug(const char* fmt, ...) #define MyLogDebug(...) do {} while(0) #endif -static inline void rtrim(string &s) -{ - s.erase(find_if(s.rbegin(), s.rend(), [](int c) { return !isspace(c); }).base(), s.end()); -} - - -static size_t TotalStringSize(const _STD_VECTOR<_STD_STRING>& v) +static size_t TotalStringSize(const StringList& v) { size_t n = 0; for (const auto& s : v) @@ -67,32 +70,66 @@ static size_t TotalStringSize(const _STD_VECTOR<_STD_STRING>& v) } -static string GetTemplateString(const vector& args) +static string JoinNameSegments(const StringList& name) { - // Pre-calculate total length to avoid reallocations - size_t total = 2; // "<" + ">" - for (size_t i = 0; i < args.size(); i++) + if (name.empty()) + return {}; + if (name.size() == 1) + return name[0]; + + string out; + out.reserve(TotalStringSize(name) + (name.size() - 1) * 2); + out += name[0]; + for (size_t i = 1; i < name.size(); i++) { - if (i != 0) - total += 2; // ", " - total += args[i].size(); + out += "::"; + out += name[i]; } - total += 1; // possible " " before ">" + return out; +} + - string name; - 
name.reserve(total); - name += '<'; - for (size_t i = 0; i < args.size(); i++) +static bool TemplateArgsReferenceTemplateParam(const string& raw) +{ + if (raw.empty() || (raw[0] != 'I' && raw[0] != 'J')) + return false; + + size_t i = 0; + size_t depth = 0; + while (i < raw.size()) { - if (i != 0) - name += ", "; - name += args[i]; + char c = raw[i++]; + if (c == 'I' || c == 'J') + { + depth++; + continue; + } + if (c == 'E') + { + if (depth == 0) + return false; + depth--; + if (depth == 0) + return false; + continue; + } + if (c == 'T') + return true; + if (c >= '0' && c <= '9') + { + size_t len = c - '0'; + while (i < raw.size() && raw[i] >= '0' && raw[i] <= '9') + len = (len * 10) + (raw[i++] - '0'); + i = (len > raw.size() - i) ? raw.size() : i + len; /* clamp without forming i + len: a huge encoded length would wrap the sum and move i backwards, looping forever on crafted input */ + } } - rtrim(name); - if (name.back() == '>') - name += " "; //Be c++03 compliant where we can - name += '>'; - return name; + return false; +} + + +static DemangledNamePart NameSegmentWithTemplateArgs(const string& name, vector args) +{ + return DemangledNamePart(name, std::move(args), true); } @@ -110,6 +147,7 @@ static string GetOperator(char elm1, char elm2) case hash('s','z'): return "sizeof"; case hash('a','t'): return "alignof"; case hash('a','z'): return "alignof"; + case hash('a','w'): return "co_await"; case hash('n','x'): return "noexcept"; case hash('s','Z'): return "sizeof..."; case hash('s','P'): return "sizeof..."; @@ -316,27 +354,49 @@ string DemangleGNU3Reader::ReadString(size_t count) // ===== DemangleGNU3 implementation ===== -DemangleGNU3::DemangleGNU3(Architecture* arch, const string& mangledName) : +DemangleGNU3::DemangleGNU3(Platform* platform, const string& mangledName) : m_reader(mangledName), - m_arch(arch), + m_platform(platform), + m_lastTypeRef(nullptr), m_isParameter(false), m_shouldDeleteReader(true), m_topLevel(true), m_isOperatorOverload(false), - m_permitForwardTemplateRefs(false) + m_parsingLambdaParams(false), + m_lambdaTemplateParamBase(0), + m_permitForwardTemplateRefs(false), + 
m_inLocalName(false), + m_nestingDepth(0) { MyLogDebug("%s : %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); } -void DemangleGNU3::Reset(Architecture* arch, const string& mangledName) +DemangleGNU3::NestingGuard::NestingGuard(DemangleGNU3& demangler) : m_demangler(demangler) +{ + m_demangler.m_nestingDepth++; + if (m_demangler.m_nestingDepth > MAX_DEMANGLE_NESTING_DEPTH) + { + m_demangler.m_nestingDepth--; + throw DemangleException("Detected adversarial mangled string"); + } +} + + +DemangleGNU3::NestingGuard::~NestingGuard() +{ + m_demangler.m_nestingDepth--; +} + + +void DemangleGNU3::Reset(Platform* platform, const string& mangledName) { m_reader.Reset(mangledName); - m_arch = arch; - m_varName.clear(); + m_platform = platform; m_substitute.clear(); m_templateSubstitute.clear(); m_functionSubstitute.clear(); + m_lastTypeRef = nullptr; m_lastName.clear(); m_nameType = {}; m_localType = {}; @@ -345,13 +405,16 @@ void DemangleGNU3::Reset(Architecture* arch, const string& mangledName) m_shouldDeleteReader = true; m_topLevel = true; m_isOperatorOverload = false; + m_parsingLambdaParams = false; + m_lambdaTemplateParamBase = 0; m_permitForwardTemplateRefs = false; m_pendingForwardRefs.clear(); m_inLocalName = false; + m_nestingDepth = 0; } -DemangledTypeNode DemangleGNU3::CreateUnknownType(const QualifiedName& s) +DemangledTypeNode DemangleGNU3::CreateUnknownType(const StringList& s) { return DemangledTypeNode::NamedType(UnknownNamedTypeClass, s); } @@ -359,28 +422,100 @@ DemangledTypeNode DemangleGNU3::CreateUnknownType(const QualifiedName& s) DemangledTypeNode DemangleGNU3::CreateUnknownType(const string& s) { - return DemangledTypeNode::NamedType(UnknownNamedTypeClass, _STD_VECTOR<_STD_STRING>{s}); + return DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{s}); +} + + +static DemangledQualifiedName CopyQualifiedName(const DemangledTypeNode& type) +{ + return type.GetName(); } void DemangleGNU3::ExtendTypeName(DemangledTypeNode& type, const string& 
extend) { - if (type.NameStringSize() + extend.size() > MAX_DEMANGLE_LENGTH) - throw DemangleException("Detected adversarial mangled string"); + if (type.GetClass() != NamedTypeReferenceClass) + return; + DemangledQualifiedName name = CopyQualifiedName(type); + if (name.empty()) { - auto& qn = type.GetMutableTypeName(); - if (qn.size() > 0) - qn.back() += extend; - else - qn.push_back(extend); + name.emplace_back(extend); + type.SetName(std::move(name)); + return; } + + name.back().AppendBase(extend); + type.SetName(std::move(name)); } -void DemangleGNU3::PushTemplateType(const DemangledTypeNode& type) +void DemangleGNU3::ApplyTemplateArgs(DemangledTypeNode& type, ParamList args) { - m_templateSubstitute.push_back(type); + if (type.GetClass() != NamedTypeReferenceClass) + return; + + DemangledQualifiedName qn = CopyQualifiedName(type); + if (qn.empty()) + qn.emplace_back(""); + + qn.back().SetTemplateArguments(std::move(args), true); + type.SetName(std::move(qn)); +} + + +void DemangleGNU3::AppendTypeName(DemangledTypeNode& type, const DemangledTypeNode& extend) +{ + if (type.GetClass() != NamedTypeReferenceClass) + return; + + DemangledQualifiedName newName = CopyQualifiedName(type); + DemangledQualifiedName extendName = CopyQualifiedName(extend); + newName.reserve(newName.size() + extendName.size()); + newName.insert(newName.end(), extendName.begin(), extendName.end()); + type.SetName(std::move(newName)); +} + + +string DemangleGNU3::LastTypeNameSegmentBase(const DemangledTypeNode& type) +{ + const auto& qn = type.GetName(); + if (!qn.empty()) + return qn.back().GetBase(); + return {}; +} + + +bool DemangleGNU3::LastTypeNameSegmentHasTemplateArguments(const DemangledTypeNode& type) +{ + const auto& qn = type.GetName(); + if (qn.empty()) + return false; + return qn.back().HasTemplateArguments(); +} + + +DemangleGNU3::NodeRef DemangleGNU3::PushTemplateType(NodeRef type) +{ + if (type) + m_templateSubstitute.push_back(type); /* copy, not move: type is returned to the caller below and must stay non-empty */ + return type; +} + + 
+DemangleGNU3::NodeRef DemangleGNU3::PushTemplateType(const DemangledTypeNode& type) +{ + auto ref = DemangledTypeNode::CreateSharedCopy(type); + m_templateSubstitute.push_back(ref); + return ref; +} + + +DemangleGNU3::NodeRef DemangleGNU3::PushTemplateType(DemangledTypeNode&& type) +{ + auto ref = DemangledTypeNode::CreateShared(std::move(type)); + m_templateSubstitute.push_back(ref); + return ref; } @@ -389,25 +524,53 @@ const DemangledTypeNode& DemangleGNU3::GetTemplateType(size_t ref) { if (ref >= m_templateSubstitute.size()) throw DemangleException(); - return m_templateSubstitute[ref]; + if (!m_templateSubstitute[ref]) + throw DemangleException(); + return *m_templateSubstitute[ref]; } #endif -void DemangleGNU3::PushType(const DemangledTypeNode& type) +DemangleGNU3::NodeRef DemangleGNU3::PushType(NodeRef type) { - m_substitute.push_back(type); + if (type) + m_substitute.push_back(type); /* copy, not move: type is returned to the caller below and must stay non-empty */ + return type; } -const DemangledTypeNode& DemangleGNU3::GetType(size_t ref) +DemangleGNU3::NodeRef DemangleGNU3::PushType(const DemangledTypeNode& type) +{ + auto ref = DemangledTypeNode::CreateSharedCopy(type); + m_substitute.push_back(ref); + return ref; +} + + +DemangleGNU3::NodeRef DemangleGNU3::PushType(DemangledTypeNode&& type) +{ + auto ref = DemangledTypeNode::CreateShared(std::move(type)); + m_substitute.push_back(ref); + return ref; +} + + +DemangleGNU3::NodeRef DemangleGNU3::GetTypeRef(size_t ref) { if (ref >= m_substitute.size()) throw DemangleException(); + if (!m_substitute[ref]) + throw DemangleException(); return m_substitute[ref]; } +const DemangledTypeNode& DemangleGNU3::GetType(size_t ref) +{ + return *GetTypeRef(ref); +} + + #ifdef GNUDEMANGLE_DEBUG void DemangleGNU3::PrintTables() { @@ -456,6 +619,7 @@ string DemangleGNU3::DemangleSourceName() DemangledTypeNode DemangleGNU3::DemangleFunction(bool cnst, bool vltl) { + NestingGuard nestingGuard(*this); indent(); MyLogDebug("%s : %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); bool old_isparam; 
@@ -466,6 +630,7 @@ DemangledTypeNode DemangleGNU3::DemangleFunction(bool cnst, bool vltl) } DemangledTypeNode retType = DemangleType(); + NodeRef retTypeRef = m_lastTypeRef; ParamList params; old_isparam = m_isParameter; @@ -475,16 +640,21 @@ DemangledTypeNode DemangleGNU3::DemangleFunction(bool cnst, bool vltl) while (m_reader.Peek() != 'E') { DemangledTypeNode param = DemangleType(); + NodeRef paramRef = m_lastTypeRef; if (param.GetClass() == VoidTypeClass) continue; MyLogDebug("Var_%d - %s\n", i++, param.GetString().c_str()); - m_functionSubstitute.back().push_back(param); - params.push_back({"", std::make_shared(std::move(param))}); + if (!paramRef) + paramRef = DemangledTypeNode::CreateShared(std::move(param)); + m_functionSubstitute.back().push_back(paramRef); + params.push_back({"", paramRef}); } m_reader.Consume(); m_functionSubstitute.pop_back(); m_isParameter = old_isparam; - DemangledTypeNode newType = DemangledTypeNode::FunctionType(std::move(retType), nullptr, std::move(params)); + if (!retTypeRef) + retTypeRef = DemangledTypeNode::CreateShared(std::move(retType)); + DemangledTypeNode newType = DemangledTypeNode::FunctionType(retTypeRef, nullptr, std::move(params)); PushType(newType); newType.SetConst(cnst); @@ -498,46 +668,28 @@ DemangledTypeNode DemangleGNU3::DemangleFunction(bool cnst, bool vltl) } -string DemangleGNU3::ForwardRefPlaceholder(size_t index) -{ - return "\x01FWDREF:" + to_string(index) + "\x01"; -} - - -void DemangleGNU3::ResolveForwardTemplateRefs(DemangledTypeNode& type, const vector& args) +void DemangleGNU3::ResolveForwardTemplateRefs(DemangledTypeNode&, const ParamList& args) { if (m_pendingForwardRefs.empty()) return; - auto& segs = type.GetMutableTypeName(); - bool resolved = false; - for (const auto& fr : m_pendingForwardRefs) + for (const auto& ref : m_pendingForwardRefs) { - string placeholder = ForwardRefPlaceholder(fr.index); - string replacement = (fr.index < args.size()) ? 
args[fr.index] : "auto"; - for (auto& seg : segs) - { - size_t pos; - while ((pos = seg.find(placeholder)) != string::npos) - { - seg.replace(pos, placeholder.size(), replacement); - resolved = true; - } - } + if (!ref.typeRef) + continue; + if (ref.index >= args.size() || !args[ref.index].type) + throw DemangleException(); + *ref.typeRef = *args[ref.index].type; } - // Only clear the pending list when we actually resolved something. Inner - // nested-name 'I' handlers (e.g. template args of types nested inside the - // cv-operator result type) may call here with a type that does not contain - // the placeholder; we must not discard the pending entry in that case so - // that the correct outer 'I' handler can still resolve it. - if (resolved) - m_pendingForwardRefs.clear(); + m_pendingForwardRefs.clear(); } -DemangledTypeNode DemangleGNU3::DemangleTemplateSubstitution() +DemangledTypeNode DemangleGNU3::DemangleTemplateSubstitution(NodeRef* outTypeRef) { indent(); MyLogDebug("%s : %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); + if (outTypeRef) + *outTypeRef = nullptr; size_t number = 0; char elm = m_reader.Peek(); if (elm == '_') @@ -568,15 +720,32 @@ DemangledTypeNode DemangleGNU3::DemangleTemplateSubstitution() dedent(); if (number < m_templateSubstitute.size()) - return m_templateSubstitute[number]; + { + if (!m_templateSubstitute[number]) + throw DemangleException(); + if (outTypeRef) + *outTypeRef = m_templateSubstitute[number]; + return *m_templateSubstitute[number]; + } // If forward template references are permitted (e.g. inside a cv conversion - // operator type), return a placeholder that will be resolved once the outer - // template args are known. + // operator type), return a shared placeholder node whose contents will be + // replaced once the outer template args are known. 
if (m_permitForwardTemplateRefs) { - m_pendingForwardRefs.push_back({number}); - return CreateUnknownType(ForwardRefPlaceholder(number)); + auto typeRef = DemangledTypeNode::CreateShared(CreateUnknownType("auto")); + m_pendingForwardRefs.push_back({number, typeRef}); + if (outTypeRef) + *outTypeRef = typeRef; + return *typeRef; + } + + if (m_parsingLambdaParams && number >= m_lambdaTemplateParamBase) + { + auto typeRef = DemangledTypeNode::CreateShared(CreateUnknownType("auto")); + if (outTypeRef) + *outTypeRef = typeRef; + return *typeRef; } throw DemangleException(); @@ -585,12 +754,14 @@ DemangledTypeNode DemangleGNU3::DemangleTemplateSubstitution() DemangledTypeNode DemangleGNU3::DemangleType() { + NestingGuard nestingGuard(*this); indent(); MyLogDebug("%s : %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); + m_lastTypeRef = nullptr; DemangledTypeNode type; + NodeRef typeRef = nullptr; bool cnst = false, vltl = false, rstrct = false; bool substitute = false; - QualifiedName name; DemangleCVQualifiers(cnst, vltl, rstrct); @@ -602,8 +773,9 @@ DemangledTypeNode DemangleGNU3::DemangleType() if (vltl) type.SetVolatile(true); if (rstrct) - type.SetPointerSuffix({RestrictSuffix}); - PushType(type); + type.SetPointerSuffixBits(1u << RestrictSuffix); + typeRef = PushType(type); + m_lastTypeRef = typeRef; return type; } @@ -613,14 +785,14 @@ DemangledTypeNode DemangleGNU3::DemangleType() { if (isdigit(m_reader.Peek()) || m_reader.Peek() == '_' || isupper(m_reader.Peek())) { - type = DemangleSubstitution(); + type = DemangleSubstitution(&typeRef); if (m_reader.Peek() == 'I') { m_reader.Consume(); - vector args; + ParamList args; DemangleTemplateArgs(args); - ExtendTypeName(type, GetTemplateString(args)); - type.SetHasTemplateArguments(true); + ApplyTemplateArgs(type, std::move(args)); + typeRef = nullptr; substitute = true; } } @@ -630,24 +802,26 @@ DemangledTypeNode DemangleGNU3::DemangleType() { m_reader.Consume(1); type = DemangleUnqualifiedName(); - auto qn = 
type.GetTypeName(); - qn.insert(qn.begin(), "std"); - type.SetTypeName(std::move(qn)); + auto qn = CopyQualifiedName(type); + qn.insert(qn.begin(), DemangledNamePart("std")); + type.SetName(std::move(qn)); substitute = true; } else { - type = DemangleSubstitution(); + type = DemangleSubstitution(&typeRef); } if (m_reader.Peek() == 'I') { m_reader.Consume(); - if (substitute) + bool dependentTemplatePrefix = LastTypeNameSegmentBase(type) == "basic_ostream" && + TemplateArgsReferenceTemplateParam("I" + m_reader.PeekString(m_reader.Length())); + if (substitute && !dependentTemplatePrefix) PushType(type); - vector args; + ParamList args; DemangleTemplateArgs(args); - ExtendTypeName(type, GetTemplateString(args)); - type.SetHasTemplateArguments(true); + ApplyTemplateArgs(type, std::move(args)); + typeRef = nullptr; substitute = true; } } @@ -663,25 +837,25 @@ DemangledTypeNode DemangleGNU3::DemangleType() if (m_reader.Peek() == 's') { m_reader.Consume(); - type = DemangledTypeNode::NamedType(StructNamedTypeClass, _STD_VECTOR<_STD_STRING>{DemangleSourceName()}); + type = DemangledTypeNode::NamedType(StructNamedTypeClass, StringList{DemangleSourceName()}); break; } else if (m_reader.Peek() == 'u') { m_reader.Consume(); - type = DemangledTypeNode::NamedType(UnionNamedTypeClass, _STD_VECTOR<_STD_STRING>{DemangleSourceName()}); + type = DemangledTypeNode::NamedType(UnionNamedTypeClass, StringList{DemangleSourceName()}); break; } else if (m_reader.Peek() == 'e') { m_reader.Consume(); - type = DemangledTypeNode::NamedType(EnumNamedTypeClass, QualifiedName({DemangleSourceName()}), - m_arch->GetDefaultIntegerSize(), m_arch->GetDefaultIntegerSize()); + type = DemangledTypeNode::NamedTypeWithDefaultIntegerWidth( + EnumNamedTypeClass, StringList{DemangleSourceName()}); break; } //Template Substitution - type = DemangleTemplateSubstitution(); + type = DemangleTemplateSubstitution(&typeRef); // In forward-ref mode (cv conversion operator type parsing), do not consume // trailing IE — 
it belongs to the enclosing nested-name and will be // processed by DemangleNestedName's 'I' case, which resolves forward refs. @@ -691,32 +865,41 @@ DemangledTypeNode DemangleGNU3::DemangleType() m_reader.Consume(); if (substitute) PushType(type); - vector args; + ParamList args; DemangleTemplateArgs(args); - ExtendTypeName(type, GetTemplateString(args)); - type.SetHasTemplateArguments(true); + ApplyTemplateArgs(type, std::move(args)); + typeRef = nullptr; } break; } case 'P': - { - DemangledTypeNode child = DemangleType(); - type = DemangledTypeNode::PointerType(m_arch, std::move(child), cnst, vltl, PointerReferenceType); - substitute = true; + { + NodeRef childRef = nullptr; + DemangledTypeNode child = DemangleType(); + childRef = m_lastTypeRef; + type = childRef ? DemangledTypeNode::PointerType(childRef, cnst, vltl, PointerReferenceType) : + DemangledTypeNode::PointerType(std::move(child), cnst, vltl, PointerReferenceType); + substitute = true; break; } case 'R': - { - DemangledTypeNode child = DemangleType(); - type = DemangledTypeNode::PointerType(m_arch, std::move(child), cnst, vltl, ReferenceReferenceType); - substitute = true; + { + NodeRef childRef = nullptr; + DemangledTypeNode child = DemangleType(); + childRef = m_lastTypeRef; + type = childRef ? DemangledTypeNode::PointerType(childRef, cnst, vltl, ReferenceReferenceType) : + DemangledTypeNode::PointerType(std::move(child), cnst, vltl, ReferenceReferenceType); + substitute = true; break; } case 'O': - { - DemangledTypeNode child = DemangleType(); - type = DemangledTypeNode::PointerType(m_arch, std::move(child), cnst, vltl, RValueReferenceType); - substitute = true; + { + NodeRef childRef = nullptr; + DemangledTypeNode child = DemangleType(); + childRef = m_lastTypeRef; + type = childRef ? 
DemangledTypeNode::PointerType(childRef, cnst, vltl, RValueReferenceType) : + DemangledTypeNode::PointerType(std::move(child), cnst, vltl, RValueReferenceType); + substitute = true; break; } case 'C': //TODO:complex @@ -727,17 +910,20 @@ DemangledTypeNode DemangleGNU3::DemangleType() // Vendor-extended type: U [] // Commonly used for Objective-C block pointers: // U13block_pointer -> "void (params...) block_pointer" - string extName = DemangleSourceName(); + DemangledNamePart extName(DemangleSourceName()); if (m_reader.Peek() == 'I') { m_reader.Consume(); - vector targs; + ParamList targs; DemangleTemplateArgs(targs); if (!targs.empty()) - extName += GetTemplateString(targs); + extName.SetTemplateArguments(std::move(targs), true); } DemangledTypeNode inner = DemangleType(); - type = CreateUnknownType(inner.GetString() + " " + extName); + NodeRef innerRef = m_lastTypeRef ? m_lastTypeRef : DemangledTypeNode::CreateShared(std::move(inner)); + auto extType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, DemangledQualifiedName{std::move(extName)}); + NodeRef extNameRef = DemangledTypeNode::CreateShared(std::move(extType)); + type = DemangledTypeNode::PostfixType(innerRef, " ", extNameRef); substitute = true; break; } @@ -745,31 +931,31 @@ DemangledTypeNode DemangleGNU3::DemangleType() { // Vendor extended type: u [] // e.g. 
u14__remove_cvref, u20__remove_reference_t - string extName = DemangleSourceName(); + DemangledNamePart extName(DemangleSourceName()); if (m_reader.Peek() == 'I') { m_reader.Consume(); - vector targs; + ParamList targs; DemangleTemplateArgs(targs); if (!targs.empty()) - extName += GetTemplateString(targs); + extName.SetTemplateArguments(std::move(targs), true); } - type = CreateUnknownType(extName); + type = DemangledTypeNode::NamedType(UnknownNamedTypeClass, DemangledQualifiedName{std::move(extName)}); substitute = true; break; } case 'v': type = DemangledTypeNode::VoidType(); break; - case 'w': type = DemangledTypeNode::IntegerType(4, false, "wchar_t"); break; //TODO: verify + case 'w': type = DemangledTypeNode::WideCharType(4, "wchar_t"); break; //TODO: verify case 'b': type = DemangledTypeNode::BoolType(); break; - case 'c': type = DemangledTypeNode::IntegerType(1, true, "char"); break; + case 'c': type = DemangledTypeNode::IntegerType(1, true); break; case 'a': type = DemangledTypeNode::IntegerType(1, true, "signed char"); break; case 'h': type = DemangledTypeNode::IntegerType(1, false); break; case 's': type = DemangledTypeNode::IntegerType(2, true); break; case 't': type = DemangledTypeNode::IntegerType(2, false); break; case 'i': type = DemangledTypeNode::IntegerType(4, true); break; case 'j': type = DemangledTypeNode::IntegerType(4, false); break; - case 'l': type = DemangledTypeNode::IntegerType(m_arch->GetAddressSize(), true); break; //long - case 'm': type = DemangledTypeNode::IntegerType(m_arch->GetAddressSize(), false); break; //ulong + case 'l': type = DemangledTypeNode::AddressSizedIntegerType(true); break; //long + case 'm': type = DemangledTypeNode::AddressSizedIntegerType(false); break; //ulong case 'x': type = DemangledTypeNode::IntegerType(8, true); break; case 'y': type = DemangledTypeNode::IntegerType(8, false); break; case 'n': type = DemangledTypeNode::IntegerType(16, true); break; @@ -782,12 +968,11 @@ DemangledTypeNode 
DemangleGNU3::DemangleType() case 'M': // TODO: Make into pointer to function member { DemangledTypeNode memberName = DemangleType(); + NodeRef memberNameRef = m_lastTypeRef ? m_lastTypeRef : DemangledTypeNode::CreateShared(std::move(memberName)); DemangledTypeNode member = DemangleType(); - string fullName = member.GetStringBeforeName() + "(" + memberName.GetString() + "::*)" + member.GetStringAfterName(); - //member.SetScope(NonStaticScope); - //DemangledTypeNode ptr = DemangledTypeNode::PointerType(m_arch, member, cnst, vltl); - //QualifiedName qn({memberName.GetString(), "*"}); - type = CreateUnknownType(fullName); + NodeRef memberRef = m_lastTypeRef ? m_lastTypeRef : DemangledTypeNode::CreateShared(std::move(member)); + type = DemangledTypeNode::MemberPointerType(memberRef, CopyQualifiedName(*memberNameRef), cnst, vltl); + type.SetParenthesizedMemberPointer(true); substitute = true; break; } @@ -799,20 +984,21 @@ DemangledTypeNode DemangleGNU3::DemangleType() case 'e': type = DemangledTypeNode::FloatType(16, "decimal128"); break; case 'f': type = DemangledTypeNode::FloatType(4, "decimal32"); break; case 'h': type = DemangledTypeNode::FloatType(2); break; - case 'i': type = DemangledTypeNode::IntegerType(4, true, "char32_t"); break; - case 's': type = DemangledTypeNode::IntegerType(2, true, "char16_t"); break; + case 'i': type = DemangledTypeNode::WideCharType(4, "char32_t"); break; + case 's': type = DemangledTypeNode::WideCharType(2, "char16_t"); break; case 'a': type = CreateUnknownType("auto"); break; //auto type case 'c': type = CreateUnknownType("decltype(auto)"); break; //decltype(auto) case 'n': { - static const QualifiedName stdNullptrTName(vector{"std", "nullptr_t"}); + static const StringList stdNullptrTName(vector{"std", "nullptr_t"}); type = CreateUnknownType(stdNullptrTName); break; } case 'p': { DemangledTypeNode inner = DemangleType(); - type = CreateUnknownType(inner.GetString() + "..."); + NodeRef innerRef = m_lastTypeRef ? 
m_lastTypeRef : DemangledTypeNode::CreateShared(std::move(inner)); + type = DemangledTypeNode::PostfixType(innerRef, "..."); break; } case 't': @@ -827,8 +1013,11 @@ DemangledTypeNode DemangleGNU3::DemangleType() uint64_t size = DemangleNumber(); if (m_reader.Read() != '_') throw DemangleException(); + NodeRef childRef = nullptr; DemangledTypeNode child = DemangleType(); - type = DemangledTypeNode::ArrayType(std::move(child), size); + childRef = m_lastTypeRef; + type = childRef ? DemangledTypeNode::ArrayType(childRef, size) : + DemangledTypeNode::ArrayType(std::move(child), size); break; } default: @@ -837,7 +1026,7 @@ DemangledTypeNode DemangleGNU3::DemangleType() } break; case 'N': - type = DemangleNestedName(); + type = DemangleNestedName(nullptr, false); substitute = true; break; case 'A': @@ -849,14 +1038,17 @@ DemangledTypeNode DemangleGNU3::DemangleType() uint64_t size = DemangleNumber(); if (m_reader.Read() != '_') throw DemangleException(); + NodeRef childRef = nullptr; DemangledTypeNode child = DemangleType(); - type = DemangledTypeNode::ArrayType(std::move(child), size); + childRef = m_lastTypeRef; + type = childRef ? DemangledTypeNode::ArrayType(childRef, size) : + DemangledTypeNode::ArrayType(std::move(child), size); } else { //[] _ //Since our type system doesn't support expressions as dimensions - //we instead demangle this as just a string. + //we preserve the element type node and render a synthetic name at finalization. string dimension = "[]"; if (m_reader.Peek() != '_') { @@ -865,8 +1057,9 @@ DemangledTypeNode DemangleGNU3::DemangleType() if (m_reader.Read() != '_') throw DemangleException(); - const string typeString = DemangleType().GetString() + dimension; - type = CreateUnknownType(typeString); + DemangledTypeNode inner = DemangleType(); + NodeRef innerRef = m_lastTypeRef ? 
m_lastTypeRef : DemangledTypeNode::CreateShared(std::move(inner)); + type = DemangledTypeNode::PostfixType(innerRef, std::move(dimension)); } substitute = true; break; @@ -875,10 +1068,10 @@ DemangledTypeNode DemangleGNU3::DemangleType() m_reader.UnRead(); type = DemangleName(); - auto nameList = type.GetTypeName(); - if (nameList.size() < 1) + string lastName = LastTypeNameSegmentBase(type); + if (lastName.empty()) throw DemangleException(); - m_lastName = nameList.back(); + m_lastName = lastName; substitute = true; if (m_reader.Peek() == 'I') @@ -886,38 +1079,40 @@ DemangledTypeNode DemangleGNU3::DemangleType() substitute = false; m_reader.Consume(); PushType(type); - vector args; + ParamList args; DemangleTemplateArgs(args); - ExtendTypeName(type, GetTemplateString(args)); - type.SetHasTemplateArguments(true); + ApplyTemplateArgs(type, std::move(args)); PushType(type); } } } if (substitute) - PushType(type); + typeRef = PushType(type); + m_lastTypeRef = typeRef; dedent(); return type; } -DemangledTypeNode DemangleGNU3::DemangleSubstitution() +DemangledTypeNode DemangleGNU3::DemangleSubstitution(NodeRef* outTypeRef) { - static const QualifiedName stdAllocatorName(vector{"std", "allocator"}); - static const QualifiedName stdBasicStringName(vector{"std", "basic_string"}); - static const QualifiedName stdIostreamName(vector{"std", "iostream"}); - static const QualifiedName stdIstreamName(vector{"std", "istream"}); - static const QualifiedName stdOstreamName(vector{"std", "ostream"}); - static const QualifiedName stdStringName(vector{"std", "string"}); - static const QualifiedName stdName(vector{"std"}); + if (outTypeRef) + *outTypeRef = nullptr; + static const StringList stdAllocatorName(vector{"std", "allocator"}); + static const StringList stdBasicStringName(vector{"std", "basic_string"}); + static const StringList stdIostreamName(vector{"std", "iostream"}); + static const StringList stdIstreamName(vector{"std", "istream"}); + static const StringList 
stdOstreamName(vector{"std", "ostream"}); + static const StringList stdStringName(vector{"std", "string"}); + static const StringList stdName(vector{"std"}); indent() MyLogDebug("%s: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); char elm; elm = m_reader.Read(); - QualifiedName name; + StringList name; size_t number = 0; switch (elm) { @@ -965,10 +1160,13 @@ DemangledTypeNode DemangleGNU3::DemangleSubstitution() } dedent(); - const DemangledTypeNode& resolved = GetType(number); - const auto& segs = resolved.GetTypeName(); - if (!segs.empty()) - m_lastName = segs.back(); + auto ref = GetTypeRef(number); + const DemangledTypeNode& resolved = *ref; + string lastName = LastTypeNameSegmentBase(resolved); + if (!lastName.empty()) + m_lastName = lastName; + if (outTypeRef) + *outTypeRef = ref; return resolved; } m_lastName = name.back(); @@ -1023,7 +1221,7 @@ string DemangleGNU3::DemanglePrimaryExpression() MyLogDebug("%s: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); char elm1 = '\0'; string out; - QualifiedName tmpList; + StringList tmpList; bool oldTopLevel; //expr-primary if (m_reader.PeekString(2) == "_Z") @@ -1040,7 +1238,7 @@ string DemangleGNU3::DemanglePrimaryExpression() DemangledTypeNode t = DemangleSymbol(tmpList); m_topLevel = oldTopLevel; m_templateSubstitute = std::move(savedTemplateSubstitute); - out += t.GetTypeAndName(tmpList); + out += t.GetTypeAndName(tmpList, m_platform.GetPtr()); dedent() return out; } @@ -1055,7 +1253,7 @@ string DemangleGNU3::DemanglePrimaryExpression() DemangledTypeNode t2 = DemangleSymbol(tmpList); m_topLevel = oldTopLevel; m_templateSubstitute = std::move(savedTemplateSubstitute2); - out += t2.GetTypeAndName(tmpList); + out += t2.GetTypeAndName(tmpList, m_platform.GetPtr()); dedent(); return out; } @@ -1157,7 +1355,7 @@ string DemangleGNU3::DemangleExpressionList() expr += ", "; const string e = DemangleExpression(); expr += e; - m_functionSubstitute.back().push_back(CreateUnknownType(e)); + 
m_functionSubstitute.back().push_back(DemangledTypeNode::CreateShared(CreateUnknownType(e))); first = false; } m_functionSubstitute.pop_back(); @@ -1230,6 +1428,7 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() case hash('s','z'): case hash('a','t'): case hash('a','z'): + case hash('a','w'): case hash('n','x'): case hash('s','Z'): case hash('s','P'): @@ -1294,13 +1493,13 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() string name; name = "'lambda"; vector lambdaParams; - // Generic lambdas encode 'auto' params as T_, T0_, T1_... which reference - // the lambda's own operator() template params, not any outer template scope. - // Save and replace the template substitution table with 'auto' placeholders. - auto savedTemplateSubstitute = m_templateSubstitute; - m_templateSubstitute.clear(); - for (int autoIdx = 0; autoIdx < 16; autoIdx++) - m_templateSubstitute.push_back(CreateUnknownType("auto")); + // Generic lambdas encode 'auto' params as template params. Preserve any + // enclosing template substitutions, and synthesize lambda-local autos + // lazily only when a template-param reference does not resolve. 
+ bool savedParsingLambdaParams = m_parsingLambdaParams; + size_t savedLambdaTemplateParamBase = m_lambdaTemplateParamBase; + m_parsingLambdaParams = true; + m_lambdaTemplateParamBase = m_templateSubstitute.size(); do { DemangledTypeNode param = DemangleType(); @@ -1309,7 +1508,8 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() lambdaParams.push_back(std::move(param)); }while (m_reader.Peek() != 'E'); m_reader.Consume(); - m_templateSubstitute = std::move(savedTemplateSubstitute); + m_parsingLambdaParams = savedParsingLambdaParams; + m_lambdaTemplateParamBase = savedLambdaTemplateParamBase; if (isdigit(m_reader.Peek())) { @@ -1328,6 +1528,7 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() name += ")"; m_lastName = name; outType = CreateUnknownType(name); + PushType(outType); break; } case hash('U','t'): @@ -1353,16 +1554,18 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() // The conversion operator type may reference template params (T_, T0_, ...) // that aren't yet in m_templateSubstitute (they're defined by a following // IE in the enclosing nested name). Set m_permitForwardTemplateRefs so - // that DemangleTemplateSubstitution() returns a placeholder instead of + // that DemangleTemplateSubstitution() returns a shared placeholder instead of // throwing, and don't consume trailing IE in the T case of DemangleType. // The outer DemangleNestedName case 'I' will parse those args and call - // ResolveForwardTemplateRefs() to patch the placeholders. + // ResolveForwardTemplateRefs() to replace those placeholders with the real args. bool savedPermit = m_permitForwardTemplateRefs; m_pendingForwardRefs.clear(); m_permitForwardTemplateRefs = true; DemangledTypeNode cvType = DemangleType(); + NodeRef cvTypeRef = m_lastTypeRef ? 
m_lastTypeRef : DemangledTypeNode::CreateShared(std::move(cvType)); m_permitForwardTemplateRefs = savedPermit; - outType = CreateUnknownType("operator " + cvType.GetString()); + outType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, + DemangledQualifiedName{DemangledNamePart("operator ", std::move(cvTypeRef))}); break; } default: @@ -1386,18 +1589,16 @@ DemangledTypeNode DemangleGNU3::DemangleUnqualifiedName() { m_reader.Consume(); string tag = "[abi:" + DemangleSourceName() + "]"; - auto qn = outType.GetTypeName(); - if (!qn.empty()) - qn.back() += tag; - outType.SetTypeName(std::move(qn)); - m_lastName = qn.empty() ? tag : qn.back(); + ExtendTypeName(outType, tag); + string lastName = LastTypeNameSegmentBase(outType); + m_lastName = lastName.empty() ? tag : lastName; } dedent(); return outType; } -QualifiedName DemangleGNU3::DemangleBaseUnresolvedName() +StringList DemangleGNU3::DemangleBaseUnresolvedName() { // ::= # unresolved name // ::= on # unresolved operator-function-id @@ -1407,7 +1608,7 @@ QualifiedName DemangleGNU3::DemangleBaseUnresolvedName() indent() MyLogDebug("%s: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); - QualifiedName out; + StringList out; if (m_reader.Length() > 1) { const string str = m_reader.PeekString(2); @@ -1420,9 +1621,9 @@ QualifiedName DemangleGNU3::DemangleBaseUnresolvedName() if (m_reader.Peek() == 'I') { m_reader.Consume(); - vector args; + ParamList args; DemangleTemplateArgs(args); - out.back() += GetTemplateString(args); + out.back() = NameSegmentWithTemplateArgs(out.back(), std::move(args)).GetString(); PushType(CreateUnknownType(out)); } } @@ -1441,9 +1642,9 @@ QualifiedName DemangleGNU3::DemangleBaseUnresolvedName() if (m_reader.Peek() == 'I') { m_reader.Consume(); - vector args; + ParamList args; DemangleTemplateArgs(args); - out.back() += GetTemplateString(args); + out.back() = NameSegmentWithTemplateArgs(out.back(), std::move(args)).GetString(); } } } @@ -1468,10 +1669,9 @@ DemangledTypeNode 
DemangleGNU3::DemangleUnresolvedType() { PushType(type); m_reader.Consume(); - vector args; + ParamList args; DemangleTemplateArgs(args); - ExtendTypeName(type, GetTemplateString(args)); - type.SetHasTemplateArguments(true); + ApplyTemplateArgs(type, std::move(args)); PushType(type); } else @@ -1607,9 +1807,9 @@ string DemangleGNU3::DemangleExpression() return GetOperator(elm1, elm2) + "(" + DemangleTypeString() + ")"; case hash('s','P'): { - vector args; + ParamList args; DemangleTemplateArgs(args); - return "sizeof...(" + GetTemplateString(args) + ")..."; + return "sizeof...(" + NameSegmentWithTemplateArgs("", std::move(args)).GetString() + ")..."; } case hash('s','p'): return "(" + DemangleExpression() + ")..."; @@ -1695,7 +1895,7 @@ string DemangleGNU3::DemangleExpression() if (!firstArg) args += ", "; const string e = DemangleExpression(); args += e; - m_functionSubstitute.back().push_back(CreateUnknownType(e)); + m_functionSubstitute.back().push_back(DemangledTypeNode::CreateShared(CreateUnknownType(e))); firstArg = false; } m_functionSubstitute.pop_back(); @@ -1761,7 +1961,9 @@ string DemangleGNU3::DemangleExpression() out = (elementNum == 0) ? "fp" : "fp" + std::to_string(elementNum - 1); break; } - type = m_functionSubstitute[listNumber][elementNum]; + if (!m_functionSubstitute[listNumber][elementNum]) + throw DemangleException(); + type = *m_functionSubstitute[listNumber][elementNum]; } else if (isdigit(elm) || isupper(elm)) { @@ -1776,7 +1978,9 @@ string DemangleGNU3::DemangleExpression() out = "fp" + std::to_string(elementNum - 1); break; } - type = m_functionSubstitute[listNumber][elementNum]; + if (!m_functionSubstitute[listNumber][elementNum]) + throw DemangleException(); + type = *m_functionSubstitute[listNumber][elementNum]; } else { @@ -1819,22 +2023,22 @@ string DemangleGNU3::DemangleExpression() { out += DemangleSourceName(); // Push bare name (before template args) to substitution table. 
- PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, _STD_VECTOR<_STD_STRING>{out})); + PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{out})); if (m_reader.Peek() == 'I') { - vector args; + ParamList args; m_reader.Consume(); // DemangleTemplateArgs(args); - out += GetTemplateString(args); + out = NameSegmentWithTemplateArgs(out, std::move(args)).GetString(); // Also push the template instantiation (name+args). - PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, _STD_VECTOR<_STD_STRING>{out})); + PushType(DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{out})); } out += "::"; }while (m_reader.Peek() != 'E'); m_reader.Consume(); - out += DemangleBaseUnresolvedName().GetString(); + out += JoinNameSegments(DemangleBaseUnresolvedName()); return out; } if (isdigit(m_reader.Peek())) @@ -1859,10 +2063,10 @@ string DemangleGNU3::DemangleExpression() PushType(CreateUnknownType(out)); if (m_reader.Peek() == 'I') { - vector args; + ParamList args; m_reader.Consume(); DemangleTemplateArgs(args); // consumes the trailing 'E' - out += GetTemplateString(args); + out = NameSegmentWithTemplateArgs(out, std::move(args)).GetString(); // Also push the template instantiation. PushType(CreateUnknownType(out)); hadTemplateArgs = true; @@ -1874,7 +2078,7 @@ string DemangleGNU3::DemangleExpression() // so check rather than unconditionally consuming. if (m_reader.Peek() == 'E') m_reader.Consume(); - out += DemangleBaseUnresolvedName().GetString(); + out += JoinNameSegments(DemangleBaseUnresolvedName()); return out; } else @@ -1890,7 +2094,7 @@ string DemangleGNU3::DemangleExpression() const string segName = DemangleSourceName(); if (m_reader.Peek() == 'I') { - vector args; + ParamList args; m_reader.Consume(); DemangleTemplateArgs(args); if (isdigit(m_reader.Peek())) @@ -1898,13 +2102,14 @@ string DemangleGNU3::DemangleExpression() // Another source name follows — intermediate qualifier. 
// Push to the substitution table, mirroring what the // N-prefix sr branch does for each nested qualifier. - PushType(CreateUnknownType(out + segName + GetTemplateString(args))); - out += segName + GetTemplateString(args) + "::"; + string segment = NameSegmentWithTemplateArgs(segName, std::move(args)).GetString(); + PushType(CreateUnknownType(out + segment)); + out += segment + "::"; } else { // No more source names — this template-id is the final name. - out += segName + GetTemplateString(args); + out += NameSegmentWithTemplateArgs(segName, std::move(args)).GetString(); return out; } } @@ -1916,7 +2121,7 @@ string DemangleGNU3::DemangleExpression() } } // peek is not a digit: fall back for operator-names ("on") / destructor-names ("dn"). - out += DemangleBaseUnresolvedName().GetString(); + out += JoinNameSegments(DemangleBaseUnresolvedName()); } return out; default: @@ -1924,11 +2129,11 @@ string DemangleGNU3::DemangleExpression() out = DemangleSourceName(); if (m_reader.Peek() == 'I') { - vector args; + ParamList args; m_reader.Consume(); // DemangleTemplateArgs(args); - out += GetTemplateString(args); + out = NameSegmentWithTemplateArgs(out, std::move(args)).GetString(); } break; } @@ -1936,55 +2141,88 @@ string DemangleGNU3::DemangleExpression() } -void DemangleGNU3::DemangleTemplateArgs(vector& args, bool* hadNonTypeArg) +bool DemangleGNU3::DemangleTemplateArg(ParamList& args, bool* hadNonTypeArg) { - indent(); - MyLogDebug("%s:: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); DemangledTypeNode tmp; + NodeRef tmpRef; bool tmpValid = false; string expr; bool topLevel; - const string lastName = m_lastName; - while (m_reader.Peek() != 'E') + switch (m_reader.Read()) { - switch (m_reader.Read()) - { - case 'L': - expr = DemanglePrimaryExpression(); - args.push_back(expr); - tmp = CreateUnknownType(expr); - tmpValid = true; - if (hadNonTypeArg) *hadNonTypeArg = true; - break; - case 'X': - args.push_back(DemangleExpression()); - if (m_reader.Read() != 'E') - 
throw DemangleException(); - if (hadNonTypeArg) *hadNonTypeArg = true; - break; - case 'I': // GCC sometimes uses I...E for argument packs instead of J...E - case 'J': + case 'L': + expr = DemanglePrimaryExpression(); + tmp = CreateUnknownType(expr); + tmpRef = DemangledTypeNode::CreateShared(std::move(tmp)); + args.push_back({"", tmpRef}); + tmpValid = true; + if (hadNonTypeArg) *hadNonTypeArg = true; + break; + case 'X': + { + DemangledTypeNode exprNode = CreateUnknownType(DemangleExpression()); + args.push_back({"", DemangledTypeNode::CreateShared(std::move(exprNode))}); + if (m_reader.Read() != 'E') + throw DemangleException(); + if (hadNonTypeArg) *hadNonTypeArg = true; + break; + } + case 'I': // GCC sometimes uses I...E for argument packs instead of J...E + case 'J': + { + size_t prevTemplateSize = m_templateSubstitute.size(); + DemangleTemplateArgs(args, hadNonTypeArg); + if (m_topLevel && m_templateSubstitute.size() == prevTemplateSize) + PushTemplateType(CreateUnknownType("auto")); + break; + } + case 'T': + if (m_reader.Peek() == 'n') { - size_t prevTemplateSize = m_templateSubstitute.size(); - DemangleTemplateArgs(args); - if (m_topLevel && m_templateSubstitute.size() == prevTemplateSize) - PushTemplateType(CreateUnknownType("auto")); - break; - } - default: - m_reader.UnRead(); + // ::= + // ::= Tn # non-type parameter + // + // The declaration names a synthetic non-type template parameter + // for the following argument. Binary Ninja does not print those + // synthetic parameter names, so consume the declaration type and + // keep only the actual following template argument. 
+ m_reader.Consume(); topLevel = m_topLevel; m_topLevel = false; - tmp = DemangleType(); + DemangleType(); m_topLevel = topLevel; - args.push_back(tmp.GetString()); - tmpValid = true; - } - if (m_topLevel && tmpValid) - { - MyLogDebug("Adding template ref: %s\n", tmp.GetString().c_str()); - PushTemplateType(tmp); + return DemangleTemplateArg(args, hadNonTypeArg); } + [[fallthrough]]; + default: + m_reader.UnRead(); + topLevel = m_topLevel; + m_topLevel = false; + tmp = DemangleType(); + m_topLevel = topLevel; + tmpRef = DemangledTypeNode::CreateShared(std::move(tmp)); + args.push_back({"", tmpRef}); + tmpValid = true; + } + if (m_topLevel && tmpValid) + { + MyLogDebug("Adding template ref: %s\n", tmpRef ? tmpRef->GetString().c_str() : ""); + PushTemplateType(tmpRef); + } + return true; +} + + +void DemangleGNU3::DemangleTemplateArgs(ParamList& args, bool* hadNonTypeArg) +{ + NestingGuard nestingGuard(*this); + indent(); + MyLogDebug("%s:: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); + const string lastName = m_lastName; + while (m_reader.Peek() != 'E') + { + if (!DemangleTemplateArg(args, hadNonTypeArg)) + break; } m_reader.Consume(); m_lastName = lastName; @@ -1993,8 +2231,9 @@ void DemangleGNU3::DemangleTemplateArgs(vector& args, bool* hadNonTypeAr } -DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) +DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs, bool pushBareTemplatePrefix) { + NestingGuard nestingGuard(*this); /* This can be either a qualified name like: "foo::bar::bas" or it can be a qualified type like: "foo::bar::bas & const" thus we return either @@ -2027,7 +2266,7 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) indent(); MyLogDebug("%s:: '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); - DemangledTypeNode type = DemangledTypeNode::NamedType(UnknownNamedTypeClass, QualifiedName()); + DemangledTypeNode type = 
DemangledTypeNode::NamedType(UnknownNamedTypeClass, StringList{}); bool cnst = false, vltl = false, rstrct = false; bool ref = false; bool rvalueRef = false; @@ -2073,7 +2312,7 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) { if (!base) throw DemangleException(); - vector args; + ParamList args; bool hadNonType = false; DemangleTemplateArgs(args, allTypeTemplateArgs ? &hadNonType : nullptr); if (allTypeTemplateArgs) @@ -2084,8 +2323,7 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) // type parsing itself where m_permitForwardTemplateRefs is true). if (!m_permitForwardTemplateRefs) ResolveForwardTemplateRefs(type, args); - ExtendTypeName(type, GetTemplateString(args)); - type.SetHasTemplateArguments(true); + ApplyTemplateArgs(type, std::move(args)); isTemplate = true; break; } @@ -2099,16 +2337,8 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) if (!isTemplate) { type.SetNameType(newType.GetNameType()); - auto aNames = type.GetTypeName(); - auto bNames = newType.GetTypeName(); - _STD_VECTOR<_STD_STRING> newName; - newName.reserve(aNames.size() + bNames.size()); - newName.insert(newName.end(), aNames.begin(), aNames.end()); - newName.insert(newName.end(), bNames.begin(), bNames.end()); - if (TotalStringSize(newName) > MAX_DEMANGLE_LENGTH) - throw DemangleException("Detected adversarial mangled string"); - type.SetNTR(type.GetNTRClass(), newName); - type.SetHasTemplateArguments(false); + AppendTypeName(type, newType); + type.SetNTRType(newType.GetNTRClass()); } // Consume any ABI tags (B ) following this name component. 
// These appear as suffixes on in the Itanium ABI: @@ -2123,11 +2353,12 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) string savedLastName = m_lastName; string abiTag = DemangleSourceName(); m_lastName = savedLastName; - auto& segs = type.GetMutableTypeName(); - if (!segs.empty()) - segs.back() += "[abi:" + abiTag + "]"; + ExtendTypeName(type, "[abi:" + abiTag + "]"); } - if (substitute && m_reader.Peek() != 'E') + bool dependentTemplatePrefix = !pushBareTemplatePrefix && m_reader.Peek() == 'I' && + LastTypeNameSegmentBase(type) == "basic_ostream" && + TemplateArgsReferenceTemplateParam(m_reader.PeekString(m_reader.Length())); + if (substitute && m_reader.Peek() != 'E' && !dependentTemplatePrefix) { //Those template arguments were not the primary arguments so clear them from the sub listType while (m_templateSubstitute.size() > startSize) @@ -2160,10 +2391,11 @@ DemangledTypeNode DemangleGNU3::DemangleNestedName(bool* allTypeTemplateArgs) DemangledTypeNode DemangleGNU3::DemangleLocalName() { + NestingGuard nestingGuard(*this); indent(); MyLogDebug("%s '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); DemangledTypeNode type; - QualifiedName varName; + StringList varName; // The local function has its own template scope. Save the outer template // substitution table and set m_topLevel = true so that when the local // function's template args are parsed (e.g. 
handleMessageDelayed), @@ -2177,11 +2409,9 @@ DemangledTypeNode DemangleGNU3::DemangleLocalName() m_inLocalName = true; type = DemangleSymbol(varName); m_inLocalName = savedInLocalName; - m_topLevel = oldTopLevel; - m_templateSubstitute = std::move(savedTemplateSubstitute); if (varName.size() > 0) - varName.back() += (type.GetStringAfterName()); + varName.back() += type.GetStringAfterName(m_platform.GetPtr()); else varName.push_back(type.GetString()); @@ -2199,23 +2429,20 @@ DemangledTypeNode DemangleGNU3::DemangleLocalName() // DemangledTypeNode tmpType = DemangleName(); type = DemangledTypeNode::NamedType(UnknownNamedTypeClass, varName); - auto aNames = type.GetTypeName(); - auto bNames = tmpType.GetTypeName(); - _STD_VECTOR<_STD_STRING> newName; - newName.reserve(aNames.size() + bNames.size()); - newName.insert(newName.end(), aNames.begin(), aNames.end()); - newName.insert(newName.end(), bNames.begin(), bNames.end()); - if (TotalStringSize(newName) > MAX_DEMANGLE_LENGTH) - throw DemangleException("Detected adversarial mangled string"); - type.SetTypeName(std::move(newName)); + AppendTypeName(type, tmpType); + type.SetNTRType(tmpType.GetNTRClass()); type.SetConst(tmpType.IsConst()); type.SetVolatile(tmpType.IsVolatile()); - type.SetPointerSuffix(tmpType.GetPointerSuffix()); + type.SetPointerSuffixBits(tmpType.GetPointerSuffixBits()); + m_templateSubstitute = std::move(savedTemplateSubstitute); + m_topLevel = oldTopLevel; } else { m_reader.Consume(); type = DemangledTypeNode::NamedType(UnknownNamedTypeClass, varName); + m_templateSubstitute = std::move(savedTemplateSubstitute); + m_topLevel = oldTopLevel; } // [] //TODO: What do we do with discriminators? 
@@ -2241,6 +2468,7 @@ DemangledTypeNode DemangleGNU3::DemangleLocalName() DemangledTypeNode DemangleGNU3::DemangleName() { + NestingGuard nestingGuard(*this); indent(); MyLogDebug("%s '%s'\n", __FUNCTION__, m_reader.GetRaw().c_str()); /* @@ -2264,9 +2492,9 @@ DemangledTypeNode DemangleGNU3::DemangleName() { m_reader.Consume(1); type = DemangleUnqualifiedName(); - auto qn = type.GetTypeName(); - qn.insert(qn.begin(), "std"); - type.SetTypeName(std::move(qn)); + auto qn = CopyQualifiedName(type); + qn.insert(qn.begin(), DemangledNamePart("std")); + type.SetName(std::move(qn)); substitute = true; } else @@ -2279,10 +2507,9 @@ DemangledTypeNode DemangleGNU3::DemangleName() m_reader.Consume(); if (substitute) PushType(type); - vector args; + ParamList args; DemangleTemplateArgs(args); - ExtendTypeName(type, GetTemplateString(args)); - type.SetHasTemplateArguments(true); + ApplyTemplateArgs(type, std::move(args)); // Push the template instantiation (e.g. std::swap) so that the // substitution table matches what the encoder built. 
The encoder adds // both the unscoped-template-name (prefix, already pushed above) and @@ -2316,14 +2543,11 @@ DemangledTypeNode DemangleGNU3::DemangleName() { PushType(type); // - vector args; + ParamList args; m_reader.Consume(); // DemangleTemplateArgs(args); - LogDebug("Typename: %s", type.GetTypeName()[0].c_str()); - ExtendTypeName(type, GetTemplateString(args)); - LogDebug("Typename: %s", type.GetTypeName()[0].c_str()); - type.SetHasTemplateArguments(true); + ApplyTemplateArgs(type, std::move(args)); } } dedent(); @@ -2331,17 +2555,19 @@ DemangledTypeNode DemangleGNU3::DemangleName() } -DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) +DemangledTypeNode DemangleGNU3::DemangleSymbol(StringList& varName) { + NestingGuard nestingGuard(*this); indent(); MyLogDebug("%s: %s\n", __FUNCTION__, m_reader.GetRaw().c_str()); DemangledTypeNode returnType; + NodeRef returnTypeRef = nullptr; bool isReturnTypeUnknown = false; DemangledTypeNode type; ParamList params; bool cnst = false, vltl = false, rstrct = false; bool oldTopLevel; - QualifiedName name; + StringList name; /* ::= @@ -2387,7 +2613,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{name.GetString() + " [transaction clone]" + t.GetStringAfterName()}); + StringList{JoinNameSegments(name) + " [transaction clone]" + t.GetStringAfterName(m_platform.GetPtr())}); } case 'V': { @@ -2437,7 +2663,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) { // Guard variable (original behavior) DemangledTypeNode t = DemangleSymbol(name); - varName.push_back("guard_variable_for_" + t.GetTypeAndName(name)); + varName.push_back("guard_variable_for_" + t.GetTypeAndName(name, m_platform.GetPtr())); type = DemangledTypeNode::IntegerType(1, false); if (m_reader.Length() == 0) return type; @@ -2576,7 +2802,7 @@ 
DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) annotation += ']'; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{routineName + annotation}); + StringList{routineName + annotation}); } default: throw DemangleException(); @@ -2628,7 +2854,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"covariant_return_thunk_to_" + name.GetString() + t.GetStringAfterName()}); + StringList{"covariant_return_thunk_to_" + JoinNameSegments(name) + t.GetStringAfterName(m_platform.GetPtr())}); } case 'C': { @@ -2638,7 +2864,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) throw DemangleException(); return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"construction_vtable_for_" + DemangleTypeString() + "-in-" + t.GetString()}); + StringList{"construction_vtable_for_" + DemangleTypeString() + "-in-" + t.GetString()}); } case 'D': LogWarn("Unsupported: 'typeinfo common proxy'\n"); @@ -2656,7 +2882,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"non-virtual_thunk_to_" + name.GetString() + t.GetStringAfterName()}); + StringList{"non-virtual_thunk_to_" + JoinNameSegments(name) + t.GetStringAfterName(m_platform.GetPtr())}); } case 'H': // TLS init function { @@ -2665,11 +2891,11 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"tls_init_function_for_" + t.GetTypeAndName(name)}); + StringList{"tls_init_function_for_" + 
t.GetTypeAndName(name, m_platform.GetPtr())}); } case 'I': return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"typeinfo_for_" + DemangleTypeString()}); + StringList{"typeinfo_for_" + DemangleTypeString()}); case 'J': LogWarn("Unsupported: 'java class'\n"); throw DemangleException(); @@ -2684,7 +2910,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) { DemangledTypeNode t = DemangleType(); return DemangledTypeNode::NamedType(StructNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"VTT_for_" + t.GetString()}); + StringList{"VTT_for_" + t.GetString()}); } case 'v': // virtual thunk { @@ -2699,11 +2925,11 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"virtual_thunk_to_" + name.GetString() + t.GetStringAfterName()}); + StringList{"virtual_thunk_to_" + JoinNameSegments(name) + t.GetStringAfterName(m_platform.GetPtr())}); } case 'V': //Vtable return DemangledTypeNode::NamedType(StructNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"vtable_for_" + DemangleTypeString()}); + StringList{"vtable_for_" + DemangleTypeString()}); case 'W': // TLS wrapper function { oldTopLevel = m_topLevel; @@ -2711,7 +2937,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) DemangledTypeNode t = DemangleSymbol(name); m_topLevel = oldTopLevel; return DemangledTypeNode::NamedType(UnknownNamedTypeClass, - _STD_VECTOR<_STD_STRING>{"tls_wrapper_function_for_" + t.GetTypeAndName(name)}); + StringList{"tls_wrapper_function_for_" + t.GetTypeAndName(name, m_platform.GetPtr())}); } default: throw DemangleException(); @@ -2732,10 +2958,9 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) return type; } - varName = type.GetTypeName(); cnst = type.IsConst(); vltl = type.IsVolatile(); - auto suffix = type.GetPointerSuffix(); + auto 
suffix = type.GetPointerSuffixBits(); if (m_reader.Peek() == 'J') { m_reader.Consume(); @@ -2750,24 +2975,25 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) string savedLastName = m_lastName; string abiTag = DemangleSourceName(); m_lastName = savedLastName; - auto& segs = type.GetMutableTypeName(); - if (!segs.empty()) - segs.back() += "[abi:" + abiTag + "]"; + ExtendTypeName(type, "[abi:" + abiTag + "]"); } + const bool nameRequiresReturnType = m_isParameter || LastTypeNameSegmentHasTemplateArguments(type); + varName = type.RenderTypeNameSegments(m_platform.GetPtr()); if (m_isOperatorOverload || type.GetNameType() == ConstructorNameType || type.GetNameType() == DestructorNameType) { returnType = DemangledTypeNode::VoidType(); } - else if (m_isParameter || type.HasTemplateArguments()) + else if (nameRequiresReturnType) { returnType = DemangleType(); + returnTypeRef = m_lastTypeRef; } else { isReturnTypeUnknown = true; - returnType = DemangledTypeNode::IntegerType(m_arch->GetAddressSize(), true); + returnType = DemangledTypeNode::AddressSizedIntegerType(true); } m_functionSubstitute.push_back({}); @@ -2799,6 +3025,7 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) if (m_reader.PeekString(2) == "@@") break; DemangledTypeNode param = DemangleType(); + NodeRef paramRef = m_lastTypeRef; if (param.GetClass() == VoidTypeClass) { if (m_reader.Peek() == 'E') @@ -2808,9 +3035,11 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& varName) } break; } - m_functionSubstitute.back().push_back(param); bool isVarArgs = param.GetClass() == VarArgsTypeClass; - params.push_back({"", std::make_shared(std::move(param))}); + if (!paramRef) + paramRef = DemangledTypeNode::CreateShared(std::move(param)); + m_functionSubstitute.back().push_back(paramRef); + params.push_back({"", paramRef}); if (isVarArgs) { if (m_reader.Peek() == 'E') @@ -2824,19 +3053,21 @@ DemangledTypeNode DemangleGNU3::DemangleSymbol(QualifiedName& 
varName) m_functionSubstitute.pop_back(); m_isParameter = false; - type = DemangledTypeNode::FunctionType(std::move(returnType), nullptr, std::move(params)); + if (!returnTypeRef) + returnTypeRef = DemangledTypeNode::CreateShared(std::move(returnType)); + type = DemangledTypeNode::FunctionType(returnTypeRef, nullptr, std::move(params)); if (isReturnTypeUnknown) type.SetReturnTypeConfidence(BN_MINIMUM_CONFIDENCE); - type.SetPointerSuffix(suffix); + type.SetPointerSuffixBits(suffix); type.SetConst(cnst); type.SetVolatile(vltl); if (rstrct) - type.SetPointerSuffix({RestrictSuffix}); + type.SetPointerSuffixBits(1u << RestrictSuffix); // PrintTables(); - MyLogDebug("Done: %s%s%s\n", type.GetStringBeforeName().c_str(), varName.GetString().c_str(), - type.GetStringAfterName().c_str()); + MyLogDebug("Done: %s%s%s\n", type.GetStringBeforeName(m_platform.GetPtr()).c_str(), JoinNameSegments(varName).c_str(), + type.GetStringAfterName(m_platform.GetPtr()).c_str()); dedent(); return type; @@ -2892,121 +3123,146 @@ bool DemangleGNU3Static::DemangleGlobalHeader(string& name, string& header) } -bool DemangleGNU3Static::DemangleStringGNU3(Architecture* arch, const string& name, Ref& outType, QualifiedName& outVarName) +namespace { - // Handle _block_invoke[.N] and _block_invoke_N suffixes (Clang/Apple block invocations). - // E.g. ____ZN4dyld5_mainEPK12macho_headermiPPKcS5_S5_Pm_block_invoke.110 - // -> "invocation_function_for_block_in_dyld::_main(...)" - static const string blockInvokeSuffix = "_block_invoke"; - size_t blockPos = name.rfind(blockInvokeSuffix); - if (blockPos != string::npos) - { - // Verify the suffix is _block_invoke optionally followed by [._] only - string tail = name.substr(blockPos + blockInvokeSuffix.size()); - bool validSuffix = tail.empty(); - if (!validSuffix && (tail[0] == '.' 
|| tail[0] == '_')) - { - size_t i = 1; - while (i < tail.size() && isdigit((unsigned char)tail[i])) - i++; - validSuffix = (i == tail.size() && i > 1); - } - if (validSuffix) - { - // Extract the base symbol: everything before _block_invoke - string base = name.substr(0, blockPos); - // Normalize leading underscores: find 'Z' after underscores, keep one '_' before it - size_t zPos = base.find_first_not_of('_'); - if (zPos != string::npos && base[zPos] == 'Z') + static bool DemangleStringGNU3Segments( + Platform* platform, const string& name, Ref& outType, StringList& outVarName) + { + // Handle _block_invoke[.N] and _block_invoke_N suffixes (Clang/Apple block invocations). + // E.g. ____ZN4dyld5_mainEPK12macho_headermiPPKcS5_S5_Pm_block_invoke.110 + // -> "invocation_function_for_block_in_dyld::_main(...)" + static const string blockInvokeSuffix = "_block_invoke"; + size_t blockPos = name.rfind(blockInvokeSuffix); + if (blockPos != string::npos) + { + // Verify the suffix is _block_invoke optionally followed by [._] only + string tail = name.substr(blockPos + blockInvokeSuffix.size()); + bool validSuffix = tail.empty(); + if (!validSuffix && (tail[0] == '.' 
|| tail[0] == '_')) + { + size_t i = 1; + while (i < tail.size() && isdigit((unsigned char)tail[i])) + i++; + validSuffix = (i == tail.size() && i > 1); + } + if (validSuffix) { - string normalized = "_" + base.substr(zPos); - Ref baseType; - QualifiedName baseName; - if (DemangleStringGNU3(arch, normalized, baseType, baseName)) + // Extract the base symbol: everything before _block_invoke + string base = name.substr(0, blockPos); + // Normalize leading underscores: find 'Z' after underscores, keep one '_' before it + size_t zPos = base.find_first_not_of('_'); + if (zPos != string::npos && base[zPos] == 'Z') { - outVarName.clear(); - outVarName.push_back("invocation_function_for_block_in_" + baseName.GetString()); - outType = baseType; - return true; + string normalized = "_" + base.substr(zPos); + Ref baseType; + StringList baseName; + if (DemangleStringGNU3Segments(platform, normalized, baseType, baseName)) + { + outVarName.clear(); + outVarName.push_back("invocation_function_for_block_in_" + JoinNameSegments(baseName)); + outType = baseType; + return true; + } } } } - } - // Handle macOS thread-local variable initializer suffix: $tlv$init - // E.g. __ZL9recursive$tlv$init -> demangle "__ZL9recursive" then annotate. - static const string tlvInitSuffix = "$tlv$init"; - if (name.size() > tlvInitSuffix.size() && - name.compare(name.size() - tlvInitSuffix.size(), tlvInitSuffix.size(), tlvInitSuffix) == 0) - { - string base = name.substr(0, name.size() - tlvInitSuffix.size()); - Ref baseType; - QualifiedName baseName; - if (DemangleStringGNU3(arch, base, baseType, baseName)) + // Handle macOS thread-local variable initializer suffix: $tlv$init + // E.g. __ZL9recursive$tlv$init -> demangle "__ZL9recursive" then annotate. 
+ static const string tlvInitSuffix = "$tlv$init"; + if (name.size() > tlvInitSuffix.size() && + name.compare(name.size() - tlvInitSuffix.size(), tlvInitSuffix.size(), tlvInitSuffix) == 0) { - outVarName = baseName; - if (outVarName.size() > 0) - outVarName[outVarName.size() - 1] += "$tlv$init"; - else - outVarName.push_back("$tlv$init"); - outType = baseType; - return true; + string base = name.substr(0, name.size() - tlvInitSuffix.size()); + Ref baseType; + StringList baseName; + if (DemangleStringGNU3Segments(platform, base, baseType, baseName)) + { + outVarName = std::move(baseName); + if (outVarName.size() > 0) + outVarName[outVarName.size() - 1] += "$tlv$init"; + else + outVarName.push_back("$tlv$init"); + outType = baseType; + return true; + } } - } - string encoding = name; - string header; - bool foundHeader = DemangleGlobalHeader(encoding, header); - - if (!encoding.compare(0, 2, "_Z")) - encoding = encoding.substr(2); - else if (!encoding.compare(0, 3, "__Z")) - encoding = encoding.substr(3); - else if (foundHeader && !header.empty()) - { - outVarName.clear(); - outVarName.push_back(header); - outVarName.push_back(encoding); - outType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, outVarName).Finalize(); - return true; - } - else - return false; + string encoding = name; + string header; + bool foundHeader = DemangleGNU3Static::DemangleGlobalHeader(encoding, header); - thread_local DemangleGNU3 demangle(arch, encoding); - demangle.Reset(arch, encoding); - try - { - outType = demangle.DemangleSymbol(outVarName).Finalize(); + if (!encoding.compare(0, 2, "_Z")) + encoding = encoding.substr(2); + else if (!encoding.compare(0, 3, "__Z")) + encoding = encoding.substr(3); + else if (foundHeader && !header.empty()) + { + outVarName.clear(); + outVarName.push_back(header); + outVarName.push_back(encoding); + outType = DemangledTypeNode::NamedType(UnknownNamedTypeClass, outVarName).Finalize(platform); + return true; + } + else + return false; - if 
(outVarName.size() == 0) + thread_local DemangleGNU3 demangle(platform, encoding); + demangle.Reset(platform, encoding); + try { - if (outType->GetClass() == NamedTypeReferenceClass && outType->GetNamedTypeReference()->GetTypeReferenceClass() == UnknownNamedTypeClass) - { - outVarName = outType->GetTypeName(); - outType = nullptr; - } - else if (outType->GetClass() == NamedTypeReferenceClass) + outType = demangle.DemangleSymbol(outVarName).Finalize(platform); + + if (outVarName.size() == 0) { - auto typeName = outType->GetTypeName(); - if (typeName.size() > 0) - outVarName = "_" + typeName[typeName.size() - 1]; + if (GetFinalizedTypeClass(outType) == NamedTypeReferenceClass && + outType->GetNamedTypeReference()->GetTypeReferenceClass() == UnknownNamedTypeClass) + { + const auto typeName = outType->GetTypeName(); + outVarName = StringList(typeName.begin(), typeName.end()); + outType = nullptr; + } + else if (GetFinalizedTypeClass(outType) == NamedTypeReferenceClass) + { + auto typeName = outType->GetTypeName(); + if (typeName.size() > 0) + outVarName = StringList{"_" + typeName[typeName.size() - 1]}; + } } - } - if (foundHeader && !header.empty()) + if (foundHeader && !header.empty()) + outVarName.insert(outVarName.begin(), header); + } + catch (const DemangleException&) { - outVarName.insert(outVarName.begin(), header); + return false; } + return true; } - catch (std::exception&) - { +} + + +bool DemangleGNU3Static::DemangleStringGNU3(Platform* platform, const string& name, Ref& outType, + QualifiedName& outVarName) +{ + StringList outVarNameSegments; + if (!DemangleStringGNU3Segments(platform, name, outType, outVarNameSegments)) return false; - } + outVarName = QualifiedName(outVarNameSegments); return true; } +bool DemangleGNU3Static::DemangleStringGNU3(Architecture* arch, const string& name, Ref& outType, + QualifiedName& outVarName) +{ + Ref platform; + if (arch) + platform = arch->GetStandalonePlatform(); + return DemangleStringGNU3(platform.GetPtr(), name, 
outType, outVarName); +} + + // ===== Explicit template instantiation ===== @@ -3033,6 +3289,16 @@ class GNU3Demangler: public Demangler Ref view) override #endif { + if (view) + { + auto platform = view->GetDefaultPlatform(); + if (platform) +#ifdef BINARYNINJACORE_LIBRARY + return DemangleGNU3Static::DemangleStringGNU3(platform, name, outType, outVarName); +#else + return DemangleGNU3Static::DemangleStringGNU3(platform.GetPtr(), name, outType, outVarName); +#endif + } return DemangleGNU3Static::DemangleStringGNU3(arch, name, outType, outVarName); } }; diff --git a/demangler/gnu3/demangle_gnu3.h b/demangler/gnu3/demangle_gnu3.h index 8ee0fca624..94e9a36326 100644 --- a/demangler/gnu3/demangle_gnu3.h +++ b/demangler/gnu3/demangle_gnu3.h @@ -94,13 +94,15 @@ class DemangleGNU3Reader class DemangleGNU3 { using ParamList = _STD_VECTOR; + using NodeRef = DemangledTypeNode::NodeRef; + using NodeRefList = _STD_VECTOR; - BN::QualifiedName m_varName; DemangleGNU3Reader m_reader; - BN::Architecture* m_arch; - _STD_VECTOR m_substitute; - _STD_VECTOR m_templateSubstitute; - _STD_VECTOR<_STD_VECTOR> m_functionSubstitute; + BN::Ref m_platform; + NodeRefList m_substitute; + NodeRefList m_templateSubstitute; + _STD_VECTOR m_functionSubstitute; + NodeRef m_lastTypeRef; _STD_STRING m_lastName; BNNameType m_nameType; bool m_localType; @@ -109,19 +111,32 @@ class DemangleGNU3 bool m_shouldDeleteReader; bool m_topLevel; bool m_isOperatorOverload; + bool m_parsingLambdaParams; + size_t m_lambdaTemplateParamBase; // Forward template reference support (for cv conversion operator types). // When m_permitForwardTemplateRefs is true, DemangleTemplateSubstitution() - // returns a placeholder instead of throwing for out-of-bounds template params. - // m_pendingForwardRefs records which param indices have placeholders so that - // ResolveForwardTemplateRefs() can patch them once template args are known. 
+ // returns a shared placeholder node instead of throwing for out-of-bounds + // template params. m_pendingForwardRefs records those nodes so that + // ResolveForwardTemplateRefs() can replace their contents once args are known. bool m_permitForwardTemplateRefs; bool m_inLocalName; - struct ForwardRef { size_t index; }; + size_t m_nestingDepth; + struct ForwardRef + { + size_t index; + NodeRef typeRef; + }; _STD_VECTOR m_pendingForwardRefs; - void ResolveForwardTemplateRefs(DemangledTypeNode& type, const _STD_VECTOR<_STD_STRING>& args); - static _STD_STRING ForwardRefPlaceholder(size_t index); + class NestingGuard + { + DemangleGNU3& m_demangler; + public: + NestingGuard(DemangleGNU3& demangler); + ~NestingGuard(); + }; + void ResolveForwardTemplateRefs(DemangledTypeNode& type, const ParamList& args); enum SymbolType { Function, FunctionWithReturn, Data, VTable, Rtti, Name}; - BN::QualifiedName DemangleBaseUnresolvedName(); + StringList DemangleBaseUnresolvedName(); DemangledTypeNode DemangleUnresolvedType(); _STD_STRING DemangleUnarySuffixExpression(const _STD_STRING& op); _STD_STRING DemangleUnaryPrefixExpression(const _STD_STRING& op); @@ -138,20 +153,30 @@ class DemangleGNU3 DemangledTypeNode DemangleLocalName(); void DemangleCVQualifiers(bool& cnst, bool& vltl, bool& rstrct); - DemangledTypeNode DemangleSubstitution(); - DemangledTypeNode DemangleTemplateSubstitution(); - void DemangleTemplateArgs(_STD_VECTOR<_STD_STRING>& args, bool* hadNonTypeArg = nullptr); + DemangledTypeNode DemangleSubstitution(NodeRef* outTypeRef = nullptr); + DemangledTypeNode DemangleTemplateSubstitution(NodeRef* outTypeRef = nullptr); + bool DemangleTemplateArg(ParamList& args, bool* hadNonTypeArg = nullptr); + void DemangleTemplateArgs(ParamList& args, bool* hadNonTypeArg = nullptr); DemangledTypeNode DemangleFunction(bool cnst, bool vltl); DemangledTypeNode DemangleType(); int64_t DemangleNumber(); - DemangledTypeNode DemangleNestedName(bool* allTypeTemplateArgs = nullptr); - void 
PushTemplateType(const DemangledTypeNode& type); - void PushType(const DemangledTypeNode& type); + DemangledTypeNode DemangleNestedName(bool* allTypeTemplateArgs = nullptr, bool pushBareTemplatePrefix = true); + NodeRef PushTemplateType(NodeRef type); + NodeRef PushTemplateType(const DemangledTypeNode& type); + NodeRef PushTemplateType(DemangledTypeNode&& type); + NodeRef PushType(NodeRef type); + NodeRef PushType(const DemangledTypeNode& type); + NodeRef PushType(DemangledTypeNode&& type); + NodeRef GetTypeRef(size_t ref); const DemangledTypeNode& GetType(size_t ref); - DemangledTypeNode CreateUnknownType(const BN::QualifiedName& s); + DemangledTypeNode CreateUnknownType(const StringList& s); DemangledTypeNode CreateUnknownType(const _STD_STRING& s); static void ExtendTypeName(DemangledTypeNode& type, const _STD_STRING& extend); + static void ApplyTemplateArgs(DemangledTypeNode& type, ParamList args); + static void AppendTypeName(DemangledTypeNode& type, const DemangledTypeNode& extend); + static _STD_STRING LastTypeNameSegmentBase(const DemangledTypeNode& type); + static bool LastTypeNameSegmentHasTemplateArguments(const DemangledTypeNode& type); #ifdef GNUDEMANGLE_DEBUG const DemangledTypeNode& GetTemplateType(size_t ref); @@ -159,10 +184,9 @@ class DemangleGNU3 #endif public: - DemangleGNU3(BN::Architecture* arch, const _STD_STRING& mangledName); - void Reset(BN::Architecture* arch, const _STD_STRING& mangledName); - DemangledTypeNode DemangleSymbol(BN::QualifiedName& varName); - BN::QualifiedName GetVarName() const { return m_varName; } + DemangleGNU3(BN::Platform* platform, const _STD_STRING& mangledName); + void Reset(BN::Platform* platform, const _STD_STRING& mangledName); + DemangledTypeNode DemangleSymbol(StringList& varName); }; @@ -172,5 +196,6 @@ class DemangleGNU3Static static bool IsGNU3MangledString(const _STD_STRING& name); static bool DemangleGlobalHeader(_STD_STRING& name, _STD_STRING& header); + static bool DemangleStringGNU3(BN::Platform* 
platform, const _STD_STRING& name, BN::Ref& outType, BN::QualifiedName& outVarName); static bool DemangleStringGNU3(BN::Architecture* arch, const _STD_STRING& name, BN::Ref& outType, BN::QualifiedName& outVarName); }; From c5532909b6c8b889a7d97a5719c61df549cb02a6 Mon Sep 17 00:00:00 2001 From: Peter LaFosse Date: Wed, 10 Jun 2026 10:12:48 -0400 Subject: [PATCH 3/4] Move demangler type nodes out of GNU3 Share DemangledTypeNode across demangler implementations and add MAX_DEMANGLE_NODE_LENGTH for bounded formatting. --- demangler/{gnu3 => }/demangled_type_node.cpp | 0 demangler/{gnu3 => }/demangled_type_node.h | 0 demangler/gnu3/CMakeLists.txt | 4 +++- demangler/gnu3/demangle_gnu3.cpp | 3 +++ demangler/gnu3/demangle_gnu3.h | 2 +- demangler/msvc/CMakeLists.txt | 4 ++-- demangler/msvc/demangle_msvc.h | 6 +----- 7 files changed, 10 insertions(+), 9 deletions(-) rename demangler/{gnu3 => }/demangled_type_node.cpp (100%) rename demangler/{gnu3 => }/demangled_type_node.h (100%) diff --git a/demangler/gnu3/demangled_type_node.cpp b/demangler/demangled_type_node.cpp similarity index 100% rename from demangler/gnu3/demangled_type_node.cpp rename to demangler/demangled_type_node.cpp diff --git a/demangler/gnu3/demangled_type_node.h b/demangler/demangled_type_node.h similarity index 100% rename from demangler/gnu3/demangled_type_node.h rename to demangler/demangled_type_node.h diff --git a/demangler/gnu3/CMakeLists.txt b/demangler/gnu3/CMakeLists.txt index f957aa7708..02abad58a6 100644 --- a/demangler/gnu3/CMakeLists.txt +++ b/demangler/gnu3/CMakeLists.txt @@ -5,7 +5,9 @@ project(demangle_gnu3) file(GLOB SOURCES CONFIGURE_DEPENDS *.cpp *.c - *.h) + *.h + ../demangled_type_node.cpp + ../demangled_type_node.h) if(DEMO) add_library(${PROJECT_NAME} STATIC ${SOURCES}) diff --git a/demangler/gnu3/demangle_gnu3.cpp b/demangler/gnu3/demangle_gnu3.cpp index 2c0ef8d8ac..2121533764 100644 --- a/demangler/gnu3/demangle_gnu3.cpp +++ b/demangler/gnu3/demangle_gnu3.cpp @@ -30,6 +30,7 @@ using 
namespace std; static constexpr size_t MAX_DEMANGLE_NESTING_DEPTH = 1024; +static constexpr size_t MAX_DEMANGLE_NODE_LENGTH = 8192; static BNTypeClass GetFinalizedTypeClass(const Ref& type) { @@ -343,6 +344,8 @@ string DemangleGNU3Reader::ReadString(size_t count) { if (count > Length()) throw DemangleException(); + if (count > MAX_DEMANGLE_NODE_LENGTH) + throw DemangleException("Demangled node exceeds maximum length"); const string out = m_data.substr(m_offset, count); m_offset += count; diff --git a/demangler/gnu3/demangle_gnu3.h b/demangler/gnu3/demangle_gnu3.h index 94e9a36326..0e06baa1ba 100644 --- a/demangler/gnu3/demangle_gnu3.h +++ b/demangler/gnu3/demangle_gnu3.h @@ -35,7 +35,7 @@ #define _STD_VECTOR std::vector #endif -#include "demangled_type_node.h" +#include "demangler/demangled_type_node.h" class DemangleException: public std::exception { diff --git a/demangler/msvc/CMakeLists.txt b/demangler/msvc/CMakeLists.txt index 3536c899ab..f89aee5135 100644 --- a/demangler/msvc/CMakeLists.txt +++ b/demangler/msvc/CMakeLists.txt @@ -6,8 +6,8 @@ file(GLOB SOURCES CONFIGURE_DEPENDS *.cpp *.c *.h - ../gnu3/demangled_type_node.cpp - ../gnu3/demangled_type_node.h) + ../demangled_type_node.cpp + ../demangled_type_node.h) if(DEMO) add_library(${PROJECT_NAME} STATIC ${SOURCES}) diff --git a/demangler/msvc/demangle_msvc.h b/demangler/msvc/demangle_msvc.h index 168aa6de73..bb5e94ecb5 100644 --- a/demangler/msvc/demangle_msvc.h +++ b/demangler/msvc/demangle_msvc.h @@ -36,11 +36,7 @@ #define _STD_VECTOR std::vector #endif -#ifdef BINARYNINJACORE_LIBRARY -#include "demangler/gnu3/demangled_type_node.h" -#else -#include "../gnu3/demangled_type_node.h" -#endif +#include "demangler/demangled_type_node.h" class DemangleException: public std::exception { From 13c87ea9742baa851f606fd28dd75379a4889c7d Mon Sep 17 00:00:00 2001 From: Peter LaFosse Date: Mon, 22 Jun 2026 20:51:24 -0400 Subject: [PATCH 4/4] Fix demangler no-type result handling --- demangle.cpp | 12 +++++------- 1 file 
changed, 5 insertions(+), 7 deletions(-) diff --git a/demangle.cpp b/demangle.cpp index bb7eb1326b..faf27ac21e 100644 --- a/demangle.cpp +++ b/demangle.cpp @@ -7,7 +7,7 @@ namespace BinaryNinja { bool DemangleGeneric(Ref arch, const std::string& name, Ref& outType, QualifiedName& outVarName, Ref view, bool simplify) { - BNType* apiType; + BNType* apiType = nullptr; BNQualifiedName apiVarName; bool success = BNDemangleGeneric( arch->m_object, name.c_str(), &apiType, &apiVarName, view ? view->m_object : nullptr, simplify); @@ -15,8 +15,7 @@ namespace BinaryNinja { if (!success) return false; - if (apiType) - outType = new Type(apiType); + outType = apiType ? new Type(apiType) : nullptr; outVarName = QualifiedName::FromAPIObject(&apiVarName); BNFreeQualifiedName(&apiVarName); return true; @@ -89,7 +88,7 @@ namespace BinaryNinja { bool DemangleGNU3(Ref arch, const std::string& mangledName, Ref& outType, QualifiedName& outVarName, const bool simplify) { - BNType* localType; + BNType* localType = nullptr; char** localVarName = nullptr; size_t localSize = 0; if (!BNDemangleGNU3(arch->GetObject(), mangledName.c_str(), &localType, &localVarName, &localSize, simplify)) @@ -242,7 +241,7 @@ namespace BinaryNinja { bool CoreDemangler::Demangle(Ref arch, const std::string& name, Ref& outType, QualifiedName& outVarName, Ref view) { - BNType* apiType; + BNType* apiType = nullptr; BNQualifiedName apiVarName; bool success = BNDemanglerDemangle( m_object, arch->m_object, name.c_str(), &apiType, &apiVarName, view ? view->m_object : nullptr); @@ -250,8 +249,7 @@ namespace BinaryNinja { if (!success) return false; - if (apiType) - outType = new Type(apiType); + outType = apiType ? new Type(apiType) : nullptr; outVarName = QualifiedName::FromAPIObject(&apiVarName); BNFreeQualifiedName(&apiVarName); return true;