Skip to content

New string concept #39

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 22 additions & 19 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
CMAKE_MINIMUM_REQUIRED( VERSION 3.10 )
PROJECT( cppcore )
SET ( CPPCORE_VERSION_MAJOR 0 )
SET ( CPPCORE_VERSION_MINOR 1 )
SET ( CPPCORE_VERSION_PATCH 0 )
SET ( CPPCORE_VERSION ${CPPCORE_VERSION_MAJOR}.${CPPCORE_VERSION_MINOR}.${CPPCORE_VERSION_PATCH} )
SET ( PROJECT_VERSION "${CPPCORE_VERSION}" )
SET( CPPCORE_VERSION_MAJOR 0 )
SET( CPPCORE_VERSION_MINOR 1 )
SET( CPPCORE_VERSION_PATCH 0 )
SET( CPPCORE_VERSION ${CPPCORE_VERSION_MAJOR}.${CPPCORE_VERSION_MINOR}.${CPPCORE_VERSION_PATCH} )
SET( PROJECT_VERSION "${CPPCORE_VERSION}" )

find_package(GTest)

Expand All @@ -17,11 +17,11 @@ option( CPPCORE_BUILD_UNITTESTS
"Build unit tests."
ON
)
option( CPPCORE_ASAN
option(CPPCORE_ASAN
"Enable AddressSanitizer."
OFF
)
option( CPPCORE_UBSAN
option(CPPCORE_UBSAN
"Enable Undefined Behavior sanitizer."
OFF
)
Expand All @@ -38,9 +38,9 @@ link_directories(
${CMAKE_HOME_DIRECTORY}/
)

SET( CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_HOME_DIRECTORY}/lib )
SET( CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_HOME_DIRECTORY}/lib )
SET( CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_HOME_DIRECTORY}/bin )
SET(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_HOME_DIRECTORY}/lib )
SET(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_HOME_DIRECTORY}/lib )
SET(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_HOME_DIRECTORY}/bin )

if( WIN32 AND NOT CYGWIN )
set( CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /EHsc" ) # Force to always compile with W4
Expand All @@ -56,38 +56,39 @@ elseif ( "${CMAKE_CXX_COMPILER_ID}" STREQUAL "Clang" )
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -Wno-long-long -g -pedantic -std=c++11")
endif()

IF (ASSIMP_ASAN)
IF(CPPCORE_ASAN)
MESSAGE(STATUS "AddressSanitizer enabled")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=address")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=address")
ENDIF()

IF (ASSIMP_UBSAN)
IF(CPPCORE_UBSAN)
MESSAGE(STATUS "Undefined Behavior sanitizer enabled")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all")
SET(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all")
SET(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fsanitize=undefined -fno-sanitize-recover=all")
ENDIF()

SET ( cppcore_src
SET(cppcore_src
code/cppcore.cpp
include/cppcore/CPPCoreCommon.h
)

SET ( cppcore_common_src
SET(cppcore_common_src
include/cppcore/Common/Hash.h
include/cppcore/Common/TStringBase.h
include/cppcore/Common/TStringView.h
include/cppcore/Common/Variant.h
include/cppcore/Common/Sort.h
include/cppcore/Common/TBitField.h
include/cppcore/Common/TOptional.h
)

SET( cppcore_random_src
SET(cppcore_random_src
include/cppcore/Random/RandomGenerator.h
code/Random/RandomGenerator.cpp
)

SET ( cppcore_container_src
SET(cppcore_container_src
include/cppcore/Container/THashMap.h
include/cppcore/Container/TArray.h
include/cppcore/Container/TStaticArray.h
Expand All @@ -96,7 +97,7 @@ SET ( cppcore_container_src
include/cppcore/Container/TStaticArray.h
)

SET ( cppcore_memory_src
SET(cppcore_memory_src
include/cppcore/Memory/MemUtils.h
include/cppcore/Memory/TDefaultAllocator.h
include/cppcore/Memory/TStackAllocator.h
Expand Down Expand Up @@ -137,6 +138,8 @@ IF( CPPCORE_BUILD_UNITTESTS )
test/common/SortTest.cpp
test/common/TBitFieldTest.cpp
test/common/TOptionalTest.cpp
test/common/TStringViewTest.cpp
test/common/TStringBaseTest.cpp
)

SET( cppcore_container_test_src
Expand Down
16 changes: 9 additions & 7 deletions include/cppcore/CPPCoreCommon.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,19 +45,19 @@ namespace cppcore {
#endif

#ifdef CPPCORE_WINDOWS
# define CPPCORE_TAG_DLL_EXPORT __declspec(dllexport)
# define CPPCORE_TAG_DLL_IMPORT __declspec(dllimport )
# define CPPCORE_TAG_DLL_EXPORT __declspec(dllexport)
# define CPPCORE_TAG_DLL_IMPORT __declspec(dllimport )
# ifdef CPPCORE_BUILD
# define DLL_CPPCORE_EXPORT CPPCORE_TAG_DLL_EXPORT
# define DLL_CPPCORE_EXPORT CPPCORE_TAG_DLL_EXPORT
# else
# define DLL_CPPCORE_EXPORT CPPCORE_TAG_DLL_IMPORT
# define DLL_CPPCORE_EXPORT CPPCORE_TAG_DLL_IMPORT
# endif
// All disabled warnings for windows
# pragma warning( disable : 4251 ) // <class> needs to have dll-interface to be used by clients of class <class>
# define CPPCORE_STACK_ALLOC(size) ::alloca(size)
# define CPPCORE_STACK_ALLOC(size) ::alloca(size)
#else
# define DLL_CPPCORE_EXPORT __attribute__((visibility("default")))
# define CPPCORE_STACK_ALLOC(size) __builtin_alloca(size)
# define DLL_CPPCORE_EXPORT __attribute__((visibility("default")))
# define CPPCORE_STACK_ALLOC(size) __builtin_alloca(size)
#endif

//-------------------------------------------------------------------------------------------------
Expand Down Expand Up @@ -107,4 +107,6 @@ public: \
//-------------------------------------------------------------------------------------------------
#define CPPCORE_ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))

using HashId = uint64_t;

} // Namespace cppcore
2 changes: 1 addition & 1 deletion include/cppcore/Common/Sort.h
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ namespace cppcore {
}

/// @brief Implements a binary search algorithm.
/// @tparam T The type of the value
/// @tparam T The type of the value
/// @param key The key to search for
/// @param array The data to search in
/// @param num The number of elements to search
Expand Down
172 changes: 70 additions & 102 deletions include/cppcore/Common/TStringBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -23,30 +23,11 @@ CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#pragma once

#include "string.h"
#include <cppcore/Common/Hash.h>
#include <cppcore/CPPCoreCommon.h>
#include <malloc.h>

namespace cppcore {

template <class T>
struct Allocator {
inline T *alloc(size_t size, size_t alignment) {
return (T*) _aligned_malloc(size, alignment);
}

inline void dealloc(T *ptr) {
_aligned_free(ptr);
}

inline static size_t countChars(const T *ptr) {
if (nullptr != ptr) {
return 0;
}

return (::strlen(ptr));
}
};

//-------------------------------------------------------------------------------------------------
/// @class TStringBase
/// @ingroup CPPCore
Expand All @@ -57,135 +38,122 @@ template <class T>
class TStringBase {
public:
/// @brief The default class constructor.
TStringBase() noexcept;
TStringBase() noexcept = default;

/// @brief The class constructor with a pointer showing to the data buffer.
/// @param pPtr [in] The data buffer.
TStringBase(const T *pPtr);
TStringBase(const T *pPtr, size_t size);

/// @brief The class destructor.
~TStringBase();

void set(const T *ptr);
void set(const T *ptr, size_t size);
void clear();
void reset();
size_t size() const;
size_t capacity() const;
const T *c_str() const;

/// @brief Helper method to copy data into the string.
/// @param base [inout] The string data to copy in.
/// @param pPtr [in] The data source.
static void copyFrom(TStringBase<T> &base, const T *pPtr);
static void copyFrom(TStringBase<T> &base, const T *pPtr, size_t size);

bool operator == (const TStringBase<T> &rhs) const;
bool operator != (const TStringBase<T> &rhs) const;

T *m_pStringBuffer;
size_t m_size;
size_t m_capacity;
Allocator<T> mAllocator;
private:
static constexpr size_t InitSize = 256;
T mBuffer[InitSize] = {'\0'};
T *mStringBuffer{nullptr};
size_t mSize{0};
size_t mCapacity{256};
HashId mHashId{0};
};

template <class T>
inline TStringBase<T>::TStringBase() noexcept :
m_pStringBuffer(nullptr),
m_size(0),
m_capacity(0),
mAllocator() {
// empty
inline TStringBase<T>::TStringBase(const T *pPtr, size_t size) {
copyFrom(*this, pPtr, size);
mHashId = THash<HashId>::toHash(pPtr, mSize);
}
Comment on lines +75 to 78
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Fix hash calculation in constructor.

The hash is calculated using the input pointer pPtr and mSize before copyFrom completes. This could result in incorrect hash values since mSize may not be properly set yet, and the hash should be calculated from the stored data, not the input data.

 inline TStringBase<T>::TStringBase(const T *pPtr, size_t size) {
     copyFrom(*this, pPtr, size);
-    mHashId = THash<HashId>::toHash(pPtr, mSize);
+    mHashId = THash<HashId>::toHash(c_str(), mSize);
 }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
inline TStringBase<T>::TStringBase(const T *pPtr, size_t size) {
copyFrom(*this, pPtr, size);
mHashId = THash<HashId>::toHash(pPtr, mSize);
}
inline TStringBase<T>::TStringBase(const T *pPtr, size_t size) {
copyFrom(*this, pPtr, size);
mHashId = THash<HashId>::toHash(c_str(), mSize);
}
🤖 Prompt for AI Agents
In include/cppcore/Common/TStringBase.h around lines 75 to 78, the constructor
calculates the hash using pPtr and mSize before copyFrom sets mSize and copies
the data. To fix this, move the hash calculation to after the copyFrom call and
use the internal data pointer and mSize to compute the hash, ensuring the hash
reflects the stored data correctly.


template <class T>
inline TStringBase<T>::TStringBase(const T *pPtr) :
m_pStringBuffer(nullptr),
m_size(0),
m_capacity(0),
mAllocator() {
copyFrom(*this, pPtr);
inline TStringBase<T>::~TStringBase() {
clear();
}

template <class T>
inline TStringBase<T>::~TStringBase() {
if (m_pStringBuffer) {
mAllocator.dealloc(m_pStringBuffer);
m_pStringBuffer = nullptr;
inline void TStringBase<T>::set(const T *ptr, size_t size) {
reset();
if (nullptr != ptr) {
copyFrom(*this, ptr, size);
}
}

template <class T>
inline void TStringBase<T>::set(const T *ptr) {
mAllocator.dealloc(m_pStringBuffer);
if (nullptr != ptr) {
copyFrom(*this, ptr);
}
inline void TStringBase<T>::reset() {
mSize = 0u;
}

template <class T>
inline void TStringBase<T>::copyFrom(TStringBase<T> &base, const T *ptr) {
if (nullptr != ptr) {
base.m_size = Allocator<T>::countChars(ptr);
if (base.m_size) {
base.m_capacity = base.m_size + 1;
base.m_pStringBuffer = base.mAllocator.alloc(base.m_capacity, 16);
#ifdef CPPCORE_WINDOWS
::strncpy_s(base.m_pStringBuffer, base.m_capacity, ptr, base.m_size);
#else
::strncpy(base.m_pStringBuffer, ptr, base.m_size);
#endif
base.m_pStringBuffer[base.m_size] = '\0';
}
}
inline size_t TStringBase<T>::size() const {
return mSize;
}

template <class T>
inline bool TStringBase<T>::operator==( const TStringBase<T> &rhs ) const {
if (rhs.m_size != m_size) {
return false;
}
inline size_t TStringBase<T>::capacity() const {
return mCapacity;
}

for (size_t i = 0; i < m_size; ++i) {
if (rhs.m_pStringBuffer[i] != m_pStringBuffer[i]) {
return false;
}
template <class T>
inline const T *TStringBase<T>::c_str() const {
if (mStringBuffer != nullptr) {
return mStringBuffer;
}

return true;
return mBuffer;
}

template <class T>
inline bool TStringBase<T>::operator!=(const TStringBase<T> &rhs) const {
return !(*this == rhs);
inline void TStringBase<T>::clear() {
if (mStringBuffer != nullptr) {
delete [] mStringBuffer;
mStringBuffer = nullptr;
mCapacity = InitSize;
}
mSize = 0;
}
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🛠️ Refactor suggestion

Reset hash ID when clearing string data.

The clear() method should also reset the hash ID to maintain consistency.

 inline void TStringBase<T>::clear() {
     if (mStringBuffer != nullptr) {
         delete [] mStringBuffer;
         mStringBuffer = nullptr;
         mCapacity = InitSize;
     }
     mSize = 0;
+    mHashId = 0;
 }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
inline void TStringBase<T>::clear() {
if (mStringBuffer != nullptr) {
delete [] mStringBuffer;
mStringBuffer = nullptr;
mCapacity = InitSize;
}
mSize = 0;
}
inline void TStringBase<T>::clear() {
if (mStringBuffer != nullptr) {
delete [] mStringBuffer;
mStringBuffer = nullptr;
mCapacity = InitSize;
}
mSize = 0;
mHashId = 0;
}
🤖 Prompt for AI Agents
In include/cppcore/Common/TStringBase.h around lines 118 to 125, the clear()
method deletes the string buffer and resets size and capacity but does not reset
the hash ID. Modify the clear() method to also reset the hash ID member variable
to its initial state to ensure consistency after clearing the string data.


template <class TCharType>
class TStringView {
public:
TStringView(TCharType *ptr);
~TStringView();
size_t size() const;
TCharType *data() const;

private:
TCharType *mPtr;
size_t mLen;
};

template <class TCharType>
inline TStringView<TCharType>::TStringView(TCharType *ptr) :
mPtr(ptr),
mLen(0) {
if (nullptr != mPtr) {
mLen = strlen(ptr);
template <class T>
inline void TStringBase<T>::copyFrom(TStringBase<T> &base, const T *ptr, size_t size) {
if (ptr != nullptr) {
T *targetPtr = base.mBuffer;

if (size > 0) {
if (size > base.mCapacity) {
base.mStringBuffer = new T[size];
base.mCapacity = size;
targetPtr = base.mStringBuffer;
}
memcpy(targetPtr, ptr, size);
base.mSize = size;
}
}
}
Comment on lines +129 to 143
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue

Address unsafe memory operations and missing null termination.

The copyFrom method has several critical issues:

  1. Using memcpy for potentially non-trivial types is unsafe
  2. No null termination for C-style strings
  3. Missing size validation
 inline void TStringBase<T>::copyFrom(TStringBase<T> &base, const T *ptr, size_t size) {
+    static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable");
     if (ptr != nullptr) {
         T *targetPtr = base.mBuffer;
         
         if (size > 0) {
-            if (size > base.mCapacity) {
-                base.mStringBuffer = new T[size];
-                base.mCapacity = size;
+            size_t requiredCapacity = size + 1; // +1 for null terminator
+            if (requiredCapacity > base.mCapacity) {
+                base.mStringBuffer = new T[requiredCapacity];
+                base.mCapacity = requiredCapacity;
                 targetPtr = base.mStringBuffer;
             }
             memcpy(targetPtr, ptr, size);
+            targetPtr[size] = T{}; // Null termination
             base.mSize = size;
         }
     }
 }
📝 Committable suggestion

‼️ IMPORTANT
Carefully review the code before committing. Ensure that it accurately replaces the highlighted code, contains no missing lines, and has no issues with indentation. Thoroughly test & benchmark the code to ensure it meets the requirements.

Suggested change
inline void TStringBase<T>::copyFrom(TStringBase<T> &base, const T *ptr, size_t size) {
if (ptr != nullptr) {
T *targetPtr = base.mBuffer;
if (size > 0) {
if (size > base.mCapacity) {
base.mStringBuffer = new T[size];
base.mCapacity = size;
targetPtr = base.mStringBuffer;
}
memcpy(targetPtr, ptr, size);
base.mSize = size;
}
}
}
inline void TStringBase<T>::copyFrom(TStringBase<T> &base, const T *ptr, size_t size) {
static_assert(std::is_trivially_copyable_v<T>, "T must be trivially copyable");
if (ptr != nullptr) {
T *targetPtr = base.mBuffer;
if (size > 0) {
size_t requiredCapacity = size + 1; // +1 for null terminator
if (requiredCapacity > base.mCapacity) {
base.mStringBuffer = new T[requiredCapacity];
base.mCapacity = requiredCapacity;
targetPtr = base.mStringBuffer;
}
memcpy(targetPtr, ptr, size);
targetPtr[size] = T{}; // Null termination
base.mSize = size;
}
}
}
🤖 Prompt for AI Agents
In include/cppcore/Common/TStringBase.h around lines 128 to 142, the copyFrom
method uses memcpy which is unsafe for non-trivial types, lacks null termination
for C-style strings, and does not validate the size properly. Replace memcpy
with element-wise copy or std::copy to safely handle non-trivial types, ensure
the copied string is null-terminated by adding a terminator if applicable, and
add checks to validate that size does not exceed capacity or other logical
constraints before copying.


template <class TCharType>
inline TStringView<TCharType>::~TStringView() {
// empty
}
template <class T>
inline bool TStringBase<T>::operator == (const TStringBase<T> &rhs) const {
if (rhs.mSize != mSize) {
return false;
}

template <class TCharType>
inline size_t TStringView<TCharType>::size() const {
return mLen;

return mHashId == rhs.mHashId;
}

template <class TCharType>
inline TCharType *TStringView<TCharType>::data() const {
template <class T>
inline bool TStringBase<T>::operator != (const TStringBase<T> &rhs) const {
return !(*this == rhs);
}

} // namespace cppcore
Loading