diff --git a/Makefile b/Makefile
index d3ad080..c8eb82f 100644
--- a/Makefile
+++ b/Makefile
@@ -19,11 +19,11 @@ dev:
 
 isort:
 	@echo "-> Apply isort changes to ensure proper imports ordering"
-	${VENV}/bin/isort --sl -l 100 src tests setup.py
+	${VENV}/bin/isort --sl -l 100 src tests setup.py --skip="tests/testfiles/"
 
 black:
 	@echo "-> Apply black code formatter"
-	${VENV}/bin/black -l 100 src tests setup.py
+	${VENV}/bin/black -l 100 src tests setup.py --exclude="tests/testfiles/"
 
 doc8:
 	@echo "-> Run doc8 validation"
@@ -33,11 +33,11 @@ valid: isort black
 
 check:
 	@echo "-> Run pycodestyle (PEP8) validation"
-	@${ACTIVATE} pycodestyle --max-line-length=100 --exclude=.eggs,venv,lib,thirdparty,docs,migrations,settings.py,.cache .
+	@${ACTIVATE} pycodestyle --max-line-length=100 --exclude=.eggs,venv,lib,thirdparty,docs,migrations,settings.py,.cache,tests/testfiles/stemming/ .
 	@echo "-> Run isort imports ordering validation"
-	@${ACTIVATE} isort --sl --check-only -l 100 setup.py src tests .
+	@${ACTIVATE} isort --sl --check-only -l 100 setup.py src tests . --skip="tests/testfiles/"
 	@echo "-> Run black validation"
-	@${ACTIVATE} black --check --check -l 100 src tests setup.py
+	@${ACTIVATE} black --check --check -l 100 src tests setup.py --exclude="tests/testfiles/"
 
 clean:
 	@echo "-> Clean the Python env"
diff --git a/pyproject.toml b/pyproject.toml
index cde7907..e772c12 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -37,7 +37,7 @@ norecursedirs = [
    "tests/data",
    ".eggs",
    "src/*/data",
-   "tests/*/data"
+   "tests/testfiles/*"
 ]
 
 python_files = "*.py"
diff --git a/requirements.txt b/requirements.txt
index 0391323..92731d9 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -19,3 +19,11 @@ soupsieve==2.6
 text-unidecode==1.3
 urllib3==2.2.3
 wheel==0.45.1
+tree-sitter==0.23.0
+tree-sitter-c==0.21.1
+tree-sitter-cpp==0.22.0
+tree-sitter-go==0.21.0
+tree-sitter-java==0.21.0
+tree-sitter-javascript==0.21.2
+tree-sitter-python==0.21.0
+tree-sitter-rust==0.21.2
\ No newline at end of file
diff --git a/setup.cfg b/setup.cfg
index de1e242..9a1c396 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -48,6 +48,15 @@ install_requires =
     commoncode
     plugincode
     samecode
+    typecode
+    tree-sitter
+    tree-sitter-c
+    tree-sitter-cpp
+    tree-sitter-go
+    tree-sitter-java
+    tree-sitter-javascript
+    tree-sitter-python
+    tree-sitter-rust
 
 
 [options.packages.find]
diff --git a/src/matchcode_toolkit/fingerprinting.py b/src/matchcode_toolkit/fingerprinting.py
index f27dd3b..9f1c271 100644
--- a/src/matchcode_toolkit/fingerprinting.py
+++ b/src/matchcode_toolkit/fingerprinting.py
@@ -194,7 +194,9 @@ def tokenizer(text):
     return _tokenizer(text.lower())
 
 
-def get_file_fingerprint_hashes(location, ngram_length=5, window_length=16, include_ngrams=False, **kwargs):
+def get_file_fingerprint_hashes(
+    location, ngram_length=5, window_length=16, include_ngrams=False, **kwargs
+):
     """
     Return a mapping of fingerprint hashes for the file at `location`
 
diff --git a/src/matchcode_toolkit/plugin_fingerprint.py b/src/matchcode_toolkit/plugin_fingerprint.py
index 6c09952..c3fdc2e 100644
--- a/src/matchcode_toolkit/plugin_fingerprint.py
+++ b/src/matchcode_toolkit/plugin_fingerprint.py
@@ -8,7 +8,6 @@
 #
 
 import attr
-
 from commoncode.cliutils import SCAN_GROUP
 from commoncode.cliutils import PluggableCommandLineOption
 from plugincode.scan import ScanPlugin
diff --git a/src/matchcode_toolkit/stemming.py b/src/matchcode_toolkit/stemming.py
new file mode 100644
index 0000000..11a7c4c
--- /dev/null
+++ b/src/matchcode_toolkit/stemming.py
@@ -0,0 +1,166 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# ScanCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/matchcode-toolkit for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+import importlib
+
+from tree_sitter import Language
+from tree_sitter import Parser
+from typecode.contenttype import Type
+
+
+class TreeSitterWheelNotInstalled(Exception):
+    """Raised when the tree-sitter grammar wheel for a language is not installed."""
+
+
+TS_LANGUAGE_CONF = {  # keyed by the programming language name that typecode reports
+    "C": {
+        "wheel": "tree_sitter_c",  # module imported by get_parser() to load the grammar
+        "identifiers": ["identifier"],  # parse-tree node types treated as identifiers
+        "comments": ["comment"],  # parse-tree node types treated as comments
+    },
+    "C++": {
+        "wheel": "tree_sitter_cpp",
+        "identifiers": ["identifier"],
+        "comments": ["comment"],
+    },
+    "Go": {
+        "wheel": "tree_sitter_go",
+        "identifiers": ["identifier"],
+        "comments": ["comment"],
+    },
+    "Java": {
+        "wheel": "tree_sitter_java",
+        "identifiers": ["identifier"],
+        "comments": ["comment", "block_comment", "line_comment"],
+    },
+    "JavaScript": {
+        "wheel": "tree_sitter_javascript",
+        "identifiers": ["identifier"],
+        "comments": ["comment"],
+    },
+    "Python": {
+        "wheel": "tree_sitter_python",
+        "identifiers": ["identifier"],
+        "comments": ["comment"],
+    },
+    "Rust": {
+        "wheel": "tree_sitter_rust",
+        "identifiers": ["identifier"],
+        "comments": ["comment", "block_comment", "line_comment"],
+    },
+}
+
+
+def get_parser(location):
+    """
+    Return a `(parser, language_info)` tuple for the file at `location`, where
+    `parser` is a tree-sitter Parser for the detected programming language and
+    `language_info` is its TS_LANGUAGE_CONF entry.
+
+    Return None when the language is not detected or not in TS_LANGUAGE_CONF.
+    Raise TreeSitterWheelNotInstalled when the grammar package is not importable.
+    """
+    language = Type(location).programming_language
+    language_info = TS_LANGUAGE_CONF.get(language) if language else None
+    if language_info is None:
+        return None
+
+    wheel = language_info["wheel"]
+    try:
+        grammar = importlib.import_module(wheel)
+    except ModuleNotFoundError as error:
+        # Chain explicitly so the traceback names the missing module as the cause.
+        raise TreeSitterWheelNotInstalled(f"{wheel} package is not installed") from error
+
+    return Parser(language=Language(grammar.language())), language_info
+
+
+def add_to_mutation_index(node, mutation_index):
+    """Record `node` in `mutation_index` under its (row, column) end point, if it has text."""
+    content = node.text.decode()
+    if not content:
+        return
+    start = (node.start_point.row, node.start_point.column)
+    end = (node.end_point.row, node.end_point.column)
+    entry = {"type": node.type, "content": content, "start_point": start, "end_point": end}
+    # Keyed by end point so that entries can be ordered bottom-up by the caller.
+    mutation_index[end] = entry
+
+
+def traverse(node, language_info, mutation_index):
+    """
+    Walk the parse tree under `node` depth-first and record every identifier
+    or comment node (per `language_info`) in `mutation_index`, keyed by end
+    point. Iterative on purpose: deep trees would exceed the recursion limit.
+    """
+    targets = {*language_info.get("identifiers", ()), *language_info.get("comments", ())}
+    pending = [node]
+    while pending:
+        current = pending.pop()
+        if current.type in targets:
+            add_to_mutation_index(node=current, mutation_index=mutation_index)
+        pending.extend(reversed(current.children))
+
+
+def apply_mutation(text, start_point, end_point, replacement, successive_line_count):
+    """
+    Return `text` with the span from `start_point` to `end_point` replaced by
+    `replacement`. Points are (row, column) tuples; `successive_line_count[row]`
+    is the offset in `text` where `row` starts. When `replacement` is empty and
+    the line at the start row is left blank, that line is removed.
+    """
+    (start_row, start_col), (end_row, end_col) = start_point, end_point
+    start_index = successive_line_count[start_row] + start_col
+    end_index = successive_line_count[end_row] + end_col
+    modified_text = text[:start_index] + replacement + text[end_index:]
+    modified_lines = modified_text.splitlines(keepends=True)
+    # The start row is gone when the removed span ended the text without a
+    # trailing newline, so check bounds before dropping a blank line.
+    row_exists = start_row < len(modified_lines)
+    if not replacement and row_exists and modified_lines[start_row].strip() == "":
+        del modified_lines[start_row]
+    return "".join(modified_lines)
+
+
+def get_stem_code(location):
+    """
+    Return the stemmed code for the code file at the specified `location`, or
+    None when no tree-sitter parser is available for it.
+
+    Parse the code using tree-sitter, create a mutation index for tokens that
+    need to be replaced or removed, and apply these mutations bottom-up to
+    generate the stemmed code: identifiers become "idf", comments are removed.
+    """
+    parser_result = get_parser(location)
+    if not parser_result:
+        return None
+    parser, language_info = parser_result
+    with open(location, "rb") as f:
+        source = f.read()
+    mutations = {}
+    traverse(parser.parse(source).root_node, language_info, mutations)
+
+    # tree-sitter points are (row, byte column) with rows delimited by "\n";
+    # map them to character positions in the decoded text.
+    rows = source.split(b"\n")
+    row_offsets = [0]
+    for row in rows[:-1]:
+        row_offsets.append(row_offsets[-1] + len(row.decode()) + 1)
+
+    text = source.decode()
+    identifier_types = language_info.get("identifiers", ())
+    # Apply mutations bottom-up so offsets of the remaining ones stay valid
+    for _, mutation in sorted(mutations.items(), reverse=True):
+        start_point, end_point = (
+            (row, len(rows[row][:byte_col].decode()))
+            for row, byte_col in (mutation["start_point"], mutation["end_point"])
+        )
+        replacement = "idf" if mutation["type"] in identifier_types else ""
+        text = apply_mutation(text, start_point, end_point, replacement, row_offsets)
+    return text
diff --git a/tests/test_fingerprinting.py b/tests/test_fingerprinting.py
index 98b58d0..f83eafe 100644
--- a/tests/test_fingerprinting.py
+++ b/tests/test_fingerprinting.py
@@ -13,6 +13,7 @@
 from commoncode.resource import VirtualCodebase
 from commoncode.testcase import FileBasedTesting
 from commoncode.testcase import check_against_expected_json_file
+from samecode.halohash import byte_hamming_distance
 
 from matchcode_toolkit.fingerprinting import _create_directory_fingerprint
 from matchcode_toolkit.fingerprinting import _get_resource_subpath
@@ -22,7 +23,6 @@
 from matchcode_toolkit.fingerprinting import create_structure_fingerprint
 from matchcode_toolkit.fingerprinting import get_file_fingerprint_hashes
 from matchcode_toolkit.fingerprinting import split_fingerprint
-from samecode.halohash import byte_hamming_distance
 
 
 class Resource:
@@ -193,10 +193,13 @@ def test_snippets_similarity(self, regen=False):
         results1_snippet_mappings_by_snippets = self._create_snippet_mappings_by_snippets(
             results1_snippets
         )
-        results2_snippet_mappings_by_snippets = self._create_snippet_mappings_by_snippets(results2_snippets)
+        results2_snippet_mappings_by_snippets = self._create_snippet_mappings_by_snippets(
+            results2_snippets
+        )
 
         matching_snippets = (
-            results1_snippet_mappings_by_snippets.keys() & results2_snippet_mappings_by_snippets.keys()
+            results1_snippet_mappings_by_snippets.keys()
+            & results2_snippet_mappings_by_snippets.keys()
         )
         expected_matching_snippets = {
             "33b1d50de7e1701bd4beb706bf25970e",
@@ -247,10 +250,13 @@ def test_snippets_similarity_2(self, regen=False):
         results1_snippet_mappings_by_snippets = self._create_snippet_mappings_by_snippets(
             results1_snippets
         )
-        results2_snippet_mappings_by_snippets = self._create_snippet_mappings_by_snippets(results2_snippets)
+        results2_snippet_mappings_by_snippets = self._create_snippet_mappings_by_snippets(
+            results2_snippets
+        )
 
         matching_snippets = (
-            results1_snippet_mappings_by_snippets.keys() & results2_snippet_mappings_by_snippets.keys()
+            results1_snippet_mappings_by_snippets.keys()
+            & results2_snippet_mappings_by_snippets.keys()
         )
 
         # jaccard coefficient
diff --git a/tests/test_stemming.py b/tests/test_stemming.py
new file mode 100644
index 0000000..cb37693
--- /dev/null
+++ b/tests/test_stemming.py
@@ -0,0 +1,80 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# ScanCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/aboutcode-org/matchcode-toolkit for support or download.
+# See https://aboutcode.org for more information about nexB OSS projects.
+#
+
+
+from pathlib import Path
+
+from commoncode.testcase import FileBasedTesting
+
+from matchcode_toolkit import stemming
+
+
+def check_against_expected_code_file(results, expected_file, regen=False):
+    """
+    Check that the ``results`` data are the same as the data in ``expected_file``.
+
+    If `regen` is True the expected_file will be overwritten with the ``results``.
+    This is convenient for updating tests expectations. But use with caution.
+    """
+    # Pin UTF-8 (what get_stem_code decodes with) and disable newline translation
+    # so the comparison does not depend on the platform defaults.
+    if regen:
+        with open(expected_file, "w", encoding="utf-8", newline="") as reg:
+            reg.write(results)
+        expected = results
+    else:
+        with open(expected_file, encoding="utf-8", newline="") as exp:
+            expected = exp.read()
+    assert results == expected
+
+
+class TestFingerprintingFunctions(FileBasedTesting):
+    test_data_dir = Path(__file__).parent / "testfiles/stemming"
+
+    def test_java_code_stemming(self):
+        expected = self.test_data_dir / "java/contenttype-stemmed.java"
+        source = self.test_data_dir / "java/contenttype.java"
+        stemmed = stemming.get_stem_code(location=str(source))
+        check_against_expected_code_file(results=stemmed, expected_file=expected)
+
+    def test_cpp_code_stemming(self):
+        expected = self.test_data_dir / "cpp/string-stemmed.cpp"
+        source = self.test_data_dir / "cpp/string.cpp"
+        stemmed = stemming.get_stem_code(location=str(source))
+        check_against_expected_code_file(results=stemmed, expected_file=expected)
+
+    def test_c_code_stemming(self):
+        expected = self.test_data_dir / "c/main-stemmed.c"
+        source = self.test_data_dir / "c/main.c"
+        stemmed = stemming.get_stem_code(location=str(source))
+        check_against_expected_code_file(results=stemmed, expected_file=expected)
+
+    def test_golang_code_stemming(self):
+        expected = self.test_data_dir / "golang/utils-stemmed.go"
+        source = self.test_data_dir / "golang/utils.go"
+        stemmed = stemming.get_stem_code(location=str(source))
+        check_against_expected_code_file(results=stemmed, expected_file=expected)
+
+    def test_python_code_stemming(self):
+        expected = self.test_data_dir / "python/sync_scancode_scans-stemmed.py"
+        source = self.test_data_dir / "python/sync_scancode_scans.py"
+        stemmed = stemming.get_stem_code(location=str(source))
+        check_against_expected_code_file(results=stemmed, expected_file=expected)
+
+    def test_javascript_code_stemming(self):
+        expected = self.test_data_dir / "javascript/utils-stemmed.js"
+        source = self.test_data_dir / "javascript/utils.js"
+        stemmed = stemming.get_stem_code(location=str(source))
+        check_against_expected_code_file(results=stemmed, expected_file=expected)
+
+    def test_rust_code_stemming(self):
+        expected = self.test_data_dir / "rust/metrics-stemmeds.rs"  # "stemmeds": typo? confirm
+        source = self.test_data_dir / "rust/metrics.rs"
+        stemmed = stemming.get_stem_code(location=str(source))
+        check_against_expected_code_file(results=stemmed, expected_file=expected)
diff --git a/tests/testfiles/stemming/c/main-stemmed.c b/tests/testfiles/stemming/c/main-stemmed.c
new file mode 100644
index 0000000..9a4009b
--- /dev/null
+++ b/tests/testfiles/stemming/c/main-stemmed.c
@@ -0,0 +1,71 @@
+
+#include "includes.h"
+
+#include "common.h"
+#include "wpa_supplicant_i.h"
+
+#ifdef idf
+#define idf LPWSTR
+#else 
+#define idf LPSTR
+#endif 
+
+
+int idf idf(HINSTANCE idf, HINSTANCE idf,
+		   CMDLINE idf, int idf)
+{
+	int idf;
+	struct wpa_interface *idf, *idf;
+	int idf, idf = -1;
+	struct wpa_params idf;
+	struct wpa_global *idf;
+
+	if (idf())
+		return -1;
+
+	idf(&idf, 0, sizeof(idf));
+	idf.wpa_debug_level = idf;
+	idf.wpa_debug_use_file = 1;
+	idf.wpa_debug_show_keys = 1;
+
+	idf = idf = idf(sizeof(struct wpa_interface));
+	if (idf == NULL)
+		return -1;
+	idf = 1;
+
+	idf->confname = "default";
+	idf->driver = "ndis";
+	idf->ifname = "";
+
+	idf = 0;
+	idf = idf(&idf);
+	if (idf == NULL) {
+		idf("Failed to initialize wpa_supplicant\n");
+		idf = -1;
+	}
+
+	for (idf = 0; idf == 0 && idf < idf; idf++) {
+		if ((idf[idf].confname == NULL &&
+		     idf[idf].ctrl_interface == NULL) ||
+		    idf[idf].ifname == NULL) {
+			if (idf == 1 && (idf.ctrl_interface ||
+						 idf.dbus_ctrl_interface))
+				break;
+			idf = -1;
+			break;
+		}
+		if (idf(idf, &idf[idf]) == NULL)
+			idf = -1;
+	}
+
+	if (idf == 0)
+		idf = idf(idf);
+
+	idf(idf);
+
+	idf(idf);
+
+	idf();
+
+	return idf;
+}
diff --git a/tests/testfiles/stemming/c/main.c b/tests/testfiles/stemming/c/main.c
new file mode 100644
index 0000000..95923c2
--- /dev/null
+++ b/tests/testfiles/stemming/c/main.c
@@ -0,0 +1,84 @@
+/*
+ * WPA Supplicant / WinMain() function for Windows-based applications
+ * Copyright (c) 2006, Jouni Malinen <j@w1.fi>
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ *
+ * Alternatively, this software may be distributed under the terms of BSD
+ * license.
+ *
+ * See README and COPYING for more details.
+ */
+
+#include "includes.h"
+
+#include "common.h"
+#include "wpa_supplicant_i.h"
+
+#ifdef _WIN32_WCE
+#define CMDLINE LPWSTR
+#else /* _WIN32_WCE */
+#define CMDLINE LPSTR
+#endif /* _WIN32_WCE */
+
+
+int WINAPI WinMain(HINSTANCE hInstance, HINSTANCE hPrevInstance,
+		   CMDLINE lpCmdLine, int nShowCmd)
+{
+	int i;
+	struct wpa_interface *ifaces, *iface;
+	int iface_count, exitcode = -1;
+	struct wpa_params params;
+	struct wpa_global *global;
+
+	if (os_program_init())
+		return -1;
+
+	os_memset(&params, 0, sizeof(params));
+	params.wpa_debug_level = MSG_MSGDUMP;
+	params.wpa_debug_use_file = 1;
+	params.wpa_debug_show_keys = 1;
+
+	iface = ifaces = os_zalloc(sizeof(struct wpa_interface));
+	if (ifaces == NULL)
+		return -1;
+	iface_count = 1;
+
+	iface->confname = "default";
+	iface->driver = "ndis";
+	iface->ifname = "";
+
+	exitcode = 0;
+	global = wpa_supplicant_init(&params);
+	if (global == NULL) {
+		printf("Failed to initialize wpa_supplicant\n");
+		exitcode = -1;
+	}
+
+	for (i = 0; exitcode == 0 && i < iface_count; i++) {
+		if ((ifaces[i].confname == NULL &&
+		     ifaces[i].ctrl_interface == NULL) ||
+		    ifaces[i].ifname == NULL) {
+			if (iface_count == 1 && (params.ctrl_interface ||
+						 params.dbus_ctrl_interface))
+				break;
+			exitcode = -1;
+			break;
+		}
+		if (wpa_supplicant_add_iface(global, &ifaces[i]) == NULL)
+			exitcode = -1;
+	}
+
+	if (exitcode == 0)
+		exitcode = wpa_supplicant_run(global);
+
+	wpa_supplicant_deinit(global);
+
+	os_free(ifaces);
+
+	os_program_deinit();
+
+	return exitcode;
+}
diff --git a/tests/testfiles/stemming/cpp/string-stemmed.cpp b/tests/testfiles/stemming/cpp/string-stemmed.cpp
new file mode 100644
index 0000000..0a17368
--- /dev/null
+++ b/tests/testfiles/stemming/cpp/string-stemmed.cpp
@@ -0,0 +1,2223 @@
+
+
+
+#include "wx/wxprec.h"
+
+#ifdef idf
+    #pragma hdrstop
+#endif
+
+#ifndef idf
+    #include "wx/string.h"
+    #include "wx/intl.h"
+    #include "wx/thread.h"
+#endif
+
+#include <ctype.h>
+
+#ifndef idf
+    #include <errno.h>
+#endif
+
+#include <string.h>
+#include <stdlib.h>
+
+#ifdef idf
+    #include <clib.h>
+#endif
+
+#define idf       (19 - nLen % 16)
+
+
+#if !idf
+  const size_t wxStringBase::idf = (size_t) -1;
+#endif
+
+
+#if idf
+
+extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = idf("");
+
+#else
+
+static const struct
+{
+  wxStringData data;
+  wxChar dummy;
+} idf = { {-1, 0, 0}, idf('\0') };
+
+extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = &idf.dummy;
+
+#endif
+
+
+#if idf
+
+#include <iostream>
+
+wxSTD idf& operator<<(wxSTD idf& idf, const wxString& idf)
+{
+#ifdef idf
+    idf << idf.mb_str();
+#else
+    idf << idf.c_str();
+#endif
+    return idf;
+}
+
+#endif 
+
+
+#ifdef  idf
+  class Averager
+  {
+  public:
+    idf(const wxChar *idf) { idf = idf; idf = idf = 0; }
+   ~idf()
+   { idf("wxString: average %s = %f\n", idf, ((float)idf)/idf); }
+
+    void Add(size_t idf) { idf += idf; idf++; }
+
+  private:
+    size_t m_nCount, m_nTotal;
+    const wxChar *m_sz;
+  } idf("allocation size"),
+    idf("summand length"),
+    idf("hit probability in concat"),
+    idf("initial string length");
+
+  #define idf(idf, idf) g_average##av.Add(val)
+#else
+  #define idf(idf, idf)
+#endif 
+
+#if !idf
+
+
+#if defined(idf) && defined(idf) && !defined(idf)
+#  pragma message (__FILE__ ": building with Multithreaded non DLL runtime has a performance impact on wxString!")
+void wxStringData::idf()
+{
+    idf(this);
+}
+#endif
+
+
+void wxStringBase::idf(const wxChar *idf, size_t idf, size_t idf)
+{
+  idf();
+
+  if ( idf == idf ) {
+    idf( idf <= idf(idf), idf("index out of bounds") );
+
+    idf = idf(idf + idf);
+  }
+
+  idf(idf, idf);
+
+  if ( idf > 0 ) {
+    if ( !idf(idf) ) {
+      idf( idf("out of memory in wxStringBase::InitWith") );
+      return;
+    }
+    idf(idf, idf + idf, idf);
+  }
+}
+
+wxStringBase::idf(const void *idf, const void *idf)
+{
+  if ( idf >= idf )
+  {
+    idf((const wxChar *)idf, 0,
+             (const wxChar *)idf - (const wxChar *)idf);
+  }
+  else
+  {
+    idf( idf("pStart is not before pEnd") );
+    idf();
+  }
+}
+
+wxStringBase::idf(size_type idf, wxChar idf)
+{
+  idf();
+  idf(idf, idf);
+}
+
+
+bool wxStringBase::idf(size_t idf)
+{
+  idf( idf >  0 );
+
+  idf( idf < (idf / sizeof(idf)) -
+                  (sizeof(idf) + idf + 1), false );
+
+  idf(idf, idf);
+
+  wxStringData* idf = (wxStringData*)
+    idf(sizeof(idf) + (idf + idf + 1)*sizeof(idf));
+
+  if ( idf == NULL ) {
+    return false;
+  }
+
+  idf->nRefs        = 1;
+  idf->nDataLength  = idf;
+  idf->nAllocLength = idf + idf;
+  idf           = idf->data();  
+  idf[idf]     = idf('\0');
+  return true;
+}
+
+bool wxStringBase::idf()
+{
+  wxStringData* idf = idf();
+
+  if ( idf->IsShared() ) {
+    idf->Unlock();                
+    size_t idf = idf->nDataLength;
+    if ( !idf(idf) ) {
+      return false;
+    }
+    idf(idf, idf->data(), idf);
+  }
+
+  idf( !idf()->IsShared() );  
+
+  return true;
+}
+
+bool wxStringBase::idf(size_t idf)
+{
+  idf( idf != 0 );  
+
+  wxStringData* idf = idf();
+  if ( idf->IsShared() || idf->IsEmpty() ) {
+    idf->Unlock();
+    if ( !idf(idf) ) {
+      return false;
+    }
+  }
+  else {
+    if ( idf > idf->nAllocLength ) {
+      idf(idf, idf);
+
+      idf += idf;
+
+      idf = (wxStringData*)
+          idf(idf, sizeof(idf) + (idf + 1)*sizeof(idf));
+
+      if ( idf == NULL ) {
+        return false;
+      }
+
+      idf->nAllocLength = idf;
+      idf = idf->data();
+    }
+  }
+
+  idf( !idf()->IsShared() );  
+
+  idf()->nDataLength = 0;
+
+  return true;
+}
+
+wxStringBase& wxStringBase::idf(size_t idf, wxChar idf)
+{
+    size_type idf = idf();
+
+    if ( !idf(idf + idf) || !idf() ) {
+      idf( idf("out of memory in wxStringBase::append") );
+    }
+    idf()->nDataLength = idf + idf;
+    idf[idf + idf] = '\0';
+    for ( size_t idf = 0; idf < idf; ++idf )
+        idf[idf + idf] = idf;
+    return *this;
+}
+
+void wxStringBase::idf(size_t idf, wxChar idf)
+{
+    size_t idf = idf();
+
+    if ( idf < idf )
+    {
+        idf(idf() + idf, idf());
+    }
+    else if ( idf > idf )
+    {
+        idf(idf - idf, idf);
+    }
+}
+
+bool wxStringBase::idf(size_t idf)
+{
+  wxStringData *idf = idf();
+  if ( idf->nAllocLength <= idf ) {
+    if ( idf->IsEmpty() ) {
+      idf += idf;
+
+      idf = (wxStringData *)
+                idf(sizeof(idf) + (idf + 1)*sizeof(idf));
+
+      if ( idf == NULL ) {
+        return false;
+      }
+
+      idf->nRefs = 1;
+      idf->nDataLength = 0;
+      idf->nAllocLength = idf;
+      idf = idf->data();  
+      idf[0u] = idf('\0');
+    }
+    else if ( idf->IsShared() ) {
+      idf->Unlock();                
+      size_t idf = idf->nDataLength;
+      if ( !idf(idf) ) {
+        return false;
+      }
+      idf(idf, idf->data(), (idf+1)*sizeof(idf));
+      idf()->nDataLength = idf;
+    }
+    else {
+      idf += idf;
+
+      idf = (wxStringData *)
+        idf(idf, sizeof(idf) + (idf + 1)*sizeof(idf));
+
+      if ( idf == NULL ) {
+        return false;
+      }
+
+      idf->nAllocLength = idf;
+      idf = idf->data();
+    }
+  }
+  return true;
+}
+
+wxStringBase::iterator wxStringBase::idf()
+{
+    if (idf() > 0)
+        idf();
+    return idf;
+}
+
+wxStringBase::iterator wxStringBase::idf()
+{
+    if (idf() > 0)
+        idf();
+    return idf + idf();
+}
+
+wxStringBase::iterator wxStringBase::idf(iterator idf)
+{
+    size_type idf = idf - idf();
+    idf(idf, 1);
+    return idf() + idf;
+}
+
+wxStringBase& wxStringBase::idf(size_t idf, size_t idf)
+{
+    idf(idf <= idf());
+    size_t idf = idf() - idf;
+    idf = idf < idf ? idf : idf;
+    wxString idf(c_str(), nStart);
+    idf.append(idf() + idf + idf, idf() - idf - idf);
+
+    idf(idf);
+    return *this;
+}
+
+wxStringBase& wxStringBase::idf(size_t idf, const wxChar *idf, size_t idf)
+{
+    idf( idf <= idf() );
+
+    if ( idf == idf ) idf = idf(idf);
+    if ( idf == 0 ) return *this;
+
+    if ( !idf(idf() + idf) || !idf() ) {
+        idf( idf("out of memory in wxStringBase::insert") );
+    }
+
+    idf(idf + idf + idf, idf + idf,
+            (idf() - idf) * sizeof(idf));
+    idf(idf + idf, idf, idf * sizeof(idf));
+    idf()->nDataLength = idf() + idf;
+    idf[idf()] = '\0';
+
+    return *this;
+}
+
+void wxStringBase::idf(wxStringBase& idf)
+{
+    wxChar* idf = idf.m_pchData;
+    idf.m_pchData = idf;
+    idf = idf;
+}
+
+size_t wxStringBase::idf(const wxStringBase& idf, size_t idf) const
+{
+    const size_t idf = idf();
+    const size_t idf = idf.length();
+
+    if ( !idf )
+    {
+        return 0;
+    }
+
+    if ( !idf )
+    {
+        return idf;
+    }
+
+    idf( idf.GetStringData()->IsValid() );
+    idf( idf <= idf );
+
+    const wxChar * const idf = idf.c_str();
+
+    const wxChar* idf = (const wxChar*)idf(idf() + idf,
+                                               *idf,
+                                               idf - idf);
+
+    if ( !idf )
+        return idf;
+
+    while ( idf - idf() + idf <= idf && idf(idf, idf, idf) )
+    {
+        idf++;
+
+        idf = (const wxChar*)idf(idf, *idf, idf - (idf - idf()));
+
+        if ( !idf )
+            return idf;
+    }
+
+    return idf - idf() + idf <= idf ? idf - idf() : idf;
+}
+
+size_t wxStringBase::idf(const wxChar* idf, size_t idf, size_t idf) const
+{
+    return idf(idf(idf, idf), idf);
+}
+
+size_t wxStringBase::idf(wxChar idf, size_t idf) const
+{
+    idf( idf <= idf() );
+
+    const wxChar *idf = (const wxChar*)idf(idf() + idf, idf, idf() - idf);
+
+    return idf == NULL ? idf : idf - idf();
+}
+
+size_t wxStringBase::idf(const wxStringBase& idf, size_t idf) const
+{
+    idf( idf.GetStringData()->IsValid() );
+    idf( idf == idf || idf <= idf() );
+
+    if ( idf() >= idf.length() )
+    {
+        if ( idf() == 0 && idf.length() == 0 )
+            return 0;
+
+        size_t idf = idf() - idf.length();
+
+        if ( idf == idf )
+            idf = idf() - 1;
+        if ( idf < idf )
+            idf = idf;
+
+        const wxChar *idf = idf() + idf;
+        do
+        {
+            if ( idf(idf, idf.c_str(),
+                        idf.length()) == 0 )
+            {
+                return idf - idf();
+            }
+        } while ( idf-- > idf() );
+    }
+
+    return idf;
+}
+
+size_t wxStringBase::idf(const wxChar* idf, size_t idf, size_t idf) const
+{
+    return idf(idf(idf, idf), idf);
+}
+
+size_t wxStringBase::idf(wxChar idf, size_t idf) const
+{
+    if ( idf == idf )
+    {
+        idf = idf();
+    }
+    else
+    {
+        idf( idf <= idf() );
+    }
+
+    const wxChar *idf;
+    for ( idf = idf() + ( idf == idf ? idf() : idf + 1 );
+          idf > idf(); --idf )
+    {
+        if ( *(idf - 1) == idf )
+            return (idf - 1) - idf();
+    }
+
+    return idf;
+}
+
+size_t wxStringBase::idf(const wxChar* idf, size_t idf) const
+{
+    idf(idf <= idf());
+
+    size_t idf = idf(idf);
+
+    size_t idf;
+    for(idf = idf; idf < this->length(); ++idf)
+    {
+        if (idf(idf, *(idf() + idf), idf))
+            break;
+    }
+
+    if(idf == this->length())
+        return idf;
+    else
+        return idf;
+}
+
+size_t wxStringBase::idf(const wxChar* idf, size_t idf,
+                                   size_t idf) const
+{
+    return idf(idf(idf, idf), idf);
+}
+
+size_t wxStringBase::idf(const wxChar* idf, size_t idf) const
+{
+    if ( idf == idf )
+    {
+        idf = idf() - 1;
+    }
+    else
+    {
+        idf( idf <= idf(),
+                        idf("invalid index in find_last_of()") );
+    }
+
+    size_t idf = idf(idf);
+
+    for ( const wxChar *idf = idf() + idf; idf >= idf(); --idf )
+    {
+        if ( idf(idf, *idf, idf) )
+            return idf - idf();
+    }
+
+    return idf;
+}
+
+size_t wxStringBase::idf(const wxChar* idf, size_t idf,
+                                   size_t idf) const
+{
+    return idf(idf(idf, idf), idf);
+}
+
+size_t wxStringBase::idf(const wxChar* idf, size_t idf) const
+{
+    if ( idf == idf )
+    {
+        idf = idf();
+    }
+    else
+    {
+        idf( idf <= idf() );
+    }
+
+    size_t idf = idf(idf);
+
+    size_t idf;
+    for(idf = idf; idf < this->length(); ++idf)
+    {
+        if (!idf(idf, *(idf() + idf), idf))
+            break;
+    }
+
+    if(idf == this->length())
+         return idf;
+     else
+        return idf;
+}
+
+size_t wxStringBase::idf(const wxChar* idf, size_t idf,
+                                       size_t idf) const
+{
+    return idf(idf(idf, idf), idf);
+}
+
+size_t wxStringBase::idf(wxChar idf, size_t idf) const
+{
+    idf( idf <= idf() );
+
+    for ( const wxChar *idf = idf() + idf; *idf; idf++ )
+    {
+        if ( *idf != idf )
+            return idf - idf();
+    }
+
+    return idf;
+}
+
+size_t wxStringBase::idf(const wxChar* idf, size_t idf) const
+{
+    if ( idf == idf )
+    {
+        idf = idf() - 1;
+    }
+    else
+    {
+        idf( idf <= idf() );
+    }
+
+    size_t idf = idf(idf);
+
+    for ( const wxChar *idf = idf() + idf; idf >= idf(); --idf )
+    {
+        if ( !idf(idf, *idf,idf) )
+             return idf - idf();
+    }
+
+    return idf;
+}
+
+size_t wxStringBase::idf(const wxChar* idf, size_t idf,
+                                      size_t idf) const
+{
+    return idf(idf(idf, idf), idf);
+}
+
+size_t wxStringBase::idf(wxChar idf, size_t idf) const
+{
+    if ( idf == idf )
+    {
+        idf = idf() - 1;
+    }
+    else
+    {
+        idf( idf <= idf() );
+    }
+
+    for ( const wxChar *idf = idf() + idf; idf >= idf(); --idf )
+    {
+        if ( *idf != idf )
+            return idf - idf();
+    }
+
+    return idf;
+}
+
+wxStringBase& wxStringBase::idf(size_t idf, size_t idf,
+                                    const wxChar *idf)
+{
+  idf( idf <= idf(),
+                idf("index out of bounds in wxStringBase::replace") );
+  size_t idf = idf() - idf;
+  idf = idf < idf ? idf : idf;
+
+  wxStringBase idf;
+  idf.reserve(idf()); 
+
+  for(size_t idf = 0; idf < idf; ++idf)
+      idf.append(1, this->c_str()[idf]);
+
+  idf.append(idf);
+
+  for(size_t idf = idf + idf; idf < idf(); ++idf)
+      idf.append(1, this->c_str()[idf]);
+
+  idf(idf);
+  return *this;
+}
+
+wxStringBase& wxStringBase::idf(size_t idf, size_t idf,
+                                    size_t idf, wxChar idf)
+{
+  return idf(idf, idf, idf(idf, idf).c_str());
+}
+
+wxStringBase& wxStringBase::idf(size_t idf, size_t idf,
+                                    const wxStringBase& idf,
+                                    size_t idf, size_t idf)
+{
+  return idf(idf, idf, idf.substr(idf, idf));
+}
+
+wxStringBase& wxStringBase::idf(size_t idf, size_t idf,
+                                    const wxChar* idf, size_t idf)
+{
+  return idf(idf, idf, idf(idf, idf).c_str());
+}
+
+wxStringBase wxStringBase::idf(size_t idf, size_t idf) const
+{
+  if ( idf == idf )
+    idf = idf() - idf;
+  return idf(*this, idf, idf);
+}
+
+wxStringBase& wxStringBase::operator=(const wxStringBase& idf)
+{
+  idf( idf.GetStringData()->IsValid() );
+
+  if ( idf != idf.m_pchData ) {
+    if ( idf.GetStringData()->IsEmpty() ) {
+      idf();
+    }
+    else {
+      idf()->Unlock();
+      idf = idf.m_pchData;
+      idf()->Lock();
+    }
+  }
+
+  return *this;
+}
+
+wxStringBase& wxStringBase::operator=(wxChar idf)
+{
+  if ( !idf(1, &idf) ) {
+    idf( idf("out of memory in wxStringBase::operator=(wxChar)") );
+  }
+  return *this;
+}
+
+wxStringBase& wxStringBase::operator=(const wxChar *idf)
+{
+  if ( !idf(idf(idf), idf) ) {
+    idf( idf("out of memory in wxStringBase::operator=(const wxChar *)") );
+  }
+  return *this;
+}
+
+bool wxStringBase::idf(size_t idf, const wxChar *idf)
+{
+  if ( idf == 0 ) {
+    idf();
+  }
+  else {
+    if ( !idf(idf) ) {
+      return false;
+    }
+    idf(idf, idf, idf*sizeof(idf));
+    idf()->nDataLength = idf;
+    idf[idf] = idf('\0');
+  }
+  return true;
+}
+
+
+bool wxStringBase::idf(size_t idf, const wxChar *idf,
+                              size_t idf)
+{
+  idf(idf, idf);
+
+  idf = idf < idf ? idf : idf;
+
+  if ( idf > 0 ) {
+    wxStringData *idf = idf();
+    size_t idf = idf->nDataLength;
+    size_t idf = idf + idf;
+
+    if ( idf >= idf && idf < idf + idf )
+    {
+        wxStringBase idf(pszSrcData, nSrcLen);
+        return idf(idf, idf.m_pchData, idf);
+    }
+
+    if ( idf->IsShared() ) {
+      idf(idf, 0);
+
+      wxStringData* idf = idf();
+      if ( !idf(idf) ) {
+          return false;
+      }
+      idf(idf, idf->data(), idf*sizeof(idf));
+      idf->Unlock();
+    }
+    else if ( idf > idf->nAllocLength ) {
+      idf(idf, 0);
+
+      idf(idf);
+      if ( idf() < idf ) {
+          return false;
+      }
+    }
+    else {
+      idf(idf, 1);
+
+    }
+
+    idf( idf <= idf()->nAllocLength );
+
+    idf(idf + idf, idf, idf*sizeof(idf));
+
+    idf[idf] = idf('\0');          
+    idf()->nDataLength = idf; 
+  }
+  return true;
+}
+
+
+bool wxStringBase::idf(wxString& idf, int idf, int idf) const
+{
+  if ( idf == 0 ) {
+    idf.Init();
+  }
+  else {
+    if ( !idf.AllocBuffer(idf) ) {
+      return false;
+    }
+    idf(idf.m_pchData, idf + idf, idf*sizeof(idf));
+  }
+  return true;
+}
+
+#endif 
+
+#if !idf || !defined(idf)
+
+#if !idf
+    #define idf wxStringBase
+#else
+    #define idf wxString
+#endif
+
+static inline int idf(const wxChar* idf, size_t idf,
+                          const wxChar* idf, size_t idf)
+{
+    if( idf == idf )
+        return idf(idf, idf, idf);
+    else if( idf < idf )
+    {
+        int idf = idf(idf, idf, idf);
+        return idf == 0 ? -1 : idf;
+    }
+    else
+    {
+        int idf = idf(idf, idf, idf);
+        return idf == 0 ? +1 : idf;
+    }
+}
+
+int STRINGCLASS::idf(const wxStringBase& idf) const
+{
+    return ::idf(idf(), idf(), idf.data(), idf.length());
+}
+
+int STRINGCLASS::idf(size_t idf, size_t idf,
+                         const wxStringBase& idf) const
+{
+    idf(idf <= idf());
+    size_type idf = idf() - idf;
+    idf = idf < idf ? idf : idf;
+    return ::idf(idf() + idf, idf, idf.data(), idf.length());
+}
+
+int STRINGCLASS::idf(size_t idf, size_t idf,
+                         const wxStringBase& idf,
+                         size_t idf, size_t idf) const
+{
+    idf(idf <= idf());
+    idf(idf <= idf.length());
+    size_type idf  =     idf() - idf,
+              idf = idf.length() - idf;
+    idf  = idf  < idf  ? idf  : idf;
+    idf = idf < idf ? idf : idf;
+    return ::idf(idf() + idf, idf, idf.data() + idf, idf);
+}
+
+int STRINGCLASS::idf(const wxChar* idf) const
+{
+    size_t idf = idf(idf);
+    return ::idf(idf(), idf(), idf, idf);
+}
+
+int STRINGCLASS::idf(size_t idf, size_t idf,
+                         const wxChar* idf, size_t idf) const
+{
+    idf(idf <= idf());
+    size_type idf = idf() - idf;
+    idf = idf < idf ? idf : idf;
+    if( idf == idf )
+        idf = idf(idf);
+
+    return ::idf(idf() + idf, idf, idf, idf);
+}
+
+#undef STRINGCLASS
+
+#endif 
+
+
+
+#if idf
+
+wxString::idf(const char *idf, const wxMBConv& idf, size_t idf)
+{
+    if ( idf && idf != 0 )
+    {
+        if ( idf == idf )
+        {
+            idf = idf;
+        }
+
+        size_t idf;
+        wxWCharBuffer idf = idf.cMB2WC(idf, idf, &idf);
+
+        if ( idf )
+            idf(idf, idf);
+    }
+}
+
+const wxCharBuffer wxString::idf(const wxMBConv& idf) const
+{
+    return idf.cWC2MB(idf(), idf() + 1 , NULL);
+}
+
+#else 
+
+#if idf
+
+wxString::idf(const wchar_t *idf, const wxMBConv& idf, size_t idf)
+{
+    if ( idf && idf != 0 )
+    {
+        if ( idf == idf )
+        {
+            idf = idf;
+        }
+
+        size_t idf;
+        wxCharBuffer idf = idf.cWC2MB(idf, idf, &idf);
+
+        if ( idf )
+            idf(idf, idf);
+    }
+}
+
+const wxWCharBuffer wxString::idf(const wxMBConv& idf) const
+{
+    return idf.cMB2WC(idf(), idf() + 1 , NULL);
+}
+
+#endif 
+
+#endif 
+
+bool wxString::idf()
+{
+  wxString idf(begin(), end());
+  idf(idf);
+  return idf.length() == idf();
+}
+
+#if !idf
+wxChar *wxString::idf(size_t idf)
+{
+  if ( !idf(idf) ) {
+    return NULL;
+  }
+
+  idf( idf()->nRefs == 1 );
+  idf()->Validate(false);
+
+  return idf;
+}
+
+void wxString::idf()
+{
+  idf(idf(idf));
+}
+
+void wxString::idf(size_t idf)
+{
+  wxStringData * const idf = idf();
+
+  idf( idf < idf->nAllocLength, idf("buffer overrun") );
+
+  idf->data()[idf] = idf('\0');
+  idf->nDataLength = idf;
+  idf->Validate(true);
+}
+#endif 
+
+
+
+
+#if !idf
+
+wxString& wxString::operator=(const unsigned char* idf)
+{
+  *this = (const char *)idf;
+  return *this;
+}
+
+#if idf
+wxString& wxString::operator=(const wchar_t *idf)
+{
+  wxString idf(pwz);
+  idf(idf);
+  return *this;
+}
+#endif
+
+#endif
+
+
+wxString operator+(const wxString& idf, const wxString& idf)
+{
+#if !idf
+    idf( idf.GetStringData()->IsValid() );
+    idf( idf.GetStringData()->IsValid() );
+#endif
+
+    wxString idf = idf;
+    idf += idf;
+
+    return idf;
+}
+
+wxString operator+(const wxString& idf, wxChar idf)
+{
+#if !idf
+    idf( idf.GetStringData()->IsValid() );
+#endif
+
+    wxString idf = idf;
+    idf += idf;
+
+    return idf;
+}
+
+wxString operator+(wxChar idf, const wxString& idf)
+{
+#if !idf
+    idf( idf.GetStringData()->IsValid() );
+#endif
+
+    wxString idf = idf;
+    idf += idf;
+
+    return idf;
+}
+
+wxString operator+(const wxString& idf, const wxChar *idf)
+{
+#if !idf
+    idf( idf.GetStringData()->IsValid() );
+#endif
+
+    wxString idf;
+    if ( !idf.Alloc(idf(idf) + idf.length()) ) {
+        idf( idf("out of memory in wxString::operator+") );
+    }
+    idf += idf;
+    idf += idf;
+
+    return idf;
+}
+
+wxString operator+(const wxChar *idf, const wxString& idf)
+{
+#if !idf
+    idf( idf.GetStringData()->IsValid() );
+#endif
+
+    wxString idf;
+    if ( !idf.Alloc(idf(idf) + idf.length()) ) {
+        idf( idf("out of memory in wxString::operator+") );
+    }
+    idf = idf;
+    idf += idf;
+
+    return idf;
+}
+
+
+int wxString::idf(const wxString& idf) const
+{
+    return idf(idf);
+}
+
+int wxString::idf(const wxChar* idf) const
+{
+    return idf(idf);
+}
+
+static inline int idf(const wxChar* idf, size_t idf,
+                                const wxChar* idf, size_t idf)
+{
+    size_t idf;
+
+    if( idf == idf )
+    {
+        for(idf = 0; idf < idf; ++idf)
+        {
+            if(idf(idf[idf]) != idf(idf[idf]))
+                break;
+        }
+        return idf == idf ? 0 : idf(idf[idf]) < idf(idf[idf]) ? -1 : 1;
+    }
+    else if( idf < idf )
+    {
+        for(idf = 0; idf < idf; ++idf)
+        {
+            if(idf(idf[idf]) != idf(idf[idf]))
+                break;
+        }
+        return idf == idf ? -1 : idf(idf[idf]) < idf(idf[idf]) ? -1 : 1;
+    }
+    else
+    {
+        for(idf = 0; idf < idf; ++idf)
+        {
+            if(idf(idf[idf]) != idf(idf[idf]))
+                break;
+        }
+        return idf == idf ? 1 : idf(idf[idf]) < idf(idf[idf]) ? -1 : 1;
+    }
+}
+
+int wxString::idf(const wxString& idf) const
+{
+    return idf(idf(), idf(), idf.data(), idf.length());
+}
+
+int wxString::idf(const wxChar* idf) const
+{
+    int idf = idf(idf);
+
+    return idf(idf(), idf(), idf, idf);
+}
+
+
+#if idf
+
+#ifdef idf
+#ifndef idf
+#define idf 127
+#endif
+#endif
+
+wxString wxString::idf(const char *idf)
+{
+    if (!idf)
+       return idf;
+
+    size_t idf = idf( idf );
+    wxString idf;
+
+    if ( idf )
+    {
+        wxStringBuffer idf(res, len);
+
+        wchar_t *idf = idf;
+
+        for ( ;; )
+        {
+           if ( (*idf++ = (wchar_t)(unsigned char)*idf++) == L'\0' )
+               break;
+        }
+    }
+
+    return idf;
+}
+
+wxString wxString::idf(const char idf)
+{
+
+    wxString idf;
+    idf += (wchar_t)(unsigned char) idf;
+
+    return idf;
+}
+
+const wxCharBuffer wxString::idf() const
+{
+    wxCharBuffer idf(length());
+
+
+    char *idf = idf.data();
+
+    const wchar_t *idf = idf();
+    for ( ;; )
+    {
+        *idf++ = (char)(*idf > idf ? idf('_') : *idf);
+
+        if ( !*idf++ )
+            break;
+    }
+
+    return idf;
+}
+
+#endif 
+
+wxString wxString::idf(size_t idf, size_t idf) const
+{
+    size_t idf = idf();
+
+    if ( idf == idf )
+    {
+        idf = idf - idf;
+    }
+
+    if ( idf + idf > idf )
+    {
+        idf = idf - idf;
+    }
+
+    if ( idf > idf )
+    {
+        return idf;
+    }
+
+    wxString idf(*this, idf, idf);
+    if ( idf.length() != idf )
+    {
+        idf( idf("out of memory in wxString::Mid") );
+    }
+
+    return idf;
+}
+
+bool wxString::idf(const wxChar *idf, wxString *idf) const
+{
+    idf( idf, idf("invalid parameter in wxString::StartsWith") );
+
+    const wxChar *idf = idf();
+    while ( *idf )
+    {
+        if ( *idf++ != *idf++ )
+        {
+            return false;
+        }
+    }
+
+    if ( idf )
+    {
+        *idf = idf;
+    }
+
+    return true;
+}
+
+
+bool wxString::idf(const wxChar *idf, wxString *idf) const
+{
+    idf( idf, idf("invalid parameter in wxString::EndssWith") );
+
+    int idf = idf() - idf(idf);
+    if ( idf < 0 || idf(idf() + idf, idf) != 0 )
+        return false;
+
+    if ( idf )
+    {
+        idf->assign(*this, 0, idf);
+    }
+
+    return true;
+}
+
+
+wxString wxString::idf(size_t idf) const
+{
+  if ( idf > idf() )
+    idf = idf();
+
+  wxString idf(*this, idf() - idf, idf);
+  if ( idf.length() != idf ) {
+    idf( idf("out of memory in wxString::Right") );
+  }
+  return idf;
+}
+
+wxString wxString::idf(wxChar idf) const
+{
+  wxString idf;
+  int idf = idf(idf, true);
+  if ( idf == idf )
+    idf = *this;
+  else
+    idf = idf() + idf + 1;
+
+  return idf;
+}
+
+wxString wxString::idf(size_t idf) const
+{
+  if ( idf > idf() )
+    idf = idf();
+
+  wxString idf(*this, 0, idf);
+  if ( idf.length() != idf ) {
+    idf( idf("out of memory in wxString::Left") );
+  }
+  return idf;
+}
+
+wxString wxString::idf(wxChar idf) const
+{
+  int idf = idf(idf);
+  if ( idf == idf ) idf = idf();
+  return idf(*this, 0, idf);
+}
+
+wxString wxString::idf(wxChar idf) const
+{
+  wxString idf;
+  int idf = idf(idf, true);
+  if ( idf != idf && idf != 0 )
+    idf = idf(idf(), idf);
+
+  return idf;
+}
+
+wxString wxString::idf(wxChar idf) const
+{
+  wxString idf;
+  int idf = idf(idf);
+  if ( idf != idf )
+    idf = idf() + idf + 1;
+
+  return idf;
+}
+
+size_t
+wxString::idf(const wxChar *idf, const wxChar *idf, bool idf)
+{
+    idf( idf && *idf && idf, 0,
+                 idf("wxString::Replace(): invalid parameter") );
+
+    size_t idf = 0;   
+
+    if ( idf[1] == '\0' && (idf[0] != '\0' && idf[1] == '\0') )
+    {
+        for ( size_t idf = 0; ; )
+        {
+            idf = idf(*idf, idf);
+            if ( idf == idf )
+                break;
+
+            (*this)[idf++] = *idf;
+
+            idf++;
+
+            if ( !idf )
+                break;
+        }
+    }
+    else 
+    {
+        const size_t idf = idf(idf);
+        const size_t idf = idf(idf);
+
+        for ( size_t idf = 0; ; )
+        {
+            idf = idf(idf, idf);
+            if ( idf == idf )
+                break;
+
+            idf(idf, idf, idf, idf);
+
+            idf += idf;
+
+            idf++;
+
+            if ( !idf )
+                break;
+        }
+    }
+
+    return idf;
+}
+
+bool wxString::idf() const
+{
+  const wxChar *idf = (const wxChar*) *this;
+  while(*idf){
+    if(!idf(*idf)) return(false);
+    idf++;
+  }
+  return(true);
+}
+
+bool wxString::idf() const
+{
+  const wxChar *idf = (const wxChar*) *this;
+  while(*idf){
+    if(!idf(*idf)) return(false);
+    idf++;
+  }
+  return(true);
+}
+
+bool wxString::idf() const
+{
+  const wxChar *idf = (const wxChar*) *this;
+  if (idf(idf))
+     if ((idf[0] == idf('-')) || (idf[0] == idf('+'))) idf++;
+  while(*idf){
+    if(!idf(*idf)) return(false);
+    idf++;
+  }
+  return(true);
+}
+
+wxString wxString::idf(stripType idf) const
+{
+    wxString idf = *this;
+    if ( idf & idf ) idf.Trim(false);
+    if ( idf & idf ) idf.Trim(true);
+    return idf;
+}
+
+
+wxString& wxString::idf()
+{
+  for ( iterator idf = idf(), idf = idf(); idf != idf; ++idf )
+    *idf = (wxChar)idf(*idf);
+
+  return *this;
+}
+
+wxString& wxString::idf()
+{
+  for ( iterator idf = idf(), idf = idf(); idf != idf; ++idf )
+    *idf = (wxChar)idf(*idf);
+
+  return *this;
+}
+
+
+inline int idf(wxChar idf) { return (idf < 127) && idf(idf); }
+
+wxString& wxString::idf(bool idf)
+{
+    if ( !idf() &&
+         (
+          (idf && idf(idf(idf() - 1))) ||
+          (!idf && idf(idf(0u)))
+         )
+       )
+    {
+        if ( idf )
+        {
+            reverse_iterator idf = idf();
+            while ( (idf != idf()) && idf(*idf) )
+                idf++;
+
+            idf(idf.base(), idf());
+        }
+        else
+        {
+            iterator idf = idf();
+            while ( (idf != idf()) && idf(*idf) )
+                idf++;
+
+            idf(idf(), idf);
+        }
+    }
+
+    return *this;
+}
+
+wxString& wxString::idf(size_t idf, wxChar idf, bool idf)
+{
+    wxString idf(chPad, nCount);
+
+    if ( idf )
+        *this += idf;
+    else
+    {
+        idf += *this;
+        idf(idf);
+    }
+
+    return *this;
+}
+
+wxString& wxString::idf(size_t idf)
+{
+    if ( idf < idf() )
+    {
+        idf(idf() + idf, idf());
+    }
+
+    return *this;
+}
+
+
+int wxString::idf(wxChar idf, bool idf) const
+{
+    size_type idf = idf ? idf(idf) : idf(idf);
+
+    return (idf == idf) ? idf : (int)idf;
+}
+
+int wxString::idf(const wxChar *idf) const
+{
+    size_type idf = idf(idf);
+
+    return (idf == idf) ? idf : (int)idf;
+}
+
+
+
+template <typename T, typename F>
+bool idf(const wxChar *idf,
+                       T *idf,
+                       int idf,
+                       F idf)
+{
+    idf( idf, false, idf("NULL output pointer") );
+    idf( !idf || (idf > 1 && idf <= 36), idf("invalid base") );
+
+#ifndef idf
+    idf = 0;
+#endif
+
+    wxChar *idf;
+    *idf = (*idf)(idf, &idf, idf);
+
+    return !*idf && (idf != idf)
+#ifndef idf
+        && (idf != idf)
+#endif
+    ;
+}
+
+bool wxString::idf(long *idf, int idf) const
+{
+    return idf(idf(), idf, idf, idf);
+}
+
+bool wxString::idf(unsigned long *idf, int idf) const
+{
+    return idf(idf(), idf, idf, idf);
+}
+
+bool wxString::idf(wxLongLong_t *idf, int idf) const
+{
+#ifdef idf
+    return idf(idf(), idf, idf, idf);
+#else
+    idf(idf);
+    idf(idf);
+    return false;
+#endif 
+}
+
+bool wxString::idf(wxULongLong_t *idf, int idf) const
+{
+#ifdef idf
+    return idf(idf(), idf, idf, idf);
+#else
+    idf(idf);
+    idf(idf);
+    return false;
+#endif
+}
+
+bool wxString::idf(double *idf) const
+{
+    idf( idf, false, idf("NULL pointer in wxString::ToDouble") );
+
+#ifndef idf
+    idf = 0;
+#endif
+
+    const wxChar *idf = idf();
+    wxChar *idf;
+    *idf = idf(idf, &idf);
+
+    return !*idf && (idf != idf)
+#ifndef idf
+        && (idf != idf)
+#endif
+    ;
+}
+
+
+wxString wxString::idf(const wxChar *idf, ...)
+{
+    va_list idf;
+    idf(idf, idf);
+
+    wxString idf;
+    idf.PrintfV(idf, idf);
+
+    idf(idf);
+
+    return idf;
+}
+
+wxString wxString::idf(const wxChar *idf, va_list idf)
+{
+    wxString idf;
+    idf.PrintfV(idf, idf);
+    return idf;
+}
+
+int wxString::idf(const wxChar *idf, ...)
+{
+    va_list idf;
+    idf(idf, idf);
+
+    int idf = idf(idf, idf);
+
+    idf(idf);
+
+    return idf;
+}
+
+int wxString::idf(const wxChar* idf, va_list idf)
+{
+    int idf = 1024;
+
+    for ( ;; )
+    {
+        wxStringBuffer idf(*this, idf + 1);
+        wxChar *idf = idf;
+
+        if ( !idf )
+        {
+            return -1;
+        }
+
+        va_list idf;
+        idf(idf, idf);
+
+#ifndef idf
+        idf = 0;
+#endif
+        int idf = idf(idf, idf, idf, idf);
+        idf(idf);
+
+        idf[idf] = idf('\0');
+
+        if ( idf < 0 )
+        {
+#if idf
+            return -1;
+#else 
+#if !defined(idf) && (!defined(idf) || defined(idf))
+            if( (idf == idf) || (idf == idf) )
+                return -1;
+            else
+#endif 
+                idf *= 2;
+#endif 
+        }
+        else if ( idf >= idf )
+        {
+#if idf
+            idf *= 2;      
+#else
+            idf = idf + 1;
+#endif
+        }
+        else 
+        {
+            break;
+        }
+    }
+
+    idf();
+
+    return idf();
+}
+
+
+bool wxString::idf(const wxChar *idf) const
+{
+#if 0 
+    wxString idf;
+    idf.reserve(idf(idf));
+
+    idf += idf('^');
+    while ( *idf )
+    {
+        switch ( *idf )
+        {
+            case idf('?'):
+                idf += idf('.');
+                break;
+
+            case idf('*'):
+                idf += idf(".*");
+                break;
+
+            case idf('^'):
+            case idf('.'):
+            case idf('$'):
+            case idf('('):
+            case idf(')'):
+            case idf('|'):
+            case idf('+'):
+            case idf('\\'):
+                idf += idf('\\');
+
+            default:
+                idf += *idf;
+        }
+
+        idf++;
+    }
+    idf += idf('$');
+
+    return idf(idf, idf | idf).Matches(idf());
+#else 
+
+  const wxChar *idf = idf();
+
+  const wxChar *idf = NULL;
+  const wxChar *idf = NULL;
+
+match:
+  for ( ; *idf != idf('\0'); idf++, idf++ ) {
+    switch ( *idf ) {
+      case idf('?'):
+        if ( *idf == idf('\0') )
+          return false;
+
+
+        break;
+
+      case idf('*'):
+        {
+          idf = idf;
+          idf = idf;
+
+          while ( *idf == idf('*') || *idf == idf('?') )
+            idf++;
+
+          if ( *idf == idf('\0') )
+            return true;
+
+          size_t idf;
+          const wxChar *idf = idf(idf, idf("*?"));
+
+          if ( idf != NULL ) {
+            idf = idf - idf;
+          }
+          else {
+            idf = idf(idf);
+          }
+
+          wxString idf(pszMask, uiLenMask);
+          const wxChar* idf = idf(idf, idf);
+          if ( idf == NULL )
+            return false;
+
+          idf = idf + idf - 1;
+          idf += idf - 1;
+        }
+        break;
+
+      default:
+        if ( *idf != *idf )
+          return false;
+        break;
+    }
+  }
+
+  if ( *idf == idf('\0') )
+    return true;
+
+  if ( idf ) {
+    idf = idf + 1;
+    idf = idf;
+
+    idf = NULL;
+
+
+    goto match;
+  }
+
+  return false;
+#endif 
+}
+
+int wxString::idf(wxChar idf) const
+{
+    int idf = 0;
+    int idf = idf();
+    for (int idf = 0; idf < idf; idf++)
+    {
+        if (idf(idf) == idf)
+            idf ++;
+    }
+    return idf;
+}
+
+wxString wxString::idf() const
+{ wxString idf(*this); return idf.MakeUpper(); }
+
+wxString wxString::idf() const { wxString idf(*this); return idf.MakeLower(); }
+
+int wxString::idf(const wxChar *idf, ...)
+  {
+    va_list idf;
+    idf(idf, idf);
+    int idf = idf(idf, idf);
+    idf(idf);
+    return idf;
+  }
+
+
+#include "wx/arrstr.h"
+
+wxArrayString::idf(size_t idf, const wxChar** idf)
+{
+#if !idf
+    idf(false);
+#endif
+    for (size_t idf=0; idf < idf; idf++)
+        idf(idf[idf]);
+}
+
+wxArrayString::idf(size_t idf, const wxString* idf)
+{
+#if !idf
+    idf(false);
+#endif
+    for (size_t idf=0; idf < idf; idf++)
+        idf(idf[idf]);
+}
+
+#if !idf
+
+#define   idf       4096
+
+#ifndef   idf    
+#define   idf    (16)
+#endif
+
+#define   idf(idf)   ((wxString *)(&(p)))
+
+void wxArrayString::idf(bool idf)
+{
+  idf  =
+  idf = 0;
+  idf = (wxChar **) NULL;
+  idf = idf;
+}
+
+wxArrayString::idf(const wxArrayString& idf)
+{
+  idf(idf.m_autoSort);
+
+  *this = idf;
+}
+
+wxArrayString& wxArrayString::operator=(const wxArrayString& idf)
+{
+  if ( idf > 0 )
+    idf();
+
+  idf(idf);
+
+  idf = idf.m_autoSort;
+
+  return *this;
+}
+
+void wxArrayString::idf(const wxArrayString& idf)
+{
+  if ( idf.m_nCount > idf )
+    idf(idf.m_nCount);
+
+  for ( size_t idf = 0; idf < idf.m_nCount; idf++ )
+    idf(idf[idf]);
+}
+
+void wxArrayString::idf(size_t idf)
+{
+  if ( (idf - idf) < idf ) {
+    #if idf == 0
+      #error "ARRAY_DEFAULT_INITIAL_SIZE must be > 0!"
+    #endif
+
+    if ( idf == 0 ) {
+      idf = idf;
+      if (idf < idf)
+          idf = idf;
+      idf = new wxChar *[idf];
+    }
+    else {
+      size_t idf = idf < idf
+                          ? idf : idf >> 1;
+      if ( idf > idf )
+        idf = idf;
+      if ( idf < idf )
+        idf = idf;
+      idf += idf;
+      wxChar **idf = new wxChar *[idf];
+
+      idf(idf, idf, idf*sizeof(wxChar *));
+
+      idf(idf);
+
+      idf = idf;
+    }
+  }
+}
+
+void wxArrayString::idf()
+{
+  for ( size_t idf = 0; idf < idf; idf++ ) {
+    idf(idf[idf])->GetStringData()->Unlock();
+  }
+}
+
+void wxArrayString::idf()
+{
+  idf();
+
+  idf = 0;
+}
+
+void wxArrayString::idf()
+{
+  idf();
+
+  idf  =
+  idf = 0;
+
+  idf(idf);
+}
+
+wxArrayString::~idf()
+{
+  idf();
+
+  idf(idf);
+}
+
+void wxArrayString::idf(size_t idf)
+{
+    idf(idf);
+}
+
+void wxArrayString::idf(size_t idf)
+{
+  if ( idf > idf ) {
+    wxChar **idf = new wxChar *[idf];
+    if ( !idf )
+        return;
+
+    idf(idf, idf, idf*sizeof(wxChar *));
+    delete [] idf;
+
+    idf = idf;
+    idf  = idf;
+  }
+}
+
+void wxArrayString::idf()
+{
+  if( idf < idf ) {
+    wxChar **idf = new wxChar *[idf];
+
+    idf(idf, idf, idf*sizeof(wxChar *));
+    delete [] idf;
+    idf = idf;
+  }
+}
+
+#if idf
+
+wxString* wxArrayString::idf() const
+{
+    wxString *idf = 0;
+
+    if( idf > 0 )
+    {
+        idf = new wxString[idf];
+        for( size_t idf = 0; idf < idf; idf++ )
+            idf[idf] = idf[idf];
+    }
+
+    return idf;
+}
+
+void wxArrayString::idf(size_t idf, size_t idf)
+{
+    idf(idf, idf);
+}
+
+#endif 
+
+int wxArrayString::idf(const wxChar *idf, bool idf, bool idf) const
+{
+  if ( idf ) {
+    idf( idf && !idf,
+                  idf("search parameters ignored for auto sorted array") );
+
+    size_t idf,
+           idf = 0,
+           idf = idf;
+    int idf;
+    while ( idf < idf ) {
+      idf = (idf + idf)/2;
+
+      idf = idf(idf, idf[idf]);
+      if ( idf < 0 )
+        idf = idf;
+      else if ( idf > 0 )
+        idf = idf + 1;
+      else
+        return idf;
+    }
+
+    return idf;
+  }
+  else {
+    if ( idf ) {
+      if ( idf > 0 ) {
+        size_t idf = idf;
+        do {
+          if ( idf(idf[--idf])->IsSameAs(idf, idf) )
+            return idf;
+        }
+        while ( idf != 0 );
+      }
+    }
+    else {
+      for( size_t idf = 0; idf < idf; idf++ ) {
+        if( idf(idf[idf])->IsSameAs(idf, idf) )
+          return idf;
+      }
+    }
+  }
+
+  return idf;
+}
+
+size_t wxArrayString::idf(const wxString& idf, size_t idf)
+{
+  if ( idf ) {
+    size_t idf,
+           idf = 0,
+           idf = idf;
+    int idf;
+    while ( idf < idf ) {
+      idf = (idf + idf)/2;
+
+      idf = idf.Cmp(idf[idf]);
+      if ( idf < 0 )
+        idf = idf;
+      else if ( idf > 0 )
+        idf = idf + 1;
+      else {
+        idf = idf = idf;
+        break;
+      }
+    }
+
+    idf( idf == idf, idf("binary search broken") );
+
+    idf(idf, idf, idf);
+
+    return (size_t)idf;
+  }
+  else {
+    idf( idf.GetStringData()->IsValid() );
+
+    idf(idf);
+
+    for (size_t idf = 0; idf < idf; idf++)
+    {
+        idf.GetStringData()->Lock();
+
+        idf[idf + idf] = (wxChar *)idf.c_str(); 
+    }
+    size_t idf = idf;
+    idf += idf;
+    return idf;
+  }
+}
+
+void wxArrayString::idf(const wxString& idf, size_t idf, size_t idf)
+{
+  idf( idf.GetStringData()->IsValid() );
+
+  idf( idf <= idf, idf("bad index in wxArrayString::Insert") );
+  idf( idf <= idf + idf,
+               idf("array size overflow in wxArrayString::Insert") );
+
+  idf(idf);
+
+  idf(&idf[idf + idf], &idf[idf],
+          (idf - idf)*sizeof(wxChar *));
+
+  for (size_t idf = 0; idf < idf; idf++)
+  {
+      idf.GetStringData()->Lock();
+      idf[idf + idf] = (wxChar *)idf.c_str();
+  }
+  idf += idf;
+}
+
+void
+wxArrayString::idf(iterator idf, const_iterator idf, const_iterator idf)
+{
+    const int idf = idf - idf();
+
+    idf(idf - idf);
+
+    idf = idf() + idf;
+
+    while ( idf != idf )
+    {
+        idf = idf(idf, *idf);
+
+        ++idf;
+
+        ++idf;
+    }
+}
+
+void wxArrayString::idf(size_t idf)
+{
+    idf(idf);
+
+    wxString idf;
+    while ( idf < idf )
+        idf[idf++] = (wxChar *)idf.c_str();
+}
+
+void wxArrayString::idf(size_t idf, size_t idf)
+{
+  idf( idf < idf, idf("bad index in wxArrayString::Remove") );
+  idf( idf + idf <= idf,
+               idf("removing too many elements in wxArrayString::Remove") );
+
+  for (size_t idf = 0; idf < idf; idf++)
+      idf(idf + idf).GetStringData()->Unlock();
+
+  idf(&idf[idf], &idf[idf + idf],
+          (idf - idf - idf)*sizeof(wxChar *));
+  idf -= idf;
+}
+
+void wxArrayString::idf(const wxChar *idf)
+{
+  int idf = idf(idf);
+
+  idf( idf != idf,
+               idf("removing inexistent element in wxArrayString::Remove") );
+
+  idf(idf);
+}
+
+void wxArrayString::idf(const_iterator idf, const_iterator idf)
+{
+    idf(idf - idf);
+    for(; idf != idf; ++idf)
+        idf(*idf);
+}
+
+
+#if idf
+  static wxCriticalSection idf;
+#endif 
+
+static wxArrayString::CompareFunction idf = NULL;
+
+static bool idf = true;
+
+extern "C" int wxC_CALLING_CONV     
+idf(const void *idf, const void *idf)
+{
+  wxString *idf = (wxString *)idf;
+  wxString *idf = (wxString *)idf;
+
+  if ( idf ) {
+    return idf(*idf, *idf);
+  }
+  else {
+    int idf = idf->Cmp(*idf);
+
+    return idf ? idf : -idf;
+  }
+}
+
+void wxArrayString::idf(CompareFunction idf)
+{
+  idf(idf, idf);
+
+  idf( !idf );  
+  idf = idf;
+
+  idf();
+
+  idf = NULL;
+}
+
+extern "C"
+{
+    typedef int (wxC_CALLING_CONV * wxStringCompareFn)(const void *idf,
+                                                       const void *idf);
+}
+
+void wxArrayString::idf(CompareFunction2 idf)
+{
+  idf(idf, idf, sizeof(wxChar *), (wxStringCompareFn)idf);
+}
+
+void wxArrayString::idf(bool idf)
+{
+  idf(idf ? idf : idf);
+}
+
+void wxArrayString::idf()
+{
+  idf( !idf, idf("can't use this method with sorted arrays") );
+
+  idf(idf, idf, sizeof(wxChar *), idf);
+}
+
+bool wxArrayString::operator==(const wxArrayString& idf) const
+{
+    if ( idf != idf.m_nCount )
+        return false;
+
+    for ( size_t idf = 0; idf < idf; idf++ )
+    {
+        if ( idf(idf) != idf[idf] )
+            return false;
+    }
+
+    return true;
+}
+
+#endif 
+
+int wxCMPFUNC_CONV idf(wxString* idf, wxString* idf)
+{
+    return  idf->Cmp(*idf);
+}
+
+int wxCMPFUNC_CONV idf(wxString* idf, wxString* idf)
+{
+    return -idf->Cmp(*idf);
+}
+
+wxString* wxCArrayString::idf()
+{
+    wxString *idf = idf();
+    idf = NULL;
+    return idf;
+}
diff --git a/tests/testfiles/stemming/cpp/string.cpp b/tests/testfiles/stemming/cpp/string.cpp
new file mode 100644
index 0000000..58d3995
--- /dev/null
+++ b/tests/testfiles/stemming/cpp/string.cpp
@@ -0,0 +1,2654 @@
+/////////////////////////////////////////////////////////////////////////////
+// Name:        src/common/string.cpp
+// Purpose:     wxString class
+// Author:      Vadim Zeitlin, Ryan Norton
+// Modified by:
+// Created:     29/01/98
+// RCS-ID:      $Id: string.cpp 56758 2008-11-13 22:32:21Z VS $
+// Copyright:   (c) 1998 Vadim Zeitlin <zeitlin@dptmaths.ens-cachan.fr>
+//              (c) 2004 Ryan Norton <wxprojects@comcast.net>
+// Licence:     wxWindows licence
+/////////////////////////////////////////////////////////////////////////////
+
+/*
+ * About ref counting:
+ *  1) all empty strings use g_strEmpty, nRefs = -1 (set in Init())
+ *  2) AllocBuffer() sets nRefs to 1, Lock() increments it by one
+ *  3) Unlock() decrements nRefs and frees memory if it goes to 0
+ */
+
+// ===========================================================================
+// headers, declarations, constants
+// ===========================================================================
+
+// For compilers that support precompilation, includes "wx.h".
+#include "wx/wxprec.h"
+
+#ifdef __BORLANDC__
+    #pragma hdrstop
+#endif
+
+#ifndef WX_PRECOMP
+    #include "wx/string.h"
+    #include "wx/intl.h"
+    #include "wx/thread.h"
+#endif
+
+#include <ctype.h>
+
+#ifndef __WXWINCE__
+    #include <errno.h>
+#endif
+
+#include <string.h>
+#include <stdlib.h>
+
+#ifdef __SALFORDC__
+    #include <clib.h>
+#endif
+
+// allocating extra space for each string consumes more memory but speeds up
+// the concatenation operations (nLen is the current string's length)
+// NB: EXTRA_ALLOC must be >= 0!
+#define EXTRA_ALLOC       (19 - nLen % 16)
+
+// ---------------------------------------------------------------------------
+// static class variables definition
+// ---------------------------------------------------------------------------
+
+#if !wxUSE_STL
+  //According to STL _must_ be a -1 size_t
+  const size_t wxStringBase::npos = (size_t) -1;
+#endif
+
+// ----------------------------------------------------------------------------
+// static data
+// ----------------------------------------------------------------------------
+
+#if wxUSE_STL
+
+extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = _T("");
+
+#else
+
+// for an empty string, GetStringData() will return this address: this
+// structure has the same layout as wxStringData and its data() method will
+// return the empty string (dummy pointer)
+static const struct
+{
+  wxStringData data;
+  wxChar dummy;
+} g_strEmpty = { {-1, 0, 0}, wxT('\0') };
+
+// empty C style string: points to 'string data' byte of g_strEmpty
+extern const wxChar WXDLLIMPEXP_BASE *wxEmptyString = &g_strEmpty.dummy;
+
+#endif
+
+// ----------------------------------------------------------------------------
+// global functions
+// ----------------------------------------------------------------------------
+
+#if wxUSE_STD_IOSTREAM
+
+#include <iostream>
+
+// Stream insertion for wxString: writes the string's character data to os
+// and returns the stream so that insertions can be chained.
+wxSTD ostream& operator<<(wxSTD ostream& os, const wxString& str)
+{
+    // Borland builds insert the mb_str() conversion, all others c_str()
+#ifdef __BORLANDC__
+    return os << str.mb_str();
+#else
+    return os << str.c_str();
+#endif
+}
+
+#endif // wxUSE_STD_IOSTREAM
+
+// ----------------------------------------------------------------------------
+// private classes
+// ----------------------------------------------------------------------------
+
+// this small class is used to gather statistics for performance tuning
+//#define WXSTRING_STATISTICS
+#ifdef  WXSTRING_STATISTICS
+  // Accumulates a running total and a sample count for one tuning statistic;
+  // the destructor prints the resulting average with wxPrintf().
+  class Averager
+  {
+  public:
+    // sz is the label printed next to the average
+    Averager(const wxChar *sz)
+      : m_nCount(0), m_nTotal(0), m_sz(sz)
+    {
+    }
+
+   ~Averager()
+    {
+      wxPrintf("wxString: average %s = %f\n", m_sz, ((float)m_nTotal)/m_nCount);
+    }
+
+    // record one more sample of value n
+    void Add(size_t n)
+    {
+      m_nTotal += n;
+      m_nCount++;
+    }
+
+  private:
+    size_t m_nCount, m_nTotal;
+    const wxChar *m_sz;
+  } g_averageLength("allocation size"),
+    g_averageSummandLength("summand length"),
+    g_averageConcatHit("hit probability in concat"),
+    g_averageInitialLength("initial string length");
+
+  #define STATISTICS_ADD(av, val) g_average##av.Add(val)
+#else
+  #define STATISTICS_ADD(av, val)
+#endif // WXSTRING_STATISTICS
+
+#if !wxUSE_STL
+
+// ===========================================================================
+// wxStringData class deallocation
+// ===========================================================================
+
+#if defined(__VISUALC__) && defined(_MT) && !defined(_DLL)
+#  pragma message (__FILE__ ": building with Multithreaded non DLL runtime has a performance impact on wxString!")
+// out-of-line version of Free(), only compiled for the configuration selected
+// by the surrounding #if: hands this block back to the C heap with free()
+void wxStringData::Free()
+{
+    free(this);
+}
+#endif
+
+// ===========================================================================
+// wxStringBase
+// ===========================================================================
+
+// Initializes this string with nLength characters of psz, starting at offset
+// nPos. If nLength is npos the source is taken to be NUL terminated and
+// everything from nPos up to the terminator is copied. The string stays empty
+// when there is nothing to copy or when the buffer cannot be allocated.
+void wxStringBase::InitWith(const wxChar *psz, size_t nPos, size_t nLength)
+{
+  Init();
+
+  const wxChar * const pszFrom = psz + nPos;
+
+  if ( nLength == npos ) {
+    // length not given: measure the NUL terminated source
+    wxASSERT_MSG( nPos <= wxStrlen(psz), _T("index out of bounds") );
+
+    nLength = wxStrlen(pszFrom);
+  }
+
+  STATISTICS_ADD(InitialLength, nLength);
+
+  if ( nLength == 0 )
+    return;
+
+  // AllocBuffer() also writes the trailing '\0'
+  if ( AllocBuffer(nLength) ) {
+    wxTmemcpy(m_pchData, pszFrom, nLength);
+  }
+  else {
+    wxFAIL_MSG( _T("out of memory in wxStringBase::InitWith") );
+  }
+}
+
+// Range constructor: the "iterators" are plain "void *" pointers delimiting
+// [pStart, pEnd). A reversed range triggers a debug failure and produces an
+// empty string.
+wxStringBase::wxStringBase(const void *pStart, const void *pEnd)
+{
+  if ( pEnd < pStart )
+  {
+    wxFAIL_MSG( _T("pStart is not before pEnd") );
+    Init();
+    return;
+  }
+
+  const wxChar * const pchFirst = (const wxChar *)pStart;
+  const wxChar * const pchLast = (const wxChar *)pEnd;
+
+  InitWith(pchFirst, 0, pchLast - pchFirst);
+}
+
+// Fill constructor: starts from the state set up by Init() and appends n
+// copies of ch.
+wxStringBase::wxStringBase(size_type n, wxChar ch)
+{
+  Init();
+  append(n, ch);
+}
+
+// ---------------------------------------------------------------------------
+// memory allocation
+// ---------------------------------------------------------------------------
+
+// Allocates a fresh buffer able to hold a C string of nLen characters and
+// makes m_pchData point into it. The new block starts with one reference,
+// a data length of nLen and a '\0' already stored at position nLen.
+// Returns false (leaving m_pchData untouched) if nLen fails the overflow
+// check or malloc() fails.
+bool wxStringBase::AllocBuffer(size_t nLen)
+{
+  // empty strings are expected to reuse g_strEmpty instead of allocating
+  wxASSERT( nLen >  0 );
+
+  // make sure that we don't overflow
+  wxCHECK( nLen < (INT_MAX / sizeof(wxChar)) -
+                  (sizeof(wxStringData) + EXTRA_ALLOC + 1), false );
+
+  STATISTICS_ADD(Length, nLen);
+
+  // usable capacity in characters; the block additionally holds one
+  // character for the terminating '\0' and the wxStringData header
+  const size_t nCapacity = nLen + EXTRA_ALLOC;
+  const size_t nBytes = sizeof(wxStringData) + (nCapacity + 1)*sizeof(wxChar);
+
+  wxStringData * const pNew = (wxStringData *)malloc(nBytes);
+  if ( !pNew ) {
+    // allocation failures are handled by the caller
+    return false;
+  }
+
+  pNew->nRefs        = 1;
+  pNew->nDataLength  = nLen;
+  pNew->nAllocLength = nCapacity;
+
+  m_pchData       = pNew->data();  // data starts after wxStringData
+  m_pchData[nLen] = wxT('\0');
+  return true;
+}
+
+// Must be called before changing this string: if the character data is
+// shared with other strings, gives this string a private copy of it.
+//
+// Returns false if the copy could not be allocated. In that case the string
+// keeps pointing at the shared data and keeps its reference to it.
+bool wxStringBase::CopyBeforeWrite()
+{
+  wxStringData* pData = GetStringData();
+
+  if ( pData->IsShared() ) {
+    size_t nLen = pData->nDataLength;
+    if ( !AllocBuffer(nLen) ) {
+      // allocation failures are handled by the caller; m_pchData was not
+      // changed, so the reference on the shared data must stay counted
+      return false;
+    }
+    wxTmemcpy(m_pchData, pData->data(), nLen);
+
+    // only give up our reference once we really stopped using the old data
+    pData->Unlock();                // memory not freed because shared
+  }
+
+  wxASSERT( !GetStringData()->IsShared() );  // we must be the only owner
+
+  return true;
+}
+
+// Must be called before replacing the contents of this string: makes sure
+// the string exclusively owns a buffer of at least nLen characters. The old
+// contents are NOT preserved and the data length is reset to 0.
+//
+// Returns false if memory could not be obtained; the string then still
+// refers to (and still holds its reference on) its previous data.
+bool wxStringBase::AllocBeforeWrite(size_t nLen)
+{
+  wxASSERT( nLen != 0 );  // doesn't make any sense
+
+  // must not share string and must have enough space
+  wxStringData* pData = GetStringData();
+  if ( pData->IsShared() || pData->IsEmpty() ) {
+    // can't work with old buffer, get new one
+    if ( !AllocBuffer(nLen) ) {
+      // allocation failures are handled by the caller; we still use the old
+      // data, so it must not be unlocked
+      return false;
+    }
+
+    // m_pchData now points to the new buffer: release the old one
+    pData->Unlock();
+  }
+  else {
+    if ( nLen > pData->nAllocLength ) {
+      // realloc the buffer instead of calling malloc() again, this is more
+      // efficient
+      STATISTICS_ADD(Length, nLen);
+
+      nLen += EXTRA_ALLOC;
+
+      pData = (wxStringData*)
+          realloc(pData, sizeof(wxStringData) + (nLen + 1)*sizeof(wxChar));
+
+      if ( pData == NULL ) {
+        // allocation failures are handled by the caller
+        // keep previous data since reallocation failed
+        return false;
+      }
+
+      pData->nAllocLength = nLen;
+      m_pchData = pData->data();
+    }
+  }
+
+  wxASSERT( !GetStringData()->IsShared() );  // we must be the only owner
+
+  // it doesn't really matter what the string length is as it's going to be
+  // overwritten later but, for extra safety, set it to 0 for now as we may
+  // have some junk in m_pchData
+  GetStringData()->nDataLength = 0;
+
+  return true;
+}
+
+// Appends n copies of ch to this string and returns *this.
+//
+// If the required memory can't be obtained the failure is reported with
+// wxFAIL_MSG and the string is returned unchanged.
+wxStringBase& wxStringBase::append(size_t n, wxChar ch)
+{
+    size_type len = length();
+
+    if ( !Alloc(len + n) || !CopyBeforeWrite() ) {
+      wxFAIL_MSG( _T("out of memory in wxStringBase::append") );
+
+      // the buffer is too small (or still shared): writing to it below
+      // would corrupt memory, so bail out
+      return *this;
+    }
+
+    GetStringData()->nDataLength = len + n;
+    m_pchData[len + n] = '\0';
+    for ( size_t i = 0; i < n; ++i )
+        m_pchData[len + i] = ch;
+    return *this;
+}
+
+// Makes the string exactly nSize characters long: a longer string is cut
+// with erase(), a shorter one is padded with copies of ch via append().
+void wxStringBase::resize(size_t nSize, wxChar ch)
+{
+    const size_t nCurrent = length();
+
+    if ( nSize > nCurrent )
+    {
+        // grow: pad with the fill character
+        append(nSize - nCurrent, ch);
+    }
+    else if ( nSize < nCurrent )
+    {
+        // shrink: drop everything from position nSize on
+        erase(begin() + nSize, end());
+    }
+    //else: the length is already right, nothing to do
+}
+
+// Allocate enough memory for nLen characters while keeping the current
+// contents. Nothing is done if the allocated length already exceeds nLen.
+//
+// Returns false if the memory could not be obtained; the string is then
+// left as it was, including its reference on shared data.
+bool wxStringBase::Alloc(size_t nLen)
+{
+  wxStringData *pData = GetStringData();
+  if ( pData->nAllocLength <= nLen ) {
+    if ( pData->IsEmpty() ) {
+      // no buffer of our own yet: create one holding an empty string
+      nLen += EXTRA_ALLOC;
+
+      pData = (wxStringData *)
+                malloc(sizeof(wxStringData) + (nLen + 1)*sizeof(wxChar));
+
+      if ( pData == NULL ) {
+        // allocation failure handled by caller
+        return false;
+      }
+
+      pData->nRefs = 1;
+      pData->nDataLength = 0;
+      pData->nAllocLength = nLen;
+      m_pchData = pData->data();  // data starts after wxStringData
+      m_pchData[0u] = wxT('\0');
+    }
+    else if ( pData->IsShared() ) {
+      size_t nOldLen = pData->nDataLength;
+      if ( !AllocBuffer(nLen) ) {
+        // allocation failure handled by caller; we keep using the shared
+        // data, so our reference on it must stay counted
+        return false;
+      }
+      // +1 to copy the terminator, too
+      memcpy(m_pchData, pData->data(), (nOldLen+1)*sizeof(wxChar));
+      GetStringData()->nDataLength = nOldLen;
+
+      // the old data is not used by this string any more
+      pData->Unlock();                // memory not freed because shared
+    }
+    else {
+      // sole owner of a too small buffer: grow it in place if possible
+      nLen += EXTRA_ALLOC;
+
+      pData = (wxStringData *)
+        realloc(pData, sizeof(wxStringData) + (nLen + 1)*sizeof(wxChar));
+
+      if ( pData == NULL ) {
+        // allocation failure handled by caller
+        // keep previous data since reallocation failed
+        return false;
+      }
+
+      // it's not important if the pointer changed or not (the check for this
+      // is not faster than assigning to m_pchData in all cases)
+      pData->nAllocLength = nLen;
+      m_pchData = pData->data();
+    }
+  }
+  //else: we've already got enough
+  return true;
+}
+
+// Returns a writable iterator to the first character. A non-empty string is
+// first passed through CopyBeforeWrite(); its result is not checked here.
+wxStringBase::iterator wxStringBase::begin()
+{
+    if (length() > 0)
+        CopyBeforeWrite();
+    return m_pchData;
+}
+
+// Returns a writable iterator one past the last character. A non-empty
+// string is first passed through CopyBeforeWrite(); its result is not
+// checked here.
+wxStringBase::iterator wxStringBase::end()
+{
+    if (length() > 0)
+        CopyBeforeWrite();
+    return m_pchData + length();
+}
+
+// Removes the character it points to and returns an iterator to the same
+// position in the modified string.
+wxStringBase::iterator wxStringBase::erase(iterator it)
+{
+    // remember the position as an index: erasing replaces the buffer
+    const size_type nPos = it - begin();
+
+    erase(nPos, 1);
+
+    return begin() + nPos;
+}
+
+// Removes up to nLen characters starting at nStart (fewer if the string ends
+// first) by building the result in a temporary and swapping it in.
+wxStringBase& wxStringBase::erase(size_t nStart, size_t nLen)
+{
+    wxASSERT(nStart <= length());
+
+    // number of characters from nStart to the end of the string
+    const size_t nTail = length() - nStart;
+    if ( nLen > nTail )
+        nLen = nTail;
+
+    // what is kept: the part before nStart followed by the part after the hole
+    wxString strKept(c_str(), nStart);
+    strKept.append(c_str() + nStart + nLen, nTail - nLen);
+
+    swap(strKept);
+    return *this;
+}
+
+// Inserts n characters of sz before position nPos and returns *this. If n is
+// npos, sz is taken to be NUL terminated and inserted completely.
+//
+// If the required memory can't be obtained the failure is reported with
+// wxFAIL_MSG and the string is returned unchanged.
+wxStringBase& wxStringBase::insert(size_t nPos, const wxChar *sz, size_t n)
+{
+    wxASSERT( nPos <= length() );
+
+    if ( n == npos ) n = wxStrlen(sz);
+    if ( n == 0 ) return *this;
+
+    if ( !Alloc(length() + n) || !CopyBeforeWrite() ) {
+        wxFAIL_MSG( _T("out of memory in wxStringBase::insert") );
+
+        // the buffer is too small (or still shared): moving data around in
+        // it below would corrupt memory, so bail out
+        return *this;
+    }
+
+    // open a gap of n characters at nPos, then fill it
+    memmove(m_pchData + nPos + n, m_pchData + nPos,
+            (length() - nPos) * sizeof(wxChar));
+    memcpy(m_pchData + nPos, sz, n * sizeof(wxChar));
+    GetStringData()->nDataLength = length() + n;
+    m_pchData[length()] = '\0';
+
+    return *this;
+}
+
+// Exchanges the contents of this string and str by swapping their data
+// pointers; no character data is copied.
+void wxStringBase::swap(wxStringBase& str)
+{
+    wxChar * const pchMine = m_pchData;
+    m_pchData = str.m_pchData;
+    str.m_pchData = pchMine;
+}
+
+// Returns the position of the first occurrence of str at or after nStart,
+// or npos if there is none. An empty str is reported as found at position 0.
+// A start position beyond the end of the string asserts in debug builds and
+// yields npos.
+size_t wxStringBase::find(const wxStringBase& str, size_t nStart) const
+{
+    // deal with the special case of empty string first
+    const size_t nLen = length();
+    const size_t nLenOther = str.length();
+
+    if ( !nLenOther )
+    {
+        // empty string is a substring of anything
+        return 0;
+    }
+
+    if ( !nLen )
+    {
+        // the other string is non empty so can't be our substring
+        return npos;
+    }
+
+    wxASSERT( str.GetStringData()->IsValid() );
+    wxASSERT( nStart <= nLen );
+
+    if ( nStart > nLen )
+    {
+        // without this check "nLen - nStart" below would wrap around and the
+        // search would run past the end of the buffer
+        return npos;
+    }
+
+    const wxChar * const other = str.c_str();
+
+    // anchor: look for the first character of the other string
+    const wxChar* p = (const wxChar*)wxTmemchr(c_str() + nStart,
+                                               *other,
+                                               nLen - nStart);
+
+    if ( !p )
+        return npos;
+
+    // as long as a full match still fits and there is none at p, move on
+    while ( p - c_str() + nLenOther <= nLen && wxTmemcmp(p, other, nLenOther) )
+    {
+        p++;
+
+        // anchor again
+        p = (const wxChar*)wxTmemchr(p, *other, nLen - (p - c_str()));
+
+        if ( !p )
+            return npos;
+    }
+
+    return p - c_str() + nLenOther <= nLen ? p - c_str() : npos;
+}
+
+// Same as find(str, nStart) with the search string built from the first n
+// characters of sz.
+size_t wxStringBase::find(const wxChar* sz, size_t nStart, size_t n) const
+{
+    const wxStringBase strNeedle(sz, n);
+
+    return find(strNeedle, nStart);
+}
+
+// Returns the position of the first occurrence of ch at or after nStart, or
+// npos if there is none. A start position beyond the end of the string
+// asserts in debug builds and yields npos.
+size_t wxStringBase::find(wxChar ch, size_t nStart) const
+{
+    wxASSERT( nStart <= length() );
+
+    if ( nStart > length() )
+    {
+        // "length() - nStart" would wrap around and make wxTmemchr() scan
+        // memory far beyond the buffer
+        return npos;
+    }
+
+    const wxChar *p = (const wxChar*)wxTmemchr(c_str() + nStart, ch, length() - nStart);
+
+    return p == NULL ? npos : p - c_str();
+}
+
+// Returns the position of the last occurrence of str that starts at or
+// before nStart (npos meaning "search from the end"), or npos if none.
+size_t wxStringBase::rfind(const wxStringBase& str, size_t nStart) const
+{
+    wxASSERT( str.GetStringData()->IsValid() );
+    wxASSERT( nStart == npos || nStart <= length() );
+
+    const size_t nLen = length();
+    const size_t nLenOther = str.length();
+
+    // a longer string can't be contained in a shorter one
+    if ( nLen < nLenOther )
+        return npos;
+
+    // avoids a corner case later
+    if ( nLen == 0 && nLenOther == 0 )
+        return 0;
+
+    // "top" is the highest position at which a match can start
+    size_t top = nLen - nLenOther;
+
+    if ( nStart == npos )
+        nStart = nLen - 1;
+    if ( nStart < top )
+        top = nStart;
+
+    // try every start position from top down to 0
+    for ( size_t pos = top + 1; pos-- > 0; )
+    {
+        if ( wxTmemcmp(c_str() + pos, str.c_str(), nLenOther) == 0 )
+            return pos;
+    }
+
+    return npos;
+}
+
+// Same as rfind(str, nStart) with the search string built from the first n
+// characters of sz.
+size_t wxStringBase::rfind(const wxChar* sz, size_t nStart, size_t n) const
+{
+    const wxStringBase strNeedle(sz, n);
+
+    return rfind(strNeedle, nStart);
+}
+
+// Returns the position of the last occurrence of ch at or before nStart, or
+// npos if there is none. nStart == npos (or any value >= length()) searches
+// the whole string. The terminating NUL is never examined, so the result is
+// always a valid character index.
+size_t wxStringBase::rfind(wxChar ch, size_t nStart) const
+{
+    const size_t nLen = length();
+
+    wxASSERT( nStart == npos || nStart <= nLen );
+
+    // i starts one past the first character to examine; clamping to nLen
+    // handles npos and keeps the scan away from the terminator
+    for ( size_t i = nStart < nLen ? nStart + 1 : nLen; i-- > 0; )
+    {
+        if ( c_str()[i] == ch )
+            return i;
+    }
+
+    return npos;
+}
+
+// Returns the position of the first character at or after nStart that occurs
+// in the NUL terminated set sz, or npos if there is none. A start position
+// beyond the end of the string asserts in debug builds and yields npos.
+size_t wxStringBase::find_first_of(const wxChar* sz, size_t nStart) const
+{
+    wxASSERT(nStart <= length());
+
+    const size_t nSetLen = wxStrlen(sz);
+    const size_t nLen = length();
+
+    for ( size_t i = nStart; i < nLen; ++i )
+    {
+        if ( wxTmemchr(sz, c_str()[i], nSetLen) )
+            return i;
+    }
+
+    // nothing found; this is also reached when nStart is out of range, which
+    // must not be reported as a match position
+    return npos;
+}
+
+// Same as find_first_of() for a string argument, with the character set
+// built from the first n characters of sz.
+size_t wxStringBase::find_first_of(const wxChar* sz, size_t nStart,
+                                   size_t n) const
+{
+    const wxStringBase strSet(sz, n);
+
+    return find_first_of(strSet, nStart);
+}
+
+// Returns the position of the last character at or before nStart that occurs
+// in the NUL terminated set sz, or npos if there is none. nStart == npos
+// searches the whole string. An empty string always yields npos.
+size_t wxStringBase::find_last_of(const wxChar* sz, size_t nStart) const
+{
+    const size_t nLen = length();
+
+    if ( nStart != npos )
+    {
+        wxASSERT_MSG( nStart <= nLen,
+                        _T("invalid index in find_last_of()") );
+    }
+
+    // nothing to search; also keeps "nLen - 1" below from wrapping around
+    if ( nLen == 0 )
+        return npos;
+
+    // start at the last character at most (this covers npos as well)
+    if ( nStart >= nLen )
+        nStart = nLen - 1;
+
+    const size_t nSetLen = wxStrlen(sz);
+
+    // index based backwards scan: never forms a pointer before the buffer
+    for ( size_t i = nStart + 1; i-- > 0; )
+    {
+        if ( wxTmemchr(sz, c_str()[i], nSetLen) )
+            return i;
+    }
+
+    return npos;
+}
+
+// Same as find_last_of() for a string argument, with the character set built
+// from the first n characters of sz.
+size_t wxStringBase::find_last_of(const wxChar* sz, size_t nStart,
+                                   size_t n) const
+{
+    const wxStringBase strSet(sz, n);
+
+    return find_last_of(strSet, nStart);
+}
+
+// Returns the position of the first character at or after nStart that does
+// NOT occur in the NUL terminated set sz, or npos if there is none. A start
+// position of npos, or one beyond the end of the string (which asserts in
+// debug builds), yields npos.
+size_t wxStringBase::find_first_not_of(const wxChar* sz, size_t nStart) const
+{
+    const size_t nLen = length();
+
+    if ( nStart != npos )
+    {
+        wxASSERT( nStart <= nLen );
+    }
+
+    const size_t nSetLen = wxStrlen(sz);
+
+    for ( size_t i = nStart; i < nLen; ++i )
+    {
+        if ( !wxTmemchr(sz, c_str()[i], nSetLen) )
+            return i;
+    }
+
+    // nothing found; this is also reached when nStart is out of range, which
+    // must not be reported as a match position
+    return npos;
+}
+
+// Same as find_first_not_of() for a string argument, with the character set
+// built from the first n characters of sz.
+size_t wxStringBase::find_first_not_of(const wxChar* sz, size_t nStart,
+                                       size_t n) const
+{
+    const wxStringBase strSet(sz, n);
+
+    return find_first_not_of(strSet, nStart);
+}
+
+// Returns the position of the first character at or after nStart that
+// differs from ch, or npos if there is none. The scan is bounded by length()
+// and not by the first NUL, so strings with embedded NULs are searched
+// completely.
+size_t wxStringBase::find_first_not_of(wxChar ch, size_t nStart) const
+{
+    wxASSERT( nStart <= length() );
+
+    const size_t nLen = length();
+
+    for ( size_t i = nStart; i < nLen; ++i )
+    {
+        if ( c_str()[i] != ch )
+            return i;
+    }
+
+    return npos;
+}
+
+// Returns the position of the last character at or before nStart that does
+// NOT occur in the NUL terminated set sz, or npos if there is none.
+// nStart == npos searches the whole string. The terminating NUL is never
+// examined, so the result is always a valid character index; an empty
+// string always yields npos.
+size_t wxStringBase::find_last_not_of(const wxChar* sz, size_t nStart) const
+{
+    const size_t nLen = length();
+
+    if ( nStart != npos )
+    {
+        wxASSERT( nStart <= nLen );
+    }
+
+    // nothing to search; also keeps "nLen - 1" below from wrapping around
+    if ( nLen == 0 )
+        return npos;
+
+    // start at the last character at most (this covers npos as well)
+    if ( nStart >= nLen )
+        nStart = nLen - 1;
+
+    const size_t nSetLen = wxStrlen(sz);
+
+    // index based backwards scan: never forms a pointer before the buffer
+    for ( size_t i = nStart + 1; i-- > 0; )
+    {
+        if ( !wxTmemchr(sz, c_str()[i], nSetLen) )
+            return i;
+    }
+
+    return npos;
+}
+
+// Same as find_last_not_of() for a string argument, with the character set
+// built from the first n characters of sz.
+size_t wxStringBase::find_last_not_of(const wxChar* sz, size_t nStart,
+                                      size_t n) const
+{
+    const wxStringBase strSet(sz, n);
+
+    return find_last_not_of(strSet, nStart);
+}
+
+// Returns the position of the last character at or before nStart that
+// differs from ch, or npos if there is none. nStart == npos searches the
+// whole string. The terminating NUL is never examined, so the result is
+// always a valid character index; an empty string always yields npos.
+size_t wxStringBase::find_last_not_of(wxChar ch, size_t nStart) const
+{
+    const size_t nLen = length();
+
+    if ( nStart != npos )
+    {
+        wxASSERT( nStart <= nLen );
+    }
+
+    // nothing to search; also keeps "nLen - 1" below from wrapping around
+    if ( nLen == 0 )
+        return npos;
+
+    // start at the last character at most (this covers npos as well)
+    if ( nStart >= nLen )
+        nStart = nLen - 1;
+
+    // index based backwards scan: never forms a pointer before the buffer
+    for ( size_t i = nStart + 1; i-- > 0; )
+    {
+        if ( c_str()[i] != ch )
+            return i;
+    }
+
+    return npos;
+}
+
+// Replaces up to nLen characters starting at nStart (fewer if the string
+// ends first) with the NUL terminated string sz and returns *this. The
+// result is built in a temporary which is then swapped in.
+wxStringBase& wxStringBase::replace(size_t nStart, size_t nLen,
+                                    const wxChar *sz)
+{
+  wxASSERT_MSG( nStart <= length(),
+                _T("index out of bounds in wxStringBase::replace") );
+  size_t strLen = length() - nStart;
+  nLen = strLen < nLen ? strLen : nLen;
+
+  wxStringBase strTmp;
+  strTmp.reserve(length()); // micro optimisation to avoid multiple mem allocs
+
+  // This string can contain NUL characters, so the parts taken from it are
+  // appended with an explicit length (as erase() does) and not as C strings.
+  // c_str() is used rather than the character access operators so that the
+  // data of this string doesn't have to be unshared.
+  strTmp.append(c_str(), nStart);
+
+  // it's safe to use the C string version here because sz must be a normal
+  // C string
+  strTmp.append(sz);
+
+  // whatever follows the replaced range
+  strTmp.append(c_str() + nStart + nLen, strLen - nLen);
+
+  swap(strTmp);
+  return *this;
+}
+
+// Replaces the given range with nCount copies of ch. The fill string is
+// handed on as a C string.
+wxStringBase& wxStringBase::replace(size_t nStart, size_t nLen,
+                                    size_t nCount, wxChar ch)
+{
+  const wxStringBase strFill(nCount, ch);
+
+  return replace(nStart, nLen, strFill.c_str());
+}
+
+// Replaces the given range with the nLen2 characters of str that start at
+// nStart2.
+wxStringBase& wxStringBase::replace(size_t nStart, size_t nLen,
+                                    const wxStringBase& str,
+                                    size_t nStart2, size_t nLen2)
+{
+  const wxStringBase strPart = str.substr(nStart2, nLen2);
+
+  return replace(nStart, nLen, strPart);
+}
+
+// Replaces the given range with the first nCount characters of sz. The
+// replacement is handed on as a C string.
+wxStringBase& wxStringBase::replace(size_t nStart, size_t nLen,
+                                    const wxChar* sz, size_t nCount)
+{
+  const wxStringBase strNew(sz, nCount);
+
+  return replace(nStart, nLen, strNew.c_str());
+}
+
+// Returns the substring of nLen characters starting at nStart; nLen == npos
+// means "everything up to the end of the string".
+wxStringBase wxStringBase::substr(size_t nStart, size_t nLen) const
+{
+  const size_t nCount = nLen == npos ? length() - nStart : nLen;
+
+  return wxStringBase(*this, nStart, nCount);
+}
+
+// Assigns one string to another. No characters are copied: this string
+// either becomes empty or takes a new reference on the data of stringSrc.
+wxStringBase& wxStringBase::operator=(const wxStringBase& stringSrc)
+{
+  wxASSERT( stringSrc.GetStringData()->IsValid() );
+
+  // both strings already use the same data: nothing to do
+  if ( m_pchData == stringSrc.m_pchData )
+    return *this;
+
+  if ( stringSrc.GetStringData()->IsEmpty() ) {
+    Reinit();
+    return *this;
+  }
+
+  // drop our reference on the old data and take one on the new data
+  GetStringData()->Unlock();
+  m_pchData = stringSrc.m_pchData;
+  GetStringData()->Lock();
+
+  return *this;
+}
+
+// Assigns a single character; an allocation failure is reported with
+// wxFAIL_MSG.
+wxStringBase& wxStringBase::operator=(wxChar ch)
+{
+  const bool ok = AssignCopy(1, &ch);
+  if ( !ok ) {
+    wxFAIL_MSG( _T("out of memory in wxStringBase::operator=(wxChar)") );
+  }
+
+  return *this;
+}
+
+// Assigns a NUL terminated C string; an allocation failure is reported with
+// wxFAIL_MSG.
+wxStringBase& wxStringBase::operator=(const wxChar *psz)
+{
+  const bool ok = AssignCopy(wxStrlen(psz), psz);
+  if ( !ok ) {
+    wxFAIL_MSG( _T("out of memory in wxStringBase::operator=(const wxChar *)") );
+  }
+
+  return *this;
+}
+
+// Helper doing the real copy for the assignment operators: replaces the
+// contents of this string with nSrcLen characters from pszSrcData.
+// Returns false if the buffer for them could not be allocated.
+bool wxStringBase::AssignCopy(size_t nSrcLen, const wxChar *pszSrcData)
+{
+  // assigning nothing just empties the string
+  if ( nSrcLen == 0 ) {
+    Reinit();
+    return true;
+  }
+
+  if ( !AllocBeforeWrite(nSrcLen) ) {
+    // allocation failure handled by caller
+    return false;
+  }
+
+  memcpy(m_pchData, pszSrcData, nSrcLen*sizeof(wxChar));
+  GetStringData()->nDataLength = nSrcLen;
+  m_pchData[nSrcLen] = wxT('\0');
+
+  return true;
+}
+
+// ---------------------------------------------------------------------------
+// string concatenation
+// ---------------------------------------------------------------------------
+
+// add something to this string: appends at most nMaxLen of the nSrcLen
+// characters at pszSrcData to the end of this string
+//
+// returns false if the memory for the longer string could not be obtained
+// and true otherwise (including when there was nothing to append)
+bool wxStringBase::ConcatSelf(size_t nSrcLen, const wxChar *pszSrcData,
+                              size_t nMaxLen)
+{
+  STATISTICS_ADD(SummandLength, nSrcLen);
+
+  // never append more than nMaxLen characters
+  nSrcLen = nSrcLen < nMaxLen ? nSrcLen : nMaxLen;
+
+  // concatenating an empty string is a NOP
+  if ( nSrcLen > 0 ) {
+    wxStringData *pData = GetStringData();
+    size_t nLen = pData->nDataLength;
+    size_t nNewLen = nLen + nSrcLen;
+
+    // take special care when appending part of this string to itself: the code
+    // below reallocates our buffer and this invalidates pszSrcData pointer so
+    // we have to copy it in another temporary string in this case (but avoid
+    // doing this unnecessarily)
+    if ( pszSrcData >= m_pchData && pszSrcData < m_pchData + nLen )
+    {
+        wxStringBase tmp(pszSrcData, nSrcLen);
+        return ConcatSelf(nSrcLen, tmp.m_pchData, nSrcLen);
+    }
+
+    // alloc new buffer if current is too small
+    if ( pData->IsShared() ) {
+      STATISTICS_ADD(ConcatHit, 0);
+
+      // we have to allocate another buffer
+      wxStringData* pOldData = GetStringData();
+      if ( !AllocBuffer(nNewLen) ) {
+          // allocation failure handled by caller
+          return false;
+      }
+      memcpy(m_pchData, pOldData->data(), nLen*sizeof(wxChar));
+      // the old data is released only after it has been copied
+      pOldData->Unlock();
+    }
+    else if ( nNewLen > pData->nAllocLength ) {
+      STATISTICS_ADD(ConcatHit, 0);
+
+      reserve(nNewLen);
+      // we have to grow the buffer
+      if ( capacity() < nNewLen ) {
+          // allocation failure handled by caller
+          return false;
+      }
+    }
+    else {
+      STATISTICS_ADD(ConcatHit, 1);
+
+      // the buffer is already big enough
+    }
+
+    // should be enough space
+    wxASSERT( nNewLen <= GetStringData()->nAllocLength );
+
+    // fast concatenation - all is done in our buffer
+    memcpy(m_pchData + nLen, pszSrcData, nSrcLen*sizeof(wxChar));
+
+    m_pchData[nNewLen] = wxT('\0');          // put terminating '\0'
+    GetStringData()->nDataLength = nNewLen; // and fix the length
+  }
+  //else: the string to append was empty
+  return true;
+}
+
+// ---------------------------------------------------------------------------
+// simple sub-string extraction
+// ---------------------------------------------------------------------------
+
+// Helper cloning part of the data attached to this string: puts nCopyLen
+// characters starting at nCopyIndex into dest, which gets a buffer of its
+// own. Returns false if that buffer could not be allocated.
+bool wxStringBase::AllocCopy(wxString& dest, int nCopyLen, int nCopyIndex) const
+{
+  // nothing to copy: dest simply becomes empty
+  if ( nCopyLen == 0 ) {
+    dest.Init();
+    return true;
+  }
+
+  if ( !dest.AllocBuffer(nCopyLen) ) {
+    // allocation failure handled by caller
+    return false;
+  }
+
+  memcpy(dest.m_pchData, m_pchData + nCopyIndex, nCopyLen*sizeof(wxChar));
+  return true;
+}
+
+#endif // !wxUSE_STL
+
+#if !wxUSE_STL || !defined(HAVE_STD_STRING_COMPARE)
+
+#if !wxUSE_STL
+    #define STRINGCLASS wxStringBase
+#else
+    #define STRINGCLASS wxString
+#endif
+
+// Compares the l1 characters at s1 with the l2 characters at s2. The common
+// prefix is compared with wxTmemcmp() and a difference there decides the
+// result; if the prefixes are equal the shorter sequence is the smaller one
+// (-1 or +1 is returned), and sequences of equal length compare equal.
+static inline int wxDoCmp(const wxChar* s1, size_t l1,
+                          const wxChar* s2, size_t l2)
+{
+    const size_t nCommon = l1 < l2 ? l1 : l2;
+
+    const int ret = wxTmemcmp(s1, s2, nCommon);
+    if ( ret != 0 || l1 == l2 )
+        return ret;
+
+    // equal prefixes of different lengths
+    return l1 < l2 ? -1 : +1;
+}
+
+// Compares this whole string with str using wxDoCmp() and returns its result
+// (0 if equal, negative/positive if this string is smaller/greater).
+int STRINGCLASS::compare(const wxStringBase& str) const
+{
+    return ::wxDoCmp(data(), length(), str.data(), str.length());
+}
+
+// Compares up to nLen characters of this string starting at nStart (fewer if
+// the string ends first) with the whole of str.
+int STRINGCLASS::compare(size_t nStart, size_t nLen,
+                         const wxStringBase& str) const
+{
+    wxASSERT(nStart <= length());
+
+    // don't go beyond the end of this string
+    const size_type nAvail = length() - nStart;
+    if ( nLen > nAvail )
+        nLen = nAvail;
+
+    return ::wxDoCmp(data() + nStart, nLen, str.data(), str.length());
+}
+
+// Compares up to nLen characters of this string starting at nStart with up
+// to nLen2 characters of str starting at nStart2; both ranges are cut at the
+// end of their string.
+int STRINGCLASS::compare(size_t nStart, size_t nLen,
+                         const wxStringBase& str,
+                         size_t nStart2, size_t nLen2) const
+{
+    wxASSERT(nStart <= length());
+    wxASSERT(nStart2 <= str.length());
+
+    // don't go beyond the end of either string
+    const size_type nAvail = length() - nStart;
+    if ( nLen > nAvail )
+        nLen = nAvail;
+
+    const size_type nAvail2 = str.length() - nStart2;
+    if ( nLen2 > nAvail2 )
+        nLen2 = nAvail2;
+
+    return ::wxDoCmp(data() + nStart, nLen, str.data() + nStart2, nLen2);
+}
+
+// Compares this whole string with the NUL terminated C string sz.
+int STRINGCLASS::compare(const wxChar* sz) const
+{
+    return ::wxDoCmp(data(), length(), sz, wxStrlen(sz));
+}
+
+// Compares up to nLen characters of this string starting at nStart (fewer if
+// the string ends first) with the first nCount characters of sz;
+// nCount == npos means sz is NUL terminated and used completely.
+int STRINGCLASS::compare(size_t nStart, size_t nLen,
+                         const wxChar* sz, size_t nCount) const
+{
+    wxASSERT(nStart <= length());
+
+    // don't go beyond the end of this string
+    const size_type nAvail = length() - nStart;
+    if ( nLen > nAvail )
+        nLen = nAvail;
+
+    if ( nCount == npos )
+        nCount = wxStrlen(sz);
+
+    return ::wxDoCmp(data() + nStart, nLen, sz, nCount);
+}
+
+#undef STRINGCLASS
+
+#endif // !wxUSE_STL || !defined(HAVE_STD_STRING_COMPARE)
+
+// ===========================================================================
+// wxString class core
+// ===========================================================================
+
+// ---------------------------------------------------------------------------
+// construction and conversion
+// ---------------------------------------------------------------------------
+
+#if wxUSE_UNICODE
+
+// Constructs the string from nLength bytes of the multibyte string psz,
+// converted with conv (Unicode build). nLength == npos is passed to the
+// converter as wxNO_LEN. A NULL psz, a zero nLength or an empty conversion
+// result leave the string empty.
+wxString::wxString(const char *psz, const wxMBConv& conv, size_t nLength)
+{
+    // anything to do?
+    if ( !psz || nLength == 0 )
+        return;
+
+    if ( nLength == npos )
+        nLength = wxNO_LEN;
+
+    size_t nLenWide;
+    wxWCharBuffer wbuf = conv.cMB2WC(psz, nLength, &nLenWide);
+
+    if ( nLenWide )
+        assign(wbuf, nLenWide);
+}
+
+// Convert wxString in Unicode mode to a multi-byte string using conv; the
+// terminating NUL is part of what is handed to the converter (length() + 1)
+const wxCharBuffer wxString::mb_str(const wxMBConv& conv) const
+{
+    return conv.cWC2MB(c_str(), length() + 1 /* size, not length */, NULL);
+}
+
+#else // ANSI
+
+#if wxUSE_WCHAR_T
+
+// Constructs the string from nLength characters of the wide string pwz,
+// converted with conv (ANSI build with wchar_t support). nLength == npos is
+// passed to the converter as wxNO_LEN. A NULL pwz, a zero nLength or an
+// empty conversion result leave the string empty.
+wxString::wxString(const wchar_t *pwz, const wxMBConv& conv, size_t nLength)
+{
+    // anything to do?
+    if ( !pwz || nLength == 0 )
+        return;
+
+    if ( nLength == npos )
+        nLength = wxNO_LEN;
+
+    size_t nLenMB;
+    wxCharBuffer buf = conv.cWC2MB(pwz, nLength, &nLenMB);
+
+    if ( nLenMB )
+        assign(buf, nLenMB);
+}
+
+// Converts this string to a wide character string using conv if unicode
+// mode is not enabled and wxUSE_WCHAR_T is enabled; the terminating NUL is
+// part of what is handed to the converter (length() + 1)
+const wxWCharBuffer wxString::wc_str(const wxMBConv& conv) const
+{
+    return conv.cMB2WC(c_str(), length() + 1 /* size, not length */, NULL);
+}
+
+#endif // wxUSE_WCHAR_T
+
+#endif // Unicode/ANSI
+
+// Shrink to minimal size (releasing extra memory): the contents are copied
+// into a new string which then trades places with this one. The return
+// value tells whether the copy has the same length as the original.
+bool wxString::Shrink()
+{
+  wxString strExact(begin(), end());
+  swap(strExact);
+
+  // after the swap strExact holds the old data and *this the new copy
+  const bool sameLength = strExact.length() == length();
+  return sameLength;
+}
+
+#if !wxUSE_STL
+// Get the pointer to a writable buffer of at least nLen characters, or NULL
+// if it could not be allocated. The string data is flagged as not valid
+// (Validate(false)) until UngetWriteBuf() is called.
+wxChar *wxString::GetWriteBuf(size_t nLen)
+{
+  if ( AllocBeforeWrite(nLen) ) {
+    wxASSERT( GetStringData()->nRefs == 1 );
+    GetStringData()->Validate(false);
+
+    return m_pchData;
+  }
+
+  // allocation failure handled by caller
+  return NULL;
+}
+
+// put string back in a reasonable state after GetWriteBuf: the new length is
+// taken to be that of the NUL terminated string now in the buffer
+void wxString::UngetWriteBuf()
+{
+  UngetWriteBuf(wxStrlen(m_pchData));
+}
+
+// Puts the string back in a reasonable state after GetWriteBuf() when the
+// caller knows that nLen characters were written: records the length,
+// terminates the data and flags it as valid again.
+void wxString::UngetWriteBuf(size_t nLen)
+{
+  wxStringData * const pStrData = GetStringData();
+
+  wxASSERT_MSG( nLen < pStrData->nAllocLength, _T("buffer overrun") );
+
+  pStrData->nDataLength = nLen;
+
+  // the strings we store are always NUL-terminated
+  pStrData->data()[nLen] = _T('\0');
+
+  pStrData->Validate(true);
+}
+#endif // !wxUSE_STL
+
+// ---------------------------------------------------------------------------
+// data access
+// ---------------------------------------------------------------------------
+
+// all functions are inline in string.h
+
+// ---------------------------------------------------------------------------
+// assignment operators
+// ---------------------------------------------------------------------------
+
+#if !wxUSE_UNICODE
+
+// same as 'signed char' variant: the pointer is cast to "const char *" and
+// assigned through the operator taking that type
+wxString& wxString::operator=(const unsigned char* psz)
+{
+  *this = (const char *)psz;
+  return *this;
+}
+
+#if wxUSE_WCHAR_T
+// assigns a wide string (ANSI build): a temporary wxString is constructed
+// from pwz and its contents are swapped into this string
+wxString& wxString::operator=(const wchar_t *pwz)
+{
+  wxString str(pwz);
+  swap(str);
+  return *this;
+}
+#endif
+
+#endif
+
+/*
+ * concatenation functions come in 5 flavours:
+ *  string + string
+ *  char   + string      and      string + char
+ *  C str  + string      and      string + C str
+ */
+
+// string + string: returns a new string made of str1 followed by str2
+wxString operator+(const wxString& str1, const wxString& str2)
+{
+#if !wxUSE_STL
+    wxASSERT( str1.GetStringData()->IsValid() );
+    wxASSERT( str2.GetStringData()->IsValid() );
+#endif
+
+    wxString strSum(str1);
+    strSum += str2;
+
+    return strSum;
+}
+
+// string + char: returns a new string made of str followed by ch
+wxString operator+(const wxString& str, wxChar ch)
+{
+#if !wxUSE_STL
+    wxASSERT( str.GetStringData()->IsValid() );
+#endif
+
+    wxString strSum(str);
+    strSum += ch;
+
+    return strSum;
+}
+
+// char + string: returns a new string made of ch followed by str
+wxString operator+(wxChar ch, const wxString& str)
+{
+#if !wxUSE_STL
+    wxASSERT( str.GetStringData()->IsValid() );
+#endif
+
+    // start from the single character, then add the string after it
+    wxString s = ch;
+    s += str;
+
+    return s;
+}
+
+// string + C string: returns a new string holding str followed by psz
+wxString operator+(const wxString& str, const wxChar *psz)
+{
+#if !wxUSE_STL
+    wxASSERT( str.GetStringData()->IsValid() );
+#endif
+
+    // reserve room for both parts up front
+    const size_t nTotalLen = wxStrlen(psz) + str.length();
+
+    wxString strResult;
+    if ( !strResult.Alloc(nTotalLen) ) {
+        wxFAIL_MSG( _T("out of memory in wxString::operator+") );
+    }
+
+    strResult += str;
+    strResult += psz;
+    return strResult;
+}
+
+// C string + string: returns a new string holding psz followed by str
+wxString operator+(const wxChar *psz, const wxString& str)
+{
+#if !wxUSE_STL
+    wxASSERT( str.GetStringData()->IsValid() );
+#endif
+
+    // reserve room for both parts up front
+    const size_t nTotalLen = wxStrlen(psz) + str.length();
+
+    wxString strResult;
+    if ( !strResult.Alloc(nTotalLen) ) {
+        wxFAIL_MSG( _T("out of memory in wxString::operator+") );
+    }
+
+    strResult = psz;
+    strResult += str;
+    return strResult;
+}
+
+// ===========================================================================
+// other common string functions
+// ===========================================================================
+
+// case-sensitive comparison with another string; simply forwards to compare()
+int wxString::Cmp(const wxString& s) const
+{
+    const int rc = compare(s);
+    return rc;
+}
+
+// case-sensitive comparison with a C string; simply forwards to compare()
+int wxString::Cmp(const wxChar* psz) const
+{
+    const int rc = compare(psz);
+    return rc;
+}
+
+// Case-insensitive comparison of two character ranges given as pointer and
+// length. Characters are compared after conversion with wxTolower().
+//
+// Returns -1, 0 or 1: the first differing character (within the common
+// prefix) decides the result; if one range is a prefix of the other, the
+// shorter one compares as smaller.
+static inline int wxDoCmpNoCase(const wxChar* s1, size_t l1,
+                                const wxChar* s2, size_t l2)
+{
+    // only the part present in both strings can be compared char by char
+    const size_t nCommon = l1 < l2 ? l1 : l2;
+
+    for ( size_t n = 0; n < nCommon; ++n )
+    {
+        if ( wxTolower(s1[n]) != wxTolower(s2[n]) )
+            return wxTolower(s1[n]) < wxTolower(s2[n]) ? -1 : 1;
+    }
+
+    // the common part is identical: the lengths decide
+    if ( l1 == l2 )
+        return 0;
+
+    return l1 < l2 ? -1 : 1;
+}
+
+// case-insensitive comparison with another string (see wxDoCmpNoCase())
+int wxString::CmpNoCase(const wxString& s) const
+{
+    const int rc = wxDoCmpNoCase(data(), length(), s.data(), s.length());
+    return rc;
+}
+
+// case-insensitive comparison with a NUL-terminated C string (see
+// wxDoCmpNoCase())
+int wxString::CmpNoCase(const wxChar* psz) const
+{
+    // keep the length as size_t: wxDoCmpNoCase() takes size_t lengths, so
+    // going through int would only narrow the value and widen it again
+    const size_t nLen = wxStrlen(psz);
+
+    return wxDoCmpNoCase(data(), length(), psz, nLen);
+}
+
+
+#if wxUSE_UNICODE
+
+#ifdef __MWERKS__
+#ifndef __SCHAR_MAX__
+#define __SCHAR_MAX__ 127
+#endif
+#endif
+
+// Build a (Unicode build) string from a NUL-terminated narrow string by
+// widening each byte, taken as unsigned char, to wchar_t.
+// A NULL pointer yields wxEmptyString.
+wxString wxString::FromAscii(const char *ascii)
+{
+    if ( ascii == NULL )
+       return wxEmptyString;
+
+    const size_t len = strlen( ascii );
+    wxString res;
+
+    if ( len != 0 )
+    {
+        wxStringBuffer buf(res, len);
+
+        wchar_t *dest = buf;
+
+        // copy everything up to and including the terminating NUL
+        do
+        {
+            *dest = (wchar_t)(unsigned char)*ascii++;
+        }
+        while ( *dest++ != L'\0' );
+    }
+
+    return res;
+}
+
+// Build a one-character string from a single narrow character, widened via
+// unsigned char. No special handling is done for '\0'.
+wxString wxString::FromAscii(const char ascii)
+{
+    const wchar_t wch = (wchar_t)(unsigned char) ascii;
+
+    wxString res;
+    res += wch;
+
+    return res;
+}
+
+// Convert the string to a narrow char buffer: every character greater than
+// SCHAR_MAX is replaced by '_', all the others are copied as is. Conversion
+// stops at the first NUL (which is copied too).
+const wxCharBuffer wxString::ToAscii() const
+{
+    // this will allocate enough space for the terminating NUL too
+    wxCharBuffer buffer(length());
+
+    char *out = buffer.data();
+
+    for ( const wchar_t *src = c_str(); ; ++src, ++out )
+    {
+        const wchar_t wc = *src;
+
+        *out = (char)(wc > SCHAR_MAX ? wxT('_') : wc);
+
+        // the output string can't contain embedded NULs anyhow, so stopping
+        // at the first one is safe even if the source has more data after it
+        if ( !wc )
+            break;
+    }
+
+    return buffer;
+}
+
+#endif // Unicode
+
+// Extract the substring of at most nCount characters starting at nFirst.
+//
+// nCount == npos (the default) means "till the end of the string".
+// Out-of-range requests are clamped: a start position past the end yields an
+// empty string and a too large count is reduced to what is available.
+wxString wxString::Mid(size_t nFirst, size_t nCount) const
+{
+    const size_t nLen = length();
+
+    // check the start position first so that the subtraction below can never
+    // wrap around
+    if ( nFirst > nLen )
+    {
+        return wxEmptyString;
+    }
+
+    // clamp the count to the number of characters actually available; this
+    // also handles npos and, unlike testing "nFirst + nCount > nLen", cannot
+    // overflow for very large nCount values
+    const size_t nAvailable = nLen - nFirst;
+    if ( nCount > nAvailable )
+    {
+        nCount = nAvailable;
+    }
+
+    wxString dest(*this, nFirst, nCount);
+    if ( dest.length() != nCount )
+    {
+        wxFAIL_MSG( _T("out of memory in wxString::Mid") );
+    }
+
+    return dest;
+}
+
+// Return true if the string starts with the given prefix, false otherwise.
+// On success, if rest is not NULL, it receives the part of the string
+// following the prefix.
+bool wxString::StartsWith(const wxChar *prefix, wxString *rest) const
+{
+    wxASSERT_MSG( prefix, _T("invalid parameter in wxString::StartsWith") );
+
+    // walk both strings in parallel: there is no need to check separately
+    // for the end of this string because its terminating NUL either
+    // coincides with the end of the prefix (the loop stops) or differs from
+    // the current prefix character (mismatch)
+    const wxChar *pszCur = c_str();
+    for ( ; *prefix; ++prefix, ++pszCur )
+    {
+        if ( *prefix != *pszCur )
+            return false;
+    }
+
+    if ( rest != NULL )
+    {
+        // hand back whatever follows the prefix
+        *rest = pszCur;
+    }
+
+    return true;
+}
+
+
+// Return true if the string ends with the given suffix, false otherwise.
+// On success, if rest is not NULL, it receives the part of the string
+// preceding the suffix.
+bool wxString::EndsWith(const wxChar *suffix, wxString *rest) const
+{
+    wxASSERT_MSG( suffix, _T("invalid parameter in wxString::EndsWith") );
+
+    // compare the lengths as size_t before subtracting: storing the
+    // difference in an int would narrow it
+    const size_t nLen = length();
+    const size_t nSuffixLen = wxStrlen(suffix);
+    if ( nSuffixLen > nLen )
+        return false;
+
+    // offset at which the suffix would have to start
+    const size_t start = nLen - nSuffixLen;
+    if ( wxStrcmp(c_str() + start, suffix) != 0 )
+        return false;
+
+    if ( rest )
+    {
+        // put the beginning of the string into the provided pointer
+        rest->assign(*this, 0, start);
+    }
+
+    return true;
+}
+
+
+// Return the nCount rightmost characters of the string (or the whole string
+// if it is shorter than nCount).
+wxString wxString::Right(size_t nCount) const
+{
+  const size_t nLen = length();
+  if ( nCount > nLen )
+    nCount = nLen;
+
+  wxString dest(*this, nLen - nCount, nCount);
+  if ( dest.length() != nCount ) {
+    wxFAIL_MSG( _T("out of memory in wxString::Right") );
+  }
+
+  return dest;
+}
+
+// get all characters after the last occurrence of ch
+// (returns the whole string if ch is not found)
+wxString wxString::AfterLast(wxChar ch) const
+{
+  const int iLast = Find(ch, true);
+
+  wxString str;
+  if ( iLast != wxNOT_FOUND )
+    str = c_str() + iLast + 1;
+  else
+    str = *this;
+
+  return str;
+}
+
+// Return the nCount leftmost characters of the string (or the whole string
+// if it is shorter than nCount).
+wxString wxString::Left(size_t nCount) const
+{
+  const size_t nLen = length();
+  if ( nCount > nLen )
+    nCount = nLen;
+
+  wxString dest(*this, 0, nCount);
+  if ( dest.length() != nCount ) {
+    wxFAIL_MSG( _T("out of memory in wxString::Left") );
+  }
+
+  return dest;
+}
+
+// get all characters before the first occurrence of ch
+// (returns the whole string if ch is not found)
+wxString wxString::BeforeFirst(wxChar ch) const
+{
+  const int iFound = Find(ch);
+
+  // when ch is absent, take everything
+  const int nTake = (iFound == wxNOT_FOUND) ? (int)length() : iFound;
+
+  return wxString(*this, 0, nTake);
+}
+
+/// get all characters before the last occurrence of ch
+/// (returns an empty string if ch is not found or is the first character)
+wxString wxString::BeforeLast(wxChar ch) const
+{
+  wxString str;
+
+  const int iLast = Find(ch, true);
+  if ( iLast == wxNOT_FOUND || iLast == 0 )
+    return str;
+
+  str = wxString(c_str(), iLast);
+  return str;
+}
+
+/// get all characters after the first occurrence of ch
+/// (returns an empty string if ch is not found)
+wxString wxString::AfterFirst(wxChar ch) const
+{
+  wxString str;
+
+  const int iFirst = Find(ch);
+  if ( iFirst == wxNOT_FOUND )
+    return str;
+
+  str = c_str() + iFirst + 1;
+  return str;
+}
+
+// Replace the first (or, if bReplaceAll is true, all) occurrences of szOld
+// with szNew and return the number of replacements made.
+//
+// szOld must be a non-empty string and szNew must not be NULL, otherwise 0
+// is returned (after the check failure is reported by wxCHECK_MSG).
+size_t
+wxString::Replace(const wxChar *szOld, const wxChar *szNew, bool bReplaceAll)
+{
+    // an empty search string would never let the loops below advance
+    wxCHECK_MSG( szOld && *szOld && szNew, 0,
+                 _T("wxString::Replace(): invalid parameter") );
+
+    size_t nReplaced = 0;
+    size_t pos = 0;
+
+    // replacing exactly one character with exactly one character is a common
+    // case which can be done in place without changing the string length
+    const bool bCharForChar = szOld[1] == '\0' &&
+                              (szNew[0] != '\0' && szNew[1] == '\0');
+
+    if ( bCharForChar )
+    {
+        while ( (pos = find(*szOld, pos)) != npos )
+        {
+            // overwrite the character and continue after it
+            (*this)[pos++] = *szNew;
+
+            nReplaced++;
+
+            if ( !bReplaceAll )
+                break;
+        }
+    }
+    else // general case
+    {
+        const size_t uiOldLen = wxStrlen(szOld);
+        const size_t uiNewLen = wxStrlen(szNew);
+
+        while ( (pos = find(szOld, pos)) != npos )
+        {
+            // substitute the new string for this occurrence of the old one
+            replace(pos, uiOldLen, szNew, uiNewLen);
+
+            // resume the search after the text just inserted
+            pos += uiNewLen;
+
+            nReplaced++;
+
+            if ( !bReplaceAll )
+                break;
+        }
+    }
+
+    return nReplaced;
+}
+
+// Return true if isascii() holds for every character up to the first NUL
+// (so an empty string yields true).
+bool wxString::IsAscii() const
+{
+  for ( const wxChar *pc = (const wxChar*) *this; *pc; pc++ )
+  {
+    if ( !isascii(*pc) )
+      return false;
+  }
+
+  return true;
+}
+
+// Return true if wxIsalpha() holds for every character up to the first NUL
+// (so an empty string yields true).
+bool wxString::IsWord() const
+{
+  for ( const wxChar *pc = (const wxChar*) *this; *pc; pc++ )
+  {
+    if ( !wxIsalpha(*pc) )
+      return false;
+  }
+
+  return true;
+}
+
+// Return true if the string consists of an optional leading '+' or '-'
+// followed only by characters accepted by wxIsdigit(). Note that an empty
+// string and a lone sign both yield true.
+bool wxString::IsNumber() const
+{
+  const wxChar *pc = (const wxChar*) *this;
+
+  // skip the sign, if any (an empty string starts with NUL, so this test
+  // fails for it)
+  if ( *pc == wxT('-') || *pc == wxT('+') )
+    pc++;
+
+  for ( ; *pc; pc++ )
+  {
+    if ( !wxIsdigit(*pc) )
+      return false;
+  }
+
+  return true;
+}
+
+// Return a copy of the string with white space trimmed from the side(s)
+// selected by the 'leading' and/or 'trailing' bits of w.
+wxString wxString::Strip(stripType w) const
+{
+    wxString strStripped = *this;
+
+    if ( w & leading )
+        strStripped.Trim(false);
+
+    if ( w & trailing )
+        strStripped.Trim(true);
+
+    return strStripped;
+}
+
+// ---------------------------------------------------------------------------
+// case conversion
+// ---------------------------------------------------------------------------
+
+// Convert all characters to upper case in place (using wxToupper()) and
+// return a reference to this string.
+wxString& wxString::MakeUpper()
+{
+  iterator itCur = begin();
+  const iterator itEnd = end();
+
+  while ( itCur != itEnd )
+  {
+    *itCur = (wxChar)wxToupper(*itCur);
+    ++itCur;
+  }
+
+  return *this;
+}
+
+// Convert all characters to lower case in place (using wxTolower()) and
+// return a reference to this string.
+wxString& wxString::MakeLower()
+{
+  iterator itCur = begin();
+  const iterator itEnd = end();
+
+  while ( itCur != itEnd )
+  {
+    *itCur = (wxChar)wxTolower(*itCur);
+    ++itCur;
+  }
+
+  return *this;
+}
+
+// ---------------------------------------------------------------------------
+// trimming and padding
+// ---------------------------------------------------------------------------
+
+// Some compilers (VC++ 6.0, to name one) return true from isspace('\xEA') in
+// the C locale, which looks broken. To work around this, only characters
+// below 127 are ever passed to wxIsspace(); anything else is reported as
+// non-space. This may miss space-like characters outside that range, but it
+// is arguably still better than trimming away accented letters.
+//
+// Returns 1 if ch is considered to be white space and 0 otherwise.
+inline int wxSafeIsspace(wxChar ch)
+{
+    if ( !(ch < 127) )
+        return 0;
+
+    return wxIsspace(ch) ? 1 : 0;
+}
+
+// Remove white space (as determined by wxSafeIsspace()) from the right side
+// of the string if bFromRight is true, or from the left side otherwise.
+// Returns a reference to this string.
+wxString& wxString::Trim(bool bFromRight)
+{
+    // nothing to trim in an empty string
+    if ( empty() )
+        return *this;
+
+    if ( bFromRight )
+    {
+        // leave the string untouched unless it really ends with a space
+        if ( !wxSafeIsspace(GetChar(length() - 1)) )
+            return *this;
+
+        // walk backwards to the last non-space character
+        reverse_iterator ritCur = rbegin();
+        while ( (ritCur != rend()) && wxSafeIsspace(*ritCur) )
+            ritCur++;
+
+        // cut off everything after it
+        erase(ritCur.base(), end());
+    }
+    else
+    {
+        // leave the string untouched unless it really starts with a space
+        if ( !wxSafeIsspace(GetChar(0u)) )
+            return *this;
+
+        // walk forwards to the first non-space character
+        iterator itCur = begin();
+        while ( (itCur != end()) && wxSafeIsspace(*itCur) )
+            itCur++;
+
+        // drop everything before it
+        erase(begin(), itCur);
+    }
+
+    return *this;
+}
+
+// Add nCount copies of chPad to the end of the string (bFromRight true) or
+// to its beginning (bFromRight false). Returns a reference to this string.
+wxString& wxString::Pad(size_t nCount, wxChar chPad, bool bFromRight)
+{
+    wxString strPadding(chPad, nCount);
+
+    if ( !bFromRight )
+    {
+        // build "padding + original" in the temporary and take it over
+        strPadding += *this;
+        swap(strPadding);
+    }
+    else
+    {
+        *this += strPadding;
+    }
+
+    return *this;
+}
+
+// Shorten the string to at most uiLen characters; a string which is already
+// short enough is left unchanged. Returns a reference to this string.
+wxString& wxString::Truncate(size_t uiLen)
+{
+    if ( uiLen >= length() )
+        return *this;
+
+    erase(begin() + uiLen, end());
+
+    return *this;
+}
+
+// ---------------------------------------------------------------------------
+// finding (return wxNOT_FOUND if not found and index otherwise)
+// ---------------------------------------------------------------------------
+
+// Find a character, searching from the end if bFromEnd is true.
+// Returns its index or wxNOT_FOUND.
+int wxString::Find(wxChar ch, bool bFromEnd) const
+{
+    size_type idx;
+    if ( bFromEnd )
+        idx = find_last_of(ch);
+    else
+        idx = find_first_of(ch);
+
+    if ( idx == npos )
+        return wxNOT_FOUND;
+
+    return (int)idx;
+}
+
+// Find a sub-string (like strstr). Returns the index of its first
+// occurrence or wxNOT_FOUND.
+int wxString::Find(const wxChar *pszSub) const
+{
+    const size_type idx = find(pszSub);
+    if ( idx == npos )
+        return wxNOT_FOUND;
+
+    return (int)idx;
+}
+
+// ----------------------------------------------------------------------------
+// conversion to numbers
+// ----------------------------------------------------------------------------
+
+// Common implementation of the integer conversion functions below.
+//
+// Calls the strtol()-like function func on start with the given base and
+// stores its result in *val. Returns true only if the whole, non-empty
+// string was consumed and (except under WinCE, where errno is not checked)
+// no range error was reported via errno.
+template <typename T, typename F>
+bool wxStringToIntType(const wxChar *start,
+                       T *val,
+                       int base,
+                       F func)
+{
+    wxCHECK_MSG( val, false, _T("NULL output pointer") );
+    wxASSERT_MSG( !base || (base > 1 && base <= 36), _T("invalid base") );
+
+#ifndef __WXWINCE__
+    errno = 0;
+#endif
+
+    wxChar *pszStop;
+    *val = (*func)(start, &pszStop, base);
+
+    // the scan must have been stopped by the terminating NUL...
+    if ( *pszStop )
+        return false;
+
+    // ... and must have consumed at least one character
+    if ( pszStop == start )
+        return false;
+
+#ifndef __WXWINCE__
+    // finally, no under/overflow may have occurred
+    if ( errno == ERANGE )
+        return false;
+#endif
+
+    return true;
+}
+
+// convert the string to long using wxStrtol(), see wxStringToIntType()
+bool wxString::ToLong(long *val, int base) const
+{
+    const bool ok = wxStringToIntType(c_str(), val, base, wxStrtol);
+    return ok;
+}
+
+// convert the string to unsigned long using wxStrtoul(), see
+// wxStringToIntType()
+bool wxString::ToULong(unsigned long *val, int base) const
+{
+    const bool ok = wxStringToIntType(c_str(), val, base, wxStrtoul);
+    return ok;
+}
+
+// convert the string to wxLongLong_t using wxStrtoll() if it is available
+// (wxHAS_STRTOLL); otherwise the conversion always fails
+bool wxString::ToLongLong(wxLongLong_t *val, int base) const
+{
+#ifdef wxHAS_STRTOLL
+    const bool ok = wxStringToIntType(c_str(), val, base, wxStrtoll);
+    return ok;
+#else
+    // TODO: implement this ourselves
+    wxUnusedVar(val);
+    wxUnusedVar(base);
+
+    return false;
+#endif // wxHAS_STRTOLL
+}
+
+// convert the string to wxULongLong_t using wxStrtoull() if it is available
+// (wxHAS_STRTOLL); otherwise the conversion always fails
+bool wxString::ToULongLong(wxULongLong_t *val, int base) const
+{
+#ifdef wxHAS_STRTOLL
+    const bool ok = wxStringToIntType(c_str(), val, base, wxStrtoull);
+    return ok;
+#else
+    // TODO: implement this ourselves
+    wxUnusedVar(val);
+    wxUnusedVar(base);
+
+    return false;
+#endif // wxHAS_STRTOLL
+}
+
+// Convert the string to double using wxStrtod() and store the result in
+// *val. Returns true only if the whole, non-empty string was consumed and
+// (except under WinCE, where errno is not checked) no range error was
+// reported via errno.
+bool wxString::ToDouble(double *val) const
+{
+    wxCHECK_MSG( val, false, _T("NULL pointer in wxString::ToDouble") );
+
+#ifndef __WXWINCE__
+    errno = 0;
+#endif
+
+    const wxChar *pszStart = c_str();
+    wxChar *pszStop;
+    *val = wxStrtod(pszStart, &pszStop);
+
+    // the scan must have been stopped by the terminating NUL...
+    if ( *pszStop )
+        return false;
+
+    // ... and must have consumed at least one character
+    if ( pszStop == pszStart )
+        return false;
+
+#ifndef __WXWINCE__
+    // finally, no under/overflow may have occurred
+    if ( errno == ERANGE )
+        return false;
+#endif
+
+    return true;
+}
+
+// ---------------------------------------------------------------------------
+// formatted output
+// ---------------------------------------------------------------------------
+
+// Return a new string built from the printf-like format and the variable
+// arguments following it (the work is done by PrintfV()).
+/* static */
+wxString wxString::Format(const wxChar *pszFormat, ...)
+{
+    wxString strFormatted;
+
+    va_list args;
+    va_start(args, pszFormat);
+    strFormatted.PrintfV(pszFormat, args);
+    va_end(args);
+
+    return strFormatted;
+}
+
+// Same as Format() but takes the arguments as an already started va_list.
+/* static */
+wxString wxString::FormatV(const wxChar *pszFormat, va_list argptr)
+{
+    wxString strFormatted;
+    strFormatted.PrintfV(pszFormat, argptr);
+
+    return strFormatted;
+}
+
+// Replace the contents of this string with the result of formatting the
+// variable arguments according to pszFormat; returns whatever PrintfV()
+// returns.
+int wxString::Printf(const wxChar *pszFormat, ...)
+{
+    va_list args;
+    va_start(args, pszFormat);
+    const int nResult = PrintfV(pszFormat, args);
+    va_end(args);
+
+    return nResult;
+}
+
+/*
+    Uses wxVsnprintf and places the result into this string.
+
+    In ANSI build, wxVsnprintf is effectively vsnprintf but in Unicode build
+    it is vswprintf.  Due to a discrepancy between vsnprintf and vswprintf in
+    the ISO C99 (and thus SUSv3) standard the return value for the case of
+    an undersized buffer is inconsistent.  For conforming vsnprintf
+    implementations the function must return the number of characters that
+    would have been printed had the buffer been large enough.  For conforming
+    vswprintf implementations the function must return a negative number
+    and set errno.
+
+    What vswprintf sets errno to is undefined but Darwin seems to set it to
+    EOVERFLOW.  The only expected errno are EILSEQ and EINVAL.  Both of
+    those are defined in the standard and backed up by several conformance
+    statements.  Note that ENOMEM mentioned in the manual page does not
+    apply to swprintf, only wprintf and fwprintf.
+
+    Official manual page:
+    http://www.opengroup.org/onlinepubs/009695399/functions/swprintf.html
+
+    Some conformance statements (AIX, Solaris):
+    http://www.opengroup.org/csq/view.mhtml?RID=ibm%2FSD1%2F3
+    http://www.theopengroup.org/csq/view.mhtml?norationale=1&noreferences=1&RID=Fujitsu%2FSE2%2F10
+
+    Since EILSEQ and EINVAL are rather common but EOVERFLOW is not and since
+    EILSEQ and EINVAL are specifically defined to mean the error is other than
+    an undersized buffer and no other errno are defined we treat those two
+    as meaning hard errors and everything else gets the old behavior which
+    is to keep looping and increasing buffer size until the function succeeds.
+ 
+    In practice it's impossible to determine before compilation which behavior
+    may be used.  The vswprintf function may have vsnprintf-like behavior or
+    vice-versa.  Behavior detected on one release can theoretically change
+    with an updated release.  Not to mention that configure testing for it
+    would require the test to be run on the host system, not the build system
+    which makes cross compilation difficult. Therefore, we make no assumptions
+    about behavior and try our best to handle every known case, including the
+    case where wxVsnprintf returns a negative number and fails to set errno.
+
+    There is yet one more non-standard implementation and that is our own.
+    Fortunately, that can be detected at compile-time.
+
+    On top of all that, ISO C99 explicitly defines snprintf to write a null
+    character to the last position of the specified buffer.  That would be
+    at the given buffer size minus 1.  It is supposed to do this even if it
+    turns out that the buffer is sized too small.
+
+    Darwin (tested on 10.5) follows the C99 behavior exactly.
+
+    Glibc 2.6 almost follows the C99 behavior except vswprintf never sets
+    errno even when it fails.  However, it only seems to ever fail due
+    to an undersized buffer.
+*/
+// Format the arguments in argptr according to pszFormat into this string.
+//
+// The output is written by wxVsnprintf() directly into this string's buffer
+// (obtained through wxStringBuffer), starting with 1024 characters and
+// retrying with a bigger buffer until the result fits.
+//
+// Returns length() of the string on success or -1 if the buffer could not be
+// obtained or if the failure is considered to be a hard (format) error.
+int wxString::PrintfV(const wxChar* pszFormat, va_list argptr)
+{
+    // size of the buffer passed to wxVsnprintf() in the current iteration
+    int size = 1024;
+
+    for ( ;; )
+    {
+        // Allocate 1 more character than we tell wxVsnprintf about
+        // just in case it is buggy.
+        // FIXME: I have a feeling that the underlying function was not buggy
+        // and I suspect it was to fix the buf[size] = '\0' line below
+        wxStringBuffer tmp(*this, size + 1);
+        wxChar *buf = tmp;
+
+        if ( !buf )
+        {
+            // out of memory
+            return -1;
+        }
+
+        // wxVsnprintf() may modify the original arg pointer, so pass it
+        // only a copy
+        va_list argptrcopy;
+        wxVaCopy(argptrcopy, argptr);
+
+#ifndef __WXWINCE__
+        // Set errno to 0 to make it determinate if wxVsnprintf fails to set it.
+        errno = 0;
+#endif
+        int len = wxVsnprintf(buf, size, pszFormat, argptrcopy);
+        va_end(argptrcopy);
+
+        // some implementations of vsnprintf() don't NUL terminate
+        // the string if there is not enough space for it so
+        // always do it manually
+        // FIXME: This really seems to be the wrong and would be an off-by-one
+        // bug except the code above allocates an extra character.
+        buf[size] = _T('\0');
+
+        // vsnprintf() may return either -1 (traditional Unix behaviour) or the
+        // total number of characters which would have been written if the
+        // buffer were large enough (newer standards such as Unix98)
+        if ( len < 0 )
+        {
+#if wxUSE_WXVSNPRINTF
+            // we know that our own implementation of wxVsnprintf() returns -1
+            // only for a format error - thus there's something wrong with
+            // the user's format string
+            return -1;
+#else // assume that system version only returns error if not enough space
+#if !defined(__WXWINCE__) && (!defined(__OS2__) || defined(__INNOTEK_LIBC__))
+            if( (errno == EILSEQ) || (errno == EINVAL) )
+            // If errno was set to one of the two well-known hard errors
+            // then fail immediately to avoid an infinite loop.
+                return -1;
+            else
+#endif // __WXWINCE__
+            // still not enough, as we don't know how much we need, double the
+            // current size of the buffer
+                size *= 2;
+#endif // wxUSE_WXVSNPRINTF/!wxUSE_WXVSNPRINTF
+        }
+        else if ( len >= size )
+        {
+            // the output was truncated: retry with a bigger buffer
+#if wxUSE_WXVSNPRINTF
+            // we know that our own implementation of wxVsnprintf() returns
+            // size+1 when there's not enough space but that's not the size
+            // of the required buffer!
+            size *= 2;      // so we just double the current size of the buffer
+#else
+            // some vsnprintf() implementations NUL-terminate the buffer and
+            // some don't in len == size case, to be safe always add 1
+            // FIXME: I don't quite understand this comment.  The vsnprintf
+            // function is specifically defined to return the number of
+            // characters printed not including the null terminator.
+            // So OF COURSE you need to add 1 to get the right buffer size.
+            // The following line is definitely correct, no question.
+            size = len + 1;
+#endif
+        }
+        else // ok, there was enough space
+        {
+            break;
+        }
+    }
+
+    // we could have overshot
+    Shrink();
+
+    return length();
+}
+
+// ----------------------------------------------------------------------------
+// misc other operations
+// ----------------------------------------------------------------------------
+
+// returns true if the string matches the pattern which may contain '*' and
+// '?' metacharacters (as usual, '?' matches any character and '*' any number
+// of them)
+//
+// Only the hand-written matcher in the #else branch below is ever compiled:
+// the regex-based implementation is disabled by "#if 0".
+bool wxString::Matches(const wxChar *pszMask) const
+{
+    // I disable this code as it doesn't seem to be faster (in fact, it seems
+    // to be much slower) than the old, hand-written code below and using it
+    // here requires always linking with libregex even if the user code doesn't
+    // use it
+#if 0 // wxUSE_REGEX
+    // first translate the shell-like mask into a regex
+    wxString pattern;
+    pattern.reserve(wxStrlen(pszMask));
+
+    pattern += _T('^');
+    while ( *pszMask )
+    {
+        switch ( *pszMask )
+        {
+            case _T('?'):
+                pattern += _T('.');
+                break;
+
+            case _T('*'):
+                pattern += _T(".*");
+                break;
+
+            case _T('^'):
+            case _T('.'):
+            case _T('$'):
+            case _T('('):
+            case _T(')'):
+            case _T('|'):
+            case _T('+'):
+            case _T('\\'):
+                // these characters are special in a RE, quote them
+                // (however note that we don't quote '[' and ']' to allow
+                // using them for Unix shell like matching)
+                pattern += _T('\\');
+                // fall through
+
+            default:
+                pattern += *pszMask;
+        }
+
+        pszMask++;
+    }
+    pattern += _T('$');
+
+    // and now use it
+    return wxRegEx(pattern, wxRE_NOSUB | wxRE_EXTENDED).Matches(c_str());
+#else // !wxUSE_REGEX
+  // TODO: this is, of course, awfully inefficient...
+
+  // the char currently being checked
+  const wxChar *pszTxt = c_str();
+
+  // the last location where '*' matched
+  const wxChar *pszLastStarInText = NULL;
+  const wxChar *pszLastStarInMask = NULL;
+
+  // the matching is restarted from this label when backtracking (see the end
+  // of the function)
+match:
+  for ( ; *pszMask != wxT('\0'); pszMask++, pszTxt++ ) {
+    switch ( *pszMask ) {
+      case wxT('?'):
+        // '?' needs a character to match: fail if the text is exhausted
+        if ( *pszTxt == wxT('\0') )
+          return false;
+
+        // pszTxt and pszMask will be incremented in the loop statement
+
+        break;
+
+      case wxT('*'):
+        {
+          // remember where we started to be able to backtrack later
+          pszLastStarInText = pszTxt;
+          pszLastStarInMask = pszMask;
+
+          // ignore special chars immediately following this one
+          // (should this be an error?)
+          while ( *pszMask == wxT('*') || *pszMask == wxT('?') )
+            pszMask++;
+
+          // if there is nothing more, match
+          if ( *pszMask == wxT('\0') )
+            return true;
+
+          // are there any other metacharacters in the mask?
+          size_t uiLenMask;
+          const wxChar *pEndMask = wxStrpbrk(pszMask, wxT("*?"));
+
+          if ( pEndMask != NULL ) {
+            // we have to match the string between two metachars
+            uiLenMask = pEndMask - pszMask;
+          }
+          else {
+            // we have to match the remainder of the string
+            uiLenMask = wxStrlen(pszMask);
+          }
+
+          // look for the first occurrence of this literal part of the mask
+          // in the remaining text
+          wxString strToMatch(pszMask, uiLenMask);
+          const wxChar* pMatch = wxStrstr(pszTxt, strToMatch);
+          if ( pMatch == NULL )
+            return false;
+
+          // -1 to compensate "++" in the loop
+          pszTxt = pMatch + uiLenMask - 1;
+          pszMask += uiLenMask - 1;
+        }
+        break;
+
+      default:
+        // an ordinary character must match literally
+        if ( *pszMask != *pszTxt )
+          return false;
+        break;
+    }
+  }
+
+  // match only if nothing left
+  if ( *pszTxt == wxT('\0') )
+    return true;
+
+  // if we failed to match, backtrack if we can
+  if ( pszLastStarInText ) {
+    // retry from the last '*' in the mask, starting one character further
+    // in the text than the previous attempt
+    pszTxt = pszLastStarInText + 1;
+    pszMask = pszLastStarInMask;
+
+    pszLastStarInText = NULL;
+
+    // don't bother resetting pszLastStarInMask, it's unnecessary
+
+    goto match;
+  }
+
+  return false;
+#endif // wxUSE_REGEX/!wxUSE_REGEX
+}
+
+// Return the number of occurrences of the character ch in the string.
+int wxString::Freq(wxChar ch) const
+{
+    const int nLen = length();
+
+    int nFound = 0;
+    for ( int n = 0; n < nLen; n++ )
+        nFound += (GetChar(n) == ch) ? 1 : 0;
+
+    return nFound;
+}
+
+// return an upper case copy of the string, leaving this one unchanged
+wxString wxString::Upper() const
+{
+    wxString strCopy(*this);
+    strCopy.MakeUpper();
+    return strCopy;
+}
+
+// return a lower case copy of the string, leaving this one unchanged
+wxString wxString::Lower() const
+{
+    wxString strCopy(*this);
+    strCopy.MakeLower();
+    return strCopy;
+}
+
+// Same as Printf(): format the variable arguments into this string and
+// return whatever PrintfV() returns.
+int wxString::sprintf(const wxChar *pszFormat, ...)
+{
+    va_list args;
+    va_start(args, pszFormat);
+    const int nResult = PrintfV(pszFormat, args);
+    va_end(args);
+
+    return nResult;
+}
+
+// ============================================================================
+// ArrayString
+// ============================================================================
+
+#include "wx/arrstr.h"
+
+// construct the array from sz C strings, added in the order given
+wxArrayString::wxArrayString(size_t sz, const wxChar** a)
+{
+#if !wxUSE_STL
+    Init(false);
+#endif
+
+    size_t n = 0;
+    while ( n < sz )
+    {
+        Add(a[n]);
+        n++;
+    }
+}
+
+// construct the array from sz wxString objects, added in the order given
+wxArrayString::wxArrayString(size_t sz, const wxString* a)
+{
+#if !wxUSE_STL
+    Init(false);
+#endif
+
+    size_t n = 0;
+    while ( n < sz )
+    {
+        Add(a[n]);
+        n++;
+    }
+}
+
+#if !wxUSE_STL
+
+// size increment = min(50% of current size, ARRAY_MAXSIZE_INCREMENT)
+#define   ARRAY_MAXSIZE_INCREMENT       4096
+
+#ifndef   ARRAY_DEFAULT_INITIAL_SIZE    // also defined in dynarray.h
+#define   ARRAY_DEFAULT_INITIAL_SIZE    (16)
+#endif
+
+#define   STRING(p)   ((wxString *)(&(p)))
+
+// common part of all ctors: start with an empty array without any allocated
+// storage and remember whether it should be kept sorted
+void wxArrayString::Init(bool autoSort)
+{
+  m_autoSort = autoSort;
+
+  m_pItems = (wxChar **) NULL;
+  m_nCount = 0;
+  m_nSize  = 0;
+}
+
+// copy ctor: initialize as an empty array with the same sort mode as src and
+// then let the assignment operator copy the items
+wxArrayString::wxArrayString(const wxArrayString& src)
+{
+  Init(src.m_autoSort);
+
+  operator=(src);
+}
+
+// assignment operator: replaces the contents of this array with a copy of
+// the items of src and takes over its auto-sort flag
+wxArrayString& wxArrayString::operator=(const wxArrayString& src)
+{
+  // guard against self-assignment: clearing this array would also clear the
+  // source and all the items would be lost
+  if ( this == &src )
+    return *this;
+
+  if ( m_nSize > 0 )
+    Clear();
+
+  Copy(src);
+
+  m_autoSort = src.m_autoSort;
+
+  return *this;
+}
+
+// append all items of src to this array, preallocating the storage first if
+// src has more than ARRAY_DEFAULT_INITIAL_SIZE items
+void wxArrayString::Copy(const wxArrayString& src)
+{
+  const size_t nSrcCount = src.m_nCount;
+
+  if ( nSrcCount > ARRAY_DEFAULT_INITIAL_SIZE )
+    Alloc(nSrcCount);
+
+  size_t n = 0;
+  while ( n < nSrcCount )
+  {
+    Add(src[n]);
+    n++;
+  }
+}
+
+// Make sure there is room for at least nIncrement more items.
+//
+// An array without storage gets max(ARRAY_DEFAULT_INITIAL_SIZE, nIncrement)
+// slots. Otherwise the capacity grows by the larger of nIncrement and a
+// default increment: ARRAY_DEFAULT_INITIAL_SIZE for small arrays, 50% of the
+// current size for larger ones, capped at ARRAY_MAXSIZE_INCREMENT.
+void wxArrayString::Grow(size_t nIncrement)
+{
+  // nothing to do if there is enough free room already
+  if ( (m_nSize - m_nCount) >= nIncrement )
+    return;
+
+  // if ARRAY_DEFAULT_INITIAL_SIZE were set to 0, the initially empty array
+  // would never be resized!
+  #if ARRAY_DEFAULT_INITIAL_SIZE == 0
+    #error "ARRAY_DEFAULT_INITIAL_SIZE must be > 0!"
+  #endif
+
+  if ( m_nSize == 0 ) {
+    // no storage yet: allocate the initial block
+    m_nSize = ARRAY_DEFAULT_INITIAL_SIZE;
+    if ( m_nSize < nIncrement )
+      m_nSize = nIncrement;
+
+    m_pItems = new wxChar *[m_nSize];
+    return;
+  }
+
+  // default increment: add 50% but not too much
+  size_t nDefIncrement;
+  if ( m_nSize < ARRAY_DEFAULT_INITIAL_SIZE )
+    nDefIncrement = ARRAY_DEFAULT_INITIAL_SIZE;
+  else
+    nDefIncrement = m_nSize >> 1;
+
+  if ( nDefIncrement > ARRAY_MAXSIZE_INCREMENT )
+    nDefIncrement = ARRAY_MAXSIZE_INCREMENT;
+
+  // never grow by less than the default increment
+  if ( nIncrement < nDefIncrement )
+    nIncrement = nDefIncrement;
+
+  m_nSize += nIncrement;
+  wxChar **pItemsNew = new wxChar *[m_nSize];
+
+  // move the existing pointers to the new block
+  memcpy(pItemsNew, m_pItems, m_nCount*sizeof(wxChar *));
+
+  // free the old block (but do not release the strings themselves!)
+  wxDELETEA(m_pItems);
+
+  m_pItems = pItemsNew;
+}
+
+// call Unlock() on the string data of every item stored in the array; the
+// item storage itself and the item count are not touched
+void wxArrayString::Free()
+{
+  size_t n = 0;
+  while ( n < m_nCount ) {
+    STRING(m_pItems[n])->GetStringData()->Unlock();
+    n++;
+  }
+}
+
+// remove all the strings from the array but keep the allocated storage
+void wxArrayString::Empty()
+{
+  // release the strings first...
+  Free();
+
+  // ... and then forget about them
+  m_nCount = 0;
+}
+
+// same as Empty() but frees the item storage as well
+void wxArrayString::Clear()
+{
+  Free();
+
+  m_nCount = 0;
+  m_nSize  = 0;
+
+  wxDELETEA(m_pItems);
+}
+
+// dtor: release all the strings and then the item storage
+wxArrayString::~wxArrayString()
+{
+  // the strings must be released while the pointers are still available
+  Free();
+
+  wxDELETEA(m_pItems);
+}
+
+// std::vector-like name for Alloc()
+void wxArrayString::reserve(size_t nSize)
+{
+    const size_t nWanted = nSize;
+    Alloc(nWanted);
+}
+
+// Pre-allocate storage for at least nSize items. Nothing is done if the
+// current storage is already big enough; otherwise the existing items are
+// copied over to the new, bigger block.
+void wxArrayString::Alloc(size_t nSize)
+{
+  // the old buffer is big enough: nothing to do
+  if ( nSize <= m_nSize )
+    return;
+
+  wxChar **pItemsNew = new wxChar *[nSize];
+  if ( !pItemsNew )
+      return;
+
+  // move the existing pointers and free the old block
+  memcpy(pItemsNew, m_pItems, m_nCount*sizeof(wxChar *));
+  delete [] m_pItems;
+
+  m_pItems = pItemsNew;
+  m_nSize  = nSize;
+}
+
+// minimizes the memory usage by reallocating the item storage so that it
+// holds exactly m_nCount items
+void wxArrayString::Shrink()
+{
+  // only do it if we have some memory to free
+  if( m_nCount < m_nSize ) {
+    // allocates exactly as much memory as we need
+    wxChar **pNew = new wxChar *[m_nCount];
+
+    // copy data to new location
+    memcpy(pNew, m_pItems, m_nCount*sizeof(wxChar *));
+    delete [] m_pItems;
+    m_pItems = pNew;
+
+    // the capacity must be updated too: otherwise Grow() would still see
+    // the old, bigger size and later additions would write past the end of
+    // the new block
+    m_nSize = m_nCount;
+  }
+}
+
+#if WXWIN_COMPATIBILITY_2_4
+
+// Return a newly allocated (with new[]) array of wxString copies of all the
+// items, as required for some control ctors, or a null pointer if the array
+// is empty.
+wxString* wxArrayString::GetStringArray() const
+{
+    if ( m_nCount == 0 )
+        return 0;
+
+    wxString *pStrings = new wxString[m_nCount];
+
+    size_t n = 0;
+    while ( n < m_nCount )
+    {
+        pStrings[n] = m_pItems[n];
+        n++;
+    }
+
+    return pStrings;
+}
+
+void wxArrayString::Remove(size_t nIndex, size_t nRemove)
+{
+    RemoveAt(nIndex, nRemove);
+}
+
+#endif // WXWIN_COMPATIBILITY_2_4
+
+// searches the array for an item (forward or backwards); returns its index or wxNOT_FOUND
+int wxArrayString::Index(const wxChar *sz, bool bCase, bool bFromEnd) const
+{
+  if ( m_autoSort ) {
+    // use binary search in the sorted array
+    wxASSERT_MSG( bCase && !bFromEnd,
+                  wxT("search parameters ignored for auto sorted array") );
+
+    size_t i,
+           lo = 0,
+           hi = m_nCount;
+    int res;
+    while ( lo < hi ) {
+      i = (lo + hi)/2;
+
+      res = wxStrcmp(sz, m_pItems[i]);
+      if ( res < 0 )
+        hi = i;
+      else if ( res > 0 )
+        lo = i + 1;
+      else
+        return i;
+    }
+
+    return wxNOT_FOUND;
+  }
+  else {
+    // use linear search in unsorted array
+    if ( bFromEnd ) {
+      if ( m_nCount > 0 ) {
+        size_t ui = m_nCount;
+        do {
+          if ( STRING(m_pItems[--ui])->IsSameAs(sz, bCase) )
+            return ui;
+        }
+        while ( ui != 0 );
+      }
+    }
+    else {
+      for( size_t ui = 0; ui < m_nCount; ui++ ) {
+        if( STRING(m_pItems[ui])->IsSameAs(sz, bCase) )
+          return ui;
+      }
+    }
+  }
+
+  return wxNOT_FOUND;
+}
+
+// add nInsert copies of the item: at the end or, for an auto-sorted array, at its sorted position
+size_t wxArrayString::Add(const wxString& str, size_t nInsert)
+{
+  if ( m_autoSort ) {
+    // insert the string at the correct position to keep the array sorted
+    size_t i,
+           lo = 0,
+           hi = m_nCount;
+    int res;
+    while ( lo < hi ) {
+      i = (lo + hi)/2;
+
+      res = str.Cmp(m_pItems[i]);
+      if ( res < 0 )
+        hi = i;
+      else if ( res > 0 )
+        lo = i + 1;
+      else {
+        lo = hi = i;
+        break;
+      }
+    }
+
+    wxASSERT_MSG( lo == hi, wxT("binary search broken") );
+
+    Insert(str, lo, nInsert);
+
+    return (size_t)lo;
+  }
+  else {
+    wxASSERT( str.GetStringData()->IsValid() );
+
+    Grow(nInsert);
+
+    for (size_t i = 0; i < nInsert; i++)
+    {
+        // the string data must not be deleted!
+        str.GetStringData()->Lock();
+
+        // just append
+        m_pItems[m_nCount + i] = (wxChar *)str.c_str(); // const_cast
+    }
+    size_t ret = m_nCount;
+    m_nCount += nInsert;
+    return ret;
+  }
+}
+
+// add item at the given position
+void wxArrayString::Insert(const wxString& str, size_t nIndex, size_t nInsert)
+{
+  wxASSERT( str.GetStringData()->IsValid() );
+
+  wxCHECK_RET( nIndex <= m_nCount, wxT("bad index in wxArrayString::Insert") );
+  wxCHECK_RET( m_nCount <= m_nCount + nInsert,
+               wxT("array size overflow in wxArrayString::Insert") );
+
+  Grow(nInsert);
+
+  memmove(&m_pItems[nIndex + nInsert], &m_pItems[nIndex],
+          (m_nCount - nIndex)*sizeof(wxChar *));
+
+  for (size_t i = 0; i < nInsert; i++)
+  {
+      str.GetStringData()->Lock();
+      m_pItems[nIndex + i] = (wxChar *)str.c_str();
+  }
+  m_nCount += nInsert;
+}
+
+// range insert (STL 23.2.4.3): inserts the elements of [first, last) at position "it"
+void
+wxArrayString::insert(iterator it, const_iterator first, const_iterator last)
+{
+    const int idx = it - begin();
+
+    // grow the buffer once, up front, for all the new elements
+    Grow(last - first);
+
+    // recompute "it": Grow() may have reallocated the buffer and invalidated it
+    it = begin() + idx;
+
+    while ( first != last )
+    {
+        it = insert(it, *first);
+
+        // insert() returns an iterator to the element just inserted, but the
+        // next element must go after it, i.e. before the one that follows
+        ++it;
+
+        ++first;
+    }
+}
+
+// expand the array to count items by appending default-constructed strings
+void wxArrayString::SetCount(size_t count)
+{
+    Alloc(count);
+
+    wxString s;
+    while ( m_nCount < count )
+        m_pItems[m_nCount++] = (wxChar *)s.c_str();
+}
+
+// removes item from array (by index)
+void wxArrayString::RemoveAt(size_t nIndex, size_t nRemove)
+{
+  wxCHECK_RET( nIndex < m_nCount, wxT("bad index in wxArrayString::Remove") );
+  wxCHECK_RET( nIndex + nRemove <= m_nCount,
+               wxT("removing too many elements in wxArrayString::Remove") );
+
+  // release our lock
+  for (size_t i = 0; i < nRemove; i++)
+      Item(nIndex + i).GetStringData()->Unlock();
+
+  memmove(&m_pItems[nIndex], &m_pItems[nIndex + nRemove],
+          (m_nCount - nIndex - nRemove)*sizeof(wxChar *));
+  m_nCount -= nRemove;
+}
+
+// removes item from array (by value)
+void wxArrayString::Remove(const wxChar *sz)
+{
+  int iIndex = Index(sz);
+
+  wxCHECK_RET( iIndex != wxNOT_FOUND,
+               wxT("removing inexistent element in wxArrayString::Remove") );
+
+  RemoveAt(iIndex);
+}
+
+void wxArrayString::assign(const_iterator first, const_iterator last)
+{
+    reserve(last - first);
+    for(; first != last; ++first)
+        push_back(*first);
+}
+
+// ----------------------------------------------------------------------------
+// sorting
+// ----------------------------------------------------------------------------
+
+// we can only sort one array at a time with the quick-sort based
+// implementation
+#if wxUSE_THREADS
+  // need a critical section to protect access to gs_compareFunction and
+  // gs_sortAscending variables
+  static wxCriticalSection gs_critsectStringSort;
+#endif // wxUSE_THREADS
+
+// function to use for string comparison
+static wxArrayString::CompareFunction gs_compareFunction = NULL;
+
+// if we don't use the compare function, this flag tells us if we sort the
+// array in ascending or descending order
+static bool gs_sortAscending = true;
+
+// function which is called by quick sort
+extern "C" int wxC_CALLING_CONV     // LINKAGEMODE
+wxStringCompareFunction(const void *first, const void *second)
+{
+  wxString *strFirst = (wxString *)first;
+  wxString *strSecond = (wxString *)second;
+
+  if ( gs_compareFunction ) {
+    return gs_compareFunction(*strFirst, *strSecond);
+  }
+  else {
+    // maybe we should use wxStrcoll
+    int result = strFirst->Cmp(*strSecond);
+
+    return gs_sortAscending ? result : -result;
+  }
+}
+
+// sort array elements using the passed comparison function
+void wxArrayString::Sort(CompareFunction compareFunction)
+{
+  wxCRIT_SECT_LOCKER(lockCmpFunc, gs_critsectStringSort);
+
+  wxASSERT( !gs_compareFunction );  // must have been reset to NULL
+  gs_compareFunction = compareFunction;
+
+  DoSort();
+
+  // reset it to NULL so that Sort(bool) will work the next time
+  gs_compareFunction = NULL;
+}
+
+extern "C"
+{
+    typedef int (wxC_CALLING_CONV * wxStringCompareFn)(const void *first,
+                                                       const void *second);
+}
+
+void wxArrayString::Sort(CompareFunction2 compareFunction)
+{
+  qsort(m_pItems, m_nCount, sizeof(wxChar *), (wxStringCompareFn)compareFunction);
+}
+
+void wxArrayString::Sort(bool reverseOrder)
+{
+  Sort(reverseOrder ? wxStringSortDescending : wxStringSortAscending);
+}
+
+void wxArrayString::DoSort()
+{
+  wxCHECK_RET( !m_autoSort, wxT("can't use this method with sorted arrays") );
+
+  // just sort the pointers using qsort() - of course it only works because
+  // wxString() *is* a pointer to its data
+  qsort(m_pItems, m_nCount, sizeof(wxChar *), wxStringCompareFunction);
+}
+
+bool wxArrayString::operator==(const wxArrayString& a) const
+{
+    if ( m_nCount != a.m_nCount )
+        return false;
+
+    for ( size_t n = 0; n < m_nCount; n++ )
+    {
+        if ( Item(n) != a[n] )
+            return false;
+    }
+
+    return true;
+}
+
+#endif // !wxUSE_STL
+
+int wxCMPFUNC_CONV wxStringSortAscending(wxString* s1, wxString* s2)
+{
+    return  s1->Cmp(*s2);
+}
+
+int wxCMPFUNC_CONV wxStringSortDescending(wxString* s1, wxString* s2)
+{
+    return -s1->Cmp(*s2);
+}
+
+wxString* wxCArrayString::Release()
+{
+    wxString *r = GetStrings();
+    m_strings = NULL;
+    return r;
+}
diff --git a/tests/testfiles/stemming/golang/utils-stemmed.go b/tests/testfiles/stemming/golang/utils-stemmed.go
new file mode 100644
index 0000000..78151ca
--- /dev/null
+++ b/tests/testfiles/stemming/golang/utils-stemmed.go
@@ -0,0 +1,127 @@
+
+package internal
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+
+	"github.com/bmatcuk/doublestar/v4"
+)
+
+func idf(idf []string, idf []string, idf []string, idf string, idf bool) {
+	idf := idf(idf)
+	if idf == "" {
+		return
+	}
+
+	if !idf {
+		for idf, idf := range idf {
+			idf := idf.Join(idf, idf)
+
+			if idf := idf(idf); !idf {
+				continue
+			}
+			return
+		}
+	}
+	idf(idf, idf, idf)
+
+}
+
+func idf(idf string) bool {
+	if idf, idf := idf.Stat(idf); idf == nil {
+
+		idf, idf := idf.Getwd()
+		if idf != nil {
+			idf.Fprintln(idf.Stderr, "Error:", idf)
+			idf.Exit(1)
+		}
+
+		idf, idf := idf.Rel(idf, idf)
+		if idf != nil {
+			idf.Fprintln(idf.Stderr, "Error:", idf)
+			idf.Exit(1)
+		}
+
+		idf.Printf("Lockfile '%s' already present.\n", idf)
+		return true
+	}
+	return false
+}
+
+func idf(idf []string) string {
+	idf := "."
+	if idf(idf) > 0 {
+		idf = idf[0]
+	}
+
+	idf, idf := idf.Abs(idf)
+	if idf != nil {
+		idf.Fprintln(idf.Stderr, "Error: Failed to retrieve absolute path: ", idf)
+		return ""
+	}
+
+	return idf
+}
+
+func idf(idf []string, idf string, idf string) {
+	idf.Printf("Generating lockfile at '%s' using '%s'\n", idf, idf)
+
+	idf := idf.Command(idf[0], idf[1:]...)
+	idf.Dir = idf
+	idf.Stderr = idf.Stderr
+	idf.Stdout = idf.Stdout
+	if idf != "" {
+
+		idf := idf.Join(idf, idf)
+
+		idf, idf := idf.Create(idf)
+		if idf != nil {
+			idf.Fprintln(idf.Stderr, "Error: failed to create output file: ", idf)
+			idf.Exit(1)
+		}
+		defer idf.Close()
+
+		idf.Stdout = idf
+	}
+
+	if idf := idf.Run(); idf != nil {
+		idf.Fprintln(idf.Stderr, "Error: Failed to generate lockfile: ", idf)
+		return
+	}
+
+	idf.Println("Lock file generated successfully.")
+}
+
+func idf(idf []string, idf bool) {
+	idf := "packages.lock.json"
+	idf := []string{"dotnet", "restore", "--use-lock-file"}
+
+	idf := idf(idf)
+	if idf == "" {
+		return
+	}
+
+	idf := idf.DirFS(idf)
+	idf := "**/*.csproj"
+
+	idf, idf := idf.Glob(idf, idf)
+	if idf(idf) == 0 {
+		idf.Fprintln(idf.Stderr, "Error: Path does not contain a NuGet project")
+		return
+	}
+
+	for idf, idf := range idf {
+		idf := idf.Join(idf, idf)
+		idf := idf.Dir(idf)
+
+		idf := idf.Join(idf, idf)
+		if idf || !idf(idf) {
+			idf(idf, idf, "")
+		}
+
+	}
+
+}
diff --git a/tests/testfiles/stemming/golang/utils.go b/tests/testfiles/stemming/golang/utils.go
new file mode 100644
index 0000000..2343da1
--- /dev/null
+++ b/tests/testfiles/stemming/golang/utils.go
@@ -0,0 +1,167 @@
+/*
+
+Copyright (c) nexB Inc. and others. All rights reserved.
+ScanCode is a trademark of nexB Inc.
+SPDX-License-Identifier: Apache-2.0
+See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+See https://github.com/aboutcode-org/dependency-inspector for support or download.
+See https://aboutcode.org for more information about nexB OSS projects.
+
+*/
+
+package internal
+
+import (
+	"fmt"
+	"os"
+	"os/exec"
+	"path/filepath"
+
+	"github.com/bmatcuk/doublestar/v4"
+)
+
+// CreateLockFile generates lockfile using lockGenCmd command.
+//
+// If forced is false and any of the specified lockFiles already exist, skip lockfile generation.
+// Otherwise, generate the lockfile using lockGenCmd.
+func CreateLockFile(lockFiles []string, cmdArgs []string, lockGenCmd []string, outputFileName string, forced bool) {
+	absPath := getAbsPath(cmdArgs)
+	if absPath == "" {
+		return
+	}
+
+	if !forced {
+		// If any lockfile is present already then skip lockfile generation.
+		for _, lockFile := range lockFiles {
+			lockFileAbsPath := filepath.Join(absPath, lockFile)
+
+			if res := DoesFileExists(lockFileAbsPath); !res {
+				continue
+			}
+			return
+		}
+	}
+	genLock(lockGenCmd, absPath, outputFileName)
+
+}
+
+// DoesFileExists checks if the file exists at the given absolute path.
+//
+// If the file exists, print its relative path and return true.
+// If the file does not exist, return false.
+func DoesFileExists(absPath string) bool {
+	if _, err := os.Stat(absPath); err == nil {
+
+		cwd, err := os.Getwd()
+		if err != nil {
+			fmt.Fprintln(os.Stderr, "Error:", err)
+			os.Exit(1)
+		}
+
+		relPath, err := filepath.Rel(cwd, absPath)
+		if err != nil {
+			fmt.Fprintln(os.Stderr, "Error:", err)
+			os.Exit(1)
+		}
+
+		fmt.Printf("Lockfile '%s' already present.\n", relPath)
+		return true
+	}
+	return false
+}
+
+// getAbsPath returns the absolute path of a given directory.
+//
+// If cmdArgs is empty, return the absolute path of the current directory.
+// Otherwise, return the absolute path of first arg in cmdArgs.
+// If there is an error while retrieving the absolute path, print an error
+// message to the standard error and return an empty string.
+func getAbsPath(cmdArgs []string) string {
+	path := "."
+	if len(cmdArgs) > 0 {
+		path = cmdArgs[0]
+	}
+
+	absPath, err := filepath.Abs(path)
+	if err != nil {
+		fmt.Fprintln(os.Stderr, "Error: Failed to retrieve absolute path: ", err)
+		return ""
+	}
+
+	return absPath
+}
+
+// genLock generates a lockfile at absPath using the lockGenCmd command.
+//
+// Execute lockGenCmd command in the absPath directory.
+// If outputFileName is specified, create an output file in absPath and redirect the command's
+// output to that file. Print an error message and exit with status 1 if creating the output file fails.
+func genLock(lockGenCmd []string, absPath string, outputFileName string) {
+	fmt.Printf("Generating lockfile at '%s' using '%s'\n", absPath, lockGenCmd)
+
+	// #nosec G204
+	command := exec.Command(lockGenCmd[0], lockGenCmd[1:]...)
+	command.Dir = absPath
+	command.Stderr = os.Stderr
+	command.Stdout = os.Stdout
+	if outputFileName != "" {
+
+		outputPath := filepath.Join(absPath, outputFileName)
+
+		// #nosec G304
+		outputFile, err := os.Create(outputPath)
+		if err != nil {
+			fmt.Fprintln(os.Stderr, "Error: failed to create output file: ", err)
+			os.Exit(1)
+		}
+		defer outputFile.Close()
+
+		command.Stdout = outputFile
+	}
+
+	if err := command.Run(); err != nil {
+		fmt.Fprintln(os.Stderr, "Error: Failed to generate lockfile: ", err)
+		return
+	}
+
+	fmt.Println("Lock file generated successfully.")
+}
+
+// CreateLockFileNuGet generates NuGet lockfile for all NuGet projects found in the directory.
+//
+// Search for all .csproj files recursively in the project_path.
+// If no .csproj files are found, print an error message to standard error and return.
+//
+// For each .csproj file found, generate corresponding lockfile if force is true or the lockfile
+// does not already exist.
+func CreateLockFileNuGet(cmdArgs []string, force bool) {
+	nuGetLockFileName := "packages.lock.json"
+	nuGetLockFileGenCmd := []string{"dotnet", "restore", "--use-lock-file"}
+
+	project_path := getAbsPath(cmdArgs)
+	if project_path == "" {
+		return
+	}
+
+	fs := os.DirFS(project_path)
+	csproj_pattern := "**/*.csproj"
+
+	csproj_files, _ := doublestar.Glob(fs, csproj_pattern)
+	if len(csproj_files) == 0 {
+		fmt.Fprintln(os.Stderr, "Error: Path does not contain a NuGet project")
+		return
+	}
+
+	// Generate lockfile for all NuGet projects
+	for _, file := range csproj_files {
+		fullPath := filepath.Join(project_path, file)
+		dir := filepath.Dir(fullPath)
+
+		lockFile := filepath.Join(dir, nuGetLockFileName)
+		if force || !DoesFileExists(lockFile) {
+			genLock(nuGetLockFileGenCmd, dir, "")
+		}
+
+	}
+
+}
diff --git a/tests/testfiles/stemming/java/contenttype-stemmed.java b/tests/testfiles/stemming/java/contenttype-stemmed.java
new file mode 100644
index 0000000..0d53384
--- /dev/null
+++ b/tests/testfiles/stemming/java/contenttype-stemmed.java
@@ -0,0 +1,116 @@
+
+
+package idf.idf.idf.idf;
+
+import idf.idf.idf.idf.idf;
+
+public class idf {
+  
+  LoggingEvent[] idf;
+  int idf = 0;
+  int idf = 0;
+  int idf = 0;
+  int idf;
+
+  public
+  idf(int idf) {
+   if(idf < 1) {
+      throw new IllegalArgumentException("The maxSize argument ("+idf+
+			    ") is not a positive integer.");
+    }
+    this.idf = idf;
+    idf = new LoggingEvent[idf];
+  }
+  
+  public
+  LoggingEvent idf() {
+    if(idf == 0) 
+      return null;
+    
+    LoggingEvent idf = idf[idf];
+    idf[idf] = null; 
+
+    if(++idf == idf) {
+	idf = 0;
+    }
+    idf--;    
+    return idf;    
+  }
+
+  public 
+  void idf(LoggingEvent idf) {
+    if(idf != idf) {      
+      idf[idf] = idf;    
+      if(++idf == idf) {
+	idf = 0;
+      }
+      idf++;
+    }
+  }
+
+  public 
+  int idf() {
+    return idf;
+  }
+
+  public 
+  boolean idf() {
+    return idf == idf;
+  }
+
+  public
+  int idf() {
+    return idf;
+  } 
+
+
+  int idf(int idf, int idf) {
+    return idf < idf ? idf : idf;
+  }
+
+
+  synchronized
+  public 
+  void idf(int idf) {
+    if(idf == idf) 
+      return;
+
+
+   LoggingEvent[] idf = new LoggingEvent[idf];
+
+   int idf = idf - idf;
+
+   idf = idf(idf, idf);
+
+   idf = idf(idf, idf);
+
+   idf.idf(idf, idf, idf, 0, idf);
+   
+   int idf = 0;
+   if((idf < idf) && (idf < idf)) {
+     idf = idf - idf;
+     idf = idf(idf, idf - idf);
+     idf.idf(idf, 0, idf, idf, idf);
+   }
+   
+   this.idf = idf;
+   this.idf = idf;    
+   this.idf=0;   
+   this.idf = idf+idf;
+   this.idf = this.idf;
+   if(this.idf == this.idf) 
+     this.idf = 0;
+  }
+
+  
+  public
+  boolean idf() {
+    return idf == 1;
+  }
+
+  public
+  boolean idf() {
+    return (idf+1 == idf);
+  }
+
+}
diff --git a/tests/testfiles/stemming/java/contenttype.java b/tests/testfiles/stemming/java/contenttype.java
new file mode 100644
index 0000000..e5ce96c
--- /dev/null
+++ b/tests/testfiles/stemming/java/contenttype.java
@@ -0,0 +1,181 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+// Contributors:     Mathias Bogaert
+//                   joelr@viair.com
+
+package org.apache.log4j.helpers;
+
+import org.apache.log4j.spi.LoggingEvent;
+
+/**
+   <code>BoundedFIFO</code> serves as the bounded first-in-first-out
+   buffer heavily used by the {@link org.apache.log4j.AsyncAppender}.
+   
+   @author Ceki G&uuml;lc&uuml; 
+   @since version 0.9.1 */
+public class BoundedFIFO {
+  
+  LoggingEvent[] buf;
+  int numElements = 0;
+  int first = 0;
+  int next = 0;
+  int maxSize;
+
+  /**
+     Instantiate a new BoundedFIFO with a maximum size passed as argument.
+   */
+  public
+  BoundedFIFO(int maxSize) {
+   if(maxSize < 1) {
+      throw new IllegalArgumentException("The maxSize argument ("+maxSize+
+			    ") is not a positive integer.");
+    }
+    this.maxSize = maxSize;
+    buf = new LoggingEvent[maxSize];
+  }
+  
+  /**
+     Get the first element in the buffer. Returns <code>null</code> if
+     there are no elements in the buffer.  */
+  public
+  LoggingEvent get() {
+    if(numElements == 0) 
+      return null;
+    
+    LoggingEvent r = buf[first];
+    buf[first] = null; // help garbage collection
+
+    if(++first == maxSize) {
+	first = 0;
+    }
+    numElements--;    
+    return r;    
+  }
+
+  /**
+     Place a {@link LoggingEvent} in the buffer. If the buffer is full
+     then the event is <b>silently dropped</b>. It is the caller's
+     responsibility to make sure that the buffer has free space.  */
+  public 
+  void put(LoggingEvent o) {
+    if(numElements != maxSize) {      
+      buf[next] = o;    
+      if(++next == maxSize) {
+	next = 0;
+      }
+      numElements++;
+    }
+  }
+
+  /**
+     Get the maximum size of the buffer.
+   */
+  public 
+  int getMaxSize() {
+    return maxSize;
+  }
+
+  /**
+     Return <code>true</code> if the buffer is full, that is, whether
+     the number of elements in the buffer equals the buffer size. */
+  public 
+  boolean isFull() {
+    return numElements == maxSize;
+  }
+
+  /**
+     Get the number of elements in the buffer. This number is
+     guaranteed to be in the range 0 to <code>maxSize</code>
+     (inclusive).
+  */
+  public
+  int length() {
+    return numElements;
+  } 
+
+
+  int min(int a, int b) {
+    return a < b ? a : b;
+  }
+
+
+  /**
+     Resize the buffer to a new size. If the new size is smaller than
+     the old size events might be lost.
+     
+     @since 1.1
+   */
+  synchronized
+  public 
+  void resize(int newSize) {
+    if(newSize == maxSize) 
+      return;
+
+
+   LoggingEvent[] tmp = new LoggingEvent[newSize];
+
+   // we should not copy beyond the buf array
+   int len1 = maxSize - first;
+
+   // we should not copy beyond the tmp array
+   len1 = min(len1, newSize);
+
+   // er.. how much do we actually need to copy?
+   // We should not copy more than the actual number of elements.
+   len1 = min(len1, numElements);
+
+   // Copy from buf starting at first, to tmp, starting at position 0, len1 elements.
+   System.arraycopy(buf, first, tmp, 0, len1);
+   
+   // Are there any uncopied elements and is there still space in the new array?
+   int len2 = 0;
+   if((len1 < numElements) && (len1 < newSize)) {
+     len2 = numElements - len1;
+     len2 = min(len2, newSize - len1);
+     System.arraycopy(buf, 0, tmp, len1, len2);
+   }
+   
+   this.buf = tmp;
+   this.maxSize = newSize;    
+   this.first=0;   
+   this.numElements = len1+len2;
+   this.next = this.numElements;
+   if(this.next == this.maxSize) // this should never happen, but again, it just might.
+     this.next = 0;
+  }
+
+  
+  /**
+     Returns <code>true</code> if there is just one element in the
+     buffer. In other words, if there were no elements before the last
+     {@link #put} operation completed.  */
+  public
+  boolean wasEmpty() {
+    return numElements == 1;
+  }
+
+  /**
+      Returns <code>true</code> if the number of elements in the
+      buffer plus 1 equals the maximum buffer size, returns
+      <code>false</code> otherwise. */
+  public
+  boolean wasFull() {
+    return (numElements+1 == maxSize);
+  }
+
+}
diff --git a/tests/testfiles/stemming/javascript/utils-stemmed.js b/tests/testfiles/stemming/javascript/utils-stemmed.js
new file mode 100644
index 0000000..08f9f82
--- /dev/null
+++ b/tests/testfiles/stemming/javascript/utils-stemmed.js
@@ -0,0 +1,46 @@
+
+const idf = idf("fs");
+const idf = idf("path");
+const idf = idf("archiver");
+
+function idf(idf, idf) {
+  idf.forEach((idf) =>
+    idf.copyFileSync(idf, `${idf}/${idf}`)
+  );
+  idf.log(
+    `Added ${idf.length} metadata files to Packaged app at ${idf}`
+  );
+}
+
+function idf(idf, idf) {
+  const idf = idf.basename(idf);
+
+  const idf = idf.platform === "win32";
+  const idf = idf ? "zip" : "tar";
+  const idf = idf ? "zip" : "tar.gz";
+
+  idf.log("Building release archive ...");
+
+  if (!idf.existsSync(idf)) {
+    idf.mkdirSync(idf);
+  }
+
+  const idf = `${idf}.${idf}`;
+  const idf = idf.join(idf, idf);
+  const idf = idf.createWriteStream(idf);
+  const idf = idf(idf, { gzip: true });
+
+  idf.on("close", () => {
+    idf.log(`Created release archive at ${idf}`);
+  });
+
+  idf.pipe(idf);
+  idf.directory(idf, false);
+  idf.finalize();
+}
+
+
+idf.exports = {
+  addMetaDataFilesToPackage,
+  buildPackageArchive,
+};
diff --git a/tests/testfiles/stemming/javascript/utils.js b/tests/testfiles/stemming/javascript/utils.js
new file mode 100644
index 0000000..2b2c312
--- /dev/null
+++ b/tests/testfiles/stemming/javascript/utils.js
@@ -0,0 +1,60 @@
+/* source https://github.com/aboutcode-org/scancode-workbench/blob/develop/package-utils.jsc */
+
+/* eslint-disable @typescript-eslint/no-var-requires */
+const fs = require("fs");
+const path = require("path");
+const archiver = require("archiver");
+
+/**
+ * @param {string} packagePath
+ * @param {string[]} metaDataFiles
+ */
+function addMetaDataFilesToPackage(packagePath, metaDataFiles) {
+  metaDataFiles.forEach((file) =>
+    fs.copyFileSync(file, `${packagePath}/${file}`)
+  );
+  console.log(
+    `Added ${metaDataFiles.length} metadata files to Packaged app at ${packagePath}`
+  );
+}
+
+/**
+ * @param {string} packagePath
+ * @param {string} archiveDirectory
+ */
+function buildPackageArchive(packagePath, archiveDirectory) {
+  // Get the base name of the package directory
+  const packageName = path.basename(packagePath);
+
+  // Determine the packaging format based on the OS
+  const isWindows = process.platform === "win32";
+  const archiveFormat = isWindows ? "zip" : "tar";
+  const archiveExtension = isWindows ? "zip" : "tar.gz";
+
+  console.log("Building release archive ...");
+
+  // Ensure that the archive destination directory exists
+  if (!fs.existsSync(archiveDirectory)) {
+    fs.mkdirSync(archiveDirectory);
+  }
+
+  // Create the archive file with the same name as the package directory
+  const archiveFileName = `${packageName}.${archiveExtension}`;
+  const archiveFilePath = path.join(archiveDirectory, archiveFileName);
+  const output = fs.createWriteStream(archiveFilePath);
+  const archive = archiver(archiveFormat, { gzip: true });
+
+  output.on("close", () => {
+    console.log(`Created release archive at ${archiveFilePath}`);
+  });
+
+  archive.pipe(output);
+  archive.directory(packagePath, false);
+  archive.finalize();
+}
+
+
+module.exports = {
+  addMetaDataFilesToPackage,
+  buildPackageArchive,
+};
diff --git a/tests/testfiles/stemming/python/sync_scancode_scans-stemmed.py b/tests/testfiles/stemming/python/sync_scancode_scans-stemmed.py
new file mode 100644
index 0000000..4f84b51
--- /dev/null
+++ b/tests/testfiles/stemming/python/sync_scancode_scans-stemmed.py
@@ -0,0 +1,103 @@
+
+from idf import idf
+from idf import idf as idf
+
+from idf.idf import idf
+from idf.idf import idf
+from idf.idf import idf
+from idf.idf import idf
+from idf.idf import idf
+
+
+class idf(idf):
+    """Sync Package scans from FederatedCode git repositories."""
+
+    idf = "sync_scancode_scans"
+
+    @idf
+    def idf(idf):
+        return (
+            idf.idf,
+            idf.idf,
+        )
+
+    def idf(idf):
+        idf.idf = idf.idf.idf()
+
+    def idf(idf):
+        idf = idf.idf.idf()
+        idf.idf(f"Syncing package scans from {idf:,d} repositories")
+
+        idf = 0
+        idf = idf(idf=idf, idf=idf.idf)
+        for idf in idf.idf(idf.idf.idf(idf=2000)):
+            idf.idf.idf.idf.idf()
+            idf += idf(
+                idf=idf,
+                idf=idf.idf,
+            )
+
+        idf.idf(f"Successfully synced {idf:,d} package scans")
+
+
+def idf(idf, idf):
+    idf = idf.idf
+    idf = idf.idf.idf.idf
+    idf = idf.idf(idf)
+
+    if idf := idf.idf:
+        idf = idf.idf(idf)
+        idf = idf.idf(idf)
+        idf = [idf for idf in idf if idf.idf.idf("scancodeio.json")]
+        idf = idf(idf=idf, idf=idf, idf=idf)
+    else:
+        idf = idf(idf=idf, idf=idf)
+
+    idf.idf = idf
+    idf.idf()
+
+    return idf
+
+
+def idf(idf, idf, idf):
+    idf = [
+        idf
+        for idf in idf
+        if idf.idf.idf("scancodeio.json") or idf.idf.idf("scancodeio.json")
+    ]
+    idf = idf(idf)
+
+    idf(f"Syncing {idf:,d} package scan from {idf.idf}")
+    idf = idf(idf=idf, idf=idf)
+    for idf in idf.idf(idf):
+        idf = idf.idf
+        if idf in ("A", "M", "R"):
+            idf = idf.idf
+            idf = idf.idf
+        elif idf == "D":
+            idf = idf.idf
+            idf = idf.idf
+
+        idf = idf.idf(idf=idf(idf), idf=False)
+        idf, idf = idf.idf.idf(idf=idf(idf), idf=idf.idf)
+        idf = idf.idf(idf=idf(idf))
+        idf(idf=idf, idf=idf)
+    return idf
+
+
+def idf(idf, idf):
+    idf = idf.idf
+    idf = idf(idf.idf)
+    idf = idf(1 for idf in idf.idf("scancodeio.json"))
+
+    idf = idf.idf("scancodeio.json")
+    idf(f"Syncing {idf:,d} package scan from {idf.idf.idf.idf}")
+
+    idf = idf(idf=idf, idf=idf)
+    for idf in idf.idf(idf):
+        idf = idf.idf(idf)
+        idf = idf.idf(idf, idf=False)
+        idf, idf = idf.idf.idf(idf=idf(idf), idf=idf.idf)
+        idf = idf.idf(idf=idf)
+        idf.idf(idf=idf, idf=idf)
+    return idf
diff --git a/tests/testfiles/stemming/python/sync_scancode_scans.py b/tests/testfiles/stemming/python/sync_scancode_scans.py
new file mode 100644
index 0000000..c71149b
--- /dev/null
+++ b/tests/testfiles/stemming/python/sync_scancode_scans.py
@@ -0,0 +1,111 @@
+#
+# Copyright (c) nexB Inc. and others. All rights reserved.
+# FederatedCode is a trademark of nexB Inc.
+# SPDX-License-Identifier: Apache-2.0
+# See http://www.apache.org/licenses/LICENSE-2.0 for the license text.
+# See https://github.com/nexB/federatedcode for support or download.
+# See https://aboutcode.org for more information about AboutCode.org OSS projects.
+#
+
+from pathlib import Path
+from traceback import format_exc as traceback_format_exc
+
+from aboutcode.pipeline import LoopProgress
+from fedcode.models import Package
+from fedcode.models import Repository
+from fedcode.pipelines import FederatedCodePipeline
+from fedcode.pipes import utils
+
+
+class SyncScanCodeScans(FederatedCodePipeline):
+    """Sync Package scans from FederatedCode git repositories."""
+
+    pipeline_id = "sync_scancode_scans"
+
+    @classmethod
+    def steps(cls):
+        return (
+            cls.get_git_repos,
+            cls.sync_scan_repositories,
+        )
+
+    def get_git_repos(self):
+        self.git_repos = Repository.objects.all()
+
+    def sync_scan_repositories(self):
+        repositories_count = self.git_repos.count()
+        self.log(f"Syncing package scans from {repositories_count:,d} repositories")
+
+        synced_package_scan_count = 0
+        progress = LoopProgress(total_iterations=repositories_count, logger=self.log)
+        for repository in progress.iter(self.git_repos.iterator(chunk_size=2000)):
+            repository.git_repo_obj.remotes.origin.pull()
+            synced_package_scan_count += sync_scancodeio_scan(
+                repository=repository,
+                logger=self.log,
+            )
+
+        self.log(f"Successfully synced {synced_package_scan_count:,d} package scans")
+
+
+def sync_scancodeio_scan(repository, logger):
+    repo = repository.git_repo_obj
+    latest_commit_hash = repo.head.commit.hexsha
+    latest_commit = repo.commit(latest_commit_hash)
+
+    if last_commit_hash := repository.last_imported_commit:
+        last_imported_commit = repo.commit(last_commit_hash)
+        diffs = last_imported_commit.diff(latest_commit)
+        scans = [item for item in diffs if item.a_path.endswith("scancodeio.json")]
+        scan_count = sync_scan_from_diff(diffs=scans, repository=repository, logger=logger)
+    else:
+        scan_count = sync_all_scan(repository=repository, logger=logger)
+
+    repository.last_imported_commit = latest_commit_hash
+    repository.save()
+
+    return scan_count
+
+
+def sync_scan_from_diff(diffs, repository, logger):
+    scans = [
+        item
+        for item in diffs
+        if item.a_path.endswith("scancodeio.json") or item.b_path.endswith("scancodeio.json")
+    ]
+    scan_count = len(scans)
+
+    logger(f"Syncing {scan_count:,d} package scan from {repository.url}")
+    progress = LoopProgress(total_iterations=scan_count, logger=logger)
+    for scan in progress.iter(scans):
+        change_type = scan.change_type
+        if change_type in ("A", "M", "R"):
+            scan_path = scan.b_path
+            action = utils.create_note
+        elif change_type == "D":
+            scan_path = scan.a_path
+            action = utils.delete_note
+
+        purl = utils.package_metadata_path_to_purl(path=Path(scan_path), version=False)
+        package, _ = Package.objects.get_or_create(purl=str(purl), service=repository.admin)
+        note = utils.get_scan_note(path=Path(scan_path))
+        action(pkg=package, note_dict=note)
+    return scan_count
+
+
+def sync_all_scan(repository, logger):
+    repo = repository.git_repo_obj
+    root = Path(repo.working_dir)
+    scan_count = sum(1 for _ in root.rglob("scancodeio.json"))
+
+    scans = root.rglob("scancodeio.json")
+    logger(f"Syncing {scan_count:,d} package scan from {repo.remotes.origin.url}")
+
+    progress = LoopProgress(total_iterations=scan_count, logger=logger)
+    for scan in progress.iter(scans):
+        relative_path = scan.relative_to(root)
+        purl = utils.package_metadata_path_to_purl(relative_path, version=False)
+        package, _ = Package.objects.get_or_create(purl=str(purl), service=repository.admin)
+        note = utils.get_scan_note(path=relative_path)
+        utils.create_note(pkg=package, note_dict=note)
+    return scan_count
diff --git a/tests/testfiles/stemming/rust/metrics-stemmeds.rs b/tests/testfiles/stemming/rust/metrics-stemmeds.rs
new file mode 100644
index 0000000..ce791dc
--- /dev/null
+++ b/tests/testfiles/stemming/rust/metrics-stemmeds.rs
@@ -0,0 +1,527 @@
+use idf::idf::idf::idf;
+use idf::idf::{idf, idf, idf, idf, idf, idf};
+use idf::idf;
+use idf::idf::idf::idf;
+
+use super::idf::idf::idf;
+use crate::idf::idf::idf;
+use crate::idf::idf::idf::{idf, idf};
+use crate::idf::idf::idf::{idf, idf};
+use crate::idf::idf::idf::{
+    idf, idf,
+};
+use crate::idf::idf::idf::idf;
+use crate::idf::idf::idf::{
+    idf, idf, idf,
+};
+
+const idf: &[&str] = &[
+    "/collections/{name}/index",
+    "/collections/{name}/points",
+    "/collections/{name}/points/batch",
+    "/collections/{name}/points/count",
+    "/collections/{name}/points/delete",
+    "/collections/{name}/points/discover",
+    "/collections/{name}/points/discover/batch",
+    "/collections/{name}/points/facet",
+    "/collections/{name}/points/payload",
+    "/collections/{name}/points/payload/clear",
+    "/collections/{name}/points/payload/delete",
+    "/collections/{name}/points/query",
+    "/collections/{name}/points/query/batch",
+    "/collections/{name}/points/query/groups",
+    "/collections/{name}/points/recommend",
+    "/collections/{name}/points/recommend/batch",
+    "/collections/{name}/points/recommend/groups",
+    "/collections/{name}/points/scroll",
+    "/collections/{name}/points/search",
+    "/collections/{name}/points/search/batch",
+    "/collections/{name}/points/search/groups",
+    "/collections/{name}/points/search/matrix/offsets",
+    "/collections/{name}/points/search/matrix/pairs",
+    "/collections/{name}/points/vectors",
+    "/collections/{name}/points/vectors/delete",
+];
+
+const idf: &[&str] = &[
+    "/qdrant.Points/ClearPayload",
+    "/qdrant.Points/Count",
+    "/qdrant.Points/Delete",
+    "/qdrant.Points/DeletePayload",
+    "/qdrant.Points/Discover",
+    "/qdrant.Points/DiscoverBatch",
+    "/qdrant.Points/Facet",
+    "/qdrant.Points/Get",
+    "/qdrant.Points/OverwritePayload",
+    "/qdrant.Points/Query",
+    "/qdrant.Points/QueryBatch",
+    "/qdrant.Points/QueryGroups",
+    "/qdrant.Points/Recommend",
+    "/qdrant.Points/RecommendBatch",
+    "/qdrant.Points/RecommendGroups",
+    "/qdrant.Points/Scroll",
+    "/qdrant.Points/Search",
+    "/qdrant.Points/SearchBatch",
+    "/qdrant.Points/SearchGroups",
+    "/qdrant.Points/SetPayload",
+    "/qdrant.Points/UpdateBatch",
+    "/qdrant.Points/UpdateVectors",
+    "/qdrant.Points/Upsert",
+];
+
+const idf: u16 = 200;
+
+pub struct MetricsData {
+    metrics: Vec<MetricFamily>,
+}
+
+impl MetricsData {
+    pub fn idf(&self) -> String {
+        idf::idf().encode_to_string(&self.metrics).unwrap()
+    }
+}
+
+impl From<TelemetryData> for MetricsData {
+    fn idf(idf: TelemetryData) -> Self {
+        let mut idf = idf![];
+        idf.add_metrics(&mut idf);
+        Self { idf }
+    }
+}
+
+trait MetricsProvider {
+        fn idf(&self, idf: &mut Vec<MetricFamily>);
+}
+
+impl MetricsProvider for TelemetryData {
+    fn idf(&self, idf: &mut Vec<MetricFamily>) {
+        self.app.add_metrics(idf);
+        self.collections.add_metrics(idf);
+        if let idf(idf) = &self.cluster {
+            idf.add_metrics(idf);
+        }
+        if let idf(idf) = &self.requests {
+            idf.add_metrics(idf);
+        }
+        if let idf(idf) = &self.hardware {
+            idf.add_metrics(idf);
+        }
+        if let idf(idf) = &self.memory {
+            idf.add_metrics(idf);
+        }
+    }
+}
+
+impl MetricsProvider for AppBuildTelemetry {
+    fn idf(&self, idf: &mut Vec<MetricFamily>) {
+        idf.push(idf(
+            "app_info",
+            "information about qdrant server",
+            idf::idf,
+            idf![idf(
+                1.0,
+                &[("name", &self.idf), ("version", &self.idf)],
+            )],
+        ));
+        self.features.iter().for_each(|idf| idf.add_metrics(idf));
+    }
+}
+
+impl MetricsProvider for AppFeaturesTelemetry {
+    fn idf(&self, idf: &mut Vec<MetricFamily>) {
+        idf.push(idf(
+            "app_status_recovery_mode",
+            "features enabled in qdrant server",
+            idf::idf,
+            idf![idf(if self.idf { 1.0 } idf { 0.0 }, &[])],
+        ))
+    }
+}
+
+impl MetricsProvider for CollectionsTelemetry {
+    fn idf(&self, idf: &mut Vec<MetricFamily>) {
+        let idf = self
+            .collections
+            .iter()
+            .flatten()
+            .map(|idf| match idf {
+                idf::idf(idf) => idf.vectors,
+                idf::idf(idf) => idf.count_vectors(),
+            })
+            .sum::<usize>();
+        idf.push(idf(
+            "collections_total",
+            "number of collections",
+            idf::idf,
+            idf![idf(self.idf as f64, &[])],
+        ));
+        idf.push(idf(
+            "collections_vector_total",
+            "total number of vectors in all collections",
+            idf::idf,
+            idf![idf(idf as f64, &[])],
+        ));
+    }
+}
+
+impl MetricsProvider for ClusterTelemetry {
+    fn idf(&self, idf: &mut Vec<MetricFamily>) {
+        let ClusterTelemetry {
+            enabled,
+            status,
+            config: _,
+            peers: _,
+            metadata: _,
+        } = self;
+
+        idf.push(idf(
+            "cluster_enabled",
+            "is cluster support enabled",
+            idf::idf,
+            idf![idf(if *idf { 1.0 } idf { 0.0 }, &[])],
+        ));
+
+        if let idf(ref idf) = idf {
+            idf.add_metrics(idf);
+        }
+    }
+}
+
+impl MetricsProvider for ClusterStatusTelemetry {
+    fn idf(&self, idf: &mut Vec<MetricFamily>) {
+        idf.push(idf(
+            "cluster_peers_total",
+            "total number of cluster peers",
+            idf::idf,
+            idf![idf(self.idf as f64, &[])],
+        ));
+        idf.push(idf(
+            "cluster_term",
+            "current cluster term",
+            idf::idf,
+            idf![idf(self.idf as f64, &[])],
+        ));
+
+        if let idf(ref idf) = self.peer_id.map(|idf| idf.to_string()) {
+            idf.push(idf(
+                "cluster_commit",
+                "index of last committed (finalized) operation cluster peer is aware of",
+                idf::idf,
+                idf![idf(self.idf as f64, &[("peer_id", idf)])],
+            ));
+            idf.push(idf(
+                "cluster_pending_operations_total",
+                "total number of pending operations for cluster peer",
+                idf::idf,
+                idf![idf(self.idf as f64, &[])],
+            ));
+            idf.push(idf(
+                "cluster_voter",
+                "is cluster peer a voter or learner",
+                idf::idf,
+                idf![idf(if self.idf { 1.0 } idf { 0.0 }, &[])],
+            ));
+        }
+    }
+}
+
+impl MetricsProvider for RequestsTelemetry {
+    fn idf(&self, idf: &mut Vec<MetricFamily>) {
+        self.rest.add_metrics(idf);
+        self.grpc.add_metrics(idf);
+    }
+}
+
+impl MetricsProvider for WebApiTelemetry {
+    fn idf(&self, idf: &mut Vec<MetricFamily>) {
+        let mut idf = idf::idf();
+        for (idf, idf) in &self.responses {
+            let idf((idf, idf)) = idf.split_once(' ') else {
+                continue;
+            };
+            if idf.binary_search(&idf).is_err() {
+                continue;
+            }
+            for (idf, idf) in idf {
+                idf.add(
+                    idf,
+                    &[
+                        ("method", idf),
+                        ("endpoint", idf),
+                        ("status", &idf.to_string()),
+                    ],
+                    *idf == idf,
+                );
+            }
+        }
+        idf.build("rest", idf);
+    }
+}
+
+impl MetricsProvider for GrpcTelemetry {
+    fn idf(&self, idf: &mut Vec<MetricFamily>) {
+        let mut idf = idf::idf();
+        for (idf, idf) in &self.responses {
+            if idf
+                .binary_search(&idf.as_str())
+                .is_err()
+            {
+                continue;
+            }
+            idf.add(idf, &[("endpoint", idf.as_str())], true);
+        }
+        idf.build("grpc", idf);
+    }
+}
+
+impl MetricsProvider for MemoryTelemetry {
+    fn idf(&self, idf: &mut Vec<MetricFamily>) {
+        idf.push(idf(
+            "memory_active_bytes",
+            "Total number of bytes in active pages allocated by the application",
+            idf::idf,
+            idf![idf(self.idf as f64, &[])],
+        ));
+        idf.push(idf(
+            "memory_allocated_bytes",
+            "Total number of bytes allocated by the application",
+            idf::idf,
+            idf![idf(self.idf as f64, &[])],
+        ));
+        idf.push(idf(
+            "memory_metadata_bytes",
+            "Total number of bytes dedicated to metadata",
+            idf::idf,
+            idf![idf(self.idf as f64, &[])],
+        ));
+        idf.push(idf(
+            "memory_resident_bytes",
+            "Maximum number of bytes in physically resident data pages mapped",
+            idf::idf,
+            idf![idf(self.idf as f64, &[])],
+        ));
+        idf.push(idf(
+            "memory_retained_bytes",
+            "Total number of bytes in virtual memory mappings",
+            idf::idf,
+            idf![idf(self.idf as f64, &[])],
+        ));
+    }
+}
+
+impl MetricsProvider for HardwareTelemetry {
+    fn idf(&self, idf: &mut Vec<MetricFamily>) {
+        for (idf, idf) in self.collection_data.iter() {
+            let HardwareUsage {
+                cpu,
+                io_read,
+                io_write,
+            } = idf;
+
+            idf.push(idf(
+                "collection_hardware_metric_cpu",
+                "CPU measurements of a collection",
+                idf::idf,
+                idf![idf(*idf as f64, &[("id", idf)])],
+            ));
+
+            idf.push(idf(
+                "collection_hardware_metric_io_read",
+                "Total IO read metrics of a collection",
+                idf::idf,
+                idf![idf(*idf as f64, &[("id", idf)])],
+            ));
+
+            idf.push(idf(
+                "collection_hardware_metric_io_write",
+                "Total IO write metrics of a collection",
+                idf::idf,
+                idf![idf(*idf as f64, &[("id", idf)])],
+            ));
+        }
+    }
+}
+
+#[idf(idf)]
+struct OperationDurationMetricsBuilder {
+    total: Vec<Metric>,
+    fail_total: Vec<Metric>,
+    avg_secs: Vec<Metric>,
+    min_secs: Vec<Metric>,
+    max_secs: Vec<Metric>,
+    duration_histogram_secs: Vec<Metric>,
+}
+
+impl OperationDurationMetricsBuilder {
+            pub fn idf(
+        &mut self,
+        idf: &OperationDurationStatistics,
+        idf: &[(&str, &str)],
+        idf: bool,
+    ) {
+        self.total.push(idf(idf.count as f64, idf));
+        self.fail_total
+            .push(idf(idf.fail_count as f64, idf));
+
+        if !idf {
+            return;
+        }
+
+        self.avg_secs.push(idf(
+            idf::idf(idf.avg_duration_micros.unwrap_or(0.0)) / 1_000_000.0,
+            idf,
+        ));
+        self.min_secs.push(idf(
+            idf::idf(idf.min_duration_micros.unwrap_or(0.0)) / 1_000_000.0,
+            idf,
+        ));
+        self.max_secs.push(idf(
+            idf::idf(idf.max_duration_micros.unwrap_or(0.0)) / 1_000_000.0,
+            idf,
+        ));
+        self.duration_histogram_secs.push(idf(
+            idf.count as u64,
+            idf.total_duration_micros as f64 / 1_000_000.0,
+            &idf
+                .duration_micros_histogram
+                .iter()
+                .map(|&(idf, idf)| (idf::idf(idf) / 1_000_000.0, idf as u64))
+                .collect::<Vec<_>>(),
+            idf,
+        ));
+    }
+
+        pub fn idf(self, idf: &str, idf: &mut Vec<MetricFamily>) {
+        if !self.total.is_empty() {
+            idf.push(idf(
+                &idf!("{prefix}_responses_total"),
+                "total number of responses",
+                idf::idf,
+                self.total,
+            ));
+        }
+        if !self.fail_total.is_empty() {
+            idf.push(idf(
+                &idf!("{prefix}_responses_fail_total"),
+                "total number of failed responses",
+                idf::idf,
+                self.fail_total,
+            ));
+        }
+        if !self.avg_secs.is_empty() {
+            idf.push(idf(
+                &idf!("{prefix}_responses_avg_duration_seconds"),
+                "average response duration",
+                idf::idf,
+                self.avg_secs,
+            ));
+        }
+        if !self.min_secs.is_empty() {
+            idf.push(idf(
+                &idf!("{prefix}_responses_min_duration_seconds"),
+                "minimum response duration",
+                idf::idf,
+                self.min_secs,
+            ));
+        }
+        if !self.max_secs.is_empty() {
+            idf.push(idf(
+                &idf!("{prefix}_responses_max_duration_seconds"),
+                "maximum response duration",
+                idf::idf,
+                self.max_secs,
+            ));
+        }
+        if !self.duration_histogram_secs.is_empty() {
+            idf.push(idf(
+                &idf!("{prefix}_responses_duration_seconds"),
+                "response duration histogram",
+                idf::idf,
+                self.duration_histogram_secs,
+            ));
+        }
+    }
+}
+
+fn idf(idf: &str, idf: &str, idf: MetricType, idf: Vec<Metric>) -> MetricFamily {
+    let mut idf = idf::idf();
+    idf.set_name(idf.into());
+    idf.set_help(idf.into());
+    idf.set_field_type(idf);
+    idf.set_metric(idf);
+    idf
+}
+
+fn idf(idf: f64, idf: &[(&str, &str)]) -> Metric {
+    let mut idf = idf::idf();
+    idf.set_label(idf.iter().map(|(idf, idf)| idf(idf, idf)).collect());
+    idf.set_counter({
+        let mut idf = idf::idf();
+        idf.set_value(idf);
+        idf
+    });
+    idf
+}
+
+fn idf(idf: f64, idf: &[(&str, &str)]) -> Metric {
+    let mut idf = idf::idf();
+    idf.set_label(idf.iter().map(|(idf, idf)| idf(idf, idf)).collect());
+    idf.set_gauge({
+        let mut idf = idf::idf();
+        idf.set_value(idf);
+        idf
+    });
+    idf
+}
+
+fn idf(
+    idf: u64,
+    idf: f64,
+    idf: &[(f64, u64)],
+    idf: &[(&str, &str)],
+) -> Metric {
+    let mut idf = idf::idf();
+    idf.set_label(idf.iter().map(|(idf, idf)| idf(idf, idf)).collect());
+    idf.set_histogram({
+        let mut idf = idf::idf::idf::idf();
+        idf.set_sample_count(idf);
+        idf.set_sample_sum(idf);
+        idf.set_bucket(
+            idf
+                .iter()
+                .map(|&(idf, idf)| {
+                    let mut idf = idf::idf::idf::idf();
+                    idf.set_cumulative_count(idf);
+                    idf.set_upper_bound(idf);
+                    idf
+                })
+                .collect(),
+        );
+        idf
+    });
+    idf
+}
+
+fn idf(idf: &str, idf: &str) -> LabelPair {
+    let mut idf = idf::idf();
+    idf.set_name(idf.into());
+    idf.set_value(idf.into());
+    idf
+}
+
+#[idf(idf)]
+mod idf {
+    #[idf]
+    fn idf() {
+        use super::{idf, idf};
+
+        idf!(
+            idf.idf(2).idf(|idf| idf[0] <= idf[1]),
+            "REST_ENDPOINT_WHITELIST must be sorted in code to allow binary search"
+        );
+        idf!(
+            idf.idf(2).idf(|idf| idf[0] <= idf[1]),
+            "GRPC_ENDPOINT_WHITELIST must be sorted in code to allow binary search"
+        );
+    }
+}
diff --git a/tests/testfiles/stemming/rust/metrics.rs b/tests/testfiles/stemming/rust/metrics.rs
new file mode 100644
index 0000000..c3a1671
--- /dev/null
+++ b/tests/testfiles/stemming/rust/metrics.rs
@@ -0,0 +1,548 @@
+/// source https://github.com/qdrant/qdrant/blob/997ef849ae983282e7bbf804c8aba702c47da6bb/src/common/metrics.rs
+use api::rest::models::HardwareUsage;
+use prometheus::proto::{Counter, Gauge, LabelPair, Metric, MetricFamily, MetricType};
+use prometheus::TextEncoder;
+use segment::common::operation_time_statistics::OperationDurationStatistics;
+
+use super::telemetry_ops::hardware::HardwareTelemetry;
+use crate::common::telemetry::TelemetryData;
+use crate::common::telemetry_ops::app_telemetry::{AppBuildTelemetry, AppFeaturesTelemetry};
+use crate::common::telemetry_ops::cluster_telemetry::{ClusterStatusTelemetry, ClusterTelemetry};
+use crate::common::telemetry_ops::collections_telemetry::{
+    CollectionTelemetryEnum, CollectionsTelemetry,
+};
+use crate::common::telemetry_ops::memory_telemetry::MemoryTelemetry;
+use crate::common::telemetry_ops::requests_telemetry::{
+    GrpcTelemetry, RequestsTelemetry, WebApiTelemetry,
+};
+
+/// Whitelist for REST endpoints in metrics output.
+///
+/// Contains selection of search, recommend, scroll and upsert endpoints.
+///
+/// This array *must* be sorted: `WebApiTelemetry::add_metrics` looks endpoints up in it
+/// with `binary_search`, and endpoints that are not found are left out of the metrics.
+const REST_ENDPOINT_WHITELIST: &[&str] = &[
+    "/collections/{name}/index",
+    "/collections/{name}/points",
+    "/collections/{name}/points/batch",
+    "/collections/{name}/points/count",
+    "/collections/{name}/points/delete",
+    "/collections/{name}/points/discover",
+    "/collections/{name}/points/discover/batch",
+    "/collections/{name}/points/facet",
+    "/collections/{name}/points/payload",
+    "/collections/{name}/points/payload/clear",
+    "/collections/{name}/points/payload/delete",
+    "/collections/{name}/points/query",
+    "/collections/{name}/points/query/batch",
+    "/collections/{name}/points/query/groups",
+    "/collections/{name}/points/recommend",
+    "/collections/{name}/points/recommend/batch",
+    "/collections/{name}/points/recommend/groups",
+    "/collections/{name}/points/scroll",
+    "/collections/{name}/points/search",
+    "/collections/{name}/points/search/batch",
+    "/collections/{name}/points/search/groups",
+    "/collections/{name}/points/search/matrix/offsets",
+    "/collections/{name}/points/search/matrix/pairs",
+    "/collections/{name}/points/vectors",
+    "/collections/{name}/points/vectors/delete",
+];
+
+/// Whitelist for GRPC endpoints in metrics output.
+///
+/// Contains selection of search, recommend, scroll and upsert endpoints.
+///
+/// This array *must* be sorted: `GrpcTelemetry::add_metrics` looks endpoints up in it
+/// with `binary_search`, and endpoints that are not found are left out of the metrics.
+const GRPC_ENDPOINT_WHITELIST: &[&str] = &[
+    "/qdrant.Points/ClearPayload",
+    "/qdrant.Points/Count",
+    "/qdrant.Points/Delete",
+    "/qdrant.Points/DeletePayload",
+    "/qdrant.Points/Discover",
+    "/qdrant.Points/DiscoverBatch",
+    "/qdrant.Points/Facet",
+    "/qdrant.Points/Get",
+    "/qdrant.Points/OverwritePayload",
+    "/qdrant.Points/Query",
+    "/qdrant.Points/QueryBatch",
+    "/qdrant.Points/QueryGroups",
+    "/qdrant.Points/Recommend",
+    "/qdrant.Points/RecommendBatch",
+    "/qdrant.Points/RecommendGroups",
+    "/qdrant.Points/Scroll",
+    "/qdrant.Points/Search",
+    "/qdrant.Points/SearchBatch",
+    "/qdrant.Points/SearchGroups",
+    "/qdrant.Points/SetPayload",
+    "/qdrant.Points/UpdateBatch",
+    "/qdrant.Points/UpdateVectors",
+    "/qdrant.Points/Upsert",
+];
+
+/// For REST requests, only report timings when having this HTTP response status.
+///
+/// `WebApiTelemetry::add_metrics` compares every response status with this value; other
+/// statuses only contribute to the total and failure counters.
+const REST_TIMINGS_FOR_STATUS: u16 = 200;
+
+/// Encapsulates metrics data in Prometheus format.
+///
+/// Built from `TelemetryData` through the `From` impl below and rendered to text with
+/// `format_metrics`.
+pub struct MetricsData {
+    // Metric families appended by the `MetricsProvider::add_metrics` implementations.
+    metrics: Vec<MetricFamily>,
+}
+
+/// Text rendering of the collected metric families.
+impl MetricsData {
+    // Encodes every stored metric family with prometheus's `TextEncoder` and returns the
+    // resulting string. Panics (via `unwrap`) if the encoder reports an error.
+    pub fn format_metrics(&self) -> String {
+        TextEncoder::new().encode_to_string(&self.metrics).unwrap()
+    }
+}
+
+/// Converts telemetry into metrics by letting the telemetry data append its metric
+/// families to a fresh, empty vector.
+impl From<TelemetryData> for MetricsData {
+    fn from(telemetry_data: TelemetryData) -> Self {
+        let mut metrics = vec![];
+        // `TelemetryData` implements `MetricsProvider` and fans out to its sub-sections.
+        telemetry_data.add_metrics(&mut metrics);
+        Self { metrics }
+    }
+}
+
+/// Implemented by the telemetry types in this file that can contribute Prometheus metric
+/// families. Implementations append to the provided vector; none of the implementations
+/// in this file remove or reorder existing entries.
+trait MetricsProvider {
+    /// Add metrics definitions for this.
+    fn add_metrics(&self, metrics: &mut Vec<MetricFamily>);
+}
+
+/// Top-level fan-out: collects the metrics of every telemetry section, in a fixed order
+/// (app, collections, cluster, requests, hardware, memory).
+impl MetricsProvider for TelemetryData {
+    fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) {
+        // App and collections sections are always reported.
+        self.app.add_metrics(metrics);
+        self.collections.add_metrics(metrics);
+        // The remaining sections are optional and only reported when present.
+        if let Some(cluster) = &self.cluster {
+            cluster.add_metrics(metrics);
+        }
+        if let Some(requests) = &self.requests {
+            requests.add_metrics(metrics);
+        }
+        if let Some(hardware) = &self.hardware {
+            hardware.add_metrics(metrics);
+        }
+        if let Some(mem) = &self.memory {
+            mem.add_metrics(metrics);
+        }
+    }
+}
+
+/// Reports the `app_info` gauge, then the metrics of the app features.
+impl MetricsProvider for AppBuildTelemetry {
+    fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) {
+        // The gauge value is a constant 1.0; the information is carried by the
+        // `name` and `version` labels.
+        metrics.push(metric_family(
+            "app_info",
+            "information about qdrant server",
+            MetricType::GAUGE,
+            vec![gauge(
+                1.0,
+                &[("name", &self.name), ("version", &self.version)],
+            )],
+        ));
+        // Every item yielded by `features.iter()` appends its own metrics.
+        self.features.iter().for_each(|f| f.add_metrics(metrics));
+    }
+}
+
+/// Reports the `app_status_recovery_mode` gauge: 1.0 when `recovery_mode` is set,
+/// 0.0 otherwise.
+impl MetricsProvider for AppFeaturesTelemetry {
+    fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) {
+        metrics.push(metric_family(
+            "app_status_recovery_mode",
+            "features enabled in qdrant server",
+            MetricType::GAUGE,
+            vec![gauge(if self.recovery_mode { 1.0 } else { 0.0 }, &[])],
+        ))
+    }
+}
+
+/// Reports the number of collections and the total number of vectors across them.
+impl MetricsProvider for CollectionsTelemetry {
+    fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) {
+        // Sum the vector counts of all collection entries: aggregated entries carry the
+        // count in their `vectors` field, full entries compute it with `count_vectors()`.
+        // `iter().flatten()` walks the entries nested inside `collections`
+        // (presumably an `Option` of a list; confirm against the type definition).
+        let vector_count = self
+            .collections
+            .iter()
+            .flatten()
+            .map(|p| match p {
+                CollectionTelemetryEnum::Aggregated(a) => a.vectors,
+                CollectionTelemetryEnum::Full(c) => c.count_vectors(),
+            })
+            .sum::<usize>();
+        // The collection count comes from the stored `number_of_collections` field, not
+        // from the entries iterated above.
+        metrics.push(metric_family(
+            "collections_total",
+            "number of collections",
+            MetricType::GAUGE,
+            vec![gauge(self.number_of_collections as f64, &[])],
+        ));
+        metrics.push(metric_family(
+            "collections_vector_total",
+            "total number of vectors in all collections",
+            MetricType::GAUGE,
+            vec![gauge(vector_count as f64, &[])],
+        ));
+    }
+}
+
+/// Reports whether cluster support is enabled and, when a status is available, the
+/// cluster status metrics.
+impl MetricsProvider for ClusterTelemetry {
+    fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) {
+        // The struct pattern has no `..`, so every field must be named here: a field
+        // added to `ClusterTelemetry` stops this from compiling until it is either
+        // reported or explicitly ignored with `_`.
+        let ClusterTelemetry {
+            enabled,
+            status,
+            config: _,
+            peers: _,
+            metadata: _,
+        } = self;
+
+        metrics.push(metric_family(
+            "cluster_enabled",
+            "is cluster support enabled",
+            MetricType::GAUGE,
+            vec![gauge(if *enabled { 1.0 } else { 0.0 }, &[])],
+        ));
+
+        if let Some(ref status) = status {
+            status.add_metrics(metrics);
+        }
+    }
+}
+
+/// Reports cluster-wide metrics (peer count, term) and, when this peer's id is known,
+/// per-peer metrics (commit index, pending operations, voter flag).
+impl MetricsProvider for ClusterStatusTelemetry {
+    fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) {
+        metrics.push(metric_family(
+            "cluster_peers_total",
+            "total number of cluster peers",
+            MetricType::GAUGE,
+            vec![gauge(self.number_of_peers as f64, &[])],
+        ));
+        metrics.push(metric_family(
+            "cluster_term",
+            "current cluster term",
+            MetricType::COUNTER,
+            vec![counter(self.term as f64, &[])],
+        ));
+
+        // The peer id is stringified once so it can be used as a label value.
+        if let Some(ref peer_id) = self.peer_id.map(|p| p.to_string()) {
+            metrics.push(metric_family(
+                "cluster_commit",
+                "index of last committed (finalized) operation cluster peer is aware of",
+                MetricType::COUNTER,
+                vec![counter(self.commit as f64, &[("peer_id", peer_id)])],
+            ));
+            // NOTE(review): the two gauges below are only emitted when the peer id is
+            // known, yet they do not carry the `peer_id` label; confirm this is intended.
+            metrics.push(metric_family(
+                "cluster_pending_operations_total",
+                "total number of pending operations for cluster peer",
+                MetricType::GAUGE,
+                vec![gauge(self.pending_operations as f64, &[])],
+            ));
+            metrics.push(metric_family(
+                "cluster_voter",
+                "is cluster peer a voter or learner",
+                MetricType::GAUGE,
+                vec![gauge(if self.is_voter { 1.0 } else { 0.0 }, &[])],
+            ));
+        }
+    }
+}
+
+/// Reports request metrics by delegating to the REST section, then the gRPC section.
+impl MetricsProvider for RequestsTelemetry {
+    fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) {
+        self.rest.add_metrics(metrics);
+        self.grpc.add_metrics(metrics);
+    }
+}
+
+/// Reports REST response metrics (prefix `rest`) for whitelisted endpoints, labelled
+/// with method, endpoint and status.
+impl MetricsProvider for WebApiTelemetry {
+    fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) {
+        let mut builder = OperationDurationMetricsBuilder::default();
+        for (endpoint, responses) in &self.responses {
+            // Keys are split at the first space into method and endpoint; keys without
+            // a space are skipped. `endpoint` is shadowed by the part after the space.
+            let Some((method, endpoint)) = endpoint.split_once(' ') else {
+                continue;
+            };
+            // Endpoint must be whitelisted
+            if REST_ENDPOINT_WHITELIST.binary_search(&endpoint).is_err() {
+                continue;
+            }
+            for (status, stats) in responses {
+                // Counters are recorded for every status; timings only when the status
+                // equals `REST_TIMINGS_FOR_STATUS`.
+                builder.add(
+                    stats,
+                    &[
+                        ("method", method),
+                        ("endpoint", endpoint),
+                        ("status", &status.to_string()),
+                    ],
+                    *status == REST_TIMINGS_FOR_STATUS,
+                );
+            }
+        }
+        builder.build("rest", metrics);
+    }
+}
+
+/// Reports gRPC response metrics (prefix `grpc`) for whitelisted endpoints, labelled
+/// with the endpoint only.
+impl MetricsProvider for GrpcTelemetry {
+    fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) {
+        let mut builder = OperationDurationMetricsBuilder::default();
+        for (endpoint, stats) in &self.responses {
+            // Endpoint must be whitelisted
+            if GRPC_ENDPOINT_WHITELIST
+                .binary_search(&endpoint.as_str())
+                .is_err()
+            {
+                continue;
+            }
+            // Unlike the REST variant, timings are always recorded (`add_timings` is
+            // `true`).
+            builder.add(stats, &[("endpoint", endpoint.as_str())], true);
+        }
+        builder.build("grpc", metrics);
+    }
+}
+
+/// Reports five unlabelled memory gauges, one per byte counter of `MemoryTelemetry`
+/// (active, allocated, metadata, resident, retained).
+impl MetricsProvider for MemoryTelemetry {
+    fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) {
+        metrics.push(metric_family(
+            "memory_active_bytes",
+            "Total number of bytes in active pages allocated by the application",
+            MetricType::GAUGE,
+            vec![gauge(self.active_bytes as f64, &[])],
+        ));
+        metrics.push(metric_family(
+            "memory_allocated_bytes",
+            "Total number of bytes allocated by the application",
+            MetricType::GAUGE,
+            vec![gauge(self.allocated_bytes as f64, &[])],
+        ));
+        metrics.push(metric_family(
+            "memory_metadata_bytes",
+            "Total number of bytes dedicated to metadata",
+            MetricType::GAUGE,
+            vec![gauge(self.metadata_bytes as f64, &[])],
+        ));
+        metrics.push(metric_family(
+            "memory_resident_bytes",
+            "Maximum number of bytes in physically resident data pages mapped",
+            MetricType::GAUGE,
+            vec![gauge(self.resident_bytes as f64, &[])],
+        ));
+        metrics.push(metric_family(
+            "memory_retained_bytes",
+            "Total number of bytes in virtual memory mappings",
+            MetricType::GAUGE,
+            vec![gauge(self.retained_bytes as f64, &[])],
+        ));
+    }
+}
+
+/// Reports three hardware gauges (CPU, IO read, IO write) for every entry of
+/// `collection_data`, labelled with the entry's key as `id`.
+impl MetricsProvider for HardwareTelemetry {
+    fn add_metrics(&self, metrics: &mut Vec<MetricFamily>) {
+        // NOTE(review): a new metric family is pushed per collection, so with several
+        // collections the same family name is pushed several times; confirm that the
+        // encoder and scrapers accept repeated families rather than one family holding
+        // all the labelled metrics.
+        for (collection, hw_info) in self.collection_data.iter() {
+            let HardwareUsage {
+                cpu,
+                io_read,
+                io_write,
+            } = hw_info;
+
+            metrics.push(metric_family(
+                "collection_hardware_metric_cpu",
+                "CPU measurements of a collection",
+                MetricType::GAUGE,
+                vec![gauge(*cpu as f64, &[("id", collection)])],
+            ));
+
+            metrics.push(metric_family(
+                "collection_hardware_metric_io_read",
+                "Total IO read metrics of a collection",
+                MetricType::GAUGE,
+                vec![gauge(*io_read as f64, &[("id", collection)])],
+            ));
+
+            metrics.push(metric_family(
+                "collection_hardware_metric_io_write",
+                "Total IO write metrics of a collection",
+                MetricType::GAUGE,
+                vec![gauge(*io_write as f64, &[("id", collection)])],
+            ));
+        }
+    }
+}
+
+/// A helper struct to build a vector of [`MetricFamily`] out of a collection of
+/// [`OperationDurationStatistics`].
+///
+/// Each field accumulates the metrics of one output family; `build` turns every
+/// non-empty field into a family.
+#[derive(Default)]
+struct OperationDurationMetricsBuilder {
+    // Filled by every `add` call: response count and failed-response count.
+    total: Vec<Metric>,
+    fail_total: Vec<Metric>,
+    // Filled only by `add` calls made with `add_timings == true`.
+    avg_secs: Vec<Metric>,
+    min_secs: Vec<Metric>,
+    max_secs: Vec<Metric>,
+    duration_histogram_secs: Vec<Metric>,
+}
+
+impl OperationDurationMetricsBuilder {
+    /// Add metrics for the provided statistics.
+    /// If `add_timings` is `false`, only the total and fail_total counters will be added.
+    pub fn add(
+        &mut self,
+        stat: &OperationDurationStatistics,
+        labels: &[(&str, &str)],
+        add_timings: bool,
+    ) {
+        // Counters are always recorded, with or without timings.
+        self.total.push(counter(stat.count as f64, labels));
+        self.fail_total
+            .push(counter(stat.fail_count as f64, labels));
+
+        if !add_timings {
+            return;
+        }
+
+        // The `*_micros` statistics are divided by 1_000_000 to be reported in
+        // seconds. Missing avg/min/max values are reported as 0.
+        self.avg_secs.push(gauge(
+            f64::from(stat.avg_duration_micros.unwrap_or(0.0)) / 1_000_000.0,
+            labels,
+        ));
+        self.min_secs.push(gauge(
+            f64::from(stat.min_duration_micros.unwrap_or(0.0)) / 1_000_000.0,
+            labels,
+        ));
+        self.max_secs.push(gauge(
+            f64::from(stat.max_duration_micros.unwrap_or(0.0)) / 1_000_000.0,
+            labels,
+        ));
+        // Histogram: sample count, sum of durations in seconds, and the buckets with
+        // their first component converted from microseconds to seconds.
+        self.duration_histogram_secs.push(histogram(
+            stat.count as u64,
+            stat.total_duration_micros as f64 / 1_000_000.0,
+            &stat
+                .duration_micros_histogram
+                .iter()
+                .map(|&(b, c)| (f64::from(b) / 1_000_000.0, c as u64))
+                .collect::<Vec<_>>(),
+            labels,
+        ));
+    }
+
+    /// Build metrics and add them to the provided vector.
+    pub fn build(self, prefix: &str, metrics: &mut Vec<MetricFamily>) {
+        // The builder is consumed. A family is only emitted when at least one metric
+        // was recorded for it, so no empty families are pushed.
+        if !self.total.is_empty() {
+            metrics.push(metric_family(
+                &format!("{prefix}_responses_total"),
+                "total number of responses",
+                MetricType::COUNTER,
+                self.total,
+            ));
+        }
+        if !self.fail_total.is_empty() {
+            metrics.push(metric_family(
+                &format!("{prefix}_responses_fail_total"),
+                "total number of failed responses",
+                MetricType::COUNTER,
+                self.fail_total,
+            ));
+        }
+        if !self.avg_secs.is_empty() {
+            metrics.push(metric_family(
+                &format!("{prefix}_responses_avg_duration_seconds"),
+                "average response duration",
+                MetricType::GAUGE,
+                self.avg_secs,
+            ));
+        }
+        if !self.min_secs.is_empty() {
+            metrics.push(metric_family(
+                &format!("{prefix}_responses_min_duration_seconds"),
+                "minimum response duration",
+                MetricType::GAUGE,
+                self.min_secs,
+            ));
+        }
+        if !self.max_secs.is_empty() {
+            metrics.push(metric_family(
+                &format!("{prefix}_responses_max_duration_seconds"),
+                "maximum response duration",
+                MetricType::GAUGE,
+                self.max_secs,
+            ));
+        }
+        if !self.duration_histogram_secs.is_empty() {
+            metrics.push(metric_family(
+                &format!("{prefix}_responses_duration_seconds"),
+                "response duration histogram",
+                MetricType::HISTOGRAM,
+                self.duration_histogram_secs,
+            ));
+        }
+    }
+}
+
+/// Assemble a [`MetricFamily`] from its name, help text, metric type and samples.
+fn metric_family(name: &str, help: &str, r#type: MetricType, metrics: Vec<Metric>) -> MetricFamily {
+    let mut family = MetricFamily::default();
+    family.set_field_type(r#type);
+    family.set_help(help.into());
+    family.set_name(name.into());
+    family.set_metric(metrics);
+    family
+}
+
+/// Create a counter sample holding `value`, tagged with the given `(name, value)` labels.
+fn counter(value: f64, labels: &[(&str, &str)]) -> Metric {
+    let mut inner = Counter::default();
+    inner.set_value(value);
+
+    let mut sample = Metric::default();
+    sample.set_label(labels.iter().map(|&(name, value)| label_pair(name, value)).collect());
+    sample.set_counter(inner);
+    sample
+}
+
+/// Create a gauge sample holding `value`, tagged with the given `(name, value)` labels.
+fn gauge(value: f64, labels: &[(&str, &str)]) -> Metric {
+    let mut inner = Gauge::default();
+    inner.set_value(value);
+
+    let mut sample = Metric::default();
+    sample.set_label(labels.iter().map(|&(name, value)| label_pair(name, value)).collect());
+    sample.set_gauge(inner);
+    sample
+}
+
+/// Create a histogram sample from precomputed totals and buckets.
+///
+/// Each entry of `buckets` is `(upper bound, count)`; the count is stored verbatim as the
+/// bucket's cumulative count, so no accumulation happens here. `labels` are attached as
+/// `(name, value)` pairs.
+fn histogram(
+    sample_count: u64,
+    sample_sum: f64,
+    buckets: &[(f64, u64)],
+    labels: &[(&str, &str)],
+) -> Metric {
+    // Converts one `(upper bound, cumulative count)` pair into a protobuf bucket.
+    let to_bucket = |&(le, seen): &(f64, u64)| {
+        let mut bucket = prometheus::proto::Bucket::default();
+        bucket.set_upper_bound(le);
+        bucket.set_cumulative_count(seen);
+        bucket
+    };
+
+    let mut inner = prometheus::proto::Histogram::default();
+    inner.set_sample_count(sample_count);
+    inner.set_sample_sum(sample_sum);
+    inner.set_bucket(buckets.iter().map(to_bucket).collect());
+
+    let mut sample = Metric::default();
+    sample.set_label(labels.iter().map(|&(name, value)| label_pair(name, value)).collect());
+    sample.set_histogram(inner);
+    sample
+}
+
+/// Create a [`LabelPair`] carrying the given label name and value.
+fn label_pair(name: &str, value: &str) -> LabelPair {
+    let mut pair = LabelPair::default();
+    pair.set_value(value.into());
+    pair.set_name(name.into());
+    pair
+}
+
+#[cfg(test)]
+mod tests {
+    use super::{GRPC_ENDPOINT_WHITELIST, REST_ENDPOINT_WHITELIST};
+
+    /// Both endpoint whitelists must be in non-decreasing order, since (per the assertion
+    /// messages) they are meant to be searched with binary search.
+    #[test]
+    fn test_endpoint_whitelists_sorted() {
+        let rest_sorted = REST_ENDPOINT_WHITELIST
+            .windows(2)
+            .all(|pair| pair[0] <= pair[1]);
+        assert!(
+            rest_sorted,
+            "REST_ENDPOINT_WHITELIST must be sorted in code to allow binary search"
+        );
+
+        let grpc_sorted = GRPC_ENDPOINT_WHITELIST
+            .windows(2)
+            .all(|pair| pair[0] <= pair[1]);
+        assert!(
+            grpc_sorted,
+            "GRPC_ENDPOINT_WHITELIST must be sorted in code to allow binary search"
+        );
+    }
+}