diff --git a/.github/workflows/python.yaml b/.github/workflows/python.yaml new file mode 100644 index 0000000..20f28bf --- /dev/null +++ b/.github/workflows/python.yaml @@ -0,0 +1,141 @@ +name: python + +on: [push, pull_request, workflow_dispatch] + +permissions: + contents: read + +defaults: + run: + working-directory: python + +jobs: + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + - name: Install dependencies + run: | + pip install --upgrade pip wheel setuptools + pip install -r dev/requirements.txt + - name: Build wheel + uses: PyO3/maturin-action@v1 + with: + working-directory: python + args: --release --out dist --find-interpreter + sccache: 'true' + - name: Install wheel + run: pip install pycases --find-links dist --force-reinstall + - name: Test + run: pytest --benchmark-disable + + linux: + runs-on: ubuntu-latest + needs: test + strategy: + matrix: + target: [x86_64, x86, aarch64, armv7, s390x, ppc64le] + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + working-directory: python + target: ${{ matrix.target }} + args: --release --out dist --find-interpreter + sccache: 'true' + manylinux: auto + - name: Upload wheels + uses: actions/upload-artifact@v3 + with: + name: wheels + path: dist + + windows: + runs-on: windows-latest + needs: test + strategy: + matrix: + target: [x64, x86] + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + architecture: ${{ matrix.target }} + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + working-directory: python + target: ${{ matrix.target }} + args: --release --out dist --find-interpreter + sccache: 'true' + - name: Upload wheels + uses: actions/upload-artifact@v3 + with: + name: wheels + path: dist + + macos: + runs-on: macos-latest + needs: test + 
strategy: + matrix: + target: [x86_64, aarch64] + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.11' + - name: Build wheels + uses: PyO3/maturin-action@v1 + with: + working-directory: python + target: ${{ matrix.target }} + args: --release --out dist --find-interpreter + sccache: 'true' + - name: Upload wheels + uses: actions/upload-artifact@v3 + with: + name: wheels + path: dist + + sdist: + runs-on: ubuntu-latest + needs: test + steps: + - uses: actions/checkout@v3 + - name: Build sdist + uses: PyO3/maturin-action@v1 + with: + working-directory: python + command: sdist + args: --out dist + - name: Upload sdist + uses: actions/upload-artifact@v3 + with: + name: wheels + path: dist + + release: + name: Release + runs-on: ubuntu-latest + if: "startsWith(github.ref, 'refs/tags/')" + needs: [linux, windows, macos, sdist] + steps: + - uses: actions/download-artifact@v3 + with: + name: wheels + - name: Publish to PyPI + uses: PyO3/maturin-action@v1 + env: + MATURIN_PYPI_TOKEN: ${{ secrets.PYPI_TOKEN }} + with: + command: upload + args: --non-interactive --skip-existing * diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..969d061 --- /dev/null +++ b/LICENSE @@ -0,0 +1,17 @@ +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. diff --git a/python/.gitignore b/python/.gitignore new file mode 100644 index 0000000..af3ca5e --- /dev/null +++ b/python/.gitignore @@ -0,0 +1,72 @@ +/target + +# Byte-compiled / optimized / DLL files +__pycache__/ +.pytest_cache/ +*.py[cod] + +# C extensions +*.so + +# Distribution / packaging +.Python +.venv/ +env/ +bin/ +build/ +develop-eggs/ +dist/ +eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +include/ +man/ +venv/ +*.egg-info/ +.installed.cfg +*.egg + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt +pip-selfcheck.json + +# Unit test / coverage reports +htmlcov/ +.tox/ +.coverage +.cache +nosetests.xml +coverage.xml + +# Translations +*.mo + +# Mr Developer +.mr.developer.cfg +.project +.pydevproject + +# Rope +.ropeproject + +# Django stuff: +*.log +*.pot + +.DS_Store + +# Sphinx documentation +docs/_build/ + +# PyCharm +.idea/ + +# VSCode +.vscode/ + +# Pyenv +.python-version \ No newline at end of file diff --git a/python/Cargo.lock b/python/Cargo.lock new file mode 100644 index 0000000..fcbae74 --- /dev/null +++ b/python/Cargo.lock @@ -0,0 +1,273 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. 
+version = 3 + +[[package]] +name = "autocfg" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + +[[package]] +name = "cases" +version = "0.0.0" +dependencies = [ + "pyo3", +] + +[[package]] +name = "cfg-if" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" + +[[package]] +name = "indoc" +version = "1.0.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bfa799dd5ed20a7e349f3b4639aa80d74549c81716d9ec4f994c9b5815598306" + +[[package]] +name = "libc" +version = "0.2.148" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9cdc71e17332e86d2e1d38c1f99edcb6288ee11b815fb1a4b049eaa2114d369b" + +[[package]] +name = "lock_api" +version = "0.4.10" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1cc9717a20b1bb222f333e6a92fd32f7d8a18ddc5a3191a11af45dcbf4dcd16" +dependencies = [ + "autocfg", + "scopeguard", +] + +[[package]] +name = "memoffset" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5a634b1c61a95585bd15607c6ab0c4e5b226e695ff2800ba0cdccddf208c406c" +dependencies = [ + "autocfg", +] + +[[package]] +name = "once_cell" +version = "1.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dd8b5dd2ae5ed71462c540258bedcb51965123ad7e7ccf4b9a8cafaa4a63576d" + +[[package]] +name = "parking_lot" +version = "0.12.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3742b2c103b9f06bc9fff0a37ff4912935851bee6d36f3c02bcc755bcfec228f" +dependencies = [ + "lock_api", + 
"parking_lot_core", +] + +[[package]] +name = "parking_lot_core" +version = "0.9.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93f00c865fe7cabf650081affecd3871070f26767e7b2070a3ffae14c654b447" +dependencies = [ + "cfg-if", + "libc", + "redox_syscall", + "smallvec", + "windows-targets", +] + +[[package]] +name = "proc-macro2" +version = "1.0.66" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18fb31db3f9bddb2ea821cde30a9f70117e3f119938b5ee630b7403aa6e2ead9" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "pyo3" +version = "0.19.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e681a6cfdc4adcc93b4d3cf993749a4552018ee0a9b65fc0ccfad74352c72a38" +dependencies = [ + "cfg-if", + "indoc", + "libc", + "memoffset", + "parking_lot", + "pyo3-build-config", + "pyo3-ffi", + "pyo3-macros", + "unindent", +] + +[[package]] +name = "pyo3-build-config" +version = "0.19.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "076c73d0bc438f7a4ef6fdd0c3bb4732149136abd952b110ac93e4edb13a6ba5" +dependencies = [ + "once_cell", + "target-lexicon", +] + +[[package]] +name = "pyo3-ffi" +version = "0.19.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e53cee42e77ebe256066ba8aa77eff722b3bb91f3419177cf4cd0f304d3284d9" +dependencies = [ + "libc", + "pyo3-build-config", +] + +[[package]] +name = "pyo3-macros" +version = "0.19.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dfeb4c99597e136528c6dd7d5e3de5434d1ceaf487436a3f03b2d56b6fc9efd1" +dependencies = [ + "proc-macro2", + "pyo3-macros-backend", + "quote", + "syn", +] + +[[package]] +name = "pyo3-macros-backend" +version = "0.19.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "947dc12175c254889edc0c02e399476c2f652b4b9ebd123aa655c224de259536" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + 
+[[package]] +name = "quote" +version = "1.0.33" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "redox_syscall" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "567664f262709473930a4bf9e51bf2ebf3348f2e748ccc50dea20646858f8f29" +dependencies = [ + "bitflags", +] + +[[package]] +name = "scopeguard" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" + +[[package]] +name = "smallvec" +version = "1.11.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "62bb4feee49fdd9f707ef802e22365a35de4b7b299de4763d44bfea899442ff9" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "target-lexicon" +version = "0.12.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9d0e916b1148c8e263850e1ebcbd046f333e0683c724876bb0da63ea4373dc8a" + +[[package]] +name = "unicode-ident" +version = "1.0.12" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" + +[[package]] +name = "unindent" +version = "0.1.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e1766d682d402817b5ac4490b3c3002d91dfa0d22812f341609f97b08757359c" + +[[package]] +name = "windows-targets" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9a2fa6e2155d7247be68c096456083145c183cbbbc2764150dda45a87197940c" +dependencies = [ + "windows_aarch64_gnullvm", + 
"windows_aarch64_msvc", + "windows_i686_gnu", + "windows_i686_msvc", + "windows_x86_64_gnu", + "windows_x86_64_gnullvm", + "windows_x86_64_msvc", +] + +[[package]] +name = "windows_aarch64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2b38e32f0abccf9987a4e3079dfb67dcd799fb61361e53e2882c3cbaf0d905d8" + +[[package]] +name = "windows_aarch64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "dc35310971f3b2dbbf3f0690a219f40e2d9afcf64f9ab7cc1be722937c26b4bc" + +[[package]] +name = "windows_i686_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a75915e7def60c94dcef72200b9a8e58e5091744960da64ec734a6c6e9b3743e" + +[[package]] +name = "windows_i686_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8f55c233f70c4b27f66c523580f78f1004e8b5a8b659e05a4eb49d4166cca406" + +[[package]] +name = "windows_x86_64_gnu" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "53d40abd2583d23e4718fddf1ebec84dbff8381c07cae67ff7768bbf19c6718e" + +[[package]] +name = "windows_x86_64_gnullvm" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0b7b52767868a23d5bab768e390dc5f5c55825b6d30b86c844ff2dc7414044cc" + +[[package]] +name = "windows_x86_64_msvc" +version = "0.48.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ed94fce61571a4006852b7389a063ab983c02eb1bb37b47f8272ce92d06d9538" diff --git a/python/Cargo.toml b/python/Cargo.toml new file mode 100644 index 0000000..b49f8e5 --- /dev/null +++ b/python/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "cases" +version = "0.0.0" +edition = "2021" +publish = false +include = ["/src/**/*", "/*.py", "/*.pyi", "/LICENSE", "/README.md"] + +[lib] +name = "cases" +crate-type = ["cdylib"] + +[dependencies] +pyo3 = 
"0.19.0" diff --git a/python/README.md b/python/README.md new file mode 100644 index 0000000..0cffd49 --- /dev/null +++ b/python/README.md @@ -0,0 +1,101 @@ +# pycases + +[![PyPI version](https://badgers.space/pypi/version/pycases)](https://pypi.org/project/pycases) +[![License](https://badgers.space/github/license/rossmacarthur/pycases)](https://github.com/rossmacarthur/pycases#license) +[![Build Status](https://badgers.space/github/checks/rossmacarthur/pycases/trunk?label=build)](https://github.com/rossmacarthur/pycases/actions/workflows/python.yaml) + +A case conversion library for Python. + +## Features + +- Automatic case detection, no need to specify the input case +- Extremely fast, written in Rust ✨ +- Support for Unicode characters +- Support for providing acronyms in title case + +**Supported cases** + +| Function | Output | +| :---------------------------- | :--------------------- | +| `cases.to_camel(s)` | `camelCase` | +| `cases.to_pascal(s)` | `PascalCase` | +| `cases.to_snake(s)` | `snake_case` | +| `cases.to_screaming_snake(s)` | `SCREAMING_SNAKE_CASE` | +| `cases.to_kebab(s)` | `kebab-case` | +| `cases.to_screaming_kebab(s)` | `SCREAMING-KEBAB-CASE` | +| `cases.to_train(s)` | `Train-Case` | +| `cases.to_lower(s)` | `lower case` | +| `cases.to_title(s)` | `Title Case` | +| `cases.to_upper(s)` | `UPPER CASE` | + + +## Getting started + +Install using + +```sh +pip install pycases +``` + +Now convert a string using the relevant function. + +```python +import cases + +cases.to_snake("XMLHttpRequest") # returns "xml_http_request" +``` + +## Details + +Each of the provided functions using the same underlying implementation which +does the following: +- Divide the input string into words +- Convert each word as required +- Join the words back together optionally with a separator + +Word boundaries are defined as follows: + +- A set of consecutive Unicode non-letter and non-number characters. 
+ + For example: 'foo _bar' is two words (foo and bar) + +- A transition from a lowercase letter to an uppercase letter. + + For example: fooBar is two words (foo and Bar) + +- A transition from multiple uppercase letters to a single uppercase letter + followed by lowercase letters. + + For example: FOOBar is two words (FOO and Bar) + +Functions where the transform is "title" accept an optional `acronyms` argument, +which is a mapping of lowercase words to their output. For example: + +```python +>>> cases.to_pascal("xml_http_request", acronyms={"xml": "XML"}) +'XMLHttpRequest' +>>> cases.to_pascal("xml_http_request", acronyms={"xml": "XML", "http": "HTTP"}) +'XMLHTTPRequest' +``` + +## Benchmarks + +A simple benchmark against various other libraries is provided in +[./benches](./benches). The following table shows the results when run on my +Macbook M2 Max. + +| Library | Min (µs) | Max (µs) | Mean (µs) | +| :------------------------ | --------: | --------: | ------------: | +| cases | 26.666 | 176.834 | **30.909** | +| pyheck | 51.000 | 131.416 | **53.565** | +| pure python | 63.583 | 108.125 | **65.075** | +| re | 81.916 | 171.000 | **87.856** | +| stringcase | 99.250 | 222.292 | **102.197** | +| pydantic.alias_generators | 182.000 | 304.458 | **189.063** | +| inflection | 229.750 | 360.792 | **239.153** | +| caseconversion | 1,430.042 | 1,838.375 | **1,559.019** | + +## License + +This project is licensed under the terms of the MIT license. See +[LICENSE](LICENSE) for more details. 
diff --git a/python/benches/test_bench.py b/python/benches/test_bench.py new file mode 100644 index 0000000..2dd9475 --- /dev/null +++ b/python/benches/test_bench.py @@ -0,0 +1,60 @@ +from pytest_benchmark.fixture import BenchmarkFixture + + +LEN = 100 +INPUT = "thisIsACamelCaseString" * LEN +EXPECT = "this_is_a_camel_case_string" * LEN + + +def test_bench_to_snake_pure_python(benchmark: BenchmarkFixture): + def to_snake(s: str) -> str: + return "".join(["_" + c.lower() if c.isupper() else c for c in s]).lstrip("_") + + assert benchmark(to_snake, INPUT) == EXPECT + + +def test_bench_to_snake_python_re(benchmark: BenchmarkFixture): + import re + + pattern = re.compile(r"(? str: + return pattern.sub("_", s).lower() + + assert benchmark(to_snake, INPUT) == EXPECT + + +def test_bench_to_snake_cases(benchmark: BenchmarkFixture): + from cases import to_snake + + assert benchmark(to_snake, INPUT) == EXPECT + + +def test_bench_to_snake_caseconversion(benchmark: BenchmarkFixture): + from case_conversion import snakecase as to_snake + + assert benchmark(to_snake, INPUT) == EXPECT + + +def test_bench_to_snake_inflection(benchmark: BenchmarkFixture): + from inflection import underscore as to_snake + + assert benchmark(to_snake, INPUT) == EXPECT + + +def test_bench_to_snake_pydantic(benchmark: BenchmarkFixture): + from pydantic.alias_generators import to_snake + + assert benchmark(to_snake, INPUT) == EXPECT + + +def test_bench_to_snake_pyheck(benchmark: BenchmarkFixture): + from pyheck import snake as to_snake + + assert benchmark(to_snake, INPUT) == EXPECT + + +def test_bench_to_snake_stringcase(benchmark: BenchmarkFixture): + from stringcase import snakecase as to_snake + + assert benchmark(to_snake, INPUT) == EXPECT diff --git a/python/cases/__init__.py b/python/cases/__init__.py new file mode 100644 index 0000000..12e0323 --- /dev/null +++ b/python/cases/__init__.py @@ -0,0 +1,4 @@ +from .cases import * + +__doc__ = cases.__doc__ +__all__ = cases.__all__ diff --git 
a/python/cases/__init__.pyi b/python/cases/__init__.pyi new file mode 100644 index 0000000..fb69224 --- /dev/null +++ b/python/cases/__init__.pyi @@ -0,0 +1,209 @@ +""" +A case conversion library with Unicode support, implemented in Rust. + +Each of the provided functions uses the same underlying implementation, which +does the following: +- Divide the input string into words +- Convert each word as required +- Join the words back together optionally with a separator + +Word boundaries are defined as follows: + +- A set of consecutive Unicode non-letter and non-number characters. + + For example: 'foo _bar' is two words (foo and bar) + +- A transition from a lowercase letter to an uppercase letter. + + For example: fooBar is two words (foo and Bar) + +- A transition from multiple uppercase letters to a single uppercase letter + followed by lowercase letters. + + For example: FOOBar is two words (FOO and Bar) + +""" + +from typing import Optional + +def to_camel(s: str, acronyms: Optional[dict[str, str]] = None) -> str: + """ + Convert a string to 'camelCase'. + + The first word will be converted to lowercase and subsequent words to title + case. See module documentation for how word boundaries are defined. + + For example: + + >>> cases.to_camel("foo_bar") + 'fooBar' + + The `acronyms` argument is a mapping of lowercase words to an override + value. This value will be used instead of the camel case conversion. + + For example: + + >>> cases.to_camel("xml http request", acronyms={"http": "HTTP"}) + 'xmlHTTPRequest' + + """ + ... + +def to_pascal(s: str, acronyms: Optional[dict[str, str]] = None) -> str: + """ + Convert a string to 'PascalCase'. + + Each word will be converted to title case. See module documentation for how + word boundaries are defined. + + For example: + + >>> cases.to_pascal("foo_bar") + 'FooBar' + + The `acronyms` argument is a mapping of lowercase words to an override + value. This value will be used instead of the pascal case conversion. 
+ + For example: + + >>> cases.to_pascal("xml http request", acronyms={"http": "HTTP"}) + 'XmlHTTPRequest' + + """ + ... + +def to_snake(s: str) -> str: + """ + Convert a string to 'snake_case'. + + Each word will be converted to lower case and separated with an underscore. + See module documentation for how word boundaries are defined. + + For example: + + >>> cases.to_snake("fooBar") + 'foo_bar' + + """ + ... + +def to_screaming_snake(s: str) -> str: + """ + Convert a string to 'SCREAMING_SNAKE_CASE'. + + Each word will be converted to upper case and separated with an underscore. + See module documentation for how word boundaries are defined. + + For example: + + >>> cases.to_screaming_snake("fooBar") + 'FOO_BAR' + + """ + ... + +def to_kebab(s: str) -> str: + """ + Convert a string to 'kebab-case'. + + Each word will be converted to lower case and separated with a hyphen. See + module documentation for how word boundaries are defined. + + For example: + + >>> cases.to_kebab("fooBar") + 'foo-bar' + + """ + ... + +def to_screaming_kebab(s: str) -> str: + """ + Convert a string to 'SCREAMING-KEBAB-CASE'. + + Each word will be converted to upper case and separated with a hyphen. See + module documentation for how word boundaries are defined. + + For example: + + >>> cases.to_screaming_kebab("fooBar") + 'FOO-BAR' + + """ + ... + +def to_train(s: str, acronyms: Optional[dict[str, str]] = None) -> str: + """ + Convert a string to 'Train-Case'. + + Each word will be converted to title case and separated with a hyphen. See + module documentation for how word boundaries are defined. + + For example: + + >>> cases.to_train("fooBar") + 'Foo-Bar' + + The `acronyms` argument is a mapping of lowercase words to an override + value. This value will be used instead of the train case conversion. + + For example: + + >>> cases.to_train("xml http request", acronyms={"http": "HTTP"}) + 'Xml-HTTP-Request' + + """ + ... 
+ +def to_lower(s: str) -> str: + """ + Convert a string to 'lower case'. + + Each word will be converted to lower case and separated with a space. See + module documentation for how word boundaries are defined. + + For example: + + >>> cases.to_lower("FooBar") + 'foo bar' + + """ + ... + +def to_title(s: str, acronyms: Optional[dict[str, str]] = None) -> str: + """ + Convert a string to 'Title Case'. + + Each word will be converted to title case and separated with a space. See + module documentation for how word boundaries are defined. + + For example: + + >>> cases.to_title("foo_bar") + 'Foo Bar' + + The `acronyms` argument is a mapping of lowercase words to an override + value. This value will be used instead of the title case conversion. + + For example: + + >>> cases.to_title("xml_http_request", acronyms={"http": "HTTP"}) + 'Xml HTTP Request' + + """ + ... + +def to_upper(s: str) -> str: + """ + Convert a string to 'UPPER CASE'. + + Each word will be converted to upper case and separated with a space. See + module documentation for how word boundaries are defined. + + For example: + + >>> cases.to_upper("fooBar") + 'FOO BAR' + + """ + ... 
diff --git a/python/cases/py.typed b/python/cases/py.typed new file mode 100644 index 0000000..e69de29 diff --git a/python/dev/requirements.in b/python/dev/requirements.in new file mode 100644 index 0000000..2b11501 --- /dev/null +++ b/python/dev/requirements.in @@ -0,0 +1,10 @@ +maturin +ruff +pytest +pytest-benchmark + +case_conversion +inflection +pydantic +pyheck +stringcase diff --git a/python/dev/requirements.txt b/python/dev/requirements.txt new file mode 100644 index 0000000..f3a31ba --- /dev/null +++ b/python/dev/requirements.txt @@ -0,0 +1,40 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile -o dev/requirements.txt dev/requirements.in +annotated-types==0.7.0 + # via pydantic +case-conversion==2.1.0 + # via -r dev/requirements.in +inflection==0.5.1 + # via -r dev/requirements.in +iniconfig==2.0.0 + # via pytest +maturin==1.6.0 + # via -r dev/requirements.in +packaging==23.2 + # via pytest +pluggy==1.3.0 + # via pytest +py-cpuinfo==9.0.0 + # via pytest-benchmark +pydantic==2.8.2 + # via -r dev/requirements.in +pydantic-core==2.20.1 + # via pydantic +pyheck==0.1.5 + # via -r dev/requirements.in +pytest==7.4.3 + # via + # -r dev/requirements.in + # pytest-benchmark +pytest-benchmark==4.0.0 + # via -r dev/requirements.in +regex==2023.10.3 + # via case-conversion +ruff==0.1.3 + # via -r dev/requirements.in +stringcase==1.2.0 + # via -r dev/requirements.in +typing-extensions==4.12.2 + # via + # pydantic + # pydantic-core diff --git a/python/pyproject.toml b/python/pyproject.toml new file mode 100644 index 0000000..1abfdf2 --- /dev/null +++ b/python/pyproject.toml @@ -0,0 +1,35 @@ +[build-system] +requires = ["maturin>=1.2,<2.0"] +build-backend = "maturin" + +[tool.maturin] +features = ["pyo3/extension-module"] + +[project] +name = "pycases" +version = "0.1.3" +description = "A case conversion library with Unicode support" +requires-python = ">=3.7" +license = { text = "MIT" } +authors = [{ name = "Ross MacArthur", email = 
"ross@macarthur.io" }] +readme = "README.md" +keywords = ["convert", "case", "snake", "camel", "pascal"] +classifiers = [ + "Development Status :: 3 - Alpha", + "Intended Audience :: Developers", + "License :: OSI Approved :: MIT License", + "Natural Language :: English", + "Operating System :: OS Independent", + "Programming Language :: Rust", + "Programming Language :: Python", + "Programming Language :: Python :: 3.7", + "Programming Language :: Python :: 3.8", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", +] + +[project.urls] +Homepage = "https://github.com/rossmacarthur/pycases" +Repository = "https://github.com/rossmacarthur/pycases" diff --git a/python/src/lib.rs b/python/src/lib.rs new file mode 100644 index 0000000..06ad9e5 --- /dev/null +++ b/python/src/lib.rs @@ -0,0 +1,130 @@ +mod transform; + +use std::fmt; +use std::fmt::Write; + +use pyo3::prelude::*; +use pyo3::types::PyDict; + +use crate::transform::{fmt_lower, fmt_title, fmt_upper}; + +/// Convert a string to 'camelCase'. +#[pyfunction] +#[pyo3(signature = (s, /, acronyms = None))] +fn to_camel(s: &str, acronyms: Option<&PyDict>) -> String { + let mut first = true; + let word_fn = |buf: &mut String, s: &str| -> fmt::Result { + if first { + first = false; + fmt_lower(buf, s) + } else { + match get_acronym(s, acronyms) { + Some(acronym) => write!(buf, "{}", acronym), + None => fmt_title(buf, s), + } + } + }; + + transform::to_string(s, word_fn, "") +} + +/// Convert a string to 'PascalCase'. +#[pyfunction] +#[pyo3(signature = (s, /, acronyms = None))] +fn to_pascal(s: &str, acronyms: Option<&PyDict>) -> String { + let word_fn = |buf: &mut String, s: &str| -> fmt::Result { + match get_acronym(s, acronyms) { + Some(acronym) => write!(buf, "{}", acronym), + None => fmt_title(buf, s), + } + }; + + transform::to_string(s, word_fn, "") +} + +/// Convert a string to 'snake_case'. 
+#[pyfunction] +fn to_snake(s: &str) -> String { + transform::to_string(s, fmt_lower, "_") +} + +/// Convert a string to 'SCREAMING_SNAKE_CASE'. +#[pyfunction] +fn to_screaming_snake(s: &str) -> String { + transform::to_string(s, fmt_upper, "_") +} + +/// Convert a string to 'kebab-case'. +#[pyfunction] +fn to_kebab(s: &str) -> String { + transform::to_string(s, fmt_lower, "-") +} + +/// Convert a string to 'SCREAMING-KEBAB-CASE'. +#[pyfunction] +fn to_screaming_kebab(s: &str) -> String { + transform::to_string(s, fmt_upper, "-") +} + +/// Convert a string to 'Train-Case'. +#[pyfunction] +#[pyo3(signature = (s, /, acronyms = None))] +fn to_train(s: &str, acronyms: Option<&PyDict>) -> String { + let word_fn = |buf: &mut String, s: &str| -> fmt::Result { + match get_acronym(s, acronyms) { + Some(acronym) => write!(buf, "{}", acronym), + None => fmt_title(buf, s), + } + }; + + transform::to_string(s, word_fn, "-") +} + +/// Convert a string to 'lower case'. +#[pyfunction] +fn to_lower(s: &str) -> String { + transform::to_string(s, fmt_lower, " ") +} + +/// Convert a string to 'Title Case'. +#[pyfunction] +#[pyo3(signature = (s, /, acronyms = None))] +fn to_title(s: &str, acronyms: Option<&PyDict>) -> String { + let word_fn = |buf: &mut String, s: &str| -> fmt::Result { + match get_acronym(s, acronyms) { + Some(acronym) => write!(buf, "{}", acronym), + None => fmt_title(buf, s), + } + }; + + transform::to_string(s, word_fn, " ") +} + +/// Convert a string to 'UPPER CASE'. +#[pyfunction] +fn to_upper(s: &str) -> String { + transform::to_string(s, fmt_upper, " ") +} + +fn get_acronym<'a>(s: &str, acronyms: Option<&'a PyDict>) -> Option<&'a str> { + acronyms + .as_ref() + .and_then(|d| d.get_item(s.to_lowercase())) + .and_then(|v| v.extract::<&str>().ok()) +} + +/// A case conversion library with Unicode support, implemented in Rust. 
+#[pymodule] +fn cases(_py: Python, m: &PyModule) -> PyResult<()> { + m.add_function(wrap_pyfunction!(to_camel, m)?)?; + m.add_function(wrap_pyfunction!(to_pascal, m)?)?; + m.add_function(wrap_pyfunction!(to_snake, m)?)?; + m.add_function(wrap_pyfunction!(to_screaming_snake, m)?)?; + m.add_function(wrap_pyfunction!(to_kebab, m)?)?; + m.add_function(wrap_pyfunction!(to_screaming_kebab, m)?)?; + m.add_function(wrap_pyfunction!(to_train, m)?)?; + m.add_function(wrap_pyfunction!(to_lower, m)?)?; + m.add_function(wrap_pyfunction!(to_title, m)?)?; + m.add_function(wrap_pyfunction!(to_upper, m)?)?; + Ok(()) +} diff --git a/python/src/transform.rs b/python/src/transform.rs new file mode 100644 index 0000000..43424e0 --- /dev/null +++ b/python/src/transform.rs @@ -0,0 +1,128 @@ +use std::fmt; +use std::fmt::Write; + +#[derive(Copy, Clone, PartialEq)] +enum State { + Unknown, + Delims, + Lower, + Upper, +} + +/// Reconstructs the string into a new `String`, passing each word to `word_fn` +/// and handing `delim` to `transform` as the word separator. +pub fn to_string<F>(s: &str, word_fn: F, delim: &str) -> String +where + F: FnMut(&mut String, &str) -> fmt::Result, +{ + let mut buf = String::with_capacity(s.len()); + transform(s, &mut buf, word_fn, delim).unwrap(); + buf +} + +/// Transform reconstructs the string into the given buffer using the given +/// functions. 
+pub fn transform<B, W>(s: &str, buf: &mut B, mut word_fn: W, delim: &str) -> fmt::Result +where + B: Write, + W: FnMut(&mut B, &str) -> fmt::Result, +{ + // when we are on the first word + let mut first = true; + // the byte index of the start of the current word + let mut w0 = 0; + // the byte index of the end of the current word + let mut w1 = None; + // the current state of the word boundary machine + let mut state = State::Unknown; + + let mut write = |w0: usize, w1: usize| -> fmt::Result { + if w1 - w0 > 0 { + if first { + first = false; + } else { + buf.write_str(delim)?; + } + word_fn(buf, &s[w0..w1])?; + } + Ok(()) + }; + + let mut iter = s.char_indices().peekable(); + + while let Some((i, c)) = iter.next() { + if !c.is_alphanumeric() { + state = State::Delims; + w1 = w1.or(Some(i)); + continue; + } + + let is_lower = c.is_lowercase(); + let is_upper = c.is_uppercase(); + + match state { + State::Delims => { + if let Some(w1) = w1 { + write(w0, w1)?; + } + w0 = i; + w1 = None; + } + State::Lower if is_upper => { + write(w0, i)?; + w0 = i; + } + State::Upper + if is_upper && matches!(iter.peek(), Some((_, c2)) if c2.is_lowercase()) => + { + write(w0, i)?; + w0 = i; + } + _ => {} + } + + if is_lower { + state = State::Lower; + } else if is_upper { + state = State::Upper; + } else if state == State::Delims { + state = State::Unknown; + } + } + + match state { + State::Delims => { + if let Some(w1) = w1 { + write(w0, w1)?; + } + } + _ => write(w0, s.len())?, + } + + Ok(()) +} + +pub fn fmt_lower<W: Write>(buf: &mut W, s: &str) -> fmt::Result { + for c in s.chars() { + write!(buf, "{}", c.to_lowercase())? + } + Ok(()) +} + +pub fn fmt_upper<W: Write>(buf: &mut W, s: &str) -> fmt::Result { + for c in s.chars() { + write!(buf, "{}", c.to_uppercase())? 
+ } + Ok(()) +} + +pub fn fmt_title(buf: &mut W, s: &str) -> fmt::Result { + let mut iter = s.chars(); + if let Some(c) = iter.next() { + write!(buf, "{}", c.to_uppercase())?; + for c in iter { + write!(buf, "{}", c.to_lowercase())?; + } + } + Ok(()) +} diff --git a/python/tests/test_cases.py b/python/tests/test_cases.py new file mode 100644 index 0000000..04cb93c --- /dev/null +++ b/python/tests/test_cases.py @@ -0,0 +1,170 @@ +import re +import os + +import cases +import pytest + +TESTS = [ + ("", "", ""), + ("Test", "test", "test"), + ("test case", "test_case", "testCase"), + (" test case", "test_case", "testCase"), + ("test case ", "test_case", "testCase"), + ("Test Case", "test_case", "testCase"), + (" Test Case", "test_case", "testCase"), + ("Test Case ", "test_case", "testCase"), + ("camelCase", "camel_case", "camelCase"), + ("PascalCase", "pascal_case", "pascalCase"), + ("snake_case", "snake_case", "snakeCase"), + (" Test Case", "test_case", "testCase"), + ("SCREAMING_SNAKE_CASE", "screaming_snake_case", "screamingSnakeCase"), + ("kebab-case", "kebab_case", "kebabCase"), + ("SCREAMING-KEBAB-CASE", "screaming_kebab_case", "screamingKebabCase"), + ("Title Case ", "title_case", "titleCase"), + ("Train-Case ", "train_case", "trainCase"), + ("This is a Test case.", "this_is_a_test_case", "thisIsATestCase"), + ( + "MixedUP CamelCase, with some Spaces", + "mixed_up_camel_case_with_some_spaces", + "mixedUpCamelCaseWithSomeSpaces", + ), + ( + "mixed_up_ snake_case with some _spaces", + "mixed_up_snake_case_with_some_spaces", + "mixedUpSnakeCaseWithSomeSpaces", + ), + ( + "this-contains_ ALLKinds OfWord_Boundaries", + "this_contains_all_kinds_of_word_boundaries", + "thisContainsAllKindsOfWordBoundaries", + ), + ("XΣXΣ baffle", "xσxσ_baffle", "xσxσBaffle"), + ("XMLHttpRequest", "xml_http_request", "xmlHttpRequest"), + ("FIELD_NAME11", "field_name11", "fieldName11"), + ("FIELD_NAME_11", "field_name_11", "fieldName11"), + ("FIELD_NAME_1", "field_name_1", "fieldName1"), 
+ ("99BOTTLES", "99bottles", "99bottles"), + ("FieldNamE11", "field_nam_e11", "fieldNamE11"), + ("abc123def456", "abc123def456", "abc123def456"), + ("abc123DEF456", "abc123_def456", "abc123Def456"), + ("abc123Def456", "abc123_def456", "abc123Def456"), + ("abc123DEf456", "abc123_d_ef456", "abc123DEf456"), + ("ABC123def456", "abc123def456", "abc123def456"), + ("ABC123DEF456", "abc123def456", "abc123def456"), + ("ABC123Def456", "abc123_def456", "abc123Def456"), + ("ABC123DEf456", "abc123d_ef456", "abc123dEf456"), + ("ABC123dEEf456FOO", "abc123d_e_ef456_foo", "abc123dEEf456Foo"), + ("abcDEF", "abc_def", "abcDef"), + ("ABcDE", "a_bc_de", "aBcDe"), +] + + +def test_to_camel(): + for s, _, camel in TESTS: + assert cases.to_camel(s) == camel + + +def test_to_camel_with_acronyms(): + assert ( + cases.to_camel("xml_http_request", acronyms={"xml": "XML"}) == "xmlHttpRequest" + ) + assert ( + cases.to_camel("xml_http_request", acronyms={"http": "HTTP"}) + == "xmlHTTPRequest" + ) + + +def test_to_pascal(): + assert cases.to_pascal("test case") == "TestCase" + + +def test_to_pascal_with_acronyms(): + assert ( + cases.to_pascal("xml_http_request", acronyms={"xml": "XML"}) == "XMLHttpRequest" + ) + assert ( + cases.to_pascal("xml_http_request", acronyms={"xml": "XML", "http": "HTTP"}) + == "XMLHTTPRequest" + ) + assert ( + cases.to_pascal("xml_http_request", acronyms={"xml": "XML", "http": "Http"}) + == "XMLHttpRequest" + ) + + +def test_to_snake(): + for s, snake, _ in TESTS: + assert cases.to_snake(s) == snake + + +def test_to_screaming_snake(): + assert cases.to_screaming_snake("test case") == "TEST_CASE" + + +def test_to_kebab(): + assert cases.to_kebab("test case") == "test-case" + + +def test_to_screaming_kebab(): + assert cases.to_screaming_kebab("test case") == "TEST-CASE" + + +def test_to_train(): + assert cases.to_train("test case") == "Test-Case" + + +def test_to_train_with_acronyms(): + assert ( + cases.to_train("xml_http_request", acronyms={"xml": "XML"}) + == 
"XML-Http-Request" + ) + assert ( + cases.to_train("xml_http_request", acronyms={"xml": "XML", "http": "HTTP"}) + == "XML-HTTP-Request" + ) + assert ( + cases.to_train("xml_http_request", acronyms={"xml": "XML", "http": "Http"}) + == "XML-Http-Request" + ) + + +def test_to_lower(): + assert cases.to_lower("Test-case") == "test case" + + +def test_to_title(): + assert cases.to_title("Test-case") == "Test Case" + + +def test_to_title_with_acronyms(): + assert ( + cases.to_title("xml_http_request", acronyms={"xml": "XML"}) + == "XML Http Request" + ) + assert ( + cases.to_title("xml_http_request", acronyms={"xml": "XML", "http": "HTTP"}) + == "XML HTTP Request" + ) + assert ( + cases.to_title("xml_http_request", acronyms={"xml": "XML", "http": "Http"}) + == "XML Http Request" + ) + + +def test_to_upper(): + assert cases.to_upper("test case") == "TEST CASE" + + +def examples() -> list[tuple[str, str]]: + pyi_file = os.path.join(os.path.dirname(__file__), "..", "cases", "__init__.pyi") + with open(pyi_file) as f: + contents = f.read() + examples = re.findall(r"^\s*>>> (.*)\n\s*(.*)$", contents, re.MULTILINE) + assert len(examples) == 14 + return list(examples) + + +@pytest.mark.parametrize("case", examples()) +def test_doc_example(case: tuple[str, str]): + code, expected = case + exec(f"""result = {code}\nassert result == {expected}""")