Skip to content

Commit

Permalink
Add mimesis support
Browse files Browse the repository at this point in the history
  • Loading branch information
dacort committed Jun 13, 2024
1 parent ba35f27 commit ec92c33
Show file tree
Hide file tree
Showing 6 changed files with 139 additions and 85 deletions.
32 changes: 19 additions & 13 deletions faker_cli/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,9 @@
from faker import Faker

from faker_cli.parser import infer_column_names, parse_column_types
from faker_cli.templates import (
CloudFrontLogs,
CloudFrontWriter,
S3AccessLogs,
S3AccessWriter,
)
from faker_cli.providers.faker import FakerProvider
from faker_cli.providers.mimesis import MimesisProvider
from faker_cli.templates import CloudFrontWriter, S3AccessWriter
from faker_cli.writer import CSVWriter, JSONWriter

KLAS_MAPPER = {
Expand All @@ -22,10 +19,6 @@
"cloudfront": [CloudFrontWriter, "cloudfront_log"],
}

fake = Faker()
fake.add_provider(S3AccessLogs)
fake.add_provider(CloudFrontLogs)


@click.command()
@click.option("--num-rows", "-n", default=1, help="Number of rows")
Expand All @@ -40,7 +33,8 @@
@click.option("--columns", "-c", help="Column names", default=None, required=False)
@click.option("--template", "-t", help="Template to use", type=click.Choice(["s3access", "cloudfront"]), default=None)
@click.argument("column_types", required=False)
def main(num_rows, format, output, columns, template, column_types):
@click.option("--provider", "-p", type=click.Choice(["faker", "mimesis"]), default="faker")
def main(num_rows, format, output, columns, template, column_types, provider):
"""
Generate fake data, easily.
Expand All @@ -49,13 +43,24 @@ def main(num_rows, format, output, columns, template, column_types):
You can also use --template for real-world synthetic data.
"""
if provider == "faker":
fake = FakerProvider()
elif provider == "mimesis":
fake = MimesisProvider()
else:
pass

# Do some initial validation - we must have either template or column types
if not template and not column_types:
ctx = click.get_current_context()
click.echo(ctx.get_help())
ctx.exit()
raise click.BadArgumentUsage("either --template or a list of Faker property names must be provided.")

# Templates are only supported with Faker at the moment
if template and provider != "faker":
raise click.BadArgumentUsage('templates are only supported with the "faker" provider.')

# Parquet output requires a filename
if format in ["parquet", "deltalake"] and output is None:
raise click.BadArgumentUsage("parquet | deltalake formats requires --output/-o filename parameter.")
Expand Down Expand Up @@ -105,13 +110,14 @@ def main(num_rows, format, output, columns, template, column_types):
raise click.ClickException(f"Format {format} not supported.")
writer = format_klas(sys.stdout, headers, output)
for i in range(num_rows):
writer.write(generate_row(fake, col_types))
writer.write(fake.generate_row(col_types))
writer.close()


def generate_row(fake: Faker, column_types: list[tuple[str, list]]) -> list[str]:
    """Build one output row by formatting every requested column.

    Args:
        fake: Object exposing ``format(name, *args)`` and a ``unique``
            attribute with the same ``format`` method.
        column_types: ``(name, args)`` pairs, one per column. A name that
            starts with ``"unique."`` is formatted through ``fake.unique``
            with that prefix removed.

    Returns:
        The formatted values, in the same order as ``column_types``.
    """
    unique_prefix = "unique."
    row = []
    for name, params in column_types:
        if name.startswith(unique_prefix):
            # Route through the ``unique`` attribute, minus the marker prefix.
            row.append(fake.unique.format(name.removeprefix(unique_prefix), *params))
        else:
            row.append(fake.format(name, *params))
    return row
]
2 changes: 2 additions & 0 deletions faker_cli/providers/base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
class BaseProvider:
    """Interface shared by the data providers the CLI can select.

    The concrete providers in this package (``FakerProvider`` and
    ``MimesisProvider``) each expose ``generate_row`` and ``format``; this
    class declares those two methods so the expected contract is written
    down in one place. The base implementations only raise
    ``NotImplementedError``; the class itself can still be instantiated.
    """

    def generate_row(self, column_types: list[tuple[str, list]]) -> list[str]:
        """Return one row of generated values.

        Args:
            column_types: ``(name, args)`` pairs, one per output column.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError

    def format(self, log_entry) -> list[str]:
        """Generate the value for a single named entry.

        Args:
            log_entry: Name of the entry to generate.

        Raises:
            NotImplementedError: Always, unless overridden by a subclass.
        """
        raise NotImplementedError
21 changes: 21 additions & 0 deletions faker_cli/providers/faker.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from faker import Faker

from faker_cli.templates import CloudFrontLogs, S3AccessLogs


class FakerProvider:
    """Provider that generates values through a ``Faker`` instance.

    The wrapped instance is stored on ``self.fake`` and has the
    ``S3AccessLogs`` and ``CloudFrontLogs`` providers registered on it.
    """

    def __init__(self) -> None:
        faker = Faker()
        # Register the template providers in the same order every time.
        for template_provider in (S3AccessLogs, CloudFrontLogs):
            faker.add_provider(template_provider)
        self.fake = faker

    def generate_row(self, column_types: list[tuple[str, list]]) -> list[str]:
        """Build one output row by formatting every requested column.

        Args:
            column_types: ``(name, args)`` pairs, one per column. A name
                starting with ``"unique."`` is formatted through
                ``self.fake.unique`` with that prefix removed; any other
                name is formatted through ``self.fake`` directly.

        Returns:
            The formatted values, in the same order as ``column_types``.
        """
        unique_prefix = "unique."
        row = []
        for name, params in column_types:
            if name.startswith(unique_prefix):
                value = self.fake.unique.format(name.removeprefix(unique_prefix), *params)
            else:
                value = self.fake.format(name, *params)
            row.append(value)
        return row

    def format(self, log_entry) -> list[str]:
        """Format a single named entry with no extra arguments."""
        formatted = self.fake.format(log_entry)
        return formatted
15 changes: 15 additions & 0 deletions faker_cli/providers/mimesis.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
from mimesis import Field


class MimesisProvider:
    """Provider that generates values through a mimesis ``Field``.

    The ``Field`` instance is stored on ``self.field`` and is called once
    per column with the column's name.
    """

    def __init__(self) -> None:
        self.field = Field()

    def generate_row(self, column_types: list[tuple[str, list]]) -> list[str]:
        """Build one output row by calling the field for every column name.

        Args:
            column_types: ``(name, args)`` pairs, one per column. Only the
                name is used; the args are not passed to the field.

        Returns:
            The generated values, in the same order as ``column_types``.
        """
        row = []
        for name, _params in column_types:
            row.append(self.field(name))
        return row

    def format(self, log_entry) -> list[str]:
        """Not supported by this provider.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError


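# Example of a mimesis field call that takes keyword arguments
# (generate_row above currently passes only the field name):
# field("person.username", mask="U_d", drange=(100, 1000))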
Loading

0 comments on commit ec92c33

Please sign in to comment.