Skip to content

Remove zetasql #4

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 4 additions & 1 deletion .bazelrc
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
# Needed to work with ZetaSQL dependency.
# Zetasql is removed.
# This is a candidate for removal
build --cxxopt="-std=c++17"

# Needed to avoid zetasql proto error.
# Zetasql is removed.
# This is a candidate for removal
build --protocopt=--experimental_allow_proto3_optional

# icu@: In create_linking_context: in call to create_linking_context(),
# parameter 'user_link_flags' is deprecated and will be removed soon.
# It may be temporarily re-enabled by setting --incompatible_require_linker_input_cc_api=false
build --incompatible_require_linker_input_cc_api=false

2 changes: 1 addition & 1 deletion .bazelversion
Original file line number Diff line number Diff line change
@@ -1 +1 @@
6.5.0
6.5.0
21 changes: 21 additions & 0 deletions .github/workflows/ci-lint.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Lint workflow: runs the repository's pre-commit hooks on every pull request
# and on every push to master.
name: pre-commit

on:
  pull_request:
  push:
    branches: [master]

jobs:
  pre-commit:
    runs-on: ubuntu-latest
    steps:
      # NOTE(review): the action refs below read "[email protected]" because the
      # page this was copied from obfuscated the "<action>@<version>" text.
      # Restore the real pinned versions before using this workflow.
      - uses: actions/[email protected]
        with:
          # Ensure the full history is fetched
          # This is required to run pre-commit on a specific set of commits
          # TODO: Remove this when all the pre-commit issues are fixed
          fetch-depth: 0
      - uses: actions/[email protected]
        with:
          # Quoted so the version is always read as a string, never as a
          # float (an unquoted 3.10 would be parsed as 3.1).
          python-version: "3.13"
      - uses: pre-commit/[email protected]
4 changes: 3 additions & 1 deletion .github/workflows/test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,9 @@ jobs:
- name: Install built wheel
shell: bash
run: |
pip install dist/*.whl['test']
PYTHON_VERSION_TAG="cp$(echo ${{ matrix.python-version }} | sed 's/\.//')"
WHEEL_FILE=$(ls dist/*${PYTHON_VERSION_TAG}*.whl)
pip install "${WHEEL_FILE}[test]"

- name: Run Test
run: |
Expand Down
2 changes: 1 addition & 1 deletion .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -126,4 +126,4 @@ dmypy.json
.pyre/

# pb2.py files
*_pb2.py
*_pb2.py
39 changes: 39 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,39 @@
# pre-commit is a tool to perform a predefined set of tasks manually and/or
# automatically before git commits are made.
#
# Config reference: https://pre-commit.com/#pre-commit-configyaml---top-level
#
# Common tasks
#
# - Register git hooks: pre-commit install --install-hooks
# - Run on all files:   pre-commit run --all-files
#
# These pre-commit hooks are run as CI.
#
# NOTE: if it can be avoided, add configs/args in pyproject.toml or below
# instead of creating a new `.config.file`.
# https://pre-commit.ci/#configuration
ci:
  autoupdate_schedule: monthly
  # Literal block scalar: the commit message must be indented under the key,
  # otherwise the scalar is empty and the next line is parsed as a new key.
  autofix_commit_msg: |
    [pre-commit.ci] Apply automatic pre-commit fixes

repos:
  # general
  - repo: https://github.com/pre-commit/pre-commit-hooks
    rev: v4.6.0
    hooks:
      - id: end-of-file-fixer
        exclude: '\.svg$|\.patch$'
      - id: trailing-whitespace
        exclude: '\.svg$|\.patch$'
      - id: check-json
      - id: check-yaml
        args: ["--allow-multiple-documents", "--unsafe"]
      - id: check-toml

  - repo: https://github.com/astral-sh/ruff-pre-commit
    rev: v0.5.6
    hooks:
      - id: ruff
        args: ["--fix"]
      - id: ruff-format
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -238,4 +238,3 @@ tag.
* [TensorFlow Data Validation PyPI](https://pypi.org/project/tensorflow-data-validation/)
* [TensorFlow Data Validation Paper](https://mlsys.org/Conferences/2019/doc/2019/167.pdf)
* [TensorFlow Data Validation Slides](https://conf.slac.stanford.edu/xldb2018/sites/xldb2018.conf.slac.stanford.edu/files/Tues_09.45_NeoklisPolyzotis_Data%20Analysis%20and%20Validation%20(1).pdf)

62 changes: 10 additions & 52 deletions WORKSPACE
Original file line number Diff line number Diff line change
Expand Up @@ -71,18 +71,6 @@ http_archive(
],
)

# Needed by abseil-py by zetasql.
http_archive(
name = "six_archive",
build_file = "//third_party:six.BUILD",
sha256 = "105f8d68616f8248e24bf0e9372ef04d3cc10104f1980f54d57b2ce73a5ad56a",
strip_prefix = "six-1.10.0",
urls = [
"http://mirror.bazel.build/pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz",
"https://pypi.python.org/packages/source/s/six/six-1.10.0.tar.gz",
],
)

load("@com_google_protobuf//:protobuf_deps.bzl", "protobuf_deps")

protobuf_deps()
Expand Down Expand Up @@ -112,6 +100,16 @@ http_archive(
url = "https://github.com/abseil/abseil-cpp/archive/%s.tar.gz" % COM_GOOGLE_ABSL_COMMIT,
)


# re2 required for google tests
http_archive(
name = "com_googlesource_code_re2",
# build_file = "//third_party:re2.BUILD",
sha256 = "b90430b2a9240df4459108b3e291be80ae92c68a47bc06ef2dc419c5724de061",
strip_prefix = "re2-a276a8c738735a0fe45a6ee590fe2df69bcf4502",
urls = ["https://github.com/google/re2/archive/a276a8c738735a0fe45a6ee590fe2df69bcf4502.tar.gz"],
)

# Will be loaded by workspace.bzl from head
# TFMD_COMMIT = "404805761e614561cceedc429e67c357c62be26d" # 1.17.1

Expand Down Expand Up @@ -218,46 +216,6 @@ load("@bazel_gazelle//:deps.bzl", "gazelle_dependencies") #, "go_repository")

gazelle_dependencies()

################################################################################
# ZetaSQL #
################################################################################

ZETASQL_COMMIT = "a516c6b26d183efc4f56293256bba92e243b7a61" # 11/01/2024

http_archive(
name = "com_google_zetasql",
patch_args = ["-p1"],
patches = ["//third_party:zetasql.patch"],
sha256 = "1afc2210d4aad371eff0a6bfdd8417ba99e02183a35dff167af2fa6097643f26",
strip_prefix = "zetasql-%s" % ZETASQL_COMMIT,
urls = ["https://github.com/google/zetasql/archive/%s.tar.gz" % ZETASQL_COMMIT],
)

load("@com_google_zetasql//bazel:zetasql_deps_step_1.bzl", "zetasql_deps_step_1")

zetasql_deps_step_1()

load("@com_google_zetasql//bazel:zetasql_deps_step_2.bzl", "zetasql_deps_step_2")

zetasql_deps_step_2(
analyzer_deps = True,
evaluator_deps = True,
java_deps = False,
testing_deps = False,
tools_deps = False,
)

# No need to run zetasql_deps_step_3 and zetasql_deps_step_4 since all necessary dependencies are
# already installed.

# load("@com_google_zetasql//bazel:zetasql_deps_step_3.bzl", "zetasql_deps_step_3")

# zetasql_deps_step_3()

# load("@com_google_zetasql//bazel:zetasql_deps_step_4.bzl", "zetasql_deps_step_4")

# zetasql_deps_step_4()

_PLATFORMS_VERSION = "0.0.6"

http_archive(
Expand Down
47 changes: 0 additions & 47 deletions g3doc/custom_data_validation.md

This file was deleted.

127 changes: 127 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,130 @@ requires = [
# Required for using org_tensorflow bazel repository.
"numpy~=1.22.0",
]

[tool.ruff]
line-length = 88

[tool.ruff.lint]
select = [
# pycodestyle
"E",
"W",
# Pyflakes
"F",
# pyupgrade
"UP",
# flake8-bugbear
"B",
# flake8-simplify
"SIM",
# isort
"I",
# pep8 naming
"N",
# pydocstyle
"D",
# annotations
"ANN",
# debugger
"T10",
# flake8-pytest
"PT",
# flake8-return
"RET",
# flake8-unused-arguments
"ARG",
# flake8-fixme
"FIX",
# flake8-eradicate
"ERA",
# pandas-vet
"PD",
# numpy-specific rules
"NPY",
]

ignore = [
"D104", # Missing docstring in public package
"D100", # Missing docstring in public module
"D211", # No blank line before class
"PD901", # Avoid using 'df' for pandas dataframes. Perfectly fine in functions with limited scope
"ANN201", # Missing return type annotation for public function (makes no sense for NoneType return types...)
"ANN101", # Missing type annotation for `self`
"ANN204", # Missing return type annotation for special method
"ANN002", # Missing type annotation for `*args`
"ANN003", # Missing type annotation for `**kwargs`
"D105", # Missing docstring in magic method
"D203", # 1 blank line required before class docstring
"D204", # 1 blank line required after class docstring
"D413", # Missing blank line after last section
"SIM108", # Simplify if/else to one line; not always clearer
"D206", # Docstrings should be indented with spaces; unnecessary when running ruff-format
"E501", # Line length too long; unnecessary when running ruff-format
"W191", # Indentation contains tabs; unnecessary when running ruff-format

# REMOVE AFTER FIXING
# ANN rules (flake8-annotations)
"ANN001", # Missing type annotation for function argument `args`
"ANN102", # Missing type annotation for `cls` in classmethod
"ANN202", # Missing return type annotation for private function
"ANN205", # Missing return type annotation for staticmethod
"ANN206", # Missing return type annotation for classmethod
"ANN401", # Dynamically typed expressions (typing.Any) are disallowed in `domain`
# ARG rules (flake8-unused-arguments)
"ARG001", # Unused function argument
"ARG002", # Unused method argument
# B rules (flake8-bugbear)
"B005", # Using `.strip()` with multi-character strings is misleading
"B007", # Loop control variable not used within loop body
"B008", # Do not perform function call in argument defaults; instead, perform the call within the function, or read the default from a module-level singleton variable
"B904", # Within an `except` clause, raise exceptions with `raise ... from err` or `raise ... from None` to distinguish them from errors in exception handling
# D rules (pydocstyle)
"D101", # Missing docstring in public class
"D102", # Missing docstring in public method
"D103", # Missing docstring in public function
"D107", # Missing docstring in `__init__`
"D401", # First line of docstring should be in imperative mood: "Loads the vocabulary from the specified path."
"D404", # First word of the docstring should not be "This"
"D417", # Missing argument descriptions in the docstring
# E rules (pycodestyle)
"E731", # Do not assign a `lambda` expression, use a `def`
"E741", # Ambiguous variable name
# ERA rules (flake8-eradicate)
"ERA001", # Found commented-out code
# F rules (Pyflakes)
"F821", # Undefined name
# FIX rules (flake8-fixme)
"FIX002", # Line contains TODO, consider resolving the issue
# N rules (pep8-naming)
"N802", # Function name should be lowercase
# NPY rules (numpy-specific rules)
"NPY002", # Replace legacy `np.random` calls with `np.random.Generator`
# PD rules (pandas-vet)
"PD002", # `inplace=True` should be avoided; it has inconsistent behavior
"PD003", # `.isna` is preferred to `.isnull`; functionality is equivalent
"PD011", # Use `.to_numpy()` instead of `.values`
"PD015", # Use `.merge` method instead of `pd.merge` function
# PT rules (flake8-pytest-style)
"PT009", # Use a regular `assert` instead of unittest-style `assertEqual`
"PT018", # Assertion should be broken down into multiple parts
"PT027", # Use `pytest.raises` instead of unittest-style `assertRaisesRegex`
# RET rules (flake8-return)
"RET504", # Unnecessary assignment to variable before `return` statement
"RET505", # Unnecessary `elif` after `return` statement
# SIM rules (flake8-simplify)
"SIM101", # Multiple `isinstance` calls for `maybe_collection`, merge into a single call
"SIM102", # Use a single `if` statement instead of nested `if` statements
"SIM103", # Return the condition directly
"SIM105", # Use `contextlib.suppress(...)` instead of `try`-`except`-`pass`
"SIM117", # Use a single `with` statement with multiple contexts instead of nested `with` statements
"SIM211", # Use `not ...` instead of `False if ... else True`
# UP rules (pyupgrade)
"UP008", # Use `super()` instead of `super(__class__, self)`
"UP028", # Replace `yield` over `for` loop with `yield from`
"UP031", # Use format specifiers instead of percent format
]


[tool.ruff.lint.per-file-ignores]
"__init__.py" = ["F401"]
Loading