Skip to content

Commit

Permalink
Moving webhook to its own service (#2849)
Browse files Browse the repository at this point in the history
* Moving webhook to its own service

* Fix webhook tests

* Fix duplicated port

* Apply suggestions from code review

Co-authored-by: Sylvain Lesage <[email protected]>

* Apply code review suggestions

* Address code review observations

---------

Co-authored-by: Sylvain Lesage <[email protected]>
  • Loading branch information
AndreaFrancis and severo authored May 22, 2024
1 parent 6efa88b commit 9ba61fc
Show file tree
Hide file tree
Showing 46 changed files with 4,204 additions and 52 deletions.
4 changes: 4 additions & 0 deletions .github/workflows/_e2e_tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ jobs:
SEARCH_UVICORN_PORT: "8083"
SSE_API_UVICORN_NUM_WORKERS: "2"
SSE_API_UVICORN_PORT: "8085"
WEBHOOK_UVICORN_NUM_WORKERS: "2"
WEBHOOK_UVICORN_PORT: "8087"
COMMON_HF_ENDPOINT: "https://hub-ci.huggingface.co"
COMMON_HF_TOKEN: "hf_app_datasets-server_token"
# ^ hard coded, see e2e/tests/fixtures/hub.py
Expand Down Expand Up @@ -99,6 +101,8 @@ jobs:
SEARCH_UVICORN_PORT: "8083"
SSE_API_UVICORN_NUM_WORKERS: "2"
SSE_API_UVICORN_PORT: "8085"
WEBHOOK_UVICORN_NUM_WORKERS: "2"
WEBHOOK_UVICORN_PORT: "8087"
COMMON_HF_ENDPOINT: "https://hub-ci.huggingface.co"
COMMON_HF_TOKEN: "hf_app_datasets-server_token"
# ^ hard coded, see e2e/tests/fixtures/hub.py
Expand Down
4 changes: 4 additions & 0 deletions .github/workflows/cd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@ jobs:
project: sse-api
- directory: services
project: worker
- directory: services
project: webhook
runs-on: "ubuntu-latest"
steps:
- name: Checkout repository
Expand Down Expand Up @@ -122,6 +124,8 @@ jobs:
tag: sha-${{ steps.vars.outputs.sha_short }}
worker:
tag: sha-${{ steps.vars.outputs.sha_short }}
webhook:
tag: sha-${{ steps.vars.outputs.sha_short }}
END
)
echo "VALUES=$(echo "$VALUES" | yq -o=json | jq tostring)" >> $GITHUB_ENV
Expand Down
35 changes: 35 additions & 0 deletions .github/workflows/s-webhook.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2024 The HuggingFace Authors.

# CI for the webhook service. Calls the shared quality and unit-test
# workflows for services/webhook. Triggered manually, on pushes to main,
# and on pull requests, when any of the listed paths change.
name: services/webhook
on:
  workflow_dispatch:
  push:
    branches:
      - main
    paths:
      - "libs/libapi/**"
      - "libs/libcommon/**"
      - "services/webhook/**"
      - ".github/workflows/s-webhook.yml"
      - ".github/workflows/_quality-python.yml"
      - ".github/workflows/_unit-tests-python.yml"
      - "tools/docker-compose-mongo.yml"
  # Same path filter as for pushes; keep the two lists in sync.
  pull_request:
    paths:
      - "libs/libapi/**"
      - "libs/libcommon/**"
      - "services/webhook/**"
      - ".github/workflows/s-webhook.yml"
      - ".github/workflows/_quality-python.yml"
      - ".github/workflows/_unit-tests-python.yml"
      - "tools/docker-compose-mongo.yml"
jobs:
  quality:
    uses: ./.github/workflows/_quality-python.yml
    with:
      working-directory: services/webhook
  unit-tests:
    uses: ./.github/workflows/_unit-tests-python.yml
    with:
      working-directory: services/webhook
4 changes: 4 additions & 0 deletions .vscode/monorepo.code-workspace
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,10 @@
{
"name": "services/worker",
"path": "../services/worker"
},
{
"name": "services/webhook",
"path": "../services/webhook"
}
],
"settings": {
Expand Down
2 changes: 1 addition & 1 deletion DEVELOPER_GUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ For now, there are two libraries
- [libapi](./libs/libapi/), which contains common code for authentication, http requests, exceptions and other utilities for the services.
- [services](./services) contains the applications:
- [api](./services/api/), the public API, is a web server that exposes the [API endpoints](https://huggingface.co/docs/datasets-server). All the responses are served from pre-computed responses in Mongo server. That's the main point of this project: generating these responses takes time, and the API server provides this service to the users.
The API service exposes the `/webhook` endpoint which is called by the Hub on every creation, update or deletion of a dataset on the Hub. On deletion, the cached responses are deleted. On creation or update, a new job is appended in the "queue" database.
- [webhook](./services/webhook/) exposes the `/webhook` endpoint, which the Hub calls on every creation, update or deletion of a dataset. On deletion, the cached responses are deleted. On creation or update, a new job is appended to the "queue" database.
- [rows](./services/rows/)
- [search](./services/search/)
- [admin](./services/admin/), the admin API (which is separated from the public API and might be published under its own domain at some point)
Expand Down
10 changes: 6 additions & 4 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ export PORT_ROWS := 8182
export PORT_SEARCH := 8183
export PORT_SSE_API := 8185
export PORT_WORKER := 8186
export PORT_WEBHOOK := 8187
export PORT_REVERSE_PROXY := 8100

# environment variables per target
Expand All @@ -24,19 +25,19 @@ include tools/Docker.mk

.PHONY: start
start:
MONGO_PORT=${MONGO_PORT} ADMIN_UVICORN_PORT=${PORT_ADMIN} API_UVICORN_PORT=${PORT_API} ROWS_UVICORN_PORT=${PORT_ROWS} SEARCH_UVICORN_PORT=${PORT_SEARCH} SSE_API_UVICORN_PORT=${PORT_SSE_API} WORKER_UVICORN_PORT=${PORT_WORKER} PORT_REVERSE_PROXY=${PORT_REVERSE_PROXY} DOCKER_COMPOSE=${DOCKER_COMPOSE} $(MAKE) up
MONGO_PORT=${MONGO_PORT} ADMIN_UVICORN_PORT=${PORT_ADMIN} API_UVICORN_PORT=${PORT_API} ROWS_UVICORN_PORT=${PORT_ROWS} SEARCH_UVICORN_PORT=${PORT_SEARCH} SSE_API_UVICORN_PORT=${PORT_SSE_API} WORKER_UVICORN_PORT=${PORT_WORKER} WEBHOOK_UVICORN_PORT=${PORT_WEBHOOK} PORT_REVERSE_PROXY=${PORT_REVERSE_PROXY} DOCKER_COMPOSE=${DOCKER_COMPOSE} $(MAKE) up

.PHONY: stop
stop:
MONGO_PORT=${MONGO_PORT} ADMIN_UVICORN_PORT=${PORT_ADMIN} API_UVICORN_PORT=${PORT_API} ROWS_UVICORN_PORT=${PORT_ROWS} SEARCH_UVICORN_PORT=${PORT_SEARCH} SSE_API_UVICORN_PORT=${PORT_SSE_API} WORKER_UVICORN_PORT=${PORT_WORKER} PORT_REVERSE_PROXY=${PORT_REVERSE_PROXY} DOCKER_COMPOSE=${DOCKER_COMPOSE} $(MAKE) down
MONGO_PORT=${MONGO_PORT} ADMIN_UVICORN_PORT=${PORT_ADMIN} API_UVICORN_PORT=${PORT_API} ROWS_UVICORN_PORT=${PORT_ROWS} SEARCH_UVICORN_PORT=${PORT_SEARCH} SSE_API_UVICORN_PORT=${PORT_SSE_API} WORKER_UVICORN_PORT=${PORT_WORKER} WEBHOOK_UVICORN_PORT=${PORT_WEBHOOK} PORT_REVERSE_PROXY=${PORT_REVERSE_PROXY} DOCKER_COMPOSE=${DOCKER_COMPOSE} $(MAKE) down

.PHONY: dev-start
dev-start:
MONGO_PORT=${MONGO_PORT} ADMIN_UVICORN_PORT=${PORT_ADMIN} API_UVICORN_PORT=${PORT_API} ROWS_UVICORN_PORT=${PORT_ROWS} SEARCH_UVICORN_PORT=${PORT_SEARCH} SSE_API_UVICORN_PORT=${PORT_SSE_API} WORKER_UVICORN_PORT=${PORT_WORKER} PORT_REVERSE_PROXY=${PORT_REVERSE_PROXY} DOCKER_COMPOSE=${DOCKER_COMPOSE} $(MAKE) up
MONGO_PORT=${MONGO_PORT} ADMIN_UVICORN_PORT=${PORT_ADMIN} API_UVICORN_PORT=${PORT_API} ROWS_UVICORN_PORT=${PORT_ROWS} SEARCH_UVICORN_PORT=${PORT_SEARCH} SSE_API_UVICORN_PORT=${PORT_SSE_API} WORKER_UVICORN_PORT=${PORT_WORKER} WEBHOOK_UVICORN_PORT=${PORT_WEBHOOK} PORT_REVERSE_PROXY=${PORT_REVERSE_PROXY} DOCKER_COMPOSE=${DOCKER_COMPOSE} $(MAKE) up

.PHONY: dev-stop
dev-stop:
MONGO_PORT=${MONGO_PORT} ADMIN_UVICORN_PORT=${PORT_ADMIN} API_UVICORN_PORT=${PORT_API} ROWS_UVICORN_PORT=${PORT_ROWS} SEARCH_UVICORN_PORT=${PORT_SEARCH} SSE_API_UVICORN_PORT=${PORT_SSE_API} WORKER_UVICORN_PORT=${PORT_WORKER} PORT_REVERSE_PROXY=${PORT_REVERSE_PROXY} DOCKER_COMPOSE=${DOCKER_COMPOSE} $(MAKE) down
MONGO_PORT=${MONGO_PORT} ADMIN_UVICORN_PORT=${PORT_ADMIN} API_UVICORN_PORT=${PORT_API} ROWS_UVICORN_PORT=${PORT_ROWS} SEARCH_UVICORN_PORT=${PORT_SEARCH} SSE_API_UVICORN_PORT=${PORT_SSE_API} WORKER_UVICORN_PORT=${PORT_WORKER} WEBHOOK_UVICORN_PORT=${PORT_WEBHOOK} PORT_REVERSE_PROXY=${PORT_REVERSE_PROXY} DOCKER_COMPOSE=${DOCKER_COMPOSE} $(MAKE) down

.PHONY: e2e
e2e:
Expand All @@ -56,4 +57,5 @@ install:
$(MAKE) -C services/search install
$(MAKE) -C services/sse-api install
$(MAKE) -C services/worker install
$(MAKE) -C services/webhook install
$(MAKE) -C e2e install
33 changes: 32 additions & 1 deletion chart/env/prod.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,11 @@ images:
useGlobalRegistry: false
repository: datasets-server-services-worker
tag: sha-fb3399a

webhook:
registry: huggingface
useGlobalRegistry: false
repository: datasets-server-services-webhook
tag: sha-fb3399a
secrets:
externalSecret:
enabled: true
Expand Down Expand Up @@ -537,3 +541,30 @@ workers:
limits:
cpu: 2
memory: "1Gi"

# Production values for the webhook service.
webhook:
  # Number of uvicorn workers for running the application
  # (2 x $num_cores) + 1
  # https://docs.gunicorn.org/en/stable/design.html#how-many-workers
  # With the 4 CPUs requested below: (2 x 4) + 1 = 9.
  uvicornNumWorkers: "9"
  # Schedule the pods on the nodes labelled for the webhook service, and
  # tolerate the matching NoSchedule taint.
  nodeSelector:
    role-datasets-server-webhook: "true"
  tolerations:
    - key: "huggingface.co/datasets-server-webhook"
      operator: "Exists"
      effect: "NoSchedule"
  replicas: 4
  service:
    type: NodePort
  ingress:
    enabled: true
    annotations:
      alb.ingress.kubernetes.io/group.order: "5"
      alb.ingress.kubernetes.io/target-node-labels: role-datasets-server-webhook=true
  # Requests equal limits for both CPU and memory.
  resources:
    requests:
      cpu: 4
      memory: "4Gi"
    limits:
      cpu: 4
      memory: "4Gi"
22 changes: 22 additions & 0 deletions chart/env/staging.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,11 @@ images:
useGlobalRegistry: false
repository: datasets-server-services-worker
tag: sha-fb3399a
webhook:
registry: huggingface
useGlobalRegistry: false
repository: datasets-server-services-webhook
tag: sha-fb3399a

secrets:
externalSecret:
Expand Down Expand Up @@ -320,3 +325,20 @@ workers:
limits:
cpu: 1
memory: "4Gi"

# Staging values for the webhook service: one replica running a single
# uvicorn worker.
webhook:
  uvicornNumWorkers: "1"
  replicas: 1
  service:
    type: NodePort
  ingress:
    enabled: true
    annotations:
      alb.ingress.kubernetes.io/group.order: "4"
  resources:
    requests:
      cpu: 100m
      memory: "512Mi"
    limits:
      cpu: 1
      memory: "4Gi"
9 changes: 9 additions & 0 deletions chart/templates/_common/_helpers.tpl
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,10 @@ Docker image management
{{ include "hf.common.images.image" (dict "imageRoot" .Values.images.services.worker "global" .Values.global.huggingface) }}
{{- end -}}

{{/*
Image reference of the webhook service, rendered by "hf.common.images.image"
from .Values.images.services.webhook and the global huggingface values.
*/}}
{{- define "services.webhook.image" -}}
{{ include "hf.common.images.image" (dict "imageRoot" .Values.images.services.webhook "global" .Values.global.huggingface) }}
{{- end -}}

{{- define "image.imagePullSecrets" -}}
{{- include "hf.common.images.renderPullSecrets" (dict "images" (list .Values.images) "context" $) -}}
{{- end -}}
Expand Down Expand Up @@ -126,6 +130,11 @@ app.kubernetes.io/component: "{{ include "name" . }}-sse-api"
app.kubernetes.io/component: "{{ include "name" . }}-worker-{{ .workerValues.deployName }}"
{{- end -}}

{{/*
Labels of the webhook resources: the output of "hf.labels.commons" plus an
app.kubernetes.io/component label set to "<name>-webhook".
*/}}
{{- define "labels.webhook" -}}
{{ include "hf.labels.commons" . }}
app.kubernetes.io/component: "{{ include "name" . }}-webhook"
{{- end -}}

{{/*
The dataset viewer API base url
*/}}
Expand Down
55 changes: 55 additions & 0 deletions chart/templates/services/webhook/_container.tpl
Original file line number Diff line number Diff line change
@@ -0,0 +1,55 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2022 The HuggingFace Authors.

{{/*
Container spec of the webhook service, rendered as a single list item meant
to be included under a pod's "containers:" key.
The environment combines the shared env* helpers with service, prometheus
and uvicorn settings read from .Values.webhook. Readiness and liveness both
probe GET /healthcheck on the uvicorn port.
NOTE(review): the service-specific variables keep the API_ prefix although
their values come from .Values.webhook. Presumably the webhook application
reuses the shared API configuration; confirm against the service's config.
*/}}
{{- define "containerWebhook" -}}
- name: "{{ include "name" . }}-webhook"
  image: {{ include "services.webhook.image" . }}
  imagePullPolicy: {{ .Values.images.pullPolicy }}
  env:
  {{ include "envCache" . | nindent 2 }}
  {{ include "envS3" . | nindent 2 }}
  {{ include "envCloudfront" . | nindent 2 }}
  {{ include "envQueue" . | nindent 2 }}
  {{ include "envCommon" . | nindent 2 }}
  {{ include "envHf" . | nindent 2 }}
  {{ include "envLog" . | nindent 2 }}
  {{ include "envNumba" . | nindent 2 }}
  # storage
  {{ include "envAssets" . | nindent 2 }}
  {{ include "envCachedAssets" . | nindent 2 }}
  # service
  - name: API_MAX_AGE_LONG
    value: {{ .Values.webhook.maxAgeLong | quote }}
  - name: API_MAX_AGE_SHORT
    value: {{ .Values.webhook.maxAgeShort | quote }}
  # prometheus
  - name: PROMETHEUS_MULTIPROC_DIR
    value: {{ .Values.webhook.prometheusMultiprocDirectory | quote }}
  # uvicorn
  - name: API_UVICORN_HOSTNAME
    value: {{ .Values.webhook.uvicornHostname | quote }}
  - name: API_UVICORN_NUM_WORKERS
    value: {{ .Values.webhook.uvicornNumWorkers | quote }}
  - name: API_UVICORN_PORT
    value: {{ .Values.webhook.uvicornPort | quote }}
  securityContext:
    allowPrivilegeEscalation: false
  readinessProbe:
    failureThreshold: 30
    periodSeconds: 5
    httpGet:
      path: /healthcheck
      port: {{ .Values.webhook.uvicornPort }}
  livenessProbe:
    failureThreshold: 30
    periodSeconds: 5
    httpGet:
      path: /healthcheck
      port: {{ .Values.webhook.uvicornPort }}
  ports:
  - containerPort: {{ .Values.webhook.uvicornPort }}
    name: http
    protocol: TCP
  resources:
    {{ toYaml .Values.webhook.resources | nindent 4 }}
{{- end -}}
30 changes: 30 additions & 0 deletions chart/templates/services/webhook/deployment.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2022 The HuggingFace Authors.

# Deployment of the webhook service. The pod runs the container rendered by
# "containerWebhook"; replica count, node selector and tolerations come from
# .Values.webhook. Pods are replaced with a rolling update (25% surge,
# 25% unavailable).
apiVersion: apps/v1
kind: Deployment
metadata:
  labels: {{ include "labels.webhook" . | nindent 4 }}
  name: "{{ include "name" . }}-webhook"
  namespace: {{ .Release.Namespace }}
spec:
  progressDeadlineSeconds: 600
  replicas: {{ .Values.webhook.replicas }}
  revisionHistoryLimit: 10
  selector:
    matchLabels: {{ include "labels.webhook" . | nindent 6 }}
  strategy:
    rollingUpdate:
      maxSurge: 25%
      maxUnavailable: 25%
    type: RollingUpdate
  template:
    metadata:
      labels: {{ include "labels.webhook" . | nindent 8 }}
    spec:
      {{- include "dnsConfig" . | nindent 6 }}
      {{- include "image.imagePullSecrets" . | nindent 6 }}
      containers: {{ include "containerWebhook" . | nindent 8 }}
      nodeSelector: {{ toYaml .Values.webhook.nodeSelector | nindent 8 }}
      tolerations: {{ toYaml .Values.webhook.tolerations | nindent 8 }}
      securityContext: {{ include "securityContext" . | nindent 8 }}
23 changes: 23 additions & 0 deletions chart/templates/services/webhook/ingress.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
{{/*
Ingress routing the /webhook path prefix to the "http" port of the webhook
Service. Rendered only when the global, chart-level and webhook ingress
flags are all enabled.
*/}}
{{- if and .Values.global.huggingface.ingress.enabled .Values.ingress.enabled .Values.webhook.ingress.enabled -}}
apiVersion: networking.k8s.io/v1
kind: Ingress
metadata:
  {{- $annotations := fromYaml (include "datasetsServer.instance.ingress.annotations" (dict "instance" .Values.webhook "context" $ )) }}
  annotations: {{ toYaml $annotations | nindent 4}}
  labels: {{ include "labels.webhook" . | nindent 4 }}
  name: "{{ include "name" . }}-webhook"
  namespace: {{ .Release.Namespace }}
spec:
  rules:
    - host: {{ include "datasetsServer.ingress.hostname" . }}
      http:
        paths:
          - backend:
              service:
                name: "{{ include "name" . }}-webhook"
                port:
                  name: http
            path: /webhook
            pathType: Prefix
{{- include "ingress.tls" (merge (dict "annotations" $annotations) $ ) | indent 2}}
{{- end }}
10 changes: 10 additions & 0 deletions chart/templates/services/webhook/pdb.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,10 @@
# PodDisruptionBudget for the webhook pods: at most one pod selected by the
# webhook labels may be unavailable at a time.
apiVersion: policy/v1
kind: PodDisruptionBudget
metadata:
  labels: {{ include "labels.webhook" . | nindent 4 }}
  name: "{{ include "name" . }}-webhook"
  namespace: {{ .Release.Namespace }}
spec:
  maxUnavailable: 1
  selector:
    matchLabels: {{ include "labels.webhook" . | nindent 6 }}
22 changes: 22 additions & 0 deletions chart/templates/services/webhook/service.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2022 The HuggingFace Authors.

{{ $serviceType := .Values.webhook.service.type | default .Values.global.huggingface.service.type }}
# Service in front of the webhook pods: port 80 ("http") forwards to the
# uvicorn port of the container. The service type comes from
# .Values.webhook.service.type and falls back to the global service type.
apiVersion: v1
kind: Service
metadata:
  name: "{{ include "name" . }}-webhook"
  annotations: {{ toYaml .Values.webhook.service.annotations | nindent 4 }}
  namespace: {{ .Release.Namespace }}
  labels: {{ include "labels.webhook" . | nindent 4 }}
spec:
  ports:
  - name: http
    port: 80
    protocol: TCP
    # A fixed node port is only set for NodePort services.
    {{- if eq "NodePort" $serviceType }}
    nodePort: {{ .Values.global.huggingface.service.ports.datasetsServer.webhook }}
    {{- end }}
    targetPort: {{ .Values.webhook.uvicornPort }}
  selector: {{ include "labels.webhook" . | nindent 4 }}
  type: {{ $serviceType }}
20 changes: 20 additions & 0 deletions chart/templates/services/webhook/servicemonitor.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# SPDX-License-Identifier: Apache-2.0
# Copyright 2022 The HuggingFace Authors.

{{/*
ServiceMonitor scraping /metrics on the "http" port of the services that
carry the webhook labels, restricted to the release namespace. Rendered only
when .Values.monitoring.enabled is set.
*/}}
{{- if .Values.monitoring.enabled }}
apiVersion: monitoring.coreos.com/v1
kind: ServiceMonitor
metadata:
  labels: {{ include "labels.webhook" . | nindent 4 }}
  name: "{{ include "name" . }}-webhook"
  namespace: {{ .Release.Namespace }}
spec:
  endpoints:
    - path: /metrics
      port: http
  namespaceSelector:
    matchNames:
      - {{ .Release.Namespace }}
  selector:
    matchLabels: {{ include "labels.webhook" . | nindent 6 }}
{{- end }}
Loading

0 comments on commit 9ba61fc

Please sign in to comment.