Skip to content

Commit 2d417a9

Browse files
committed
Add build embeddings workflow
1 parent cd169a4 commit 2d417a9

File tree

3 files changed

+90
-2
lines changed

3 files changed

+90
-2
lines changed

.github/workflows/accelerate_doc.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
name: Accelerate doc build
22

3-
on: [pull_request]
3+
# on: [pull_request]
44

55
jobs:
66
integration_doc_build:
+84
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,84 @@
# Builds documentation embeddings for each repository listed in the job matrix.
# Triggered on every push, once a day on a schedule, and manually on demand.
name: Daily Build Embeddings

env:
  # Quoted so that every YAML loader reads this as the string "yes",
  # never as a boolean.
  DIFFUSERS_SLOW_IMPORT: "yes"

on:
  push:
  schedule:
    - cron: "5 7 * * *"  # every day at 07:05 (GitHub evaluates cron in UTC)
  # to run this workflow manually from the Actions tab
  workflow_dispatch:

# Steps
# for i in list:
#   git clone
#   install
#   build embeddings

jobs:
  matrix-job:
    runs-on: ubuntu-latest
    container: huggingface/transformers-doc-builder
    strategy:
      max-parallel: 1  # run the matrix jobs sequentially
      matrix:
        include:
          - repo_id: huggingface/diffusers
            doc_folder: docs/source/en
    concurrency:
      # The group is keyed per repository: with a group shared by the whole
      # matrix, `cancel-in-progress` would let one matrix entry cancel another.
      group: ${{ github.workflow }}-${{ github.ref }}-${{ matrix.repo_id }}
      cancel-in-progress: true
    timeout-minutes: 360  # Set timeout to 6 hours
    steps:
      - name: Setup REPO_NAME
        shell: bash
        run: |
          # Strip the "owner/" prefix from the repo id and export the bare
          # repository name so that later steps can read it from the env context.
          repo_id="${{ matrix.repo_id }}"
          repo_name="${repo_id#*/}"
          echo "REPO_NAME=${repo_name}" >> "$GITHUB_ENV"

      - name: Checkout repository
        uses: actions/checkout@v4
        with:
          repository: ${{ matrix.repo_id }}
          # `path` is resolved relative to the workspace. Do not prefix it with
          # `github.workspace`: inside a container job that context holds the
          # host path, which the checkout action rejects as outside the workspace.
          path: ${{ env.REPO_NAME }}

      - name: Install libgl1
        # Refresh the package index first; the image may ship without one.
        run: |
          apt-get update
          apt-get install -y libgl1

      - name: Setup environment
        shell: bash
        run: |
          current_path=$(pwd)

          python -V

          # Install the checked-out project together with its dev extras.
          cd "${{ env.REPO_NAME }}"
          pip install ".[dev]"

          cd "$current_path"

          # Install doc-builder from its build_embeddings branch.
          rm -rf doc-builder
          rm -rf .git
          git clone https://github.com/huggingface/doc-builder.git
          cd doc-builder
          git fetch
          git checkout build_embeddings
          pip install .

      - name: Build embeddings
        shell: bash
        run: |
          doc-builder embeddings ${{ env.REPO_NAME }} ${{ env.REPO_NAME }}/${{ matrix.doc_folder }}

  # Placeholder job that runs after the matrix job has completed successfully.
  cleanup-job:
    needs: matrix-job
    runs-on: ubuntu-latest
    steps:
      - name: Run cleanup steps
        run: |
          echo "Performing cleanup tasks"
          # Add your cleanup commands here

src/doc_builder/build_embeddings.py

+5-1
Original file line numberDiff line numberDiff line change
@@ -69,11 +69,13 @@ def add_child(self, child, header_level):
6969
)
7070
parent = self
7171
nested_level = header_level - 2
72+
current_level = 1
7273
while nested_level:
7374
if not parent.children:
74-
parent.children.append(MarkdownChunkNode(None, None))
75+
parent.children.append(MarkdownChunkNode("#" * current_level))
7576
parent = parent.children[-1]
7677
nested_level -= 1
78+
current_level += 1
7779
parent.children.append(child)
7880

7981
def get_chunks(self, page_info, chunk_len_chars, prefix=[]):
@@ -449,6 +451,8 @@ def build_embeddings(
449451
is_python_module=is_python_module,
450452
)
451453

454+
return
455+
452456
# Step 2: create embeddings
453457
embeddings = call_embedding_inference(chunks)
454458

0 commit comments

Comments
 (0)