Skip to content

Commit 3fe9d95

Browse files
authored
Merge pull request #262 from tulibraries/devo-62-update-prod-web-content-indeing-dag
DEVO-62: Update prod web-content indexing dag.
2 parents 398e985 + 2b5cec5 commit 3fe9d95

File tree

3 files changed

+48
-7
lines changed

3 files changed

+48
-7
lines changed

cob_datapipeline/prod_sc_web_content_reindex_dag.py

Lines changed: 39 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,15 @@
11
# Airflow DAG to index Web Content into SolrCloud.
22
from datetime import datetime, timedelta
3+
from tulflow import tasks
34
import airflow
45
from airflow.models import Variable
56
from airflow.hooks.base_hook import BaseHook
67
from airflow.operators.bash_operator import BashOperator
78
from airflow.operators.python_operator import PythonOperator
89
from cob_datapipeline.task_slack_posts import web_content_slackpostonsuccess
910
from cob_datapipeline.task_sc_get_num_docs import task_solrgetnumdocs
10-
from tulflow import tasks
11+
from cob_datapipeline.operators import\
12+
PushVariable, DeleteAliasListVariable, DeleteCollectionListVariable
1113

1214
"""
1315
INIT SYSTEMWIDE VARIABLES
@@ -22,11 +24,11 @@
2224

2325
# Get Solr URL & Collection Name for indexing info; error out if not entered
2426
SOLR_CONN = BaseHook.get_connection("SOLRCLOUD")
25-
WEB_CONTENT_SOLR_CONFIG = Variable.get("WEB_CONTENT_SOLR_CONFIG", deserialize_json=True)
27+
SOLR_CONFIG = Variable.get("WEB_CONTENT_SOLR_CONFIG", deserialize_json=True)
2628
# Example WEB_CONTENT_SOLR_CONFIG value: {"configset": "tul_cob-web-2", "replication_factor": 2}
27-
CONFIGSET = WEB_CONTENT_SOLR_CONFIG.get("configset")
29+
CONFIGSET = SOLR_CONFIG.get("configset")
2830
ALIAS = CONFIGSET + "-prod"
29-
REPLICATION_FACTOR = WEB_CONTENT_SOLR_CONFIG.get("replication_factor")
31+
REPLICATION_FACTOR = SOLR_CONFIG.get("replication_factor")
3032
WEB_CONTENT_BRANCH = Variable.get("WEB_CONTENT_PROD_BRANCH")
3133

3234
# Manifold website creds
@@ -109,6 +111,34 @@
109111
ALIAS
110112
)
111113

114+
PUSH_ALIAS = PushVariable(  # wired to run directly after SOLR_ALIAS_SWAP
115+
task_id="push_alias",
116+
name="WEB_CONTENT_QA_ALIASES",  # NOTE(review): "QA" name in the prod DAG; confirm a PROD-specific variable is not intended
117+
value=ALIAS,  # ALIAS is CONFIGSET + "-prod"
118+
dag=DAG)
119+
120+
DELETE_ALIAS = DeleteAliasListVariable(  # wired to run after PUSH_ALIAS
121+
task_id="delete_aliases",
122+
solr_conn_id='SOLRCLOUD',
123+
list_variable="WEB_CONTENT_QA_ALIASES",  # same variable name that push_alias writes; NOTE(review): "QA" in prod, confirm
124+
skip_from_last=2,  # presumably keeps the last 2 entries of the list; TODO confirm in cob_datapipeline.operators
125+
skip_included=[ALIAS],  # presumably exempts the current prod alias from deletion; TODO confirm
126+
dag=DAG)
127+
128+
PUSH_COLLECTION = PushVariable(  # wired to run after DELETE_ALIAS
129+
task_id="push_collection",
130+
name="WEB_CONTENT_QA_COLLECTIONS",  # NOTE(review): "QA" name in the prod DAG; confirm a PROD-specific variable is not intended
131+
value=CONFIGSET +"-{{ ti.xcom_pull(task_ids='set_collection_name') }}",  # Jinja template; assumes "value" is a templated field of PushVariable; TODO confirm
132+
dag=DAG)
133+
134+
DELETE_COLLECTIONS = DeleteCollectionListVariable(  # wired to run after PUSH_COLLECTION, before POST_SLACK
135+
task_id="delete_collections",
136+
solr_conn_id='SOLRCLOUD',
137+
list_variable="WEB_CONTENT_QA_COLLECTIONS",  # same variable name that push_collection writes; NOTE(review): "QA" in prod, confirm
138+
skip_from_last=2,  # presumably keeps the last 2 entries of the list; TODO confirm in cob_datapipeline.operators
139+
skip_included=[CONFIGSET +"-{{ ti.xcom_pull(task_ids='set_collection_name') }}"],  # same string as push_collection's value; assumes list items are templated; TODO confirm
140+
dag=DAG)
141+
112142
POST_SLACK = PythonOperator(
113143
task_id='slack_post_succ',
114144
python_callable=web_content_slackpostonsuccess,
@@ -122,4 +152,8 @@
122152
INDEX_WEB_CONTENT.set_upstream(CREATE_COLLECTION)
123153
GET_NUM_SOLR_DOCS_POST.set_upstream(INDEX_WEB_CONTENT)
124154
SOLR_ALIAS_SWAP.set_upstream(GET_NUM_SOLR_DOCS_POST)
125-
POST_SLACK.set_upstream(SOLR_ALIAS_SWAP)
155+
PUSH_ALIAS.set_upstream(SOLR_ALIAS_SWAP)
156+
DELETE_ALIAS.set_upstream(PUSH_ALIAS)
157+
PUSH_COLLECTION.set_upstream(DELETE_ALIAS)
158+
DELETE_COLLECTIONS.set_upstream(PUSH_COLLECTION)
159+
POST_SLACK.set_upstream(DELETE_COLLECTIONS)

cob_datapipeline/qa_sc_web_content_reindex_dag.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,6 @@
1111
from cob_datapipeline.operators import\
1212
PushVariable, DeleteAliasListVariable, DeleteCollectionListVariable
1313

14-
1514
"""
1615
INIT SYSTEMWIDE VARIABLES
1716

tests/prod_sc_web_content_reindex_dag_test.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,10 @@ def test_dag_tasks_present(self):
2828
"index_web_content",
2929
"get_num_solr_docs_post",
3030
"solr_alias_swap",
31+
"push_alias",
32+
"delete_aliases",
33+
"push_collection",
34+
"delete_collections",
3135
"slack_post_succ"
3236
])
3337

@@ -39,7 +43,11 @@ def test_dag_task_order(self):
3943
"index_web_content": "create_collection",
4044
"get_num_solr_docs_post": "index_web_content",
4145
"solr_alias_swap": "get_num_solr_docs_post",
42-
"slack_post_succ": "solr_alias_swap",
46+
"push_alias": "solr_alias_swap",
47+
"delete_aliases": "push_alias",
48+
"push_collection": "delete_aliases",
49+
"delete_collections": "push_collection",
50+
"slack_post_succ": "delete_collections",
4351
}
4452

4553
for task, upstream_task in expected_task_deps.items():

0 commit comments

Comments
 (0)