| 
 | 1 | +import typing as t  | 
 | 2 | +import logging  | 
 | 3 | + | 
# Module-level logger named after this module; to_sqlmesh() uses it to emit a debug record
# describing each dbt -> SQLMesh selector conversion.
logger = logging.getLogger(__name__)
 | 5 | + | 
 | 6 | + | 
 | 7 | +def to_sqlmesh(dbt_select: t.Collection[str], dbt_exclude: t.Collection[str]) -> t.Optional[str]:  | 
 | 8 | +    """  | 
 | 9 | +    Given selectors defined in the format of the dbt cli --select and --exclude arguments, convert them into a selector expression that  | 
 | 10 | +    the SQLMesh selector engine can understand.  | 
 | 11 | +
  | 
 | 12 | +    The main things being mapped are:  | 
 | 13 | +        - set union (" " between items within the same selector string OR multiple --select arguments) is mapped to " | "  | 
 | 14 | +        - set intersection ("," between items within the same selector string) is mapped to " & "  | 
 | 15 | +        - `--exclude`. The SQLMesh selector engine does not treat this as a separate parameter and rather treats exclusion as a normal selector  | 
 | 16 | +          that just happens to contain negation syntax, so we generate these by negating each expression and then intersecting the result  | 
 | 17 | +          with any --select expressions  | 
 | 18 | +
  | 
 | 19 | +    Things that are *not* currently being mapped include:  | 
 | 20 | +        - selectors based on file paths  | 
 | 21 | +        - selectors based on partially qualified names like "model_a". The SQLMesh selector engine requires either:  | 
 | 22 | +            - wildcards, eg "*model_a*"  | 
 | 23 | +            - the full model name qualified with the schema, eg "staging.model_a"  | 
 | 24 | +
  | 
 | 25 | +    Examples:  | 
 | 26 | +        --select "model_a"  | 
 | 27 | +            -> "model_a"  | 
 | 28 | +        --select "main.model_a"  | 
 | 29 | +            -> "main.model_a"  | 
 | 30 | +        --select "main.model_a" --select "main.model_b"  | 
 | 31 | +            -> "main.model_a | main.model_b"  | 
 | 32 | +        --select "main.model_a main.model_b"  | 
 | 33 | +            -> "main.model_a | main.model_b"  | 
 | 34 | +        --select "(main.model_a+ & ^main.model_b)"  | 
 | 35 | +            -> "(main.model_a+ & ^main.model_b)"  | 
 | 36 | +        --select "+main.model_a" --exclude "raw.src_data"  | 
 | 37 | +            -> "+main.model_a & ^(raw.src_data)"  | 
 | 38 | +        --select "+main.model_a" --select "main.*b+" --exclude "raw.src_data"  | 
 | 39 | +            -> "(+main.model_a | main.*b+) & ^(raw.src_data)"  | 
 | 40 | +    """  | 
 | 41 | +    if not dbt_select and not dbt_exclude:  | 
 | 42 | +        return None  | 
 | 43 | + | 
 | 44 | +    select_expr = " | ".join(_to_sqlmesh(expr) for expr in dbt_select)  | 
 | 45 | +    select_expr = _wrap(select_expr) if dbt_exclude and len(dbt_select) > 1 else select_expr  | 
 | 46 | + | 
 | 47 | +    exclude_expr = " | ".join(_to_sqlmesh(expr, negate=True) for expr in dbt_exclude)  | 
 | 48 | +    exclude_expr = _wrap(exclude_expr) if dbt_select and len(dbt_exclude) > 1 else exclude_expr  | 
 | 49 | + | 
 | 50 | +    main_expr = " & ".join([expr for expr in [select_expr, exclude_expr] if expr])  | 
 | 51 | + | 
 | 52 | +    logger.debug(  | 
 | 53 | +        f"Expanded dbt select: {dbt_select}, exclude: {dbt_exclude} into SQLMesh: {main_expr}"  | 
 | 54 | +    )  | 
 | 55 | + | 
 | 56 | +    return main_expr  | 
 | 57 | + | 
 | 58 | + | 
 | 59 | +def _to_sqlmesh(selector_str: str, negate: bool = False) -> str:  | 
 | 60 | +    unions, intersections = _split_unions_and_intersections(selector_str)  | 
 | 61 | + | 
 | 62 | +    if negate:  | 
 | 63 | +        unions = [_negate(u) for u in unions]  | 
 | 64 | +        intersections = [_negate(i) for i in intersections]  | 
 | 65 | + | 
 | 66 | +    union_expr = " | ".join(unions)  | 
 | 67 | +    intersection_expr = " & ".join(intersections)  | 
 | 68 | + | 
 | 69 | +    if len(unions) > 1 and intersections:  | 
 | 70 | +        union_expr = f"({union_expr})"  | 
 | 71 | + | 
 | 72 | +    if len(intersections) > 1 and unions:  | 
 | 73 | +        intersection_expr = f"({intersection_expr})"  | 
 | 74 | + | 
 | 75 | +    return " | ".join([expr for expr in [union_expr, intersection_expr] if expr])  | 
 | 76 | + | 
 | 77 | + | 
 | 78 | +def _split_unions_and_intersections(selector_str: str) -> t.Tuple[t.List[str], t.List[str]]:  | 
 | 79 | +    # break space-separated items like: "my_first_model my_second_model" into a list of selectors to union  | 
 | 80 | +    # and comma-separated items like: "my_first_model,my_second_model" into a list of selectors to intersect  | 
 | 81 | +    # but, take into account brackets, eg "(my_first_model & my_second_model)" should not be split  | 
 | 82 | + | 
 | 83 | +    def _split_by(input: str, delimiter: str) -> t.Iterator[str]:  | 
 | 84 | +        buf = ""  | 
 | 85 | +        depth = 0  | 
 | 86 | + | 
 | 87 | +        for char in input:  | 
 | 88 | +            if char == delimiter and depth <= 0:  | 
 | 89 | +                # only split on a space if we are not within parenthesis  | 
 | 90 | +                yield buf  | 
 | 91 | +                buf = ""  | 
 | 92 | +                continue  | 
 | 93 | +            elif char == "(":  | 
 | 94 | +                depth += 1  | 
 | 95 | +            elif char == ")":  | 
 | 96 | +                depth -= 1  | 
 | 97 | + | 
 | 98 | +            buf += char  | 
 | 99 | + | 
 | 100 | +        if buf:  | 
 | 101 | +            yield buf  | 
 | 102 | + | 
 | 103 | +    # first, break up based on spaces  | 
 | 104 | +    segments = list(_split_by(selector_str, " "))  | 
 | 105 | + | 
 | 106 | +    # then, within each segment, identify the unions and intersections  | 
 | 107 | +    unions = []  | 
 | 108 | +    intersections = []  | 
 | 109 | + | 
 | 110 | +    for segment in segments:  | 
 | 111 | +        maybe_intersections = list(_split_by(segment, ","))  | 
 | 112 | +        if len(maybe_intersections) > 1:  | 
 | 113 | +            intersections.extend(maybe_intersections)  | 
 | 114 | +        else:  | 
 | 115 | +            unions.append(segment)  | 
 | 116 | + | 
 | 117 | +    return unions, intersections  | 
 | 118 | + | 
 | 119 | + | 
 | 120 | +def _negate(expr: str) -> str:  | 
 | 121 | +    return f"^{_wrap(expr)}"  | 
 | 122 | + | 
 | 123 | + | 
 | 124 | +def _wrap(expr: str) -> str:  | 
 | 125 | +    already_wrapped = expr.strip().startswith("(") and expr.strip().endswith(")")  | 
 | 126 | + | 
 | 127 | +    if expr and not already_wrapped:  | 
 | 128 | +        return f"({expr})"  | 
 | 129 | + | 
 | 130 | +    return expr  | 
0 commit comments