def export_api(args: 'Namespace'):
    """Write the Jina API description to every file requested on the command line.

    Three independent export groups are handled, in this order:

    - ``args.yaml_path``: the CLI API dict, serialised as YAML via ``JAML``
    - ``args.json_path``: the CLI API dict, serialised as JSON
    - ``args.schema_path``: the full JSON Schema, serialised as JSON

    A path containing ``%s`` has the current Jina version substituted into it.
    The payload of a group is built once and reused for all of its paths.

    :param args: the parsed command line arguments
    """
    import json
    from .export import api_to_dict
    from jina.jaml import JAML
    from jina import __version__
    from jina.logging import default_logger
    from jina.schemas import get_full_schema

    def _write_yaml(payload, stream):
        JAML.dump(payload, stream)

    def _write_json(payload, stream):
        json.dump(payload, stream, sort_keys=True)

    # (argument name, payload factory, serialiser); the attribute is looked up
    # lazily so each group is only touched when its turn comes
    export_plan = (
        ('yaml_path', api_to_dict, _write_yaml),
        ('json_path', api_to_dict, _write_json),
        ('schema_path', get_full_schema, _write_json),
    )

    for attr_name, build_payload, serialise in export_plan:
        requested = getattr(args, attr_name)
        if not requested:
            continue
        payload = build_payload()
        for template in requested:
            destination = (template % __version__) if '%s' in template else template
            with open(destination, 'w', encoding='utf8') as stream:
                serialise(payload, stream)
            default_logger.info(f'API is exported to {destination}')
'--download-proxy', '--shards', '--parallel', '--uses-index', '--index-data-url', '--index-labels-url', '--index-request-size', '--uses-query', '--query-data-url', '--query-labels-url', '--query-request-size', '--num-query', '--top-k']}} diff --git a/extra-requirements.txt b/extra-requirements.txt index 09406faf89a1d..f1eeb6cdcc04d 100644 --- a/extra-requirements.txt +++ b/extra-requirements.txt @@ -73,4 +73,4 @@ pytest-custom_exit_code: cicd, test bs4: test aiostream: devel, cicd click: cicd - +jsonschema: cicd diff --git a/jina/importer.py b/jina/importer.py index 746f8602b54c4..24e780f9cc520 100644 --- a/jina/importer.py +++ b/jina/importer.py @@ -10,6 +10,8 @@ IMPORTED.executors = False IMPORTED.drivers = False IMPORTED.hub = False +IMPORTED.schema_executors = {} +IMPORTED.schema_drivers = {} def import_classes(namespace: str, @@ -292,6 +294,8 @@ def _update_depend_tree(cls_obj, module_name, cur_tree): def _import_module(module_name, import_type, depend_tree, load_stat): from importlib import import_module from .helper import colored + from .schemas.helper import _jina_class_to_schema + bad_imports = [] _mod_obj = import_module(module_name) for _attr in dir(_mod_obj): @@ -303,6 +307,9 @@ def _import_module(module_name, import_type, depend_tree, load_stat): _update_depend_tree(_cls_obj, module_name, depend_tree) if _cls_obj.__class__.__name__ == 'ExecutorType': _load_default_exc_config(_cls_obj) + IMPORTED.schema_executors[f'Jina::Executors::{_cls_obj.__name__}'] = _jina_class_to_schema(_cls_obj) + else: + IMPORTED.schema_drivers[f'Jina::Drivers::{_cls_obj.__name__}'] = _jina_class_to_schema(_cls_obj) # TODO: _success_msg is never used _success_msg = colored('▸', 'green').join(f'{vvv.__name__}' for vvv in _cls_obj.mro()[:-1][::-1]) load_stat[module_name].append((_attr, True, _success_msg)) diff --git a/jina/parsers/export_api.py b/jina/parsers/export_api.py index 52a795c9ed36e..d5adb6b69c9ae 100644 --- a/jina/parsers/export_api.py +++ 
def get_full_schema() -> dict:
    """Assemble the complete JSON Schema (draft-07) describing Jina YAML objects.

    All partial schemas (drivers, executors, flow, metas, requests, pod and the
    per-class schemas collected in ``IMPORTED``) are merged into one
    ``definitions`` mapping; later sources override earlier ones on key clashes.

    :return: the full schema for Jina core as a dict
    """
    from .. import __version__
    from ..importer import IMPORTED
    from .driver import schema_all_drivers
    from .executor import schema_all_executors
    from .flow import schema_flow
    from .meta import schema_metas
    from .request import schema_requests
    from .pod import schema_pod

    definitions = {
        **schema_all_drivers,
        **schema_all_executors,
        **schema_flow,
        **schema_metas,
        **schema_requests,
        **schema_pod,
        **IMPORTED.schema_executors,
        **IMPORTED.schema_drivers,
    }

    # fix CompoundExecutor: its `components` are themselves a list of executors
    compound_properties = definitions['Jina::Executors::CompoundExecutor']['properties']
    compound_properties['components'] = {'$ref': '#/definitions/Jina::Executors::All'}

    # a document is either a Flow or any one of the known executors
    root_alternatives = [{'$ref': '#/definitions/Jina::Flow'}]
    for executor_key in IMPORTED.schema_executors.keys():
        root_alternatives.append({"$ref": f"#/definitions/{executor_key}"})

    return {
        '$id': f'https://api.jina.ai/schemas/{__version__}.json',
        '$schema': 'http://json-schema.org/draft-07/schema#',
        'description': 'The YAML schema of Jina objects (Flow, Executor, Drivers).',
        'type': 'object',
        'oneOf': root_alternatives,
        'definitions': definitions,
    }
import inspect
import re
import typing
from functools import reduce

# Python type names (as exported by the CLI / found in annotations) and the
# JSON Schema primitive type each one maps to.
_SCHEMA_TYPE_BY_PYTHON_NAME = {
    'str': 'string',
    'int': 'number',
    'float': 'number',
    'typing.List[str]': 'array',
    'typing.Tuple[str]': 'array',
    'list': 'array',
    'tuple': 'array',
    'bool': 'boolean',
    'dict': 'object',
}

# Parameters of ``__init__`` that never become schema properties.
_TABOO_PARAMETERS = frozenset({'self', 'args', 'kwargs'})


def _python_type_to_schema_type(p):
    """Translate a Python type name into a JSON Schema type name.

    :param p: the Python type name, e.g. ``'str'`` or ``'typing.List[str]'``
    :return: the JSON Schema type name, or ``None`` when ``p`` is not supported
    """
    return _SCHEMA_TYPE_BY_PYTHON_NAME.get(p)


def _cli_to_schema(api_dict, target,
                   extras=None,
                   required=None,
                   allow_addition=False,
                   namespace='Jina'):
    """Convert the exported CLI options of one command into a JSON Schema object.

    :param api_dict: the exported API; ``api_dict['methods']`` is a list of dicts
        with the keys ``name`` and ``options``
    :param target: the name of the method whose options are converted
    :param extras: extra properties added on top of (and overriding) the CLI ones
    :param required: extra property names appended to the ``required`` list
    :param allow_addition: the value of ``additionalProperties`` of the schema
    :param namespace: the prefix of the key under which the schema is returned
    :return: a dict with the single key ``'{namespace}::{Target}'`` mapping to the schema
    :raises ValueError: when ``target`` is not among ``api_dict['methods']``
    """
    for method in api_dict['methods']:
        if method['name'] == target:
            options = method['options']
            break
    else:
        # fail with a readable message instead of iterating over ``None`` below
        raise ValueError(f'{target!r} is not a method of the exported API')

    properties = {}
    required_names = []

    for option in options:
        dtype = _python_type_to_schema_type(option['type'])
        entry = {'description': option['help'].strip()}
        if dtype is not None:
            # ``None`` is not a valid JSON Schema type: leave unknown types unconstrained
            entry['type'] = dtype
        entry['default'] = option['default']
        if option['choices']:
            entry['enum'] = option['choices']
        if option['required']:
            required_names.append(option['name'])
        if dtype == 'array':
            # ``items`` describes the elements of this array property, hence it
            # lives on the property itself and not on the enclosing object
            entry['items'] = {
                'type': 'string',
                'minItems': 1,
                'uniqueItems': True
            }
        properties[option['name']] = entry

    if extras:
        properties.update(extras)
    if required:
        required_names.extend(required)

    return {
        f'{namespace}::{target.capitalize()}': {
            'properties': properties,
            'type': 'object',
            'required': required_names,
            'additionalProperties': allow_addition,
        }
    }


def _annotation_type_name(annotation):
    """Return the name of the runtime class behind an annotation.

    :param annotation: the annotation of a parameter
    :return: the class name, or ``None`` when the annotation has none (e.g. a string)
    """
    if getattr(annotation, '__origin__', None) is typing.Union:
        # Optional[X] / Union[X, ...]: the first alternative describes the argument
        annotation = annotation.__args__[0]
    # subscripted generics expose their runtime class as ``__origin__``
    runtime_class = getattr(annotation, '__origin__', annotation)
    return getattr(runtime_class, '__name__', None)


def _describe_init_parameters(class_):
    """Describe the ``__init__`` parameters of a single class.

    :param class_: the class to check
    :return: a dict mapping each parameter name to its ``default``, and to its
        ``type`` and ``description`` when they can be determined
    """
    init_doc = class_.__init__.__doc__
    described = {}

    for p in inspect.signature(class_.__init__).parameters.values():
        if p.name in _TABOO_PARAMETERS:
            continue

        has_default = p.default is not inspect.Parameter.empty
        entry = {'default': p.default if has_default else None}

        if p.annotation is not inspect.Parameter.empty:
            dtype = _python_type_to_schema_type(_annotation_type_name(p.annotation))
            if dtype:
                entry['type'] = dtype

        if init_doc:
            # match ``:param name:`` / ``:param type name:`` only; the name is
            # escaped and must be a whole word, so ``size`` does not pick up
            # the description of ``batch_size``
            m = re.search(rf':param\s+(?:[^:\n]*\s)?{re.escape(p.name)}\s*:(.*)', init_doc)
            if m and m.group(1):
                entry['description'] = m.group(1).strip()

        described[p.name] = entry

    return described


def _get_all_arguments(class_):
    """Collect the ``__init__`` arguments of a class and of all its ancestors.

    :param class_: the class to check
    :return: a dict mapping each argument name to its description; when several
        classes declare the same argument, the most derived class wins
    """
    # walk from ``object`` down to ``class_`` so that subclasses deterministically
    # override the entries of their bases
    base_to_derived = reversed(inspect.getmro(class_))
    layers = (_describe_init_parameters(c) for c in base_to_derived)
    return reduce(lambda merged, layer: {**merged, **layer}, layers, {})


def _jina_class_to_schema(cls):
    """Build the JSON Schema of one Jina Executor/Driver class.

    :param cls: the class to describe
    :return: an object schema with the properties ``jtype``, ``with``, ``metas``
        and ``requests``; ``with`` holds the arguments found by :func:`_get_all_arguments`
    """
    kwargs = _get_all_arguments(cls)
    doc = cls.__doc__.strip() if cls.__doc__ else ''
    summary = doc.split('\n')[0] if cls.__doc__ else ''

    return {
        'type': 'object',
        'description': doc,
        'properties': {
            'jtype': {
                'type': 'string',
                'const': cls.__name__,
                'description': summary
            },
            'with': {
                'type': 'object',
                'description': 'The arguments of this Jina Executor/Driver',
                'properties': kwargs,
                'additionalProperties': False
            },
            'metas': {
                '$ref': '#/definitions/Jina::Metas'
            },
            'requests': {
                '$ref': '#/definitions/Jina::Requests'
            }
        },
        'additionalProperties': False,
    }
+ 'description': 'Indicates if the executor is trained or not. ' + 'If not, then methods decorated by `@required_train` can not be executed.', + 'type': 'boolean', + 'default': False + }, + 'is_updated': { + 'description': 'Indicates if the executor is updated or changed since last save. ' + 'If not, then save() will do nothing. A forced save is possible to use `touch()` before `save()`', + 'type': 'boolean', + 'default': False + }, + 'batch_size': { + 'description': 'The size of each batch, methods decorated by `@batching` will respect this. ' + 'Useful when incoming data is too large to fit into (GPU) memory.', + 'type': 'number', + 'default': None + }, + 'workspace': { + 'description': ''' +The working directory, for persisting the artifacts of the executor. An artifact is a file or collection of files used during a workflow run. + +By default it is not set, if you expect your executor to be persisted or to persist any data, remember to set it to the desired value. + +When a BaseExecutor is a component of a `CompoundExecutor`, its workspace value will be overriden by the workspace coming from the `CompoundExecutor` unless a particular workspace value is set for the component BaseExecutor. + ''', + 'type': 'string', + 'default': None + }, + 'name': { + 'description': 'The name of the executor.', + 'type': 'string', + 'default': None + }, + 'description': { + 'description': 'The description of this executor. It will be used in automatics docs UI', + 'type': 'string', + 'default': None + }, + 'read_only': { + 'description': 'Do not allow the Pod to modify the Executor, save calls will be ignored. 
' + 'If set to true no serialization of the Executor', + 'type': 'boolean', + 'default': False + }, + 'on_gpu': { + 'description': 'If the executor is set to run on GPU.', + 'type': 'boolean', + 'default': False + }, + 'py_modules': { + 'type': 'array', + 'default': None, + 'items': { + 'type': 'string', + 'minItems': 1, + 'uniqueItems': True + }, + 'description': ''' +The external python module paths. it is useful when you want to load external python modules +using :func:`BaseExecutor.load_config` from a YAML file. If a relative path is given then the root path is set to +the path of the current YAML file. + +Example of ``py_module`` usage: + +1. This is a valid structure and it is RECOMMENDED: + - "my_cust_module" is a python module + - all core logic of your customized executor goes to ``__init__.py`` + - to import ``foo.py``, you can use relative import, e.g. ``from .foo import bar`` + - ``helper.py`` needs to be put BEFORE `__init__.py` in YAML ``py_modules`` + +This is also the structure given by ``jina hub new`` CLI. + + .. highlight:: text + .. code-block:: text + + my_cust_module + |- __init__.py + |- helper.py + |- config.yml + |- py_modules + |- helper.py + |- __init__.py + +2. This is a valid structure but not recommended: + - "my_cust_module" is not a python module (lack of __init__.py under the root) + - to import ``foo.py``, you must to use ``from jinahub.foo import bar`` + - ``jinahub`` is a common namespace for all plugin-modules, not changeable. + - ``helper.py`` needs to be put BEFORE `my_cust.py` in YAML ``py_modules`` + + .. highlight:: text + .. 
code-block:: text + + my_cust_module + |- my_cust.py + |- helper.py + |- config.yml + |- py_modules + |- helper.py + |- my_cust.py + ''' + } + } + } +} diff --git a/jina/schemas/pod.py b/jina/schemas/pod.py new file mode 100644 index 0000000000000..caa9b4269c656 --- /dev/null +++ b/jina/schemas/pod.py @@ -0,0 +1,29 @@ +from cli.export import api_to_dict +from .helper import _cli_to_schema + +schema_pod = _cli_to_schema(api_to_dict(), + 'pod', + extras={ + 'needs': { + 'description': 'The name of the Pod(s) that this Pod receives data from. ' + 'One can also use `gateway` to ' + 'indicate the connection with the gateway.\n' + 'This is useful to create parallelization. ' + 'By default the Flow always works sequentially ' + 'following the defined order of the Pods.', + 'type': ['array', 'string'], + 'items': { + 'type': 'string', + 'minItems': 1, + "uniqueItems": True + } + }, + 'method': { + 'description': 'The method to use when appending the Pod to the Flow', + 'type': 'string', + 'enum': ['add', 'needs', 'inspect', 'needs_all', + 'gather_inspect'], + 'default': 'add' + }, + }, + allow_addition=False) diff --git a/jina/schemas/request.py b/jina/schemas/request.py new file mode 100644 index 0000000000000..c2951a8640ddf --- /dev/null +++ b/jina/schemas/request.py @@ -0,0 +1,35 @@ +schema_requests = { + 'Jina::Requests': { + 'description': 'Define how the Executor behaves under network requests.', + 'type': 'object', + 'additionalProperties': False, + 'properties': { + 'use_default': { + 'description': 'If set, then inherit from the default `Driver` settings for this type of Executor.', + 'type': 'boolean', + 'default': False + }, + 'on': { + 'description': 'Defines how the `Executor` behaves under different types of request', + 'type': 'object', + 'properties': { + f'{r_type}Request': { + 'type': 'object', + 'properties': { + 'with': { + 'type': 'object', + 'description': 'The common kwargs that all drivers defined under this Request.' 
from jsonschema import Draft7Validator

from jina.schemas import get_full_schema


def test_full_schema():
    """The schema generated for Jina must itself be a valid draft-07 JSON Schema."""
    full_schema = get_full_schema()
    Draft7Validator.check_schema(full_schema)