Skip to content

Commit 0510e0d

Browse files
committed
Working GUI, improved error reporting and parallel validation
1 parent f30bb33 commit 0510e0d

File tree

8 files changed

+114
-165
lines changed

8 files changed

+114
-165
lines changed

ESSArch_Core/WorkflowEngine/util.py

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -55,6 +55,7 @@ def _create_step(parent_step, flow, ip, responsible, context=None):
5555

5656
child_s = ProcessStep.objects.create(
5757
name=flow_entry['name'],
58+
parallel=flow_entry['parallel'],
5859
parent_step=parent_step,
5960
parent_step_pos=e_idx,
6061
eager=parent_step.eager,

ESSArch_Core/config/urls.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -39,7 +39,6 @@
3939
ValidationFilesViewSet,
4040
ValidationViewSet,
4141
ValidatorViewSet,
42-
ValidatorWorkflowViewSet,
4342
)
4443
from ESSArch_Core.ip.views import (
4544
ConsignMethodViewSet,
@@ -323,7 +322,6 @@
323322
router.register(r'conversion-tools', ConversionToolViewSet)
324323
router.register(r'features', FeatureViewSet, basename='features')
325324
router.register(r'validators', ValidatorViewSet, basename='validators')
326-
router.register(r'validator-workflows', ValidatorWorkflowViewSet, basename='validator-workflows')
327325
router.register(r'validations', ValidationViewSet)
328326
router.register(r'events', EventIPViewSet)
329327
router.register(r'event-types', EventTypeViewSet)

ESSArch_Core/fixity/serializers.py

Lines changed: 16 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -20,24 +20,32 @@ class ValidatorWorkflowSerializer(serializers.Serializer):
2020

2121
def validate_validators(self, validators):
2222
new_data = []
23-
sub_context = {'information_package': self.context['request'].data.get('information_package', None)}
23+
ip = self.context['request'].data.get('information_package', None)
24+
sub_context = {'information_package': ip}
25+
sub_context.update(self.context)
2426

2527
for validator in validators:
2628
name = validator.pop('name')
2729
klass = get_validator(name)
28-
options_serializer = klass.get_options_serializer_class()(
29-
data=validator.pop('options', {}),
30-
context=sub_context,
31-
)
30+
3231
serializer = klass.get_serializer_class()(
3332
data=validator, context=sub_context,
3433
)
35-
3634
serializer.is_valid(True)
37-
options_serializer.is_valid(True)
38-
3935
data = serializer.validated_data
4036
data['name'] = name
37+
38+
options_data = validator.pop('options', {})
39+
options_context = {
40+
'information_package': ip,
41+
'base_data': data,
42+
}
43+
options_serializer = klass.get_options_serializer_class()(
44+
data=options_data,
45+
context=options_context,
46+
)
47+
48+
options_serializer.is_valid(True)
4149
data['options'] = options_serializer.validated_data
4250

4351
new_data.append(data)

ESSArch_Core/fixity/validation/backends/checksum.py

Lines changed: 53 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import logging
2-
import traceback
2+
import os
33

44
from django.utils import timezone
55
from rest_framework import serializers
@@ -28,14 +28,52 @@ class ChecksumValidator(BaseValidator):
2828

2929
label = 'Checksum Validator'
3030

31+
@classmethod
32+
def get_form(cls):
33+
return [
34+
{
35+
'key': 'path',
36+
'type': 'input',
37+
'templateOptions': {
38+
'label': 'Path to validate',
39+
'required': True,
40+
}
41+
},
42+
{
43+
'key': 'options.algorithm',
44+
'type': 'select',
45+
'defaultValue': 'SHA-256',
46+
'templateOptions': {
47+
'label': 'Checksum algorithm',
48+
'required': True,
49+
'labelProp': 'name',
50+
'valueProp': 'value',
51+
'options': [
52+
{'name': 'MD5', 'value': 'MD5'},
53+
{'name': 'SHA-1', 'value': 'SHA-1'},
54+
{'name': 'SHA-224', 'value': 'SHA-224'},
55+
{'name': 'SHA-256', 'value': 'SHA-256'},
56+
{'name': 'SHA-384', 'value': 'SHA-384'},
57+
{'name': 'SHA-512', 'value': 'SHA-512'},
58+
]
59+
}
60+
},
61+
{
62+
'key': 'options.expected',
63+
'type': 'input',
64+
'templateOptions': {
65+
'label': 'Checksum',
66+
'required': True,
67+
}
68+
},
69+
]
70+
3171
class Serializer(BaseValidator.Serializer):
32-
context = serializers.ChoiceField(choices=['checksum_str', 'checksum_file', 'xml_file'])
72+
context = serializers.CharField(default='checksum_str')
3373
block_size = serializers.IntegerField(default=65536)
3474

3575
class OptionsSerializer(BaseValidator.OptionsSerializer):
3676
expected = serializers.CharField()
37-
rootdir = serializers.CharField(default='', allow_blank=True)
38-
recursive = serializers.BooleanField(default=True)
3977
algorithm = serializers.ChoiceField(
4078
choices=['MD5', 'SHA-1', 'SHA-224', 'SHA-256', 'SHA-384', 'SHA-512'],
4179
default='SHA-256',
@@ -52,8 +90,14 @@ def __init__(self, *args, **kwargs):
5290

5391
def validate(self, filepath, expected=None):
5492
logger.debug('Validating checksum of %s' % filepath)
93+
94+
if self.ip is not None:
95+
relpath = os.path.relpath(filepath, self.ip.object_path)
96+
else:
97+
relpath = filepath
98+
5599
val_obj = Validation.objects.create(
56-
filename=filepath,
100+
filename=relpath,
57101
time_started=timezone.now(),
58102
validator=self.__class__.__name__,
59103
required=self.required,
@@ -82,14 +126,14 @@ def validate(self, filepath, expected=None):
82126
actual_checksum = calculate_checksum(filepath, algorithm=self.algorithm, block_size=self.block_size)
83127
if actual_checksum != checksum:
84128
raise ValidationError("checksum for %s is not valid (%s != %s)" % (
85-
filepath, checksum, actual_checksum
129+
relpath, checksum, actual_checksum
86130
))
87131
passed = True
88-
except Exception:
89-
val_obj.message = traceback.format_exc()
132+
except Exception as e:
133+
val_obj.message = str(e)
90134
raise
91135
else:
92-
message = 'Successfully validated checksum of %s' % filepath
136+
message = 'Successfully validated checksum of %s' % relpath
93137
val_obj.message = message
94138
logger.info(message)
95139
finally:

ESSArch_Core/fixity/validation/backends/xml.py

Lines changed: 7 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -94,13 +94,19 @@ def __init__(self, *args, **kwargs):
9494
self.fields['context'] = FilePathField(ip.object_path, allow_blank=True, default='')
9595

9696
class OptionsSerializer(BaseValidator.OptionsSerializer):
97-
rootdir = serializers.CharField(default='', allow_blank=True)
97+
rootdir = serializers.CharField(required=False)
9898
recursive = serializers.BooleanField(default=True)
9999
default_algorithm = serializers.ChoiceField(
100100
choices=['MD5', 'SHA-1', 'SHA-224', 'SHA-256', 'SHA-384', 'SHA-512'],
101101
default='SHA-256',
102102
)
103103

104+
def validate(self2, data):
105+
if 'rootdir' not in data:
106+
data['rootdir'] = self2.context['base_data']['path']
107+
108+
return data
109+
104110
def __init__(self, *args, **kwargs):
105111
super().__init__(*args, **kwargs)
106112

ESSArch_Core/fixity/views.py

Lines changed: 1 addition & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,6 @@
1-
import os
2-
3-
from django.db import transaction
41
from django.db.models import Exists, Max, Min, OuterRef
52
from django_filters.rest_framework import DjangoFilterBackend
6-
from rest_framework import filters, mixins, status, viewsets
3+
from rest_framework import filters, viewsets
74
from rest_framework.permissions import IsAuthenticated
85
from rest_framework.response import Response
96
from rest_framework_extensions.mixins import NestedViewSetMixin
@@ -15,14 +12,11 @@
1512
ConversionToolSerializer,
1613
ValidationFilesSerializer,
1714
ValidationSerializer,
18-
ValidatorWorkflowSerializer,
1915
)
2016
from ESSArch_Core.fixity.validation import (
2117
AVAILABLE_VALIDATORS,
2218
get_backend as get_validator,
2319
)
24-
from ESSArch_Core.WorkflowEngine.models import ProcessStep
25-
from ESSArch_Core.WorkflowEngine.util import create_workflow
2620

2721

2822
class ConversionToolViewSet(viewsets.ReadOnlyModelViewSet):
@@ -57,49 +51,6 @@ def list(self, request, format=None):
5751
return Response(validators)
5852

5953

60-
class ValidatorWorkflowViewSet(mixins.CreateModelMixin, viewsets.GenericViewSet):
61-
queryset = ProcessStep.objects.all()
62-
serializer_class = ValidatorWorkflowSerializer
63-
64-
def create(self, request, *args, **kwargs):
65-
serializer = self.get_serializer(data=request.data, context={'request': self.request})
66-
serializer.is_valid(raise_exception=True)
67-
workflow_spec = []
68-
ip = serializer.validated_data['information_package']
69-
70-
for validator in serializer.validated_data['validators']:
71-
name = validator['name']
72-
klass = get_validator(name)
73-
options_serializer = klass.get_options_serializer_class()(data=validator.get('options', {}))
74-
options_serializer.is_valid(raise_exception=True)
75-
options = options_serializer.validated_data
76-
77-
path = os.path.join(ip.object_path, validator['path'])
78-
options['rootdir'] = ip.object_path
79-
80-
task_spec = {
81-
'name': 'ESSArch_Core.fixity.validation.tasks.Validate',
82-
'label': 'Validate using {}'.format(klass.label),
83-
'args': [name, path],
84-
'params': {'context': validator['context'], 'options': options},
85-
}
86-
87-
workflow_spec.append(task_spec)
88-
89-
with transaction.atomic():
90-
step = {
91-
'step': True,
92-
'name': serializer.validated_data['purpose'],
93-
'children': workflow_spec
94-
}
95-
workflow = create_workflow([step], ip=ip, name='Validation')
96-
97-
workflow.run()
98-
99-
headers = self.get_success_headers(serializer.data)
100-
return Response(serializer.data, status=status.HTTP_201_CREATED, headers=headers)
101-
102-
10354
class ValidationViewSet(NestedViewSetMixin, viewsets.ReadOnlyModelViewSet):
10455
queryset = Validation.objects.all().order_by('filename', 'validator')
10556
serializer_class = ValidationSerializer

ESSArch_Core/ip/views.py

Lines changed: 36 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -64,8 +64,12 @@
6464
from ESSArch_Core.essxml.util import get_objectpath, parse_submit_description
6565
from ESSArch_Core.exceptions import Conflict, NoFileChunksFound
6666
from ESSArch_Core.fixity.format import FormatIdentifier
67+
from ESSArch_Core.fixity.serializers import ValidatorWorkflowSerializer
6768
from ESSArch_Core.fixity.transformation import AVAILABLE_TRANSFORMERS
68-
from ESSArch_Core.fixity.validation import AVAILABLE_VALIDATORS
69+
from ESSArch_Core.fixity.validation import (
70+
AVAILABLE_VALIDATORS,
71+
get_backend as get_validator,
72+
)
6973
from ESSArch_Core.fixity.validation.backends.checksum import ChecksumValidator
7074
from ESSArch_Core.ip.filters import (
7175
AgentFilter,
@@ -1923,46 +1927,45 @@ def unlock_profile(self, request, pk=None):
19231927
)
19241928
})
19251929

1926-
@transaction.atomic
19271930
@action(detail=True, methods=['post'], url_path='validate')
19281931
def validate(self, request, pk=None):
19291932
ip = self.get_object()
19301933

1931-
prepare = Path.objects.get(entity="ingest_workarea").value
1932-
xmlfile = os.path.join(prepare, "%s.xml" % pk)
1934+
request.data['information_package'] = str(ip.pk)
1935+
serializer = ValidatorWorkflowSerializer(data=request.data, context={'request': request})
1936+
serializer.is_valid(raise_exception=True)
1937+
workflow_spec = []
1938+
ip = serializer.validated_data['information_package']
19331939

1934-
step = ProcessStep.objects.create(
1935-
name="Validation",
1936-
information_package=ip
1937-
)
1940+
for validator in serializer.validated_data['validators']:
1941+
name = validator['name']
1942+
klass = get_validator(name)
19381943

1939-
step.add_tasks(
1940-
ProcessTask.objects.create(
1941-
name="ESSArch_Core.tasks.ValidateXMLFile",
1942-
params={
1943-
"xml_filename": xmlfile
1944-
},
1945-
log=EventIP,
1946-
information_package=ip,
1947-
responsible=self.request.user,
1948-
),
1949-
ProcessTask.objects.create(
1950-
name="ESSArch_Core.tasks.ValidateFiles",
1951-
params={
1952-
"mets_path": xmlfile,
1953-
"validate_fileformat": True,
1954-
"validate_integrity": True,
1955-
},
1956-
log=EventIP,
1957-
processstep_pos=0,
1958-
information_package=ip,
1959-
responsible=self.request.user,
1960-
)
1961-
)
1944+
options = validator['options']
1945+
path = os.path.join(ip.object_path, validator['path'])
19621946

1963-
step.run()
1947+
task_spec = {
1948+
'name': 'ESSArch_Core.fixity.validation.tasks.Validate',
1949+
'label': 'Validate using {}'.format(klass.label),
1950+
'args': [name, path],
1951+
'params': {'context': validator['context'], 'options': options},
1952+
}
19641953

1965-
return Response("Validating IP")
1954+
workflow_spec.append(task_spec)
1955+
1956+
with transaction.atomic():
1957+
step = {
1958+
'step': True,
1959+
'name': serializer.validated_data['purpose'],
1960+
'parallel': True,
1961+
'children': workflow_spec
1962+
}
1963+
workflow = create_workflow([step], ip=ip, name='Validation')
1964+
1965+
workflow.run()
1966+
1967+
headers = self.get_success_headers(serializer.data)
1968+
return Response(serializer.data, status=status.HTTP_201_CREATED, headers=headers)
19661969

19671970
def update(self, request, *args, **kwargs):
19681971
ip = self.get_object()

0 commit comments

Comments
 (0)