Source code for validation.helpers
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Tue Feb 19 16:15:35 2019
@author: Paolo Cozzi <cozzi@ibba.cnr.it>
"""
import json
import logging
import requests
from django.core.exceptions import ObjectDoesNotExist
from image_validation import validation, ValidationResult
from image_validation.static_parameters import ruleset_filename as \
IMAGE_RULESET
from common.constants import BIOSAMPLE_URL
from uid.helpers import parse_image_alias, get_model_object
from validation.models import ValidationSummary
# Get an instance of a logger
logger = logging.getLogger(__name__)
# a class to deal with temporary issues from EBI servers
class OntologyCacheError(Exception):
    """Signal a temporary problem with the EBI servers.

    Raised when image_validation.use_ontology.OntologyCache objects can't
    be built or used because of such an issue.
    """
# a class to deal with errors in ruleset (that are not user errors but
# errors within InjectTool and image_validation library)
class MetaDataValidation():
    """A class to deal with IMAGE-ValidationTool ruleset objects.

    The ruleset is loaded and checked when the object is created; the
    other methods delegate to ``image_validation.validation`` functions
    and add relationship checks on top of them.
    """

    # ruleset object, assigned by read_in_ruleset()
    ruleset = None

    # seconds to wait for a BioSamples reply before giving up on a lookup
    biosample_timeout = 30

    def __init__(self, ruleset_filename=IMAGE_RULESET):
        """Read the ruleset from ``ruleset_filename`` and check it.

        Args:
            ruleset_filename (str): path of the ruleset file, defaults to
                the one shipped with image_validation

        Raises:
            OntologyCacheError: if the ruleset can't be read (see
                :py:meth:`read_in_ruleset`)
            RulesetError: if the ruleset check status is not 'Pass'
        """
        self.read_in_ruleset(ruleset_filename)

        # check validation rules
        # NOTE(review): check_ruleset() and RulesetError are not defined in
        # the visible part of this module -- confirm they are provided
        result = self.check_ruleset()

        if result.get_overall_status() != 'Pass':
            ruleset_errors = result.get_messages()
            raise RulesetError(
                "Error with ruleset: %s" % "; ".join(ruleset_errors))

    def read_in_ruleset(self, ruleset_filename):
        """Load the ruleset and store it in ``self.ruleset``.

        Args:
            ruleset_filename (str): path of the ruleset file

        Raises:
            OntologyCacheError: if reading the ruleset fails with a JSON
                decoding error, which is reported as an issue with the
                EBI OLS API
        """
        try:
            self.ruleset = validation.read_in_ruleset(ruleset_filename)

        except json.JSONDecodeError as message:
            logger.error(
                "Error with 'https://www.ebi.ac.uk/ols/api/': %s", message)

            # keep the original decoding error attached as the cause
            raise OntologyCacheError(
                "Issue with 'https://www.ebi.ac.uk/ols/api/'") from message

    def check_usi_structure(self, record: object) -> object:
        """Check data against USI rules"""

        # this function need its input as a list
        return validation.check_usi_structure(record)

    def check_duplicates(self, record):
        """Check duplicates in data"""

        return validation.check_duplicates(record)

    def check_biosample_id_target(
            self, biosample_id, record_id, record_result):
        """
        Check if a target biosample_id exists or not. If it is present, ok.
        Otherwise a ValidationResultColumn with a warning is added to
        record_result. A request that gets no reply within
        ``biosample_timeout`` seconds is reported with the same warning.

        Args:
            biosample_id (str): the desired biosample id
            record_id (str): is the name of the object in the original data
                source
            record_result (ValidationResult.ValidationResultRecord):
                an image_validation result object

        Returns:
            ValidationResult.ValidationResultRecord: an updated
            image_validation object
        """
        url = f"{BIOSAMPLE_URL}/{biosample_id}"

        try:
            # never wait forever on a stalled server
            response = requests.get(url, timeout=self.biosample_timeout)

        except requests.exceptions.Timeout:
            logger.warning("Timeout while retrieving %s", url)
            retrieved = False

        else:
            retrieved = response.status_code == 200

        if not retrieved:
            record_result.add_validation_result_column(
                ValidationResult.ValidationResultColumn(
                    "Warning",
                    f"Fail to retrieve record {biosample_id} from "
                    f"BioSamples as required in the relationship",
                    record_id,
                    'sampleRelationships'))

        return record_result

    def check_relationship(self, record, record_result):
        """
        Check relationship for an Animal/Sample record and return a list
        of dictionaries (to_biosample() objects) of related object

        Args:
            record (dict): An Animal/Sample.to_biosample() dictionary object
            record_result (ValidationResult.ValidationResultRecord):
                an image_validation result object

        Returns:
            tuple: a ``(related, record_result)`` pair, where ``related``
            is a list of dictionaries of related objects and
            ``record_result`` is the updated image_validation object
        """
        # get relationship from a to_biosample() dictionary object
        relationships = record.get('sampleRelationships', [])

        # as described in image_validation.Submission.Submission
        # same as record["title"], is the original name of the object id DS
        record_id = record['attributes']["Data source ID"][0]['value']

        # related objects (from UID goes here)
        related = []

        for relationship in relationships:
            if 'accession' in relationship:
                target = relationship['accession']

                # check biosample target and update record_result if
                # necessary
                record_result = self.check_biosample_id_target(
                    target, record_id, record_result)

            # HINT: should I check aliases? they came from PK and are related
            # in the same submission. I can't have a sample without an animal
            # since animal is a foreign key of sample (which doesn't tolerate
            # NULL). Even mother and father are related through keys. If
            # missing, no information about mother and father could be
            # determined
            else:
                # could be a parent relationship for an animal, or the animal
                # where this sample comes from
                target = relationship['alias']

                # test for object existence in db: derive a model object
                # from the alias, then get its related data
                try:
                    material_obj = get_model_object(
                        *parse_image_alias(target))
                    related.append(material_obj.to_biosample())

                except ObjectDoesNotExist:
                    record_result.add_validation_result_column(
                        ValidationResult.ValidationResultColumn(
                            "Error",
                            f"Could not locate the referenced record {target}",
                            record_id, 'sampleRelationships'))

        return related, record_result

    def validate(self, record):
        """
        Check attributes for record by calling image_validation methods

        Args:
            record (dict): An Animal/Sample.to_biosample() dictionary object

        Returns:
            ValidationResult.ValidationResultRecord: an image_validation
            object
        """
        # this validated in general way
        result = self.ruleset.validate(record)

        logger.debug("Got %s as result from validate", result.get_messages())

        # as defined in image_validation.Submission, I will skip further
        # validation check
        if result.get_overall_status() == "Error":
            logger.warning(
                "record: %s has errors. Skipping context validation",
                record["title"])

        else:
            # context validation evaluate relationships. Get them
            related, result = self.check_relationship(record, result)

            logger.debug(
                "Got %s and %s as result from check_relationship",
                related, result.get_messages())

            # this validate context (attributes that depends on another one)
            result = validation.context_validation(record, result, related)

            logger.debug(
                "Got %s as result for context_validation",
                result.get_messages())

        return result
def construct_validation_message(submission):
    """
    Collect the figures needed to build a validation message

    Args:
        submission (uid.models.Submission) : submission to get data from

    Returns:
        dict: total, unknown and issues counts for animals and samples, or
        None if a required database object does not exist
    """
    try:
        animal_summary = ValidationSummary.objects.get(
            submission=submission, type='animal')
        sample_summary = ValidationSummary.objects.get(
            submission=submission, type='sample')

        return {
            # number of animals and samples
            'animals': animal_summary.all_count,
            'samples': sample_summary.all_count,

            # number of unknown validations
            'animal_unkn': animal_summary.get_unknown_count(),
            'sample_unkn': sample_summary.get_unknown_count(),

            # number of problem validations
            'animal_issues': animal_summary.issues_count,
            'sample_issues': sample_summary.issues_count,
        }

    except ObjectDoesNotExist:
        return None