Source code for excel.helpers.fill_uid

#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Fri Jul  5 16:37:48 2019

@author: Paolo Cozzi <cozzi@ibba.cnr.it>
"""

import logging

from common.constants import (
    ERROR, LOADED, ACCURACIES, SAMPLE_STORAGE, SAMPLE_STORAGE_PROCESSING)
from common.helpers import image_timedelta, parse_image_timedelta
from uid.helpers import get_or_create_obj, update_or_create_obj
from uid.models import (
    DictBreed, DictCountry, DictSpecie, DictSex, DictUberon, Animal,
    Sample, DictDevelStage, DictPhysioStage)
from submissions.helpers import send_message
from validation.helpers import construct_validation_message
from validation.models import ValidationSummary

from .exceptions import ExcelImportError
from .exceltemplate import ExcelTemplateReader

# Get an instance of a logger
logger = logging.getLogger(__name__)


def fill_uid_breeds(submission_obj, template):
    """Fill DictBreed from a excel record"""

    logger.info("fill_uid_breeds() started")

    # ok get languages from submission (useful for translation)
    language = submission_obj.gene_bank_country.label

    # iterate among excel template
    for record in template.get_breed_records():
        # get a DictSpecie object. Species are in latin names, but I can
        # find also a common name in translation tables
        specie = DictSpecie.get_specie_check_synonyms(
            species_label=record.species,
            language=language)

        # get country for breeds. Ideally will be the same of submission,
        # however, it could be possible to store data from other contries
        country = DictCountry.objects.get(label=record.efabis_breed_country)

        get_or_create_obj(
            DictBreed,
            supplied_breed=record.supplied_breed,
            specie=specie,
            country=country)

    logger.info("fill_uid_breeds() completed")


def get_relationship(animal_id_in_data_source, name, breed, owner):
    try:
        parent = Animal.objects.get(
            name=name,
            breed=breed,
            owner=owner)

    except Animal.DoesNotExist as exc:
        logger.error(exc)
        message = (
            "Unknown parent '%s': check animal '%s' "
            "in your dataset" % (name, animal_id_in_data_source))
        logger.error(message)
        raise ExcelImportError(message)

    return parent


def fill_uid_animals(submission_obj, template):
    # debug
    logger.info("called fill_uid_animals()")

    # get language
    language = submission_obj.gene_bank_country.label

    # iterate among excel template
    for record in template.get_animal_records():
        # determine sex. Check for values
        sex = DictSex.objects.get(label__iexact=record.sex)

        # get specie (mind synonyms)
        specie = DictSpecie.get_specie_check_synonyms(
            species_label=record.species, language=language)

        logger.debug("Found '%s' as specie" % (specie))

        # how I can get breed from my data?
        breed_record = template.get_breed_from_animal(record)

        # get a country for this breed
        country = DictCountry.objects.get(
            label=breed_record.efabis_breed_country)

        # ok get a real dictbreed object
        breed = DictBreed.objects.get(
            supplied_breed=breed_record.supplied_breed,
            specie=specie,
            country=country)

        logger.debug("Selected breed is %s" % (breed))

        # define mother and father
        mother, father = None, None

        # get name for this animal and for mother and father
        if record.father_id_in_data_source:
            logger.debug("Getting %s as father" % (
                record.father_id_in_data_source))

            father = get_relationship(
                record.animal_id_in_data_source,
                record.father_id_in_data_source,
                breed,
                submission_obj.owner)

        if record.mother_id_in_data_source:
            logger.debug("Getting %s as mother" % (
                record.mother_id_in_data_source))

            mother = get_relationship(
                record.animal_id_in_data_source,
                record.mother_id_in_data_source,
                breed,
                submission_obj.owner)

        # now get accuracy
        accuracy = ACCURACIES.get_value_by_desc(
            record.birth_location_accuracy)

        # create a new object. Using defaults to avoid collisions when
        # updating data
        defaults = {
            'alternative_id': record.alternative_animal_id,
            'description': record.animal_description,
            'sex': sex,
            'father': father,
            'mother': mother,
            'birth_date': record.birth_date,
            'birth_location': record.birth_location,
            'birth_location_latitude': record.birth_location_latitude,
            'birth_location_longitude': record.birth_location_longitude,
            'birth_location_accuracy': accuracy,
        }

        # creating or updating an object
        update_or_create_obj(
            Animal,
            name=record.animal_id_in_data_source,
            breed=breed,
            owner=submission_obj.owner,
            submission=submission_obj,
            defaults=defaults)

    # create a validation summary object and set all_count
    validation_summary = get_or_create_obj(
        ValidationSummary,
        submission=submission_obj,
        type="animal")

    # reset counts
    validation_summary.reset_all_count()

    # debug
    logger.info("fill_uid_animals() completed")


def parse_times(record, animal):
    """Try to deal with times in excel templates"""

    animal_age_at_collection, time_units = None, None

    # animal age could be present or not
    if record.animal_age_at_collection:
        try:
            animal_age_at_collection, time_units = parse_image_timedelta(
                record.animal_age_at_collection)

        except ValueError as exc:
            message = (
                "Error for Sample '%s' at animal_age_at_collection "
                "column: %s" % (
                    record.sample_id_in_data_source, exc))
            logger.error(message)
            raise ExcelImportError(message)

    elif record.collection_date and animal.birth_date:
        # derive animal age at collection if I have recommended values
        animal_age_at_collection, time_units = image_timedelta(
            record.collection_date, animal.birth_date)

    # another time column
    preparation_interval, preparation_interval_units = None, None

    try:
        if record.sampling_to_preparation_interval:
            preparation_interval, preparation_interval_units = \
                parse_image_timedelta(record.sampling_to_preparation_interval)

    except ValueError as exc:
        message = (
            "Error for Sample '%s' at sampling_to_preparation_interval "
            "column: %s" % (
                record.sample_id_in_data_source, exc))
        logger.error(message)
        raise ExcelImportError(message)

    return (animal_age_at_collection, time_units, preparation_interval,
            preparation_interval_units)


def fill_uid_samples(submission_obj, template):
    # debug
    logger.info("called fill_uid_samples()")

    # get language
    language = submission_obj.gene_bank_country.label

    # iterate among excel template
    for record in template.get_sample_records():
        # get animal by reading record
        animal_record = template.get_animal_from_sample(record)

        # get specie (mind synonyms)
        specie = DictSpecie.get_specie_check_synonyms(
            species_label=animal_record.species,
            language=language)

        logger.debug("Found '%s' as specie" % (specie))

        # get breed from animal record
        breed_record = template.get_breed_from_animal(animal_record)

        # get a country for this breed
        country = DictCountry.objects.get(
            label=breed_record.efabis_breed_country)

        # ok get a real dictbreed object
        breed = DictBreed.objects.get(
            supplied_breed=breed_record.supplied_breed,
            specie=specie,
            country=country)

        logger.debug("Selected breed is %s" % (breed))

        animal = Animal.objects.get(
            name=animal_record.animal_id_in_data_source,
            breed=breed,
            owner=submission_obj.owner)

        logger.debug("Selected animal is %s" % (animal))

        # get a organism part. Organism parts need to be in lowercases
        organism_part = get_or_create_obj(
            DictUberon,
            label=record.organism_part
        )

        # get developmental_stage and physiological_stage terms
        # they are not mandatory
        devel_stage, physio_stage = None, None

        if record.developmental_stage:
            devel_stage = get_or_create_obj(
                DictDevelStage,
                label=record.developmental_stage
            )

        if record.physiological_stage:
            physio_stage = get_or_create_obj(
                DictPhysioStage,
                label=record.physiological_stage
            )

        # deal with time columns
        (animal_age_at_collection, time_units, preparation_interval,
         preparation_interval_units) = parse_times(record, animal)

        # now get accuracy
        accuracy = ACCURACIES.get_value_by_desc(
            record.collection_place_accuracy)

        # now get storage and storage processing
        # TODO; check those values in excel columns
        storage = SAMPLE_STORAGE.get_value_by_desc(
            record.sample_storage)

        storage_processing = SAMPLE_STORAGE_PROCESSING.get_value_by_desc(
            record.sample_storage_processing)

        # create a new object. Using defaults to avoid collisions when
        # updating data
        defaults = {
            'alternative_id': record.alternative_sample_id,
            'description': record.sample_description,
            'protocol': record.specimen_collection_protocol,
            'collection_date': record.collection_date,
            'collection_place_latitude': record.collection_place_latitude,
            'collection_place_longitude': record.collection_place_longitude,
            'collection_place': record.collection_place,
            'collection_place_accuracy': accuracy,
            'organism_part': organism_part,
            'developmental_stage': devel_stage,
            'physiological_stage': physio_stage,
            'animal_age_at_collection': animal_age_at_collection,
            'animal_age_at_collection_units': time_units,
            'availability': record.availability,
            'storage': storage,
            'storage_processing': storage_processing,
            'preparation_interval': preparation_interval,
            'preparation_interval_units': preparation_interval_units,
        }

        update_or_create_obj(
            Sample,
            name=record.sample_id_in_data_source,
            animal=animal,
            owner=submission_obj.owner,
            submission=submission_obj,
            defaults=defaults)

    # create a validation summary object and set all_count
    validation_summary = get_or_create_obj(
        ValidationSummary,
        submission=submission_obj,
        type="sample")

    # reset counts
    validation_summary.reset_all_count()

    # debug
    logger.info("fill_uid_samples() completed")


def check_UID(submission_obj, reader):
    # check for species and sex in a similar way as cryoweb does
    # TODO: identical to CRBanim. Move to a mixin
    check, not_found = reader.check_sex()

    # check sex
    if not check:
        message = (
            "Not all Sex terms are loaded into database: "
            "check for '%s' in your dataset" % (not_found))

        raise ExcelImportError(message)

    check, not_found = reader.check_species(
        submission_obj.gene_bank_country)

    # check species and related
    if not check:
        raise ExcelImportError(
            "Some species are not loaded into database: "
            "check for '%s' in your dataset" % (not_found))

    check, not_found = reader.check_species_in_animal_sheet()

    if not check:
        raise ExcelImportError(
            "Some species are not defined in breed sheet: "
            "check for '%s' in your dataset" % (not_found))

    # check countries
    check, not_found = reader.check_countries()

    if not check:
        raise ExcelImportError(
            "Those countries are not loaded in database: "
            "check for '%s' in your dataset" % (not_found))

    # check accuracies
    check, not_found = reader.check_accuracies()

    if not check:
        message = (
            "Not all accuracy levels are defined in database: "
            "check for '%s' in your dataset" % (not_found))

        raise ExcelImportError(message)


[docs]def upload_template(submission_obj): # debug logger.info("Importing from Excel template file") # this is the full path in docker container fullpath = submission_obj.get_uploaded_file_path() # read submission data reader = ExcelTemplateReader() reader.read_file(fullpath) # start data loading try: # check UID data like cryoweb does check_UID(submission_obj, reader) # BREEDS fill_uid_breeds(submission_obj, reader) # ANIMALS fill_uid_animals(submission_obj, reader) # SAMPLES fill_uid_samples(submission_obj, reader) except Exception as exc: # set message: message = "Error in importing data: %s" % (str(exc)) # save a message in database submission_obj.status = ERROR submission_obj.message = message submission_obj.save() # send async message send_message(submission_obj) # debug logger.error("Error in importing from Template: %s" % (exc)) logger.exception(exc) return False else: message = "Template import completed for submission: %s" % ( submission_obj.id) submission_obj.message = message submission_obj.status = LOADED submission_obj.save() # send async message send_message( submission_obj, validation_message=construct_validation_message(submission_obj)) logger.info("Import from Template is complete") return True