// AccessionMCPDConverter.java

/*
 * Copyright 2021 Global Crop Diversity Trust
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.gringlobal.worker;

import static org.gringlobal.model.community.CommunityAppSettings.MCPD_ACCEURL;

import java.util.ArrayList;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
import java.util.Optional;
import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;
import java.util.stream.Stream;

import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
import org.gringlobal.component.elastic.AppContextHelper;
import org.gringlobal.model.Accession;
import org.gringlobal.model.AccessionInvName;
import org.gringlobal.model.AccessionSource;
import org.gringlobal.model.Inventory;
import org.gringlobal.model.TaxonomySpecies;
import org.gringlobal.model.community.AccessionMCPD;
import org.gringlobal.model.community.CommunityCodeValues;
import org.gringlobal.model.community.IWebVisible;
import org.gringlobal.service.AppSettingsService;
import org.gringlobal.service.CodeValueService;
import org.gringlobal.service.TemplatingService;
import org.gringlobal.util.MCPDDate;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.data.util.Pair;
import org.springframework.stereotype.Component;

import com.github.mustachejava.Mustache;
import com.google.common.cache.Cache;
import com.google.common.cache.CacheBuilder;

import lombok.extern.slf4j.Slf4j;

/**
 * Converts {@link Accession} records to their MCPD passport representation, {@link AccessionMCPD}.
 *
 * <p>
 * Code value translations, the compiled ACCEURL template and crop names are looked up through small
 * short-lived caches so that converting many accessions in a row does not repeat the same lookups.
 * </p>
 */
@Component
@Slf4j
public class AccessionMCPDConverter {

	/** Cached lookups are dropped this many seconds after they were last accessed. */
	private static final long CACHE_EXPIRY_SECONDS = 30;

	@Autowired
	private CodeValueService codeValueService;

	@Autowired
	private AppSettingsService appSettingsService;

	@Autowired
	private TemplatingService templatingService;

	/** Map (code value group, code value) to the MCPD translation of that code value */
	private final Cache<Pair<String, String>, Optional<String>> mcpdCodeValueCache = CacheBuilder.newBuilder().maximumSize(200).expireAfterAccess(CACHE_EXPIRY_SECONDS, TimeUnit.SECONDS).build();

	/** Holds the compiled ACCEURL template (single entry keyed by the setting name) */
	private final Cache<String, Optional<Mustache>> acceUrlCompiledTemplateCache = CacheBuilder.newBuilder().maximumSize(1).expireAfterAccess(CACHE_EXPIRY_SECONDS, TimeUnit.SECONDS).build();

	/** Map taxonomy species to its crop name */
	private final Cache<TaxonomySpecies, Optional<String>> taxonomyCropNameCache = CacheBuilder.newBuilder().maximumSize(200).expireAfterAccess(CACHE_EXPIRY_SECONDS, TimeUnit.SECONDS).build();

	/** Map MCPD field to a type of ACCESSION_NAME_TYPE */
	private final Cache<String, Optional<String>> mcpdAccessionNameType = CacheBuilder.newBuilder().maximumSize(10).expireAfterAccess(CACHE_EXPIRY_SECONDS, TimeUnit.SECONDS).build();

	/**
	 * Discard the cached ACCEURL template so that the next conversion reads the application setting again.
	 */
	public void dropAcceUrlTemplateCache() {
		acceUrlCompiledTemplateCache.invalidateAll();
	}

	/**
	 * Figure out which ACCESSION_NAME_TYPE has MCPD translation that matches {@code mcpdField}.
	 *
	 * @param mcpdField COLLMISSID, ACCENAME, etc.
	 * @return The corresponding code value, empty when {@code mcpdField} is {@code null} or no code value matches
	 */
	private Optional<String> findAccessionNameType(String mcpdField) {
		if (mcpdField == null) {
			return Optional.empty();
		}
		try {
			return mcpdAccessionNameType.get(mcpdField,
				() -> Optional.ofNullable(codeValueService.findCodeValueOfMCPD(CommunityCodeValues.ACCESSION_NAME_TYPE, mcpdField)));
		} catch (ExecutionException e) {
			throw new RuntimeException("Error reading code value for MCPD", e);
		}
	}

	/**
	 * Find the MCPD translation of a code value.
	 *
	 * @param groupName code value group
	 * @param value the code value to translate
	 * @return the MCPD translation, empty when {@code value} is {@code null} or has no translation
	 */
	public Optional<String> findMcpdOfCodeValue(String groupName, String value) {
		if (value == null) {
			return Optional.empty();
		}
		try {
			return mcpdCodeValueCache.get(Pair.of(groupName, value),
				() -> Optional.ofNullable(codeValueService.findMcpdOfCodeValue(groupName, value)));
		} catch (ExecutionException e) {
			throw new RuntimeException("Error reading MCPD code value", e);
		}
	}

	/**
	 * Find the MCPD translation of a code value and read it as an integer.
	 *
	 * @param groupName code value group
	 * @param value the code value to translate
	 * @return the integer MCPD translation, empty when there is no translation or it is not a valid integer
	 */
	private Optional<Integer> findMcpdIntegerOfCodeValue(String groupName, String value) {
		return findMcpdOfCodeValue(groupName, value).filter(StringUtils::isNotBlank).map(mcpdValue -> {
			try {
				return Integer.valueOf(mcpdValue);
			} catch (NumberFormatException e) {
				// A translation that is not numeric cannot be used for an integer MCPD descriptor
				log.debug("MCPD translation '{}' of {} code value '{}' is not an integer", mcpdValue, groupName, value);
				return null; // results in an empty Optional
			}
		});
	}

	/**
	 * Read and compile the ACCEURL template from application settings.
	 *
	 * @return the compiled template, empty when the setting is missing, blank or still at its default value
	 */
	private Optional<Mustache> findAcceUrlTemplate() {
		try {
			return acceUrlCompiledTemplateCache.get(MCPD_ACCEURL.name, () -> {
				Optional<String> setting = appSettingsService.getSetting(MCPD_ACCEURL.categoryTag, MCPD_ACCEURL.name, String.class);
				return setting
					// If blank or set to default value then ignore
					.filter(StringUtils::isNotBlank)
					.filter(template -> !Objects.equals(template, MCPD_ACCEURL.value))
					.map(template -> templatingService.compileTemplate(template));
			});
		} catch (ExecutionException e) {
			throw new RuntimeException("Error reading ACCEURL app setting", e);
		}
	}

	/**
	 * Find the crop name for the taxonomy species. Only the first taxonomy crop of the species is inspected:
	 * the name of its crop is preferred, then its common crop name. When neither is available the lookup
	 * continues with the current taxonomy species (if it is a different record).
	 *
	 * @param taxonomySpecies the species
	 * @return the crop name, empty if none is found
	 */
	private Optional<String> findCropName(TaxonomySpecies taxonomySpecies) {
		try {
			return taxonomyCropNameCache.get(taxonomySpecies, () -> {
				log.debug("Looking for crop name of taxonomySpecies id={} {}", taxonomySpecies.getId(), taxonomySpecies.getName());
				var taxonomyCrops = taxonomySpecies.getTaxonomyCrops();
				if (taxonomyCrops != null && !taxonomyCrops.isEmpty()) {
					var firstTaxonomyCrop = taxonomyCrops.get(0);

					// Use the genebank's name of the crop. A crop without a usable name must not stop the lookup.
					var crop = firstTaxonomyCrop.getCrop();
					if (crop != null && StringUtils.isNotBlank(crop.getName())) {
						return Optional.of(crop.getName());
					}

					// Use common crop name if available
					if (StringUtils.isNotBlank(firstTaxonomyCrop.getCommonCropName())) {
						return Optional.of(firstTaxonomyCrop.getCommonCropName());
					}
				}
				// Check current taxonomy (if not the same)
				var currentSpecies = taxonomySpecies.getCurrentTaxonomySpecies();
				if (currentSpecies != null && !currentSpecies.getId().equals(taxonomySpecies.getId())) {
					log.trace("Looking for crop name of current species of {} currentTaxonomySpecies.id={}", taxonomySpecies.getName(), currentSpecies.getId());
					return findCropName(currentSpecies);
				}

				// No CROPNAME found.
				return Optional.empty();
			});
		} catch (ExecutionException e) {
			throw new RuntimeException("Error fetching CROPNAME for " + taxonomySpecies.getName(), e);
		}
	}

	/**
	 * Convert the accession to MCPD.
	 *
	 * @param accession a persisted accession, must not be {@code null}
	 * @return the MCPD representation of the accession
	 */
	public AccessionMCPD convert(Accession accession) {
		assert accession != null;
		assert !accession.isNew();

		AccessionMCPD mcpd = new AccessionMCPD();
		mcpd.id = accession.getId();
		mcpd.isWebVisible = accession.getIsWebVisible();
		mcpd.puid = accession.getDoi();
		mcpd.instCode = accession.getSite().getFaoInstituteNumber();
		mcpd.acceNumb = accession.getAccessionNumber();
		mcpd.acceName = accession.getPreferredName();
		mcpd.acqDate = MCPDDate.convert(accession.getInitialReceivedDate(), accession.getInitialReceivedDateCode());
		// SAMPSTAT and MLSSTAT stay unset unless the code value has an integer MCPD translation
		findMcpdIntegerOfCodeValue(CommunityCodeValues.IMPROVEMENT_LEVEL, accession.getImprovementStatusCode()).ifPresent(sampStat -> mcpd.sampStat = sampStat);
		findMcpdIntegerOfCodeValue(CommunityCodeValues.ACCESSION_MLS_STATUS, accession.getMlsStatus()).ifPresent(mlsStat -> mcpd.mlsStat = mlsStat);
		// mcpd.remarks = accession.getNote();

		mcpd.curationType = findMcpdOfCodeValue(CommunityCodeValues.ACCESSION_CURATION_TYPE, accession.getCurationTypeCode()).orElse(null);

		fillTaxonomy(mcpd, accession.getTaxonomySpecies());
		fillSources(mcpd, accession);
		fillSafetyDuplicates(mcpd, accession);

		if (accession.getExploration() != null) {
			mcpd.collMissid = accession.getExploration().getExplorationNumber();
		}

		// Names rely on identifiers assigned above (PUID, ACCENUMB, DONORCODE, COLLCODE, COLLMISSID)
		fillNames(mcpd, accession);

		if (accession.getAccessionPedigree() != null) {
			mcpd.ancest = accession.getAccessionPedigree().getDescription();
		}

		findAcceUrlTemplate().ifPresent(compiled -> {
			mcpd.acceUrl = StringUtils.stripToNull(templatingService.fillTemplate(compiled, Map.of("accession", accession)));
		});

		fillHistorical(mcpd, accession);
		// Availability and storage depend on the historical flag
		fillAvailabilityAndStorage(mcpd, accession);

		mcpd.lastModified = accession.getModifiedDate();

		return mcpd;
	}

	/** Fill GENUS, SPECIES, SPAUTHOR, SUBTAXA, SUBTAUTHOR and CROPNAME from the taxonomy species. */
	private void fillTaxonomy(AccessionMCPD mcpd, TaxonomySpecies taxonomySpecies) {
		mcpd.genus = taxonomySpecies.getTaxonomyGenus().getName();
		mcpd.species = taxonomySpecies.getSpecificEpithet();
		mcpd.spAuthor = taxonomySpecies.getSpeciesAuthority();
		mcpd.subtaxa = taxonomySpecies.getSubTaxon();
		if (mcpd.subtaxa != null) {
			// The authority is reported only when there is a subtaxon
			mcpd.subtAuthor = taxonomySpecies.getNameAuthority();
		}
		mcpd.cropName = findCropName(taxonomySpecies).orElse(null);
	}

	/** Fill origin, collecting, donor and breeder descriptors from web-visible accession sources. */
	private void fillSources(AccessionMCPD mcpd, Accession accession) {
		var webVisibleSources = accession.getAccessionSources().stream().filter(IWebVisible::isWebVisible).collect(Collectors.toList());

		// MCPD ORIGCTY: Geography of the source that is flagged as `isOrigin` provides the country of provenance
		var originSource = webVisibleSources.stream().filter(AccessionSource::isOrigin).findFirst().orElse(null);
		if (originSource != null && originSource.getGeography() != null) {
			var countryCode = originSource.getGeography().getCountryCode();
			mcpd.origCty = StringUtils.defaultIfBlank(
				// Use MCPD translation
				findMcpdOfCodeValue(CommunityCodeValues.GEOGRAPHY_COUNTRY_CODE, countryCode).orElse(null),
				// use country code directly if not translated
				countryCode);
		}

		// COLLECTED
		var collectedSource = firstSourceOfType(webVisibleSources, CommunityCodeValues.ACCESSION_SOURCE_TYPE_COLLECTED.value);
		if (collectedSource != null) {
			mcpd.collSite = collectedSource.getCollectorVerbatimLocality();
			mcpd.collDate = MCPDDate.convert(collectedSource.getSourceDate(), collectedSource.getSourceDateCode());
			// COLLSRC stays unset unless the habitat type has an integer MCPD translation
			findMcpdIntegerOfCodeValue(CommunityCodeValues.ACCESSION_SOURCE_HABITAT_TYPE, collectedSource.getAcquisitionSource()).ifPresent(collSrc -> mcpd.collSrc = collSrc);

			mcpd.decLatitude = collectedSource.getLatitude();
			mcpd.decLongitude = collectedSource.getLongitude();
			mcpd.coordDatum = collectedSource.getGeoreferenceDatum();
			mcpd.coordUncert = collectedSource.getUncertainty();
			mcpd.geoRefMeth = collectedSource.getGeoreferenceProtocolCode();
			mcpd.elevation = collectedSource.getElevationMeters();

			// first Cooperator
			var firstCooperator = collectedSource.getCooperators().stream().findFirst().orElse(null);
			if (firstCooperator != null) {
				mcpd.collCode = firstCooperator.getFaoInstituteNumber();
				mcpd.collName = firstCooperator.getOrganization();
				// Full address
				mcpd.collInstAddress = Stream.of(firstCooperator.getAddressLine1(), firstCooperator.getAddressLine2(), firstCooperator.getAddressLine3(), firstCooperator.getCity())
					.filter(StringUtils::isNotBlank).collect(Collectors.joining(", "));
			}
		}

		// DONATED
		var donorSource = firstSourceOfType(webVisibleSources, CommunityCodeValues.ACCESSION_SOURCE_TYPE_DONATED.value);
		if (donorSource != null && !donorSource.getCooperators().isEmpty()) {
			var donor = donorSource.getCooperators().get(0);
			mcpd.donorCode = donor.getFaoInstituteNumber();
			mcpd.donorName = donor.getOrganization();
		}

		// DEVELOPED
		var developSource = firstSourceOfType(webVisibleSources, CommunityCodeValues.ACCESSION_SOURCE_TYPE_DEVELOPED.value);
		if (developSource != null && !developSource.getCooperators().isEmpty()) {
			var breeder = developSource.getCooperators().get(0);
			mcpd.bredCode = breeder.getFaoInstituteNumber();
			mcpd.bredName = breeder.getOrganization();
		}
	}

	/** Return the first source with the specified source type code, or {@code null} if there is none. */
	private static <S extends AccessionSource> S firstSourceOfType(List<S> sources, String sourceTypeCode) {
		return sources.stream().filter(source -> sourceTypeCode.equals(source.getSourceTypeCode())).findFirst().orElse(null);
	}

	/** Fill DUPLSITE and DUPLINSTNAME from the two backup location sites of the accession. */
	private void fillSafetyDuplicates(AccessionMCPD mcpd, Accession accession) {
		var backupSites = Stream.of(accession.getBackupLocation1Site(), accession.getBackupLocation2Site()).filter(Objects::nonNull).collect(Collectors.toList());
		mcpd.duplSite = StringUtils.trimToNull(backupSites.stream().map(site -> site.getFaoInstituteNumber()).filter(Objects::nonNull).collect(Collectors.joining(";")));
		mcpd.duplInstName = StringUtils.trimToNull(backupSites.stream().map(site -> site.getSiteLongName()).filter(Objects::nonNull).collect(Collectors.joining(";")));
	}

	/** Fill ACCENAME, DONORNUMB, COLLNUMB, COLLMISSID (if not yet set) and OTHERNUMB from web-visible names. */
	private void fillNames(AccessionMCPD mcpd, Accession accession) {
		var accessionNames = accession.getNames();
		if (accessionNames == null) {
			accessionNames = AppContextHelper.loadNames(accession, Inventory.SYSTEM_INVENTORY_FTC); // only system inventory
		}
		var webVisibleNames = accessionNames.stream()
			// only public records
			.filter(IWebVisible::isWebVisible)
			// order by name rank
			.sorted(AccessionInvName::orderByPlantNameRank)
			// as list
			.collect(Collectors.toList());

		if (CollectionUtils.isEmpty(webVisibleNames)) {
			// Nothing to report, ACCENAME keeps the value assigned by the caller
			return;
		}

		var nameTypeACCENAME = findAccessionNameType("ACCENAME");
		if (nameTypeACCENAME.isPresent()) {
			// NOTE(review): when no visible name is of this type ACCENAME becomes null, discarding the
			// preferred name assigned earlier. Preserved as-is; confirm this is intended.
			mcpd.acceName = firstPlantNameOfType(webVisibleNames, nameTypeACCENAME.get());
		} else {
			mcpd.acceName = webVisibleNames.get(0).getPlantName(); // top name is accession name
		}

		var nameTypeDONORNUMB = findAccessionNameType("DONORNUMB").orElse(CommunityCodeValues.ACCESSION_NAME_TYPE_DONOR.value);
		mcpd.donorNumb = firstPlantNameOfType(webVisibleNames, nameTypeDONORNUMB);

		var nameTypeCOLLNUMB = findAccessionNameType("COLLNUMB").orElse(CommunityCodeValues.ACCESSION_NAME_TYPE_COLLECTOR.value);
		mcpd.collNumb = firstPlantNameOfType(webVisibleNames, nameTypeCOLLNUMB);

		if (mcpd.collMissid == null) {
			var nameTypeCOLLMISSID = findAccessionNameType("COLLMISSID").orElse(CommunityCodeValues.ACCESSION_NAME_TYPE_EXPLORATION.value);
			mcpd.collMissid = firstPlantNameOfType(webVisibleNames, nameTypeCOLLMISSID);
		}

		// Using a list here so the order of names is preserved. The list is modified below, so it must be mutable.
		var otherNumb = webVisibleNames.stream().map(AccessionInvName::getPlantName).distinct().collect(Collectors.toCollection(ArrayList::new));

		// Identifiers that are already reported in other descriptors are not repeated in OTHERNUMB
		/*@formatter:off*/
		otherNumb.remove(mcpd.acceNumb);
		if (mcpd.puid != null) otherNumb.remove(mcpd.puid);
		if (mcpd.acceName != null) otherNumb.remove(mcpd.acceName);
		if (mcpd.donorCode != null) otherNumb.remove(mcpd.donorCode);
		if (mcpd.donorNumb != null) otherNumb.remove(mcpd.donorNumb);
		if (mcpd.collCode != null) otherNumb.remove(mcpd.collCode);
		if (mcpd.collNumb != null) otherNumb.remove(mcpd.collNumb);
		if (mcpd.collMissid != null) otherNumb.remove(mcpd.collMissid);
		/*@formatter:on*/
		mcpd.otherNumb = StringUtils.trimToNull(otherNumb.stream().collect(Collectors.joining(";")));
	}

	/** Return the plant name of the first name with the specified category code, or {@code null} if there is none. */
	private static String firstPlantNameOfType(List<? extends AccessionInvName> names, String categoryCode) {
		return names.stream().filter(name -> Objects.equals(categoryCode, name.getCategoryCode()))
			// plant name
			.map(AccessionInvName::getPlantName)
			// first only
			.findFirst().orElse(null);
	}

	/** Fill the historical flag from the curation type or, when that is not specified, from the accession status. */
	private void fillHistorical(AccessionMCPD mcpd, Accession accession) {
		if (accession.getCurationTypeCode() != null) { // Curation Type overrides historical
			mcpd.historical = Objects.equals(CommunityCodeValues.ACCESSION_CURATION_TYPE_HISTORICAL.value, accession.getCurationTypeCode());
		} else { // Or curation type is not specified
			// Negated MCPD translation of the status, null when the translation is not a recognized boolean
			mcpd.historical = not(toBoolean(findMcpdOfCodeValue(CommunityCodeValues.ACCESSION_STATUS, accession.getStatusCode()).orElse(null)));
		}
	}

	/** Fill availability and storage types. Must be called after the historical flag is assigned. */
	private void fillAvailabilityAndStorage(AccessionMCPD mcpd, Accession accession) {
		if (Boolean.TRUE.equals(mcpd.historical)) {
			// If historical, then there is no availability and no storage
			mcpd.availability = false;
			mcpd.storage = "";
			return;
		}

		// Otherwise scan inventories for details
		Boolean availability = null; // remains null when no inventory qualifies for the check
		Set<String> storage = new HashSet<>();
		for (var inv : accession.getInventories()) {
			if (inv.isSystemInventory()) { // skip system inventory
				continue;
			}
			Double quantityOnHand = inv.getQuantityOnHand();

			if (quantityOnHand == null) { // consider only inventories with specified quantity
				continue;
			}

			if (!Boolean.TRUE.equals(availability)) { // one available inventory is enough
				availability = Objects.equals("Y", inv.getIsDistributable()) // Is distributable
						&& Objects.equals("Y", inv.getIsAvailable()) // TODO We should not be considering #isAvailable
						&& quantityOnHand > 0 // Has some quantity
						&& (inv.getDistributionCriticalQuantity() == null || quantityOnHand > inv.getDistributionCriticalQuantity()); // Sufficient quantity on hand
			}

			if (quantityOnHand <= 0) { // Do not consider empty inventories for storage
				continue;
			}
			// An inventory without a maintenance policy has no storage type to report
			var maintenancePolicy = inv.getInventoryMaintenancePolicy();
			var storageTypeCode = maintenancePolicy == null ? null : maintenancePolicy.getStorageTypeCode();
			findMcpdOfCodeValue(CommunityCodeValues.STORAGE_TYPE, storageTypeCode).ifPresent(storage::add);
		}
		mcpd.availability = availability;
		mcpd.storage = storage.stream().sorted().collect(Collectors.joining(";"));
	}

	/** Negate the value, {@code null} stays {@code null}. */
	private static Boolean not(Boolean bool) {
		return bool == null ? null : !bool;
	}

	/**
	 * Interpret the string as a boolean.
	 *
	 * @param booleanStringOfSomeKind "1", "true", "yes" or "0", "false", "no" (words are case-insensitive)
	 * @return the boolean value, {@code null} when the string is blank or not recognized
	 */
	private static Boolean toBoolean(String booleanStringOfSomeKind) {
		if (StringUtils.isBlank(booleanStringOfSomeKind)) {
			return null;
		}
		if ("1".equals(booleanStringOfSomeKind) || "true".equalsIgnoreCase(booleanStringOfSomeKind) || "yes".equalsIgnoreCase(booleanStringOfSomeKind)) {
			return true;
		}
		if ("0".equals(booleanStringOfSomeKind) || "false".equalsIgnoreCase(booleanStringOfSomeKind) || "no".equalsIgnoreCase(booleanStringOfSomeKind)) {
			return false;
		}
		return null;
	}

	/**
	 * Split the value using the regular expression and stream the non-blank parts with surrounding whitespace removed.
	 *
	 * @param regexp the delimiting regular expression
	 * @param value the value to split, may be {@code null}
	 * @return stream of stripped non-blank parts, empty stream when {@code value} is blank
	 */
	public static Stream<String> streamSplit(String regexp, String value) {
		if (StringUtils.isBlank(value)) {
			return Stream.empty();
		}
		return Stream.of(value.split(regexp)).filter(StringUtils::isNotBlank).map(String::strip);
	}
}