# -*- coding: utf-8 -*-
"""
#######################
Name: cryptFind.py
Author: Snorre Sulheim
Affiliation: Deptartment if biotechnology and food science, Norwegian University of Technology and Science
Date: 21.09.2017
#######################

This is an implementation of the cryptFind algorithm developed by Ali Navid and Eivind Almaas 
(Genome-scale reconstruction of the metabolic network in Yersinia pestis, strain 91001., doi: 10.1039/b818710).

# Description
The CryptFind is used to find cryptic genes, i.e. genes which are not expressed when the bacteria is in a normal state.
This is used to find out which genes / reactions to turn of to get the correct phenotypes observed in vitro.

1. Find a growth medium where the bacteria grows in silico, but not in vitro.
2. Do single gene knockout and make a list of lethal knockouts. 
3. Do the same for many different minimal growth media where the bacteria grows
   both in vitro and in silico. You only need to consider the list of genes found in 2.
4. The lethal knockouts unique to the first selceted minimal growth media are candidates for cryptic genes

# Input
1. A metabolic model, (.xml file) in sbml / cobra format.
2. A list of minimal growth media where the bacteria grows in vitro
3. The growth media for which to find the cryptic genes
4. State if you want to knock out genes (default, use "gene") or reactions ("reaction")
5. Source type, either carbon or nitrogen source
6. Name of solver you want to use, if nothing stated, gurobi will be used
7. Default carbon source, default "EX_glc_e"
8. Default nitrogen source, default "EX_nh4_e" 
- 

# Output
- A list of cryptig genes candidates or reactions

# Requirements
- Python 3
- cobrapy
- A solver, e.g. gurobi
- pandas
- csv

# How to use
The best way is to import the function into a python script, i.e.
from cryptFind import crypt_find

However, the function can also be run from the command_line using
> python cryptFind.py path_to_model_xml_file path_to_csv_file_with_id_of_reactions name_of_reaction_you_to_find_cryptic_genes_for

# Commandline Example
path_to_model_xml_file: C:\\folder\model.xml
path_to_csv_file_with_id_of_reactions: C:\\folder\reactions.csv
name_of_reaction_you_to_find_cryptic_genes_for: EX_gln__L_e

> python cryptFind.py C:\\folder\model.xml  C:\\folder\reactions.csv EX_gln__L_e

# Python script example
model = cobra.io.read_sbml_model("P:/git/gem_sco/modelS.xml")

growth_medium_list = [
    "EX_ac_e", "EX_celb_e","EX_cit_e", "EX_abt__D_e", "EX_fru_e", "EX_gal_e", "EX_glcn_e",
    "EX_glc_e", "EX_mnl_e", "EX_man_e", "EX_rib__D_e", "EX_tartr__D_e",
    "EX_xyl__D_e", "EX_glyc_e",  "EX_gly_e",   "EX_lcts_e",  "EX_ala__L_e", "EX_arab__L_e",
    "EX_asn__L_e", "EX_glu__L_e", "EX_gln__L_e",
    "EX_his__L_e", "EX_ile__L_e", "EX_lac__L_e", "EX_leu__L_e", "EX_lys__L_e", "EX_mal__L_e", "EX_pro__L_e", "EX_rmn_e",   "EX_ser__L_e",
    "EX_thr__L_e", "EX_trp__L_e", "EX_val__L_e", "EX_malt_e",  "EX_melib_e", "EX_ppa_e",   "EX_pyr_e",   "EX_salcn_e", "EX_succ_e",  "EX_tre_e",  
    "EX_xylt_e"]



nitrogen_growth_medium_list = ["EX_asn__L_e",
    "EX_asp__L_e","EX_glu__L_e","EX_gln__L_e","EX_ile__L_e","EX_leu__L_e",
    "EX_lys__L_e","EX_met__L_e","EX_phe__L_e","EX_pro__L_e","EX_val__L_e"]

cryptic_genes = crypt_find(model, growth_medium_list, "EX_gln__L_e", "gene", source_type = "carbon")

"""

import cobra
import sys
import csv

SOLVER = "gurobi"



def crypt_find(model, growth_medium_list, selected_growth_medium, 
                element_type = "gene", source_type = "carbon", solver = None,
                default_carbon_source = "EX_glc_e", default_nitrogen_source = "EX_nh4_e"):
    if not solver:
        solver = SOLVER

    if isinstance(model, str):
        try:
            model = cobra.io.read_sbml_model(model)
        except IOError:
            raise("Could not read {0}".format(model))
            return False

    if selected_growth_medium in growth_medium_list:
        growth_medium_list.pop(growth_medium_list.index(selected_growth_medium))


    # Set glucose uptake to 0
    if model.optimize().f and (model.optimize().f > 1e-9):
        if source_type == "carbon":
            model.reactions.get_by_id(default_carbon_source).lower_bound = 0
        else:
            model.reactions.get_by_id(default_nitrogen_source).lower_bound = 0


    if model.optimize().f and (model.optimize().f > 1e-9):
        print("""The model is growing after removing the default carbon / nitrogen source, 
              please set any uptake of carbon / nitrogen to zero, or change the
              input parameter default_carbon_source / default_nitrogen_source""")
        return False

    # Get initial list of lethal genes
    model.reactions.get_by_id(selected_growth_medium).lower_bound  = -10
    if element_type == "gene":
        full_element_dict = cobra.flux_analysis.single_gene_deletion(model, solver = solver)
    else:
        full_element_dict = cobra.flux_analysis.single_reaction_deletion(model, solver = solver)

    essential_elements = extract_lethal_elements(full_element_dict)
    model.reactions.get_by_id(selected_growth_medium).lower_bound  = 0

    for r_id in growth_medium_list:
        model.reactions.get_by_id(r_id).lower_bound = -10
        o = model.optimize()
        if not (o.f or 0) > 1e-9:
            print("Model dead on {0}. Skip this source".format(r_id))
            continue
        if element_type == "gene":
            element_dict = cobra.flux_analysis.single_gene_deletion(model, essential_elements, solver = solver)
        else:
            element_dict = cobra.flux_analysis.single_reaction_deletion(model, essential_elements, solver = solver)

        model.reactions.get_by_id(r_id).lower_bound = 0
        new_essential_elements = extract_lethal_elements(element_dict)
        essential_elements = [x for x in essential_elements if not x in new_essential_elements]
        print("New essential {1}s in {0}: ".format(r_id, element_type), len(new_essential_elements))
        if len(essential_elements) < 10:
            print("Essential {0}s: ".format(element_type), len(essential_elements), essential_elements)
        else:
            print("Essential {0}s: ".format(element_type), len(essential_elements))
            
    print("Your identfied cryptic gene / reactions")
    print(essential_elements)
    return essential_elements
        


def extract_lethal_elements(gene_df):
    lethal_df = gene_df[abs(gene_df["flux"]) <1e-5]
    return lethal_df.index.values


def read_reactions_csv(fn):
    reaction_ids = []
    with open(fn, "r") as f:
        reader = csv.reader(f)
        for row in reader:
            for x in row:
                if len(x.strip()):
                    reaction_ids.append(x.strip())
    return reaction_ids


if __name__ == '__main__':
    if len(sys.argv) < 4:
        print("""You have to add at least 3 input parameters: \n
                1) Path to model file in SBML format (.xml) \n
                2) A list of exchange reactions to use as uptake for carbon / nitrogen sources (where organism is growing) \n
                3) The ID of the exchange reaction (i.e. growth condition) for which you want to find a cryptic gene""")

    else:
        model_fn = sys.argv[1]
        reactions_fn = sys.argv[2]

        model = cobra.io.read_sbml_model(model_fn)
        reaction_list = read_reactions_csv(reactions_fn)
        result = crypt_find(model, reaction_list, *sys.argv[3:])