
import argparse
import itertools

parser = argparse.ArgumentParser(description = "Description for my parser")
parser.add_argument("-H", "--Help", help = "Show this help text", required = False, default = "")
parser.add_argument("-s", "--sequence", help = "sequence to be matched (default 5->3)", required = False, default = "")
parser.add_argument("-r", "--reverse", help = "reverses sequence for 3'->5' input", required = False, default = False)
parser.add_argument("-ms", "--mass", help = "mass of the measured oligo", required = False, default = False)
parser.add_argument("-msd", "--mass_delta", help = "mass-range to be searched", required = False, default = "1")
parser.add_argument("-fss", "--full_sub_sequences", help = "show masses of sub-sequences that fall outside of the searched mass range", required = False, default = False)
parser.add_argument("-p", "--phosphorylated", help = "5' phosphorylation modification is applied", required = False, default = False)

argument = parser.parse_args()
status = False

if argument.Help:
    print("You have used '-H' or '--Help' with argument: {0}".format(argument.Help))
    status = True
if argument.sequence:
    sequence = argument.sequence
if argument.mass:
    mass = float(argument.mass)
if argument.mass_delta:
    massDelta = float(argument.mass_delta)
if argument.reverse:
    sequence = argument.sequence[::-1]
if not status:
    print("Maybe you want to use -H or -s or -p or -o as arguments ?") 



print("BrightSeq V0.0.1 --- by Andre Zenz --- free under GPLv3")

matchMasses = []
matches = []
CONST_ADENOSINE_MASS = 267.241
CONST_GUANOSINE_MASS = 283.241
CONST_CYTIDINE_MASS = 243.217
CONST_THYMIDINE_MASS = 258.228
CONST_BTADINE_MASS = 362.199
CONST_DEOXYADENOSINE_MASS = 251.242
CONST_DEOXYGUANOSINE_MASS = 267.241
CONST_DEOXYCYTIDINE_MASS = 227.217
CONST_DEOXYTHYMIDINE_MASS = 242.228
CONST_DEOXYBTADINE_MASS = 344.967
CONST_PHOSPHATE_MASS = 61.947

nucleosides=['C','A','G','T']

def seqToOccurances(inSeq):
    occurance = ""
    for nuc in nucleosides:
        
        occurance = occurance+ (str(inSeq.count(nuc))+"x"+str(nuc)+"\t")
    return occurance
    
def calcMass(inSeq):
    seqMass = inSeq.count('B')*(CONST_DEOXYBTADINE_MASS+CONST_PHOSPHATE_MASS)+inSeq.count('G')*(CONST_DEOXYGUANOSINE_MASS+CONST_PHOSPHATE_MASS)+inSeq.count('A')*(CONST_DEOXYADENOSINE_MASS+CONST_PHOSPHATE_MASS)+inSeq.count('C')*(CONST_DEOXYCYTIDINE_MASS+CONST_PHOSPHATE_MASS)+inSeq.count('T')*(CONST_DEOXYTHYMIDINE_MASS+CONST_PHOSPHATE_MASS)-CONST_PHOSPHATE_MASS
    if(argument.phosphorylated):
        seqMass += 77.974
    if(len(inSeq)==0):
        seqMass = 0;
    return seqMass


matchMass = []
maxMass = 0
seqLength = 0;
delimiter = "";
matchFound = False;
while maxMass < (int(mass)+1000):
    seqLength +=1
    for seq in itertools.combinations_with_replacement(nucleosides, seqLength):
#       print(delimiter.join(seq))
        cmass = calcMass(seq)
        if (cmass>maxMass):
            maxMass = cmass
        if(cmass > (mass-massDelta) and cmass < (mass+massDelta)):
            matchMasses.append(round(cmass, 3))
            matches.append(seq)
            matchFound = True;
if(not matchFound):
    print("no match found")
else:
    print("*****   A sequence with the following constituent nucleotides would match your search mass of "+str(mass)+" within the given mass error range of "+str(massDelta)+" :   *****")
    occuranceStrings = []
    for s in matches:
        occuranceStrings.append(seqToOccurances(s))
    matchPairs = sorted(zip(matchMasses, occuranceStrings))
    for pair in matchPairs:
        for line in pair:
            print(line)
        print("")

if sequence:
    print("*****   You have put in a search sequence of 3'-"+sequence+"-5'   *****")
    print("*****   This sequence has a calculated mass of "+str(round(calcMass(sequence), 3)), end=" ")
    if (calcMass(sequence)> mass-massDelta and calcMass(sequence)< mass+massDelta):
        print("and therefore does matches your input search mass of "+str(mass)+" within the given error of "+str(massDelta)+"   *****")
    else:
        print("and therefore does NOT matches your input search mass of "+str(mass)+" within the given error of "+str(massDelta)+"   *****")
        print("*****   Fragment sequences that are shortened on either the 3' or 5' end were searched for possible mass-matches:   *****")
        trimmedSequences = []
        trimmedOperations = []
        trimmedMatches = []
        trimmedMatchMasses = []
        trimmedMatchOperations = []
        for cursor in range(len(sequence)):
            trimmedSequences.append(sequence[cursor:(len(sequence))])
            trimmedOperations.append("3' trimmed N-"+str(cursor))
            trimmedSequences.append(sequence[0:cursor])
            trimmedOperations.append("5' trimmed N-"+str(len(sequence)-cursor))


        trimmedSequenceMasses = []
        for seq in trimmedSequences:
            cmass = calcMass(seq)
            trimmedSequenceMasses.append(round(cmass,3))
            if(cmass > (mass-massDelta) and cmass < (mass+massDelta)):
                trimmedMatches.append(seq)
                trimmedMatchMasses.append(round(cmass,3))
                trimmedMatchOperations.append(trimmedOperations[trimmedSequences.index(seq)])

        if(argument.full_sub_sequences):
            print("   -> Option --full-sub-sequences! Masses of all checked trimmed sequences is listed:")
            sequencesWithMasses = sorted(zip(trimmedSequenceMasses, trimmedSequences, trimmedOperations))
            for pair in sequencesWithMasses:
                print(str(pair))
        else:
            sequencesWithMasses = sorted(zip(trimmedMatchMasses, trimmedMatches, trimmedMatchOperations))
            for pair in sequencesWithMasses:
                print(str(pair))
            if(len(trimmedMatches)==0):
                print("   -> No trimmed sequences matched the search mass")

    