Spaces:
Runtime error
Runtime error
Kevin Louis
committed on
Commit
·
4b032bf
1
Parent(s):
a388195
Upload DNAseq.py
Browse files
DNAseq.py
CHANGED
|
@@ -1,119 +1,51 @@
|
|
| 1 |
-
|
| 2 |
-
def __init__(self, sequence):
    """Store ``sequence`` on the instance, converted to lower case."""
    lowered = sequence.lower()
    self.sequence = lowered
|
| 4 |
|
| 5 |
-
def get_total_bases(self):
    """Return the total length of the stored sequence."""
    bases = self.sequence
    return len(bases)
|
| 8 |
-
|
| 9 |
-
def get_base_count(self, base):
    """Return how many times ``base`` occurs in the stored sequence.

    ``base`` is lower-cased before counting. Can be A, T, G or C.
    """
    return self.sequence.count(base.lower())
|
| 13 |
|
|
|
|
| 14 |
def get_base_counts(self):
|
| 15 |
base_counts = {
|
| 16 |
-
'a': self.
|
| 17 |
-
't': self.
|
| 18 |
-
'g': self.
|
| 19 |
-
'c': self.
|
| 20 |
}
|
| 21 |
return base_counts
|
| 22 |
# Total number of each base within the sequence returned as a dictionary
|
| 23 |
|
| 24 |
-
def get_base_percentage(self, base):
    """Return the share of the sequence made up of ``base``, as a percentage.

    ``base`` can be A, T, G or C. The total comes from
    ``self.get_total_bases()`` and the count from ``self.get_base_count(base)``.
    An empty sequence yields 0.0 instead of raising ZeroDivisionError.
    """
    total_bases = self.get_total_bases()
    if not total_bases:
        # Nothing to divide by: report 0% rather than crash.
        return 0.0
    return (self.get_base_count(base) / total_bases) * 100
|
| 30 |
-
|
| 31 |
def get_base_percentages(self):
|
| 32 |
base_percentages = {
|
| 33 |
-
'a': self.
|
| 34 |
-
't': self.
|
| 35 |
-
'g': self.
|
| 36 |
-
'c': self.
|
| 37 |
}
|
| 38 |
return base_percentages
|
| 39 |
# Base content percentage for each base returned as a dictionary
|
| 40 |
|
| 41 |
def get_gc_content(self):
|
| 42 |
-
total_bases = self.
|
| 43 |
gc_count = self.sequence.count('g') + self.sequence.count('c')
|
| 44 |
gc_content = (gc_count / total_bases) * 100
|
| 45 |
return gc_content
|
| 46 |
# Guanine Cytosine (gc) content by percentage
|
| 47 |
|
| 48 |
def get_at_content(self):
|
| 49 |
-
total_bases = self.
|
| 50 |
at_count = self.sequence.count('a') + self.sequence.count('t')
|
| 51 |
at_content = (at_count / total_bases) * 100
|
| 52 |
return at_content
|
| 53 |
# Adenine Thymine (at) content by percentage
|
| 54 |
|
| 55 |
def get_purine_content(self):
|
| 56 |
-
total_bases = self.
|
| 57 |
ag_count = self.sequence.count('a') + self.sequence.count('g')
|
| 58 |
ag_content = (ag_count / total_bases) * 100
|
| 59 |
return ag_content
|
| 60 |
# Adenine Guanine (purine) content by percentage
|
| 61 |
|
| 62 |
def get_pyrimidine_content(self):
|
| 63 |
-
total_bases = self.
|
| 64 |
ct_count = self.sequence.count('c') + self.sequence.count('t')
|
| 65 |
ct_content = (ct_count / total_bases) * 100
|
| 66 |
return ct_content
|
| 67 |
# Cytosine Thymine (pyrimidine) content by percentage
|
| 68 |
-
|
| 69 |
-
def get_base_at_position(self, position):
    """Return the base at a 1-based ``position`` in the sequence.

    When the position falls outside the sequence, a message string naming
    the valid range is returned instead of a base.
    """
    index = position - 1  # convert the 1-based position to a 0-based index
    if not (0 <= index < len(self.sequence)):
        return "Position is out of range. Positions should be 1 - {}".format(len(self.sequence))
    return self.sequence[index]
|
| 78 |
-
|
| 79 |
-
def get_base_at_positions(self, position_list):
    """Return a dict mapping each 1-based position in ``position_list`` to its base.

    The list is validated with ``self.check_positions``; when that check
    fails, a message string naming the valid range is returned instead.
    """
    if not self.check_positions(position_list):
        return "Position is out of range.Positions should be 1 - {}".format(len(self.sequence))
    # No further filtering here: the previous `0 <= i < len(...)` filter was
    # applied to 1-based positions and silently dropped the last valid one.
    return {position: self.sequence[position - 1] for position in position_list}
|
| 86 |
-
|
| 87 |
-
def check_positions(self, position_list):
    """Return True when every position lies within 1..len(sequence).

    Every entry of ``position_list`` is examined; an empty list counts as
    valid.
    """
    # One flag per position: True when it is inside the sequence range.
    in_range = [
        not (pos <= 0 or pos > len(self.sequence))
        for pos in position_list
    ]
    return all(in_range)
|
| 108 |
-
|
| 109 |
-
def get_subsequence(self, start_position, end_position):
    """Return the subsequence between two 1-based, inclusive positions.

    Both positions must lie within 1..len(sequence); otherwise a message
    string naming the valid range is returned. Previously a zero or negative
    end position, or a start beyond the sequence, passed validation and was
    sliced with Python's negative-index semantics.
    A start that lies after the end (both in range) yields an empty string.
    """
    seq_length = len(self.sequence)
    positions_valid = all(
        1 <= position <= seq_length
        for position in (start_position, end_position)
    )
    if not positions_valid:
        return "Position is out of range. Positions should be 1 - {}".format(seq_length)
    # Shift only the start: the slice end is exclusive, so the 1-based
    # inclusive end position maps onto it unchanged.
    return self.sequence[start_position - 1:end_position]
|
| 116 |
-
|
| 117 |
-
def subsequence_total_bases(self, start_position, end_position):
    """Return the number of bases between two 1-based, inclusive positions.

    When either position lies outside 1..len(sequence), the out-of-range
    message string is returned instead of a count. Previously the length of
    that message was returned as if it were a number of bases.
    """
    seq_length = len(self.sequence)
    start_ok = 1 <= start_position <= seq_length
    end_ok = 1 <= end_position <= seq_length
    if not (start_ok and end_ok):
        return "Position is out of range. Positions should be 1 - {}".format(seq_length)
    return len(self.get_subsequence(start_position, end_position))
|
| 119 |
-
|
|
|
|
| 1 |
+
from sequence import Sequence
|
|
|
|
|
|
|
| 2 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
+
class DNAseq(Sequence):
    """DNA-specific statistics built on the imported ``Sequence`` base class.

    The methods read ``self.sequence`` and call ``get_unit_count``,
    ``get_unit_percentage`` and ``get_seq_length``. None of these are defined
    in this class, so they are expected to come from ``Sequence``.
    """

    # NOTE(review): the counts below look for lower-case letters, so this
    # presumably relies on Sequence storing the sequence lower-cased — confirm.

    def get_base_counts(self):
        """Return the count of each base as a dict keyed 'a', 't', 'g', 'c'."""
        return {base: self.get_unit_count(base) for base in 'atgc'}

    def get_base_percentages(self):
        """Return the percentage of each base as a dict keyed 'a', 't', 'g', 'c'."""
        return {base: self.get_unit_percentage(base) for base in 'atgc'}

    def _combined_percentage(self, bases):
        """Return the combined share of ``bases`` in the sequence, as a percentage.

        An empty sequence yields 0.0 instead of raising ZeroDivisionError.
        """
        total_bases = self.get_seq_length()
        if not total_bases:
            return 0.0
        combined_count = sum(self.sequence.count(base) for base in bases)
        return (combined_count / total_bases) * 100

    def get_gc_content(self):
        """Return the guanine + cytosine (GC) content as a percentage."""
        return self._combined_percentage('gc')

    def get_at_content(self):
        """Return the adenine + thymine (AT) content as a percentage."""
        return self._combined_percentage('at')

    def get_purine_content(self):
        """Return the adenine + guanine (purine) content as a percentage."""
        return self._combined_percentage('ag')

    def get_pyrimidine_content(self):
        """Return the cytosine + thymine (pyrimidine) content as a percentage."""
        return self._combined_percentage('ct')
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|