Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
O
Optimized Nussinov Algorithm
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Package Registry
Container Registry
Model registry
Operate
Environments
Terraform modules
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
mika04
Optimized Nussinov Algorithm
Commits
9f9b734d
Commit
9f9b734d
authored
3 months ago
by
Mika Cankosyan
Browse files
Options
Downloads
Patches
Plain Diff
nussinov implementation + bit of optimization (not finished)
parents
No related branches found
Branches containing commit
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
README.txt
+0
-0
0 additions, 0 deletions
README.txt
nussinov.py
+211
-0
211 additions, 0 deletions
nussinov.py
with
211 additions
and
0 deletions
README.txt
0 → 100644
+
0
−
0
View file @
9f9b734d
This diff is collapsed.
Click to expand it.
nussinov.py
0 → 100644
+
211
−
0
View file @
9f9b734d
'''
notes
'''
# constants
AU_SCORE
=
1
GC_SCORE
=
1.5
GU_SCORE
=
0.5
def
is_valid_rna_sequence
(
rna_sequence
):
valid_chars
=
{
'
A
'
,
'
U
'
,
'
G
'
,
'
C
'
}
return
set
(
rna_sequence
).
issubset
(
valid_chars
)
def
is_match
(
nucleotide1
,
nucleotide2
):
pairings
=
{
'
A
'
:
'
U
'
,
'
U
'
:
'
A
'
,
'
G
'
:
'
C
'
,
'
C
'
:
'
G
'
}
return
nucleotide2
==
pairings
[
nucleotide1
]
# returns -1 if not match, positive score if match
def
match_score
(
nucleotide1
,
nucleotide2
):
scores
=
{
'
A
'
:
{
'
A
'
:
-
1
,
'
U
'
:
AU_SCORE
,
'
G
'
:
-
1
,
'
C
'
:
-
1
},
'
U
'
:
{
'
A
'
:
AU_SCORE
,
'
U
'
:
-
1
,
'
G
'
:
GU_SCORE
,
'
C
'
:
-
1
},
'
G
'
:
{
'
A
'
:
-
1
,
'
U
'
:
GU_SCORE
,
'
G
'
:
-
1
,
'
C
'
:
GC_SCORE
},
'
C
'
:
{
'
A
'
:
-
1
,
'
U
'
:
-
1
,
'
G
'
:
GC_SCORE
,
'
C
'
:
-
1
}
}
return
scores
[
nucleotide1
][
nucleotide2
]
def
print_2d_array
(
array
):
for
row
in
array
:
print
(
'
'
.
join
(
f
'
{
num
:
3
}
'
for
num
in
row
))
# returns a list consisting of '(', ')', and '-' characters
def
bt_to_coded_list
(
bt
,
i
,
j
):
length
=
j
-
i
+
1
og_i
=
i
coded_list
=
list
(
'
-
'
*
length
)
while
(
bt
[
i
][
j
]
!=
-
1
):
if
(
bt
[
i
][
j
]
==
"
left
"
):
j
-=
1
elif
(
bt
[
i
][
j
]
==
"
down
"
):
i
+=
1
elif
(
bt
[
i
][
j
]
==
"
match
"
):
coded_list
[
i
-
og_i
]
=
'
(
'
coded_list
[
j
-
og_i
]
=
'
)
'
i
+=
1
j
-=
1
else
:
# bifurcation
return
coded_list
[:
i
]
+
bt_to_coded_list
(
bt
,
i
,
bt
[
i
][
j
])
\
+
bt_to_coded_list
(
bt
,
bt
[
i
][
j
]
+
1
,
j
)
+
coded_list
[
j
+
1
:]
return
coded_list
# returns dictionary of the base pairs
def
bt_to_base_pairs
(
bt
,
i
,
j
,
base_pairs
):
length
=
j
-
i
+
1
while
(
bt
[
i
][
j
]
!=
-
1
):
if
(
bt
[
i
][
j
]
==
"
left
"
):
j
-=
1
elif
(
bt
[
i
][
j
]
==
"
down
"
):
i
+=
1
elif
(
bt
[
i
][
j
]
==
"
match
"
):
base_pairs
[
i
]
=
j
i
+=
1
j
-=
1
else
:
# bifurcation
bt_to_base_pairs
(
bt
,
i
,
bt
[
i
][
j
],
base_pairs
)
bt_to_base_pairs
(
bt
,
bt
[
i
][
j
]
+
1
,
j
,
base_pairs
)
return
base_pairs
return
base_pairs
# takes in rna primary sequence
# returns dynamic programming table and backtrace table
# error if length < 1
# if there's a "tie" between bifurcations, chooses the smallest k giving the tied max
# if there's a "tie" between two or more of left, down, match, and bifurcation, chooses based on precedence in that order
def
original
(
rna_sequence
):
length
=
len
(
rna_sequence
)
dp_table
=
[[
0
for
i
in
range
(
length
)]
for
j
in
range
(
length
)]
bt
=
[[
0
for
i
in
range
(
length
)]
for
j
in
range
(
length
)]
# i is row, j is col
i
=
length
-
1
j
=
0
done
=
False
while
(
not
done
):
while
(
i
!=
length
and
j
!=
length
):
if
(
i
>=
j
):
dp_table
[
i
][
j
]
=
0
bt
[
i
][
j
]
=
-
1
else
:
left
=
dp_table
[
i
][
j
-
1
]
down
=
dp_table
[
i
+
1
][
j
]
match
=
None
if
(
is_match
(
rna_sequence
[
i
],
rna_sequence
[
j
])):
match
=
dp_table
[
i
+
1
][
j
-
1
]
+
1
else
:
match
=
-
1
bifurcation
=
-
1
k
=
None
for
K
in
range
(
1
+
1
,
j
):
current
=
dp_table
[
i
][
K
]
+
dp_table
[
K
+
1
][
j
]
if
(
current
>
bifurcation
):
bifurcation
=
current
k
=
K
max
=
None
if
(
left
>=
down
and
left
>=
match
and
left
>=
bifurcation
):
max
=
left
bt
[
i
][
j
]
=
"
left
"
elif
(
down
>=
left
and
down
>=
match
and
down
>=
bifurcation
):
max
=
down
bt
[
i
][
j
]
=
"
down
"
elif
(
match
>=
left
and
match
>=
down
and
match
>=
bifurcation
):
max
=
match
bt
[
i
][
j
]
=
"
match
"
else
:
max
=
bifurcation
bt
[
i
][
j
]
=
k
dp_table
[
i
][
j
]
=
max
i
+=
1
j
+=
1
if
(
i
==
length
):
if
(
j
==
length
):
i
=
0
j
=
1
else
:
i
-=
j
+
1
j
=
0
elif
(
i
!=
0
):
j
=
length
-
i
+
1
i
=
0
else
:
done
=
True
return
dp_table
,
bt
# takes in rna primary sequence
# returns dynamic programming table and backtrace table
def
optimized
(
rna_sequence
):
length
=
len
(
rna_sequence
)
dp_table
=
[[
0
for
i
in
range
(
length
)]
for
j
in
range
(
length
)]
bt
=
[[
0
for
i
in
range
(
length
)]
for
j
in
range
(
length
)]
# i is row, j is col
i
=
length
-
1
j
=
0
done
=
False
while
(
not
done
):
while
(
i
!=
length
and
j
!=
length
):
if
(
i
>=
j
):
dp_table
[
i
][
j
]
=
0
bt
[
i
][
j
]
=
-
1
else
:
left
=
dp_table
[
i
][
j
-
1
]
down
=
dp_table
[
i
+
1
][
j
]
match
=
None
match_score1
=
match_score
(
rna_sequence
[
i
],
rna_sequence
[
j
])
if
(
match_score1
!=
-
1
):
match
=
dp_table
[
i
+
1
][
j
-
1
]
+
match_score1
else
:
match
=
-
1
bifurcation
=
-
1
k
=
None
for
K
in
range
(
1
+
1
,
j
):
current
=
dp_table
[
i
][
K
]
+
dp_table
[
K
+
1
][
j
]
if
(
current
>
bifurcation
):
bifurcation
=
current
k
=
K
max
=
None
if
(
left
>=
down
and
left
>=
match
and
left
>=
bifurcation
):
max
=
left
bt
[
i
][
j
]
=
"
left
"
elif
(
down
>=
left
and
down
>=
match
and
down
>=
bifurcation
):
max
=
down
bt
[
i
][
j
]
=
"
down
"
elif
(
match
>=
left
and
match
>=
down
and
match
>=
bifurcation
):
max
=
match
bt
[
i
][
j
]
=
"
match
"
else
:
max
=
bifurcation
bt
[
i
][
j
]
=
k
dp_table
[
i
][
j
]
=
max
i
+=
1
j
+=
1
if
(
i
==
length
):
if
(
j
==
length
):
i
=
0
j
=
1
else
:
i
-=
j
+
1
j
=
0
elif
(
i
!=
0
):
j
=
length
-
i
+
1
i
=
0
else
:
done
=
True
return
dp_table
,
bt
rna_sequence
=
input
(
"
enter the rna sequence:
"
)
# will only work if it's a string consisting of AUCG
if
(
not
is_valid_rna_sequence
(
rna_sequence
)):
print
(
"
didn
'
t enter valid rna sequence (string of uppercase A U G C with no spaces), please run again
"
)
exit
()
choice
=
input
(
"
original or optimized:
"
)
# will only work if "original" or "optimized"
if
(
choice
==
"
original
"
):
dp_table
,
bt
=
original
(
rna_sequence
)
elif
(
choice
==
"
optimized
"
):
dp_table
,
bt
=
optimized
(
rna_sequence
)
else
:
print
(
"
didn
'
t enter valid choice, please run again
"
)
exit
()
print_2d_array
(
dp_table
)
coded_list
=
bt_to_coded_list
(
bt
,
0
,
len
(
rna_sequence
)
-
1
)
base_pairs
=
bt_to_base_pairs
(
bt
,
0
,
len
(
rna_sequence
)
-
1
,
{})
print
(
''
.
join
(
coded_list
))
print
(
base_pairs
)
\ No newline at end of file
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment