@@ -82,6 +82,9 @@ rss_descr = "Solving Rosalind problem CONS — finding a consensus string from a
8282> **Sample Output**
8383> ```
8484> ATGCAACT
85+ > ```
86+ >
87+ > ```
8588> A: 5 1 0 0 5 5 0 0
8689> C: 0 0 1 4 2 0 6 1
8790> G: 1 1 6 3 0 1 0 0
@@ -150,8 +153,6 @@ we can generate the consensus string.
150153
151154
152155``` julia
153- using DataFrames
154-
155156function consensus (fasta_string)
156157
157158 # extract strings from fasta
@@ -163,26 +164,14 @@ function consensus(fasta_string)
163164 # convert data_vector to matrix where each column is a character position and each row is a string
164165 data_matrix = reduce (vcat, permutedims .(collect .(data_vector)))
165166
166- # make profile matrix
167- consensus_matrix_list = Vector{Int64}[]
168- for nuc in [' A' , ' C' , ' G' , ' T' ]
169- nuc_count = vec (sum (x-> x== nuc, data_matrix, dims= 1 ))
170- push! (consensus_matrix_list, nuc_count)
171- end
172-
173- consensus_matrix = vcat (consensus_matrix_list)
174-
175- # convert matrix to DF and add row names for nucleotides
176- consensus_df = DataFrame (consensus_matrix, [" A" , " C" , " G" , " T" ])
177-
178-
179- # make column with nucleotide with the max value
180- # argmax returns the index or key of the first one encountered
181- nuc_max_df = transform (consensus_df, AsTable (:) => ByRow (argmax) => :MaxColName )
182-
183- # return consensus string
184- return join (nuc_max_df. MaxColName)
167+ # make profile matrix from data_matrix, which is (num_strings × n) of Chars
168+ # profile is 4×n (each row holds the per-position counts for A, C, G, T, in that order)
169+ nucs = [' A' , ' C' , ' G' , ' T' ]
170+ profile = reduce (vcat, (sum (data_matrix .== nuc, dims= 1 ) for nuc in nucs))
185171
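172+ # compute the consensus string: pick the most frequent nucleotide at each position (argmax returns the first maximum, so ties resolve in A, C, G, T order)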
173+ consensus_string = join ([nucs[argmax (@view profile[:, j])] for j in 1 : size (profile, 2 )])
174+ return (consensus_string)
186175end
187176
188177consensus (fake_file)
0 commit comments