156156 HEADER_INDICATORS,
157157)
158158
159- # `Headers` gets called _alot_ (on every line), so we try very hard to be
160- # efficient.
161- function Headers (s)
159+ function parse_single_header (s:: AbstractString )
162160 N = length (s)
163161 x = first (s)
164162 if N == 3
@@ -180,7 +178,7 @@ function Headers(s)
180178 return HEADER_ENDATA
181179 end
182180 elseif N == 7
183- if (x == ' C' || x == ' c' ) && ( uppercase (s) == " COLUMNS" )
181+ if (x == ' C' || x == ' c' ) && uppercase (s) == " COLUMNS"
184182 return HEADER_COLUMNS
185183 elseif (x == ' Q' || x == ' q' )
186184 header = uppercase (s)
@@ -190,34 +188,127 @@ function Headers(s)
190188 return HEADER_QMATRIX
191189 end
192190 end
193- elseif N > = 8
194- if (x == ' O' || x == ' o' ) && startswith ( uppercase (s), " OBJSENSE" )
191+ elseif N = = 8
192+ if (x == ' O' || x == ' o' ) && uppercase (s) == " OBJSENSE"
195193 return HEADER_OBJSENSE
196- elseif (x == ' Q' || x == ' q' )
197- header = uppercase (s)
198- if startswith (header, " QCMATRIX" )
199- return HEADER_QCMATRIX
200- elseif startswith (header, " QSECTION" )
201- return HEADER_QSECTION
194+ end
195+ elseif N == 10
196+ if (x == ' I' || x == ' i' ) && uppercase (s) == " INDICATORS"
197+ return HEADER_INDICATORS
198+ end
199+ end
200+ return HEADER_UNKNOWN
201+ end
202+
"""
    parse_double_header(s::AbstractString)

Classify `s` as one of the section headers that are written with a second
field on the same line.

Return `HEADER_OBJSENSE`, `HEADER_QCMATRIX`, or `HEADER_QSECTION` when `s` is a
case-insensitive match for the corresponding eight-character keyword, and
`HEADER_UNKNOWN` otherwise (including when `s` is empty).
"""
function parse_double_header(s::AbstractString)
    # Every keyword handled here has exactly eight characters. Check the length
    # before calling `first`, because `first` throws on an empty string.
    if length(s) != 8
        return HEADER_UNKNOWN
    end
    x = first(s)
    # Test the first character before calling `uppercase`, so that lines which
    # cannot match never pay for the allocation.
    if x == 'O' || x == 'o'
        if uppercase(s) == "OBJSENSE"
            return HEADER_OBJSENSE
        end
    elseif x == 'Q' || x == 'q'
        header = uppercase(s)
        # `header` has exactly eight characters, so equality is equivalent to a
        # `startswith` test against an eight-character keyword.
        if header == "QCMATRIX"
            return HEADER_QCMATRIX
        elseif header == "QSECTION"
            return HEADER_QSECTION
        end
    end
    return HEADER_UNKNOWN
end
220+
"""
    LineToItems(line::String)

Split `line` on any whitespace characters and record the position of the first
five fields. We can't split only on `' '` because at least one model in MIPLIB
has `\\t` as a separator.

This decision assumes that we are parsing a free MPS file, where whitespace is
disallowed in names. If this ever becomes a problem, we could change to the
fixed MPS format, where the files are split at the usual offsets.

`nfields` counts every field in the line, even beyond the fifth, but only the
ranges of the first five fields are stored in `fields`. Unused entries of
`fields` are `0:0`.

This function is a more performant version of:
```julia
LineToItems(line::String) = split(line, r"\\s"; keepempty = false)
```
"""
struct LineToItems
    line::String
    nfields::Int
    fields::NTuple{5,UnitRange{Int}}

    function LineToItems(line::String)
        nfields, f1, f2, f3, f4, f5 = 0, 0:0, 0:0, 0:0, 0:0, 0:0
        start, in_field = -1, false
        # Iterate over valid character indices. Indexing a `String` at every
        # code unit throws a `StringIndexError` on the continuation bytes of a
        # multi-byte UTF-8 character.
        for (i, c) in pairs(line)
            if isspace(c)
                if in_field
                    nfields += 1
                    # The field ends at the character before the whitespace.
                    # `i - 1` may be a continuation byte, which `SubString`
                    # rejects, so step back by one character instead.
                    stop = prevind(line, i)
                    if nfields == 1
                        f1 = start:stop
                    elseif nfields == 2
                        f2 = start:stop
                    elseif nfields == 3
                        f3 = start:stop
                    elseif nfields == 4
                        f4 = start:stop
                    elseif nfields == 5
                        f5 = start:stop
                    end
                    in_field = false
                end
            elseif !in_field
                start = i
                in_field = true
            end
        end
        if in_field
            # The line ended in the middle of a field. Close it at the last
            # valid character index, not at the last code unit.
            nfields += 1
            stop = lastindex(line)
            if nfields == 1
                f1 = start:stop
            elseif nfields == 2
                f2 = start:stop
            elseif nfields == 3
                f3 = start:stop
            elseif nfields == 4
                f4 = start:stop
            elseif nfields == 5
                f5 = start:stop
            end
        end
        return new(line, nfields, (f1, f2, f3, f4, f5))
    end
end
211284
212- function line_to_items (line)
213- # Split on any whitespace characters. We can't split only on `' '` because
214- # at least one models in MIPLIB has `\t` as a separator.
215- #
216- # This decision assumes that we are parsing a free MPS file, where
217- # whitespace is disallowed in names. If this ever becomes a problem, we
218- # could change to the fixed MPS format, where the files are split at the
219- # usual offsets.
220- return split (line, r" \s " ; keepempty = false )
# The number of whitespace-separated fields found in the line. This can be
# larger than five, even though only the first five field ranges are stored.
function Base.length(x::LineToItems)
    return x.nfields
end
286+
# Return the `i`-th field as a `SubString` view into the original line. Only
# the first five fields are stored, so any other index is a `BoundsError`.
function Base.getindex(x::LineToItems, i::Int)
    last_stored = min(5, x.nfields)
    if i < 1 || i > last_stored
        throw(BoundsError(x, i))
    end
    range = x.fields[i]
    return SubString(x.line, range)
end
293+
# Iterate over the fields in order. The state is the index of the next field,
# starting at 1, and iteration finishes once it passes `nfields`.
function Base.iterate(x::LineToItems, i = 1)
    return i > x.nfields ? nothing : (x[i], i + 1)
end
302+
# `parse_header` gets called a lot (on every line), so we try very hard to be
# efficient. The number of fields decides which header parser to try. In both
# cases only the first field is inspected.
function parse_header(s::LineToItems)
    n = length(s)
    if n == 1
        return parse_single_header(s[1])
    elseif n == 2
        return parse_double_header(s[1])
    end
    return HEADER_UNKNOWN
end
222313
223314"""
@@ -237,13 +328,12 @@ function Base.read!(io::IO, model::Model{T}) where {T}
237328 if startswith (data. contents, ' *' )
238329 continue # Lines starting with `*` are comments
239330 end
240- line = string ( strip ( data. contents) )
241- if isempty (line)
331+ items = LineToItems ( data. contents)
332+ if length (items) == 0
242333 continue # Skip blank lines
243334 end
244- h = Headers (line )
335+ h = parse_header (items )
245336 if h == HEADER_OBJSENSE
246- items = line_to_items (line)
247337 if length (items) == 2
248338 sense = uppercase (items[2 ])
249339 if ! (sense in (" MIN" , " MAX" ))
@@ -258,7 +348,6 @@ function Base.read!(io::IO, model::Model{T}) where {T}
258348 end
259349 continue
260350 elseif h == HEADER_QCMATRIX || h == HEADER_QSECTION
261- items = line_to_items (line)
262351 if length (items) != 2
263352 _throw_parse_error (
264353 data,
@@ -274,10 +363,8 @@ function Base.read!(io::IO, model::Model{T}) where {T}
274363 continue
275364 end
276365 # Otherwise, carry on with the previous header
277- # TODO : split into hard fields based on column indices.
278- items = line_to_items (line)
279366 if header == HEADER_NAME
280- parse_name_line (data, line )
367+ parse_name_line (data)
281368 elseif header == HEADER_OBJSENSE
282369 sense = uppercase (only (items))
283370 if ! (sense in (" MIN" , " MAX" ))
490577# NAME
491578# ==============================================================================
492579
493- function parse_name_line (data:: TempMPSModel , line )
494- m = match (r" ^\s *NAME(.*)" i , line )
580+ function parse_name_line (data:: TempMPSModel )
581+ m = match (r" ^\s *NAME(.*)" i , data . contents )
495582 if m === nothing
496583 _throw_parse_error (
497584 data,
506593# ROWS
507594# ==============================================================================
508595
509- function parse_rows_line (data:: TempMPSModel{T} , items:: Vector ) where {T}
596+ function parse_rows_line (data:: TempMPSModel{T} , items) where {T}
510597 if length (items) < 2
511598 _throw_parse_error (
512599 data,
@@ -619,7 +706,7 @@ function _set_intorg(data::TempMPSModel{T}, column, column_name) where {T}
619706 return
620707end
621708
622- function parse_columns_line (data:: TempMPSModel{T} , items:: Vector ) where {T}
709+ function parse_columns_line (data:: TempMPSModel{T} , items) where {T}
623710 if length (items) == 3
624711 # [column name] [row name] [value]
625712 column_name, row_name, value = items
657744# RHS
658745# ==============================================================================
659746
660- function parse_single_rhs (data, row_name, value, items:: Vector )
747+ function parse_single_rhs (data, row_name, value, items)
661748 if row_name == data. obj_name
662749 data. obj_constant = value
663750 return
@@ -688,7 +775,7 @@ function parse_single_rhs(data, row_name, value, items::Vector)
688775end
689776
690777# TODO : handle multiple RHS vectors.
691- function parse_rhs_line (data:: TempMPSModel{T} , items:: Vector ) where {T}
778+ function parse_rhs_line (data:: TempMPSModel{T} , items) where {T}
692779 if length (items) == 3
693780 # [rhs name] [row name] [value]
694781 rhs_name, row_name, value = items
@@ -744,7 +831,7 @@ function parse_single_range(data, row_name, value)
744831end
745832
746833# TODO : handle multiple RANGES vectors.
747- function parse_ranges_line (data:: TempMPSModel{T} , items:: Vector ) where {T}
834+ function parse_ranges_line (data:: TempMPSModel{T} , items) where {T}
748835 if length (items) == 3
749836 # [rhs name] [row name] [value]
750837 _, row_name, value = items
@@ -859,7 +946,7 @@ function _parse_single_bound(
859946 end
860947end
861948
862- function parse_bounds_line (data:: TempMPSModel{T} , items:: Vector ) where {T}
949+ function parse_bounds_line (data:: TempMPSModel{T} , items) where {T}
863950 if length (items) == 3
864951 bound_type, _, column_name = items
865952 _parse_single_bound (data, column_name, bound_type)
0 commit comments