|
| 1 | +use std::collections::HashSet; |
| 2 | + |
/// Extract unique wikilink targets from `text`, in order of first appearance.
///
/// Recognised forms are `[[Target]]`, `[[Target|Display]]` and
/// `[[Target#Heading]]` (or both suffixes combined); only the trimmed
/// `Target` part is returned. Embeds (`![[...]]`) are skipped, as are links
/// whose target is empty after trimming.
///
/// A link must open and close on the same line. An unclosed `[[` therefore
/// never consumes text from following lines, so links that appear later in
/// the text are still found.
pub fn extract_wikilink_targets(text: &str) -> Vec<String> {
    let bytes = text.as_bytes();
    let mut targets: Vec<String> = Vec::new();
    let mut seen: HashSet<String> = HashSet::new();
    let mut i = 0;

    while i + 1 < bytes.len() {
        if bytes[i] != b'[' || bytes[i + 1] != b'[' {
            i += 1;
            continue;
        }

        // A `!` directly before the opener marks an embed, not a link.
        let is_embed = i > 0 && bytes[i - 1] == b'!';

        // Both opener bytes are ASCII, so `i + 2` is a char boundary and
        // `i + 2 <= bytes.len()` follows from the loop condition.
        let rest = &text[i + 2..];

        // Only look for the closer on the current line.
        let line = match rest.find('\n') {
            Some(end) => &rest[..end],
            None => rest,
        };
        let close = match line.find("]]") {
            Some(pos) => pos,
            None => {
                // Unclosed opener: step forward one byte and keep scanning
                // instead of jumping past a `]]` on a later line.
                i += 1;
                continue;
            }
        };

        let inner = &line[..close];
        if !is_embed && !inner.is_empty() {
            // Drop everything from the first `#` (heading) or `|` (display text).
            let target = inner
                .split(|c: char| c == '#' || c == '|')
                .next()
                .unwrap_or(inner)
                .trim();
            if !target.is_empty() && !seen.contains(target) {
                seen.insert(target.to_string());
                targets.push(target.to_string());
            }
        }

        // Resume after the closing `]]`: opener (2) + inner + closer (2).
        i += 2 + close + 2;
    }

    targets
}
| 38 | + |
/// Break a query into lowercase terms for relevance filtering.
///
/// The query is split on whitespace and each piece is lowercased. A term is
/// kept only when its lowercased form is at least 3 bytes long; note that the
/// length is measured in UTF-8 bytes, not characters. Terms are returned in
/// their original order and duplicates are preserved.
pub fn extract_query_terms(query: &str) -> Vec<String> {
    let mut terms = Vec::new();
    for word in query.split_whitespace() {
        let lowered = word.to_lowercase();
        if lowered.len() >= 3 {
            terms.push(lowered);
        }
    }
    terms
}
| 48 | + |
#[cfg(test)]
mod tests {
    use super::*;

    /// Repeated links are reported once and display text after `|` is dropped.
    #[test]
    fn test_extract_wikilink_targets() {
        let found = extract_wikilink_targets(
            "See [[Note One]] and [[Note Two|display]] for details. Also [[Note One]] again.",
        );
        assert_eq!(found.len(), 2); // deduplicated
        assert!(found.iter().any(|t| t == "Note One"));
        assert!(found.iter().any(|t| t == "Note Two"));
    }

    /// A `#Heading` suffix is not part of the target.
    #[test]
    fn test_extract_wikilinks_with_headings() {
        assert_eq!(
            extract_wikilink_targets("Link to [[Note#Section]] here."),
            vec!["Note"]
        );
    }

    /// Text without links, including the empty string, yields nothing.
    #[test]
    fn test_extract_wikilinks_empty() {
        for input in ["no links here", ""].iter() {
            assert!(extract_wikilink_targets(input).is_empty());
        }
    }

    /// `![[...]]` embeds are ignored while ordinary links are kept.
    #[test]
    fn test_extract_wikilinks_skip_embeds() {
        assert_eq!(
            extract_wikilink_targets("![[embedded image.png]] and [[real link]]"),
            vec!["real link"]
        );
    }

    /// Heading and display suffixes are both stripped when combined.
    #[test]
    fn test_extract_wikilinks_heading_and_display() {
        assert_eq!(
            extract_wikilink_targets("[[Note#Section|Custom Display]]"),
            vec!["Note"]
        );
    }

    /// Terms are lowercased and returned in query order.
    #[test]
    fn test_extract_query_terms() {
        assert_eq!(
            extract_query_terms("BRE-2579 delivery date"),
            vec!["bre-2579", "delivery", "date"]
        );
    }

    /// Words shorter than three bytes are filtered out.
    #[test]
    fn test_extract_query_terms_short_words_dropped() {
        assert_eq!(
            extract_query_terms("a is the big query"),
            vec!["the", "big", "query"]
        );
    }
}
0 commit comments