1use aios_spec::{ExtensionCategory, ScriptHint, SemanticHint, TextHint};
7
8pub(crate) fn analyze_text(text: &str) -> TextHint {
11 let length_chars = text.chars().count();
12 let is_emoji_only = !text.is_empty() && text.chars().all(is_emoji);
13
14 let script = if text.is_empty() {
15 ScriptHint::Unknown
16 } else {
17 let mut has_latin = false;
18 let mut has_hanzi = false;
19 let mut has_cyrillic = false;
20 let mut has_arabic = false;
21
22 for ch in text.chars() {
23 match ch {
24 '\u{0041}'..='\u{007A}' | '\u{00C0}'..='\u{024F}' => has_latin = true,
25 '\u{4E00}'..='\u{9FFF}'
26 | '\u{3400}'..='\u{4DBF}'
27 | '\u{3000}'..='\u{303F}'
28 | '\u{FF00}'..='\u{FFEF}' => has_hanzi = true,
29 '\u{0400}'..='\u{04FF}' | '\u{0500}'..='\u{052F}' => has_cyrillic = true,
30 '\u{0600}'..='\u{06FF}'
31 | '\u{0750}'..='\u{077F}'
32 | '\u{FB50}'..='\u{FDFF}'
33 | '\u{FE70}'..='\u{FEFF}' => has_arabic = true,
34 _ => {},
35 }
36 }
37
38 let count = [has_latin, has_hanzi, has_cyrillic, has_arabic]
39 .iter()
40 .filter(|&&x| x)
41 .count();
42 match count {
43 0 => ScriptHint::Unknown,
44 1 if has_latin => ScriptHint::Latin,
45 1 if has_hanzi => ScriptHint::Hanzi,
46 1 if has_cyrillic => ScriptHint::Cyrillic,
47 1 if has_arabic => ScriptHint::Arabic,
48 _ => ScriptHint::Mixed,
49 }
50 };
51
52 TextHint {
53 length_chars,
54 script,
55 is_emoji_only,
56 }
57}
58
59pub(crate) fn extract_semantic_hints(title: &str, text: &str) -> Vec<SemanticHint> {
63 let combined = format!("{} {}", title, text).to_lowercase();
64 let mut hints = Vec::new();
65
66 if contains_any(
68 &combined,
69 &[
70 "文件",
71 "file",
72 "pdf",
73 "doc",
74 "docx",
75 "xls",
76 "xlsx",
77 "ppt",
78 "pptx",
79 "zip",
80 "rar",
81 "attachment",
82 "附件",
83 ],
84 ) {
85 hints.push(SemanticHint::FileMention);
86 }
87 if contains_any(
89 &combined,
90 &[
91 "图片",
92 "照片",
93 "截图",
94 "image",
95 "photo",
96 "screenshot",
97 "jpg",
98 "jpeg",
99 "png",
100 "gif",
101 "webp",
102 "相册",
103 ],
104 ) {
105 hints.push(SemanticHint::ImageMention);
106 }
107 if contains_any(
109 &combined,
110 &[
111 "语音", "voice", "audio", "mp3", "wav", "aac", "录音", "通话",
112 ],
113 ) {
114 hints.push(SemanticHint::AudioMessage);
115 }
116 if contains_any(&combined, &["http", "https", "www.", "链接", "link", "url"]) {
118 hints.push(SemanticHint::LinkAttachment);
119 }
120 if contains_any(
122 &combined,
123 &["@你", "@所有人", "提到了你", "mentioned you", "@"],
124 ) {
125 hints.push(SemanticHint::UserMentioned);
126 }
127 if contains_any(
129 &combined,
130 &[
131 "会议",
132 "meeting",
133 "calendar",
134 "日历",
135 "invitation",
136 "邀请",
137 "schedule",
138 "日程",
139 ],
140 ) {
141 hints.push(SemanticHint::CalendarInvitation);
142 }
143 if contains_any(
145 &combined,
146 &[
147 "支付",
148 "付款",
149 "转账",
150 "payment",
151 "transaction",
152 "红包",
153 "balance",
154 "余额",
155 ],
156 ) {
157 hints.push(SemanticHint::FinancialContext);
158 }
159 if contains_any(
161 &combined,
162 &["验证码", "code", "otp", "验证", "verification", "captcha"],
163 ) {
164 hints.push(SemanticHint::VerificationCode);
165 }
166
167 hints
168}
169
170pub(crate) fn classify_extension(path: &str) -> ExtensionCategory {
174 let lower = path.to_lowercase();
175 let ext = std::path::Path::new(&lower)
176 .extension()
177 .and_then(|e| e.to_str())
178 .unwrap_or("");
179
180 match ext {
181 "pdf" | "doc" | "docx" | "xls" | "xlsx" | "ppt" | "pptx" | "txt" | "md" | "csv" | "odt"
182 | "ods" | "odp" => ExtensionCategory::Document,
183 "jpg" | "jpeg" | "png" | "gif" | "webp" | "heic" | "heif" | "bmp" | "svg" | "tiff" => {
184 ExtensionCategory::Image
185 },
186 "mp4" | "mov" | "avi" | "mkv" | "webm" | "flv" | "wmv" | "3gp" => ExtensionCategory::Video,
187 "mp3" | "wav" | "aac" | "flac" | "ogg" | "wma" | "m4a" | "opus" => ExtensionCategory::Audio,
188 "zip" | "rar" | "7z" | "tar" | "gz" | "bz2" | "xz" | "apk" | "aab" => {
189 ExtensionCategory::Archive
190 },
191 "py" | "js" | "ts" | "rs" | "cpp" | "c" | "h" | "java" | "kt" | "swift" | "go" | "so"
192 | "dylib" | "dll" => ExtensionCategory::Code,
193 "" => ExtensionCategory::Unknown,
194 _ => ExtensionCategory::Other,
195 }
196}
197
/// Returns `true` if `text` contains at least one of `keywords` as a substring.
///
/// Matching is exact (case-sensitive). An empty `keywords` slice yields `false`;
/// an empty keyword matches any text.
fn contains_any(text: &str, keywords: &[&str]) -> bool {
    for &needle in keywords {
        if text.contains(needle) {
            return true;
        }
    }
    false
}
203
/// Returns `true` if `ch` lies in one of the code-point ranges treated as emoji.
///
/// This is a range-based approximation rather than a lookup of the Unicode `Emoji`
/// property. Besides pictographic blocks it also accepts the joiner and selector
/// characters that appear inside emoji sequences, so multi-code-point emoji are
/// accepted character by character.
fn is_emoji(ch: char) -> bool {
    matches!(
        ch,
        // Emoticons.
        '\u{1F600}'..='\u{1F64F}'
        // Miscellaneous Symbols and Pictographs (includes skin-tone modifiers).
        | '\u{1F300}'..='\u{1F5FF}'
        // Transport and Map Symbols.
        | '\u{1F680}'..='\u{1F6FF}'
        // Supplemental Symbols and Pictographs.
        | '\u{1F900}'..='\u{1F9FF}'
        // Symbols and Pictographs Extended-A (newer emoji, e.g. U+1FAE0).
        | '\u{1FA70}'..='\u{1FAFF}'
        // Miscellaneous Symbols.
        | '\u{2600}'..='\u{26FF}'
        // Dingbats.
        | '\u{2700}'..='\u{27BF}'
        // Variation selectors (U+FE0F requests emoji presentation).
        | '\u{FE00}'..='\u{FE0F}'
        // Zero-width joiner used to glue emoji sequences together.
        | '\u{200D}'
        // Range covering the regional indicator symbols used for flags.
        | '\u{1F1E0}'..='\u{1F1FF}'
    )
}