feat(tms_service): 添加html_scraper模块替代openai解析链接
使用html_scraper模块通过CSS选择器直接解析HTML获取链接和token,替代之前依赖openai的方式
This commit is contained in:
10
src-tauri/Cargo.lock
generated
10
src-tauri/Cargo.lock
generated
@@ -1593,6 +1593,15 @@ dependencies = [
|
|||||||
"match_token",
|
"match_token",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "html_scraper"
|
||||||
|
version = "0.1.0"
|
||||||
|
dependencies = [
|
||||||
|
"regex",
|
||||||
|
"scraper",
|
||||||
|
"thiserror 1.0.69",
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "http"
|
name = "http"
|
||||||
version = "1.3.1"
|
version = "1.3.1"
|
||||||
@@ -4458,6 +4467,7 @@ checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
|
|||||||
name = "tms_service"
|
name = "tms_service"
|
||||||
version = "0.1.0"
|
version = "0.1.0"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
|
"html_scraper",
|
||||||
"openai",
|
"openai",
|
||||||
"reqwest",
|
"reqwest",
|
||||||
"serde",
|
"serde",
|
||||||
|
|||||||
9
src-tauri/html_scraper/Cargo.toml
Normal file
9
src-tauri/html_scraper/Cargo.toml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
[package]
|
||||||
|
name = "html_scraper"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2024"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
scraper = "0.19.1"
|
||||||
|
thiserror = "1.0"
|
||||||
|
regex = "1.10.5"
|
||||||
73
src-tauri/html_scraper/src/lib.rs
Normal file
73
src-tauri/html_scraper/src/lib.rs
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
use scraper::{Html, Selector};
|
||||||
|
use thiserror::Error;
|
||||||
|
use regex::Regex;
|
||||||
|
|
||||||
|
/// 定义库可能返回的错误类型
|
||||||
|
#[derive(Debug, Error)]
|
||||||
|
pub enum ScrapeError {
|
||||||
|
/// 当内部使用的CSS选择器无效时返回。
|
||||||
|
#[error("无效的CSS选择器: {0}")]
|
||||||
|
InvalidSelector(String),
|
||||||
|
/// 当正则表达式编译失败时返回。
|
||||||
|
#[error("无效的正则表达式: {0}")]
|
||||||
|
InvalidRegex(#[from] regex::Error),
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 通用函数:根据CSS选择器和链接文本查找链接。
|
||||||
|
fn find_link_by_text(html_body: &str, selector_str: &str, link_text: &str) -> Result<Option<String>, ScrapeError> {
|
||||||
|
let selector = Selector::parse(selector_str)
|
||||||
|
.map_err(|e| ScrapeError::InvalidSelector(format!("'{selector_str}': {e}")))?;
|
||||||
|
|
||||||
|
let document = Html::parse_document(html_body);
|
||||||
|
|
||||||
|
for element in document.select(&selector) {
|
||||||
|
if element.text().any(|text| text.trim() == link_text) {
|
||||||
|
if let Some(link) = element.value().attr("href") {
|
||||||
|
return Ok(Some(link.to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 解析HTML文本,查找并返回第一个匹配“编辑”按钮的链接。
|
||||||
|
pub fn find_edit_link(html_body: &str) -> Result<Option<String>, ScrapeError> {
|
||||||
|
find_link_by_text(html_body, "a.btn.btn-info.btn-xs.m-bot5", "编辑")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// 解析HTML文本,查找并返回“TMS配置”按钮的链接。
|
||||||
|
pub fn find_tms_config_link(html_body: &str) -> Result<Option<String>, ScrapeError> {
|
||||||
|
find_link_by_text(html_body, "a.btn.btn-default.not-cinema", "TMS配置")
|
||||||
|
}
|
||||||
|
|
||||||
|
/// # 参数
|
||||||
|
/// * `html_body`: 一个字符串切片,包含要解析的HTML内容。
|
||||||
|
pub fn find_token(html_body: &str) -> Result<Option<String>, ScrapeError> {
|
||||||
|
// 使用属性选择器直接、高效地定位目标元素
|
||||||
|
let selector_str = r#"input[name="token"]"#;
|
||||||
|
let token_selector = Selector::parse(selector_str)
|
||||||
|
.map_err(|e| ScrapeError::InvalidSelector(format!("'{selector_str}': {e}")))?;
|
||||||
|
|
||||||
|
let document = Html::parse_document(html_body);
|
||||||
|
|
||||||
|
// 查找第一个匹配的元素并提取其 "value" 属性
|
||||||
|
if let Some(element) = document.select(&token_selector).next() {
|
||||||
|
if let Some(token_value) = element.value().attr("value") {
|
||||||
|
return Ok(Some(token_value.to_string()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
Ok(None)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Unit tests for the public scraping helpers.
//
// The previous body was the `cargo new --lib` template test, which called an
// `add` function that does not exist in this crate and therefore broke
// `cargo test` at compile time.
#[cfg(test)]
mod tests {
    use super::*;

    /// The edit link is taken from the button whose trimmed text is "编辑",
    /// not from another button that merely shares the same CSS classes.
    #[test]
    fn find_edit_link_returns_href_of_matching_button() {
        let html = r#"<html><body>
            <a class="btn btn-info btn-xs m-bot5" href="/cinema/view/42">查看</a>
            <a class="btn btn-info btn-xs m-bot5" href="/cinema/edit/42"> 编辑 </a>
        </body></html>"#;

        let link = find_edit_link(html).unwrap();
        assert_eq!(link, Some("/cinema/edit/42".to_string()));
    }

    /// No button with the expected text means `Ok(None)`, not an error.
    #[test]
    fn find_edit_link_returns_none_when_text_does_not_match() {
        let html = r#"<html><body>
            <a class="btn btn-info btn-xs m-bot5" href="/cinema/view/42">查看</a>
        </body></html>"#;

        assert_eq!(find_edit_link(html).unwrap(), None);
    }

    /// The TMS config link is found by its own selector and label.
    #[test]
    fn find_tms_config_link_returns_href_of_matching_button() {
        let html = r#"<html><body>
            <a class="btn btn-default not-cinema" href="/tms/config?id=7">TMS配置</a>
        </body></html>"#;

        let link = find_tms_config_link(html).unwrap();
        assert_eq!(link, Some("/tms/config?id=7".to_string()));
    }

    /// An element with the right text but the wrong classes is ignored.
    #[test]
    fn find_tms_config_link_ignores_elements_with_other_classes() {
        let html = r#"<html><body>
            <a class="btn btn-default" href="/tms/config?id=7">TMS配置</a>
        </body></html>"#;

        assert_eq!(find_tms_config_link(html).unwrap(), None);
    }

    /// The token is read from the `value` attribute of `input[name="token"]`.
    #[test]
    fn find_token_returns_value_attribute() {
        let html = r#"<html><body><form>
            <input type="hidden" name="other" value="nope">
            <input type="hidden" name="token" value="abc123">
        </form></body></html>"#;

        assert_eq!(find_token(html).unwrap(), Some("abc123".to_string()));
    }

    /// Missing input, or an input without `value`, both yield `Ok(None)`.
    #[test]
    fn find_token_returns_none_when_absent() {
        let without_input = "<html><body><p>no form here</p></body></html>";
        assert_eq!(find_token(without_input).unwrap(), None);

        let without_value = r#"<html><body><input name="token"></body></html>"#;
        assert_eq!(find_token(without_value).unwrap(), None);
    }
}
|
||||||
@@ -9,3 +9,4 @@ tokio = { version = "1", features = ["full"] }
|
|||||||
serde = { version = "1", features = ["derive"] }
|
serde = { version = "1", features = ["derive"] }
|
||||||
serde_json = "1"
|
serde_json = "1"
|
||||||
openai = { path = "../openai" }
|
openai = { path = "../openai" }
|
||||||
|
html_scraper = { path = "../html_scraper" }
|
||||||
|
|||||||
@@ -40,8 +40,11 @@ pub async fn create_ticket(cookie: &str, n2p: &str, massageQ: &str, wx: Option<&
|
|||||||
|
|
||||||
println!("源码获取完成");
|
println!("源码获取完成");
|
||||||
|
|
||||||
let system_prompt = "告诉我编辑按钮的链接,只要告诉我链接的url就可以,不要说其他的任何内容";
|
let edit_page_url = match html_scraper::find_edit_link(&body) {
|
||||||
let edit_page_url = openai::ask_openai(&body, system_prompt).await?;
|
Ok(Some(link)) => link,
|
||||||
|
Ok(None) => return Ok("没有找到编辑链接".to_string()),
|
||||||
|
Err(e) => return Ok(format!("查找编辑链接时出错: {}", e)),
|
||||||
|
};
|
||||||
println!("平台id{}", edit_page_url);
|
println!("平台id{}", edit_page_url);
|
||||||
|
|
||||||
// 3. Visit the edit page to get its source
|
// 3. Visit the edit page to get its source
|
||||||
@@ -50,8 +53,11 @@ pub async fn create_ticket(cookie: &str, n2p: &str, massageQ: &str, wx: Option<&
|
|||||||
let body2 = String::from_utf8_lossy(&body2_bytes).to_string();
|
let body2 = String::from_utf8_lossy(&body2_bytes).to_string();
|
||||||
|
|
||||||
// 4. Get the TMS config URL
|
// 4. Get the TMS config URL
|
||||||
let system_prompt2 = "告诉我TMS配置按钮的链接,只要告诉我链接的url就可以,不要说其他的任何内容";
|
let tms_config_url = match html_scraper::find_tms_config_link(&body2) {
|
||||||
let tms_config_url = openai::ask_openai(&body2, system_prompt2).await?;
|
Ok(Some(link)) => link,
|
||||||
|
Ok(None) => return Ok("没有找到TMS配置链接".to_string()),
|
||||||
|
Err(e) => return Ok(format!("查找TMS配置链接时出错: {}", e)),
|
||||||
|
};
|
||||||
println!("tms配置链接{}", tms_config_url);
|
println!("tms配置链接{}", tms_config_url);
|
||||||
|
|
||||||
// 5. Visit the TMS config URL to get the token
|
// 5. Visit the TMS config URL to get the token
|
||||||
@@ -59,8 +65,11 @@ pub async fn create_ticket(cookie: &str, n2p: &str, massageQ: &str, wx: Option<&
|
|||||||
let body3_bytes = res3.bytes().await?;
|
let body3_bytes = res3.bytes().await?;
|
||||||
let body3 = String::from_utf8_lossy(&body3_bytes).to_string();
|
let body3 = String::from_utf8_lossy(&body3_bytes).to_string();
|
||||||
|
|
||||||
let system_prompt3 = "告诉我权限认证(token)的值,只要告诉我对应的值就行,不要说其他的任何内容";
|
let token = match html_scraper::find_token(&body3) {
|
||||||
let token = openai::ask_openai(&body3, system_prompt3).await?;
|
Ok(Some(t)) => t,
|
||||||
|
Ok(None) => return Ok("没有找到token".to_string()),
|
||||||
|
Err(e) => return Ok(format!("查找token时出错: {}", e)),
|
||||||
|
};
|
||||||
println!("token={}", token);
|
println!("token={}", token);
|
||||||
|
|
||||||
// 6. Visit the final URL to get the session cookies
|
// 6. Visit the final URL to get the session cookies
|
||||||
|
|||||||
Reference in New Issue
Block a user