282 lines
7.5 KiB
Rust
Raw Normal View History

mod context;
mod document;
pub(crate) mod error;
mod fields;
mod template_checker;
use std::collections::BTreeMap;
use std::convert::TryFrom;
2024-09-03 12:07:10 +02:00
use std::num::NonZeroUsize;
use std::ops::Deref;
use error::{NewPromptError, RenderPromptError};
use self::context::Context;
use self::document::Document;
use crate::update::del_add::DelAdd;
use crate::{FieldId, FieldsIdsMap};
pub struct Prompt {
template: liquid::Template,
template_text: String,
2024-09-03 12:07:10 +02:00
max_bytes: Option<NonZeroUsize>,
}
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct PromptData {
pub template: String,
2024-09-03 12:07:10 +02:00
pub max_bytes: Option<NonZeroUsize>,
}
impl From<Prompt> for PromptData {
fn from(value: Prompt) -> Self {
2024-09-03 12:07:10 +02:00
Self { template: value.template_text, max_bytes: value.max_bytes }
}
}
impl TryFrom<PromptData> for Prompt {
type Error = NewPromptError;
fn try_from(value: PromptData) -> Result<Self, Self::Error> {
2024-09-03 12:07:10 +02:00
Prompt::new(value.template, value.max_bytes)
}
}
impl Clone for Prompt {
fn clone(&self) -> Self {
let template_text = self.template_text.clone();
2024-09-03 12:07:10 +02:00
Self {
template: new_template(&template_text).unwrap(),
template_text,
max_bytes: self.max_bytes,
}
}
}
fn new_template(text: &str) -> Result<liquid::Template, liquid::Error> {
liquid::ParserBuilder::with_stdlib().build().unwrap().parse(text)
}
fn default_template() -> liquid::Template {
new_template(default_template_text()).unwrap()
}
fn default_template_text() -> &'static str {
"{% for field in fields %}\
{% if field.is_searchable and field.value != nil %}\
{{ field.name }}: {{ field.value }}\n\
{% endif %}\
{% endfor %}"
}
2024-09-03 12:07:10 +02:00
pub fn default_max_bytes() -> NonZeroUsize {
NonZeroUsize::new(400).unwrap()
}
impl Default for Prompt {
fn default() -> Self {
2024-09-03 12:07:10 +02:00
Self {
template: default_template(),
template_text: default_template_text().into(),
max_bytes: Some(default_max_bytes()),
}
}
}
impl Default for PromptData {
fn default() -> Self {
2024-09-03 12:07:10 +02:00
Self { template: default_template_text().into(), max_bytes: Some(default_max_bytes()) }
}
}
impl Prompt {
2024-09-03 12:07:10 +02:00
pub fn new(template: String, max_bytes: Option<NonZeroUsize>) -> Result<Self, NewPromptError> {
let this = Self {
template: liquid::ParserBuilder::with_stdlib()
.build()
.unwrap()
.parse(&template)
.map_err(NewPromptError::cannot_parse_template)?,
template_text: template,
2024-09-03 12:07:10 +02:00
max_bytes,
};
// render template with special object that's OK with `doc.*` and `fields.*`
this.template
.render(&template_checker::TemplateChecker)
.map_err(NewPromptError::invalid_fields_in_template)?;
Ok(this)
}
pub fn render(
&self,
document: &obkv::KvReaderU16,
side: DelAdd,
field_id_map: &FieldsIdsMapWithMetadata,
) -> Result<String, RenderPromptError> {
let document = Document::new(document, side, field_id_map);
let context = Context::new(&document, field_id_map);
2024-09-03 12:07:10 +02:00
let mut rendered =
self.template.render(&context).map_err(RenderPromptError::missing_context)?;
if let Some(max_bytes) = self.max_bytes {
truncate(&mut rendered, max_bytes.get());
}
Ok(rendered)
}
}
fn truncate(s: &mut String, max_bytes: usize) {
if max_bytes >= s.len() {
return;
}
for i in (0..=max_bytes).rev() {
if s.is_char_boundary(i) {
s.truncate(i);
break;
}
}
}
pub struct FieldsIdsMapWithMetadata<'a> {
fields_ids_map: &'a FieldsIdsMap,
metadata: BTreeMap<FieldId, FieldMetadata>,
}
impl<'a> FieldsIdsMapWithMetadata<'a> {
pub fn new(fields_ids_map: &'a FieldsIdsMap, searchable_fields_ids: &'_ [FieldId]) -> Self {
let mut metadata: BTreeMap<FieldId, FieldMetadata> =
fields_ids_map.ids().map(|id| (id, Default::default())).collect();
for searchable_field_id in searchable_fields_ids {
let Some(metadata) = metadata.get_mut(searchable_field_id) else { continue };
metadata.searchable = true;
}
Self { fields_ids_map, metadata }
}
pub fn metadata(&self, field_id: FieldId) -> Option<FieldMetadata> {
self.metadata.get(&field_id).copied()
}
}
impl<'a> Deref for FieldsIdsMapWithMetadata<'a> {
type Target = FieldsIdsMap;
fn deref(&self) -> &Self::Target {
self.fields_ids_map
}
}
#[derive(Debug, Default, Clone, Copy)]
pub struct FieldMetadata {
pub searchable: bool,
}
#[cfg(test)]
mod test {
use super::Prompt;
use crate::error::FaultSource;
use crate::prompt::error::{NewPromptError, NewPromptErrorKind};
2024-09-03 12:07:10 +02:00
use crate::prompt::truncate;
#[test]
fn default_template() {
// does not panic
Prompt::default();
}
#[test]
fn empty_template() {
2024-09-03 12:07:10 +02:00
Prompt::new("".into(), None).unwrap();
}
#[test]
fn template_ok() {
2024-09-03 12:07:10 +02:00
Prompt::new("{{doc.title}}: {{doc.overview}}".into(), None).unwrap();
}
#[test]
fn template_syntax() {
assert!(matches!(
2024-09-03 12:07:10 +02:00
Prompt::new("{{doc.title: {{doc.overview}}".into(), None),
Err(NewPromptError {
kind: NewPromptErrorKind::CannotParseTemplate(_),
fault: FaultSource::User
})
));
}
#[test]
fn template_missing_doc() {
assert!(matches!(
2024-09-03 12:07:10 +02:00
Prompt::new("{{title}}: {{overview}}".into(), None),
Err(NewPromptError {
kind: NewPromptErrorKind::InvalidFieldsInTemplate(_),
fault: FaultSource::User
})
));
}
#[test]
fn template_nested_doc() {
2024-09-03 12:07:10 +02:00
Prompt::new("{{doc.actor.firstName}}: {{doc.actor.lastName}}".into(), None).unwrap();
}
#[test]
fn template_fields() {
2024-09-03 12:07:10 +02:00
Prompt::new("{% for field in fields %}{{field}}{% endfor %}".into(), None).unwrap();
}
#[test]
fn template_fields_ok() {
2024-09-03 12:07:10 +02:00
Prompt::new(
"{% for field in fields %}{{field.name}}: {{field.value}}{% endfor %}".into(),
None,
)
.unwrap();
}
#[test]
fn template_fields_invalid() {
assert!(matches!(
// intentionally garbled field
2024-09-03 12:07:10 +02:00
Prompt::new("{% for field in fields %}{{field.vaelu}} {% endfor %}".into(), None),
Err(NewPromptError {
kind: NewPromptErrorKind::InvalidFieldsInTemplate(_),
fault: FaultSource::User
})
));
}
2024-09-03 12:07:10 +02:00
// todo: test truncation
#[test]
fn template_truncation() {
let mut s = "インテル ザー ビーグル".to_string();
truncate(&mut s, 42);
assert_eq!(s, "インテル ザー ビーグル");
assert_eq!(s.len(), 32);
truncate(&mut s, 32);
assert_eq!(s, "インテル ザー ビーグル");
truncate(&mut s, 31);
assert_eq!(s, "インテル ザー ビーグ");
truncate(&mut s, 30);
assert_eq!(s, "インテル ザー ビーグ");
truncate(&mut s, 28);
assert_eq!(s, "インテル ザー ビー");
truncate(&mut s, 26);
assert_eq!(s, "インテル ザー ビー");
truncate(&mut s, 25);
assert_eq!(s, "インテル ザー ビ");
assert_eq!("".len(), 3);
truncate(&mut s, 3);
assert_eq!(s, "");
truncate(&mut s, 2);
assert_eq!(s, "");
}
}