2023-11-15 15:46:37 +01:00
|
|
|
mod context;
|
|
|
|
mod document;
|
|
|
|
pub(crate) mod error;
|
|
|
|
mod fields;
|
|
|
|
mod template_checker;
|
|
|
|
|
2024-10-28 14:10:55 +01:00
|
|
|
use std::cell::RefCell;
|
2024-09-02 11:28:53 +02:00
|
|
|
use std::collections::BTreeMap;
|
2023-11-15 15:46:37 +01:00
|
|
|
use std::convert::TryFrom;
|
2024-10-28 14:10:55 +01:00
|
|
|
use std::fmt::Debug;
|
2024-09-03 12:07:10 +02:00
|
|
|
use std::num::NonZeroUsize;
|
2024-09-02 11:28:53 +02:00
|
|
|
use std::ops::Deref;
|
2023-11-15 15:46:37 +01:00
|
|
|
|
2024-10-28 14:10:55 +01:00
|
|
|
use bumpalo::Bump;
|
|
|
|
use document::ParseableDocument;
|
2023-11-15 15:46:37 +01:00
|
|
|
use error::{NewPromptError, RenderPromptError};
|
2024-10-28 14:10:55 +01:00
|
|
|
use fields::{BorrowedFields, OwnedFields};
|
2023-11-15 15:46:37 +01:00
|
|
|
|
|
|
|
use self::context::Context;
|
|
|
|
use self::document::Document;
|
|
|
|
use crate::update::del_add::DelAdd;
|
2024-10-28 14:10:55 +01:00
|
|
|
use crate::{FieldId, FieldsIdsMap, GlobalFieldsIdsMap};
|
2023-11-15 15:46:37 +01:00
|
|
|
|
|
|
|
pub struct Prompt {
|
|
|
|
template: liquid::Template,
|
|
|
|
template_text: String,
|
2024-09-03 12:07:10 +02:00
|
|
|
max_bytes: Option<NonZeroUsize>,
|
2023-11-15 15:46:37 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
|
|
|
|
pub struct PromptData {
|
|
|
|
pub template: String,
|
2024-09-03 12:07:10 +02:00
|
|
|
pub max_bytes: Option<NonZeroUsize>,
|
2023-11-15 15:46:37 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
impl From<Prompt> for PromptData {
|
|
|
|
fn from(value: Prompt) -> Self {
|
2024-09-03 12:07:10 +02:00
|
|
|
Self { template: value.template_text, max_bytes: value.max_bytes }
|
2023-11-15 15:46:37 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl TryFrom<PromptData> for Prompt {
|
|
|
|
type Error = NewPromptError;
|
|
|
|
|
|
|
|
fn try_from(value: PromptData) -> Result<Self, Self::Error> {
|
2024-09-03 12:07:10 +02:00
|
|
|
Prompt::new(value.template, value.max_bytes)
|
2023-11-15 15:46:37 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Clone for Prompt {
|
|
|
|
fn clone(&self) -> Self {
|
|
|
|
let template_text = self.template_text.clone();
|
2024-09-03 12:07:10 +02:00
|
|
|
Self {
|
|
|
|
template: new_template(&template_text).unwrap(),
|
|
|
|
template_text,
|
|
|
|
max_bytes: self.max_bytes,
|
|
|
|
}
|
2023-11-15 15:46:37 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Parses `text` with a parser obtained from `liquid::ParserBuilder::with_stdlib()`.
///
/// Returns the parse error when `text` is not a valid template.
///
/// # Panics
///
/// Panics if building the parser itself fails.
fn new_template(text: &str) -> Result<liquid::Template, liquid::Error> {
    let parser = liquid::ParserBuilder::with_stdlib().build().unwrap();
    parser.parse(text)
}
|
|
|
|
|
|
|
|
fn default_template() -> liquid::Template {
|
|
|
|
new_template(default_template_text()).unwrap()
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Source text of the default template.
///
/// For every entry of `fields` that is searchable and whose value is not `nil`,
/// the template emits `name: value` followed by a newline.
fn default_template_text() -> &'static str {
    concat!(
        "{% for field in fields %}",
        "{% if field.is_searchable and field.value != nil %}",
        "{{ field.name }}: {{ field.value }}\n",
        "{% endif %}",
        "{% endfor %}",
    )
}
|
|
|
|
|
2024-09-03 12:07:10 +02:00
|
|
|
/// Default byte limit for rendered templates: 400 bytes.
pub fn default_max_bytes() -> NonZeroUsize {
    match NonZeroUsize::new(400) {
        Some(limit) => limit,
        None => unreachable!("400 is not zero"),
    }
}
|
|
|
|
|
2023-11-15 15:46:37 +01:00
|
|
|
impl Default for Prompt {
|
|
|
|
fn default() -> Self {
|
2024-09-03 12:07:10 +02:00
|
|
|
Self {
|
|
|
|
template: default_template(),
|
|
|
|
template_text: default_template_text().into(),
|
|
|
|
max_bytes: Some(default_max_bytes()),
|
|
|
|
}
|
2023-11-15 15:46:37 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Default for PromptData {
|
|
|
|
fn default() -> Self {
|
2024-09-03 12:07:10 +02:00
|
|
|
Self { template: default_template_text().into(), max_bytes: Some(default_max_bytes()) }
|
2023-11-15 15:46:37 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl Prompt {
|
2024-09-03 12:07:10 +02:00
|
|
|
pub fn new(template: String, max_bytes: Option<NonZeroUsize>) -> Result<Self, NewPromptError> {
|
2023-11-15 15:46:37 +01:00
|
|
|
let this = Self {
|
|
|
|
template: liquid::ParserBuilder::with_stdlib()
|
|
|
|
.build()
|
|
|
|
.unwrap()
|
|
|
|
.parse(&template)
|
|
|
|
.map_err(NewPromptError::cannot_parse_template)?,
|
|
|
|
template_text: template,
|
2024-09-03 12:07:10 +02:00
|
|
|
max_bytes,
|
2023-11-15 15:46:37 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
// render template with special object that's OK with `doc.*` and `fields.*`
|
|
|
|
this.template
|
|
|
|
.render(&template_checker::TemplateChecker)
|
|
|
|
.map_err(NewPromptError::invalid_fields_in_template)?;
|
|
|
|
|
|
|
|
Ok(this)
|
|
|
|
}
|
|
|
|
|
2024-10-28 14:10:55 +01:00
|
|
|
pub fn render_document<
|
|
|
|
'a, // lifetime of the borrow of the document
|
|
|
|
'doc: 'a, // lifetime of the allocator, will live for an entire chunk of documents
|
|
|
|
>(
|
|
|
|
&self,
|
2024-12-11 15:26:18 +01:00
|
|
|
external_docid: &str,
|
2024-10-28 14:10:55 +01:00
|
|
|
document: impl crate::update::new::document::Document<'a> + Debug,
|
|
|
|
field_id_map: &RefCell<GlobalFieldsIdsMap>,
|
|
|
|
doc_alloc: &'doc Bump,
|
|
|
|
) -> Result<&'doc str, RenderPromptError> {
|
|
|
|
let document = ParseableDocument::new(document, doc_alloc);
|
|
|
|
let fields = BorrowedFields::new(&document, field_id_map, doc_alloc);
|
|
|
|
let context = Context::new(&document, &fields);
|
|
|
|
let mut rendered = bumpalo::collections::Vec::with_capacity_in(
|
|
|
|
self.max_bytes.unwrap_or_else(default_max_bytes).get(),
|
2024-10-28 14:22:19 +01:00
|
|
|
doc_alloc,
|
2024-10-28 14:10:55 +01:00
|
|
|
);
|
2024-12-11 15:26:18 +01:00
|
|
|
self.template.render_to(&mut rendered, &context).map_err(|liquid_error| {
|
|
|
|
RenderPromptError::missing_context_with_external_docid(
|
|
|
|
external_docid.to_owned(),
|
|
|
|
liquid_error,
|
|
|
|
)
|
|
|
|
})?;
|
2024-10-28 14:10:55 +01:00
|
|
|
Ok(std::str::from_utf8(rendered.into_bump_slice())
|
|
|
|
.expect("render can only write UTF-8 because all inputs and processing preserve utf-8"))
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn render_kvdeladd(
|
2023-11-15 15:46:37 +01:00
|
|
|
&self,
|
2024-08-29 19:20:10 +02:00
|
|
|
document: &obkv::KvReaderU16,
|
2023-11-15 15:46:37 +01:00
|
|
|
side: DelAdd,
|
2024-09-02 11:28:53 +02:00
|
|
|
field_id_map: &FieldsIdsMapWithMetadata,
|
2023-11-15 15:46:37 +01:00
|
|
|
) -> Result<String, RenderPromptError> {
|
|
|
|
let document = Document::new(document, side, field_id_map);
|
2024-10-28 14:10:55 +01:00
|
|
|
let fields = OwnedFields::new(&document, field_id_map);
|
|
|
|
let context = Context::new(&document, &fields);
|
2023-11-15 15:46:37 +01:00
|
|
|
|
2024-09-03 12:07:10 +02:00
|
|
|
let mut rendered =
|
|
|
|
self.template.render(&context).map_err(RenderPromptError::missing_context)?;
|
|
|
|
if let Some(max_bytes) = self.max_bytes {
|
|
|
|
truncate(&mut rendered, max_bytes.get());
|
|
|
|
}
|
|
|
|
Ok(rendered)
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Shortens `s` to at most `max_bytes` bytes without splitting a character.
///
/// A string that already fits is left untouched. Otherwise the cut is placed at
/// the largest character boundary that is not greater than `max_bytes`.
fn truncate(s: &mut String, max_bytes: usize) {
    if s.len() <= max_bytes {
        return;
    }
    // Walk down from `max_bytes`; index 0 is always a boundary, so a cut point exists.
    let cut = (0..=max_bytes).rev().find(|&idx| s.is_char_boundary(idx));
    if let Some(cut) = cut {
        s.truncate(cut);
    }
}
|
|
|
|
|
2024-09-02 11:28:53 +02:00
|
|
|
pub struct FieldsIdsMapWithMetadata<'a> {
|
|
|
|
fields_ids_map: &'a FieldsIdsMap,
|
|
|
|
metadata: BTreeMap<FieldId, FieldMetadata>,
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<'a> FieldsIdsMapWithMetadata<'a> {
|
|
|
|
pub fn new(fields_ids_map: &'a FieldsIdsMap, searchable_fields_ids: &'_ [FieldId]) -> Self {
|
|
|
|
let mut metadata: BTreeMap<FieldId, FieldMetadata> =
|
|
|
|
fields_ids_map.ids().map(|id| (id, Default::default())).collect();
|
|
|
|
for searchable_field_id in searchable_fields_ids {
|
|
|
|
let Some(metadata) = metadata.get_mut(searchable_field_id) else { continue };
|
|
|
|
metadata.searchable = true;
|
|
|
|
}
|
|
|
|
Self { fields_ids_map, metadata }
|
|
|
|
}
|
|
|
|
|
|
|
|
pub fn metadata(&self, field_id: FieldId) -> Option<FieldMetadata> {
|
|
|
|
self.metadata.get(&field_id).copied()
|
2023-11-15 15:46:37 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2024-09-02 11:28:53 +02:00
|
|
|
impl<'a> Deref for FieldsIdsMapWithMetadata<'a> {
|
|
|
|
type Target = FieldsIdsMap;
|
|
|
|
|
|
|
|
fn deref(&self) -> &Self::Target {
|
|
|
|
self.fields_ids_map
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Per-field flags tracked by [`FieldsIdsMapWithMetadata`].
#[derive(Debug, Default, Clone, Copy)]
pub struct FieldMetadata {
    /// Whether the field is searchable; `false` by default.
    pub searchable: bool,
}
|
|
|
|
|
2023-12-12 21:19:48 +01:00
|
|
|
#[cfg(test)]
mod test {
    use super::Prompt;
    use crate::error::FaultSource;
    use crate::prompt::error::{NewPromptError, NewPromptErrorKind};
    use crate::prompt::truncate;

    #[test]
    fn default_template() {
        // does not panic
        Prompt::default();
    }

    #[test]
    fn empty_template() {
        let built = Prompt::new("".into(), None);
        built.unwrap();
    }

    #[test]
    fn template_ok() {
        let built = Prompt::new("{{doc.title}}: {{doc.overview}}".into(), None);
        built.unwrap();
    }

    #[test]
    fn template_syntax() {
        // the first `{{` is never closed
        let built = Prompt::new("{{doc.title: {{doc.overview}}".into(), None);
        assert!(matches!(
            built,
            Err(NewPromptError {
                kind: NewPromptErrorKind::CannotParseTemplate(_),
                fault: FaultSource::User
            })
        ));
    }

    #[test]
    fn template_missing_doc() {
        // fields are referenced without the `doc.` prefix
        let built = Prompt::new("{{title}}: {{overview}}".into(), None);
        assert!(matches!(
            built,
            Err(NewPromptError {
                kind: NewPromptErrorKind::InvalidFieldsInTemplate(_),
                fault: FaultSource::User
            })
        ));
    }

    #[test]
    fn template_nested_doc() {
        let built = Prompt::new("{{doc.actor.firstName}}: {{doc.actor.lastName}}".into(), None);
        built.unwrap();
    }

    #[test]
    fn template_fields() {
        let built = Prompt::new("{% for field in fields %}{{field}}{% endfor %}".into(), None);
        built.unwrap();
    }

    #[test]
    fn template_fields_ok() {
        let text = "{% for field in fields %}{{field.name}}: {{field.value}}{% endfor %}";
        Prompt::new(text.into(), None).unwrap();
    }

    #[test]
    fn template_fields_invalid() {
        // intentionally garbled field
        let built =
            Prompt::new("{% for field in fields %}{{field.vaelu}} {% endfor %}".into(), None);
        assert!(matches!(
            built,
            Err(NewPromptError {
                kind: NewPromptErrorKind::InvalidFieldsInTemplate(_),
                fault: FaultSource::User
            })
        ));
    }

    // Exercises the `truncate` helper directly on a multi-byte string.
    #[test]
    fn template_truncation() {
        let mut s = "インテル ザー ビーグル".to_string();

        // a limit larger than the string leaves it untouched
        truncate(&mut s, 42);
        assert_eq!(s, "インテル ザー ビーグル");

        assert_eq!(s.len(), 32);

        // successive limits applied to the same string, with the expected remainder
        let steps: [(usize, &str); 6] = [
            (32, "インテル ザー ビーグル"),
            (31, "インテル ザー ビーグ"),
            (30, "インテル ザー ビーグ"),
            (28, "インテル ザー ビー"),
            (26, "インテル ザー ビー"),
            (25, "インテル ザー ビ"),
        ];
        for (max_bytes, expected) in steps {
            truncate(&mut s, max_bytes);
            assert_eq!(s, expected);
        }

        assert_eq!("イ".len(), 3);
        truncate(&mut s, 3);
        assert_eq!(s, "イ");
        truncate(&mut s, 2);
        assert_eq!(s, "");
    }
}
|