Skip to content

feat: keep rust-managed code block venvs separate #755

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 6 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions examples/talk/6-code-json.pdl
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ text:
- def: EVAL
contribute: []
lang: python
requirements:
- textdistance
code:
|
import textdistance
Expand Down
60 changes: 55 additions & 5 deletions pdl-live-react/src-tauri/src/cli/run.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,24 @@
use ::std::fs::{remove_file, write};
use ::std::path::Path;

use duct::cmd;
use futures::executor::block_on;
use tempfile::Builder;
use yaml_rust2::yaml::LoadError;
use yaml_rust2::{EmitError, YamlEmitter};
use yaml_rust2::{ScanError, Yaml, YamlLoader};

use crate::interpreter::pip::pip_install_interpreter_if_needed;
use crate::interpreter::pip::{
pip_install_code_blocks_if_needed, pip_install_interpreter_if_needed,
};
use crate::interpreter::pull::pull_if_needed;

/// Read the given filesystem path and produce a potentially multi-document Yaml.
///
/// # Errors
/// Returns the `ScanError` produced by `YamlLoader::load_from_str` when the
/// file content cannot be parsed.
///
/// # Panics
/// Panics if the file cannot be read. The return type has no way to carry an
/// I/O error, so the panic message names the offending path and the cause.
fn from_path(path: &str) -> Result<Vec<Yaml>, ScanError> {
    let content = std::fs::read_to_string(path)
        .unwrap_or_else(|e| panic!("Failed to read PDL program {}: {}", path, e));
    YamlLoader::load_from_str(&content)
}

#[cfg(desktop)]
pub fn run_pdl_program(
source_file_path: String,
Expand All @@ -14,14 +27,16 @@ pub fn run_pdl_program(
data: Option<&tauri_plugin_cli::ArgData>,
stream: Option<&tauri_plugin_cli::ArgData>,
) -> Result<(), tauri::Error> {
println!(
eprintln!(
"Running {:#?}",
Path::new(&source_file_path).file_name().unwrap()
);

// async the model pull and pip installs
let pull_future = pull_if_needed(&source_file_path);
let bin_path_future = pip_install_interpreter_if_needed(app_handle);
let program = &from_path(&source_file_path).unwrap()[0];
let pull_future = pull_if_needed(&program);
let reqs_future = pip_install_code_blocks_if_needed(&app_handle, &program);
let bin_path_future = pip_install_interpreter_if_needed(&app_handle);

// wait for any model pulls to finish
block_on(pull_future).map_err(|e| match e {
Expand All @@ -33,15 +48,50 @@ pub fn run_pdl_program(
// wait for any pip installs to finish
let bin_path = block_on(bin_path_future)?;

// wait for code block requirements to be pulled
let updated_source_file_path = match block_on(reqs_future)? {
Some(updated_program) => {
// We received back an updated program
println!("Updated! {:?}", updated_program);
let mut out_str = String::new();
let mut emitter = YamlEmitter::new(&mut out_str);
emitter.multiline_strings(true);
emitter.dump(&updated_program).map_err(|e| match e {
EmitError::FmtError(ee) => tauri::Error::Anyhow(ee.into()),
})?;
match Path::new(&source_file_path).parent() {
Some(dir) => {
let tmp = Builder::new()
.prefix("pdl-program-")
.suffix(".pdl")
.tempfile_in(&dir)?;
write(&tmp, out_str)?;
let (_, path) = tmp.keep().map_err(|e| tauri::Error::Io(e.error))?;
path.display().to_string()
}
_ => {
eprintln!("Failed to find target directory for updated program");
source_file_path.clone()
}
}
}
_ => source_file_path.clone(),
};

let mut args = vec![
source_file_path,
updated_source_file_path.clone(),
dashdash("--trace", trace_file),
dashdash("--data", data),
dashdash("--stream", stream),
];
args.retain(|x| x.chars().count() > 0);
cmd(bin_path.join("pdl"), &args).run()?;

// TODO how do we do this on all exit paths in rust?
if updated_source_file_path != source_file_path {
remove_file(updated_source_file_path)?;
}

Ok(())
}

Expand Down
126 changes: 108 additions & 18 deletions pdl-live-react/src-tauri/src/interpreter/extract.rs
Original file line number Diff line number Diff line change
@@ -1,57 +1,147 @@
use yaml_rust2::Yaml;

/// Extract models referenced by the programs
pub fn extract_models(programs: Vec<Yaml>) -> Vec<String> {
extract_values(programs, "model")
use crate::interpreter::shasum;

/// Collect the values of every `model` field found in the program.
///
/// Returns the collected values together with the program as produced by
/// `extract_values`; the mutator supplied here simply clones each node.
pub fn extract_models(program: &Yaml) -> (Vec<String>, Yaml) {
    let keep_as_is = |node: &mut Yaml| node.clone();
    extract_values(program, "model", &keep_as_is)
}

/// Extract the pip requirements referenced by the program's python code blocks.
///
/// Returns the values of every `requirements` field (an array is joined with
/// newlines into one string) together with a rewritten copy of the program.
/// In the rewritten copy, each hash that has `requirements`, `code` and
/// `lang: python` becomes a `lang: command` block: `requirements` is removed
/// and `code` is replaced by the argv `[<venv python>, "-c", <code text>]`,
/// where the venv directory is named after the SHA-256 of the requirements
/// text and `print(result)` is appended to the original code.
pub fn extract_requirements(program: &Yaml) -> (Vec<String>, Yaml) {
    let requirements = Yaml::String("requirements".to_string());
    let code = Yaml::String("code".to_string());
    let lang = Yaml::String("lang".to_string());
    let python = Yaml::String("python".to_string());

    extract_values(program, "requirements", &|y| match y {
        Yaml::Hash(h) => {
            // Look `lang` up with `get`: indexing the hash with a missing key
            // panics, and a block may carry `requirements` and `code` without
            // declaring a `lang`.
            let is_python_block = h.contains_key(&requirements)
                && h.contains_key(&code)
                && h.get(&lang) == Some(&python);
            if !is_python_block {
                return Yaml::Hash(h.clone());
            }

            // This text is hashed to name the venv, so it has to be built the
            // same way as the requirement strings collected by the traversal:
            // array items joined by newlines, non-string items as "".
            let requirements_text = match &h[&requirements] {
                Yaml::Array(a) => a
                    .iter()
                    .map(|item| item.as_str().unwrap_or(""))
                    .collect::<Vec<_>>()
                    .join("\n"),
                Yaml::String(s) => s.to_string(),
                _ => String::new(),
            };

            let req_hash = shasum::sha256sum_str(requirements_text.as_str()).unwrap();

            // Non-string `code` values yield an empty script.
            let code_text = h[&code]
                .as_str()
                .map(|c| format!("{}\nprint(result)", c))
                .unwrap_or_default();

            h.remove(&requirements);
            h[&lang] = Yaml::String("command".to_string());
            // FIXME: the venv root below is hard-coded to one developer's
            // macOS cache directory. It should be derived from the same cache
            // directory the pip installer uses, which requires passing that
            // path into this function.
            h[&code] = Yaml::Array(vec![
                Yaml::String(format!(
                    "/Users/nickm/Library/Caches/pdl/venvs/{}/{}/python",
                    req_hash,
                    if cfg!(windows) { "Scripts" } else { "bin" },
                )),
                Yaml::String("-c".to_owned()),
                Yaml::String(code_text),
            ]);

            Yaml::Hash(h.clone())
        }
        y => y.clone(),
    })
}

/// Take a list of Yaml fragments and produce a vector of the string-valued entries of the given field
pub fn extract_values(programs: Vec<Yaml>, field: &str) -> Vec<String> {
let mut values = programs
.into_iter()
.flat_map(|p| extract_one_values(p, field))
.collect::<Vec<String>>();
pub fn extract_values(
program: &Yaml,
field: &str,
mutator: &impl Fn(&mut Yaml) -> Yaml,
) -> (Vec<String>, Yaml) {
let (mut values, updated_program) = traverse(program, field, mutator);

// A single program may specify the same model more than once. Dedup!
values.sort();
values.dedup();

values
(values, updated_program)
}

/// Take one Yaml fragment and produce a vector of the string-valued entries of the given field
fn extract_one_values(program: Yaml, field: &str) -> Vec<String> {
fn traverse(
program: &Yaml,
field: &str,
mutator: &impl Fn(&mut Yaml) -> Yaml,
) -> (Vec<String>, Yaml) {
let mut values: Vec<String> = Vec::new();

match program {
let updated_program: Yaml = match program {
Yaml::Hash(h) => {
let mut hh = h.clone();
for (key, val) in h {
match key {
Yaml::String(f) if f == field => match &val {
Yaml::String(m) => {
values.push(m.to_string());
}
Yaml::Array(a) => values.push(
a.into_iter()
.map(|item| match item {
Yaml::String(s) => s.to_string(),
_ => "".to_string(),
})
.collect::<Vec<_>>()
.join("\n"),
),
_ => {}
},
_ => {}
}

for m in extract_one_values(val, field) {
let (v, p) = traverse(val, field, mutator);
hh[key] = p;
for m in v {
values.push(m)
}
}

mutator(&mut Yaml::Hash(hh))
}

Yaml::Array(a) => {
for val in a {
for m in extract_one_values(val, field) {
values.push(m)
let mut aa = a.clone();
for (i, val) in a.iter().enumerate() {
let (v, p) = traverse(val, field, mutator);
aa[i] = p;
for m in v {
values.push(m);
}
}

mutator(&mut Yaml::Array(aa))
}

_ => {}
}
x => mutator(&mut x.clone()),
};

values
(values, updated_program)
}
54 changes: 47 additions & 7 deletions pdl-live-react/src-tauri/src/interpreter/pip.rs
Original file line number Diff line number Diff line change
@@ -1,21 +1,25 @@
use ::std::fs::{copy, create_dir_all};
use ::std::fs::{copy, create_dir_all, write};
use ::std::path::{Path, PathBuf};

use duct::cmd;
use rayon::prelude::*;
use tauri::path::BaseDirectory;
use tauri::Manager;
use tempfile::Builder;
use yaml_rust2::Yaml;

use crate::interpreter::extract;
use crate::interpreter::shasum;

#[cfg(desktop)]
pub async fn pip_install_if_needed(
fn pip_install_if_needed_with_hash(
cache_path: &Path,
requirements_path: &Path,
hash: String,
) -> Result<PathBuf, tauri::Error> {
create_dir_all(&cache_path)?;

let hash = shasum::sha256sum(&requirements_path)?;
let venv_path = cache_path.join(hash);
let venv_path = cache_path.join("venvs").join(hash);
let bin_path = venv_path.join(if cfg!(windows) { "Scripts" } else { "bin" });

if !venv_path.exists() {
Expand All @@ -27,7 +31,7 @@ pub async fn pip_install_if_needed(
};
cmd!(python, "-mvenv", &venv_path).run()?;

cmd!(bin_path.join("pip"), "install", "-r", &requirements_path,).run()?;
cmd!(bin_path.join("pip"), "install", "-r", &requirements_path).run()?;

let cached_requirements_path = venv_path.join("requirements.txt");
copy(requirements_path, cached_requirements_path)?;
Expand All @@ -36,9 +40,45 @@ pub async fn pip_install_if_needed(
Ok(bin_path.to_path_buf())
}

/// Install the given requirements file, keyed by the digest of its content.
///
/// Computes the digest of `requirements_path` with `shasum::sha256sum` and
/// delegates to `pip_install_if_needed_with_hash`, returning whatever path
/// that call produces.
#[cfg(desktop)]
fn pip_install_if_needed(
    cache_path: &Path,
    requirements_path: &Path,
) -> Result<PathBuf, tauri::Error> {
    let requirements_digest = shasum::sha256sum(&requirements_path)?;
    pip_install_if_needed_with_hash(cache_path, requirements_path, requirements_digest)
}

/// Install the requirements of the program's code blocks.
///
/// Every requirements text returned by `extract::extract_requirements` is
/// written to a temporary `pdl-requirements-*.txt` file and handed to
/// `pip_install_if_needed`; the installs run in parallel.
///
/// # Returns
/// `Ok(None)` when the program has no code-block requirements (the program
/// is unchanged), otherwise `Ok(Some(updated_program))` with the rewritten
/// program produced by `extract::extract_requirements`.
///
/// # Errors
/// Fails if the cache directory cannot be resolved, or if creating/writing a
/// temporary requirements file or running any of the installs fails.
#[cfg(desktop)]
pub async fn pip_install_code_blocks_if_needed(
    app_handle: &tauri::AppHandle,
    program: &Yaml,
) -> Result<Option<Yaml>, tauri::Error> {
    let cache_path = app_handle.path().cache_dir()?.join("pdl");

    let (reqs, updated_program) = extract::extract_requirements(program);
    if reqs.is_empty() {
        // We did not change the program
        return Ok(None);
    }

    // `try_for_each` propagates a failed install to the caller. Counting the
    // mapped results instead would only count items and drop every `Err`.
    reqs.into_par_iter()
        .try_for_each(|req| -> Result<(), tauri::Error> {
            let req_path = Builder::new()
                .prefix("pdl-requirements-")
                .suffix(".txt")
                .tempfile()?;
            write(&req_path, req)?;
            pip_install_if_needed(&cache_path, req_path.path())?;
            Ok(())
        })?;

    Ok(Some(updated_program))
}

#[cfg(desktop)]
pub async fn pip_install_interpreter_if_needed(
app_handle: tauri::AppHandle,
app_handle: &tauri::AppHandle,
) -> Result<PathBuf, tauri::Error> {
// the interpreter requirements.txt
let requirements_path = app_handle
Expand All @@ -47,5 +87,5 @@ pub async fn pip_install_interpreter_if_needed(

let cache_path = app_handle.path().cache_dir()?.join("pdl");

pip_install_if_needed(&cache_path, &requirements_path).await
pip_install_if_needed(&cache_path, &requirements_path)
}
13 changes: 4 additions & 9 deletions pdl-live-react/src-tauri/src/interpreter/pull.rs
Original file line number Diff line number Diff line change
@@ -1,19 +1,14 @@
use duct::cmd;
use rayon::prelude::*;
use yaml_rust2::yaml::LoadError;
use yaml_rust2::{ScanError, Yaml, YamlLoader};
use yaml_rust2::Yaml;

use crate::interpreter::extract;

/// Read the given filesystem path and produce a potentially multi-document Yaml
fn from_path(path: &String) -> Result<Vec<Yaml>, ScanError> {
let content = std::fs::read_to_string(path).unwrap();
YamlLoader::load_from_str(&content)
}

/// Pull models (in parallel) from the PDL program in the given filepath.
pub async fn pull_if_needed(path: &String) -> Result<(), LoadError> {
extract::extract_models(from_path(path).unwrap())
pub async fn pull_if_needed(program: &Yaml) -> Result<(), LoadError> {
let (models, _) = extract::extract_models(program);
models
.into_par_iter()
.try_for_each(|model| match model {
m if model.starts_with("ollama/") => ollama_pull_if_needed(&m[7..]),
Expand Down
Loading