Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

6 changes: 5 additions & 1 deletion crates/llama-cpp-server/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,11 @@ impl CompletionServer {

#[async_trait]
impl CompletionStream for CompletionServer {
async fn generate(&self, prompt: &str, options: CompletionOptions) -> BoxStream<'life0, String> {
async fn generate(
&self,
prompt: &str,
options: CompletionOptions,
) -> BoxStream<'life0, String> {
self.completion.generate(prompt, options).await
}
}
Expand Down
14 changes: 11 additions & 3 deletions crates/tabby-common/src/axum.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,11 @@ impl AllowedCodeRepository {
.into_iter()
.enumerate()
.map(|(i, repo)| {
CodeRepository::new(repo.git_url(), &crate::config::config_index_to_id(i))
CodeRepository::new(
repo.git_url(),
&crate::config::config_index_to_id(i),
repo.git_refs(),
)
})
.collect()
})
Expand Down Expand Up @@ -85,7 +89,9 @@ mod tests {
let candidates: Vec<_> = $candidates
.into_iter()
.enumerate()
.map(|(i, x)| CodeRepository::new(&x, &crate::config::config_index_to_id(i)))
.map(|(i, x)| {
CodeRepository::new(&x, &crate::config::config_index_to_id(i), vec![])
})
.collect();
let expect = &candidates[0];
assert_eq!(
Expand All @@ -100,7 +106,9 @@ mod tests {
let candidates: Vec<_> = $candidates
.into_iter()
.enumerate()
.map(|(i, x)| CodeRepository::new(&x, &crate::config::config_index_to_id(i)))
.map(|(i, x)| {
CodeRepository::new(&x, &crate::config::config_index_to_id(i), vec![])
})
.collect();
assert_eq!(closest_match($query, &candidates), None);
};
Expand Down
12 changes: 11 additions & 1 deletion crates/tabby-common/src/config.rs
Original file line number Diff line number Diff line change
Expand Up @@ -156,13 +156,19 @@ pub fn config_id_to_index(id: &str) -> Result<usize, anyhow::Error> {
/// A repository entry that can be serialized to and deserialized from
/// configuration data.
#[derive(Serialize, Deserialize, Debug, Clone, PartialEq)]
pub struct RepositoryConfig {
    /// Git URL of the repository; read through [`RepositoryConfig::git_url`].
    git_url: String,
    /// Git refs configured for this repository.
    ///
    /// Because of `#[serde(default)]`, input that omits this field
    /// deserializes to an empty list.
    #[serde(default)]
    pub refs: Vec<String>,
}

impl RepositoryConfig {
/// Borrows the git URL stored in this configuration entry.
pub fn git_url(&self) -> &str {
    self.git_url.as_str()
}

/// Returns an owned copy of the refs stored in this configuration entry.
pub fn git_refs(&self) -> Vec<String> {
    self.refs.to_vec()
}

pub fn canonicalize_url(url: &str) -> String {
let url = url.strip_suffix(".git").unwrap_or(url);
url::Url::parse(url)
Expand Down Expand Up @@ -472,13 +478,15 @@ impl AnswerConfig {
/// A git repository described by its URL, a source id, and a list of refs.
pub struct CodeRepository {
    /// Git URL of the repository.
    pub git_url: String,
    /// Identifier of the source this repository is registered under.
    // NOTE(review): the callers visible in this change build it with
    // `config_index_to_id(i)`; confirm whether other producers exist.
    pub source_id: String,
    /// Git refs associated with this repository, stored exactly as handed to
    /// `CodeRepository::new`.
    pub git_refs: Vec<String>,
}

impl CodeRepository {
pub fn new(git_url: &str, source_id: &str) -> Self {
pub fn new(git_url: &str, source_id: &str, git_refs: Vec<String>) -> Self {
Self {
git_url: git_url.to_owned(),
source_id: source_id.to_owned(),
git_refs,
}
}

Expand Down Expand Up @@ -643,6 +651,7 @@ mod tests {
// Smoke test: computing the directory for a `file://` URL must not panic.
fn it_parses_local_dir() {
    let git_url = String::from("file:///home/user");
    let refs = Vec::new();
    let repo = RepositoryConfig { git_url, refs };
    let _ = repo.dir();
}
Expand All @@ -651,6 +660,7 @@ mod tests {
// The directory derived from an https URL must end with the flattened,
// `.git`-less form of that URL.
fn test_repository_config_name() {
    let repo = RepositoryConfig {
        refs: Vec::new(),
        git_url: String::from("https://github.com/TabbyML/tabby.git"),
    };
    let dir = repo.dir();
    assert!(dir.ends_with("https_github.com_TabbyML_tabby"));
}
Expand Down
77 changes: 76 additions & 1 deletion crates/tabby-git/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,9 @@ mod file_search;
mod grep;
mod serve_git;

use std::path::Path;
use std::{fs, path::Path, process::Command};

use anyhow::bail;
use axum::{
body::Body,
http::{Response, StatusCode},
Expand All @@ -13,6 +14,7 @@ pub use commit::{stream_commits, Commit};
use file_search::GitFileSearch;
use futures::Stream;
pub use grep::{GrepFile, GrepLine, GrepSubMatch, GrepTextOrBase64};
use tracing::warn;

pub async fn search_files(
root: &Path,
Expand Down Expand Up @@ -56,6 +58,7 @@ pub fn serve_file(
serve_git::serve(&repository, commit, path)
}

#[derive(Debug)]
pub struct GitReference {
pub name: String,
pub commit: String,
Expand All @@ -76,6 +79,78 @@ pub fn list_refs(root: &Path) -> anyhow::Result<Vec<GitReference>> {
.collect())
}

/// Returns the name of the reference that `HEAD` resolves to in the
/// repository at `root`.
///
/// # Errors
///
/// Fails when the repository at `root` cannot be opened, when `HEAD` cannot
/// be resolved, or when git2 yields no name for the resolved reference.
pub fn get_head_name(root: &Path) -> anyhow::Result<String> {
    let repository = git2::Repository::open(root)?;
    let head = repository.head()?;
    // Build the error lazily so the success path never constructs it.
    let name = head
        .name()
        .ok_or_else(|| anyhow::anyhow!("HEAD has no name"))?;
    Ok(name.to_owned())
}

/// Ensures a clone of `url` exists at `root` and updates every entry of
/// `refs` from the `origin` remote, using the `git` command-line tool.
///
/// * When `root` does not exist, it is created and the repository is cloned
///   into it. If the clone fails for any reason the directory is removed
///   again so that a later call retries the clone.
/// * Each entry of `refs` is reduced to its last `/`-separated segment. The
///   branch that is currently checked out is updated with
///   `git pull origin <branch>`; every other branch is created or
///   force-updated with `git fetch origin +<branch>:<branch>`.
///
/// # Errors
///
/// Returns an error when `root` has no parent directory, when the directory
/// cannot be created, when `git` cannot be started, when the clone does not
/// exit successfully, or when a pull/fetch does not exit successfully.
pub fn sync_refs(root: &Path, url: &str, refs: &[String]) -> anyhow::Result<()> {
    if !root.exists() {
        let parent = root.parent().ok_or_else(|| {
            anyhow::anyhow!("`{}` has no parent directory", root.display())
        })?;
        fs::create_dir_all(root)?;

        let clone_result = Command::new("git")
            .current_dir(parent)
            .arg("clone")
            .arg(url)
            .arg(root)
            .status();

        // `success()` is false both for a non-zero exit code and for a
        // process that ended without an exit code (e.g. killed by a signal).
        let cloned = matches!(&clone_result, Ok(status) if status.success());
        if !cloned {
            warn!(
                "Failed to clone `{}`. Please check your repository configuration.",
                url
            );
            // Drop the (possibly partial) directory; otherwise the next call
            // would see `root` and skip cloning altogether.
            if let Err(err) = fs::remove_dir_all(root) {
                warn!("Failed to remove `{}`: {}", root.display(), err);
            }
            // Prefer the spawn error itself when git could not be started.
            clone_result?;
            bail!("Failed to clone `{}`", url);
        }
    }

    if refs.is_empty() {
        return Ok(());
    }

    // Short name of the checked-out branch (without the `refs/heads/`
    // prefix); `None` when it cannot be determined, e.g. on a detached HEAD.
    // Neither `git pull` nor `git fetch` below moves HEAD to another branch,
    // so a single lookup is enough for the whole loop.
    let current_branch = Command::new("git")
        .current_dir(root)
        .args(["symbolic-ref", "--short", "HEAD"])
        .output()
        .ok()
        .filter(|output| output.status.success())
        .and_then(|output| String::from_utf8(output.stdout).ok())
        .map(|name| name.trim().to_owned());

    for ref_name in refs {
        // NOTE(review): only the last path segment is kept, so a ref such as
        // `refs/heads/feature/foo` is treated as `foo` — confirm that branch
        // names containing `/` are not expected here.
        let branch = ref_name.rsplit('/').next().unwrap_or(ref_name);

        let mut git = Command::new("git");
        git.current_dir(root);
        if current_branch.as_deref() == Some(branch) {
            // The checked-out branch is updated through a regular pull.
            git.arg("pull").arg("origin").arg(branch);
        } else {
            // `+branch:branch` creates the local branch if it is missing and
            // force-updates it even when the update is not a fast-forward.
            git.arg("fetch")
                .arg("origin")
                .arg(format!("+{branch}:{branch}"));
        }

        if !git.status()?.success() {
            bail!("Failed to fetch origin {}", branch);
        }
    }

    Ok(())
}

fn rev_to_commit<'a>(
repository: &'a git2::Repository,
rev: Option<&str>,
Expand Down
1 change: 1 addition & 0 deletions crates/tabby-index/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ homepage.workspace = true
[dependencies]
anyhow = { workspace = true }
tabby-common = { path = "../tabby-common" }
tabby-git ={ path = "../tabby-git" }
tantivy = { workspace = true }
tracing = { workspace = true }
tree-sitter-tags = "0.22.6"
Expand Down
59 changes: 40 additions & 19 deletions crates/tabby-index/src/code/index.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,12 @@ use tracing::warn;
use super::{
create_code_builder,
intelligence::{CodeIntelligence, SourceCode},
CodeRepository,
repository, CodeRepository,
};
use crate::{
code::repository::resolve_commits,
indexer::{Indexer, TantivyDocBuilder},
};
use crate::indexer::{Indexer, TantivyDocBuilder};

// Magic numbers
static MAX_LINE_LENGTH_THRESHOLD: usize = 300;
Expand All @@ -22,13 +25,30 @@ static MIN_ALPHA_NUM_FRACTION: f32 = 0.25f32;
static MAX_NUMBER_OF_LINES: usize = 100000;
static MAX_NUMBER_FRACTION: f32 = 0.5f32;

pub async fn index_repository(
embedding: Arc<dyn Embedding>,
repository: &CodeRepository,
commit: &str,
) {
let total_files = Walk::new(repository.dir()).count();
let file_stream = stream! {
pub async fn index_repository(embedding: Arc<dyn Embedding>, repository: &CodeRepository) {
let refs = resolve_commits(repository);
// resolve_commits would return the current default branch,
// so it should never be empty here.
if refs.is_empty() {
logkit::error!(
"no branches found for repository {}",
repository.canonical_git_url()
);
return;
}

let mut count_files = 0;
let mut count_chunks = 0;

for (ref_name, sha) in refs {
if let Err(e) = repository::checkout(repository, &ref_name) {
warn!("Failed to checkout ref {}: {}", ref_name, e);
continue;
}

logkit::info!("Indexing branch {} with commit {}", ref_name, &sha);

let file_stream = stream! {
for file in Walk::new(repository.dir()) {
let file = match file {
Ok(file) => file,
Expand All @@ -40,21 +60,22 @@ pub async fn index_repository(

yield file;
}
}
// Commit every 100 files
.chunks(100);
}
// Commit every 100 files
.chunks(100);

let mut file_stream = pin!(file_stream);
let mut file_stream = pin!(file_stream);

let mut count_files = 0;
let mut count_chunks = 0;
while let Some(files) = file_stream.next().await {
count_files += files.len();
count_chunks += add_changed_documents(repository, commit, embedding.clone(), files).await;
logkit::info!("Processed {count_files}/{total_files} files, updated {count_chunks} chunks",);
while let Some(files) = file_stream.next().await {
count_files += files.len();
count_chunks += add_changed_documents(repository, &sha, embedding.clone(), files).await;
logkit::info!("Processed {count_files} files, updated {count_chunks} chunks",);
}
}
}

// Garbage collection uses blob ids to check files,
// so it does NOT need to check out a branch locally.
pub async fn garbage_collection() {
let index = Indexer::new(corpus::CODE);
stream! {
Expand Down
6 changes: 3 additions & 3 deletions crates/tabby-index/src/code/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ impl CodeIndexer {
embedding: Arc<dyn Embedding>,
repository: &CodeRepository,
) -> anyhow::Result<()> {
repository::sync_repository(repository)?;

logkit::info!(
"Building source code index: {}",
repository.canonical_git_url()
);
let commit = repository::sync_repository(repository)?;

index::index_repository(embedding, repository, &commit).await;
index::index_repository(embedding, repository).await;
index::garbage_collection().await;

Ok(())
Expand Down
Loading
Loading