Hey! I am attempting to use the new C API from C++, but I must be doing something significantly wrong, since the results are very weird (literally nonsense). Here is the code:

```cpp
#include <llama.h>

#include <algorithm>
#include <cstdint>
#include <ctime>
#include <mutex>
#include <stdexcept>
#include <string>
#include <string_view>
#include <thread>
#include <vector>

class LLM {
    struct Exception : public std::runtime_error {
        using std::runtime_error::runtime_error;
    };
    struct {
        std::string model = "7B-ggml-model-quant.bin";
        int32_t seed; // RNG seed
        int32_t n_threads = std::max(1, static_cast<int32_t>(std::thread::hardware_concurrency()) / 4); // Use a quarter of the hardware threads, but never less than one
        int32_t n_ctx = 2024; // Context size
        int32_t n_batch = 8; // Batch size
        int32_t top_k = 40;
        float   top_p = 0.5f;
        float   temp  = 0.81f;
    } params;
    struct State {
        std::string prompt;
        std::vector<llama_token> embd;
        int n_ctx;
    } state;
    llama_context *ctx;
    std::mutex lock;
    void init() {
        // Set up the llama parameters
        auto lparams = llama_context_default_params();
        lparams.seed = params.seed;
        lparams.n_ctx = params.n_ctx;
        // Create the context from the model file
        ctx = llama_init_from_file(params.model.c_str(), lparams);
        if (!ctx) throw Exception("Failed to load model: "+params.model);
        // Cache the context size reported by llama
        state.n_ctx = llama_n_ctx(ctx);
    }
public:
    LLM(int32_t seed = 0) {
        // Set random seed
        params.seed = seed ? seed : time(nullptr);
        // Initialize llama
        init();
    }
    void append(const std::string& prompt) {
        std::scoped_lock L(lock);
        // Remember whether this is the first text being appended;
        // the BOS token is only added at the very start of the prompt
        const bool was_empty = state.prompt.empty();
        // Append to the current prompt
        state.prompt.append(prompt);
        // Make room for the new tokens; a token is always at least one
        // byte of text, +1 for the potential BOS token
        const auto old_token_count = state.embd.size();
        state.embd.resize(old_token_count+prompt.size()+1);
        // Run the tokenizer on the new text only
        const auto token_count = llama_tokenize(ctx, prompt.c_str(), state.embd.data()+old_token_count, state.embd.size()-old_token_count, was_empty);
        if (token_count < 0) throw Exception("Failed to tokenize prompt");
        state.embd.resize(old_token_count+token_count);
        // Evaluate the new tokens one by one
        // TODO: Larger batch size
        for (auto it = old_token_count; it != old_token_count+token_count; it++) {
            llama_eval(ctx, state.embd.data()+it, 1, it, params.n_threads);
        }
    }
    std::string run(std::string_view end) {
        std::scoped_lock L(lock);
        std::string fres;
        // Sample tokens until the end marker has been generated
        // (or the context is full)
        while (!fres.ends_with(end) && state.embd.size() < static_cast<size_t>(state.n_ctx)) {
            // Sample top-p and top-k; no last-tokens window is passed,
            // so the repeat penalty of 1.0f is effectively disabled
            const auto id = llama_sample_top_p_top_k(ctx, nullptr, 0, params.top_k, params.top_p, params.temp, 1.0f);
            // Add the token to the context
            state.embd.push_back(id);
            // Get the token as a string and append it to the result
            const auto str = llama_token_to_str(ctx, id);
            fres.append(str);
            // Evaluate the new token
            // TODO: Larger batch size
            llama_eval(ctx, state.embd.data()+state.embd.size()-1, 1, state.embd.size()-1, params.n_threads);
        }
        // Remember the generated text as part of the prompt, then
        // return it without the trailing end marker
        state.prompt.append(fres);
        if (fres.ends_with(end)) fres.resize(fres.size()-end.size());
        return fres;
    }
};
```

It'd be amazing if someone with a bit more knowledge than me could look over this and maybe give me some tips and hints :-)

Btw: the code is called from multiple threads, but never at the same time, thanks to the locks. Thanks!
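For reference, the class is meant to be driven roughly like this (a hypothetical call site, not part of the original code; the prompt text and end marker are made up):

```cpp
#include <iostream>

int main() {
    LLM llm;
    // Feed the initial prompt, then generate until the model
    // emits the (made-up) reverse prompt "\nUser:"
    llm.append("User: Hello!\nAssistant:");
    std::cout << llm.run("\nUser:") << std::endl;
}
```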
Answered by niansa on Mar 23, 2023
Never mind, my code is correct. I had a space at the end of the prompt.
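For anyone running into the same symptom: a trailing space changes how the prompt is tokenized, and llama models are very sensitive to that. A minimal sketch to see the difference with the same C API (the model path is a placeholder; adjust it for your setup):

```cpp
#include <llama.h>
#include <cstdio>
#include <vector>

int main() {
    auto lparams = llama_context_default_params();
    auto *ctx = llama_init_from_file("7B-ggml-model-quant.bin", lparams);
    if (!ctx) return 1;
    // Tokenize the same text with and without a trailing space
    for (const char *text : {"Hello world", "Hello world "}) {
        std::vector<llama_token> tokens(32);
        const int n = llama_tokenize(ctx, text, tokens.data(), static_cast<int>(tokens.size()), true);
        std::printf("\"%s\" -> %d tokens:", text, n);
        for (int i = 0; i < n; i++) std::printf(" %d", tokens[i]);
        std::printf("\n");
    }
    llama_free(ctx);
}
```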