diff --git a/.gitignore b/.gitignore
index 596e85d..08b4cb3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,4 +1,5 @@
 /.venv
 .DS_Store
 /output
-/__pycache__
+__pycache__
+.env
\ No newline at end of file
diff --git a/genmoji/.env.sample b/genmoji/.env.sample
new file mode 100644
index 0000000..900692b
--- /dev/null
+++ b/genmoji/.env.sample
@@ -0,0 +1 @@
+HF_TOKEN=
\ No newline at end of file
diff --git a/genmoji/domain/__init__.py b/genmoji/domain/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/metaprompt/open-genmoji.md b/genmoji/domain/prompts.py
similarity index 79%
rename from metaprompt/open-genmoji.md
rename to genmoji/domain/prompts.py
index 4fb7382..638c60d 100644
--- a/metaprompt/open-genmoji.md
+++ b/genmoji/domain/prompts.py
@@ -1,4 +1,4 @@
-You are helping create a prompt for a Emoji generation image model. An emoji must be easily interpreted when small so details must be exaggerated to be clear. Your goal is to use descriptions to achieve this.
+SYSTEM_PROMPT = """You are helping create a prompt for an Emoji generation image model. An emoji must be easily interpreted when small, so details must be exaggerated to be clear. Your goal is to use descriptions to achieve this.
 
 You will receive a user description, and you must rephrase it to consist of short phrases separated by periods, adding detail to everything the user provides.
 
@@ -16,4 +16,4 @@
 - "head is turned towards viewer.": ONLY humans or animals
 - "detailed texture.": ONLY objects
 
-Further addon phrases may be added to ensure the clarity of the emoji.
+Further addon phrases may be added to ensure the clarity of the emoji."""
\ No newline at end of file
diff --git a/genmoji/domain/schemas.py b/genmoji/domain/schemas.py
new file mode 100644
index 0000000..575874b
--- /dev/null
+++ b/genmoji/domain/schemas.py
@@ -0,0 +1,17 @@
+from pydantic import BaseModel
+from typing import Optional
+
+
+class DownloadModelRequest(BaseModel):
+    huggingface_repo: str
+    model_name: str
+
+
+class GenerationRequest(BaseModel):
+    prompt: str
+    lora: Optional[str] = "flux-dev"
+    llm_model: Optional[str] = "llama3.1:latest"
+    direct: Optional[bool] = False
+    height: Optional[int] = 160
+    width: Optional[int] = 160
+    upscale_factor: Optional[int] = 5
\ No newline at end of file
diff --git a/download.py b/genmoji/download.py
similarity index 100%
rename from download.py
rename to genmoji/download.py
diff --git a/genmoji.py b/genmoji/genmoji.py
similarity index 93%
rename from genmoji.py
rename to genmoji/genmoji.py
index 422c4c5..769c57b 100644
--- a/genmoji.py
+++ b/genmoji/genmoji.py
@@ -1,13 +1,13 @@
-import sys
-from promptAssistant import get_prompt_response
-from generateImage import generate_image
-from PIL import Image
-import os
 import argparse
 import json
+import os
+import sys
+from PIL import Image
+from utils.generate_image import generate_image
+from utils.llm_utils import model_inference
 
 
-def get_unique_path(base_path):
+def get_unique_path(base_path: str) -> str:
     directory = os.path.dirname(base_path)
     filename = os.path.basename(base_path)
     name, ext = os.path.splitext(filename)
@@ -31,6 +31,7 @@ def main(
     upscale_factor: int,
     output_path: str = "output/genmoji.png",
     lora: str = "flux-dev",
+    llm_model: str = "llama3.1:latest"
 ):
     with open("./lora/info.json", "r") as f:
         models = json.load(f)
@@ -57,7 +58,8 @@ def main(
         sys.exit(1)
     if not direct:
         # Get the response from the prompt assistant
-        prompt_response = get_prompt_response(user_prompt, metaprompt)
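+        # model_inference returns {"message": ...}; Ollama errors surface as the message text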
+        prompt_response = model_inference(user_prompt, llm_model).get("message")
         print("Prompt Created: " + prompt_response)
     elif direct:
         prompt_response = user_prompt
diff --git a/genmoji/main.py b/genmoji/main.py
new file mode 100644
index 0000000..ac37da4
--- /dev/null
+++ b/genmoji/main.py
@@ -0,0 +1,106 @@
+import io
+import os
+from dotenv import load_dotenv
+from fastapi import FastAPI, HTTPException
+from fastapi.responses import JSONResponse, StreamingResponse
+from huggingface_hub import hf_hub_download, login
+from PIL import Image
+from domain.schemas import DownloadModelRequest, GenerationRequest
+from utils.generate_image import generate_image
+from utils.llm_utils import list_installed_llms, model_inference
+from utils.logger import logger
+
+load_dotenv()
+
+app = FastAPI()
+login(token=os.getenv("HF_TOKEN"))
+
+
+@app.post("/download_model")
+async def download_model(input_data: DownloadModelRequest) -> JSONResponse:
+    """
+    Downloads a model from Hugging Face
+    """
+    try:
+        filename = f"{input_data.model_name}.safetensors"
+        hf_hub_download(
+            repo_id=input_data.huggingface_repo, filename=filename, local_dir="./lora"
+        )
+        return JSONResponse(content={"response": f"Download {input_data.model_name} complete"})
+    except Exception as e:
+        logger.error(f"Error downloading model: {e}")
+        logger.exception(e)
+        return JSONResponse(content={"response": f"Error downloading model: {e}"}, status_code=500)
+
+
+@app.get("/installed_img_genmodels")
+async def get_installed_img_gen_models() -> JSONResponse:
+    """
+    Get the list of installed image generation models
+    """
+    try:
+        models = os.listdir("lora/")
+        models = [model.replace(".safetensors", "") for model in models if not model.startswith(".") and "safetensors" in model]
+        return JSONResponse(content={"models": models})
+    except Exception as e:
+        logger.error(f"Error listing models: {e}")
+        logger.exception(e)
+        return JSONResponse(content=f"Error listing models: {e}", status_code=500)
+
+
+@app.get("/installed_llms")
+async def get_installed_llms() -> JSONResponse:
+    """
+    Get the list of installed LLMs from Ollama
+    """
+    try:
+        return JSONResponse(content={"models": list_installed_llms()})
+    except Exception as e:
+        logger.error(f"Error listing models: {e}")
+        logger.exception(e)
+        return JSONResponse(content=f"Error listing models: {e}", status_code=500)
+
+
+@app.post("/inference")
+async def inference(input_data: GenerationRequest) -> StreamingResponse:
+    """
+    Perform model inference to generate an emoji. Uses Ollama to handle LLM inference for LoRA prompt generation
+    """
+    # Check if the LoRA file exists
+    lora_path = f"lora/{input_data.lora}.safetensors"
+    if not os.path.exists(lora_path):
+        raise HTTPException(
+            status_code=404,
+            detail=f"Error: LoRA {input_data.lora} is not downloaded. Please use the /download_model endpoint to download it.")
+
+    if input_data.direct:
+        user_prompt = input_data.prompt
+    else:
+        # Get the response from the prompt assistant
+        user_prompt = model_inference(
+            user_prompt=input_data.prompt,
+            model_name=input_data.llm_model
+        ).get("message")
+        if "i cannot" in user_prompt.lower() or "i can't" in user_prompt.lower():
+            logger.warning("Refusal detected from LLM prompt enhancement. Using raw prompt input, generation may be lacking.")
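+            # fall back to the raw user prompt so generation can still proceed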
+            user_prompt = input_data.prompt
+        else:
+            logger.info("Prompt Created: " + user_prompt)
+
+    # Generate the image using the response from the prompt assistant
+    image = generate_image(user_prompt, input_data.lora, input_data.width, input_data.height)
+
+    output_width, output_height = image.size
+    resized_image = image.resize(
+        (
+            output_width * input_data.upscale_factor,
+            output_height * input_data.upscale_factor
+        ),
+        Image.LANCZOS)
+
+    img_io = io.BytesIO()
+    resized_image.save(img_io, 'PNG')
+    img_io.seek(0)
+
+    return StreamingResponse(img_io, media_type="image/png")
\ No newline at end of file
diff --git a/genmoji/requirements.txt b/genmoji/requirements.txt
new file mode 100644
index 0000000..1831f2b
--- /dev/null
+++ b/genmoji/requirements.txt
@@ -0,0 +1,6 @@
+fastapi==0.115.6
+python-dotenv==1.0.1
+huggingface-hub==0.27.1
+mflux==0.5.1
+ollama==0.4.5
+uvicorn==0.34.0
\ No newline at end of file
diff --git a/genmoji/utils/__init__.py b/genmoji/utils/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/generateImage.py b/genmoji/utils/generate_image.py
similarity index 79%
rename from generateImage.py
rename to genmoji/utils/generate_image.py
index 33426e2..aa39963 100644
--- a/generateImage.py
+++ b/genmoji/utils/generate_image.py
@@ -1,5 +1,5 @@
-from mflux import Flux1, Config, ModelConfig
 import os
+from mflux import Config, Flux1, ModelConfig
 
 
 def generate_image(prompt: str, lora: str, width: int, height: int):
@@ -8,7 +8,7 @@ def generate_image(prompt: str, lora: str, width: int, height: int):
         model_config=ModelConfig.FLUX1_DEV,
         quantize=8,
         lora_paths=[
-            f"{os.path.abspath(os.path.dirname(__file__))}/lora/{lora}.safetensors"
+            f"{os.path.abspath(os.path.dirname(__file__))}/../lora/{lora}.safetensors"
         ],
         lora_scales=[1.0],
     )
diff --git a/genmoji/utils/llm_utils.py b/genmoji/utils/llm_utils.py
new file mode 100644
index 0000000..2f31ffd
--- /dev/null
+++ b/genmoji/utils/llm_utils.py
@@ -0,0 +1,54 @@
+import os
+from ollama import Client, Options, Message, ResponseError
+from typing import Dict, List
+from domain.prompts import SYSTEM_PROMPT
+
+
+OLLAMA_HOST_DEFAULT = "http://localhost:11434"
+OLLAMA_HOST = os.getenv("OLLAMA_HOST", OLLAMA_HOST_DEFAULT)
+client = Client(host=OLLAMA_HOST)
+
+
+def model_inference(
+    user_prompt: str,
+    model_name: str,
+    max_output: int = 128,
+    temperature: float = 0.7
+) -> Dict[str, str]:
+    try:
+        # prime the conversation with the system prompt and few-shot examples
+        messages = [
+            Message(role="system", content=SYSTEM_PROMPT),
+            Message(role="user", content="a horse wearing a suit"),
+            Message(role="assistant", content="emoji of horse in black suit and tie with flowing mane. a strong, confident stallion wearing formal attire for a special occasion. cute. 3D lighting. no cast shadows. enlarged head in cartoon style. head is turned towards viewer."),
+            Message(role="user", content="flying pig"),
+            Message(role="assistant", content="emoji of flying pink pig. enlarged head in cartoon style. cute. white wings. head is turned towards viewer. 3D lighting. no cast shadows."),
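+            # the real request goes last, after the few-shot examples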
no cast shadows."), + Message(role="user", content=user_prompt) + ] + response = client.chat( + model=model_name, + messages=messages, + options=Options(temperature=temperature, num_predict=max_output)) + + return {"message": response.get("message").content} + except ResponseError as e: + if e.status_code == 404: + return {"message": f"Model {model_name} not found"} + return {"message": f"Error performing inference: {e.error}"} + except Exception as e: + return {"message": f"Error performing inference: {e}"} + + +def list_installed_llms() -> List[str]: + models = client.list() + + return [ + { + "model_name": model.get("model"), + "family": model.get("details", {}).get("family"), + "param_size": model.get("details", {}).get("parameter_size"), + } + for model in models.get("models") + ] \ No newline at end of file diff --git a/genmoji/utils/logger.py b/genmoji/utils/logger.py new file mode 100644 index 0000000..6510ae8 --- /dev/null +++ b/genmoji/utils/logger.py @@ -0,0 +1,25 @@ +import logging + + +def setup_logger(name: str) -> logging.Logger: + """ + Sets up a logger instance with a set log format + """ + logger = logging.getLogger(name) + logger.setLevel(logging.INFO) + + # Create a formatter for the log messages + formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(message)s') + + # Create a console handler for the log messages + console_handler = logging.StreamHandler() + console_handler.setLevel(logging.INFO) + console_handler.setFormatter(formatter) + + # Add the handlers to the logger + # logger.addHandler(file_handler) + logger.addHandler(console_handler) + return logger + + +logger = setup_logger(__name__) \ No newline at end of file diff --git a/metaprompt/open-genmoji.json b/metaprompt/open-genmoji.json deleted file mode 100644 index 66b6ee2..0000000 --- a/metaprompt/open-genmoji.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "messages": [ - { "role": "user", "content": "You are helping create a prompt for a Emoji generation image model. An emoji must be easily interpreted when small so details must be exaggerated to be clear. Your goal is to use descriptions to achieve this.\n\nYou will receive a user description, and you must rephrase it to consist of short phrases separated by periods, adding detail to everything the user provides.\n\nAdd describe the color of all parts or components of the emoji. Unless otherwise specified by the user, do not describe people. Do not describe the background of the image. Your output should be in the format:\n\n```\nemoji of {description}. {addon phrases}. 3D lighting. no cast shadows.\n```\n\nThe description should be a 1 sentence of your interpretation of the emoji.\nThen, you may choose to add addon phrases. You must use the following in the given scenarios:\n\n- \"cute.\": If generating anything that's not an object, and also not a human\n- \"enlarged head in cartoon style.\": ONLY animals\n- \"head is turned towards viewer.\": ONLY humans or animals\n- \"detailed texture.\": ONLY objects\n\nFurther addon phrases may be added to ensure the clarity of the emoji.\n\n\n USER PROMPT: a horse wearing a suit"}, - { "role": "assistant", "content": "emoji of horse in black suit and tie with flowing mane. a strong, confident stallion wearing formal attire for a special occasion. cute. 3D lighting. no cast shadows. enlarged head in cartoon style. head is turned towards viewer." }, - { "role": "user", "content": "You are helping create a prompt for a Emoji generation image model. 
+    return [
+        {
+            "model_name": model.get("model"),
+            "family": model.get("details", {}).get("family"),
+            "param_size": model.get("details", {}).get("parameter_size"),
+        }
+        for model in models.get("models")
+    ]
\ No newline at end of file
diff --git a/genmoji/utils/logger.py b/genmoji/utils/logger.py
new file mode 100644
index 0000000..6510ae8
--- /dev/null
+++ b/genmoji/utils/logger.py
@@ -0,0 +1,24 @@
+import logging
+
+
+def setup_logger(name: str) -> logging.Logger:
+    """
+    Sets up a logger instance with a set log format
+    """
+    logger = logging.getLogger(name)
+    logger.setLevel(logging.INFO)
+
+    # Create a formatter for the log messages
+    formatter = logging.Formatter('%(asctime)s [%(levelname)s] %(message)s')
+
+    # Create a console handler for the log messages
+    console_handler = logging.StreamHandler()
+    console_handler.setLevel(logging.INFO)
+    console_handler.setFormatter(formatter)
+
+    # Add the console handler to the logger
+    logger.addHandler(console_handler)
+    return logger
+
+
+logger = setup_logger(__name__)
\ No newline at end of file
diff --git a/metaprompt/open-genmoji.json b/metaprompt/open-genmoji.json
deleted file mode 100644
index 66b6ee2..0000000
--- a/metaprompt/open-genmoji.json
+++ /dev/null
@@ -1,8 +0,0 @@
-{
-  "messages": [
-    { "role": "user", "content": "You are helping create a prompt for a Emoji generation image model. An emoji must be easily interpreted when small so details must be exaggerated to be clear. Your goal is to use descriptions to achieve this.\n\nYou will receive a user description, and you must rephrase it to consist of short phrases separated by periods, adding detail to everything the user provides.\n\nAdd describe the color of all parts or components of the emoji. Unless otherwise specified by the user, do not describe people. Do not describe the background of the image. Your output should be in the format:\n\n```\nemoji of {description}. {addon phrases}. 3D lighting. no cast shadows.\n```\n\nThe description should be a 1 sentence of your interpretation of the emoji.\nThen, you may choose to add addon phrases. You must use the following in the given scenarios:\n\n- \"cute.\": If generating anything that's not an object, and also not a human\n- \"enlarged head in cartoon style.\": ONLY animals\n- \"head is turned towards viewer.\": ONLY humans or animals\n- \"detailed texture.\": ONLY objects\n\nFurther addon phrases may be added to ensure the clarity of the emoji.\n\n\n USER PROMPT: a horse wearing a suit"},
-    { "role": "assistant", "content": "emoji of horse in black suit and tie with flowing mane. a strong, confident stallion wearing formal attire for a special occasion. cute. 3D lighting. no cast shadows. enlarged head in cartoon style. head is turned towards viewer." },
-    { "role": "user", "content": "You are helping create a prompt for a Emoji generation image model. An emoji must be easily interpreted when small so details must be exaggerated to be clear. Your goal is to use descriptions to achieve this.\n\nYou will receive a user description, and you must rephrase it to consist of short phrases separated by periods, adding detail to everything the user provides.\n\nAdd describe the color of all parts or components of the emoji. Unless otherwise specified by the user, do not describe people. Do not describe the background of the image. Your output should be in the format:\n\n```\nemoji of {description}. {addon phrases}. 3D lighting. no cast shadows.\n```\n\nThe description should be a 1 sentence of your interpretation of the emoji.\nThen, you may choose to add addon phrases. You must use the following in the given scenarios:\n\n- \"cute.\": If generating anything that's not an object, and also not a human\n- \"enlarged head in cartoon style.\": ONLY animals\n- \"head is turned towards viewer.\": ONLY humans or animals\n- \"detailed texture.\": ONLY objects\n\nFurther addon phrases may be added to ensure the clarity of the emoji.\n\n\n USER PROMPT: flying pig"},
-    { "role": "assistant", "content": "emoji of flying pink pig. enlarged head in cartoon style. cute. white wings. head is turned towards viewer. 3D lighting. no cast shadows." }
-  ]
-}
\ No newline at end of file
diff --git a/promptAssistant.py b/promptAssistant.py
deleted file mode 100644
index c7b03c1..0000000
--- a/promptAssistant.py
+++ /dev/null
@@ -1,60 +0,0 @@
-import requests
-import json
-import os
-
-
-def get_prompt_response(user_prompt: str, metaprompt: str) -> str:
-    # The URL where the local server is running
-    url = "http://localhost:1234/v1/chat/completions"
-
-    # The headers to indicate that we are sending JSON data
-    headers = {"Content-Type": "application/json"}
-
-    # The JSON data payload
-    # Read the content from METAPROMPT.md
-    with open(
-        f"{os.path.abspath(os.path.dirname(__file__))}/metaprompt/{metaprompt}.md", "r"
-    ) as file:
-        prompt_content = file.read()
-
-    # Append the user prompt
-    full_prompt = prompt_content + f'\n\nUSER PROMPT: "{user_prompt}"'
-
-    # get the (pre-made) conversation history and append the current full prompt
-    json_path = f"{os.path.abspath(os.path.dirname(__file__))}/metaprompt/{metaprompt}.json"
-
-    try:
-        with open(json_path, "r") as json_file:
-            conversation_history = json.load(json_file)
-        print("Using pre-existing conversation history")
-    except FileNotFoundError:
-        print("No pre existing conversation history")
-        conversation_history = {"messages": []}
-
-    conversation_history["messages"].append({"role": "user", "content": full_prompt})
-
-    data = {
-        "messages": conversation_history["messages"],
-        "temperature": 0.7,
-        "max_tokens": -1,
-        "stream": False,
-    }
-
-    # Making the POST request to the local server
-    response = requests.post(url, headers=headers, data=json.dumps(data))
-
-    # Checking if the request was successful
-    if response.status_code == 200:
-        # Returning the response content
-        return (
-            response.json()
-            .get("choices")[0]
-            .get("message")
-            .get("content")
-            .replace("```", "")
-            .replace("\n", "")
-        )
-    else:
-        raise Exception(
-            f"Failed to get response: {response.status_code}, {response.text}"
-        )
diff --git a/resize.py b/resize.py
deleted file mode 100644
index 0bc10b2..0000000
--- a/resize.py
+++ /dev/null
@@ -1,36 +0,0 @@
-from PIL import Image
-import os
-import sys
-
-
-def get_resized_filename(input_path):
-    """Generate output filename by adding '-resized' before extension"""
-    base, ext = os.path.splitext(input_path)
-    return f"{base}-resized{ext}"
-
-
-def resize_image(input_path, scale=5):
-    """Resize image by given scale factor using Lanczos resampling"""
-    try:
-        # Open and resize image
-        with Image.open(input_path) as img:
-            width, height = img.size
-            new_img = img.resize((width * scale, height * scale), Image.LANCZOS)
-
-        # Save resized image
-        output_path = get_resized_filename(input_path)
-        new_img.save(output_path)
-        print(f"Saved resized image to: {output_path}")
-
-    except Exception as e:
-        print(f"Error processing image: {e}")
-        sys.exit(1)
-
-
-if __name__ == "__main__":
-    if len(sys.argv) != 2:
-        print("Usage: python resize_image.py <input_path>")
-        sys.exit(1)
-
-    input_path = sys.argv[1]
-    resize_image(input_path)
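
A quick smoke test for the new service (a sketch, not part of the patch: the port, repo id, and output filename are illustrative; assumes the API was started with "uvicorn main:app" from the genmoji/ directory, Ollama is serving llama3.1:latest, and requests is installed separately):

import requests

BASE = "http://localhost:8000"

# pull the LoRA weights once (replace the placeholder repo id)
requests.post(f"{BASE}/download_model", json={
    "huggingface_repo": "<user>/<repo>",
    "model_name": "flux-dev",
})

# /inference streams back the upscaled PNG
resp = requests.post(f"{BASE}/inference", json={"prompt": "flying pig"})
resp.raise_for_status()
with open("genmoji.png", "wb") as f:
    f.write(resp.content)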