diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 0000000..9d866e3 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,11 @@ +# To get started with Dependabot version updates, you'll need to specify which +# package ecosystems to update and where the package manifests are located. +# Please see the documentation for all configuration options: +# https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file + +version: 2 +updates: + - package-ecosystem: "pip" # See documentation for possible values + directory: "/" # Location of package manifests + schedule: + interval: "weekly" diff --git a/.github/workflows/python-package.yml b/.github/workflows/python-package.yml index 0fe9439..efe7b51 100644 --- a/.github/workflows/python-package.yml +++ b/.github/workflows/python-package.yml @@ -12,21 +12,23 @@ on: jobs: build: - runs-on: ubuntu-latest strategy: fail-fast: false matrix: + os: [macos-latest, windows-latest, ubuntu-latest] python-version: ["3.10", "3.11", "3.12"] - + runs-on: ${{ matrix.os }} + steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v3 + uses: actions/setup-python@v5 with: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | python -m pip install --upgrade pip + echo "psycopg-binary" >> requirements.txt pip install -e .[test] - name: Lint with flake8 run: | diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml new file mode 100644 index 0000000..15e93dd --- /dev/null +++ b/.github/workflows/python-publish.yml @@ -0,0 +1,39 @@ +# This workflow will upload a Python Package using Twine when a release is created +# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python#publishing-to-package-registries + +# This workflow uses actions that are not certified by GitHub. 
+# They are provided by a third-party and are governed by +# separate terms of service, privacy policy, and support +# documentation. + +name: Upload Python Package + +on: + release: + types: [published] + +permissions: + contents: read + +jobs: + deploy: + + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v5 + with: + python-version: '3.x' + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install build + - name: Build package + run: python -m build + - name: Publish package + uses: pypa/gh-action-pypi-publish@27b31702a0e7fc50959f5ad993c78deac1bdfc29 + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..b40afae --- /dev/null +++ b/LICENSE @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2024 José Miguel Cordero Carvacho + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. 
diff --git a/prompt2map/providers/openai.py b/prompt2map/providers/openai.py index 86c0c94..ee422a1 100644 --- a/prompt2map/providers/openai.py +++ b/prompt2map/providers/openai.py @@ -1,7 +1,8 @@ import inspect import json import logging -from typing import Any, Callable, Optional, TypeVar +from typing import Any, Callable, Iterable, Literal, Optional, TypeVar +from duckdb import DEFAULT import jsonlines import numpy as np from openai.types import Batch @@ -12,28 +13,30 @@ T = TypeVar('T') +DEFAULT_EMBEDDING_MODEL = "text-embedding-3-small" +DEFAULT_LLM_MODEL = "gpt-4o" -def generate_openai_embedding_request(id: int, text: str) -> dict: +def generate_openai_embedding_request(id: int, text: str, model: str = DEFAULT_EMBEDDING_MODEL) -> dict: return { "custom_id": f"embedding_request_{id}", "method": "POST", "url": "/v1/embeddings", "body": { "input": text, - "model": "text-embedding-3-small", + "model": model, } } -def generate_openai_completion_request(id: int, system_prompt: Optional[str] = None, user_prompt: Optional[str] = None) -> dict: +def generate_openai_completion_request(id: int, max_tokens: int, system_prompt: Optional[str] = None, user_prompt: Optional[str] = None, model: str = DEFAULT_LLM_MODEL) -> dict: messages = get_messages(system_prompt, user_prompt) return { "custom_id": f"request-{id}", "method": "POST", "url": "/v1/chat/completions", "body": { - "model": "gpt-4o", + "model": model, "messages": messages, - "max_tokens": 10 + "max_tokens": max_tokens } } def get_messages(system_prompt: Optional[str] = None, user_prompt: Optional[str] = None) -> list[dict[str, str]]: @@ -47,7 +50,7 @@ def get_messages(system_prompt: Optional[str] = None, user_prompt: Optional[str] return messages class OpenAIProvider(LLM, Embedding): - def __init__(self, model_name: str = "gpt-4o", embedding_model_name: str = 'text-embedding-3-small', api_key: Optional[str] = None) -> None: + def __init__(self, model_name: str = DEFAULT_LLM_MODEL, embedding_model_name: 
str = DEFAULT_EMBEDDING_MODEL, api_key: Optional[str] = None) -> None: self.logger = logging.getLogger(self.__class__.__name__) self.client = OpenAI(api_key=api_key) self.model_name = model_name @@ -112,7 +115,7 @@ def get_embedding(self, text: str) -> np.ndarray: return np.array(embedding) - def send_batch_embedding(self, requests, input_file_name: str) -> str: + def send_batch(self, requests: Iterable[dict], input_file_name: str, endpoint: Literal["/v1/chat/completions", "/v1/embeddings", "/v1/completions"]) -> str: # Write the requests to a jsonl file with jsonlines.open(input_file_name, mode='w') as writer: writer.write_all(requests) @@ -126,7 +129,7 @@ def send_batch_embedding(self, requests, input_file_name: str) -> str: # Send the batch to OpenAI openai_batch = self.client.batches.create( input_file_id=batch_input_file.id, - endpoint="/v1/embeddings", + endpoint=endpoint, completion_window="24h", metadata={ "description": "testing", diff --git a/pyproject.toml b/pyproject.toml index 92fe25e..db851b0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -7,14 +7,24 @@ name = "prompt2map" description = "Dynamic maps generation based on natural language prompts using Retrieval-Augmented Generation (RAG)" readme = "README.md" requires-python = ">=3.10" -authors = [ - { name = "José Miguel Cordero Carvacho" } -] -license = { text = "Copyright (c) 2024 José Miguel Cordero Carvacho" } +authors = [{ name = "José Miguel Cordero Carvacho", email="josemcorderoc@gmail.com" }] +maintainers = [{ name = "José Miguel Cordero Carvacho", email="josemcorderoc@gmail.com" }] +license = {text = "MIT License"} +keywords = ["maps", "mapping", "cartography", "gis", "webgis", "geospatial", "llm", "nlp", "prompt"] classifiers = [ + "Development Status :: 2 - Pre-Alpha", + + "Intended Audience :: Science/Research", + "Topic :: Scientific/Engineering :: GIS", + + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", - "License :: Other/Proprietary License", - 
"Operating System :: OS Independent" + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + + "Operating System :: OS Independent", ] dynamic = ["dependencies", "version"] @@ -23,13 +33,13 @@ dependencies = {file = ["requirements.txt"]} [project.optional-dependencies] test = [ - "pytest==8.3.3", - "pytest-mock==3.14.0", - "flake8==7.1.0" + "pytest~=8.3", + "pytest-mock~=3.14", + "flake8~=7.1" ] [project.urls] -Homepage = "https://github.com/josemcorderoc/prompt2map" +Repository = "https://github.com/josemcorderoc/prompt2map" [tool.setuptools.packages.find] exclude = ["tests", "examples"] diff --git a/requirements.txt b/requirements.txt index cd68c3d..9a03b4f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,18 +1,18 @@ -bidict==0.23.1 -click==8.1.7 -folium==0.17.0 -geopandas==1.0.1 -jsonlines==4.0.0 -matplotlib==3.9.1 -pandas==2.2.2 -plotly==5.23.0 -psycopg==3.2.1 -Shapely==2.0.5 -SQLAlchemy==2.0.31 -sqlglot==25.8.1 -sqlparse==0.5.1 -openai==1.38.0 -mapclassify==2.8.0 -duckdb==1.1.0 -pyarrow==17.0.0 -typing-extensions==4.12.2 +bidict~=0.23 +click~=8.1 +folium~=0.17 +geopandas~=1.0 +jsonlines~=4.0 +matplotlib~=3.9 +pandas~=2.2 +plotly~=5.23 +psycopg~=3.2 +Shapely~=2.0 +SQLAlchemy~=2.0 +sqlglot~=25.8 +sqlparse~=0.5 +openai~=1.38 +mapclassify~=2.8 +duckdb~=1.1 +typing-extensions~=4.12 +pyarrow~=17.0