Skip to content

Commit a893a2e

Browse files
authored
Merge pull request #633 from dzcode-io/feat/translate-name-and-title-fields-using-llm
Feat: translate name and title fields using llm
2 parents a620aeb + 5e07302 commit a893a2e

File tree

13 files changed

+238
-28
lines changed

13 files changed

+238
-28
lines changed

api/README.md

+2-1
Original file line numberDiff line numberDiff line change
@@ -24,8 +24,9 @@ NODE_ENV=development
2424
Keep in mind that you have limited calls to the GitHub API (60 calls per hour). The [FetchService](./api/src/fetch/service.ts) does a great job of caching these calls so it doesn't unnecessarily consume the GitHub API quota. If you wish to extend the limit from 60 to 5000, simply create a [GitHub Personal Access Token](https://github.com/settings/tokens) (make sure it has `Access public repositories` checked), and set it in `./api/.env` like this:
2525

2626
```.env
27-
GITHUB_TOKEN=Paste_Your_Token_Here
27+
GITHUB_TOKEN=Paste_your_token_here
2828
NODE_ENV=development
29+
OPENAI_KEY=Paste_your_key_here
2930
```
3031

3132
**Note:** If the README is still unclear, please create a PR with your suggested changes/additions.

api/oracle-cloud/deploy.ts

+1
Original file line numberDiff line numberDiff line change
@@ -52,6 +52,7 @@ const appPath = "~/app";
5252
const sshPrefix =
5353
"ssh -o StrictHostKeyChecking=no " + (sshKeyPath ? `-i ${sshKeyPath} ` : "") + sshServer + " ";
5454

55+
// todo-ZM: let docker-compose handle deletion of old containers
5556
// Check for existing containers
5657
logs = execSync(sshPrefix + '"sudo docker ps -aq"');
5758

api/package.json

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
"@types/make-fetch-happen": "^10.0.4",
1616
"class-transformer": "^0.5.1",
1717
"class-validator": "^0.14.1",
18+
"class-validator-jsonschema": "^5.0.1",
1819
"cors": "^2.8.5",
1920
"cron": "^3.1.7",
2021
"dotenv": "^16.4.5",

api/src/ai/service.ts

+66
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,66 @@
1+
import { ConfigService } from "src/config/service";
2+
import { LoggerService } from "src/logger/service";
3+
import { Service } from "typedi";
4+
import { targetConstructorToSchema } from "class-validator-jsonschema";
5+
import { FetchService } from "src/fetch/service";
6+
import { ClassConstructor, plainToClass } from "class-transformer";
7+
import { validateSync } from "class-validator";
8+
9+
/**
 * A single chat message exchanged with the OpenAI chat-completions endpoint.
 *
 * "system" and "user" are the roles sent in requests; "assistant" is included
 * because the same type describes the message returned in the response
 * (`choices[].message`), which the API labels with the assistant role.
 */
type AIChat = { role: "user" | "system" | "assistant"; content: string };
10+
11+
/**
 * The subset of the chat-completions response body that AIService reads:
 * a list of choices, each wrapping one chat message.
 */
interface OpenAIResponse {
  choices: { message: AIChat }[];
}
14+
15+
/**
 * Client for the OpenAI chat-completions endpoint that returns validated DTO
 * instances instead of raw model text.
 */
@Service()
export class AIService {
  constructor(
    private readonly configService: ConfigService,
    private readonly logger: LoggerService,
    private readonly fetchService: FetchService,
  ) {}

  /**
   * Sends `payload` to the chat-completions endpoint and returns the reply
   * as a validated instance of `ResponseDto`.
   *
   * A system message containing the JSON schema derived from `ResponseDto`
   * is prepended to `payload`, so the model is told which shape to answer in.
   *
   * @param payload - chat messages to send, in order.
   * @param ResponseDto - class-validator decorated class describing the expected reply.
   * @returns the first choice's content, parsed and converted to `ResponseDto`.
   * @throws Error when the response carries no text content, when the content
   *   is not a JSON object, or when the resulting instance fails validation.
   */
  public query = async <T extends object>(
    payload: AIChat[],
    ResponseDto: ClassConstructor<T>,
  ): Promise<T> => {
    const schema = targetConstructorToSchema(ResponseDto);

    const payloadWithValidationPrompt: AIChat[] = [
      {
        role: "system",
        content: `system response must strictly follow the schema:\n${JSON.stringify(schema)}`,
      },
      ...payload,
    ];

    const { OPENAI_KEY } = this.configService.env();

    // todo: cache response
    const res = await this.fetchService.post<OpenAIResponse>(
      "https://api.openai.com/v1/chat/completions",
      {
        headers: { Authorization: `Bearer ${OPENAI_KEY}` },
        body: {
          model: "gpt-4o",
          messages: payloadWithValidationPrompt,
        },
      },
    );

    // The body is whatever JSON the endpoint returned; an error payload has no
    // `choices`, so check the shape before dereferencing it.
    const content: unknown = res?.choices?.[0]?.message?.content;
    if (typeof content !== "string") {
      throw new Error(`⚠️ Unexpected AI response shape: ${JSON.stringify(res)}`);
    }

    let parsed: unknown;
    try {
      parsed = JSON.parse(content);
    } catch (error) {
      throw new Error(`⚠️ AI response is not valid JSON (${String(error)}): ${content}`);
    }

    // Only a plain JSON object can be mapped onto the DTO; reject null,
    // arrays and primitives before handing the value to class-transformer.
    if (typeof parsed !== "object" || parsed === null || Array.isArray(parsed)) {
      throw new Error(`⚠️ AI response is not a JSON object: ${content}`);
    }

    const output = plainToClass(ResponseDto, parsed as Record<string, unknown>);
    const errors = validateSync(output);

    if (errors.length > 0) {
      const details = errors
        .map((error) => "\n" + error.property + " : " + JSON.stringify(error.constraints))
        .join("");
      throw new Error(`⚠️ Errors in AI response in the following keys:${details}`);
    }

    return output;
  };
}

api/src/config/types.ts

+3
Original file line numberDiff line numberDiff line change
@@ -40,4 +40,7 @@ export class EnvRecord {
4040
}
4141

4242
MEILISEARCH_MASTER_KEY = "default";
43+
44+
@IsString()
45+
OPENAI_KEY = "no-key";
4346
}

api/src/digest/cron.ts

+76-15
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ import { RepositoryRepository } from "src/repository/repository";
1414
import { SearchService } from "src/search/service";
1515
import { Service } from "typedi";
1616
import { TagRepository } from "src/tag/repository";
17+
import { AIService } from "src/ai/service";
18+
import { AIResponseTranslateNameDto, AIResponseTranslateTitleDto } from "./dto";
1719

1820
@Service()
1921
export class DigestCron {
@@ -29,7 +31,8 @@ export class DigestCron {
2931
private readonly contributionsRepository: ContributionRepository,
3032
private readonly contributorsRepository: ContributorRepository,
3133
private readonly searchService: SearchService,
32-
private readonly tagsRepository: TagRepository,
34+
private readonly tagRepository: TagRepository,
35+
private readonly aiService: AIService,
3336
) {
3437
const SentryCronJob = cron.instrumentCron(CronJob, "DigestCron");
3538
new SentryCronJob(
@@ -81,10 +84,29 @@ export class DigestCron {
8184
// or uncomment to skip the cron
8285
// if (Math.random()) return;
8386

87+
const projectTitleSystemPrompt = `user will give you an open-source project name, and you will translate it to Arabic.`;
88+
const contributorNameSystemPrompt = `user will give you an open-source contributor name, and you will translate it to Arabic.
89+
if the name contain both english and arabic only keep the parts related to the language.`;
90+
const issueTitleSystemPrompt = `user will give you an open-source issue/PR title, and you will translate it to Arabic.`;
91+
8492
for (const project of projectsFromDataFolder) {
85-
// todo: call AIService
86-
const name_en = project.name;
87-
const name_ar = `ar ${name_en}`;
93+
let name_en = project.name;
94+
let name_ar = name_en;
95+
96+
try {
97+
const aiRes = await this.aiService.query(
98+
[
99+
{ role: "system", content: projectTitleSystemPrompt },
100+
{ role: "user", content: name_en },
101+
],
102+
AIResponseTranslateNameDto,
103+
);
104+
105+
name_en = aiRes.name_en;
106+
name_ar = aiRes.name_ar;
107+
} catch (error) {
108+
captureException(error, { tags: { type: "CRON" } });
109+
}
88110

89111
const projectEntity: ProjectRow = {
90112
runId,
@@ -94,7 +116,7 @@ export class DigestCron {
94116
};
95117
const [{ id: projectId }] = await this.projectsRepository.upsert(projectEntity);
96118
for (const tagId of project.tags || []) {
97-
await this.tagsRepository.upsert({ id: tagId, runId });
119+
await this.tagRepository.upsert({ id: tagId, runId });
98120
await this.projectsRepository.upsertRelationWithTag({ projectId, tagId, runId });
99121
}
100122
await this.searchService.upsert("project", projectEntity);
@@ -133,9 +155,22 @@ export class DigestCron {
133155

134156
if (githubUser.type !== "User") continue;
135157

136-
// todo: call AIService
137-
const name_en = githubUser.name || githubUser.login;
138-
const name_ar = `ar ${name_en}`;
158+
let name_en = githubUser.name || githubUser.login;
159+
let name_ar = name_en;
160+
try {
161+
const aiRes = await this.aiService.query(
162+
[
163+
{ role: "system", content: contributorNameSystemPrompt },
164+
{ role: "user", content: name_en },
165+
],
166+
AIResponseTranslateNameDto,
167+
);
168+
169+
name_en = aiRes.name_en;
170+
name_ar = aiRes.name_ar;
171+
} catch (error) {
172+
captureException(error, { tags: { type: "CRON" } });
173+
}
139174

140175
const contributorEntity: ContributorRow = {
141176
name_en,
@@ -160,9 +195,22 @@ export class DigestCron {
160195

161196
const type = issue.pull_request ? "PULL_REQUEST" : "ISSUE";
162197

163-
// todo: call AIService
164-
const title_en = issue.title;
165-
const title_ar = `ar ${title_en}`;
198+
let title_en = issue.title;
199+
let title_ar = `ar ${title_en}`;
200+
try {
201+
const aiRes = await this.aiService.query(
202+
[
203+
{ role: "system", content: issueTitleSystemPrompt },
204+
{ role: "user", content: title_en },
205+
],
206+
AIResponseTranslateTitleDto,
207+
);
208+
209+
title_en = aiRes.title_en;
210+
title_ar = aiRes.title_ar;
211+
} catch (error) {
212+
captureException(error, { tags: { type: "CRON" } });
213+
}
166214

167215
const contributionEntity: ContributionRow = {
168216
title_en,
@@ -194,9 +242,22 @@ export class DigestCron {
194242
username: repoContributor.login,
195243
});
196244

197-
// todo: call AIService
198-
const name_en = contributor.name || contributor.login;
199-
const name_ar = `ar ${name_en}`;
245+
let name_en = contributor.name || contributor.login;
246+
let name_ar = `ar ${name_en}`;
247+
try {
248+
const aiRes = await this.aiService.query(
249+
[
250+
{ role: "system", content: contributorNameSystemPrompt },
251+
{ role: "user", content: name_en },
252+
],
253+
AIResponseTranslateNameDto,
254+
);
255+
256+
name_en = aiRes.name_en;
257+
name_ar = aiRes.name_ar;
258+
} catch (error) {
259+
captureException(error, { tags: { type: "CRON" } });
260+
}
200261

201262
const contributorEntity: ContributorRow = {
202263
name_en,
@@ -244,7 +305,7 @@ export class DigestCron {
244305
await this.projectsRepository.deleteAllRelationWithTagButWithRunId(runId);
245306
await this.projectsRepository.deleteAllButWithRunId(runId);
246307

247-
await this.tagsRepository.deleteAllButWithRunId(runId);
308+
await this.tagRepository.deleteAllButWithRunId(runId);
248309

249310
await Promise.all([
250311
this.searchService.deleteAllButWithRunId("project", runId),

api/src/digest/dto.ts

+17
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
import { IsString } from "class-validator";
2+
3+
/**
 * Expected shape of an AI reply carrying a name in two variants.
 * Both fields must be strings to pass class-validator validation.
 */
export class AIResponseTranslateNameDto {
  /** The `_en` variant of the name. */
  @IsString() name_en!: string;

  /** The `_ar` variant of the name. */
  @IsString() name_ar!: string;
}
10+
11+
/**
 * Expected shape of an AI reply carrying a title in two variants.
 * Both fields must be strings to pass class-validator validation.
 */
export class AIResponseTranslateTitleDto {
  /** The `_en` variant of the title. */
  @IsString() title_en!: string;

  /** The `_ar` variant of the title. */
  @IsString() title_ar!: string;
}

api/src/fetch/service.ts

+19-4
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
import { defaults } from "make-fetch-happen";
1+
import { defaults, FetchOptions } from "make-fetch-happen";
22
import { ConfigService } from "src/config/service";
33
import { LoggerService } from "src/logger/service";
44
import { Service } from "typedi";
@@ -18,21 +18,36 @@ export class FetchService {
1818
});
1919
}
2020

21+
/**
 * Issues a POST request and resolves with the JSON-decoded response body.
 *
 * `Content-Type: application/json` is applied first, so an entry with the
 * same key in `headers` overrides it. A truthy `body` is serialized with
 * `JSON.stringify`; otherwise no body is sent.
 *
 * @param url - absolute URL to post to.
 * @param config - optional `headers` and `body`; `params` is not used here.
 * @returns the value produced by the shared `fetch` helper, typed as `T`.
 */
public post = async <T>(url: string, config: FetchConfig = {}): Promise<Awaited<T>> => {
  const { headers = {}, body } = config;

  const mergedHeaders = { "Content-Type": "application/json", ...headers };
  const serializedBody = body ? JSON.stringify(body) : undefined;

  return await this.fetch<T>(url, {
    headers: mergedHeaders,
    method: "POST",
    body: serializedBody,
  });
};
35+
2136
public get = async <T>(
2237
url: string,
2338
{ params = {}, headers = {} }: FetchConfig = {},
2439
): Promise<Awaited<T>> => {
2540
const _url = new URL(url);
2641
Object.keys(params).forEach((key) => _url.searchParams.append(key, String(params[key])));
2742

28-
const response = await this.fetch<T>(_url.toString(), { headers });
43+
const response = await this.fetch<T>(_url.toString(), { headers, method: "GET" });
2944
return response;
3045
};
3146

3247
private makeFetchHappenInstance;
33-
private async fetch<T>(url: string, { headers }: Omit<FetchConfig, "params"> = {}) {
48+
private async fetch<T>(url: string, options: FetchOptions) {
3449
this.logger.info({ message: `Fetching ${url}` });
35-
const response = await this.makeFetchHappenInstance(url, { headers });
50+
const response = await this.makeFetchHappenInstance(url, options);
3651
const jsonResponse = (await response.json()) as T;
3752
return jsonResponse;
3853
}

api/src/fetch/types.ts

+1
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
/** Optional request settings accepted by the fetch helpers. */
export interface FetchConfig {
  /** Query-string entries, keyed by parameter name. */
  params?: Record<string, string | number | boolean>;

  /** Extra request headers, keyed by header name. */
  headers?: Record<string, string>;

  /** Request payload as a plain object. */
  body?: Record<string, unknown>;
}

0 commit comments

Comments
 (0)