Skip to content

Commit 2dcf2ba

Browse files
committed
Merge branch 'AmmrFX-master'
2 parents 22c1bfe + d8add56 commit 2dcf2ba

5 files changed

Lines changed: 350 additions & 43 deletions

File tree

arabic_alphabet.go

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,67 @@ type Harf struct {
55
Unicode, Isolated, Beginning, Middle, Final rune
66
}
77

8+
// Unicode code points for the Arabic diacritical marks (tashkeel) and the
// precomposed shadda ligatures used by this package.
const (
9+
FATHA rune = '\u064E' // َ (short a)
10+
DAMMA rune = '\u064F' // ُ (short u)
11+
KASRA rune = '\u0650' // ِ (short i)
12+
SUKUN rune = '\u0652' // ْ (no vowel)
13+
SHADDA rune = '\u0651' // ّ (gemination/doubling)
14+
15+
// Tanween (nunation)
16+
TANWEEN_FATH rune = '\u064B' // ً (an)
17+
TANWEEN_DAMM rune = '\u064C' // ٌ (un)
18+
TANWEEN_KASR rune = '\u064D' // ٍ (in)
19+
20+
// Quranic / Extended marks
21+
SUPERSCRIPT_ALEF rune = '\u0670' // ٰ (dagger alef)
22+
MADDAH_ABOVE rune = '\u0653' // ٓ (maddah)
23+
HAMZA_ABOVE rune = '\u0654' // ٔ (hamza above)
24+
HAMZA_BELOW rune = '\u0655' // ٕ (hamza below)
25+
SUBSCRIPT_ALEF rune = '\u0656' // ٖ (subscript alef)
26+
INVERTED_DAMMA rune = '\u0657' // ٗ (inverted damma)
27+
MARK_NOON_GHUNNA rune = '\u0658' // ٘ (noon ghunna)
28+
29+
// Shadda + Vowel Ligatures (Arabic Presentation Forms-A, U+FC5E..U+FC63)
30+
SHADDA_FATHA rune = '\uFC60' // ﱠ (Shadda + Fatha)
31+
SHADDA_DAMMA rune = '\uFC61' // ﱡ (Shadda + Damma)
32+
SHADDA_KASRA rune = '\uFC62' // ﱢ (Shadda + Kasra)
33+
SHADDA_DAMMATAN rune = '\uFC5E' // ﱞ (Shadda + Tanween Damm)
34+
SHADDA_KASRATAN rune = '\uFC5F' // ﱟ (Shadda + Tanween Kasr)
35+
SHADDA_SUPERSCRIPT_ALEF rune = '\uFC63' // ﱣ (Shadda + Superscript Alef)
36+
)
37+
38+
// tashkeelMarks is the set of runes that IsTashkeel reports as diacritical
// marks. Only the combining marks are listed; the precomposed shadda ligatures
// (SHADDA_FATHA, SHADDA_DAMMA, ...) are not members.
// NOTE(review): ToArabic emits those ligatures and reverseWithTashkeel then
// sees them as base characters because they are absent here - confirm that is
// intended.
var tashkeelMarks = map[rune]bool{
39+
// Basic
40+
FATHA: true, DAMMA: true, KASRA: true,
41+
SHADDA: true, SUKUN: true,
42+
43+
// Tanween
44+
TANWEEN_DAMM: true, TANWEEN_FATH: true, TANWEEN_KASR: true,
45+
46+
// Quranic/Extended
47+
SUPERSCRIPT_ALEF: true, MADDAH_ABOVE: true,
48+
HAMZA_ABOVE: true, HAMZA_BELOW: true,
49+
SUBSCRIPT_ALEF: true, INVERTED_DAMMA: true,
50+
MARK_NOON_GHUNNA: true,
51+
}
52+
53+
// shaddaLigatures maps a vowel mark to the precomposed Shadda+Vowel ligature
// that combines it with SHADDA. Marks without an entry (for example
// TANWEEN_FATH and SUKUN) have no ligature here, so a lookup yields the zero
// rune.
54+
var shaddaLigatures = map[rune]rune{
55+
FATHA: SHADDA_FATHA,
56+
DAMMA: SHADDA_DAMMA,
57+
KASRA: SHADDA_KASRA,
58+
TANWEEN_DAMM: SHADDA_DAMMATAN,
59+
TANWEEN_KASR: SHADDA_KASRATAN,
60+
SUPERSCRIPT_ALEF: SHADDA_SUPERSCRIPT_ALEF,
61+
}
62+
63+
// GetShaddaLigature returns the combined Shadda+Vowel ligature for the given
// vowel mark, looked up in shaddaLigatures.
64+
// It returns 0 (the map's zero value) if no ligature is registered for vowel.
65+
func GetShaddaLigature(vowel rune) rune {
66+
return shaddaLigatures[vowel]
67+
}
68+
869
// Arabic Alphabet using the new Harf type.
970
var (
1071
ALEF_HAMZA_ABOVE = Harf{ // أ
@@ -325,7 +386,7 @@ var (
325386
Final: '\ufef8'}
326387
)
327388

328-
var arabic_alphabet = map[rune]Harf{}
389+
var arabicAlphabet = map[rune]Harf{}
329390

330391
var arabicAlphabetCollection = []Harf{
331392
ALEF_HAMZA_ABOVE,
@@ -378,11 +439,11 @@ var arabicAlphabetCollection = []Harf{
378439
func init() {
379440
for _, harf := range arabicAlphabetCollection {
380441
// Map all forms to the Harf struct
381-
arabic_alphabet[harf.Unicode] = harf
382-
arabic_alphabet[harf.Isolated] = harf
383-
arabic_alphabet[harf.Beginning] = harf
384-
arabic_alphabet[harf.Middle] = harf
385-
arabic_alphabet[harf.Final] = harf
442+
arabicAlphabet[harf.Unicode] = harf
443+
arabicAlphabet[harf.Isolated] = harf
444+
arabicAlphabet[harf.Beginning] = harf
445+
arabicAlphabet[harf.Middle] = harf
446+
arabicAlphabet[harf.Final] = harf
386447
}
387448
}
388449

arabic_helper.go

Lines changed: 108 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,56 @@
11
package gopdf
22

3-
func Reverse(s string) string {
4-
r := []rune(s)
5-
for i, j := 0, len(r)-1; i < len(r)/2; i, j = i+1, j-1 {
6-
r[i], r[j] = r[j], r[i]
7-
}
8-
return string(r)
3+
import "strings"
4+
5+
// ALLAH_LIGATURE is the Unicode character for the Allah ligature (U+FDF2 ﷲ)
6+
const ALLAH_LIGATURE rune = 0xFDF2
7+
8+
// convertAllahToLigature replaces every occurrence of the four-letter sequence
// "الله" (Alef, Lam, Lam, Heh) in text with the single Allah ligature U+FDF2 (ﷲ)
// and returns the resulting string.
//
// NOTE(review): the match is a plain substring replacement. It is not anchored
// to word boundaries, so the sequence is also replaced when it is only part of
// a longer word, and a spelling with tashkeel between the letters is not
// matched at all - confirm both are acceptable.
9+
func convertAllahToLigature(text string) string {
10+
// The search pattern is built from the base code points, without tashkeel: Alef + Lam + Lam + Heh
11+
allah := string([]rune{ALEF.Unicode, LAM.Unicode, LAM.Unicode, HEH.Unicode})
12+
// Replace all non-overlapping occurrences with the Allah ligature character
13+
return strings.ReplaceAll(text, allah, string(ALLAH_LIGATURE))
914
}
1015

16+
// reverseWithTashkeel reverses Arabic text while keeping tashkeel attached to base characters
17+
func reverseWithTashkeel(runes []rune) string {
18+
if len(runes) == 0 {
19+
return ""
20+
}
21+
22+
// Group base characters with their following tashkeel
23+
type hrofGroup struct {
24+
base rune
25+
tashkeel []rune
26+
}
27+
28+
var groups []hrofGroup
29+
var currentGroup *hrofGroup
30+
31+
for _, r := range runes {
32+
if IsTashkeel(r) {
33+
if currentGroup != nil {
34+
currentGroup.tashkeel = append(currentGroup.tashkeel, r)
35+
}
36+
} else {
37+
groups = append(groups, hrofGroup{base: r})
38+
currentGroup = &groups[len(groups)-1]
39+
}
40+
}
41+
42+
// Reverse the groups and rebuild
43+
// Output tashkeel BEFORE base for proper RTL rendering in PDF
44+
result := make([]rune, 0, len(runes))
45+
for i := len(groups) - 1; i >= 0; i-- {
46+
result = append(result, groups[i].tashkeel...)
47+
result = append(result, groups[i].base)
48+
}
49+
return string(result)
50+
51+
}
1152
func getHarf(char rune) Harf {
12-
for _, s := range arabic_alphabet {
53+
for _, s := range arabicAlphabet {
1354
if s.equals(char) {
1455
return s
1556
}
@@ -42,15 +83,15 @@ func getCharShape(previousChar, currentChar, nextChar rune) rune {
4283
nextArabic := false
4384
previousArabic := false
4485

45-
if _, ok := arabic_alphabet[previousChar]; ok {
86+
if _, ok := arabicAlphabet[previousChar]; ok {
4687
previousArabic = true
4788
}
4889

49-
if _, ok := arabic_alphabet[nextChar]; ok {
90+
if _, ok := arabicAlphabet[nextChar]; ok {
5091
nextArabic = true
5192
}
5293

53-
if _, ok := arabic_alphabet[currentChar]; !ok {
94+
if _, ok := arabicAlphabet[currentChar]; !ok {
5495
return shape
5596
}
5697

@@ -83,8 +124,34 @@ func getCharShape(previousChar, currentChar, nextChar rune) rune {
83124
return shape
84125
}
85126

127+
// findPreviousNonTashkeelHarf scans runes backwards, starting just before
// currentIndex, and returns the first rune that is not a tashkeel mark.
// It returns 0 when every preceding rune is tashkeel or when there is no
// preceding rune.
128+
func findPreviousNonTashkeelHarf(runes []rune, currentIndex int) rune {
129+
for i := currentIndex - 1; i >= 0; i-- {
130+
if !IsTashkeel(runes[i]) {
131+
return runes[i]
132+
}
133+
}
134+
return 0
135+
}
136+
137+
// findNextNonTashkeelHarf scans runes forwards, starting just after
// currentIndex, and returns the first rune that is not a tashkeel mark.
// It returns 0 when every following rune is tashkeel or when there is no
// following rune.
138+
func findNextNonTashkeelHarf(runes []rune, currentIndex int) rune {
139+
for i := currentIndex + 1; i < len(runes); i++ {
140+
if !IsTashkeel(runes[i]) {
141+
return runes[i]
142+
}
143+
}
144+
return 0
145+
}
146+
147+
// IsTashkeel reports whether r is one of the Arabic diacritical marks listed in
// tashkeelMarks. Any rune that is not in that set yields false.
148+
func IsTashkeel(r rune) bool {
149+
return tashkeelMarks[r]
150+
}
151+
86152
func ToArabic(text string) string {
87-
var nextHarf, previousHarf rune
153+
// Preprocess: convert "الله" to the Allah ligature U+FDF2 (ﷲ)
154+
text = convertAllahToLigature(text)
88155

89156
hrof := []rune(text) // hrof is arabic letters
90157
hrofLength := len(hrof) // hrof length is the number of arabic letters
@@ -93,17 +160,24 @@ func ToArabic(text string) string {
93160
for i := 0; i < hrofLength; i++ {
94161
currentHarf := hrof[i]
95162

96-
if i == 0 {
97-
previousHarf = 0
98-
} else {
99-
previousHarf = hrof[i-1]
163+
// If current char is tashkeel
164+
if IsTashkeel(currentHarf) {
165+
// Check if vowel followed by SHADDA - output combined ligature
166+
if i+1 < hrofLength && hrof[i+1] == SHADDA && currentHarf != SHADDA {
167+
if ligature := GetShaddaLigature(currentHarf); ligature != 0 {
168+
arabicSentence = append(arabicSentence, ligature)
169+
i++ // skip the shadda we already added
170+
continue
171+
}
172+
}
173+
arabicSentence = append(arabicSentence, currentHarf)
174+
continue
100175
}
176+
// Find previous non-tashkeel character
177+
previousHarf := findPreviousNonTashkeelHarf(hrof, i)
101178

102-
if i == hrofLength-1 {
103-
nextHarf = 0
104-
} else {
105-
nextHarf = hrof[i+1]
106-
}
179+
// Find next non-tashkeel character
180+
nextHarf := findNextNonTashkeelHarf(hrof, i)
107181

108182
// Lam-Alef Ligature Check
109183
if currentHarf == LAM.Unicode && nextHarf != 0 {
@@ -119,19 +193,26 @@ func ToArabic(text string) string {
119193
}
120194
if foundLigature {
121195
currentHarf = ligatureHarf
122-
i++
123-
// We need to update nextHarf to the one *after* the Alef for correct shaping of the ligature itself
124-
if i == hrofLength-1 {
125-
nextHarf = 0
126-
} else {
127-
nextHarf = hrof[i+1]
196+
// Collect tashkeel between Lam and Alef
197+
var tashkeelBetween []rune
198+
for i++; i < hrofLength && hrof[i] != nextHarf; i++ {
199+
if IsTashkeel(hrof[i]) {
200+
tashkeelBetween = append(tashkeelBetween, hrof[i])
201+
}
128202
}
203+
nextHarf = findNextNonTashkeelHarf(hrof, i)
204+
205+
// Append ligature shape first, then tashkeel (so tashkeel attaches to ligature after reversal)
206+
harfShape := getCharShape(previousHarf, currentHarf, nextHarf)
207+
arabicSentence = append(arabicSentence, harfShape)
208+
arabicSentence = append(arabicSentence, tashkeelBetween...)
209+
continue
129210
}
130211
}
131212

132213
harfShape := getCharShape(previousHarf, currentHarf, nextHarf)
133214
arabicSentence = append(arabicSentence, harfShape)
134215
}
135-
arabicSentenceRTL := Reverse(string(arabicSentence))
216+
arabicSentenceRTL := reverseWithTashkeel(arabicSentence)
136217
return arabicSentenceRTL
137218
}

0 commit comments

Comments
 (0)