11package gopdf
22
3- func Reverse (s string ) string {
4- r := []rune (s )
5- for i , j := 0 , len (r )- 1 ; i < len (r )/ 2 ; i , j = i + 1 , j - 1 {
6- r [i ], r [j ] = r [j ], r [i ]
7- }
8- return string (r )
3+ import "strings"
4+
5+ // ALLAH_LIGATURE is the Unicode character for the Allah ligature (U+FDF2 ﷲ)
6+ const ALLAH_LIGATURE rune = 0xFDF2
7+
8+ // convertAllahToLigature replaces the word "الله" (Allah) with the Allah ligature U+FDF2 (ﷲ)
9+ func convertAllahToLigature (text string ) string {
10+ // الله without tashkeel: Alef + Lam + Lam + Heh
11+ allah := string ([]rune {ALEF .Unicode , LAM .Unicode , LAM .Unicode , HEH .Unicode })
12+ // Replace with the Allah ligature character
13+ return strings .ReplaceAll (text , allah , string (ALLAH_LIGATURE ))
914}
1015
16+ // reverseWithTashkeel reverses Arabic text while keeping tashkeel attached to base characters
17+ func reverseWithTashkeel (runes []rune ) string {
18+ if len (runes ) == 0 {
19+ return ""
20+ }
21+
22+ // Group base characters with their following tashkeel
23+ type hrofGroup struct {
24+ base rune
25+ tashkeel []rune
26+ }
27+
28+ var groups []hrofGroup
29+ var currentGroup * hrofGroup
30+
31+ for _ , r := range runes {
32+ if IsTashkeel (r ) {
33+ if currentGroup != nil {
34+ currentGroup .tashkeel = append (currentGroup .tashkeel , r )
35+ }
36+ } else {
37+ groups = append (groups , hrofGroup {base : r })
38+ currentGroup = & groups [len (groups )- 1 ]
39+ }
40+ }
41+
42+ // Reverse the groups and rebuild
43+ // Output tashkeel BEFORE base for proper RTL rendering in PDF
44+ result := make ([]rune , 0 , len (runes ))
45+ for i := len (groups ) - 1 ; i >= 0 ; i -- {
46+ result = append (result , groups [i ].tashkeel ... )
47+ result = append (result , groups [i ].base )
48+ }
49+ return string (result )
50+
51+ }
1152func getHarf (char rune ) Harf {
12- for _ , s := range arabic_alphabet {
53+ for _ , s := range arabicAlphabet {
1354 if s .equals (char ) {
1455 return s
1556 }
@@ -42,15 +83,15 @@ func getCharShape(previousChar, currentChar, nextChar rune) rune {
4283 nextArabic := false
4384 previousArabic := false
4485
45- if _ , ok := arabic_alphabet [previousChar ]; ok {
86+ if _ , ok := arabicAlphabet [previousChar ]; ok {
4687 previousArabic = true
4788 }
4889
49- if _ , ok := arabic_alphabet [nextChar ]; ok {
90+ if _ , ok := arabicAlphabet [nextChar ]; ok {
5091 nextArabic = true
5192 }
5293
53- if _ , ok := arabic_alphabet [currentChar ]; ! ok {
94+ if _ , ok := arabicAlphabet [currentChar ]; ! ok {
5495 return shape
5596 }
5697
@@ -83,8 +124,34 @@ func getCharShape(previousChar, currentChar, nextChar rune) rune {
83124 return shape
84125}
85126
127+ // findPreviousNonTashkeel finds the previous character that is not a tashkeel mark
128+ func findPreviousNonTashkeelHarf (runes []rune , currentIndex int ) rune {
129+ for i := currentIndex - 1 ; i >= 0 ; i -- {
130+ if ! IsTashkeel (runes [i ]) {
131+ return runes [i ]
132+ }
133+ }
134+ return 0
135+ }
136+
137+ // findNextNonTashkeel finds the next character that is not a tashkeel mark
138+ func findNextNonTashkeelHarf (runes []rune , currentIndex int ) rune {
139+ for i := currentIndex + 1 ; i < len (runes ); i ++ {
140+ if ! IsTashkeel (runes [i ]) {
141+ return runes [i ]
142+ }
143+ }
144+ return 0
145+ }
146+
147+ // IsTashkeel returns true if the rune is an Arabic diacritical mark
148+ func IsTashkeel (r rune ) bool {
149+ return tashkeelMarks [r ]
150+ }
151+
86152func ToArabic (text string ) string {
87- var nextHarf , previousHarf rune
153+ // Preprocess: convert "الله" to the Allah ligature U+FDF2 (ﷲ)
154+ text = convertAllahToLigature (text )
88155
89156 hrof := []rune (text ) // hrof is arabic letters
90157 hrofLength := len (hrof ) // hrof length is the number of arabic letters
@@ -93,17 +160,24 @@ func ToArabic(text string) string {
93160 for i := 0 ; i < hrofLength ; i ++ {
94161 currentHarf := hrof [i ]
95162
96- if i == 0 {
97- previousHarf = 0
98- } else {
99- previousHarf = hrof [i - 1 ]
163+ // If current char is tashkeel
164+ if IsTashkeel (currentHarf ) {
165+ // Check if vowel followed by SHADDA - output combined ligature
166+ if i + 1 < hrofLength && hrof [i + 1 ] == SHADDA && currentHarf != SHADDA {
167+ if ligature := GetShaddaLigature (currentHarf ); ligature != 0 {
168+ arabicSentence = append (arabicSentence , ligature )
169+ i ++ // skip the shadda we already added
170+ continue
171+ }
172+ }
173+ arabicSentence = append (arabicSentence , currentHarf )
174+ continue
100175 }
176+ // Find previous non-tashkeel character
177+ previousHarf := findPreviousNonTashkeelHarf (hrof , i )
101178
102- if i == hrofLength - 1 {
103- nextHarf = 0
104- } else {
105- nextHarf = hrof [i + 1 ]
106- }
179+ // Find next non-tashkeel character
180+ nextHarf := findNextNonTashkeelHarf (hrof , i )
107181
108182 // Lam-Alef Ligature Check
109183 if currentHarf == LAM .Unicode && nextHarf != 0 {
@@ -119,19 +193,26 @@ func ToArabic(text string) string {
119193 }
120194 if foundLigature {
121195 currentHarf = ligatureHarf
122- i ++
123- // We need to update nextHarf to the one *after* the Alef for correct shaping of the ligature itself
124- if i == hrofLength - 1 {
125- nextHarf = 0
126- } else {
127- nextHarf = hrof [ i + 1 ]
196+ // Collect tashkeel between Lam and Alef
197+ var tashkeelBetween [] rune
198+ for i ++ ; i < hrofLength && hrof [ i ] != nextHarf ; i ++ {
199+ if IsTashkeel ( hrof [ i ]) {
200+ tashkeelBetween = append ( tashkeelBetween , hrof [ i ])
201+ }
128202 }
203+ nextHarf = findNextNonTashkeelHarf (hrof , i )
204+
205+ // Append ligature shape first, then tashkeel (so tashkeel attaches to ligature after reversal)
206+ harfShape := getCharShape (previousHarf , currentHarf , nextHarf )
207+ arabicSentence = append (arabicSentence , harfShape )
208+ arabicSentence = append (arabicSentence , tashkeelBetween ... )
209+ continue
129210 }
130211 }
131212
132213 harfShape := getCharShape (previousHarf , currentHarf , nextHarf )
133214 arabicSentence = append (arabicSentence , harfShape )
134215 }
135- arabicSentenceRTL := Reverse ( string ( arabicSentence ) )
216+ arabicSentenceRTL := reverseWithTashkeel ( arabicSentence )
136217 return arabicSentenceRTL
137218}
0 commit comments