Evref-BL · alesshosry · Mar 10, 2026 · Feb 26, 2026
diff --git a/src/TreeSitter-Highlighter/TSHighlighter.class.st b/src/TreeSitter-Highlighter/TSHighlighter.class.st
@@ -53,7 +53,7 @@ TSHighlighter >> highlight: aString [
 	text := aString asText.
 	string := aString.
 	tree := self parser parseString: aString.
-	self highlight: text usingNode: tree rootNode.
+	self highlight: text withPlatformLineEndings usingNode: tree rootNode.
 	^ text
 ]
 

diff --git a/src/TreeSitter/String.extension.st b/src/TreeSitter/String.extension.st
@@ -2,89 +2,38 @@ Extension { #name : 'String' }
 
 { #category : '*TreeSitter' }
 String >> positionFromTSPoint: aTSPoint [
-	"Use me to convert a TSPoint to the position in the original string"
-
-	"Ideally this method (and #ts* methods on String) should be removed because it does not manage the encoding. On top of that, they are too slow because they do too much things while we could directly manipulate a ByteArray or BinaryStream and use #startByte and #endByte to read or get the positions."
-
-	| sum currentLine lines |
-	sum := aTSPoint column.
-	currentLine := 0.
-	lines := self tsLines.
-	[ currentLine < aTSPoint row ] whileTrue: [
-			sum := sum + (lines at: currentLine + 1) size.
-			currentLine := currentLine + 1 ].
-	^ sum
-]
-
-{ #category : '*TreeSitter' }
-String >> tsLineIndicesDo: aBlock [
-	"execute aBlock with 3 arguments for each line:
-	- start index of line
-	- end index of line without line delimiter
-	- end index of line including line delimiter(s) CR, LF or CRLF"
-
-
-	"Ideally this method (and #ts* methods on String) should be removed because it does not manage the encoding. On top of that, they are too slow because they do too much things while we could directly manipulate a ByteArray or BinaryStream and use #startByte and #endByte to read or get the positions."
-
-	| cr lf start sz nextLF nextCR |
-	start := 1.
-	sz := self size.
-	cr := Character cr.
-	nextCR := self indexOf: cr startingAt: 1.
-	lf := Character lf.
-	nextLF := self indexOf: lf startingAt: 1.
-	sz = 0
-		ifTrue: [ aBlock value: sz value: sz value: sz.
-			^ self ].
-	[ start <= sz ]
-		whileTrue: [ (nextLF = 0 and: [ nextCR = 0 ])
-				ifTrue: [ "No more CR, nor LF, the string is over"
-					aBlock value: start value: sz value: sz.
-					^ self ].
-			(nextCR = 0 or: [ 0 < nextLF and: [ nextLF < nextCR ] ])
-				ifTrue: [ "Found a LF"
-					aBlock value: start value: nextLF - 1 value: nextLF.
-					start := 1 + nextLF.
-					nextLF := self indexOf: lf startingAt: start ]
-				ifFalse: [ 1 + nextCR = nextLF
-						ifTrue: [ "Found a CR-LF pair"
-							aBlock value: start value: nextCR - 1 value: nextLF.
-							start := 1 + nextLF.
-							nextCR := self indexOf: cr startingAt: start.
-							nextLF := self indexOf: lf startingAt: start ]
-						ifFalse: [ "Found a CR"
-							aBlock value: start value: nextCR - 1 value: nextCR.
-							start := 1 + nextCR.
-							nextCR := self indexOf: cr startingAt: start ] ] ].
-	aBlock value: start value: sz value: sz
+	"Convert aTSPoint to a position in the receiver by delegating to positionFromTSPoint:usingEncoding: with #utf8 as the encoding."
+    ^ self positionFromTSPoint: aTSPoint usingEncoding: #utf8
 ]
 
 { #category : '*TreeSitter' }
-String >> tsLines [
-	"Same as lines but empty line exist"
+String >> positionFromTSPoint: aTSPoint usingEncoding: anEncoding [
 
+	"Answer the 0-based character offset in the receiver that corresponds to aTSPoint, whose row and column are counted in bytes of the receiver encoded with anEncoding. Signals an error when the row or the column lies beyond the encoded bytes."
+	"It is used by TSHighlighter to stay compatible with inspectionFASTSourceCode: of FASTEntity."
 
-	"Ideally this method (and #ts* methods on String) should be removed because it does not manage the encoding. On top of that, they are too slow because they do too much things while we could directly manipulate a ByteArray or BinaryStream and use #startByte and #endByte to read or get the positions."
-
-	^ Array
-		  new: (self size // 60 max: 16)
-		  streamContents: [ :lines |
-		  self tsLinesDo: [ :aLine | lines nextPut: aLine ] ]
-]
-
-{ #category : '*TreeSitter' }
-String >> tsLinesDo: aBlock [
-	"Same as linesDo but empty line exist"
-
-
-	"Ideally this method (and #ts* methods on String) should be removed because it does not manage the encoding. On top of that, they are too slow because they do too much things while we could directly manipulate a ByteArray or BinaryStream and use #startByte and #endByte to read or get the positions."
-
-	self
-		tsLineIndicesDo: [ :start :endWithoutDelimiters :end |
-			| begin |
-			"endWithoutDelimiters = start
-				ifTrue: [ aBlock value: '' ]
-				ifFalse: [" 
-					begin := (start = 0) ifTrue: [ 1 ] ifFalse: [ start ].  
-					aBlock value: (self copyFrom: begin to: end) "]" ]
+	| bytes currentRow index byteOffset |
+
+	"Tree-sitter rows and columns count bytes, so scan the encoded form of the receiver."
+	bytes := self encodeWith: anEncoding.
+	currentRow := 0.
+	index := 1.
+
+	"Advance past one LF (byte 10) per row. This assumes an ASCII-compatible
+	encoding such as UTF-8; it is not valid for encodings like UTF-16."
+	[ currentRow < aTSPoint row ] whileTrue: [
+		index > bytes size ifTrue: [
+			self error: 'Row exceeds number of lines' ].
+		(bytes at: index) = 10 ifTrue: [
+			currentRow := currentRow + 1 ].
+		index := index + 1 ].
+
+	byteOffset := index - 1 + aTSPoint column.
+	byteOffset > bytes size ifTrue: [
+		self error: 'Column exceeds line length' ].
+
+	"Decode the bytes preceding the point and count characters: a raw byte offset
+	would overshoot as soon as a multi-byte character precedes the point.
+	The point is expected to fall on a character boundary."
+	^ ((bytes copyFrom: 1 to: byteOffset) decodeWith: anEncoding) size
 ]
diff --git a/src/TreeSitter/TSLibrary.class.st b/src/TreeSitter/TSLibrary.class.st
@@ -208,17 +208,17 @@ TSLibrary >> ts_parser: aParser _parse_string: aString ofLength: length usingOld
 ]
 
 { #category : 'parser' }
-TSLibrary >> ts_parser: aTSParser _print_dot_graphs: fd [
-
+TSLibrary >> ts_parser: aParser _parse_string: aString ofLength: length usingOldTree: anOldTree encoding: anEncoding [
+	"FFI binding for the C function ts_parser_parse_string_encoding; the declared return type is a TSTree pointer."
 	^ self ffiCall:
-		  'void ts_parser_print_dot_graphs (TSParser * aTSParser, int fd )'
+		  'TSTree *ts_parser_parse_string_encoding(TSParser * aParser, const TSTree * anOldTree, const char * aString, uint32 length, TSInputEncoding anEncoding)'
 ]
 
 { #category : 'parser' }
-TSLibrary >> ts_parser: aParser _parse_string: aString ofLength: length usingOldTree: anOldTree encoding: anEncoding [
-	 
+TSLibrary >> ts_parser: aTSParser _print_dot_graphs: fd [
+	"FFI binding for the C function ts_parser_print_dot_graphs, which takes a parser and an int file descriptor and returns void."
 	^ self ffiCall:
-		  'TSTree *ts_parser_parse_string_encoding(TSParser * aParser, const TSTree * anOldTree, const char * aString, uint32 length, TSInputEncoding anEncoding)'
+		  'void ts_parser_print_dot_graphs (TSParser * aTSParser, int fd )'
 ]
 
 { #category : 'parser' }

diff --git a/src/TreeSitter/TSParser.class.st b/src/TreeSitter/TSParser.class.st
@@ -126,15 +126,6 @@ TSParser >> parseString: aString usingTree: aTree pharoEncoding: anEncoding [
 		  usingOldTree: aTree
 ]
 
-{ #category : 'parsing' }
-TSParser >> printDotGraphTo: aFileDescriptor [
-	"-1 for no"
-
-	^ TSLibrary uniqueInstance
-		  ts_parser: self
-		  _print_dot_graphs: aFileDescriptor
-]
-
 { #category : 'parsing' }
 TSParser >> parseString: aString usingTree: aTree tsEncoding: anEncoding [
 
@@ -150,6 +141,15 @@ TSParser >> parseString: aString usingTree: aTree tsEncoding: anEncoding [
 		  encoding: anEncoding
 ]
 
+{ #category : 'parsing' }
+TSParser >> printDotGraphTo: aFileDescriptor [
+	"Hand aFileDescriptor to the library's ts_parser:_print_dot_graphs: call for this parser. NOTE(review): the earlier comment read '-1 for no', presumably meaning -1 turns the printing off - confirm."
+
+	| library |
+	library := TSLibrary uniqueInstance.
+	^ library ts_parser: self _print_dot_graphs: aFileDescriptor
+]
+
 { #category : 'initialization' }
 TSParser >> reset [