@@ -309,17 +309,20 @@ composeHangulLV marr lv t i = do
309309insertIntoRegBuf :: Char -> RegBuf -> RegBuf
310310insertIntoRegBuf c = \ case
311311 RegOne c0
312- | UC. combiningClass c < UC. combiningClass c0
313- -> {-# SCC insertIntoRegBuf_one_before #-} RegMany c c0 []
314- | otherwise
315- -> {-# SCC insertIntoRegBuf_one_after #-} RegMany c0 c []
312+ | UC. combiningClass c < UC. combiningClass c0 ->
313+ {-# SCC insertIntoRegBuf_one_before #-}
314+ RegMany c c0 []
315+ | otherwise ->
316+ {-# SCC insertIntoRegBuf_one_after #-}
317+ RegMany c0 c []
316318 RegMany c0 c1 cs
317- | cc < UC. combiningClass c0
318- -> {-# SCC insertIntoRegBuf_many_first #-} RegMany c c0 (c1 : cs)
319- | cc < UC. combiningClass c1
320- -> {-# SCC insertIntoRegBuf_many_second #-} RegMany c0 c (c1 : cs)
321- | otherwise
322- ->
319+ | cc < UC. combiningClass c0 ->
320+ {-# SCC insertIntoRegBuf_many_first #-}
321+ RegMany c c0 (c1 : cs)
322+ | cc < UC. combiningClass c1 ->
323+ {-# SCC insertIntoRegBuf_many_second #-}
324+ RegMany c0 c (c1 : cs)
325+ | otherwise ->
323326 {-# SCC insertIntoRegBuf_many_other #-}
324327 RegMany c0 c1 (cs' ++ (c : cs''))
325328 where
@@ -359,7 +362,7 @@ writeRegBuf marr i = \case
359362 where
360363 cc = UC. combiningClass c
361364 (same, bigger) = span ((== cc) . UC. combiningClass) cs
362- _ -> writeStr marr i (st : uncs)
365+ [] -> writeStr marr i (st : uncs)
363366
364367--
365368-- Composition
@@ -372,138 +375,130 @@ composeChar
372375 -> A. MArray s -- ^ Destination array for composition
373376 -> Char -- ^ Input char
374377 -> Int -- ^ Array index
375- -> ComposeState
378+ -> ComposeState -- ^ Compose state
376379 -> ST s (Int , ComposeState )
377- composeChar mode marr = go0
380+ composeChar mode marr ch0 ! i0 ! st0 = case st0 of
381+ -- Pending starter, QC = Yes
382+ ComposeStarter s -> {-# SCC compose_YesStarter #-} case quickCheck ch0 of
383+ -- QC = Yes, starter (includes Jamo L & Hangul syllables),
384+ -- may decompose, may compose with next
385+ QC. YesStarter -> {-# SCC compose_YesStarter_YesStarter #-} do
386+ n <- unsafeWrite marr i0 s
387+ pure (i0 + n, ComposeStarter ch0)
388+ -- QC = Yes or Maybe, combining, not decomposable
389+ QC. Combining
390+ -- Pending decomposition
391+ | UC. isDecomposable mode s ->
392+ {-# SCC compose_YesStarter_Combining_decomp #-}
393+ go (UC. decompose mode s ++ [ch0]) i0 ComposeNone
394+ -- Starter + combining
395+ | otherwise ->
396+ {-# SCC compose_YesStarter_Combining_nodecomp #-}
397+ pure (i0, ComposeReg (RegMany s ch0 [] ))
398+ -- QC = No or Maybe, decomposable
399+ QC. Decomposable
400+ -- Pending decomposition
401+ | UC. isDecomposable mode s ->
402+ {-# SCC compose_YesStarter_Decomposable_decomp #-}
403+ go (UC. decompose mode s ++ UC. decompose mode ch0) i0 ComposeNone
404+ -- Starter + decomposable
405+ | otherwise ->
406+ {-# SCC compose_YesStarter_Decomposable_nodecomp #-}
407+ go (UC. decompose mode ch0) i0 st0
408+ -- QC = Maybe, starter, not decomposable
409+ _
410+ -- Pending decomposition
411+ | UC. isDecomposable mode s ->
412+ {-# SCC compose_YesStarter_other_decomp_starter #-}
413+ go (UC. decompose mode s ++ [ch0]) i0 ComposeNone
414+ -- Jamo V or T
415+ | UC. isJamo ch0 -> {-# SCC compose_YesStarter_other_jamo #-} if
416+ -- Jamo L + jamo V
417+ | UC. jamoLFirst <= cp && cp <= UC. jamoLLast &&
418+ UC. jamoVFirst <= ich0 && ich0 <= UC. jamoVLast ->
419+ pure (i0, composeJamoL s ch0)
420+ -- Hangul LV + T
421+ | UC. isHangul s && UC. isHangulLV s &&
422+ UC. jamoTFirst < ich0 && ich0 <= UC. jamoTLast ->
423+ composeHangulLV marr s ch0 i0
424+ -- Cannot compose: flush buffer
425+ | otherwise -> do
426+ n1 <- unsafeWrite marr i0 s
427+ n2 <- unsafeWrite marr (i0 + n1) ch0
428+ pure (i0 + n1 + n2, ComposeNone )
429+ -- Combining starter
430+ | UC. isCombiningStarter ch0
431+ , Just x <- UC. composeStarters s ch0 ->
432+ {-# SCC compose_YesStarter_other_composable #-}
433+ pure (i0, ComposeReg (RegOne x))
434+ -- Two non-composable starters
435+ | otherwise -> {-# SCC compose_YesStarter_other_other #-} do
436+ n <- unsafeWrite marr i0 s
437+ pure (i0 + n, ComposeReg (RegOne ch0))
438+ where cp = ord s
439+ -- Pending composable string
440+ ComposeReg rbuf -> {-# SCC compose_Reg #-} case quickCheck ch0 of
441+ -- QC = Yes, starter (includes Jamo L & Hangul syllables),
442+ -- may decompose, may compose with next
443+ QC. YesStarter -> {-# SCC compose_reg_YesStarter #-} do
444+ j <- writeRegBuf marr i0 rbuf
445+ pure (j, ComposeStarter ch0)
446+ -- QC = Yes or Maybe, combining, not decomposable
447+ QC. Combining ->
448+ {-# SCC compose_reg_Combining #-}
449+ pure (i0, ComposeReg (insertIntoRegBuf ch0 rbuf))
450+ -- QC = No or Maybe, decomposable
451+ QC. Decomposable ->
452+ {-# SCC compose_reg_Decomposable #-}
453+ go (UC. decompose mode ch0) i0 st0
454+ -- QC = Maybe, starter, not decomposable
455+ _
456+ -- Combining starter
457+ -- The first char in RegBuf may or may not be a starter. In
458+ -- case it is not we rely on composeStarters failing.
459+ | RegOne s <- rbuf
460+ , UC. isCombiningStarter ch0
461+ , Just x <- UC. composeStarters s ch0 ->
462+ {-# SCC compose_reg_composable #-}
463+ pure (i0, ComposeReg (RegOne x))
464+ -- Jamo V or T
465+ | UC. isJamo ch0 -> {-# SCC compose_reg_jamo #-} do
466+ j <- writeRegBuf marr i0 rbuf
467+ n <- unsafeWrite marr j ch0
468+ pure (j + n, ComposeNone )
469+ -- Cannot compose: flush buffer
470+ | otherwise -> {-# SCC compose_reg_other #-} do
471+ j <- writeRegBuf marr i0 rbuf
472+ pure (j, ComposeReg (RegOne ch0))
473+ -- Empty buffer
474+ ComposeNone -> {-# SCC compose_None #-} case quickCheck ch0 of
475+ -- QC = Yes, starter (includes Jamo L & Hangul syllables),
476+ -- may decompose, may compose with next
477+ QC. YesStarter ->
478+ {-# SCC compose_none_YesStarter #-}
479+ pure (i0, ComposeStarter ch0)
480+ -- QC = No or Maybe, decomposable
481+ QC. Decomposable ->
482+ {-# SCC compose_none_Decomposable #-}
483+ go (UC. decompose mode ch0) i0 st0
484+ -- QC = Yes (combining) or Maybe (any), not decomposable
485+ _
486+ -- Jamo V or T
487+ | UC. isJamo ch0 -> {-# SCC compose_none_other_jamo #-} do
488+ n <- unsafeWrite marr i0 ch0
489+ pure (i0 + n, ComposeNone )
490+ -- Starter or combining
491+ | otherwise ->
492+ {-# SCC compose_none_other_other #-}
493+ pure (i0, ComposeReg (RegOne ch0))
378494
379495 where
496+
497+ ich0 = ord ch0
380498 quickCheck = case mode of
381499 UC. Canonical -> QC. isNFC_QC
382500 UC. Kompat -> QC. isNFKC_QC
383501
384- -- ch: input char
385- -- i: array index
386- -- st: compose state
387-
388- -- Start normalization with initial compose state
389- go0 ch ! i ! st =
390- case st of
391- -- Pending starter, QC = Yes
392- ComposeStarter s -> {-# SCC compose_YesStarter #-} case quickCheck ch of
393- -- QC = Yes, starter (includes Jamo L & Hangul syllables),
394- -- may decompose, may compose with next
395- QC. YesStarter -> {-# SCC compose_YesStarter_YesStarter #-} do
396- n <- unsafeWrite marr i s
397- pure (i + n, ComposeStarter ch)
398- -- QC = Yes or Maybe, combining, not decomposable
399- QC. Combining
400- -- Pending decomposition
401- | UC. isDecomposable mode s ->
402- {-# SCC compose_YesStarter_Combining_decomp #-}
403- go (UC. decompose mode s ++ [ch]) i ComposeNone
404- -- Starter + combining
405- | otherwise ->
406- {-# SCC compose_YesStarter_Combining_nodecomp #-}
407- pure (i, ComposeReg (RegMany s ch [] ))
408- -- QC = No or Maybe, decomposable
409- QC. Decomposable
410- -- Pending decomposition
411- | UC. isDecomposable mode s ->
412- {-# SCC compose_YesStarter_Decomposable_decomp #-}
413- go (UC. decompose mode s ++ UC. decompose mode ch) i ComposeNone
414- -- Starter + decomposable
415- | otherwise ->
416- {-# SCC compose_YesStarter_Decomposable_nodecomp #-}
417- go (UC. decompose mode ch) i st
418- -- QC = Maybe, starter, not decomposable
419- _
420- -- Pending decomposition
421- | UC. isDecomposable mode s ->
422- {-# SCC compose_YesStarter_other_decomp_starter #-}
423- go (UC. decompose mode s ++ [ch]) i ComposeNone
424- -- Jamo V or T
425- | UC. isJamo ch -> {-# SCC compose_YesStarter_other_jamo #-} if
426- -- Jamo L + jamo V
427- | UC. jamoLFirst <= cp && cp <= UC. jamoLLast &&
428- UC. jamoVFirst <= ich && ich <= UC. jamoVLast ->
429- pure (i, composeJamoL s ch)
430- -- Hangul LV + T
431- | UC. isHangul s && UC. isHangulLV s &&
432- UC. jamoTFirst < ich && ich <= UC. jamoTLast ->
433- composeHangulLV marr s ch i
434- -- Cannot compose: flush buffer
435- | otherwise -> do
436- n1 <- unsafeWrite marr i s
437- n2 <- unsafeWrite marr (i + n1) ch
438- pure (i + n1 + n2, ComposeNone )
439- -- Combining starter
440- | UC. isCombiningStarter ch
441- , Just x <- UC. composeStarters s ch ->
442- {-# SCC compose_YesStarter_other_composable #-}
443- pure (i, ComposeReg (RegOne x))
444- -- Two non-composable starters
445- | otherwise -> {-# SCC compose_YesStarter_other_other #-}do
446- n <- unsafeWrite marr i s
447- pure (i + n, ComposeReg (RegOne ch))
448- where cp = ord s
449- -- Pending composable string
450- ComposeReg rbuf -> {-# SCC compose_Reg #-} case quickCheck ch of
451- -- QC = Yes, starter (includes Jamo L & Hangul syllables),
452- -- may decompose, may compose with next
453- QC. YesStarter -> {-# SCC compose_reg_YesStarter #-} do
454- j <- writeRegBuf marr i rbuf
455- pure (j, ComposeStarter ch)
456- -- QC = Yes or Maybe, combining, not decomposable
457- QC. Combining ->
458- {-# SCC compose_reg_Combining #-}
459- pure (i, ComposeReg (insertIntoRegBuf ch rbuf))
460- -- QC = No or Maybe, decomposable
461- QC. Decomposable ->
462- {-# SCC compose_reg_Decomposable #-}
463- go (UC. decompose mode ch) i st
464- -- QC = Maybe, starter, not decomposable
465- _
466- -- Combining starter
467- -- The first char in RegBuf may or may not be a starter. In
468- -- case it is not we rely on composeStarters failing.
469- | RegOne s <- rbuf
470- , UC. isCombiningStarter ch
471- , Just x <- UC. composeStarters s ch ->
472- {-# SCC compose_reg_composable #-}
473- pure (i, ComposeReg (RegOne x))
474- -- Jamo V or T
475- | UC. isJamo ch -> {-# SCC compose_reg_jamo #-} do
476- j <- writeRegBuf marr i rbuf
477- n <- unsafeWrite marr j ch
478- pure (j + n, ComposeNone )
479- -- Cannot compose: flush buffer
480- | otherwise -> {-# SCC compose_reg_other #-} do
481- j <- writeRegBuf marr i rbuf
482- pure (j, ComposeReg (RegOne ch))
483- -- Empty buffer
484- ComposeNone -> {-# SCC compose_None #-} case quickCheck ch of
485- -- QC = Yes, starter (includes Jamo L & Hangul syllables),
486- -- may decompose, may compose with next
487- QC. YesStarter ->
488- {-# SCC compose_none_YesStarter #-}
489- pure (i, ComposeStarter ch)
490- -- QC = No or Maybe, decomposable
491- QC. Decomposable ->
492- {-# SCC compose_none_Decomposable #-}
493- go (UC. decompose mode ch) i st
494- -- QC = Yes (combining) or Maybe (any), not decomposable
495- _
496- -- Jamo V or T
497- | UC. isJamo ch -> {-# SCC compose_none_other_jamo #-} do
498- n <- unsafeWrite marr i ch
499- pure (i + n, ComposeNone )
500- -- Starter or combining
501- | otherwise ->
502- {-# SCC compose_none_other_other #-}
503- pure (i, ComposeReg (RegOne ch))
504-
505- where ich = ord ch
506-
507502 -- Recursive decomposition
508503 go [] ! i ! st = pure (i, st)
509504 go (ch : rest) i st =
0 commit comments