@@ -585,6 +585,59 @@ HRESULT Library_corlib_native_System_String::ToCharArray(CLR_RT_StackFrame &stac
585
585
NANOCLR_NOCLEANUP ();
586
586
}
587
587
588
+ // Helper function for comparing UTF-8 substrings
589
+ bool MatchString (CLR_RT_UnicodeHelper &inputIter, const char *searchStr, int searchCharLen)
590
+ {
591
+ // Create copies to preserve original iterator state
592
+ CLR_RT_UnicodeHelper inputCopy = inputIter;
593
+ CLR_RT_UnicodeHelper searchIter;
594
+ searchIter.SetInputUTF8 (searchStr);
595
+
596
+ for (int i = 0 ; i < searchCharLen; i++)
597
+ {
598
+ CLR_UINT16 bufInput[3 ] = {0 };
599
+ CLR_UINT16 bufSearch[3 ] = {0 };
600
+
601
+ // Set up buffers for character conversion
602
+ inputCopy.m_outputUTF16 = bufInput;
603
+ inputCopy.m_outputUTF16_size = MAXSTRLEN (bufInput);
604
+ searchIter.m_outputUTF16 = bufSearch;
605
+ searchIter.m_outputUTF16_size = MAXSTRLEN (bufSearch);
606
+
607
+ // Convert next character from input
608
+ if (!inputCopy.ConvertFromUTF8 (1 , false ))
609
+ {
610
+ // Input ended prematurely
611
+ return false ;
612
+ }
613
+
614
+ // Convert next character from search string
615
+ if (!searchIter.ConvertFromUTF8 (1 , false ))
616
+ {
617
+ // Shouldn't happen for valid search string
618
+ return false ;
619
+ }
620
+
621
+ // Compare first UTF-16 code unit
622
+ if (bufInput[0 ] != bufSearch[0 ])
623
+ {
624
+ return false ;
625
+ }
626
+
627
+ // Handle surrogate pairs (4-byte UTF-8 sequences)
628
+ if (bufInput[0 ] >= 0xD800 && bufInput[0 ] <= 0xDBFF )
629
+ {
630
+ // High surrogate
631
+ if (bufInput[1 ] != bufSearch[1 ])
632
+ {
633
+ // Low surrogate mismatch
634
+ return false ;
635
+ }
636
+ }
637
+ }
638
+ return true ;
639
+ }
640
+
588
641
HRESULT Library_corlib_native_System_String::IndexOf (CLR_RT_StackFrame &stack, int mode)
589
642
{
590
643
NATIVE_PROFILE_CLR_CORE ();
@@ -594,8 +647,8 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
594
647
int startIndex;
595
648
int count;
596
649
int pos;
597
- const char *pString;
598
- const CLR_UINT16 *pChars;
650
+ const char *pString = NULL ;
651
+ const CLR_UINT16 *pChars = NULL ;
599
652
int iChars = 0 ;
600
653
CLR_RT_UnicodeHelper inputIterator;
601
654
int inputLen;
@@ -605,8 +658,6 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
605
658
if (!szText)
606
659
szText = " " ;
607
660
pos = -1 ;
608
- pString = NULL ;
609
- pChars = NULL ;
610
661
611
662
if (mode & c_IndexOf__SingleChar)
612
663
{
@@ -617,23 +668,20 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
617
668
{
618
669
CLR_RT_HeapBlock_Array *array = stack.Arg1 ().DereferenceArray ();
619
670
FAULT_ON_NULL (array);
620
-
621
671
pChars = (const CLR_UINT16 *)array->GetFirstElement ();
622
672
iChars = array->m_numOfElements ;
623
673
}
624
674
else if (mode & c_IndexOf__String)
625
675
{
626
676
pString = stack.Arg1 ().RecoverString ();
627
677
FAULT_ON_NULL (pString);
628
- // how long is the search string?
629
678
inputIterator.SetInputUTF8 (pString);
630
679
searchLen = inputIterator.CountNumberOfCharacters ();
631
680
}
632
681
633
- // calculate input string length
682
+ // Calculate input length
634
683
inputIterator.SetInputUTF8 (szText);
635
684
inputLen = inputIterator.CountNumberOfCharacters ();
636
-
637
685
if (0 == inputLen)
638
686
{
639
687
pos = -1 ;
@@ -647,7 +695,6 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
647
695
}
648
696
else
649
697
{
650
- // for mode LastIndex... we are searching backwards toward the start of the string
651
698
if (mode & c_IndexOf__Last)
652
699
{
653
700
startIndex = inputLen - 1 ;
@@ -663,49 +710,53 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
663
710
NANOCLR_SET_AND_LEAVE (CLR_E_OUT_OF_RANGE);
664
711
665
712
// for mode LastIndex... with string we move the start index back by the search string length -1
713
+ // (this adjustment applies only when searching backwards)
666
714
if ((mode & c_IndexOf__String_Last) == c_IndexOf__String_Last)
667
715
{
668
716
startIndex -= searchLen - 1 ;
669
- // check the start index; if not in range skip the search
717
+ // check the start index; if not in range, skip the search
670
718
if (startIndex < 0 || startIndex > inputLen)
719
+ {
671
720
goto Exit;
721
+ }
672
722
}
673
723
674
724
// calculate the iteration count
675
725
if (mode & c_IndexOf__Count)
676
726
{
677
- // count form parameter
727
+ // count (from parameter)
678
728
count = stack.Arg3 ().NumericByRefConst ().s4 ;
679
729
}
680
730
else
681
731
{
682
732
// for mode LastIndex... we are searching from start index backwards toward the start of the string
683
733
if (mode & c_IndexOf__Last)
684
734
{
685
- // backward until the start of string
686
- // one more time than the startIndex because we should iterate until zero
735
+ // backwards until the start of the string
736
+ // one position ahead of the startIndex because we should iterate until position zero
687
737
count = startIndex + 1 ;
688
738
}
689
739
else
690
740
{
691
- // forward until the end of string
741
+ // move forward until reaching the end of the string
692
742
count = inputLen - startIndex;
693
743
}
694
744
}
695
745
696
- // for mode with string we reduce the count by the search string length -1
697
- // if we search foreward
746
+ // forward search with index of string mode: adjust the count by the search string length -1
698
747
if ((mode & c_IndexOf__String_Last) == c_IndexOf__String)
699
748
{
700
749
count -= searchLen - 1 ;
701
750
}
702
751
703
- // check the count
752
+ // validate count
704
753
if (mode & c_IndexOf__Last)
705
754
{
706
755
// check for backward mode; no exception; just exit
707
756
if (count > startIndex + 1 )
757
+ {
708
758
goto Exit;
759
+ }
709
760
}
710
761
else
711
762
{
@@ -717,132 +768,87 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
717
768
// First move to the character, then read it.
718
769
if (inputIterator.ConvertFromUTF8 (startIndex, true ))
719
770
{
720
- // string mode?
771
+ // String search mode
721
772
if (pString)
722
773
{
723
- // iterate thru all positions
724
774
while (count-- > 0 )
725
775
{
726
- CLR_RT_UnicodeHelper inputString;
727
- inputString.SetInputUTF8 ((const char *)inputIterator.m_inputUTF8 );
728
- CLR_RT_UnicodeHelper searchString;
729
- searchString.SetInputUTF8 (pString);
730
- bool finished = false ;
731
-
732
- while (true )
733
- {
734
- CLR_UINT16 bufInput[3 ];
735
- CLR_UINT16 bufSearch[3 ];
736
-
737
- inputString.m_outputUTF16 = bufInput;
738
- inputString.m_outputUTF16_size = MAXSTRLEN (bufInput);
739
-
740
- searchString.m_outputUTF16 = bufSearch;
741
- searchString.m_outputUTF16_size = MAXSTRLEN (bufSearch);
742
-
743
- // read next char from search string; if no more chars to read (false)
744
- // then we are done and found the search string in the input string
745
- if (searchString.ConvertFromUTF8 (1 , false ) == false )
746
- {
747
- pos = startIndex;
748
- finished = true ;
749
- break ;
750
- }
751
-
752
- // read the next char from the input string; if no more chars to read (false)
753
- // we didn't found the search string in the input string; we abort the search now
754
- if (inputString.ConvertFromUTF8 (1 , false ) == false )
755
- {
756
- finished = true ;
757
- break ;
758
- }
759
-
760
- // does the char from input not match the char from the search string
761
- if (bufInput[0 ] != bufSearch[0 ])
762
- {
763
- // next iteration round but not finished
764
- break ;
765
- }
766
- }
767
-
768
- // finished (with or without a found) then break
769
- if (finished)
776
+ // Use helper for proper UTF-8 comparison
777
+ if (MatchString (inputIterator, pString, searchLen))
770
778
{
779
+ pos = startIndex;
771
780
break ;
772
781
}
773
782
774
- // reading forward or backward
783
+ // Move to next candidate position (both forward or backward reading)
775
784
if (mode & c_IndexOf__Last)
776
785
{
777
786
startIndex--;
778
- // move one chars backward
779
- if (inputIterator.MoveBackwardInUTF8 (szText, 1 ) == false )
787
+ // move backwards one char
788
+ if (! inputIterator.MoveBackwardInUTF8 (szText, 1 ))
780
789
{
781
790
break ;
782
791
}
783
792
}
784
793
else
785
794
{
786
795
startIndex++;
787
- // move to the next char
788
- if (inputIterator.ConvertFromUTF8 (1 , true ) == false )
796
+ // move forward to the next char
797
+ if (! inputIterator.ConvertFromUTF8 (1 , true ))
789
798
{
790
799
break ;
791
800
}
792
801
}
793
802
}
794
803
}
795
804
796
- // char mode?
797
- if (pChars)
805
+ // Character search mode
806
+ else if (pChars)
798
807
{
799
- // iterate thru all positions
808
+ // iterate through all positions
800
809
while (count-- > 0 )
801
810
{
802
- CLR_UINT16 buf[3 ];
811
+ CLR_UINT16 buf[3 ] = { 0 } ;
803
812
804
813
inputIterator.m_outputUTF16 = buf;
805
814
inputIterator.m_outputUTF16_size = MAXSTRLEN (buf);
806
815
807
816
// read the next char from the input string; if no more chars to read (false)
808
- // we didn't found the search chars in the input string
809
- if (inputIterator.ConvertFromUTF8 (1 , false ) == false )
817
+ // the search chars weren't found in the input string
818
+ if (! inputIterator.ConvertFromUTF8 (1 , false ))
810
819
{
811
820
break ;
812
821
}
813
822
814
- // test each search char if it's a match
823
+ // test each search char for a match
815
824
for (int i = 0 ; i < iChars; i++)
816
825
{
817
- // match?
818
826
if (buf[0 ] == pChars[i])
819
827
{
820
- // position found!
828
+ // match found: record its position
821
829
pos = startIndex;
822
830
break ;
823
831
}
824
832
}
825
833
826
- // found? => break
834
+ // a match was found: stop searching
827
835
if (pos != -1 )
828
836
{
829
837
break ;
830
838
}
831
839
832
- // for mode LastIndex... we are searching from start index backwards toward the start of the string
840
+ // for search mode LastIndex: we are searching from start index backwards toward the start of the string
833
841
if (mode & c_IndexOf__Last)
834
842
{
835
- // in backward mode
843
+ // backwards mode
836
844
startIndex--;
837
- // move two chars backward, because the current char is already read
838
- if (inputIterator.MoveBackwardInUTF8 (szText, 2 ) == false )
839
- {
845
+ // have to move two chars backwards, because the current char is already read
846
+ if (!inputIterator.MoveBackwardInUTF8 (szText, 2 ))
840
847
break ;
841
- }
842
848
}
843
849
else
844
850
{
845
- // forward mode; simple advance the start index
851
+ // forward mode: just advance the start index
846
852
startIndex++;
847
853
}
848
854
}
@@ -851,7 +857,6 @@ HRESULT Library_corlib_native_System_String::IndexOf(CLR_RT_StackFrame &stack, i
851
857
852
858
Exit:
853
859
stack.SetResult_I4 (pos);
854
-
855
860
NANOCLR_NOCLEANUP ();
856
861
}
857
862
@@ -889,10 +894,11 @@ HRESULT Library_corlib_native_System_String::ChangeCase(CLR_RT_StackFrame &stack
889
894
*ptr++ = c;
890
895
}
891
896
892
- NANOCLR_CHECK_HRESULT (CLR_RT_HeapBlock_String::CreateInstance (
893
- stack.PushValue (),
894
- (CLR_UINT16 *)arrayTmp->GetFirstElement (),
895
- arrayTmp->m_numOfElements ));
897
+ NANOCLR_CHECK_HRESULT (
898
+ CLR_RT_HeapBlock_String::CreateInstance (
899
+ stack.PushValue (),
900
+ (CLR_UINT16 *)arrayTmp->GetFirstElement (),
901
+ arrayTmp->m_numOfElements ));
896
902
897
903
NANOCLR_NOCLEANUP ();
898
904
}
@@ -923,10 +929,11 @@ HRESULT Library_corlib_native_System_String::Substring(CLR_RT_StackFrame &stack,
923
929
NANOCLR_SET_AND_LEAVE (CLR_E_OUT_OF_RANGE);
924
930
}
925
931
926
- NANOCLR_CHECK_HRESULT (CLR_RT_HeapBlock_String::CreateInstance (
927
- stack.PushValue (),
928
- (CLR_UINT16 *)arrayTmp->GetElement (startIndex),
929
- length));
932
+ NANOCLR_CHECK_HRESULT (
933
+ CLR_RT_HeapBlock_String::CreateInstance (
934
+ stack.PushValue (),
935
+ (CLR_UINT16 *)arrayTmp->GetElement (startIndex),
936
+ length));
930
937
931
938
NANOCLR_NOCLEANUP ();
932
939
}
@@ -1102,10 +1109,11 @@ HRESULT Library_corlib_native_System_String::Split(CLR_RT_StackFrame &stack, CLR
1102
1109
{
1103
1110
CLR_RT_HeapBlock *str = (CLR_RT_HeapBlock *)arrayDst->GetElement (count);
1104
1111
1105
- NANOCLR_CHECK_HRESULT (CLR_RT_HeapBlock_String::CreateInstance (
1106
- *str,
1107
- pSrcStart,
1108
- (CLR_UINT32)(pSrc - pSrcStart)));
1112
+ NANOCLR_CHECK_HRESULT (
1113
+ CLR_RT_HeapBlock_String::CreateInstance (
1114
+ *str,
1115
+ pSrcStart,
1116
+ (CLR_UINT32)(pSrc - pSrcStart)));
1109
1117
1110
1118
pSrcStart = pSrc + 1 ;
1111
1119
}
0 commit comments