Skip to content
This repository was archived by the owner on Jun 27, 2025. It is now read-only.

Commit 95d47db

Browse files
committed
reimplemented ucs4_get_end_p to go from end
1 parent 9d6dafa commit 95d47db

2 files changed

Lines changed: 36 additions & 4 deletions

File tree

arraymap.c

Lines changed: 25 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -123,16 +123,37 @@ typedef enum ViewKind{
123123

124124

125125
// NOTE: would like to use strchr(str, '\0') instead of this routine, but some buffers might not have a null terminator and may instead be filled all the way up to dt_size.
126+
// static inline Py_UCS4*
127+
// ucs4_get_end_p(Py_UCS4* p, Py_ssize_t dt_size) {
128+
// Py_UCS4* p_end = p + dt_size; // invalid
129+
// while (p < p_end && *p != '\0') {
130+
// p++;
131+
// }
132+
// return p; // this might point to null, or point to one after end
133+
// }
134+
135+
// Example: with a start of 5 and a length of 4, the valid positions are (5, 6, 7, 8)
136+
// last valid is start + len - 1
137+
138+
// Return a pointer to one past the last non-null character in the field, or p_start if every character is null.
126139
static inline Py_UCS4*
127-
ucs4_get_end_p(Py_UCS4* p, Py_ssize_t dt_size) {
128-
Py_UCS4* p_end = p + dt_size;
129-
while (p < p_end && *p != '\0') {
130-
p++;
140+
ucs4_get_end_p(Py_UCS4* p_start, Py_ssize_t dt_size) {
141+
Py_UCS4* p = p_start + dt_size - 1;
142+
while (p > p_start) {
143+
if (*p != '\0') {
144+
return p + 1;
145+
}
146+
p--;
147+
}
148+
// p is at start; the character there may or may not be null; if it's not null, return one past it
149+
if (*p != '\0') {
150+
return p + 1;
131151
}
132152
return p;
133153
}
134154

135155

156+
136157
static inline char*
137158
char_get_end_p(char* p, Py_ssize_t dt_size) {
138159
char* p_end = p + dt_size;

test/test_unit.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,17 @@ def test_fam_constructor_array_unicode_d():
145145
assert list(fam) == ["", "\x000"]
146146

147147

148+
# >>> u = "\x000\x00"
149+
# >>> len(u)
150+
# 3
151+
# >>> a1 = np.array(['', ''], dtype='U4')
152+
# >>> a1[0] = u
153+
# >>> a1
154+
# array(['\x000', ''], dtype='<U4')
155+
# >>> len(a1[0])
156+
# 2
157+
158+
148159
def test_fam_copy_array_unicode_a():
149160
a1 = np.array(("a", "ccc", "bb"))
150161
a1.flags.writeable = False

0 commit comments

Comments
 (0)