diff --git a/quaddtype/numpy_quaddtype/src/casts.cpp b/quaddtype/numpy_quaddtype/src/casts.cpp index 659d6daf..2107be23 100644 --- a/quaddtype/numpy_quaddtype/src/casts.cpp +++ b/quaddtype/numpy_quaddtype/src/casts.cpp @@ -27,7 +27,7 @@ extern "C" { #include "dragon4.h" #include "ops.hpp" -#define NUM_CASTS 38 // 17 to_casts + 17 from_casts + 1 quad_to_quad + 1 void_to_quad +#define NUM_CASTS 40 // 18 to_casts + 18 from_casts + 1 quad_to_quad + 1 void_to_quad #define QUAD_STR_WIDTH 50 // 42 is enough for scientific notation float128, just keeping some buffer static NPY_CASTING @@ -369,6 +369,39 @@ quad_to_string_adaptive(Sleef_quad *sleef_val, npy_intp unicode_size_chars) } } +static inline const char * +quad_to_string_adaptive_cstr(Sleef_quad *sleef_val, npy_intp unicode_size_chars) +{ + // Try positional format first to see if it would fit + const char* positional_str = Dragon4_Positional_QuadDType_CStr( + sleef_val, DigitMode_Unique, CutoffMode_TotalLength, SLEEF_QUAD_DECIMAL_DIG, 0, 1, + TrimMode_LeaveOneZero, 1, 0); + + if (positional_str == NULL) { + PyErr_SetString(PyExc_RuntimeError, "Float formatting failed"); + return NULL; + } + + // no need to scan full, only checking if its longer + npy_intp pos_len = strnlen(positional_str, unicode_size_chars + 1); + + // If positional format fits, use it; otherwise use scientific notation + if (pos_len <= unicode_size_chars) { + return positional_str; // Keep the positional string + } + else { + // Use scientific notation with full precision + const char *scientific_str = Dragon4_Scientific_QuadDType_CStr(sleef_val, DigitMode_Unique, + SLEEF_QUAD_DECIMAL_DIG, 0, 1, + TrimMode_LeaveOneZero, 1, 2); + if (scientific_str == NULL) { + PyErr_SetString(PyExc_RuntimeError, "Float formatting failed"); + return NULL; + } + return scientific_str; + } +} + template static int quad_to_unicode_loop(PyArrayMethod_Context *context, char *const data[], @@ -605,6 +638,163 @@ quad_to_bytes_loop(PyArrayMethod_Context *context, char *const data[], return 0; } +// StringDType to QuadDType casting +static NPY_CASTING +stringdtype_to_quad_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], + npy_intp *view_offset) +{ + if (given_descrs[1] == NULL) { + loop_descrs[1] = (PyArray_Descr *)new_quaddtype_instance(BACKEND_SLEEF); + if (loop_descrs[1] == nullptr) { + return (NPY_CASTING)-1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + return NPY_UNSAFE_CASTING; +} + +// Note: StringDType elements are always aligned, so Aligned template parameter +// is kept for API consistency but both versions use the same logic +template +static int +stringdtype_to_quad_strided_loop(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + PyArray_Descr *const *descrs = context->descriptors; + PyArray_StringDTypeObject *str_descr = (PyArray_StringDTypeObject *)descrs[0]; + QuadPrecDTypeObject *descr_out = (QuadPrecDTypeObject *)descrs[1]; + QuadBackendType backend = descr_out->backend; + + npy_string_allocator *allocator = NpyString_acquire_allocator(str_descr); + + while (N--) { + const npy_packed_static_string *ps = (npy_packed_static_string *)in_ptr; + npy_static_string s = {0, NULL}; + int is_null = NpyString_load(allocator, ps, &s); + + if (is_null == -1) { + NpyString_release_allocator(allocator); + PyErr_SetString(PyExc_MemoryError, "Failed to load string in StringDType to Quad cast"); + return -1; + } + else if (is_null) { + // Handle null string - use the default string if available, otherwise error + if (str_descr->has_string_na || str_descr->default_string.buf != NULL) { + s = str_descr->default_string; + } + else { + NpyString_release_allocator(allocator); + PyErr_SetString(PyExc_ValueError, "Cannot convert null string to QuadPrecision"); + return -1; + } + } + + quad_value out_val; + if (bytes_to_quad_convert(s.buf, s.size, backend, &out_val) < 0) { + NpyString_release_allocator(allocator); + return -1; + } + + store_quad(out_ptr, out_val, backend); + + in_ptr += in_stride; + out_ptr += out_stride; + } + + NpyString_release_allocator(allocator); + return 0; +} + +// QuadDType to StringDType casting +static NPY_CASTING +quad_to_stringdtype_resolve_descriptors(PyObject *NPY_UNUSED(self), PyArray_DTypeMeta *dtypes[2], + PyArray_Descr *given_descrs[2], PyArray_Descr *loop_descrs[2], + npy_intp *view_offset) +{ + if (given_descrs[1] == NULL) { + // Default StringDType() already has coerce=True + loop_descrs[1] = (PyArray_Descr *)PyObject_CallNoArgs( + (PyObject *)&PyArray_StringDType); + if (loop_descrs[1] == NULL) { + return (NPY_CASTING)-1; + } + } + else { + Py_INCREF(given_descrs[1]); + loop_descrs[1] = given_descrs[1]; + } + + Py_INCREF(given_descrs[0]); + loop_descrs[0] = given_descrs[0]; + + return NPY_SAFE_CASTING; +} + +// Note: StringDType elements are always aligned, so Aligned template parameter +// is kept for API consistency but both versions use the same logic +template +static int +quad_to_stringdtype_strided_loop(PyArrayMethod_Context *context, char *const data[], + npy_intp const dimensions[], npy_intp const strides[], + void *NPY_UNUSED(auxdata)) +{ + npy_intp N = dimensions[0]; + char *in_ptr = data[0]; + char *out_ptr = data[1]; + npy_intp in_stride = strides[0]; + npy_intp out_stride = strides[1]; + + PyArray_Descr *const *descrs = context->descriptors; + QuadPrecDTypeObject *descr_in = (QuadPrecDTypeObject *)descrs[0]; + PyArray_StringDTypeObject *str_descr = (PyArray_StringDTypeObject *)descrs[1]; + QuadBackendType backend = descr_in->backend; + + npy_string_allocator *allocator = NpyString_acquire_allocator(str_descr); + + while (N--) { + quad_value in_val = load_quad(in_ptr, backend); + Sleef_quad sleef_val = quad_to_sleef_quad(&in_val, backend); + + // Get string representation with adaptive notation + // Use a large buffer size to allow for full precision + const char *str_buf = quad_to_string_adaptive_cstr(&sleef_val, QUAD_STR_WIDTH); + if (str_buf == NULL) { + NpyString_release_allocator(allocator); + return -1; + } + + Py_ssize_t str_size = strnlen(str_buf, QUAD_STR_WIDTH); + + npy_packed_static_string *out_ps = (npy_packed_static_string *)out_ptr; + if (NpyString_pack(allocator, out_ps, str_buf, (size_t)str_size) < 0) { + NpyString_release_allocator(allocator); + PyErr_SetString(PyExc_MemoryError, "Failed to pack string in Quad to StringDType cast"); + return -1; + } + + in_ptr += in_stride; + out_ptr += out_stride; + } + + NpyString_release_allocator(allocator); + return 0; +} + // Tag dispatching to ensure npy_bool/npy_ubyte and npy_half/npy_ushort do not alias in templates // see e.g. https://stackoverflow.com/q/32522279 struct spec_npy_bool {}; @@ -1395,6 +1585,44 @@ init_casts_internal(void) }; add_spec(quad_to_bytes_spec); + // StringDType to QuadPrecision cast + PyArray_DTypeMeta **stringdtype_to_quad_dtypes = new PyArray_DTypeMeta *[2]{&PyArray_StringDType, &QuadPrecDType}; + PyType_Slot *stringdtype_to_quad_slots = new PyType_Slot[4]{ + {NPY_METH_resolve_descriptors, (void *)&stringdtype_to_quad_resolve_descriptors}, + {NPY_METH_strided_loop, (void *)&stringdtype_to_quad_strided_loop}, + {NPY_METH_unaligned_strided_loop, (void *)&stringdtype_to_quad_strided_loop}, + {0, nullptr}}; + + PyArrayMethod_Spec *stringdtype_to_quad_spec = new PyArrayMethod_Spec{ + .name = "cast_StringDType_to_QuadPrec", + .nin = 1, + .nout = 1, + .casting = NPY_UNSAFE_CASTING, + .flags = static_cast(NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI), + .dtypes = stringdtype_to_quad_dtypes, + .slots = stringdtype_to_quad_slots, + }; + add_spec(stringdtype_to_quad_spec); + + // QuadPrecision to StringDType cast + PyArray_DTypeMeta **quad_to_stringdtype_dtypes = new PyArray_DTypeMeta *[2]{&QuadPrecDType, &PyArray_StringDType}; + PyType_Slot *quad_to_stringdtype_slots = new PyType_Slot[4]{ + {NPY_METH_resolve_descriptors, (void *)&quad_to_stringdtype_resolve_descriptors}, + {NPY_METH_strided_loop, (void *)&quad_to_stringdtype_strided_loop}, + {NPY_METH_unaligned_strided_loop, (void *)&quad_to_stringdtype_strided_loop}, + {0, nullptr}}; + + PyArrayMethod_Spec *quad_to_stringdtype_spec = new PyArrayMethod_Spec{ + .name = "cast_QuadPrec_to_StringDType", + .nin = 1, + .nout = 1, + .casting = NPY_SAFE_CASTING, + .flags = static_cast(NPY_METH_SUPPORTS_UNALIGNED | NPY_METH_REQUIRES_PYAPI), + .dtypes = quad_to_stringdtype_dtypes, + .slots = quad_to_stringdtype_slots, + }; + add_spec(quad_to_stringdtype_spec); + specs[spec_count] = nullptr; return specs; } diff --git a/quaddtype/numpy_quaddtype/src/dragon4.c b/quaddtype/numpy_quaddtype/src/dragon4.c index b9a896c8..b47e292f 100644 --- a/quaddtype/numpy_quaddtype/src/dragon4.c +++ b/quaddtype/numpy_quaddtype/src/dragon4.c @@ -1954,6 +1954,15 @@ Dragon4_Positional_QuadDType_opt(Sleef_quad *val, Dragon4_Options *opt) return ret; } +const char * +Dragon4_Positional_QuadDType_opt_cstr(Sleef_quad *val, Dragon4_Options *opt) +{; + if (Dragon4_PrintFloat_Sleef_quad(val, opt) < 0) { + return NULL; + } + return _bigint_static.repr; +} + PyObject * Dragon4_Positional_QuadDType(Sleef_quad *val, DigitMode digit_mode, CutoffMode cutoff_mode, int precision, int min_digits, int sign, TrimMode trim, int pad_left, @@ -1975,6 +1984,27 @@ Dragon4_Positional_QuadDType(Sleef_quad *val, DigitMode digit_mode, CutoffMode c return Dragon4_Positional_QuadDType_opt(val, &opt); } +const char * +Dragon4_Positional_QuadDType_CStr(Sleef_quad *val, DigitMode digit_mode, CutoffMode cutoff_mode, + int precision, int min_digits, int sign, TrimMode trim, int pad_left, + int pad_right) +{ + Dragon4_Options opt; + + opt.scientific = 0; + opt.digit_mode = digit_mode; + opt.cutoff_mode = cutoff_mode; + opt.precision = precision; + opt.min_digits = min_digits; + opt.sign = sign; + opt.trim_mode = trim; + opt.digits_left = pad_left; + opt.digits_right = pad_right; + opt.exp_digits = -1; + + return Dragon4_Positional_QuadDType_opt_cstr(val, &opt); +} + PyObject * Dragon4_Scientific_QuadDType_opt(Sleef_quad *val, Dragon4_Options *opt) { @@ -1986,6 +2016,15 @@ Dragon4_Scientific_QuadDType_opt(Sleef_quad *val, Dragon4_Options *opt) return ret; } +const char * +Dragon4_Scientific_QuadDType_opt_cstr(Sleef_quad *val, Dragon4_Options *opt) +{ + if (Dragon4_PrintFloat_Sleef_quad(val, opt) < 0) { + return NULL; + } + return _bigint_static.repr; +} + PyObject * Dragon4_Scientific_QuadDType(Sleef_quad *val, DigitMode digit_mode, int precision, int min_digits, int sign, TrimMode trim, int pad_left, int exp_digits) @@ -2006,6 +2045,26 @@ Dragon4_Scientific_QuadDType(Sleef_quad *val, DigitMode digit_mode, int precisio return Dragon4_Scientific_QuadDType_opt(val, &opt); } +const char * +Dragon4_Scientific_QuadDType_CStr(Sleef_quad *val, DigitMode digit_mode, int precision, int min_digits, + int sign, TrimMode trim, int pad_left, int exp_digits) +{ + Dragon4_Options opt; + + opt.scientific = 1; + opt.digit_mode = digit_mode; + opt.cutoff_mode = CutoffMode_TotalLength; + opt.precision = precision; + opt.min_digits = min_digits; + opt.sign = sign; + opt.trim_mode = trim; + opt.digits_left = pad_left; + opt.digits_right = -1; + opt.exp_digits = exp_digits; + + return Dragon4_Scientific_QuadDType_opt_cstr(val, &opt); +} + PyObject * Dragon4_Positional(PyObject *obj, DigitMode digit_mode, CutoffMode cutoff_mode, int precision, int min_digits, int sign, TrimMode trim, int pad_left, int pad_right) diff --git a/quaddtype/numpy_quaddtype/src/dragon4.h b/quaddtype/numpy_quaddtype/src/dragon4.h index 1977595e..8e7753d8 100644 --- a/quaddtype/numpy_quaddtype/src/dragon4.h +++ b/quaddtype/numpy_quaddtype/src/dragon4.h @@ -51,10 +51,18 @@ PyObject *Dragon4_Positional_QuadDType(Sleef_quad *val, DigitMode digit_mode, CutoffMode cutoff_mode, int precision, int min_digits, int sign, TrimMode trim, int pad_left, int pad_right); +const char *Dragon4_Positional_QuadDType_CStr(Sleef_quad *val, DigitMode digit_mode, + CutoffMode cutoff_mode, int precision, int min_digits, + int sign, TrimMode trim, int pad_left, int pad_right); + PyObject *Dragon4_Scientific_QuadDType(Sleef_quad *val, DigitMode digit_mode, int precision, int min_digits, int sign, TrimMode trim, int pad_left, int exp_digits); +const char *Dragon4_Scientific_QuadDType_CStr(Sleef_quad *val, DigitMode digit_mode, + int precision, int min_digits, int sign, TrimMode trim, + int pad_left, int exp_digits); + PyObject *Dragon4_Positional(PyObject *obj, DigitMode digit_mode, CutoffMode cutoff_mode, int precision, int min_digits, int sign, TrimMode trim, int pad_left, int pad_right); diff --git a/quaddtype/tests/test_quaddtype.py b/quaddtype/tests/test_quaddtype.py index 1585f6fd..0ef1fd39 100644 --- a/quaddtype/tests/test_quaddtype.py +++ b/quaddtype/tests/test_quaddtype.py @@ -554,7 +554,7 @@ def test_unsupported_astype(dtype): np.array(QuadPrecision(1)).astype(dtype, casting="unsafe") class TestArrayCastStringBytes: - @pytest.mark.parametrize("strtype", [np.str_, str]) + @pytest.mark.parametrize("strtype", [np.str_, str, np.dtypes.StringDType()]) @pytest.mark.parametrize("input_val", [ "3.141592653589793238462643383279502884197", "2.71828182845904523536028747135266249775", @@ -747,6 +747,27 @@ def test_empty_bytes_raises_error(self): with pytest.raises(ValueError): bytes_array.astype(QuadPrecDType()) + @pytest.mark.parametrize("strtype", [np.str_, np.dtypes.StringDType()]) + @pytest.mark.parametrize("backend", ["sleef", "longdouble"]) + def test_string_backend_consistency(self, strtype, backend): + """Test that string parsing works consistently across backends""" + input_str = "3.141592653589793238462643383279502884197" + str_array = np.array([input_str], dtype=strtype) + quad_array = str_array.astype(QuadPrecDType(backend=backend)) + scalar_val = QuadPrecision(input_str, backend=backend) + np.testing.assert_array_equal(quad_array, np.array([scalar_val], dtype=QuadPrecDType(backend=backend))) + + @pytest.mark.parametrize("strtype", [np.str_, np.dtypes.StringDType()]) + def test_string_large_array(self, strtype): + """Test conversion of large string array""" + str_values = [str(i * 0.001) for i in range(1000)] + str_array = np.array(str_values, dtype=strtype) + quad_array = str_array.astype(QuadPrecDType()) + + assert quad_array.shape == (1000,) + np.testing.assert_array_equal(quad_array, np.array(str_values, dtype=QuadPrecDType())) + + class TestStringParsingEdgeCases: """Test edge cases in NumPyOS_ascii_strtoq string parsing""" @pytest.mark.parametrize("input_str", ['3.14', '-2.71', '0.0', '1e10', '-1e-10']) @@ -783,9 +804,10 @@ def test_numeric_string_parsing(self, input_str, byte_order): ("+INFINITY", 1), ("-INFINITY", -1), ]) - def test_infinity_sign_preservation(self, input_str, expected_sign): + @pytest.mark.parametrize("strtype", ['U20', np.dtypes.StringDType()]) + def test_infinity_sign_preservation(self, input_str, expected_sign, strtype): """Test that +/- signs are correctly applied to infinity values""" - arr = np.array([input_str], dtype='U20') + arr = np.array([input_str], dtype=strtype) result = arr.astype(QuadPrecDType()) assert np.isinf(float(str(result[0]))), f"Expected inf for '{input_str}'" @@ -800,9 +822,10 @@ def test_infinity_sign_preservation(self, input_str, expected_sign): "NAN", "+NAN", "-NAN", "nan()", "nan(123)", "nan(abc_)", "NAN(XYZ)", ]) - def test_nan_case_insensitive(self, input_str): + @pytest.mark.parametrize("strtype", ['U20', np.dtypes.StringDType()]) + def test_nan_case_insensitive(self, input_str, strtype): """Test case-insensitive NaN parsing with optional payloads""" - arr = np.array([input_str], dtype='U20') + arr = np.array([input_str], dtype=strtype) result = arr.astype(QuadPrecDType()) assert np.isnan(float(str(result[0]))), f"Expected NaN for '{input_str}'" @@ -821,9 +844,10 @@ def test_nan_case_insensitive(self, input_str): ("+1.23e-45", 1.23e-45), ("-1.23e-45", -1.23e-45), ]) - def test_numeric_sign_handling(self, input_str, expected_val): + @pytest.mark.parametrize("strtype", ['U20', np.dtypes.StringDType()]) + def test_numeric_sign_handling(self, input_str, expected_val, strtype): """Test that +/- signs are correctly handled for numeric values""" - arr = np.array([input_str], dtype='U20') + arr = np.array([input_str], dtype=strtype) result = arr.astype(QuadPrecDType()) result_val = float(str(result[0])) @@ -848,9 +872,10 @@ def test_numeric_sign_handling(self, input_str, expected_val): "\t-inf\t", " nan ", ]) - def test_whitespace_handling(self, input_str): + @pytest.mark.parametrize("strtype", ['U20', np.dtypes.StringDType()]) + def test_whitespace_handling(self, input_str, strtype): """Test that leading/trailing whitespace is handled correctly""" - arr = np.array([input_str], dtype='U20') + arr = np.array([input_str], dtype=strtype) result = arr.astype(QuadPrecDType()) # Should not raise an error @@ -870,9 +895,10 @@ def test_whitespace_handling(self, input_str): "na", # Incomplete nan "infinit", # Incomplete infinity ]) - def test_invalid_strings_raise_error(self, invalid_str): + @pytest.mark.parametrize("strtype", ['U20', np.dtypes.StringDType()]) + def test_invalid_strings_raise_error(self, invalid_str, strtype): """Test that invalid strings raise ValueError""" - arr = np.array([invalid_str], dtype='U20') + arr = np.array([invalid_str], dtype=strtype) with pytest.raises(ValueError): arr.astype(QuadPrecDType()) @@ -883,9 +909,10 @@ def test_invalid_strings_raise_error(self, invalid_str): "3.1€4", # Mid non-ASCII "π", # Greek pi ]) - def test_non_ascii_raises_error(self, input_str): + @pytest.mark.parametrize("strtype", ['U20', np.dtypes.StringDType()]) + def test_non_ascii_raises_error(self, input_str, strtype): """Test that non-ASCII characters raise ValueError""" - arr = np.array([input_str], dtype='U20') + arr = np.array([input_str], dtype=strtype) with pytest.raises(ValueError): arr.astype(QuadPrecDType())