44from uuid import UUID
55
66import numpy as np
7+ import pandas as pd
78from google .protobuf .descriptor import FieldDescriptor
89from google .protobuf .duration_pb2 import Duration
910from google .protobuf .message import Message
1718from tilebox .datasets .datasets .v1 .well_known_types_pb2 import Geometry , LatLon , LatLonAlt , Quaternion , Vec3
1819
1920ScalarProtoFieldValue = Message | float | str | bool | bytes
21+
22+
2023ProtoFieldValue = ScalarProtoFieldValue | Sequence [ScalarProtoFieldValue ] | None
2124
2225_FILL_VALUES_BY_DTYPE : dict [type [np .dtype [Any ]], Any ] = {
@@ -107,7 +110,7 @@ def from_proto(self, value: ProtoFieldValue) -> int:
107110 return value .seconds * 10 ** 9 + value .nanos
108111
109112 def to_proto (self , value : DatetimeScalar ) -> Timestamp | None :
110- if value is None or (isinstance (value , np .datetime64 ) and np .isnat (value )):
113+ if is_missing ( value ) or (isinstance (value , np .datetime64 ) and np .isnat (value )):
111114 return None
112115 # we use pandas to_datetime function to handle a variety of input types that can be coerced to datetimes
113116 seconds , nanos = divmod (to_datetime (value , utc = True ).value , 10 ** 9 )
@@ -124,10 +127,10 @@ def from_proto(self, value: ProtoFieldValue) -> int:
124127 return value .seconds * 10 ** 9 + value .nanos
125128
def to_proto(self, value: str | float | timedelta | np.timedelta64) -> Duration | None:
    """Convert a timedelta-like value into a protobuf Duration.

    Returns None when the value is missing (as reported by is_missing) or is a numpy NaT timedelta.
    Otherwise the value is coerced with pandas' to_timedelta and its integer nanosecond count is split
    into whole seconds and the remaining nanoseconds.
    """
    if is_missing(value):
        return None
    if isinstance(value, np.timedelta64) and np.isnat(value):
        return None

    # pandas' to_timedelta accepts a variety of input types that can be coerced to timedeltas
    total_nanoseconds = to_timedelta(value).value
    whole_seconds, remaining_nanos = divmod(total_nanoseconds, 10**9)
    return Duration(seconds=whole_seconds, nanos=remaining_nanos)
132135
133136
@@ -141,7 +144,7 @@ def from_proto(self, value: ProtoFieldValue) -> str:
141144 return str (UUID (bytes = value .uuid ))
142145
143146 def to_proto (self , value : str | UUID ) -> UUIDMessage | None :
144- if not value : # None or empty string
147+ if is_missing ( value ) or value == "" : # missing or empty string
145148 return None
146149
147150 if isinstance (value , str ):
@@ -160,7 +163,7 @@ def from_proto(self, value: ProtoFieldValue) -> Any:
160163 return from_wkb (value .wkb )
161164
def to_proto(self, value: Any) -> Geometry | None:
    """Convert a geometry object into a Geometry message built from its ``wkb`` attribute.

    Returns None when the value is missing (as reported by is_missing).
    """
    return None if is_missing(value) else Geometry(wkb=value.wkb)
166169
@@ -175,7 +178,7 @@ def from_proto(self, value: ProtoFieldValue) -> tuple[float, float, float]:
175178 return value .x , value .y , value .z
176179
def to_proto(self, value: tuple[float, float, float]) -> Vec3 | None:
    """Convert an indexable of three components into a Vec3 message.

    Returns None when the value is missing (as reported by is_missing) or when every component is NaN.
    """
    if is_missing(value):
        return None
    if np.all(np.isnan(value)):
        return None

    x, y, z = value[0], value[1], value[2]
    return Vec3(x=x, y=y, z=z)
181184
@@ -190,7 +193,7 @@ def from_proto(self, value: ProtoFieldValue) -> tuple[float, float, float, float
190193 return value .q1 , value .q2 , value .q3 , value .q4
191194
def to_proto(self, value: tuple[float, float, float, float]) -> Quaternion | None:
    """Convert an indexable of four components into a Quaternion message.

    Returns None when the value is missing (as reported by is_missing) or when every component is NaN.
    """
    if is_missing(value):
        return None
    if np.all(np.isnan(value)):
        return None

    q1, q2, q3, q4 = value[0], value[1], value[2], value[3]
    return Quaternion(q1=q1, q2=q2, q3=q3, q4=q4)
196199
@@ -205,7 +208,7 @@ def from_proto(self, value: ProtoFieldValue) -> tuple[float, float]:
205208 return value .latitude , value .longitude
206209
def to_proto(self, value: tuple[float, float]) -> LatLon | None:
    """Convert a (latitude, longitude) pair into a LatLon message.

    Returns None when the value is missing (as reported by is_missing) or when every component is NaN.
    """
    if is_missing(value):
        return None
    if np.all(np.isnan(value)):
        return None

    latitude, longitude = value[0], value[1]
    return LatLon(latitude=latitude, longitude=longitude)
211214
@@ -221,7 +224,7 @@ def from_proto(self, value: ProtoFieldValue) -> tuple[float, float, float]:
221224 return value .latitude , value .longitude , value .altitude
222225
def to_proto(self, value: tuple[float, float, float]) -> LatLonAlt | None:
    """Convert a (latitude, longitude, altitude) triple into a LatLonAlt message.

    Returns None when the value is missing (as reported by is_missing) or when every component is NaN.
    """
    if is_missing(value):
        return None
    if np.all(np.isnan(value)):
        return None

    latitude, longitude, altitude = value[0], value[1], value[2]
    return LatLonAlt(latitude=latitude, longitude=longitude, altitude=altitude)
227230
@@ -301,3 +304,19 @@ def _camel_to_uppercase(name: str) -> str:
301304 'PROCESSING_LEVEL'
302305 """
303306 return "" .join (["_" + c .lower () if c .isupper () else c for c in name ]).lstrip ("_" ).upper ()
307+
308+
def is_missing(value: Any) -> bool:
    """Check if a value represents a missing/null value.

    Handles None, np.nan, pd.NA, NaT, and other pandas missing value sentinels.
    This is needed for pandas 3.0+ compatibility where object-dtype columns use
    np.nan instead of None for missing values.

    Args:
        value: The value to check. May be any object, including containers.

    Returns:
        True if pandas reports the value itself as missing. Containers with one or more dimensions (lists,
        arrays, Series, ...) are never considered missing, regardless of their length or contents.
    """
    try:
        result = pd.isna(value)
    except ValueError:
        # A value that pandas cannot inspect is treated as not missing instead of propagating the error.
        return False

    # pd.isna returns either a single boolean, or an element-wise container of booleans. We must not rely on bool()
    # raising for containers: a one-element container converts to a bool without error, which would wrongly report
    # e.g. [np.nan] as missing. A container is never a missing value, even if all of its elements are NaN.
    if np.ndim(result) != 0:
        return False
    return bool(result)
0 commit comments