diff --git a/docs/api.rst b/docs/api.rst index 39433e9c..c608a53b 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -231,6 +231,16 @@ Functions that return a Boolean mask indicating day and night. features.daytime.power_or_irradiance +Shading +------- + +Functions for labeling shadows. + +.. autosummary:: + :toctree: generated/ + + features.shading.fixed + System ====== diff --git a/docs/index.rst b/docs/index.rst index 898fe943..10261198 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -53,6 +53,7 @@ library status. functions in the :py:mod:`system` module in that we are identifying features of data rather than properties of the system that produced the data. + - :py:mod:`features.shading` functions for identifying shadows. - :py:mod:`system` identification of PV system characteristics from data (e.g. nameplate power, tilt, azimuth) diff --git a/docs/whatsnew/0.1.1.rst b/docs/whatsnew/0.1.1.rst index 632306f0..3be4681b 100644 --- a/docs/whatsnew/0.1.1.rst +++ b/docs/whatsnew/0.1.1.rst @@ -13,6 +13,8 @@ Enhancements :py:func:`pvanalytics.metrics.variability_index`. (:issue:`60`, :pull:`106`) * Internal refactor of `pvanalytics.metrics.performance_ratio_nrel` to support other performance ratio formulas. (:pull:`109`) +* Detect shadows from fixed objects in GHI data using + :py:func:`pvanalytics.features.shading.fixed`. (:issue:`24`, :pull:`101`) Bug Fixes ~~~~~~~~~ @@ -22,3 +24,4 @@ Contributors * Kevin Anderson (:ghuser:`kanderso-nrel`) * Cliff Hansen (:ghuser:`cwhanse`) +* Will Vining (:ghuser:`wfvining`) diff --git a/pvanalytics/features/__init__.py b/pvanalytics/features/__init__.py index dfb7a12d..9a8ff7c5 100644 --- a/pvanalytics/features/__init__.py +++ b/pvanalytics/features/__init__.py @@ -1,2 +1,3 @@ from pvanalytics.features import clearsky # noqa: F401 from pvanalytics.features import clipping # noqa: F401 +from pvanalytics.features import shading # noqa: F401 diff --git a/pvanalytics/features/shading.py b/pvanalytics/features/shading.py new file mode 100644 index 00000000..90c9a419 --- /dev/null +++ b/pvanalytics/features/shading.py @@ -0,0 +1,374 @@ +"""Functions for labeling shading and shodows.""" +import numpy as np +import pandas as pd +from scipy import ndimage +from skimage import morphology, measure +import pvlib +from pvanalytics import util + + +def _to_image(data, width): + """Convert data to an image. + + Parameters + ---------- + data : array_like + Values of the pixels + width : int + Width of the image in pixels + + Returns + ------- + ndarray + Data reshaped into an ndarray with width `width` + """ + return data.reshape(len(data) // width, width) + + +def _to_series(image, image_index): + """Convert a binary image to a boolean series. + + Parameters + ---------- + image : ndarray + Binary image. + image_index : pd.Index + Index for the returned series. + + Returns + ------- + Series + A pandas Series with index `index` and values from the image. + """ + return pd.Series(image.flatten(), index=image_index) + + +def _smooth(image): + """Smooth `image` by local mean filtering. + + Local means are computed in a 3x3 square surrounding each pixel. + """ + return ndimage.uniform_filter(image, size=(3, 3)) + + +def _prepare_images(ghi, clearsky, daytime, interval): + """Prepare data as images. + + Performs pre-processing steps on `ghi` and `clearsky` before + returning images for use in the shadow detection algorithm. + + Parameters + ---------- + ghi : Series + Measured GHI. [W/m^2] + clearsky : Series + Expected clearsky GHI. [W/m^2] + daytime : Series + Boolean series with True for daytime and False for night. + interval : int + Time between data points in `ghi`. [minutes] + + Returns + ------- + ghi_image : np.ndarray + Image form of `ghi` + clearsky_image : np.ndarray + Image form of `clearsky` + clouds_image : np.ndarray + Image of the cloudy periods in `ghi` + image_times : pandas.DatetimeIndex + Index for the data included in the returned images. Leading + and trailing days with incomplete data are not included in the + image, these times are needed to build a Series from the image + later on. + + """ + # Fill missing times by interpolation. Missing data at the + # beginning or end of the series is not filled in, and will be + # excluded from the images used for shadow detection. + image_width = 1440 // interval + ghi = ghi.interpolate(limit_area='inside') + # drop incomplete days. + ghi = ghi[ghi.resample('D').transform('count') == image_width] + image_times = ghi.index + ghi_image = _to_image(ghi.to_numpy(), image_width) + scaled_ghi = (ghi * 1000) / np.max(_smooth(ghi_image)) + scaled_clearsky = (clearsky * 1000) / clearsky.max() + scaled_clearsky = scaled_clearsky.reindex_like(scaled_ghi) + daytime = daytime.reindex_like(scaled_ghi) + # Detect clouds. + window_size = 50 // interval + clouds = _detect_clouds(scaled_ghi, scaled_clearsky, window_size) + cloud_mask = _to_image(clouds.to_numpy(), image_width) + # Interpolate across days (i.e. along columns) to remove clouds + # replace clouds with nans + # + # This could probably be done directly with scipy.interpolate.inter1d, + # but the easiest approach is to turn the image into a dataframe and + # interpolate along the columns. + cloudless_image = ghi_image.copy() + cloudless_image[cloud_mask] = np.nan + clouds_image = ghi_image.copy() + clouds_image[~cloud_mask] = np.nan + ghi_image = pd.DataFrame(cloudless_image).interpolate( + axis=0, + limit_direction='both' + ).to_numpy() + # set night to nan + ghi_image[~_to_image(daytime.to_numpy(), image_width)] = np.nan + return ( + ghi_image, + _to_image(scaled_clearsky.to_numpy(), image_width), + clouds_image, + image_times + ) + + +def _detect_clouds(ghi, clearsky_ghi, window_size): + """Use :py:func:`pvanalytics.clearsky.reno` to detect clouds. + + Returns + ------- + Series + Boolean series with true for cloudy periods that are at least as long + as `window_size`. + """ + cleartimes = pvlib.clearsky.detect_clearsky( + ghi, + clearsky_ghi, + ghi.index, + window_length=10, + max_iterations=1 + ) + + return cleartimes.rolling(window_size, center=True).sum() == 0 + + +def _remove_pillars(wires): + j = np.array([[0, 0, 0, 0, 0], + [0, 0, 1, 0, 0], + [0, 0, 1, 0, 0]]).T + k = np.array([[1, 0, 0, 0, 1], + [1, 0, 0, 0, 1], + [1, 0, 0, 0, 1]]).T + a1 = np.logical_and( + wires, + np.logical_not(ndimage.binary_hit_or_miss(wires, j, k)) + ) + a2 = np.logical_and( + wires, + np.logical_not( + ndimage.binary_hit_or_miss(wires, np.flipud(j), np.flipud(k)) + ) + ) + return np.logical_and(a1, a2) + + +def _fill_gaps(wires, out): + """Fill 1-minute gaps.""" + mask = np.array([[1, 0, 1]]) + return np.logical_or( + wires, ndimage.binary_hit_or_miss(wires, mask), out=out) + + +def _remove_spikes(wires, out): + """Remove 1-day spikes.""" + mask = np.array([[0, 1, 0]]).T + temp_image = np.ndarray(wires.shape) + hit_miss = ndimage.binary_hit_or_miss(wires, mask) + return np.logical_and( + wires, + np.logical_not( + hit_miss, + out=temp_image + ), + out=out + ) + + +def _restore_gaps(wires): + """Restore 2-minute and 1-minute gaps""" + wires = np.logical_or( + wires, + ndimage.binary_hit_or_miss( + wires, + np.array([[0, 1, 0, 0, 1]]), np.array([[0, 0, 1, 1, 0]]) + ) + ) + return np.logical_or( + wires, + ndimage.binary_hit_or_miss(wires, np.array([[1, 0, 1]])) + ) + + +def _height(bbox): + """Return the height of a bounding box. + + Parameters + ---------- + bbox : tuple + 4-tuple specifying ``(min_row, min_col, max_row, max_col)``. + + Returns + ------- + int + The height of the bounding box in pixels. + """ + return bbox[2] - bbox[0] + 1 + + +def _filter_blobs(wires, blob_length, connectivity): + """Remove blobs shorter than blob_length. + + Examines all connected components in the image and removes any + that span fewer than `blob_length` columns. + """ + components = measure.label(wires, connectivity=connectivity, background=0) + keep = [props.label for props in measure.regionprops(components) + if _height(props.bbox) > blob_length] + return np.isin(components, keep) + + +def _tand(degrees): + """Tangent of an angle in degrees.""" + return np.tan(np.deg2rad(degrees)) + + +def _filter_bars(wires, out): + """Only keep shadows that are 'bar-shaped' and span multiple days. + + Parameters + ---------- + wires : ndarray + Binary image of candidate shadows. + out : ndarray + An ndarray that will hold the output image resulting from this + filtering operation. If None then a new array will be allocated. + + Returns + ------- + ndarray + The output array. If `out` is not none, will return `out`. + """ + temp_image = np.ndarray(wires.shape) + for angle in range(0, 90, 5): + if angle < 75: + dim = 20 + mid = 10 + else: + dim = round(10 + 2 * angle // 5) + mid = round(6 + angle // 5) + cc = np.arange(0, dim) + se = np.zeros((dim, dim)) + if angle <= 45: + rr = mid + np.round((cc - mid) * _tand(angle)) + se[cc, rr.astype('int')] = 1 + else: + rr = mid + np.round((cc - mid) * _tand(90 - angle)) + se[cc, rr.astype('int')] = 1 + se = se.T + morphology.binary_opening(wires, se, out=temp_image) + np.logical_or( + out, + temp_image, + out=out + ) + morphology.binary_opening(wires, np.flipud(se), out=temp_image) + np.logical_or( + out, + temp_image, + out=out + ) + return out + + +def _clean_wires(wires): + """Clean up clouds that are connected to wires.""" + out = _remove_pillars(wires) + out = _fill_gaps(out, out) + out = _remove_spikes(out, out) + out = _fill_gaps(out, out) + out = _restore_gaps(out) + out = _filter_blobs(out, 20, connectivity=1) + out = _filter_bars(wires, out) + out = _filter_blobs(out, 15, connectivity=2) + return out + + +def fixed(ghi, daytime, clearsky, interval=None, min_gradient=2): + """Detects shadows from fixed structures such as wires and poles. + + Uses morphological image processing methods to identify shadows + from fixed local objects in GHI data. GHI data are assumed to + be reasonably complete with relatively few missing values and at a + fixed time interval nominally of 1 minute over the course of + several months. Detection focuses on shadows with relatively short + duration. The algorithm forms a 2D image of the GHI data by + arranging time of day along the x-axis and day of year along the + y-axis. Rapid change in GHI in the x-direction is used to identify + edges of shadows; continuity in the y-direction is used to + separate local object shading from cloud shadows. + + Parameters + ---------- + ghi : Series + Time series of GHI measurements. Data must be in local time at + 1-minute frequency and should cover at least 60 days. + daytime : Series + Boolean series with True for times when the sun is up. + clearsky : Series + Clearsky GHI with same index as `ghi`. + interval : int, optional + Interval between data points in minutes. If not specified the + interval is inferred from the frequency of the index of `ghi`. + + Returns + ------- + Series + Boolean series with true for times that are impacted by shadows. + ndarray + A boolean image (black and white) showing the shadows that were + detected. + + References + ---------- + + .. [1] Martin, C. E., Hansen, C. W., An Image Processing Algorithm to + Identify Near-Field Shading in Irradiance Measurements, preprint 2016 + .. [2] Reno, M.J. and C.W. Hansen, "Identification of periods of clear sky + irradiance in time series of GHI measurements" Renewable Energy, v90, + p. 520-531, 2016. + """ + if interval is None: + interval = util.freq_to_timedelta( + pd.infer_freq(ghi.index) + ).seconds // 60 + if interval != 1: + raise ValueError("Data must be at 1-minute intervals") + ghi_image, clearsky_image, clouds_image, index = _prepare_images( + ghi, + clearsky, + daytime, + interval + ) + # normalize the GHI and dampen the dynamic range where the clear + # sky model may have large errors (e.g. at very low sun elevation) + alpha = 2000 + ghi_boosted = 1000 * (ghi_image + alpha) / (clearsky_image + alpha) + + # We must use scipy.ndimage here because skimage does not support + # floating point data outside the range [-1, 1]. + gradient = ndimage.morphological_gradient(ghi_boosted, size=(1, 3)) + threshold = gradient > min_gradient # binary image of wire candidates + + # From here we CAN use skimage because we are working with binary images. + three_minute_mask = morphology.rectangle(1, 3) + wires = morphology.remove_small_objects( + morphology.binary_closing(threshold, three_minute_mask), + min_size=200, + connectivity=2 # all neighbors (including diagonals) + ) + wires_image = _clean_wires(wires) + wires_series = _to_series(wires, index) + wires_series = wires_series.reindex(ghi.index, fill_value=False) + return wires_series, wires_image diff --git a/pvanalytics/tests/features/test_shading.py b/pvanalytics/tests/features/test_shading.py new file mode 100644 index 00000000..311594b5 --- /dev/null +++ b/pvanalytics/tests/features/test_shading.py @@ -0,0 +1,59 @@ +import pandas as pd +from pandas.testing import assert_series_equal +import pytest +from pvanalytics.features import shading + + +@pytest.fixture(scope='module') +def times(): + return pd.date_range( + start='1/1/2020', + end='12/31/2020', + closed='left', + freq='T', + tz='MST' + ) + + +@pytest.fixture(scope='module') +def daytime(times, albuquerque): + solar_position = albuquerque.get_solarposition(times) + return solar_position['zenith'] < 87 + + +@pytest.fixture(scope='module') +def clearsky_ghi(times, albuquerque): + clearsky = albuquerque.get_clearsky(times, model='simplified_solis') + return clearsky['ghi'] + + +def test_fixed_no_shadows(daytime, clearsky_ghi): + shadows, image = shading.fixed(clearsky_ghi, daytime, clearsky_ghi) + assert not shadows.any() + + +def test_fixed_same_index(daytime, clearsky_ghi): + daytime = daytime['1/2/2020 11:00':'11/2/2020 13:00'] + clearsky_ghi = clearsky_ghi['1/2/2020 11:00':'11/2/2020 13:00'] + shadows, image = shading.fixed(clearsky_ghi, daytime, clearsky_ghi) + assert_series_equal( + pd.Series(False, index=daytime.index), + shadows, + check_names=False + ) + + +def test_simple_shadow(daytime, clearsky_ghi): + shadow_ghi = clearsky_ghi.copy() + shadow_ghi[shadow_ghi.between_time('11:00', '11:03').index] *= 0.5 + shadows, image = shading.fixed(shadow_ghi, daytime, clearsky_ghi) + assert shadows.between_time('11:00', '11:03').all() + + +def test_invalid_interval(daytime, clearsky_ghi): + ghi = clearsky_ghi.resample('5T').first() + daytime_resampled = daytime.resample('5T').first() + with pytest.raises(ValueError, match="Data must be at 1-minute intervals"): + shading.fixed(ghi, daytime_resampled, ghi) + with pytest.raises(ValueError, match="Data must be at 1-minute intervals"): + shading.fixed(daytime, clearsky_ghi, clearsky_ghi, interval=2) diff --git a/requirements-min.txt b/requirements-min.txt index c27b656e..46f4b391 100644 --- a/requirements-min.txt +++ b/requirements-min.txt @@ -1,5 +1,6 @@ numpy~=1.15.0 -pandas~=0.23.0 +pandas~=0.24.0 pvlib~=0.8.0 scipy~=1.2.0 statsmodels~=0.9.0 +scikit-image~=0.16.0 diff --git a/requirements.txt b/requirements.txt index 07e9cf86..b195b452 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,6 @@ numpy>=1.15.0 -pandas>=0.23.0,<1.1.0 +pandas>=0.24.0,<1.1.0 pvlib>=0.8.0 scipy>=1.2.0 statsmodels>=0.9.0 +scikit-image>=0.16.0 diff --git a/setup.py b/setup.py index da684ae4..e6917256 100644 --- a/setup.py +++ b/setup.py @@ -32,10 +32,11 @@ INSTALL_REQUIRES = [ 'numpy >= 1.15.0', - 'pandas >= 0.23.0', + 'pandas >= 0.24.0', 'pvlib >= 0.8.0', 'scipy >= 1.2.0', - 'statsmodels >= 0.9.0' + 'statsmodels >= 0.9.0', + 'scikit-image >= 0.16.0' ] DOCS_REQUIRE = [