From 78a6d046ef323881047dc23f7b1f7b5da25153a9 Mon Sep 17 00:00:00 2001 From: Kaushik Date: Fri, 8 May 2020 18:44:04 -0400 Subject: [PATCH 01/20] fix issue with parsing empty sheets. --- impact/parsers.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/impact/parsers.py b/impact/parsers.py index 2f76fe7..c7557b6 100644 --- a/impact/parsers.py +++ b/impact/parsers.py @@ -357,7 +357,8 @@ def parse_raw_data(cls, data_format=None, id_type='traverse', file_name=None, da # Extract data from sheets data = {sheet.title: [[elem.value if elem is not None else None for elem in row] - for row in xls_data[sheet.title]] for sheet in xls_data} + for row in xls_data[sheet.title]] + if xls_data[sheet.title].max_row > 1 else None for sheet in xls_data} if data_format in cls.parser_case_dict.keys(): cls.parser_case_dict[data_format](experiment, data=data, id_type=id_type, plate_type = plate_type) @@ -415,7 +416,8 @@ def parse_raw_data(format=None, id_type='CSV', file_name=None, data=None, experi # Extract data from sheets data = {sheet.title: [[elem.value if elem is not None else None for elem in row] - for row in xls_data[sheet.title]] for sheet in xls_data} + for row in xls_data[sheet.title]] + if xls_data[sheet.title].max_row > 1 else None for sheet in xls_data} # Import parsers parser_case_dict = {'spectromax_OD' : spectromax_OD, From d8772f03b385eea5ba3834f96bb8821702388a1c Mon Sep 17 00:00:00 2001 From: Kaushik Date: Fri, 8 May 2020 18:53:29 -0400 Subject: [PATCH 02/20] removed unnecessary dependencies --- requirements.txt | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0f48b27..0260975 100644 --- a/requirements.txt +++ b/requirements.txt @@ -3,6 +3,4 @@ numpy>=1.10.4 pandas scipy>=0.17.0 lmfit==0.8.3 -openpyxl==2.5.3 -tabulate -pyexcel-xlsx \ No newline at end of file +openpyxl==2.5.3 \ No newline at end of file From b9fd1236701e15b8830aa33df8c90c6051f08a54 Mon Sep 17 00:00:00 2001 From: Kaushik Raj Date: Mon, 11 May 2020 20:57:37 -0400 Subject: [PATCH 03/20] Update readme.md --- readme.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/readme.md b/readme.md index 012de95..d0a8f7f 100644 --- a/readme.md +++ b/readme.md @@ -4,6 +4,7 @@ # Impact: a framework for analyzing microbial physiology +Associated with the research article: "Impact framework: A python package for writing data analysis workflows to interpret microbial physiology" https://doi.org/10.1016/j.mec.2019.e00089 Impact assists scientists and engineers to interpret data describing microbial physiology. Impact parses raw data from common equipment such as chromatography (HPLC), and plate readers. Data is automatically interpreted and built into hierarchical objects @@ -51,4 +52,4 @@ Impact comes with scripts that test the proper functioning of the package. These The documentation is available in `docs` or a rendered version is available [here](http://impact.readthedocs.io/en/latest/) ## Starter Files -A starter ipynb which can be opened with Jupyter notebook has been provided in the Examples_and_Helpers folders. The file comes with comments which will assist users in analyzing their data. A helper file to create trial identifiers has also been provided in the Examples_and_Helpers folder. \ No newline at end of file +A starter ipynb which can be opened with Jupyter notebook has been provided in the Examples_and_Helpers folders. The file comes with comments which will assist users in analyzing their data. A helper file to create trial identifiers has also been provided in the Examples_and_Helpers folder. From 7b93231e76dfb5aa8533199d1f657a4f0ddf9843 Mon Sep 17 00:00:00 2001 From: Kaushik Raj Date: Mon, 11 May 2020 20:57:52 -0400 Subject: [PATCH 04/20] Update readme.md --- readme.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/readme.md b/readme.md index d0a8f7f..3dd05a0 100644 --- a/readme.md +++ b/readme.md @@ -4,7 +4,8 @@ # Impact: a framework for analyzing microbial physiology -Associated with the research article: "Impact framework: A python package for writing data analysis workflows to interpret microbial physiology" https://doi.org/10.1016/j.mec.2019.e00089 +Associated with the research article: "Impact framework: A python package for writing data analysis workflows to interpret microbial physiology" https://doi.org/10.1016/j.mec.2019.e00089. + Impact assists scientists and engineers to interpret data describing microbial physiology. Impact parses raw data from common equipment such as chromatography (HPLC), and plate readers. Data is automatically interpreted and built into hierarchical objects From 4669a8aae3e57bbf1c31320d81f9eaf6af488c39 Mon Sep 17 00:00:00 2001 From: Kaushik Date: Mon, 15 Feb 2021 01:27:05 -0500 Subject: [PATCH 05/20] update attribute names for cobrapy objects to account for deprecated vars --- impact/helpers/synthetic_data.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/impact/helpers/synthetic_data.py b/impact/helpers/synthetic_data.py index 5ff88e7..70ecfe0 100644 --- a/impact/helpers/synthetic_data.py +++ b/impact/helpers/synthetic_data.py @@ -35,11 +35,11 @@ def generate_data(y0, t, model, biomass_keys, substrate_keys, product_keys, nois sol = model.optimize() # Let's assign the data to these variables - biomass_flux = [sol.x_dict[biomass_keys[0]]] + biomass_flux = [sol.fluxes[biomass_keys[0]]] - substrate_flux = [sol.x_dict[substrate_keys[0]]] + substrate_flux = [sol.fluxes[substrate_keys[0]]] - product_flux = [sol.x_dict[key] for key in product_keys] + product_flux = [sol.fluxes[key] for key in product_keys] exchange_keys = biomass_keys + substrate_keys + product_keys @@ -96,11 +96,11 @@ def dFBA_functions(y, t, t_max, model, analyte_dict, bar): return [0]*len(exchange_keys) else: # Let's assign the data to these variables - biomass_flux = [model.solution.x_dict[biomass_keys[0]]] + biomass_flux = [solution.fluxes[biomass_keys[0]]] - substrate_flux = [model.solution.x_dict[substrate_keys[0]]] + substrate_flux = [solution.fluxes[substrate_keys[0]]] - product_flux = [model.solution.x_dict[key] for key in product_keys] + product_flux = [solution.fluxes[key] for key in product_keys] exchange_keys = biomass_keys + substrate_keys + product_keys From 2a7b5061ff6ee8aa9f87314ee45732b4dda7c20c Mon Sep 17 00:00:00 2001 From: Kaushik Date: Sun, 7 Mar 2021 23:06:18 -0500 Subject: [PATCH 06/20] fixed an error in handling pd_series where duplicated elements are not identified --- impact/core/analytes/Base.py | 7 +------ impact/parsers.py | 5 +++++ 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/impact/core/analytes/Base.py b/impact/core/analytes/Base.py index 66f7d87..681c5c9 100644 --- a/impact/core/analytes/Base.py +++ b/impact/core/analytes/Base.py @@ -312,7 +312,6 @@ def add_timepoint(self, time_point): self.time_points.append(time_point) if len(self.time_points) == 1: self.trial_identifier = time_point.trial_identifier - self.pd_series = pd.Series([time_point.data],index=[time_point.time]) else: if self.time_points[-1].trial_identifier.unique_single_trial() \ != self.time_points[-2].trial_identifier.unique_single_trial(): @@ -326,11 +325,7 @@ def add_timepoint(self, time_point): self.time_points.sort(key=lambda timePoint: timePoint.time) - if sum(self.pd_series.index.duplicated()) > 0: - print(self.pd_series) - print(self.trial_identifier) - print(time_point.trial_identifier) - raise Exception('Duplicate time points found, this is not supported - likely an identifier input error') + def curve_fit_data(self): raise Exception('This must be implemented in a child') diff --git a/impact/parsers.py b/impact/parsers.py index c7557b6..7c43ca9 100644 --- a/impact/parsers.py +++ b/impact/parsers.py @@ -475,6 +475,11 @@ def parse_time_point_list(time_point_list): for analyte in analyte_dict.values(): analyte.pd_series = pd.Series([timePoint.data for timePoint in analyte.time_points], index=[timePoint.time for timePoint in analyte.time_points]) + if sum(analyte.pd_series.index.duplicated()) > 0: + print(analyte.pd_series) + print(analyte.trial_identifier) + print(analyte.pd_series[analyte.pd_series.index.duplicated()]) + raise Exception('Duplicate time points found, this is not supported - likely an identifier input error') tf = sys_time.time() print("Parsed %i time points in %0.1fs" % (len(time_point_list), (tf - t0))) From a29d002f8169f3e2b17dd65ac8dbe62c50613755 Mon Sep 17 00:00:00 2001 From: Kaushik Date: Mon, 8 Mar 2021 00:08:46 -0500 Subject: [PATCH 07/20] fixes error in handling datasets shorter than savgol filter window --- impact/core/analytes/Base.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/impact/core/analytes/Base.py b/impact/core/analytes/Base.py index 681c5c9..23166c0 100644 --- a/impact/core/analytes/Base.py +++ b/impact/core/analytes/Base.py @@ -245,6 +245,8 @@ def calculate(self): def find_death_phase(self, data_vector): from ..settings import settings use_filtered_data = settings.use_filtered_data + if len(self.pd_series) < self.savgol_filter_window_size: + use_filtered_data = False verbose = settings.verbose self.death_phase_start = self.find_death_phase_static(data_vector, use_filtered_data = use_filtered_data, From 729a9a7ce6fae5fbf28301fbe8dfcc17e358b470 Mon Sep 17 00:00:00 2001 From: Kaushik Date: Mon, 8 Mar 2021 00:29:05 -0500 Subject: [PATCH 08/20] Fixes issue with outlier removal - iterative outlier removal was previously problematic. Also, fixes issue with blank subtraction where non-filtered data is thrown away and only filtered data is stored. --- impact/core/ReplicateTrial.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/impact/core/ReplicateTrial.py b/impact/core/ReplicateTrial.py index 90a8ae3..f9273e8 100644 --- a/impact/core/ReplicateTrial.py +++ b/impact/core/ReplicateTrial.py @@ -228,7 +228,6 @@ def calculate_statistics(self): self.avg.analyte_dict[analyte].pd_series = self.replicate_df[analyte].mean(axis=1) self.std.analyte_dict[analyte].pd_series = self.replicate_df[analyte].std(axis=1) - #This is the right way to calculate standard deviations for blank subtraction. You must add the two variances if self.blank_subtraction_flag and analyte in self.blank_subtracted_analytes: self.std.analyte_dict[analyte].pd_series = np.sqrt(np.square(self.std.analyte_dict[analyte].pd_series) + np.square(self.blank.std.analyte_dict[analyte].pd_series)) @@ -379,7 +378,7 @@ def prune_bad_replicates(self, analyte): # Remove outliers # A value between 0 and 1, > 1 means removing the replicate makes the yield worse # df = pd.DataFrame({key: np.random.randn(5) for key in ['a', 'b', 'c']}) - std_by_mean = np.mean(abs(backup.std(axis = 1)/backup.mean(axis = 1))) + std_by_mean = np.mean(abs(df.std(axis=1)/df.mean(axis=1))) temp_std_by_mean = {} for temp_remove_replicate in list(df.columns.values): indices = [replicate for i, replicate in enumerate(df.columns.values) if @@ -390,7 +389,7 @@ def prune_bad_replicates(self, analyte): # Remove outliers temp_std_by_mean[temp_remove_replicate] = np.mean(abs(temp_std / temp_mean)) temp_min_val = min([temp_std_by_mean[key] for key in temp_std_by_mean]) - if temp_min_val < std_deviation_cutoff and temp_min_val < std_by_mean: + if abs(temp_min_val-std_by_mean) >= std_deviation_cutoff and temp_min_val < std_by_mean: bad_replicate_cols.append( [key for key in temp_std_by_mean if temp_std_by_mean[key] == temp_min_val][0]) @@ -425,13 +424,13 @@ def substract_blank(self): for single_trial in self.single_trials: for blank_analyte in analytes_with_blanks: if blank_analyte in single_trial.analyte_dict: - single_trial.analyte_dict[blank_analyte].data_vector = \ - single_trial.analyte_dict[blank_analyte].data_vector \ - - self.blank.avg.analyte_dict[blank_analyte].data_vector + single_trial.analyte_dict[blank_analyte].pd_series = \ + single_trial.analyte_dict[blank_analyte].pd_series \ + - self.blank.avg.analyte_dict[blank_analyte].pd_series #single_trial.analyte_dict[blank_analyte].data_vector = \ # single_trial.analyte_dict[blank_analyte].data_vector.clip(min = 0) - self.blank_subtracted_analytes.append(blank_analyte) + self.blank_subtracted_analytes.append(blank_analyte) self.blank_subtracted_analytes = list(set(self.blank_subtracted_analytes)) self.blank_subtraction_flag = True From 78cc88a728231ebf7b0fc6c28555c180cbbb8ad2 Mon Sep 17 00:00:00 2001 From: Kaushik Date: Mon, 8 Mar 2021 01:02:35 -0500 Subject: [PATCH 09/20] fixes issue with blank subtraction to store subtracted data in the timepoint list --- impact/core/ReplicateTrial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/impact/core/ReplicateTrial.py b/impact/core/ReplicateTrial.py index f9273e8..118db02 100644 --- a/impact/core/ReplicateTrial.py +++ b/impact/core/ReplicateTrial.py @@ -427,7 +427,7 @@ def substract_blank(self): single_trial.analyte_dict[blank_analyte].pd_series = \ single_trial.analyte_dict[blank_analyte].pd_series \ - self.blank.avg.analyte_dict[blank_analyte].pd_series - + single_trial.analyte_dict[blank_analyte].generate_time_point_list() #single_trial.analyte_dict[blank_analyte].data_vector = \ # single_trial.analyte_dict[blank_analyte].data_vector.clip(min = 0) self.blank_subtracted_analytes.append(blank_analyte) From 04045ac0f65e79a8c2d5e3eb85fdd0270bd1163d Mon Sep 17 00:00:00 2001 From: Kaushik Date: Mon, 8 Mar 2021 02:06:59 -0500 Subject: [PATCH 10/20] treat blanks as constants for stat calculations. makes calculations much simpler --- impact/core/ReplicateTrial.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/impact/core/ReplicateTrial.py b/impact/core/ReplicateTrial.py index 118db02..b7753c9 100644 --- a/impact/core/ReplicateTrial.py +++ b/impact/core/ReplicateTrial.py @@ -228,9 +228,9 @@ def calculate_statistics(self): self.avg.analyte_dict[analyte].pd_series = self.replicate_df[analyte].mean(axis=1) self.std.analyte_dict[analyte].pd_series = self.replicate_df[analyte].std(axis=1) - if self.blank_subtraction_flag and analyte in self.blank_subtracted_analytes: - self.std.analyte_dict[analyte].pd_series = np.sqrt(np.square(self.std.analyte_dict[analyte].pd_series) - + np.square(self.blank.std.analyte_dict[analyte].pd_series)) + #if self.blank_subtraction_flag and analyte in self.blank_subtracted_analytes: + # self.std.analyte_dict[analyte].pd_series = np.sqrt(np.square(self.std.analyte_dict[analyte].pd_series) + # + np.square(self.blank.std.analyte_dict[analyte].pd_series)) #Assume that stdev for all values <0 is simply 0 since negative values are forced to be 0. #Negative values of any analyte in this context is not possible #self.std.analyte_dict[analyte].pd_series[self.avg.analyte_dict[analyte].pd_series<=0] = 0 @@ -255,17 +255,17 @@ def calculate_statistics(self): feature_data = feature_object.data if feature_data is not None: single_trial_data.append(feature_data) - if self.blank: - with np.errstate(divide='ignore'): - - temp_var = np.square(feature_data)*(np.square(self.blank.std.analyte_dict[analyte].pd_series\ - - /trial.analyte_dict[analyte].pd_series)+ - np.square(self.blank.std.analyte_dict[biomass_analyte].pd_series - /trial.analyte_dict[biomass_analyte].pd_series)) - temp_var[trial.analyte_dict[analyte].pd_series == 0] = 0 - temp_var[trial.analyte_dict[biomass_analyte].pd_series == 0] = 0 - single_trial_var.append(temp_var) + #if self.blank: + # with np.errstate(divide='ignore'): + + # temp_var = np.square(feature_data)*(np.square(self.blank.std.analyte_dict[analyte].pd_series\ + # + # /trial.analyte_dict[analyte].data_vector)+ + # np.square(self.blank.std.analyte_dict[biomass_analyte].pd_series + # /trial.analyte_dict[biomass_analyte].data_vector)) + # temp_var[trial.analyte_dict[analyte].pd_series == 0] = 0 + # temp_var[trial.analyte_dict[biomass_analyte].pd_series == 0] = 0 + # single_trial_var.append(temp_var) if single_trial_data: rep_mean = sum(single_trial_data)/len(trial_list) else: @@ -275,8 +275,8 @@ def calculate_statistics(self): rep_var = pd.Series(data=np.var(single_trial_data,axis=0),index=trial_list[-1].analyte_dict[analyte].time_vector) # Variance on dataset due to blanks is average of individual standard deviation squared. # Total variance is variance due to blanks + variance between individual normalized datapoints - if self.blank: - rep_var = sum(single_trial_var)/np.square(len(single_trial_var)) + rep_var + #if self.blank: + # rep_var = sum(single_trial_var)/np.square(len(single_trial_var)) + rep_var setattr(self.std.analyte_dict[analyte], feature.name, np.sqrt(rep_var).values) setattr(self.avg.analyte_dict[analyte], feature.name, rep_mean) From 4ca315355712420d42927459b5bd0b4c998ba12a Mon Sep 17 00:00:00 2001 From: Kaushik Date: Mon, 8 Mar 2021 02:07:40 -0500 Subject: [PATCH 11/20] accomodate OD700, OD660 as biomass analytes --- impact/core/Experiment.py | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/impact/core/Experiment.py b/impact/core/Experiment.py index c9977aa..9dfcb94 100644 --- a/impact/core/Experiment.py +++ b/impact/core/Experiment.py @@ -1,4 +1,5 @@ import sqlite3 as sql +import sqlite3 as sql import time from .AnalyteData import TimeCourse, Biomass, Product, Substrate, Reporter @@ -140,15 +141,21 @@ def calculate(self): repstage.calculate() print("Ran analysis in %0.1fs\n" % ((time.time() - t0))) - if settings.perform_curve_fit and 'OD600' in self.analyte_names: + biomass_analyte = None + if 'OD600' in self.analyte_names: + biomass_analyte = 'OD600' + elif 'OD700' in self.analyte_names: + biomass_analyte = 'OD700' + + if settings.perform_curve_fit and biomass_analyte: rep_list = [rep for rep in self.replicate_trials if rep.trial_identifier.strain.name.lower() not in ['blank', 'none']] rep_list = sorted(rep_list, key=lambda rep: str(rep.trial_identifier)) avg_list = [] error_list = [] for rep in rep_list: - avg_growth = rep.avg.analyte_dict['OD600'].fit_params['growth_rate'].parameter_value - std_growth = rep.std.analyte_dict['OD600'].fit_params['growth_rate'].parameter_value + avg_growth = rep.avg.analyte_dict[biomass_analyte].fit_params['growth_rate'].parameter_value + std_growth = rep.std.analyte_dict[biomass_analyte].fit_params['growth_rate'].parameter_value avg_list.append(avg_growth) error_list.append(std_growth / avg_growth * 100) max_growth_rate = max(avg_list) From ebeedb3985bf48948c358fd2458516cbbf43f913 Mon Sep 17 00:00:00 2001 From: Kaushik Date: Mon, 8 Mar 2021 02:08:27 -0500 Subject: [PATCH 12/20] accomodate OD700, OD660 as biomass analytes. Fix sorting issues with replicate trials --- impact/plotting.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/impact/plotting.py b/impact/plotting.py index 00723b0..4f3fa10 100644 --- a/impact/plotting.py +++ b/impact/plotting.py @@ -661,7 +661,7 @@ def time_profile_traces(replicate_trials=None, feature=None, analyte='OD600', co if colors is None: colors = get_colors(len(replicate_trials), colors=colors, cl_scales=cl_scales) - + replicate_trials = sorted(replicate_trials, key=lambda rep: str(rep.trial_identifier)) for index, replicate in enumerate(replicate_trials): # Determine how many points should be plotted required_num_pts = replicate.t[-1] * pts_per_hour @@ -1407,8 +1407,12 @@ def plot_growth_curve_fit(expt=None, format=None): fig['layout'].update(title='Growth curve fit for ' + str(rep.trial_identifier)) fig['layout']['yaxis1'].update(title='OD600') plot(fig, image=format) - avg_growth = rep.avg.analyte_dict['OD600'].fit_params['growth_rate'].parameter_value - std_growth = rep.std.analyte_dict['OD600'].fit_params['growth_rate'].parameter_value + if 'OD600' in rep.avg.analyte_dict: + avg_growth = rep.avg.analyte_dict['OD600'].fit_params['growth_rate'].parameter_value + std_growth = rep.std.analyte_dict['OD600'].fit_params['growth_rate'].parameter_value + elif 'OD700' in rep.avg.analyte_dict: + avg_growth = rep.avg.analyte_dict['OD700'].fit_params['growth_rate'].parameter_value + std_growth = rep.std.analyte_dict['OD700'].fit_params['growth_rate'].parameter_value print("\u03BC\u2090\u1D65 = %3.3f \u00B1 %3.3f /h" % (avg_growth, std_growth)) else: print("Curve fitting was not implemented for this experiment. Please check Impact settings.") From 40dc64cadd44b6f485f1be72167b08bf8363e4b5 Mon Sep 17 00:00:00 2001 From: Kaushik Date: Mon, 8 Mar 2021 02:13:56 -0500 Subject: [PATCH 13/20] fix issue with plotting error bars - error bars were previously plotted with the savgol filter --- impact/plotting.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/impact/plotting.py b/impact/plotting.py index 4f3fa10..1402d6e 100644 --- a/impact/plotting.py +++ b/impact/plotting.py @@ -291,7 +291,7 @@ def printGenericTimeCourse_plotly(replicateTrialList=None, dbName=None, strainsT else: y_avg = [replicate.avg.analyte_dict[product].data_vector for replicate in replicateTrialList if getattr(replicate.trial_identifier, sortBy) == unique or sort_by_flag is False] - y_std = [replicate.std.analyte_dict[product].data_vector for replicate in replicateTrialList + y_std = [replicate.std.analyte_dict[product].pd_series.values for replicate in replicateTrialList if getattr(replicate.trial_identifier, sortBy) == unique or sort_by_flag is False] label = ' titer (g/L)' @@ -491,7 +491,7 @@ def print_generic_timecourse_plotly(replicate_trial_list, product, colors, pts_p if product != 'OD600': dataLabel = '
titer (g/L)' y_avg = replicate.avg.analyte_dict[product].data_vector[::removePointFraction] - y_std = replicate.std.analyte_dict[product].data_vector[::removePointFraction] + y_std = replicate.std.analyte_dict[product].pd_series.values[::removePointFraction] elif normalize_to is not None: y_avg = replicate.avg.analyte_dict[product].get_normalized_data(normalize_to)[ ::removePointFraction] @@ -672,7 +672,7 @@ def time_profile_traces(replicate_trials=None, feature=None, analyte='OD600', co y=replicate.avg.analyte_dict[analyte].data_vector[::removePointFraction], error_y={ 'type' : 'data', - 'array' : replicate.std.analyte_dict[analyte].data_vector[::removePointFraction], + 'array' : replicate.std.analyte_dict[analyte].pd_series.values[::removePointFraction], 'visible': True, 'color' : colors[index]}, # mode=mode, @@ -735,7 +735,7 @@ def analyte_bar_trace(replicate_trials=None, feature=None, analyte='OD600', colo print("That is not a valid point") return None data_point = replicate.avg.analyte_dict[analyte].data_vector[index_to_plot] - error = replicate.std.analyte_dict[analyte].data_vector[index_to_plot] + error = replicate.std.analyte_dict[analyte].pd_series.values[index_to_plot] x_list.append(str(replicate.trial_identifier.strain) +" in " + str(replicate.trial_identifier.media)) y_list.append(data_point) From 33608489f8218991fb152fc67885e1f587fce38f Mon Sep 17 00:00:00 2001 From: Kaushik Date: Wed, 12 May 2021 15:51:17 -0400 Subject: [PATCH 14/20] plotly.plotly deprecated --- impact/plotting.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/impact/plotting.py b/impact/plotting.py index 1402d6e..1e2f8d6 100644 --- a/impact/plotting.py +++ b/impact/plotting.py @@ -44,7 +44,6 @@ from plotly import tools import plotly.graph_objs as go -import plotly.plotly as py import colorlover as cl import math @@ -434,7 +433,6 @@ def render_output_ploty(output_type, fig, number_of_columns=None, column_width_m plotly.tools.set_credentials_file(username=plotly_username, api_key=plotly_api_key) fig['layout'].update(width=number_of_columns * column_width_multiplier) random_file_name = ''.join(random.choice(string.ascii_letters) for _ in range(10)) + '.png' - py.image.save_as(fig, random_file_name, scale=img_scale) return random_file_name From fbddce5b9825f47dd1114bf2e20431134ea15e8c Mon Sep 17 00:00:00 2001 From: Kaushik Date: Wed, 12 May 2021 15:52:06 -0400 Subject: [PATCH 15/20] fixes issue with exception being raised if curve fitting is enabled for any expt containing products --- impact/core/analytes/Product.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/impact/core/analytes/Product.py b/impact/core/analytes/Product.py index a305f4c..486edb0 100644 --- a/impact/core/analytes/Product.py +++ b/impact/core/analytes/Product.py @@ -15,7 +15,5 @@ def curve_fit_data(self): from ..settings import settings verbose = settings.verbose - if self.trial_identifier.analyte_type == 'product': - raise Exception('Product curve fitting not implemented') - else: + if self.trial_identifier.analyte_type != 'product': raise Exception('Incorrect analyte_type') From 7b2e5004f2fd050239659d29495a1ccc3855e2dd Mon Sep 17 00:00:00 2001 From: Kaushik Date: Wed, 12 May 2021 15:52:43 -0400 Subject: [PATCH 16/20] re-enable proper stdev calculations for features --- impact/core/ReplicateTrial.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/impact/core/ReplicateTrial.py b/impact/core/ReplicateTrial.py index b7753c9..acc645b 100644 --- a/impact/core/ReplicateTrial.py +++ b/impact/core/ReplicateTrial.py @@ -228,9 +228,9 @@ def calculate_statistics(self): self.avg.analyte_dict[analyte].pd_series = self.replicate_df[analyte].mean(axis=1) self.std.analyte_dict[analyte].pd_series = self.replicate_df[analyte].std(axis=1) - #if self.blank_subtraction_flag and analyte in self.blank_subtracted_analytes: - # self.std.analyte_dict[analyte].pd_series = np.sqrt(np.square(self.std.analyte_dict[analyte].pd_series) - # + np.square(self.blank.std.analyte_dict[analyte].pd_series)) + if self.blank_subtraction_flag and analyte in self.blank_subtracted_analytes: + self.std.analyte_dict[analyte].pd_series = np.sqrt(np.square(self.std.analyte_dict[analyte].pd_series) + + np.square(self.blank.std.analyte_dict[analyte].pd_series)) #Assume that stdev for all values <0 is simply 0 since negative values are forced to be 0. #Negative values of any analyte in this context is not possible #self.std.analyte_dict[analyte].pd_series[self.avg.analyte_dict[analyte].pd_series<=0] = 0 @@ -255,17 +255,17 @@ def calculate_statistics(self): feature_data = feature_object.data if feature_data is not None: single_trial_data.append(feature_data) - #if self.blank: - # with np.errstate(divide='ignore'): - - # temp_var = np.square(feature_data)*(np.square(self.blank.std.analyte_dict[analyte].pd_series\ - # - # /trial.analyte_dict[analyte].data_vector)+ - # np.square(self.blank.std.analyte_dict[biomass_analyte].pd_series - # /trial.analyte_dict[biomass_analyte].data_vector)) - # temp_var[trial.analyte_dict[analyte].pd_series == 0] = 0 - # temp_var[trial.analyte_dict[biomass_analyte].pd_series == 0] = 0 - # single_trial_var.append(temp_var) + if self.blank: + with np.errstate(divide='ignore'): + + temp_var = np.square(feature_data)*(np.square(self.blank.std.analyte_dict[analyte].pd_series\ + + /trial.analyte_dict[analyte].data_vector)+ + np.square(self.blank.std.analyte_dict[biomass_analyte].pd_series + /trial.analyte_dict[biomass_analyte].data_vector)) + temp_var[trial.analyte_dict[analyte].pd_series == 0] = 0 + temp_var[trial.analyte_dict[biomass_analyte].pd_series == 0] = 0 + single_trial_var.append(temp_var) if single_trial_data: rep_mean = sum(single_trial_data)/len(trial_list) else: @@ -275,8 +275,8 @@ def calculate_statistics(self): rep_var = pd.Series(data=np.var(single_trial_data,axis=0),index=trial_list[-1].analyte_dict[analyte].time_vector) # Variance on dataset due to blanks is average of individual standard deviation squared. # Total variance is variance due to blanks + variance between individual normalized datapoints - #if self.blank: - # rep_var = sum(single_trial_var)/np.square(len(single_trial_var)) + rep_var + if self.blank: + rep_var = sum(single_trial_var)/np.square(len(single_trial_var)) + rep_var setattr(self.std.analyte_dict[analyte], feature.name, np.sqrt(rep_var).values) setattr(self.avg.analyte_dict[analyte], feature.name, rep_mean) From 82241045f51981fd06bd8f5eb5ff5ff58792e3d1 Mon Sep 17 00:00:00 2001 From: Kaushik Date: Wed, 12 May 2021 16:25:19 -0400 Subject: [PATCH 17/20] testing failing build with 3.6 --- tests/test_database.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/test_database.py b/tests/test_database.py index 2481569..d4488eb 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -68,11 +68,13 @@ def test_time_course(self): tc.add_timepoint(impt.TimePoint(trial_identifier=ti, time=time, data=data)) tc.pd_series = pd.Series([timePoint.data for timePoint in tc.time_points],index=[timePoint.time for timePoint in\ tc.time_points]) + print(tc.data_vector) self.session.add(tc) self.session.commit() del tc tc = self.session.query(impt.TimeCourse).all()[0] + print(tc.data_vector) tc.calculate() self.assertCountEqual(tc.data_vector,[0,5,10]) From 7c569ba68012ad78a2b290a4154e3b7ef177ff8f Mon Sep 17 00:00:00 2001 From: Kaushik Date: Wed, 12 May 2021 16:53:19 -0400 Subject: [PATCH 18/20] fixes issue with sqlalchemy's latest version. --- requirements.txt | 2 +- tests/test_database.py | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 0260975..e892ccb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -sqlalchemy +sqlalchemy==1.3.23 numpy>=1.10.4 pandas scipy>=0.17.0 diff --git a/tests/test_database.py b/tests/test_database.py index d4488eb..2481569 100644 --- a/tests/test_database.py +++ b/tests/test_database.py @@ -68,13 +68,11 @@ def test_time_course(self): tc.add_timepoint(impt.TimePoint(trial_identifier=ti, time=time, data=data)) tc.pd_series = pd.Series([timePoint.data for timePoint in tc.time_points],index=[timePoint.time for timePoint in\ tc.time_points]) - print(tc.data_vector) self.session.add(tc) self.session.commit() del tc tc = self.session.query(impt.TimeCourse).all()[0] - print(tc.data_vector) tc.calculate() self.assertCountEqual(tc.data_vector,[0,5,10]) From bca986846a097d02a9673b3140a14a9eabd975a8 Mon Sep 17 00:00:00 2001 From: Kaushik Date: Wed, 12 May 2021 16:53:38 -0400 Subject: [PATCH 19/20] can handle newer versions of plotly now --- requirements_plotting.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements_plotting.txt b/requirements_plotting.txt index 441ed02..42c2b99 100644 --- a/requirements_plotting.txt +++ b/requirements_plotting.txt @@ -1,3 +1,3 @@ -plotly==2.7.0 +plotly colorlover matplotlib>=1.5.1 \ No newline at end of file From 18a50a3b291d739f7c6a615df30feff227fce74e Mon Sep 17 00:00:00 2001 From: Kaushik Date: Wed, 12 May 2021 17:38:22 -0400 Subject: [PATCH 20/20] cleaning up unused packages --- impact/core/Experiment.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/impact/core/Experiment.py b/impact/core/Experiment.py index 9dfcb94..f57c5e2 100644 --- a/impact/core/Experiment.py +++ b/impact/core/Experiment.py @@ -1,18 +1,9 @@ -import sqlite3 as sql -import sqlite3 as sql import time from .AnalyteData import TimeCourse, Biomass, Product, Substrate, Reporter from .ReplicateTrial import ReplicateTrial from .SingleTrial import SingleTrial -try: - from pyexcel_xlsx import get_data -except ImportError as e: - print('Could not import pyexcel') - print(e) - pass - from ..database import Base from sqlalchemy import Column, Integer, ForeignKey, Float, Date, String, event from sqlalchemy.orm import relationship