Hi! I’m using the Artifact API to export all the results of beta diversity group significance testing to a csv. I’ve been able to do this with the alpha diversity significance and correlation tests, and I have been able to get this to work for most of the columns in my metadata, but there are a few problematic ones.
I’ve included a simplified version of the code that still returns the error below. The actual code loops through the metrics and categorical metadata columns.
I’m running QIIME2 2020.6 and python 3.6.10
Just to make sure it’s clear, the general structure is to:
- Run the core_metrics_phylogenetic pipeline
- Export the visualization from group significance testing
- Parse the resulting index.html file, extracting the relevant info
- Create a data frame from the information
- When looping through the columns, I concat each dataframe to make a main dataframe with the results of all columns for each specific metric
- Export each metric’s dataframe as a csv (not included)
feattable = Artifact.load('noMito_noChloro-filtered-table.qza')
tree = Artifact.load('insertion-tree.qza')
metadata = Metadata.load('meta-v3.tsv')
core_metrics = diversity.pipelines.core_metrics_phylogenetic(table = feattable,
phylogeny = tree,
sampling_depth = 13100,
metadata = metadata_API)
metric = 'jaccard'
column = 'Race_Ethnicity-cat'
output_dir = f'{metric}_sig_{column}'
matrix = getattr(core_metrics, f'{metric}_distance_matrix')
df = pd.DataFrame()
try:
significance = beta_group_significance(distance_matrix=matrix,
metadata=metadata_API.get_column(column))
significance.visualization.export_data(output_dir)
with open(f'{output_dir}/index.html') as f:
soup = BeautifulSoup(f, 'html.parser')
keys = [key.string for key in soup.find_all('th')[2:]]
#keys output is ['method name','test statistic name','sample size','number of groups',
# 'test statistic','p-value','number of permutations']
values = np.array([value.string for value in soup.find_all('td')])
values.reshape((1,7))
#values is a vertical array with the corresponding values for each key
col_index.append(column)
#Make the data frame for the current column, concatenate it with the one for other columns
#under the given metric
#look into changing to df[f'{column}'] == values? Bigger fish to fry rn though
dfn = pd.DataFrame(values,index=keys)
df = pd.concat([df,dfn],axis=1)
except ValueError:
print(f'{column} could not be added, as each sample is in a different category.\n Moving to the next column.')
The error I’m met with is:
FileNotFoundError: [Errno 2] No such file or directory:
'/var/folders/t7/vhs8v0wd2xqb851ps1hzn0mm0000gn/T/qiime2-temp-ier4fo3c/Asian/Pacific%20Islander-boxplots.png'.
The specific line for the traceback is metadata=metadata_API.get_column(column))
The whole traceback is:
`FileNotFoundError Traceback (most recent call last).`
`<ipython-input-58-8ae548feea3a> in <module>`
` 16 try:`
`17 significance = beta_group_significance(distance_matrix=matrix,`
` ---> 18 metadata=metadata_API.get_column(column))`
`19 significance.visualization.export_data(output_dir)`
<decorator-gen-432> in beta_group_significance(distance_matrix, metadata, method, pairwise, permutations)
/opt/anaconda3/envs/qiime2-2020.6/lib/python3.6/site-packages/qiime2/sdk/action.py in bound_callable(*args, **kwargs)
243 # Execute
244 outputs = self._callable_executor_(scope, callable_args,
--> 245 output_types, provenance)
246
247 if len(outputs) != len(self.signature.outputs):
/opt/anaconda3/envs/qiime2-2020.6/lib/python3.6/site-packages/qiime2/sdk/action.py in _callable_executor_(self, scope, view_args, output_types, provenance)
450 # will also need to be updated to support OutPath instead of str.
451 with tempfile.TemporaryDirectory(prefix='qiime2-temp-') as temp_dir:
--> 452 ret_val = self._callable(output_dir=temp_dir, **view_args)
453 if ret_val is not None:
454 raise TypeError(
/opt/anaconda3/envs/qiime2-2020.6/lib/python3.6/site-packages/q2_diversity/_beta/_visualizer.py in beta_group_significance(output_dir, distance_matrix, metadata, method, pairwise, permutations)
194 fig = ax.get_figure()
195 fig.savefig(os.path.join(output_dir, '%s-boxplots.png' %
--> 196 urllib.parse.quote(str(group_id))))
197 fig.savefig(os.path.join(output_dir, '%s-boxplots.pdf' %
198 urllib.parse.quote(str(group_id))))
/opt/anaconda3/envs/qiime2-2020.6/lib/python3.6/site-packages/matplotlib/figure.py in savefig(self, fname, transparent, **kwargs)
2201 self.patch.set_visible(frameon)
2202
-> 2203 self.canvas.print_figure(fname, **kwargs)
2204
2205 if frameon:
/opt/anaconda3/envs/qiime2-2020.6/lib/python3.6/site-packages/matplotlib/backend_bases.py in print_figure(self, filename, dpi, facecolor, edgecolor, orientation, format, bbox_inches, **kwargs)
2124 orientation=orientation,
2125 bbox_inches_restore=_bbox_inches_restore,
-> 2126 **kwargs)
2127 finally:
2128 if bbox_inches and restore_bbox:
/opt/anaconda3/envs/qiime2-2020.6/lib/python3.6/site-packages/matplotlib/backends/backend_agg.py in print_png(self, filename_or_obj, metadata, pil_kwargs, *args, **kwargs)
533 else:
534 renderer = self.get_renderer()
--> 535 with cbook.open_file_cm(filename_or_obj, "wb") as fh:
536 _png.write_png(renderer._renderer, fh, self.figure.dpi,
537 metadata={**default_metadata, **metadata})
/opt/anaconda3/envs/qiime2-2020.6/lib/python3.6/contextlib.py in __enter__(self)
79 def __enter__(self):
80 try:
---> 81 return next(self.gen)
82 except StopIteration:
83 raise RuntimeError("generator didn't yield") from None
/opt/anaconda3/envs/qiime2-2020.6/lib/python3.6/site-packages/matplotlib/cbook/__init__.py in open_file_cm(path_or_file, mode, encoding)
416 def open_file_cm(path_or_file, mode="r", encoding=None):
417 r"""Pass through file objects and context-manage `.PathLike`\s."""
--> 418 fh, opened = to_filehandle(path_or_file, mode, True, encoding)
419 if opened:
420 with fh:
/opt/anaconda3/envs/qiime2-2020.6/lib/python3.6/site-packages/matplotlib/cbook/__init__.py in to_filehandle(fname, flag, return_opened, encoding)
401 fh = bz2.BZ2File(fname, flag)
402 else:
--> 403 fh = open(fname, flag, encoding=encoding)
404 opened = True
405 elif hasattr(fname, 'seek'):
FileNotFoundError: [Errno 2] No such file or directory: '/var/folders/t7/vhs8v0wd2xqb851ps1hzn0mm0000gn/T/qiime2-temp-ier4fo3c/Asian/Pacific%20Islander-boxplots.png'
Any ideas as to why I’m getting this error for this column? I also get it for my date collected column, which I’m meaning to convert to not be categorical, so that isn’t quite as important.
Is there a way to accomplish this without exporting the data and then parsing it? I couldn’t find a way to pull it directly from the artifact in the API.
(Also, I apologize if my code is a little clunky, I’m still somewhat new to python, and I haven’t had a chance to clean this code up yet.)
Thanks!