% PhytoBase: global open-ocean phytoplankton occurrence data set (PANGAEA, 2019).
% - Only the acronym PHYTOBASE is brace-protected in the title, so sentence-casing
%   styles keep its capitals while the rest of the title stays under style control.
% - `url` is the resolver form of `doi`; it is retained for styles that print
%   `url` but ignore `doi`.
@misc{righetti2019pags,
  author    = {Righetti, Damiano and Vogt, Meike and Zimmermann, Niklaus E. and Guiry, Michael D. and Gruber, Nicolas},
  title     = {{PHYTOBASE}: A global synthesis of open ocean phytoplankton occurrences},
  year      = {2019},
  publisher = {PANGAEA},
  type      = {data set},
  doi       = {10.1594/PANGAEA.904397},
  url       = {https://doi.org/10.1594/PANGAEA.904397},
  abstract  = {Marine phytoplankton are responsible for half of the global net primary
               production and perform multiple other ecological functions and services of
               the global ocean. These photosynthetic organisms comprise more than 4300
               marine species, but their biogeographic patterns and the resulting species
               diversity are poorly known, mostly owing to severe data limitations. Here,
               we compile, synthesize, and harmonize marine phytoplankton occurrence data
               from the two largest biological occurrence archives (Ocean Biogeographic
               Information System; OBIS, and Global Biodiversity Information Facility;
               GBIF) and three recent data collections. The resulting PhytoBase data set
               contains over 1.36 million marine phytoplankton occurrence records (1.28
               million at the level of species) for a total of 1704 species, spanning the
               principal groups of the Bacillariophyceae, Dinoflagellata, and Haptophyta
               as well as several other groups. This data compilation increases the
               amount of phytoplankton occurrence data available through the single
               largest contributing archive (OBIS) by 65{\%}. Data span all ocean basins,
               latitudes and most seasons. Analyzing the oceanic inventory of sampled
               phytoplankton species richness at the broadest spatial scales possible,
               using a resampling procedure, we find that richness tends to saturate in
               the pantropics at {\textasciitilde}93{\%} of all species in our database,
               at {\textasciitilde}64{\%} in temperate waters, and at
               {\textasciitilde}35{\%} in the cold Northern Hemisphere, while the Southern
               Hemisphere remains underexplored. We provide metadata on the cruise,
               research institution, depth, and date for each occurrence record.
               Cell-counts for 193 763 records are also included. We strongly recommend
               consideration of global spatiotemporal biases in sampling intensity and
               varying taxonomic sampling scopes between research programs when analyzing
               the occurrence database. Including such information into statistical
               analysis tools, such as species distribution models, may serve to project
               the diversity, niches, and distribution of species in the contemporary and
               future ocean, opening the door for a quantification of macro-ecological
               phytoplankton patterns.},
}