
Compare revisions

Changes are shown as if the source revision was being merged into the target revision.

Commits on Source (213)
Showing 770 additions and 305 deletions
......@@ -63,12 +63,12 @@ fi
cd ${builddir}
# prepare cover
#link_pdf cover cover.pdf
#link_pdf experiment experiment.pdf
#link_pdf datacenter datacenter.pdf
#link_pdf research research.pdf
#link_pdf transfer transfer.pdf
#link_pdf additional additional.pdf
link_pdf cover cover.pdf
link_pdf experiment experiment.pdf
link_pdf datacenter datacenter.pdf
link_pdf research research.pdf
link_pdf transfer transfer.pdf
link_pdf additional additional.pdf
build_from_source user-support main.tex *.PNG
build_from_source ams AMS-report-2019.tex AMS_nuovo.pdf contributors.pdf He-MC.pdf input_output.jpg production_jobs.jpg
......@@ -80,10 +80,10 @@ link_pdf belle Cnaf-2019-5.0.pdf
#build_from_source cosa cosa.tex biblio.bib beegfs.PNG
build_from_source cnprov cnprov.tex
build_from_source cta CTA_annualreport_2018_v1.tex *.eps
link_pdf cuore cuore.pdf
build_from_source cuore cuore.tex cuore.bib
build_from_source cupid main.tex cupid-biblio.bib
build_from_source dampe main.tex *.jpg *.png
link_pdf darkside ds-annual-report-2019.pdf
build_from_source darkside ds-annual-report-2019.tex
#build_from_source eee eee.tex EEEarch.eps EEEmonitor.eps EEEtracks.png ELOGquery.png request.png
#build_from_source exanest exanest.tex biblio.bib monitoring.PNG storage.png
#build_from_source fazia fazia.tex
......@@ -106,11 +106,11 @@ build_from_source xenon main.tex xenon-computing-model.pdf
build_from_source sc18 SC18.tex *.png
#build_from_source mw-esaco mw-esaco.tex *.png
#build_from_source mw-kube mw-kube.tex
#build_from_source mw-cdmi-storm mw-cdmi-storm.tex *.png *.jpeg
#build_from_source mw-software mw-software.tex
#build_from_source mw-iam mw-iam.tex
## Research and Developments
build_from_source sd_iam main.tex biblio.bib *.png
build_from_source sd_storm main.tex biblio.bib *.png
build_from_source sd_storm2 main.tex biblio.bib *.png
build_from_source sd_nginx_voms main.tex biblio.bib *.png
#build_from_source na62 na62.tex
link_pdf padme 2019_PADMEcontribution.pdf
......@@ -123,7 +123,7 @@ build_from_source tier1 tier1.tex *.png
build_from_source HTC_testbed HTC_testbed_AR2018.tex
build_from_source farming ARFarming2018.tex *.png *.jpg
#build_from_source dynfarm dynfarm.tex
build_from_source storage storage.tex
build_from_source storage storage.tex *.PNG
#build_from_source seagate seagate.tex biblio.bib *.png *.jpg
#build_from_source dataclient dataclient.tex
#build_from_source ltpd ltpd.tex *.png
......@@ -132,24 +132,25 @@ build_from_source net main.tex *.png
#build_from_source ssnn2 vmware.tex *.JPG *.jpg
#build_from_source infra Chiller.tex chiller-location.png
build_from_source audit Audit-2018.tex
build_from_source audit Audit-2018.tex image.png
#build_from_source cloud_cnaf cloud_cnaf.tex *.png
build_from_source dmsq dmsq2018.tex ar2018.bib
#build_from_source st StatMet.tex sm2017.bib
build_from_source ds_eoscpilot ds_eoscpilot.tex *.png
build_from_source ds_eoschub ds_eoschub.tex
build_from_source ds_eoschub ds_eoschub.tex *.png
build_from_source ds_cloud_c ds_cloud_c.tex *.png
build_from_source ds_infn_cc ds_infn_cc.tex *.png
build_from_source ds_devops_pe ds_devops_pe.tex
build_from_source ds_devops_pe ds_devops_pe.tex *.png
#build_from_source cloud_b cloud_b.tex *.png *.jpg
#build_from_source cloud_c cloud_c.tex *.png *.pdf
#build_from_source cloud_d cloud_d.tex *.png
build_from_source sdds-xdc SDDS-XDC.tex *.png
build_from_source sdds-deep SDDS-DEEP.tex *.png
build_from_source PhD_DataScience_2018 PhD-DataScience-2018.tex
build_from_source chnet dhlab.tex *.png
build_from_source pett pett.tex bibliopett.bib
#build_from_source iso iso.tex 27001.png biblioiso.bib
build_from_source summerstudent summerstudent.tex *.png
pdflatex ${topdir}/cnaf-annual-report-2018.tex \
&& pdflatex ${topdir}/cnaf-annual-report-2018.tex 2> /dev/null \
......
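For reference, a minimal sketch of what the link_pdf and build_from_source helpers invoked above might look like; the actual definitions live earlier in the script, and the contributions/ layout used here is an assumption, not the repository's real structure.

# Hypothetical helper definitions (a sketch, not the actual ones from this script).
# link_pdf <name> <pdf>: expose a ready-made PDF contribution to the main document.
link_pdf() {
  local name=$1 pdf=$2
  mkdir -p papers
  ln -sf "${topdir}/contributions/${name}/${pdf}" "papers/${name}.pdf"
}

# build_from_source <name> <main.tex> [aux files...]: copy the LaTeX sources plus
# any figures/bibliography, compile twice (with an optional bibtex pass) and
# expose the resulting PDF under papers/.
build_from_source() {
  local name=$1 tex=$2
  mkdir -p "${name}" papers
  cp ${@:2} "${name}/"        # unquoted on purpose so wildcard arguments expand
  ( cd "${name}" \
      && pdflatex -interaction=nonstopmode "${tex}" \
      && { bibtex "${tex%.tex}" || true; } \
      && pdflatex -interaction=nonstopmode "${tex}" )
  cp "${name}/${tex%.tex}.pdf" "papers/${name}.pdf"
}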
......@@ -28,7 +28,7 @@
%\author{}
%\maketitle
%\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/cover.pdf}
\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/cover.pdf}
\newpage
\thispagestyle{empty}
......@@ -82,7 +82,46 @@ Tel. +39 051 209 5475, Fax +39 051 209 5477\\
\markboth{\MakeUppercase{Introduction}}{\MakeUppercase{Introduction}}
\chapter*{Introduction}
\thispagestyle{plain}
Introducing the sixth annual report of CNAF...
\small The first months of 2018 were still affected by the flooding suffered in November 2017, and it was only in March 2018
that our data center was able to resume its full activity.
Despite this, the overall performance of the Tier 1 for the LHC experiments and for the many other astroparticle and nuclear physics experiments was very good,
and enough to place CNAF's Tier 1 among the most productive ones in the WLCG ecosystem, as the reports of the experiments in this document show.
The activities of both the HPC clusters and the Cloud@CNAF infrastructure also resumed regular operations after the systems were brought back to CNAF
from the sites that had temporarily hosted them.
The flooding did, however, have a beneficial repercussion in speeding up the decision to find a new location for our data center.
The move was already planned in order to face the challenges of High-Luminosity LHC and of the astroparticle experiments that will begin their data acquisition
in the second half of 2020, but the dramatic event of November 2017 made the fragility and weaknesses of the current installation clear.
Also, during 2018 three developments matured, paving the way for the definition of a development strategy towards both a new site and a new computing model,
one that includes the possibility of exploiting the computing power of the HPC systems: the availability of a large area, the Bologna Tecnopolo, where our
new data center can be installed; the possibility of a joint upgrade together with the Italian supercomputing center CINECA thanks to European and Italian funding;
and the additional funds from the Italian Government for a project aimed at strengthening the INFN computing infrastructures.
Our R\&D activities have proceeded regularly, meeting the expected milestones and deliverables.
In particular, the path towards a European Open Science Cloud (EOSC) has seen significant progress thanks to the EOSCHub and EOSCPilot projects,
in both of which CNAF plays an important role. Contributions to the EOSC have also come from other H2020 projects in which we are involved,
namely XDC-eXtreme Data Cloud, which focuses mainly on data management services evolved for a context of distributed resources,
and DEEP-Hybrid DataCloud, which addresses the need to support intensive computing techniques, requiring specialized HPC hardware,
to explore very large data sets.
The External Projects and Technology Transfer (PETT) Organizational Unit has contributed to various projects in the field of computing,
communication of science, technology transfer and education. Great effort has been dedicated to the consolidation of the Technology Transfer Laboratory (INFN-TTLab),
a collaboration between CNAF and the INFN divisions of Bologna and Ferrara with the goal of promoting the transfer of our know-how towards regional enterprises.
2018 has also been the first full year in which the TTLab operated an ISO-27001 ISMS consisting of a subset of the Data Center resources.
Such certification, which was acquired in order to qualify for storing and managing sensitive data,
could open new opportunities for the exploitation of our resources in the near future.
Also noteworthy is the involvement of CNAF in the INFN Cultural Heritage Network (CHNet),
where our expertise in Cloud technologies and software development is put to good use for the preparation of a digital library
where members of the network can safely store their datasets and have access to applications for their processing.
This report on the accomplishments of CNAF during 2018 arrives just at the end of 2019.
The delay is due to higher-priority commitments that have overlapped with its finalization,
but we are well aware that such a situation affects its usefulness as a means of transparency towards our stakeholders
and of recognition of the hard work and dedication of the personnel of the Center.
To prevent similar situations in the future we are already adopting some corrections to the editing process
for the report on the year 2019, and we are also planning some interesting surprises that we hope will please our readers.
\begin{flushright}
\parbox{0.7\textwidth}{
......@@ -127,7 +166,7 @@ Introducing the sixth annual report of CNAF...
%\addcontentsline{toc}{chapter}{Scientific Exploitation of CNAF ICT Resources}
%\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/esperiment.pdf}
%\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/experiment.pdf}
%\ip{Scientific Exploitation of CNAF ICT Resources}
......@@ -141,10 +180,10 @@ Introducing the sixth annual report of CNAF...
\phantomsection
\addcontentsline{toc}{part}{Scientific Exploitation of CNAF ICT Resources}
\addtocontents{toc}{\protect\mbox{}\protect\hrulefill\par}
%\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/experiment.pdf}
\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/experiment.pdf}
\cleardoublepage
\ia{User and Operational Support at CNAF}{user-support}
\ia{ALICE computing at the INFN CNAF Tier1}{alice}
\ia{ALICE computing at the INFN CNAF Tier 1}{alice}
\ia{AMS-02 data processing and analysis at CNAF}{ams}
\ia{The ATLAS experiment at the INFN CNAF Tier 1}{atlas}
\ia{The Borexino experiment at the INFN-CNAF}{borexino}
......@@ -182,13 +221,13 @@ Introducing the sixth annual report of CNAF...
\phantomsection
\addcontentsline{toc}{part}{The Tier 1 and Data center}
\addtocontents{toc}{\protect\mbox{}\protect\hrulefill\par}
%\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/datacenter.pdf}
\ia{The INFN Tier-1}{tier1}
\ia{The INFN-Tier1: the computing farm}{farming}
\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/datacenter.pdf}
\ia{The INFN Tier 1}{tier1}
\ia{The INFN-Tier 1: the computing farm}{farming}
\ia{Data management and storage systems}{storage}
%\ia{Evaluation of the ClusterStor G200 Storage System}{seagate}
%\ia{Activity of the INFN CNAF Long Term Data Preservation (LTDP) group}{ltpd}
\ia{The INFN-Tier1: Network and Security}{net}
\ia{The INFN-Tier 1: Network and Security}{net}
%\ia{Cooling system upgrade and Power Usage Effectiveness improvement in the INFN CNAF Tier 1 infrastructure}{infra}
%\ia{National ICT Services Infrastructure and Services}{ssnn1}
%\ia{National ICT Services hardware and software infrastructures for Central Services}{ssnn2}
......@@ -202,48 +241,46 @@ CREAM-CE/LSF to HTCondor-CE/HTCondor}{HTC_testbed}
\phantomsection
\addcontentsline{toc}{part}{Research and Developments}
\addtocontents{toc}{\protect\mbox{}\protect\hrulefill\par}
%\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/research.pdf}
\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/research.pdf}
\cleardoublepage
\ia{Internal Auditing INFN for GDPR compliance}{audit}
%\ia{Continuous Integration and Delivery with Kubernetes}{mw-kube}
%\ia{Middleware support, maintenance and development}{mw-software}
%\ia{Evolving the INDIGO IAM service}{mw-iam}
%\ia{Esaco: an OAuth/OIDC token introspection service}{mw-esaco}
%\ia{StoRM Quality of Service and Data Lifecycle support through CDMI}{mw-cdmi-storm}
%\ia{A low-cost platform for space software development}{lowcostdev}
\ia{Evolving the INDIGO IAM service}{sd_iam}
\ia{StoRM maintenance and evolution}{sd_storm}
\ia{StoRM 2: initial design and development activities}{sd_storm2}
\ia{A VOMS module for the Nginx web server}{sd_nginx_voms}
\ia{Comparing Data Mining Techniques for Software Defect Prediction}{dmsq}
%\ia{Summary of a tutorial on statistical methods}{st}
%\ia{Dynfarm: Transition to Production}{dynfarm}
%\ia{Official testing and increased compatibility for Dataclient}{dataclient}
%\ia{Common software lifecycle management in external projects: Placeholder}{ds_devops_pe}
%\ia{EOSC-hub: Placeholder}{ds_eoschub}
\ia{Common software lifecycle management in external projects}{ds_devops_pe}
\ia{EOSC-hub: contributions to project achievements}{ds_eoschub}
\ia{EOSCpilot - Interoperability aspects and results}{ds_eoscpilot}
\ia{Cloud@CNAF Management and Evolution}{ds_cloud_c}
\ia{INFN CorporateCloud: Management and evolution}{ds_infn_cc}
\ia{eXtreme DataCloud project: Advanced data management services for distributed e-infrastructures}{sdds-xdc}
\ia{DEEP-HybridDataCloud project: Hybrid services for distributed e-infrastructures}{sdds-deep}
\ia{DHLab: a digital library for the INFN Cultural Heritage Network}{chnet}
\cleardoublepage
\thispagestyle{empty}
\phantomsection
\addcontentsline{toc}{part}{Technology transfer and other projects}
\addcontentsline{toc}{part}{Technology transfer, outreach and more}
\addtocontents{toc}{\protect\mbox{}\protect\hrulefill\par}
%\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/transfer.pdf}
\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/transfer.pdf}
\cleardoublepage
\ia{External Projects and Technology Transfer}{pett}
%\ia{The ISO 27001 Certification}{iso}
%\ia{COmputing on SoC Architectures: the COSA project at CNAF}{cosa}
%\ia{The ExaNeSt project - activities at CNAF}{exanest}
\ia{INFN CNAF log analysis: a first experience with summer students}{summerstudent}
\ia{The annual international conference of high performance computing: SC18 from INFN point of view}{sc18}
\ia{Infrastructures and Big Data processing as pillars in the XXXIII PhD course in Data Science and Computation}{PhD_DataScience_2018}
\ia{Internal Auditing INFN for GDPR compliance}{audit}
\cleardoublepage
\thispagestyle{empty}
\phantomsection
\addcontentsline{toc}{part}{Additional information}
\addtocontents{toc}{\protect\mbox{}\protect\hrulefill\par}
%\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/additional.pdf}
\includepdf[pages=1, pagecommand={\thispagestyle{empty}}]{papers/additional.pdf}
\cleardoublepage
\ia{Infrastructures and Big Data processing as pillars in the XXXIII PhD couse in Data Sciece and Computation}{PhD_DataScience_2018}
\ia{The annual international conference of high performance computing: SC18 from INFN point of view}{sc18}
\phantomsection
\addcontentsline{toc}{chapter}{Organization}
......@@ -262,14 +299,14 @@ Gaetano Maron
\subsection*{Scientific Advisory Panel}
\begin{tabular}{ l l p{7cm} }
\textit{Chairperson} & Michael Ernst & \textit{\small Brookhaven National Laboratory, USA} \\
& Gian Paolo Carlino & \textit{\small INFN -- Sezione di Napoli, Italy} \\
& Patrick Fuhrmann & \textit{\small Deutsches Elektronen-Synchrotron, Germany} \\
& Josè Hernandez & \textit{\small Centro de Investigaciones Energéticas, Medioambientales y Tecnológicas, Spain} \\
& Donatella Lucchesi & \textit{\small Università di Padova, Italy} \\
& Vincenzo Vagnoni & \textit{\small INFN -- Sezione di Bologna, Italy} \\
& Pierre-Etienne Macchi & \textit{\small IN2P3/CNRS, France}
\textit{Chairperson} & Eleonora Luppi & \textit{\small Università di Ferrara, Italy} \\
& Roberto Saban & \textit{\small INFN, Italy} \\
& Laura Perini & \textit{\small Università di Milano, Italy} \\
& Volker Beckman & \textit{\small IN2P3, France} \\
& Volker Guelzow & \textit{\small Deutsches Elektronen-Synchrotron, Germany} \\
& Alberto Pace & \textit{\small CERN} \\
& Eric Lancon & \textit{\small Brookhaven National Laboratory, USA} \\
& Josè Hernandez & \textit{\small Centro de Investigaciones Energéticas, Medioambientales y Tecnológicas, Spain}
\end{tabular}
% open local environment where the format of section and subsection
......
......@@ -11,15 +11,15 @@
\begin{document}
\title{Evaluating Migration of INFN--T1 from CREAM-CE/LSF to
\title{Evaluating Migration of INFN--Tier 1 from CREAM-CE/LSF to
HTCondor-CE/HTCondor}
\author{Stefano Dal Pra$^1$}
\address{$^1$ INFN-CNAF, viale Berti-Pichat 6/2, 40127 Bologna, Italy}
\address{$^1$ INFN-CNAF, Bologna, IT}
\ead{stefano.dalpra@cnaf.infn.it}
\begin{abstract}
The Tier--1 datacentre provides computing resources for a variety of HEP and
The Tier 1 data center provides computing resources for a variety of HEP and
Astrophysics experiments, organized in Virtual Organizations submitting their
jobs to our computing facilities through Computing Elements, acting as Grid
interfaces to the Local Resource Manager. We planned to phase out our
......@@ -32,7 +32,7 @@ HTCondor-CE/HTCondor}
\section{Introduction}
The INFN-T1 currently provides a computing power of about 400KHS06, 35000
The INFN-Tier 1 currently provides a computing power of about 400 kHS06 and 35000
slots on one thousand physical Worker Nodes. These resources are accessed
through the Grid by 24 Grid VOs and locally by 25 user groups.
......@@ -48,10 +48,10 @@ an effective solution for several years. However, the compatibility between
CREAM and HTCondor seems to be less tight than with LSF. Moreover, active
development of CREAM has recently ceased and thus we cannot expect new
versions to be released, nor better HTCondor support to be implemented by an
officiale development team. We decided to migrate our batch system solution
official development team. We decided to migrate our batch system solution
from LSF to HTCondor, thus we need to also change our CEs. We have selected
HTCondor-CE as a natural choice, because it is maintained by the same
development team of HTCondor. Following we provide a report about our
development team of HTCondor. In the following we provide a report about our
experience with HTCondor and HTCondor--CE.
\section{The HTCondor cluster}
......@@ -62,7 +62,7 @@ accounting systems can be integrated with HTCondor and HTCondor--CE and
finally to devise a reasonable migration plan, a simple small HTCondor 8.6.13
cluster has been set up during spring 2018. An HTCondor--CE was soon added, in
late April. HTCondor is a very mature open-source product, deployed at several
major Tier-1 for years, thus we already know that it will certainly fit our
major Tier 1 sites for years, thus we already know that it will certainly fit our
use cases. The HTCondor--CE, on the other hand, is a more recent product, and
a number of issues might be too problematic for us to deal with. Our focus is
on ensuring that this CE implementation can be a viable solution for us.
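As an illustration of the kind of viability check involved, an HTCondor-CE endpoint can be smoke-tested end to end with the client tools shipped with the CE; the hostname and VO below are placeholders, and a working VOMS client configuration is assumed.

# Hypothetical smoke test of an HTCondor-CE (hostname and VO are placeholders).
voms-proxy-init --voms cms                         # obtain a VOMS proxy for a supported VO
condor_ce_trace --debug htc-ce.example.infn.it     # submit a trivial job through the CE and trace it
condor_ce_q                                        # on the CE host: inspect jobs in the CE schedd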
......@@ -71,7 +71,7 @@ about ensuring that this CE implementation can be a viable solution for us.
The test cluster consists of:
\begin{itemizedot}
\item a HTCondor-CE on top of
\item an HTCondor--CE on top of
\item an HTCondor Central Manager and Collector
......@@ -82,24 +82,24 @@ The test cluster consists of:
The first CE installation was a bit tricky. The RPMs were available from OSG
repositories only, meaning that a number of default settings and dependencies
were unmet for EGI standards. Short after however, HTCondor--CE RPMs were made
available on \ the same official repository of HTCondor.
were unmet for EGI standards. Shortly after, however, HTCondor--CE RPMs were made
available on the same official repository as HTCondor.
\subsubsection{Setup}
To setup the configuration for the HTCondor and HTCondor--CE puppet modules
To set up the configuration for HTCondor and HTCondor--CE, puppet modules
are available. Unfortunately, the puppet system at our site is not compatible
with these modules, as they depend on \tmtextit{hiera}, which is not supported
at our site. The modules were later adapted to make them compatible with our
configuration management system. In the meantime, the setup was finalized
following the official documentation.
\subsubsection{Configuration.}
\subsubsection{Configuration}
The first configuration was completed manually. The main documentation
source for the HTCondor--CE is that of the OSG website~\cite{OSGDOC},
which refers to a tool \tmtextit{osg-configure} not present on the
general HTCondor--CE release. Because of this the setup was completed
general HTCondor--CE release. Because of this, the setup was completed
by trial and error. Once a working setup was obtained, a set of
integration notes were added to a public wiki~\cite{INFNWIKI}. This
should help other non-OSG users to get some supplementary hints to
accounting data. This simplifies the accounting problem, as it is no longer
necessary to collect grid data separately from the BLAH component and then
look for matches with the corresponding grid counterpart.
This solution have been used during the 2018 to provide accounting for
This solution has been used during 2018 to provide accounting for the
HTCondor--CE testbed cluster.
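For instance, the raw per-job data needed by the accounting chain can be pulled directly from the HTCondor history; the query below is only an indicative sketch using standard job ClassAd attributes, not the exact one used in production.

# Hypothetical extraction of accounting records from the HTCondor history.
condor_history -constraint 'JobStatus == 4' \
    -af GlobalJobId Owner x509UserProxyVOName RemoteWallClockTime RemoteUserCpu RemoteSysCpu \
    > accounting_records.txt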
\subsection{Running HTCondor--CE}
After some time to become confident with the main configuration tasks the
After some time to become confident with the main configuration tasks, the
testbed began working with jobs submitted by the 4 LHC experiments from
September 2018. The system proved to be stable and smooth, being able to work
unattended. This confirms that this system can be a reliable substitute for
......@@ -172,13 +172,11 @@ been devised:
A testbed cluster based on HTCondor--CE on top of the HTCondor batch system has
been deployed to evaluate these as a substitute for CREAM--CE and LSF. The
evaluation as mostly focused on the HTCondor--CE, as it is the most recent
evaluation has mostly focused on the HTCondor--CE, as it is the most recent
product. Apart from a few minor issues, mainly related to gaps in the available
documentation, The CE proved to be a stable component. the possibility to
documentation, the CE proved to be a stable component. The possibility to
perform accounting has been verified.
Using the described testbed we have
\section*{References}
\begin{thebibliography}{9}
\bibitem{OSGDOC} \url{https://opensciencegrid.org/docs/compute-element/install-htcondor-ce/}
......
\documentclass[a4paper]{jpconf}
\usepackage{graphicx}
\begin{document}
\title{ Infrastructures and Big Data processing as pillars in the XXXIII PhD couse in Data Sciece and Computation}
\title{ Infrastructures and Big Data processing as pillars in the XXXIII PhD course in Data Science and Computation}
%\address{Production Editor, \jpcs, \iopp, Dirac House, Temple Back, Bristol BS1~6BE, UK}
\author{D. Salomoni$^1$, A. Costantini$^1$, C. D. Duma$^1$, B. Martelli$^1$, D. Cesini$^1$, E. Fattibene$^1$ and D. Michelotto $^1$
\author{D. Salomoni$^1$, A. Costantini$^1$, C. D. Duma$^1$, B. Martelli$^1$, D. Cesini$^1$, E. Fattibene$^1$, D. Michelotto $^1$
% etc.
}
\address{$^1$ INFN-CNAF, Bologna, Italy}
\address{$^1$ INFN-CNAF, Bologna, IT}
\ead{davide.salomoni@cnaf.infn.it}
......@@ -32,7 +32,7 @@ issue, for example: joint doctoral degrees, co-­tutorship and student exchanges
member of the Course Board will provide.
The PhD course runs for four years and is aimed at training people to carry out academic and industrial research at a level of abstraction that
builds atop each single scientific skill which lies at the basis of the field of ``Data Science".
builds atop each single scientific skill which lies at the basis of the field of ``Data Science''.
Drawing on this, students graduated in the fields of Mathematical, Physical, Chemical and Astronomical Sciences should produce original and significant
research in terms of scientific publications and innovative applications, blending basic disciplines and finally specializing in specific fields as from those
......@@ -69,7 +69,7 @@ least 3 months abroad, during the 3rd/4th year of the course.
\section{Infrastructure for Big Data processing}
As already mentioned, the didactical units Infrastructure for Big Data processing Basic (IBDB) and Advanced (IBDA), headed by Davide Salomoni with the
support of the authors, have been an integral part of the PhD couse and constituted the personalized learning plan of some PhD students.
support of the authors, have been an integral part of the PhD course and constituted the personalized learning plan of some PhD students.
In order to make the teaching material available and to enable active interaction between teachers and students, a Content
Management System has been deployed and made available. The CMS chosen for this activity is Moodle \cite{moodle} and the entire courses
have been made available through it via a dedicated link (https://moodle.cloud.cnaf.infn.it/).
......@@ -98,7 +98,7 @@ and Disaster Recovery have been described. Moreover, a discussion on computing m
\subsection{Infrastructure for Big Data processing Advanced}
The course is aimed at discussing the foundations of Cloud computing and storage services beyond IaaS (PaaS and SaaS) leading the students to understand how to
exploit distributed infrastructures for Big Data processing.
The IBDA couse is intended as an evolution of the IBDB and, therefore, before following this course the IBDB should have already been achieved, or having familiarity with the covered topics.
The IBDA course is intended as an evolution of the IBDB; therefore, students should have already completed the IBDB, or be familiar with the topics it covers, before following this course.
At the end of the course, the student had practical and theoretical knowledge on distributed computing and storage infrastructures, cloud computing and virtualization,
parallel computing and their application to Big Data Analysis.
The course foresees an oral exam focusing on the presented topics. Students have been requested to prepare a small project discussed during the exam.
......
File added
......@@ -25,20 +25,20 @@
\newcommand{\ctau} {$c \tau$}
\newcommand{\ct} {$ct$}
\newcommand{\LKz} {$\Lambda$/$K^{0}$}
\newcommand{\s} {$\sqrt{s}$}
\newcommand{\snn} {$\sqrt{s_{\mathrm{NN}}}$}
\newcommand{\s} {\sqrt{s}}
\newcommand{\snn} {\sqrt{s_{\mathrm{NN}}}}
\newcommand{\dndy} {d$N$/d$y$}
\newcommand{\OO} {$\mathrm{O^{2}}$}
\begin{document}
\title{ALICE computing at the INFN CNAF Tier1}
\title{ALICE computing at the INFN CNAF Tier 1}
\author{Stefano Piano$^1$, Domenico Elia$^2$, Stefano Bagnasco$^3$, Francesco Noferini$^4$, Nicol\`o Jacazio$^5$, Gioacchino Vino$^2$}
\address{$^1$ INFN Sezione di Trieste, Trieste, Italy}
\address{$^2$ INFN Sezione di Bari, Bari, Italy}
\address{$^3$ INFN Sezione di Torino, Torino, Italy}
\address{$^4$ INFN Sezione di Bologna, Bologna, Italy}
\address{$^5$ INFN CNAF, Bologna, Italy}
\author{S. Piano$^1$, D. Elia$^2$, S. Bagnasco$^3$, F. Noferini$^4$, N. Jacazio$^5$, G. Vino$^2$}
\address{$^1$ INFN Sezione di Trieste, Trieste, IT}
\address{$^2$ INFN Sezione di Bari, Bari, IT}
\address{$^3$ INFN Sezione di Torino, Torino, IT}
\address{$^4$ INFN Sezione di Bologna, Bologna, IT}
\address{$^5$ INFN-CNAF, Bologna, IT}
\ead{stefano.piano@ts.infn.it}
......@@ -56,13 +56,13 @@ The main goal of ALICE is the study of the hot and dense matter created in ultra
The main goal of the run in 2018 was to complete the approved Run2 physics program and it was fully achieved thanks to the excellent performance of the apparatus.
ALICE resumed data taking with beams in April at the restart of LHC operation with pp collisions (\s~=~13~TeV). ALICE continued to collect statistics with pp collisions from April 2nd to October 25th with the same trigger mix as in 2017. As planned, ALICE was operating with pp luminosity leveled to $\mathrm{2.6\times10^{30}}$ $\mathrm{cm^{-2}s^{-1}}$ providing an interaction rate of 150 kHz. The HLT compression factor was improved to 8.5 throughout the data taking, thus the HLT was able to reject the higher amount of spurious clusters, which were anticipated with Ar-CO2 gas mixture in the TPC. The average RAW data event size after compression was 1.7MB at the nominal interaction rate (150 kHz), exactly as expected and used for the resource calculations. At the end of the pp period, ALICE arrived at 43\% combined efficiency (LHC availability 47\% * ALICE efficiency 92\%).
ALICE resumed data taking with beams in April at the restart of LHC operation with pp collisions ($\s=13$~TeV). ALICE continued to collect statistics with pp collisions from April 2nd to October 25th with the same trigger mix as in 2017. As planned, ALICE was operating with pp luminosity leveled to $\mathrm{2.6\times10^{30}}$ $\mathrm{cm^{-2}s^{-1}}$ providing an interaction rate of 150 kHz. The HLT compression factor was improved to 8.5 throughout the data taking, thus the HLT was able to reject the higher amount of spurious clusters, which were anticipated with Ar-CO2 gas mixture in the TPC. The average RAW data event size after compression was 1.7MB at the nominal interaction rate (150 kHz), exactly as expected and used for the resource calculations. At the end of the pp period, ALICE arrived at 43\% combined efficiency (LHC availability 47\% * ALICE efficiency 92\%).
The \PbPb (\snn~=~5.02~TeV) data taking period started in November 2018 and was scheduled for 24 days. The target was to reach a total integrated luminosity of 1 $\mathrm{nb^{-1}}$ for Run2 and to complete the ALICE goals for the collection of a large sample of central and minimum bias collisions. To achieve this, the interaction rate was leveled at 8 kHz (L = $\mathrm{1.0\times10^{27}}$ $\mathrm{cm^{-2}s^{-1}}$) and data taken at close to the maximum achievable readout rate. The accelerator conditions were different compared to the foreseen mainly because of the delay in the beam start by 3-4 days due to solenoid coil fault in LINAC3 and the 20\% loss of integrated luminosity due to beam sizes 50\% larger at IP2 than at IP1/IP5 during the whole Pb-Pb period. The LHC time in Stable Beams was 47\%, the average data taking efficiency by ALICE was 87\% and a maximum HLT compression factor close to 9 has been reached during the Pb-Pb period. To compensate for the reduced beam availability, the rates of different triggers were adjusted to increase as much as possible the statistics in central and semi-central events. Overall, we collected 251M central and mid-central events and 159M minimum bias events. To further minimize the impact of Pb-bPb run on tape resources, ALICE additionally compressed the non-TPC portion of RAW data (by applying level 2 gzip compression) resulting in additional 17\% reduction of data volume on tape. As a result, the accumulated amount of Pb--Pb RAW data was 5.5~PiB. A total amount of RAW data of 11~PiB, including pp, was written to tape at Tier0, and then replicated at the Tier1s. The data accumulation curve at Tier0 is shown in Fig.\ref{fig:rawdata} and about 4.2~PiB of RAW data has been replicated to CNAF during 2018 with a maximum rate of 360 TiB per week, limited only by the tape drives speed considering the 100 Gb/s LHCOPN bandwidth between CERN and CNAF, as shown by the Fig.\ref{fig:tottraftape}.
The \PbPb ($\snn=5.02$~TeV) data taking period started in November 2018 and was scheduled for 24 days. The target was to reach a total integrated luminosity of 1 $\mathrm{nb^{-1}}$ for Run2 and to complete the ALICE goals for the collection of a large sample of central and minimum bias collisions. To achieve this, the interaction rate was leveled at 8 kHz (L = $\mathrm{1.0\times10^{27}}$ $\mathrm{cm^{-2}s^{-1}}$) and data taken at close to the maximum achievable readout rate. The accelerator conditions were different from those foreseen, mainly because of a 3-4 day delay in the beam start due to a solenoid coil fault in LINAC3 and the 20\% loss of integrated luminosity due to beam sizes 50\% larger at IP2 than at IP1/IP5 during the whole Pb-Pb period. The LHC time in Stable Beams was 47\%, the average data taking efficiency by ALICE was 87\% and a maximum HLT compression factor close to 9 was reached during the Pb-Pb period. To compensate for the reduced beam availability, the rates of different triggers were adjusted to increase as much as possible the statistics in central and semi-central events. Overall, we collected 251M central and mid-central events and 159M minimum bias events. To further minimize the impact of the Pb--Pb run on tape resources, ALICE additionally compressed the non-TPC portion of RAW data (by applying level 2 gzip compression), resulting in an additional 17\% reduction of data volume on tape. As a result, the accumulated amount of Pb--Pb RAW data was 5.5~PiB. A total amount of RAW data of 11~PiB, including pp, was written to tape at Tier0, and then replicated at the Tier1s. The data accumulation curve at Tier0 is shown in Fig.~\ref{fig:rawdata} and about 4.2~PiB of RAW data has been replicated to CNAF during 2018 with a maximum rate of 360 TiB per week, limited only by the tape drive speed considering the 100 Gb/s LHCOPN bandwidth between CERN and CNAF, as shown in Fig.~\ref{fig:tottraftape}.
\begin{figure}[!ht]
\begin{center}
\includegraphics[width=0.75\textwidth]{raw_data_accumulation_run2.png}
\includegraphics[width=0.75\textwidth]{raw_data_accumulation_run2}
\end{center}
\caption{Raw data accumulation curve for Run2.}
\label{fig:rawdata}
......@@ -72,7 +72,7 @@ The p-p data collected in 2018 has been fully calibrated and processed in Pass1,
\begin{figure}[!ht]
\begin{center}
\includegraphics[width=0.75\textwidth]{total_traffic_cnaf_tape_2018.png}
\includegraphics[width=0.75\textwidth]{total_traffic_cnaf_tape_2018}
\end{center}
\caption{ALICE traffic per week and total traffic on the CNAF tape during 2018.}
\label{fig:tottraftape}
......@@ -81,15 +81,16 @@ The p-p data collected in 2018 has been fully calibrated and processed in Pass1,
During 2018 many new ALICE physics results have been obtained from pp, p--Pb, \PbPb and \XeXe collisions from the Run2 data taking, while the collaboration has also continued to work on results from the analysis of the Run1 data. Almost 50 papers have been submitted to journals in the last year, including in particular the main topics reported in the following.
In \pp and in \pPb collisions, for instance, ALICE studied the
$\Lambda_{\rm c}^+$ production~\cite{Acharya:2017kfy}, the prompt and non-prompt $\hbox {J}/\psi $ production and nuclear modification at mid-rapidity~\cite{Acharya:2018yud} and the measurement of the inclusive J/$\psi$ polarization at forward rapidity in \pp collisions at $\mathbf {\sqrt{s} = 8}$~TeV \cite{Acharya:2018uww}.
$\Lambda_{\rm c}^+$ production~\cite{Acharya:2017kfy}, the prompt and non-prompt $\hbox {J}/\psi$ production and nuclear modification at mid-rapidity~\cite{Acharya:2018yud} and the measurement of the inclusive $\hbox {J}/\psi$ polarization at forward rapidity in \pp collisions
at $\s= 8$~TeV \cite{Acharya:2018uww}.
Looking at \PbPb data ALICE succeeded in studying
the $D$-meson azimuthal anisotropy in midcentral Pb-Pb collisions at $\mathbf{\sqrt{s_{\rm NN}}=5.02}$ TeV~\cite{Acharya:2017qps}, the Z$^0$-boson production at large rapidities in Pb-Pb collisions at $\sqrt{s_{\rm NN}}=5.02$ TeV~\cite{Acharya:2017wpf} and the anisotropic flow of identified particles in Pb-Pb collisions at $ {\sqrt{s}}_{\mathrm{NN}}=5.02 $ TeV~\cite{Acharya:2018zuq}. The anisotropic flow was also studied in \XeXe collisions at $\mathbf{\sqrt{s_{\rm{NN}}} = 5.44}$ TeV~\cite{Acharya:2018ihu}, together with the inclusive J/$\psi$ production~\cite{Acharya:2018jvc} and the transverse momentum spectra and nuclear modification factors of charged particles~\cite{Acharya:2018eaq}.\\
the $D$-meson azimuthal anisotropy in midcentral Pb-Pb collisions at $\snn=5.02$~TeV~\cite{Acharya:2017qps}, the Z$^0$-boson production at large rapidities in Pb-Pb collisions at $\snn=5.02$~TeV~\cite{Acharya:2017wpf} and the anisotropic flow of identified particles in Pb-Pb collisions at $ \snn=5.02 $~TeV~\cite{Acharya:2018zuq}. The anisotropic flow was also studied in \XeXe collisions at $\snn = 5.44$~TeV~\cite{Acharya:2018ihu}, together with the inclusive $\hbox {J}/\psi$ production~\cite{Acharya:2018jvc} and the transverse momentum spectra and nuclear modification factors of charged particles~\cite{Acharya:2018eaq}.\\
The general upgrade strategy for Run3 is conceived to deal with this challenge with expected \PbPb interaction rates of up to 50 kHz aiming at an integrated luminosity above 10 $\mathrm{nb^{-1}}$. The five TDRs, namely for the new ITS, the TPC GEM-based readout chambers, the Muon Forward Tracker, the Trigger and Readout system, and the Online/Offline computing system were fully approved by the CERN Research Board between 2014 and 2015. In 2017 the transition from the R\&D phase to the construction of prototypes of the final detector elements was successfully completed. For the major systems, the final prototype tests and evaluations were performed and the production readiness reviews have been successful, the production started during the 2017 and has been continued throughout 2018.
\section{Computing model and R\&D activity in Italy}
The ALICE computing model is still heavily based on Grid distributed computing; since the very beginning, the base principle underlying it has been that every physicist should have equal access to the data and computing resources~\cite{ALICE:2005aa}. According to this principle, the ALICE peculiarity has always been to operate its Grid as a “cloud” of computing resources (both CPU and storage) with no specific role assigned to any given centre, the only difference between them being the Tier level to which they belong. All resources have to be made available to all ALICE members, according only to experiment policy and not on resource physical location, and data is distributed according to network topology and availability of resources and not in pre-defined datasets. Tier1s only peculiarities are their size and the availability of tape custodial storage, which holds a collective second copy of raw data and allows the collaboration to run event reconstruction tasks there. In the ALICE model, though, tape recall is almost never done: all useful data reside on disk, and the custodial tape copy is used only for safekeeping. All data access is done through the xrootd protocol, either through the use of “native” xrootd storage or, like in many large deployments, using xrootd servers in front of a distributed parallel filesystem like GPFS.\\
The ALICE computing model is still heavily based on Grid distributed computing; since the very beginning, the base principle underlying it has been that every physicist should have equal access to the data and computing resources~\cite{ALICE:2005aa}. According to this principle, the ALICE peculiarity has always been to operate its Grid as a “cloud” of computing resources (both CPU and storage) with no specific role assigned to any given center, the only difference between them being the Tier level to which they belong. All resources have to be made available to all ALICE members, according only to experiment policy and not on resource physical location, and data is distributed according to network topology and availability of resources and not in pre-defined datasets. Tier1s only peculiarities are their size and the availability of tape custodial storage, which holds a collective second copy of raw data and allows the collaboration to run event reconstruction tasks there. In the ALICE model, though, tape recall is almost never done: all useful data reside on disk, and the custodial tape copy is used only for safekeeping. All data access is done through the xrootd protocol, either through the use of “native” xrootd storage or, like in many large deployments, using xrootd servers in front of a distributed parallel filesystem like GPFS.\\
The model has not changed significantly for Run2, except for scavenging of some extra computing power by opportunistically using the HLT farm when not needed for data taking. All raw data collected in 2017 has been passed through the calibration stages, including the newly developed track distortion calibration for the TPC, and has been validated by the offline QA process before entering the final reconstruction phase. The ALICE software build system has been extended with additional functionality to validate the AliRoot release candidates with a large set of raw data from different years as well as with various MC generators and configurations. It uses the CERN elastic cloud infrastructure, thus allowing for dynamic provision of resources as needed. The Grid utilization in the accounting period remained high, with no major incidents. The CPU/Wall efficiency remained constant, at about 85\% across all Tiers, similar to the previous year. The much higher data rate foreseen for Run3, though, will require a major rethinking of the current computing model in all its components, from the software framework to the algorithms and the distributed infrastructure. The design of the new computing framework for Run3, started in 2013 and mainly based on the concepts of Online-Offline integration (“\OO\ Project”), has been finalized with the corresponding Technical Design Report~\cite{Buncic:2015ari}: development and implementation phases as well as performance tests are currently ongoing.\\
The Italian share of the ALICE distributed computing effort (currently about 17\%) includes resources both from the Tier1 at CNAF and from the Tier2s in Bari, Catania, Torino and Padova-LNL, plus some extra resources in Trieste. The contribution from the Italian community to the ALICE computing in 2018 has been mainly spread over the usual items, such as the development and maintenance of the (AliRoot) software framework, the management of the computing infrastructure (Tier1 and Tier2 sites) and the participation in the Grid operations of the experiment.\\
......@@ -97,12 +98,12 @@ In addition, in the framework of the computing R\&D activities in Italy, the des
\section{Role and contribution of the INFN Tier1 at CNAF}
CNAF is a full-fledged ALICE Tier1 centre, having been one of the first to join the production infrastructure years ago. According to the ALICE cloud-like computing model, it has no special assigned task or reference community, but provides computing and storage resources to the whole collaboration, along with offering valuable support staff for the experiment’s computing activities. It provides reliable xrootd access both to its disk storage and to the tape infrastructure, through a TSM plugin that was developed by CNAF staff specifically for ALICE use.\\
As a result of flooding, the CNAF computing centre stopped operation on November 8th, 2017; tape access had been made available again on January 31st 2018, and the ALICE Storage Element was fully recovered by February 23th. The loss of CPU resources during the Tier1 shutdown was partially mitigated by the reallocation of the Tier1 worker nodes located in Bari to the Tier2 Bari queue. At the end of February 2018 the CNAF local farm had been powered again moving from 50 kHS06 gradually to 140 kHS06. In addition, on March 15th 170 kHS06 at CINECA became available thanks to a 500 Gb/s dedicated link.
Since March running at CNAF has been remarkably stable: for example, both the disk and tape storage availabilities have been better than 98\%, ranking CNAF in the top 5 most reliable sites for ALICE. The computing resources provided for ALICE at the CNAF Tier1 centre were fully used along the year, matching and often exceeding the pledged amounts due to access to resources unused by other collaborations. Overall, about 64\% of the ALICE computing activity was Monte Carlo simulation, 14\% raw data processing (which takes place at the Tier0 and Tier1 centres only) and 22\% analysis activities: Fig.~\ref{fig:runjobsusers} illustrates the share among the different activities in the ALICE running job profile along the last 12 months.\\
CNAF is a full-fledged ALICE Tier1 center, having been one of the first to join the production infrastructure years ago. According to the ALICE cloud-like computing model, it has no special assigned task or reference community, but provides computing and storage resources to the whole collaboration, along with offering valuable support staff for the experiment’s computing activities. It provides reliable xrootd access both to its disk storage and to the tape infrastructure, through a TSM plugin that was developed by CNAF staff specifically for ALICE use.\\
As a result of the flooding, the CNAF computing center stopped operation on November 8th, 2017; tape access had been made available again on January 31st, 2018, and the ALICE Storage Element was fully recovered by February 23rd. The loss of CPU resources during the Tier1 shutdown was partially mitigated by the reallocation of the Tier1 worker nodes located in Bari to the Tier2 Bari queue. At the end of February 2018 the CNAF local farm had been powered on again, moving gradually from 50 kHS06 to 140 kHS06. In addition, on March 15th 170 kHS06 at CINECA became available thanks to a 500 Gb/s dedicated link.
Since March, running at CNAF has been remarkably stable: for example, both the disk and tape storage availabilities have been better than 98\%, ranking CNAF in the top 5 most reliable sites for ALICE. The computing resources provided for ALICE at the CNAF Tier1 center were fully used throughout the year, matching and often exceeding the pledged amounts thanks to access to resources unused by other collaborations. Overall, about 64\% of the ALICE computing activity was Monte Carlo simulation, 14\% raw data processing (which takes place at the Tier0 and Tier1 centers only) and 22\% analysis activities: Fig.~\ref{fig:runjobsusers} illustrates the share among the different activities in the ALICE running job profile over the last 12 months.\\
\begin{figure}[!ht]
\begin{center}
\includegraphics[width=0.75\textwidth]{running_jobs_per_users_2018.png}
\includegraphics[width=0.75\textwidth]{running_jobs_per_users_2018}
\end{center}
\caption{Share among the different ALICE activities in the 2018 running jobs profile.}
\label{fig:runjobsusers}
......@@ -112,16 +113,16 @@ Since April 2018, CNAF deployed the pledged resources corresponding to about 52
The INFN Tier1 has provided about 4.9\% since March 2018 and about 4.20\% over the whole year of the total CPU hours used by ALICE, ranking second among the ALICE Tier1 sites despite the flooding incident, as shown in Fig.~\ref{fig:walltimesharet1}. The cumulative fraction of CPU hours over the whole year for CNAF is about 21\% of all the ALICE Tier1 sites, second only to FZK in Karlsruhe (24\%).
\begin{figure}[!ht]
\begin{center}
\includegraphics[width=0.75\textwidth]{wall_time_tier1_2018.png}
\includegraphics[width=0.75\textwidth]{wall_time_tier1_2018}
\end{center}
\caption{Ranking of CNAF among ALICE Tier1 centres in 2018.}
\caption{Ranking of CNAF among ALICE Tier1 centers in 2018.}
\label{fig:walltimesharet1}
\end{figure}
This amounts to about 44\% of the total Wall Time of the INFN contribution: it successfully completed nearly 10.5 million jobs, for a total of more than 44 million CPU hours; the running job profile at CNAF in 2018 is shown in Fig.~\ref{fig:rjobsCNAFunov}.\\
Since mid-November a new job submission queue has been made available to ALICE and used to successfully test the job queueing mechanism, the scheduling policy, the priority scheme, the resource monitoring and the resource management with HTCondor at CNAF.
\begin{figure}[!ht]
\begin{center}
\includegraphics[width=0.75\textwidth]{running_jobs_CNAF_2018.png}
\includegraphics[width=0.75\textwidth]{running_jobs_CNAF_2018}
\end{center}
\caption{Running jobs profile at CNAF in 2018.}
\label{fig:rjobsCNAFunov}
......@@ -129,7 +130,7 @@ Since mid-November a new job submission queue has been made available to ALICE a
At the end of last year ALICE was keeping on disk at CNAF more than 4.1 PiB of data in nearly 118 million files, plus more than 10 PiB of raw data on custodial tape storage; the reliability of the storage infrastructure is commendable, even taking into account the extra layer of complexity introduced by the xrootd interfaces. The excellent file system performance allows analysing data from the SE with an average throughput of about 1.6 GB/s and a peak throughput of about 3.0 GB/s, as shown in Fig.~\ref{fig:nettrafse}.
\begin{figure}[!ht]
\begin{center}
\includegraphics[width=0.75\textwidth]{network_traffic_cnaf_se_2018.png}
\includegraphics[width=0.75\textwidth]{network_traffic_cnaf_se_2018}
\end{center}
\caption{Network traffic on the ALICE xrootd servers at CNAF during 2018.}
\label{fig:nettrafse}
......@@ -154,7 +155,7 @@ Also network connectivity has always been reliable; the 100 Gb/s of the LHCOPN a
%\cite{Adam:2015ptt}
\bibitem{Adam:2015ptt}
J.~Adam {\it et al.} [ALICE Collaboration],
%``Centrality dependence of the charged-particle multiplicity density at midrapidity in Pb-Pb collisions at $\sqrt{s_{\rm NN}}$ = 5.02 TeV,''
%``Centrality dependence of the charged-particle multiplicity density at midrapidity in Pb-Pb collisions at $\snn = 5.02$ TeV,''
Phys.\ Rev.\ Lett.\ {\bf 116} (2016) no.22, 222302.
% doi:10.1103/PhysRevLett.116.222302
% [arXiv:1512.06104 [nucl-ex]].
......@@ -164,7 +165,7 @@ Also network connectivity has always been reliable; the 100 Gb/s of the LHCOPN a
%\cite{Adam:2016izf}
\bibitem{Adam:2016izf}
J.~Adam {\it et al.} [ALICE Collaboration],
%``Anisotropic flow of charged particles in Pb-Pb collisions at $\sqrt{s_{\rm NN}}=5.02$ TeV,''
%``Anisotropic flow of charged particles in Pb-Pb collisions at $\snn=5.02$ TeV,''
Phys.\ Rev.\ Lett.\ {\bf 116} (2016) no.13, 132302.
% doi:10.1103/PhysRevLett.116.132302
% [arXiv:1602.01119 [nucl-ex]].
......@@ -181,7 +182,7 @@ Also network connectivity has always been reliable; the 100 Gb/s of the LHCOPN a
%\cite{Acharya:2017kfy}
\bibitem{Acharya:2017kfy}
S.~Acharya {\it et al.} [ALICE Collaboration],
%``$\Lambda_{\rm c}^+$ production in pp collisions at $\sqrt{s} = 7$ TeV and in p-Pb collisions at $\sqrt{s_{\rm NN}} = 5.02$ TeV,''
%``$\Lambda_{\rm c}^+$ production in pp collisions at $\s = 7$ TeV and in p-Pb collisions at $\snn = 5.02$ TeV,''
JHEP {\bf 1804} (2018) 108.
% doi:10.1007/JHEP04(2018)108
% [arXiv:1712.09581 [nucl-ex]].
......@@ -190,7 +191,7 @@ Also network connectivity has always been reliable; the 100 Gb/s of the LHCOPN a
\bibitem{Acharya:2018yud}
S.~Acharya {\it et al.} [ALICE Collaboration],
%``Prompt and non-prompt $\hbox {J}/\psi $ production and nuclear modification at mid-rapidity in p–Pb collisions at $\mathbf{\sqrt{{ s}_{\text {NN}}}= 5.02}$ TeV,''
%``Prompt and non-prompt $\hbox {J}/\psi $ production and nuclear modification at mid-rapidity in p–Pb collisions at $\snn= 5.02}$ TeV,''
Eur.\ Phys.\ J.\ C {\bf 78} (2018) no.6, 466.
% doi:10.1140/epjc/s10052-018-5881-2
% [arXiv:1802.00765 [nucl-ex]].
......@@ -200,7 +201,7 @@ Also network connectivity has always been reliable; the 100 Gb/s of the LHCOPN a
%\cite{Acharya:2018uww}
\bibitem{Acharya:2018uww}
S.~Acharya {\it et al.} [ALICE Collaboration],
%``Measurement of the inclusive J/ $\psi $ polarization at forward rapidity in pp collisions at $\mathbf {\sqrt{s} = 8}$ TeV,''
%``Measurement of the inclusive J/ $\psi $ polarization at forward rapidity in pp collisions at $\s = 8$ TeV,''
Eur.\ Phys.\ J.\ C {\bf 78} (2018) no.7, 562.
% doi:10.1140/epjc/s10052-018-6027-2
% [arXiv:1805.04374 [hep-ex]].
......@@ -210,7 +211,7 @@ Also network connectivity has always been reliable; the 100 Gb/s of the LHCOPN a
%\cite{Acharya:2017qps}
\bibitem{Acharya:2017qps}
S.~Acharya {\it et al.} [ALICE Collaboration],
%``$D$-meson azimuthal anisotropy in midcentral Pb-Pb collisions at $\mathbf{\sqrt{s_{\rm NN}}=5.02}$ TeV,''
%``$D$-meson azimuthal anisotropy in midcentral Pb-Pb collisions at $\snn=5.02}$ TeV,''
Phys.\ Rev.\ Lett.\ {\bf 120} (2018) no.10, 102301.
% doi:10.1103/PhysRevLett.120.102301
% [arXiv:1707.01005 [nucl-ex]].
......@@ -220,7 +221,7 @@ Also network connectivity has always been reliable; the 100 Gb/s of the LHCOPN a
%\cite{Acharya:2017wpf}
\bibitem{Acharya:2017wpf}
S.~Acharya {\it et al.} [ALICE Collaboration],
%``Measurement of Z$^0$-boson production at large rapidities in Pb-Pb collisions at $\sqrt{s_{\rm NN}}=5.02$ TeV,''
%``Measurement of Z$^0$-boson production at large rapidities in Pb-Pb collisions at $\snn=5.02$ TeV,''
Phys.\ Lett.\ B {\bf 780} (2018) 372.
% doi:10.1016/j.physletb.2018.03.010
% [arXiv:1711.10753 [nucl-ex]].
......@@ -230,7 +231,7 @@ Also network connectivity has always been reliable; the 100 Gb/s of the LHCOPN a
%\cite{Acharya:2018zuq}
\bibitem{Acharya:2018zuq}
S.~Acharya {\it et al.} [ALICE Collaboration],
%``Anisotropic flow of identified particles in Pb-Pb collisions at $ {\sqrt{s}}_{\mathrm{NN}}=5.02 $ TeV,''
%``Anisotropic flow of identified particles in Pb-Pb collisions at $\snn=5.02 $ TeV,''
JHEP {\bf 1809} (2018) 006.
% doi:10.1007/JHEP09(2018)006
% [arXiv:1805.04390 [nucl-ex]].
......@@ -240,7 +241,7 @@ Also network connectivity has always been reliable; the 100 Gb/s of the LHCOPN a
%\cite{Acharya:2018ihu}
\bibitem{Acharya:2018ihu}
S.~Acharya {\it et al.} [ALICE Collaboration],
%``Anisotropic flow in Xe-Xe collisions at $\mathbf{\sqrt{s_{\rm{NN}}} = 5.44}$ TeV,''
%``Anisotropic flow in Xe-Xe collisions at $\snn = 5.44}$ TeV,''
Phys.\ Lett.\ B {\bf 784} (2018) 82.
% doi:10.1016/j.physletb.2018.06.059
% [arXiv:1805.01832 [nucl-ex]].
......@@ -250,7 +251,7 @@ Also network connectivity has always been reliable; the 100 Gb/s of the LHCOPN a
%\cite{Acharya:2018jvc}
\bibitem{Acharya:2018jvc}
S.~Acharya {\it et al.} [ALICE Collaboration],
%``Inclusive J/$\psi$ production in Xe–Xe collisions at $\sqrt{s_{\rm NN}}$ = 5.44 TeV,''
%``Inclusive J/$\psi$ production in Xe–Xe collisions at $\snn = 5.44$ TeV,''
Phys.\ Lett.\ B {\bf 785} (2018) 419.
% doi:10.1016/j.physletb.2018.08.047
% [arXiv:1805.04383 [nucl-ex]].
......@@ -260,7 +261,7 @@ Also network connectivity has always been reliable; the 100 Gb/s of the LHCOPN a
%\cite{Acharya:2018eaq}
\bibitem{Acharya:2018eaq}
S.~Acharya {\it et al.} [ALICE Collaboration],
%``Transverse momentum spectra and nuclear modification factors of charged particles in Xe-Xe collisions at $\sqrt{s_{\rm NN}}$ = 5.44 TeV,''
%``Transverse momentum spectra and nuclear modification factors of charged particles in Xe-Xe collisions at $\snn= 5.44$ TeV,''
Phys.\ Lett.\ B {\bf 788} (2019) 166.
% doi:10.1016/j.physletb.2018.10.052
% [arXiv:1805.04399 [nucl-ex]].
......@@ -295,7 +296,7 @@ Also network connectivity has always been reliable; the 100 Gb/s of the LHCOPN a
%\bibitem{Abelev:2014dsa}
% B.~B.~Abelev {\it et al.} [ALICE Collaboration],
% %``Transverse momentum dependence of inclusive primary charged-particle production in p-Pb
% collisions at $\sqrt{s_\mathrm{{NN}}}=5.02~\text {TeV}$,''
% collisions at $\snn=5.02~\text {TeV}$,''
% Eur.\ Phys.\ J.\ C {\bf 74} (2014) no.9, 3054.
% %doi:10.1140/epjc/s10052-014-3054-5
% %[arXiv:1405.2737 [nucl-ex]].
......@@ -306,7 +307,7 @@ Also network connectivity has always been reliable; the 100 Gb/s of the LHCOPN a
%\bibitem{Abelev:2013haa}
% B.~B.~Abelev {\it et al.} [ALICE Collaboration],
% %``Multiplicity Dependence of Pion, Kaon, Proton and Lambda Production in p-Pb Collisions
% at $\sqrt{s_{NN}}$ = 5.02 TeV,''
% at $\snn = 5.02$ TeV,''
% Phys.\ Lett.\ B {\bf 728} (2014) 25.
% %doi:10.1016/j.physletb.2013.11.020
% %[arXiv:1307.6796 [nucl-ex]].
......
......@@ -17,19 +17,12 @@
\begin{abstract}
AMS is a large acceptance instrument conceived to search for anti-particles (positrons, anti-protons, anti-deuterons) coming from dark matter
annihilation, primordial anti-matter (anti-He or light anti nuclei) and to
perform accurate measurements in space of the cosmic radiation in the GeV-TeV
energy range.
Installed on the International Space Station (ISS) in mid-May 2011, it is
operating continuously since then, with a collected statistics of $\sim$ 130
billion events up to the end of 2018.
CNAF is one of the repositories of the full AMS data set and contributes to the
data production and Monte Carlo simulation of the international collaboration.
It represents the central computing resource for the data analysis performed by
Italian collaboration.
In the following, the AMS computing framework, the role of the CNAF computing
center and the use of the CNAF resources in 2018 will be given.
AMS is a large acceptance Cosmic Ray (CR) detector operating in space, on board the International Space Station (ISS) since the 19$^{\textrm{th}}$ of May 2011.\\
%AMS is a large acceptance instrument conceived to search for anti-particles (positrons, anti-protons, anti-deuterons) coming from dark matter annihilation, primordial anti-matter (anti-He or light anti nuclei) and to perform accurate measurements in space of the cosmic radiation in the GeV-TeV energy range.
%Installed on the International Space Station (ISS) in mid-May 2011, it is operating continuously since then, with a collected statistics of $\sim$ 130 billion events up to the end of 2018.
CNAF is one of the repositories of the full AMS data set and contributes to the data production and Monte Carlo simulation of the international collaboration. It represents the central computing resource for the data analysis performed by the Italian collaboration and its role will be reviewed in this document.
In the following, the AMS computing framework, the role of the CNAF computing center and the use of the CNAF resources in 2018 will be described.\\
In addition, the ongoing R\&D activities to integrate cloud resources into this framework will be discussed.
\end{abstract}
\section{Introduction}
......@@ -93,7 +86,7 @@ The data reconstruction pipeline is mainly composed by two logical step:
\begin{itemize}
\item[1)]{
the {\bf First Production} runs continuously over incoming data doing an
initial validation and indexing. It produces the so called "standard" (STD)
initial validation and indexing. It produces the so-called ``standard'' (STD)
reconstructed data stream, ready within two hours after data are received at
CERN, which is used to calibrate the different sub-detectors as well as to monitor
the detector performance off-line. In this stage Data Summary Files are
......@@ -107,7 +100,7 @@ The data reconstruction pipeline is mainly composed by two logical step:
to the STD data sample, every 6 months, the time needed to produce and
certify the calibrations. A full reprocessing of all AMS data is carried
out periodically in case of major software updates, providing the so
called "pass" production. Up to 2018 there were 7 full data reproductions
called ``pass'' production. Up to 2018 there were 7 full data reprocessing campaigns.
The last published measurements were based on the pass6 data set, but all the analyses being carried out for the next publications are based on the pass7 ones.
}
\end{itemize}
......@@ -138,7 +131,7 @@ CNAF is the main computing resource for data analysis of the AMS Italian collabo
A full copy of the AMS raw data is preserved on tape, while, usually, the latest production and part of the Monte Carlo sample are available on disk.
More than 30 users are routinely performing the bulk of their analysis at CNAF, transferring to local sites (i.e. their small local computing farm or their laptop) just reduced data sets or histograms.
As described in the following, during 2018, the possibility of a XRootD endpoint at CNAF has been explored. The goal is to federate, through XRootD, the $\sim$ 5 PB available for the AMS Collaboration at CERN, with the $\sim$ 2 PB at CNAF. In this picture, CNAF will be the second data center to share its disk space togheter with the one available for the collaboration, large-scale optimizing it.
As described in the following, during 2018, the possibility of an XRootD endpoint at CNAF has been explored. The goal is to federate, through XRootD, the $\sim$ 5 PB available for the AMS Collaboration at CERN, with the $\sim$ 2 PB at CNAF. In this picture, CNAF will be the second data center to share its disk space together with the one available for the collaboration, optimizing it on a large scale.
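
As an illustration only, the following minimal sketch shows how a federated XRootD namespace could be read from a client node; the redirector hostname and the file path are purely illustrative assumptions, not the actual AMS endpoints.

\begin{verbatim}
# Minimal sketch: copy one file from a hypothetical XRootD
# federation using the standard xrdcp client; hostname and
# path are placeholders.
import subprocess

redirector = "root://xrootd-ams.example.cnaf.infn.it"
remote_path = "//ams/Data/pass7/run_1234567890.root"
local_copy = "run_1234567890.root"

# xrdcp <source> <destination>; the redirector resolves which
# replica (CERN or CNAF) actually serves the file, transparently
# for the user.
subprocess.run(["xrdcp", redirector + remote_path, local_copy],
               check=True)
\end{verbatim}
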
\section{Data processing strategy at CNAF}
......@@ -216,7 +209,7 @@ Different analysis are carried on by the Italian collaboration. In 2018, the CNA
\subsection*{Research and Development}
\label{ReD}
As mentioned above, during 2017 AMS started evaluating the technical feasibility of integrating also cloud resources (possibly seamlessly) in order to primarily benefit of external computing resources, meant as opportunistic resources. The architectural model foreseen is that all AMS data are and will be hosted at CNAF. Possible cloud compute resources should be able to remotely access data (might be caching locally for the sake of the I/O optimization) and produced data (namely output files) should be moved into the CNAF storage.\\
AMS work-flow has been successfully integrated in DODAS (Dynamic On Demand Analysis Service, a thematic service funded by the EOSC-hub European project) and the work-flow has been validated and consolidated during 2018. The success of the validation tests performed over HelixNebula Science Cloud provided resources and over Google Cloud INFN grant motivate further exploitation as well as evolution of the strategy. In total in 2018 the Italian collaboration benefited of more than 4\textit{\,k\,HS06\,yr} of opportunistic resources, that represent $\sim$ 20\% of the ones obtained from CNAF.\\
The AMS work-flow has been successfully integrated in DODAS (Dynamic On Demand Analysis Service, a thematic service funded by the EOSC-hub European project \cite{DODAS}) and has been validated and consolidated during 2018. The success of the validation tests, performed on the resources provided by the HelixNebula Science Cloud and by the Google Cloud INFN grant, motivates further exploitation as well as an evolution of the strategy. In total, in 2018 the Italian collaboration benefited from more than 4\textit{\,k\,HS06\,yr} of opportunistic resources, which represent $\sim$ 20\% of the ones obtained from CNAF.\\
More in detail, during 2019 the plan is to consolidate the usage of the INFN on-premises cloud providers, namely Cloud@ReCaS Bari and Cloud@CNAF, in the context of DODAS. Consolidation means improving I/O management by using emerging solutions for data caching, as well as starting to exploit geographically distributed clusters.\\
The latter is about exploiting DODAS-based solutions to create a single logical cluster running over any available resource provider. The desired solution is to allow users to submit jobs from, e.g., a CNAF-provided User Interface to a single queue, and to allow dynamic clusters to fetch payloads in a way that is secure and transparent to the end user.\\
From a technical perspective, the distributed cluster implementation will be based on HTCondor technology, which is an important strategic aspect because we expect it will allow, later on, a completely seamless integration with the batch system of the CNAF Tier 1.
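
A rough sketch of the single-queue submission model described above is given below: a minimal HTCondor submit description is written and handed to condor\_submit. The executable, file names and the existence of a suitably configured schedd are assumptions made for the sake of the example, not details of the actual AMS or DODAS setup.

\begin{verbatim}
# Minimal sketch, assuming a user interface with access to an
# HTCondor schedd serving a (hypothetical) distributed cluster.
import pathlib
import subprocess

submit_text = """\
executable = ams_analysis.sh
arguments  = run_list.txt
output     = job.$(Cluster).$(Process).out
error      = job.$(Cluster).$(Process).err
log        = job.$(Cluster).log
queue 10
"""

pathlib.Path("ams_analysis.sub").write_text(submit_text)

# condor_submit places the jobs in the single logical queue; where
# they actually run (CNAF or a cloud provider) is decided by the
# pool configuration, transparently to the user.
subprocess.run(["condor_submit", "ams_analysis.sub"], check=True)
\end{verbatim}
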
......@@ -269,7 +262,12 @@ The goal by the end of 2019 is to bring the ASI-SSDC hosted computing resources
Phys.\ Rev.\ Lett.\ {\bf 122} (2019) no.10, 101101.
doi:\url{10.1103/PhysRevLett.122.101101}
\bibitem{dst} D. D'Urso \& M. Duranti, Journal of Physics: Conference Series, 664 (2015), 072016
\bibitem{dst} D. D'Urso \& M. Duranti, Journal of Physics: Conference Series, 664 (2015), 072016
\bibitem{DODAS}
D. Spiga {\it et al.}
%“DODAS: How to effectively exploit heterogeneous clouds for scientific computations”,
PoS(ISGC-2018 \& FCDD) {\bf 024} doi:\url{https://doi.org/10.22323/1.327.0024}
%\bibitem{xrootd} http://xrootd.org.
......
\documentclass[a4paper]{jpconf}
\usepackage{graphicx}
\begin{document}
\title{The ATLAS Experiment at the INFN CNAF Tier-1}
\title{The ATLAS Experiment at the INFN CNAF Tier 1}
\author{Alessandro De Salvo$^1$, Lorenzo Rinaldi$^2$}
\author{A. De Salvo$^1$, L. Rinaldi$^2$}
\address{$^1$ INFN Sezione di Roma-1, piazzale Aldo Moro 2, 00185 Roma, Italy,\\ $^2$ Universit\`a di Bologna e INFN, via Irnerio 46, 40126 Bologna, Italy}
\address{$^1$ INFN Sezione di Roma-1, Roma, IT}
\address{$^2$ Universit\`a di Bologna e INFN Sezione di Bologna, Bologna, IT}
\ead{alessandro.desalvo@roma1.infn.it, lorenzo.rinaldi@bo.infn.it}
\begin{abstract}
The ATLAS experiment at LHC was fully operating in 2017. In this contribution we describe the ATLAS computing activities performed in the Italian sites of the Collaboration, and in particular the utilisation of the CNAF Tier-1.
The ATLAS experiment at LHC was fully operating in 2018. In this contribution we describe the ATLAS computing activities performed in the Italian sites of the Collaboration, and in particular the utilisation of the CNAF Tier 1.
\end{abstract}
\section{Introduction}
ATLAS \cite{ATLAS-det} is one of two general-purpose detectors at the Large Hadron Collider (LHC). It investigates a wide range of physics, from the search for the Higgs boson and standard model studies to extra dimensions and particles that could make up dark matter. Beams of particles from the LHC collide at the centre of the ATLAS detector making collision debris in the form of new particles, which fly out from the collision point in all directions. Six different detecting subsystems arranged in layers around the collision point record the paths, momentum, and energy of the particles, allowing them to be individually identified. A huge magnet system bends the paths of charged particles so that their momenta can be measured. The interactions in the ATLAS detectors create an enormous flow of data. To digest the data, ATLAS uses an advanced trigger system to tell the detector which events to record and which to ignore. Complex data-acquisition and computing systems are then used to analyse the collision events recorded. At 46 m long, 25 m high and 25 m wide, the 7000-tons ATLAS detector is the largest volume particle detector ever built. It sits in a cavern 100 m below ground near the main CERN site, close to the village of Meyrin in Switzerland.
ATLAS \cite{ATLAS-det} is one of two general-purpose detectors at the Large Hadron Collider (LHC). It investigates a wide range of physics, from the search for the Higgs boson and standard model studies to extra dimensions and particles that could make up dark matter. Beams of particles from the LHC collide at the center of the ATLAS detector making collision debris in the form of new particles, which fly out from the collision point in all directions. Six different detecting subsystems arranged in layers around the collision point record the paths, momentum, and energy of the particles, allowing them to be individually identified. A huge magnet system bends the paths of charged particles so that their momenta can be measured. The interactions in the ATLAS detectors create an enormous flow of data. To digest the data, ATLAS uses an advanced trigger system to tell the detector which events to record and which to ignore. Complex data-acquisition and computing systems are then used to analyse the collision events recorded. At 46 m long, 25 m high and 25 m wide, the 7000-tons ATLAS detector is the largest volume particle detector ever built. It sits in a cavern 100 m below ground near the main CERN site, close to the village of Meyrin in Switzerland.
More than 3000 scientists from 174 institutes in 38 countries work on the ATLAS experiment.
ATLAS has been taking data from 2010 to 2012, at center of mass energies of 7 and 8 TeV, collecting about 5 and 20 fb$^{-1}$ of integrated luminosity, respectively. During the complete Run-2 phase (2015-2018) ATLAS collected and registered at the Tier-0 147 fb$^{-1}$ of integrated luminosity at center of mass energies of 13 TeV.
ATLAS has been taking data from 2010 to 2012, at center of mass energies of 7 and 8 TeV, collecting about 5 and 20 fb$^{-1}$ of integrated luminosity, respectively. During the complete Run-2 phase (2015-2018) ATLAS collected and registered at the Tier 0 147 fb$^{-1}$ of integrated luminosity at center of mass energies of 13 TeV.
The experiment has been designed to look for New Physics over a very large set of final states and signatures, and for precision measurements of known Standard Model (SM) processes. Its most notable result up to now has been the discovery of a new resonance at a mass of about 125 GeV \cite{ATLAS higgs}, followed by the measurement of its properties (mass, production cross sections in various channels and couplings). These measurements have confirmed the compatibility of the new resonance with the Higgs boson, foreseen by the SM but never observed before.
......@@ -30,14 +31,14 @@ The experiment has been designed to look for New Physics over a very large set o
The ATLAS Computing System \cite{ATLAS-cm} is responsible for the provision of the software framework and services, the data management system, user-support services, and the world-wide data access and job-submission system. The development of detector-specific algorithmic code for simulation, calibration, alignment, trigger and reconstruction is under the responsibility of the detector projects, but the Software and Computing Project plans and coordinates these activities across detector boundaries. In particular, a significant effort has been made to ensure that relevant parts of the “offline” framework and event-reconstruction code can be used in the High Level Trigger. Similarly, close cooperation with Physics Coordination and the Combined Performance groups ensures the smooth development of global event-reconstruction code and of software tools for physics analysis.
\subsection{The ATLAS Computing Model}
The ATLAS Computing Model embraces the Grid paradigm and a high degree of decentralisation and sharing of computing resources. The required level of computing resources means that off-site facilities are vital to the operation of ATLAS in a way that was not the case for previous CERN-based experiments. The primary event processing occurs at CERN in a Tier-0 Facility. The RAW data is archived at CERN and copied (along with the primary processed data) to the Tier-1 facilities around the world. These facilities archive the raw data, provide the reprocessing capacity, provide access to the various processed versions, and allow scheduled analysis of the processed data by physics analysis groups. Derived datasets produced by the physics groups are copied to the Tier-2 facilities for further analysis. The Tier-2 facilities also provide the simulation capacity for the experiment, with the simulated data housed at Tier-1s. In addition, Tier-2 centres provide analysis facilities, and some provide the capacity to produce calibrations based on processing raw data. A CERN Analysis Facility provides an additional analysis capacity, with an important role in the calibration and algorithmic development work. ATLAS has adopted an object-oriented approach to software, based primarily on the C++ programming language, but with some components implemented using FORTRAN and Java. A component-based model has been adopted, whereby applications are built up from collections of plug-compatible components based on a variety of configuration files. This capability is supported by a common framework that provides common data-processing support. This approach results in great flexibility in meeting both the basic processing needs of the experiment, but also for responding to changing requirements throughout its lifetime. The heavy use of abstract interfaces allows for different implementations to be provided, supporting different persistency technologies, or optimized for the offline or high-level trigger environments.
The ATLAS Computing Model embraces the Grid paradigm and a high degree of decentralisation and sharing of computing resources. The required level of computing resources means that off-site facilities are vital to the operation of ATLAS in a way that was not the case for previous CERN-based experiments. The primary event processing occurs at CERN in a Tier 0 Facility. The RAW data is archived at CERN and copied (along with the primary processed data) to the Tier 1 facilities around the world. These facilities archive the raw data, provide the reprocessing capacity, provide access to the various processed versions, and allow scheduled analysis of the processed data by physics analysis groups. Derived datasets produced by the physics groups are copied to the Tier 2 facilities for further analysis. The Tier 2 facilities also provide the simulation capacity for the experiment, with the simulated data housed at Tier 1 centers. In addition, Tier 2 centers provide analysis facilities, and some provide the capacity to produce calibrations based on processing raw data. A CERN Analysis Facility provides an additional analysis capacity, with an important role in the calibration and algorithmic development work. ATLAS has adopted an object-oriented approach to software, based primarily on the C++ programming language, but with some components implemented using FORTRAN and Java. A component-based model has been adopted, whereby applications are built up from collections of plug-compatible components based on a variety of configuration files. This capability is supported by a common framework that provides common data-processing support. This approach results in great flexibility in meeting both the basic processing needs of the experiment, but also for responding to changing requirements throughout its lifetime. The heavy use of abstract interfaces allows for different implementations to be provided, supporting different persistency technologies, or optimized for the offline or high-level trigger environments.
The Athena framework is an enhanced version of the Gaudi framework that was originally developed by the LHCb experiment, but is now a common ATLAS-LHCb project. Major
design principles are the clear separation of data and algorithms, and between transient (in-memory) and persistent (in-file) data. All levels of processing of ATLAS data, from high-level trigger to event simulation, reconstruction and analysis, take place within the Athena framework; in this way it is easier for code developers and users to test and run algorithmic code, with the assurance that all geometry and conditions data will be the same for all types of applications (simulation, reconstruction, analysis, visualization).
One of the principal challenges for ATLAS computing is to develop and operate a data storage and management infrastructure able to meet the demands of a yearly data volume of O(10PB) utilized by data processing and analysis activities spread around the world. The ATLAS Computing Model establishes the environment and operational requirements that ATLAS data-handling systems must support and provides the primary guidance for the development of the data management systems.
The ATLAS Databases and Data Management Project (DB Project) leads and coordinates ATLAS activities in these areas, with a scope encompassing technical data bases (detector production, installation and survey data), detector geometry, online/TDAQ databases, conditions databases (online and offline), event data, offline processing configuration and bookkeeping, distributed data management, and distributed database and data management services. The project is responsible for ensuring the coherent development, integration and operational capability of the distributed database and data management software and infrastructure for ATLAS across these areas.
The ATLAS Computing Model defines the distribution of raw and processed data to Tier-1 and Tier-2 centres, so as to be able to exploit fully the computing resources that are made available to the Collaboration. Additional computing resources are available for data processing and analysis at Tier-3 centres and other computing facilities to which ATLAS may have access. A complex set of tools and distributed services, enabling the automatic distribution and processing of the large amounts of data, has been developed and deployed by ATLAS in cooperation with the LHC Computing Grid (LCG) Project and with the middleware providers of the three large Grid infrastructures we use: EGI, OSG and NorduGrid. The tools are designed in a flexible way, in order to have the possibility to extend them to use other types of Grid middleware in the future.
The main computing operations that ATLAS have to run comprise the preparation, distribution and validation of ATLAS software, and the computing and data management operations run centrally on Tier-0, Tier-1s and Tier-2s. The ATLAS Virtual Organization allows production and analysis users to run jobs and access data at remote sites using the ATLAS-developed Grid tools.
The Computing Model, together with the knowledge of the resources needed to store and process each ATLAS event, gives rise to estimates of required resources that can be used to design and set up the various facilities. It is not assumed that all Tier-1s or Tier-2s are of the same size; however, in order to ensure a smooth operation of the Computing Model, all Tier-1s usually have broadly similar proportions of disk, tape and CPU, and similarly for the Tier-2s.
The ATLAS Computing Model defines the distribution of raw and processed data to Tier 1 and Tier 2 centers, so as to be able to exploit fully the computing resources that are made available to the Collaboration. Additional computing resources are available for data processing and analysis at Tier 3 centers and other computing facilities to which ATLAS may have access. A complex set of tools and distributed services, enabling the automatic distribution and processing of the large amounts of data, has been developed and deployed by ATLAS in cooperation with the LHC Computing Grid (LCG) Project and with the middleware providers of the three large Grid infrastructures we use: EGI, OSG and NorduGrid. The tools are designed in a flexible way, in order to have the possibility to extend them to use other types of Grid middleware in the future.
The main computing operations that ATLAS have to run comprise the preparation, distribution and validation of ATLAS software, and the computing and data management operations run centrally on Tier 0, Tier 1 sites and Tier 2 sites. The ATLAS Virtual Organization allows production and analysis users to run jobs and access data at remote sites using the ATLAS-developed Grid tools.
The Computing Model, together with the knowledge of the resources needed to store and process each ATLAS event, gives rise to estimates of required resources that can be used to design and set up the various facilities. It is not assumed that all Tier 1 sites or Tier 2 sites are of the same size; however, in order to ensure a smooth operation of the Computing Model, all Tier 1 centers usually have broadly similar proportions of disk, tape and CPU, and similarly for the Tier 2 sites.
The organization of the ATLAS Software and Computing Project reflects all areas of activity within the project itself. Strong high-level links are established with other parts of the ATLAS organization, such as the TDAQ Project and Physics Coordination, through cross-representation in the respective steering boards. The Computing Management
Board, and in particular the Planning Officer, acts to make sure that software and computing developments take place coherently across sub-systems and that the project as a whole meets its milestones. The International Computing Board assures the information flow between the ATLAS Software and Computing Project and the national resources and their Funding Agencies.
......@@ -45,22 +46,22 @@ Board, and in particular the Planning Officer, acts to make sure that software a
\section{The role of the Italian Computing facilities in the global ATLAS Computing}
Italy provides Tier-1, Tier-2 and Tier-3 facilities to the ATLAS collaboration. The Tier-1, located at CNAF, Bologna, is the main centre, also referred as “regional” centre. The Tier-2 centres are distributed in different areas of Italy, namely in Frascati, Napoli, Milano and Roma. All 4 Tier-2 sites are considered as Direct Tier-2 (T2D), meaning that they have an higher importance with respect to normal Tier-2s and can have primary data too. They are also considered satellites of the Tier-1, also identified as nucleus. The total of the Tier-2 sites corresponds to more than the total ATLAS size at the Tier-1, for what concerns disk and CPUs; tape is not available in the Tier-2 sites. A third category of sites is the so-called Tier-3 centres. Those are smaller centres, scattered in different places in Italy, that nevertheless contributes in a consistent way to the overall computing power, in terms of disk and CPUs. The overall size of the Tier-3 sites corresponds roughly to the size of a Tier-2 site. The Tier-1 and Tier-2 sites have pledged resources, while the Tier-3 sites do not have any pledge resource available.
In terms of pledged resources, Italy contributes to the ATLAS computing as 9\% of both CPU and disk for the Tier-1. The share of the Tier-2 facilities corresponds to 7\% of disk and 9\% of CPU of the whole ATLAS computing infrastructure. The Italian Tier-1, together with the other Italian centres, provides both resources and expertise to the ATLAS computing community, and manages the so-called Italian Cloud of computing. Since 2015 the Italian Cloud does not only include Italian sites, but also Tier-3 sites of other countries, namely South Africa and Greece.
The computing resources, in terms of disk, tape and CPU, available in the Tier-1 at CNAF have been very important for all kind of activities, including event generation, simulation, reconstruction, reprocessing and analysis, for both MonteCarlo and real data. Its major contribution has been the data reprocessing, since this is a very I/O and memory intense operation, normally executed only in Tier-1 centres. In this sense CNAF has played a fundamental role for the fine measurement of the Higgs [3] properties in 2018 and other analysis. The Italian centres, including CNAF, have been very active not only in the operation side, but contributed a lot in various aspect of the Computing of the ATLAS experiment, in particular for what concerns the network, the storage systems, the storage federations and the monitoring tools. The Tier-1 at CNAF has been very important for the ATLAS community in 2018, for some specific activities:
Italy provides Tier 1, Tier 2 and Tier 3 facilities to the ATLAS collaboration. The Tier 1, located at CNAF, Bologna, is the main center, also referred to as “regional” center. The Tier 2 centers are distributed in different areas of Italy, namely in Frascati, Napoli, Milano and Roma. All 4 Tier 2 sites are considered as Direct Tier 2 (T2D), meaning that they have a higher importance with respect to normal Tier 2s and can have primary data too. They are also considered satellites of the Tier 1, also identified as nucleus. The total of the Tier 2 sites corresponds to more than the total ATLAS size at the Tier 1, for what concerns disk and CPUs; tape is not available in the Tier 2 sites. A third category of sites is the so-called Tier 3 centers. Those are smaller centers, scattered in different places in Italy, that nevertheless contribute in a consistent way to the overall computing power, in terms of disk and CPUs. The overall size of the Tier 3 sites corresponds roughly to the size of a Tier 2 site. The Tier 1 and Tier 2 sites have pledged resources, while the Tier 3 sites do not have any pledged resources available.
In terms of pledged resources, Italy contributes to the ATLAS computing as 9\% of both CPU and disk for the Tier 1. The share of the Tier 2 facilities corresponds to 7\% of disk and 9\% of CPU of the whole ATLAS computing infrastructure. The Italian Tier 1, together with the other Italian centers, provides both resources and expertise to the ATLAS computing community, and manages the so-called Italian Cloud of computing. Since 2015 the Italian Cloud does not only include Italian sites, but also Tier 3 sites of other countries, namely South Africa and Greece.
The computing resources, in terms of disk, tape and CPU, available in the Tier 1 at CNAF have been very important for all kinds of activities, including event generation, simulation, reconstruction, reprocessing and analysis, for both Monte Carlo and real data. Its major contribution has been the data reprocessing, since this is a very I/O- and memory-intensive operation, normally executed only in Tier 1 centers. In this sense CNAF has played a fundamental role for the fine measurement of the Higgs [3] properties in 2018 and other analyses. The Italian centers, including CNAF, have been very active not only on the operations side, but contributed a lot to various aspects of the computing of the ATLAS experiment, in particular for what concerns the network, the storage systems, the storage federations and the monitoring tools. The Tier 1 at CNAF has been very important for the ATLAS community in 2018, for some specific activities:
\begin{itemize}
\item improvements of the WebDAV/HTTPS access for StoRM, to be used as the main renaming method for the ATLAS files in StoRM and for http federation purposes (a minimal renaming sketch is shown after this list);
\item improvements of the dynamic model of the multi-core resources operated via the LSF resource management system and simplification of the PanDA queues, using the Harvester service to mediate the control and information flow between PanDA and the resources.
\item network troubleshooting via the Perfsonar-PS network monitoring system, used for the LHCONE overlay network, together with the other Tier-1 and Tier-2 sites;
\item network troubleshooting via the Perfsonar-PS network monitoring system, used for the LHCONE overlay network, together with the other Tier 1 and Tier 2 sites;
\item planning, readiness testing and implementation of the HTCondor batch system for the farming resources management.
\end{itemize}
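
A minimal sketch of the WebDAV-based renaming mentioned in the first item is given below; the endpoint URL, the file paths and the use of a VOMS proxy as client certificate are illustrative assumptions, not the actual ATLAS or StoRM configuration.

\begin{verbatim}
# Minimal sketch: rename a file over WebDAV/HTTPS with a MOVE
# request. Endpoint, paths and proxy location are placeholders.
import requests

endpoint = "https://storm-webdav.example.cnaf.infn.it:8443"
old_path = "/atlasdatadisk/rucio/scratch/file.tmp"
new_path = "/atlasdatadisk/rucio/data18/file.root"
proxy = "/tmp/x509up_u1000"   # client certificate, e.g. a VOMS proxy

resp = requests.request(
    "MOVE",
    endpoint + old_path,
    headers={"Destination": endpoint + new_path},
    cert=proxy,                                 # cert and key in one file
    verify="/etc/grid-security/certificates",   # CA directory
)
resp.raise_for_status()   # 201/204 expected on a successful rename
\end{verbatim}
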
\section{Main achievements of ATLAS Computing centers in Italy}
The Italian Tier-2 Federation runs all the ATLAS computing activities in the Italian cloud supporting the operations at CNAF, the Italian Tier-1 centre, and the Milano, Napoli, Roma1 and Frascati Tier-2 sites. This insures an optimized use of the resources and a fair and efficient data access. The computing activities of the ATLAS collaboration have been constantly carried out over the whole 2018, in order to analyse the data of the Run-2 and produce the Monte Carlo data needed for the 2018 run.
The Italian Tier 2 Federation runs all the ATLAS computing activities in the Italian cloud supporting the operations at CNAF, the Italian Tier 1 center, and the Milano, Napoli, Roma1 and Frascati Tier 2 sites. This ensures an optimized use of the resources and a fair and efficient data access. The computing activities of the ATLAS collaboration have been constantly carried out over the whole of 2018, in order to analyse the Run-2 data and produce the Monte Carlo data needed for the 2018 run.
The LHC data taking started in April 2018 and, until the end of the operation in December 2018, all the Italian sites, the CNAF Tier-1 and the four Tier-2s, have been involved in all the computing operations of the collaboration: data reconstruction, Monte Carlo simulation, user and group analysis and data transfer among all the sites. Besides these activities, the Italian centers have contributed to the upgrade of the Computing Model both from the testing side and the development of specific working groups. ATLAS collected and registered at the Tier-0 ~60.6 fb$^{-1}$ and ~25 PB of raw and derived data, while the cumulative data volume distributed in all the data centers in the grid was of the order of ~80 PB. The data has been replicated with an efficiency of 100\% and an average throughput of the order of ~13 GB/s during the data taking period, with peaks above 25 GB/s. For just Italy, the average throughput was of the order of 800 MB/s with peaks above 2GB/s. The data replication speed from Tier-0 to the Tier-2s has been quite fast with a transfer time lower than 4 hours. The average number of simultaneous jobs running on the grid has been of about 110k for production (simulation and reconstruction) and data analysis, with peaks over 150k, with an average CPU efficiency up to more than 80\%. The use of the grid for analysis has been stable on ~26k simultaneous jobs, with peaks around the conferences’ periods to over 40k, showing the reliability and effectiveness of the use of grid tools for data analysis.
The LHC data taking started in April 2018 and, until the end of the operation in December 2018, all the Italian sites, the CNAF Tier 1 and the four Tier 2 sites, have been involved in all the computing operations of the collaboration: data reconstruction, Monte Carlo simulation, user and group analysis and data transfer among all the sites. Besides these activities, the Italian centers have contributed to the upgrade of the Computing Model, both on the testing side and in the development of specific working groups. ATLAS collected and registered at the Tier 0 $\sim$60.6 fb$^{-1}$ and $\sim$25 PB of raw and derived data, while the cumulative data volume distributed in all the data centers in the grid was of the order of $\sim$80 PB. The data has been replicated with an efficiency of 100\% and an average throughput of the order of $\sim$13 GB/s during the data taking period, with peaks above 25 GB/s. For Italy alone, the average throughput was of the order of 800 MB/s with peaks above 2 GB/s. The data replication from Tier 0 to the Tier 2 sites has been quite fast, with a transfer time lower than 4 hours. The average number of simultaneous jobs running on the grid has been about 110k for production (simulation and reconstruction) and data analysis, with peaks over 150k, with an average CPU efficiency up to more than 80\%. The use of the grid for analysis has been stable at $\sim$26k simultaneous jobs, with peaks over 40k around conference periods, showing the reliability and effectiveness of the use of grid tools for data analysis.
The Italian sites contributed to the development of the Xrootd and http/webdav federation. In the latter case the access to the storage resources is managed using the http/webdav protocol, in collaboration with the CERN DPM team, the Belle2 experiment, the Canadian Corporate Cloud and the RAL (UK) site. The purpose is to build a reliable storage federation, alternative to the Xrootd one, to access physics data both on the grid and on cloud storage infrastructures (like Amazon S3, Microsoft Azure, etc).
......@@ -68,7 +69,7 @@ The Italian community also contributes to develop new tools for distributed data
The contribution of the Italian sites to the computing activities in terms of processed jobs and data recorded has been about 9\%, in line with the resources pledged to the collaboration, with very good performance in terms of availability, reliability and efficiency. All the sites are always in the top positions in the ranking of the collaboration sites.
Besides the Tier-1 and Tier-2s, in 2018 also the Tier-3s gave a significant contribution to the Italian physicists community for the data analysis. The Tier-3s are local farms dedicated to the interactive data analysis, the last step of the analysis workflow, and to the grid analysis over small data sample. Several italian groups set up a farm for such a purpose in their universities and, after a testing and validation process performed by the distributed computing team of the collaboration, all have been recognized as official Tier-3s of the collaboration.
Besides the Tier 1 and Tier 2 sites, in 2018 also the Tier 3 sites gave a significant contribution to the Italian physics community for the data analysis. The Tier 3 centers are local farms dedicated to the interactive data analysis, the last step of the analysis workflow, and to the grid analysis over small data samples. Several Italian groups set up a farm for such a purpose in their universities and, after a testing and validation process performed by the distributed computing team of the collaboration, all have been recognized as official Tier 3 sites of the collaboration.
......@@ -76,7 +77,7 @@ Besides the Tier-1 and Tier-2s, in 2018 also the Tier-3s gave a significant cont
The ATLAS Computing Model was designed to have a sufficient redundancy of the available resources in order to tackle emergency situations like the flooding occurred on November 9th 2017 at CNAF. Thanks to the huge effort of the whole community of the CNAF, the operativity of the data centre restarted gradually from the second half of February 2018. A continuous interaction between ATLAS distributed computing community and CNAF people was needed to bring the computing operation fully back to normality. The deep collaboration was very successful and after one month the site was almost fully operational and the ATLAS data management and processing activities were running smoothly again. Eventually, the overall impact of the incident was limited enough, mainly thanks to the relatively quick recovery of the CNAF data center and to the robustness of the computing model.
The ATLAS Computing Model was designed to have a sufficient redundancy of the available resources in order to tackle emergency situations like the flooding that occurred on November 9th 2017 at CNAF. Thanks to the huge effort of the whole CNAF community, the operation of the data center restarted gradually from the second half of February 2018. A continuous interaction between the ATLAS distributed computing community and CNAF people was needed to bring the computing operations fully back to normality. The deep collaboration was very successful and after one month the site was almost fully operational and the ATLAS data management and processing activities were running smoothly again. Eventually, the overall impact of the incident was rather limited, mainly thanks to the relatively quick recovery of the CNAF data center and to the robustness of the computing model.
......
\documentclass[a4paper]{jpconf}
\usepackage{graphicx}
\bibliographystyle{iopart-num}
\begin{document}
......@@ -17,28 +18,136 @@ This article describes the startup activity for the group.
\end{abstract}
\section{Compliance Auditing Group}
\subsection{Rationale for creation}
When discussing GDPR application during the Commissione Calcolo e Reti
(CCR) 2018 workshop in Rimini, it became clear that setting up
a set of rules and assuming that all parts of INFN would correctly
follow them was not, by itself, enough. Indeed it was necessary to
comply with the duty of vigilance, which in turn required periodic
checkups.
\subsection{GDPR Introduction}
The GDPR, or EU Regulation 2016/679, is a European regulation which aims
to govern the collection and use of personal data. This law introduces
several innovations when compared to the previous law dealing with data
protection.
The GDPR predicates its data management philosophy on a few high-level
principles, namely \emph{lawfulness}, \emph{fairness},
\emph{transparency}, \emph{purpose limitation}, \emph{data
  minimisation}, \emph{accuracy}, \emph{storage limitation},
\emph{integrity and confidentiality} and finally
\emph{accountability}, which are clearly delineated in the second
chapter, with the rest of the law further characterizing and
contextualizing them.
Before delving any further, it is important to correctly define these principles:
\begin{itemize}
\item Lawfulness means that at any moment in time there must be a
valid legal justification for the treatment of personal data.
This can be an existing law that specifically allows a treatment,
or one from a set of reasons explicitly listed in the GDPR
itself. If none of those applies, lawfulness may be granted by an
  explicit permission from the owner of the data, a permission that is
  only valid for the specific treatment for which it was obtained.
Any further usage of the data requires a new explicit permission.
\item Fairness and transparency mean that any usage of data must be
  known to the owner, and such usage must be ``fair.''
\item Purpose limitation means that data collected for a specific
purpose \emph{cannot} be used for any other purpose without an
explicit authorization.
\item Data minimization means that only the data that is relevant for
the purpose for which it is collected must be collected and kept.
\item Accuracy means that the data should be accurate and, if
necessary, kept up-to-date. Data that is inaccurate should be
  deleted or corrected.
\item Storage limitation means that data should not be kept in a form
that permits personal identification for longer than is required by
the purpose for which it was collected.
\item Integrity and confidentiality means that all collected data must
be kept secret for all the time they are kept, and that they should
be preserved in a form that would preserve them from corruption.
Furthermore, measures must be taken to preempt disclosure, even just
accidental or as a consequence of a crime, to unauthorized persons.
\item Accountability means that the entity or entities that decide
  for what purpose data is collected and how it is processed are
  responsible for, and must be able to demonstrate, compliance with
  the GDPR.
\end{itemize}
The GDPR does not describe how, exactly, these principles should be
implemented in practice, leaving instead to the entities that are
accountable for respecting it full freedom to decide how to satisfy
them.
The disruptive effect of the regulation is therefore clear when
compared to the pre-existing Italian Privacy Law (NNN), which instead
clearly described a set of rules that \emph{had} to be respected.
This means that the organization needs to implement a set of
regulations and instruments to organize people with responsibilities
and skills to handle, manage and check the treatment of personal data.
One organizational measure is actually mandated by the GDPR: the
position of Data Protection Officer (DPO), the organization's
representative who manages dealings with external entities on
personal data issues. The DPO also has a consultative and reference
role for the organization and all users when dealing with privacy
issues.
The implementation of GDPR conformance rests on five concepts that build and
depend on each other like a wheel, as can be seen from the following
figure:
\includegraphics[width=.9\linewidth]{image.png}
To counteract these worries, and to watch over its proper application,
it was soon proposed to create a team, which would take the
name of ``compliance auditors'', whose job was to act as internal
auditors for all INFN structures, checking the proper
application of the regulations as implemented by INFN.
\subsubsection{Organization and Roles}
Starting from the appointment of a DPO, the organizational model must
be formally defined and organized in all its components, defining with
precision the roles and responsibilities of the members of the organization
with regard to the direction and management of all privacy-related issues.
\subsubsection{People, Culture and Skills}
The organization designs and spreads the culture of data protection
and security policies through training and other awareness-raising
activities.
\subsubsection{Processes and Rules}
Starting from a culture of security and data protection, processes and
rules are designed and implemented to ensure privacy by design, data
portability, data breach management, the register of data treatments and
the other elements whose existence is mandated by the GDPR.
\subsubsection{Technologies and Tools}
Technologies and tools to implement the processes and rules defined in
the previous point (e.g.\ antivirus, firewalls, encryption
algorithms, identity management) are chosen and put into
production.
\subsubsection{Control System}
A monitoring system must be created to oversee the compliance of
the organization with laws (e.g.\ the GDPR) and with internal
processes and regulations. A key tool of this monitoring is the
realization of audits, internal or external.
\subsection{Rationale for creation}
Since the required vigilance is a fundamental process for
GDPR compliance, a group of people was formed in INFN, called
``Compliance Auditing INFN'', whose duty is the verification that
the actual behaviour of the different INFN structures complies
with both external (e.g.\ GDPR) and internal (e.g.\ ``Norme per
l'uso delle risorse informatiche'') norms.
Proposed in the first half of 2018, the group is staffed by Patrizia
Belluomo (Lead Auditor, INFN Catania) and Vincenzo Ciaschini (Auditor,
INFN CNAF) who have experience in auditing due to having ISO 27001
certifications.
\subsection{Startup Activity}
Following the proposal of the group's creation, the first task to solve
was how to staff it. Two people who had previous experience with the
setup of ISO compliance structures for some INFN sections
volunteered: Patrizia Belluomo (Lead Auditor, Sezione di Catania) and
Vincenzo Ciaschini (CNAF).
\subsubsection{Operative Plan}
The first activity undertaken by the group was the collection, followed
by the study, of all the norms applicable to INFN's implementation of
the GDPR, such as the text of the regulation itself, other applicable Italian
......@@ -47,28 +156,63 @@ several INFN regulations that, while not specifically talking about
GDPR, still governed issues that were related to it, e.g.\ data
retention policies.
We also had to decide how to structure the audits. We decided to
implement them according to well-known quality assurance principles. To
apply these principles, we ended up deciding on a set of topics
that would be investigated during the audits, and a set of questions
that could, but not necessarily would, be asked during the audits
themselves, to act as a set of guidelines and to permit INFN
structures to prepare properly.
When the group was formally approved, these procedures were
presented at the CCR workshop in Pisa in October, and an indicative
calendar for the audits was created and sent to the structures as a
proposal on when they would be audited.
Due to budget limitations, it was also decided that, at least for the
first year, most of the audits would be done by telepresence, with
on-site audits reserved for the sections that had, or would have, the
most critical data, i.e.\ the structures that hosted or would host
INFN's Sistema Informativo.
The rest of the year was devoted to refining this organization and
preparing the formal documentation that would be the output of the
audits and the procedures that we would follow during the audits,
which began in earnest on 9 January 2019, but that would be out of
scope for 2018's Annual Report.
Preparation for the audits entailed several months of study of the
norms applicable to INFN's implementation, like the GDPR itself, other
applicable Italian legislation, the documents describing INFN's
implementation, and several INFN regulations that, while not
specifically talking about the GDPR, still governed issues that were
related to it, e.g.\ data retention policies. From this, we
extracted a set of indications, a ``possible requests index'', that we
shared with all of the INFN structures that would receive an audit.
Another fundamental step that in this case preceded the foundation of
the group was the compilation by each INFN structure of a file
(Minimal Measure Implementation) describing the implementation of a
minimal set of security controls; these files were to be collected in an
official CCR repository along with the corollary information that each
section deemed necessary.
We identified four main points that we had to evaluate:
\begin{description}
\item[Data Discovery and Risk Assessment] Identify the personal data
kept by the structure and where it was stored or used.
\item[Protection] How the data is protected.
\item[Vigilance] How possible threats are discovered and how to
evaluate the extent of a security violation.
\item[Response] Incident Response Preparation and actions to
mitigate impact and reduce future risk.
\end{description}
\subsubsection{Official Documents Preparation}
We decided to implement the audit according to the well-known
principles of the ISO 19011 standard. To adapt them to INFN's
specificities, we created a set of documents and procedures that would
ensure uniformity of judgement and that would make results directly
comparable among successive audits.
We also set up a document repository, which would contain both the
official documentation and the results of all the audits of all the
structures that would be performed. It is hosted inside INFN's official document
repository, Alfresco.
\subsubsection{Audit Planning}
According to the formal procedure, Audit Plans for INFN structures
were formalized and scheduled, and we started contacting the various
parts of INFN to share them. The plan was to complete all audits in the
first half of 2019, starting from January. Budgetary reasons forbade
physically traveling to all the cities that host INFN structures, so most of
the audits were planned to be done in telepresence, with on-site visits
reserved for the 5 structures that housed the most delicate data.
\subsubsection{Structure Feedback}
Finally, we defined a procedure to manage the actions to be undertaken
during the audit and the way feedback is received from the INFN structures.
\section{Conclusion}
With all this work done, we were ready to start, and began
in earnest on January 9, 2019 with our first audit, but that would be out
of scope for 2018's Annual Report.
\end{document}
contributions/audit/image.png


......@@ -3,8 +3,8 @@
\begin{document}
\title{The Borexino experiment at the INFN-CNAF}
\author{Alessandra Carlotta Re\\ \small{on behalf of the BOREXINO collaboration}}
\address{Universit\`{a} degli Studi e INFN di Milano, via Celoria 16, 20133 Milano, Italy}
\author{Alessandra Carlotta Re$^1$\\ \small{on behalf of the BOREXINO collaboration}}
\address{$^1$ Universit\`{a} degli Studi di Milano e INFN Sezione di Milano, Milano, IT}
\ead{alessandra.re@mi.infn.it}
\begin{abstract} %OK
......@@ -45,7 +45,7 @@ The INFN-CNAF currently hosts the whole Borexino data statistics and the users'
The Borexino data are classified into three types:
\begin{itemize}
\item {\bf raw data~} Raw data are compressed binary files with a typical size of about 600 Mb corresponding to a data taking time of $\sim$6h.
\item {\bf root files~~~~} Root files are files containing the Borexino reconstructed events, each organized in a {\tt ROOT TTree}: their typical dimension is $\sim$1Gb.
\item {\bf ROOT files~~~~} ROOT files contain the Borexino reconstructed events, each organized in a {\tt ROOT TTree}: their typical size is $\sim$1Gb.
\item {\bf DSTs~~~~~~~} DST files contain only a selection of events for the high level analyses.
\end{itemize}
......@@ -53,7 +53,7 @@ Borexino standard data taking requires a disk space increase of about 10 Tb/year
The CNAF TAPE area also hosts a full backup of the Borexino rawdata.
Our dedicated front-end machine ({\tt ui-borexino.cr.cnaf.infn.it}) and pledged CPU resources (about 1500 HS06) are used by the Borexino collaboration for root files production, Monte Carlo simulations, interactive and batch analysis jobs.
Our dedicated front-end machine ({\tt ui-borexino.cr.cnaf.infn.it}) and pledged CPU resources (about 1500 HS06) are used by the Borexino collaboration for ROOT file production, Monte Carlo simulations, interactive and batch analysis jobs.
Moreover, a few times a year, an extraordinary peak usage (up to at least 3000 HS06) is needed in order to perform a full reprocessing of the whole data statistics with updated versions of the reconstruction code and/or a massive Monte Carlo generation.
\section{Conclusions} %OK
......
contributions/chnet/ArchDiagram.png


\documentclass[a4paper]{jpconf}
\usepackage[T1]{fontenc}
\usepackage[utf8]{inputenc}
\usepackage{graphicx}
\usepackage{url}
\begin{document}
\title{DHLab: a digital library for the INFN Cultural Heritage Network}
\author{F. Proietti$^1$, L. dell'Agnello$^1$, F. Giacomini$^1$}
\address{$^1$ INFN-CNAF, Bologna, IT}
\ead{fabio.proietti@cnaf.infn.it}
\begin{abstract}
DHLab, as part of the Cultural Heritage Network (CHNet) promoted by
INFN, is a cloud-based environment to process, visualise and analyse
data acquired from members of the network and that will be provided
to technical and non-technical users. DHLab is under development and
currently its main features are a cloud service to upload and manage
the data, a form to assign metadata to uploaded datasets and a
service used to analyze data obtained from XRF measurements.
\end{abstract}
\section{Introduction}
CHNet\footnote{http://chnet.infn.it/} is a network composed of several
INFN teams who devote their expertise in physics research to the study
and diagnostics of Cultural Heritage. By using their existing instruments,
developed for Nuclear Physics, or even by building new ones,
INFN laboratories started to address the needs of archaeologists,
historians, art historians, restorers and conservators. This unified
knowledge can provide useful indications about the correct procedures
to be applied for restoration or conservation, can help to date or
verify, for example, the authenticity of an artwork, and can be used to study
the provenance of raw materials in order to retrace ancient trade
routes. In this context the purpose of the DHLab is to host all the
data acquired by the CHNet laboratories, together with the
descriptions and annotations added by humanists.
\section{Architecture}
The infrastructure system, shown in figure~\ref{fig:architecture},
follows a cloud-based model and can be divided into multiple modular
frontends, providing the interface towards the clients, and a
monolithic backend service.
\begin{figure}[ht]
\begin{center}
\includegraphics[scale=.4]{ArchDiagram.png}
\caption{\label{fig:architecture}High level overview of DHLab
architecture}
\end{center}
\end{figure}
The frontend includes three main blocks: a cloud service, a metadata
form and an application service. Of these, the metadata form, used to
fill details about a work or an analysis (see
section~\ref{sec:metadata-form}), is usable also while offline;
this requirement addresses the use case of an operator who, while
disconnected from the network, needs to fill in the metadata, saving them
as a file on the local machine. The same requirement may be at least
partly satisfied also for the application services.
On the backend side, which is only partially implemented and not yet
even fully designed, we currently expect to have a listener, to
dispatch client requests, two data stores, one for user profiles and
the other for actual datasets, and a set of auxiliary services, for
example to automate the filling of the metadata form and to
standardize some of its fields (see again
section~\ref{sec:metadata-form}).
The entire system is hosted at the CNAF data center.
\section{Technologies and protocols}
As stated above, the design of the system is not yet complete and we
are still investigating different options to address the challenges we
face.
Open aspects concern:
\begin{itemize}
\item the data model, which must accommodate both datasets (possibly
composed of multiple files), the corresponding metadata and a
mechanism to link them together;
\item the authentication and authorization model, which should use as
much as possible standard web technologies and have flexible
mechanisms to authenticate users coming from different institutions,
leveraging their own Identity Providers;
\item how to access the available storage from a client, both to
upload datasets and their metadata and subsequently access them.
\end{itemize}
The experimentation makes use of an installation of
NextCloud~\cite{ref:nextcloud}, an open-source suite of client-server
software for creating and using file hosting services, with
functionality often extended through the use of plugins.
Authentication is based on OpenID Connect~\cite{ref:oidc} and makes
use of the INDIGO-IAM~\cite{ref:iam} service, an Identity and Access
Management product developed within the EU-funded
INDIGO-DataCloud~\cite{ref:indigo} project. INDIGO-IAM offers a
service to manage identities, user enrollment, group membership,
attributes and policies to access distributed resources and services
in a homogeneous and interoperable way; hence it represents a perfect
match to manage users, groups and resources of the CHNet
organization. In particular INDIGO-IAM delegates the authentication of
a user to their home institution identity provider under a trust
agreement.
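
As an illustration of how a DHLab component could interact with the IAM service, the sketch below obtains a token at the IAM token endpoint and presents it as a Bearer credential; the IAM instance URL, client credentials and protected endpoint are purely illustrative assumptions, since the actual client registration is not part of this report.

\begin{verbatim}
# Minimal sketch, assuming a client already registered in a
# (hypothetical) IAM instance; URL and credentials are placeholders.
import requests

iam_token_endpoint = "https://iam.example.cnaf.infn.it/token"
client_id = "dhlab-backend"
client_secret = "not-a-real-secret"

# Standard OAuth2 client-credentials grant at the token endpoint.
resp = requests.post(
    iam_token_endpoint,
    data={"grant_type": "client_credentials", "scope": "openid"},
    auth=(client_id, client_secret),
)
resp.raise_for_status()
access_token = resp.json()["access_token"]

# The token is then presented as a Bearer credential to a
# (hypothetical) DHLab service endpoint.
headers = {"Authorization": "Bearer " + access_token}
print(requests.get("https://dhlab.example.cnaf.infn.it/api/datasets",
                   headers=headers).status_code)
\end{verbatim}
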
NextCloud also offers the possibility to access data via the WebDAV
protocol, allowing users to mount the remote storage on their local
machine and see it as if it were a local disk. This feature becomes
useful when interaction through a web browser is not the most
effective tool, for example for batch or bulk operations.
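
For instance, a bulk upload could be performed directly against the WebDAV endpoint, as in the following minimal sketch; the host name, user and application password are illustrative assumptions.

\begin{verbatim}
# Minimal sketch: upload a dataset file over WebDAV to a
# (hypothetical) NextCloud instance; host, user and app password
# are placeholders.
import requests

host = "https://dhlab.example.cnaf.infn.it"
user = "chnet-operator"
app_password = "not-a-real-password"
webdav_url = (host + "/remote.php/dav/files/" + user
              + "/datasets/scan01.dat")

with open("scan01.dat", "rb") as payload:
    resp = requests.put(webdav_url, data=payload,
                        auth=(user, app_password))
resp.raise_for_status()   # 201 Created (or 204 on overwrite) expected
\end{verbatim}
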
\section{Metadata Form}
\label{sec:metadata-form}
The Metadata form is a web application whose purpose is to associate
metadata with art works, measurement campaigns and analysis
results. The application, written in Typescript~\cite{ref:typescript}
and based on the Angular 2 framework~\cite{ref:angular2}, is under
development; the main deployment option foresees its integration into
the cloud platform, but the combination with
Electron~\cite{ref:electron} makes a desktop application a viable
alternative.
As shown in figure~\ref{fig:metadataSchema}, to fill the metadata form
a user can follow two paths: they can create a \textit{campaign} and
associate it with multiple \textit{sessions} and \textit{analyses} or
they can store information about a single \textit{analysis}. In
particular, each \textit{analysis} can be associated with one or more
\textit{datasets}, the studied \textit{object} (i.e.,~an art work) and
all the information about its \textit{type}, \textit{author},
\textit{holder}, \textit{owner}, etc. In addition, users can provide
information about the analysis type, the operator who performed the
analysis, the devices, components and software used to scan, create or
read the resulting dataset. When completed, the resulting form,
translated into a JSON file, can be saved locally or uploaded to the
remote storage.
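As a purely illustrative example, the main sections of the form could
be modelled in the application with TypeScript interfaces similar to
the following; the field names are simplified guesses inspired by
figure~\ref{fig:metadataSchema} and do not reproduce the agreed-upon
CHNet schema.
\begin{verbatim}
// Simplified, illustrative model of the metadata sections; field
// names are examples only, not the official CHNet schema.
interface ArtObject {
  title: string;
  type: string;        // e.g. painting, manuscript
  author?: string;
  holder?: string;
  owner?: string;
}

interface Analysis {
  type: string;        // e.g. XRF, radiocarbon, thermoluminescence
  operator: string;
  devices: string[];   // instruments used to acquire the data
  software: string[];  // programs used to create or read the dataset
  datasets: string[];  // references to the uploaded dataset files
  object: ArtObject;
}

interface Campaign {
  name: string;
  sessions: { date: string; analyses: Analysis[] }[];
}

// The completed form is serialized to JSON before being saved
// locally or uploaded to the remote storage.
const toJson = (c: Campaign): string => JSON.stringify(c, null, 2);
\end{verbatim}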
\begin{figure}[ht]
\begin{center}
\includegraphics[scale=.4]{metadataSchema.png}
\end{center}
\caption{\label{fig:metadataSchema}Schema of the sections included
in the metadata description.}
\end{figure}
\section{Application services}
DHLab is also designed to provide visualization and analysis services
for some of the stored datasets. Currently a proof-of-concept
application is available to visualize and perform some analyses of
images obtained from XRF scans~\cite{ref:xrf}.
\section{Conclusions}
DHLab is a project born from the need to group, share, catalogue and
reuse data coming from measurements and analyses of cultural heritage
works. It aims at being flexible and usable by people covering
different roles: physicists, computer scientists, cultural heritage
operators. The system is designed and deployed around a core
Cloud-based infrastructure, but some of its parts must also work
offline.
A web application is being developed to fill a form with metadata to
be associated with the collected datasets according to an agreed-upon
schema.
Other web applications are foreseen for the visualization and analysis
of the stored datasets, starting from those coming from XRF,
radiocarbon and thermoluminescence analyses.
\section*{References}
\begin{thebibliography}{9}
\bibitem{ref:nextcloud} NextCloud \url{https://nextcloud.com/}
\bibitem{ref:oidc} OpenId Connect \url{https://openid.net/connect}
\bibitem{ref:iam} A Ceccanti, E Vianello, M Caberletti. (2018,
May). INDIGO Identity and Access Management
(IAM). Zenodo. \url{http://doi.org/10.5281/zenodo.1874790}
\bibitem{ref:indigo} The INDIGO-DataCloud project
\url{https://www.indigo-datacloud.eu/}
\bibitem{ref:typescript} Typescript language
\url{https://www.typescriptlang.org/}
\bibitem{ref:angular2} Angular 2 framework
\url{https://angular.io/}
\bibitem{ref:electron} Electron
\url{https://electronjs.org/}
\bibitem{ref:xrf} Cappelli L, Giacomini F, Taccetti F, Castelli L,
dell'Agnello L. 2016. A web application to analyse XRF scanning data. INFN-CNAF
Annual Report. \url{https://www.cnaf.infn.it/annual-report}
\end{thebibliography}
\end{document}
\documentclass[a4paper]{jpconf}
\usepackage{graphicx}
\begin{document}
\title{The CMS Experiment at the INFN CNAF Tier 1}
\author{Giuseppe Bagliesi$^1$}
\address{$^1$ INFN Sezione di Pisa, Pisa, IT}
\ead{giuseppe.bagliesi@cern.ch}
\begin{abstract}
A brief description of the CMS computing operations during LHC RunII and their recent developments is given. The CMS utilization of the CNAF Tier 1 is described.
\end{abstract}
\section{Introduction}
The CMS Experiment \cite{CMS-descr} at CERN collects and analyses data from the pp collisions at the LHC.
The first physics Run, at a center-of-mass energy of 7-8 TeV, started in late March 2010 and ended in February 2013; more than 25~fb$^{-1}$ of collisions were collected during the Run. RunII, at 13 TeV, started in 2015 and finished at the end of 2018.
During the first two years of RunII, LHC has been able to largely exceed its design parameters: already in 2016 the instantaneous luminosity reached $1.5\times 10^{34}\mathrm{cm^{-2}s^{-1}}$, 50\% more than the planned “high luminosity” LHC phase. The most astonishing achievement, still, is the huge improvement in the fraction of time LHC can serve physics collisions, increased from about 35\% in RunI to more than 80\% in some months of 2016.
The most visible effect, computing-wise, is a large increase in the amount of data to be stored, processed and analysed offline, with 2016 allowing for the collection of more than 40 fb$^{-1}$ of physics data.
In 2017 CMS recorded more than 46 fb$^{-1}$ of pp collisions, in addition to the data collected during 2016. These data were collected under considerably higher than expected pileup conditions, forcing CMS to request a lumi-levelling to PU~55 for the first hours of the LHC fill; this challenged both the computing system and CMS analysts, who had to deal with more complex events than foreseen by the modelling. From the computing operations side, higher pileup meant larger events and more time to process them than anticipated in the 2017 planning. As these data taking conditions affected only the second part of the year, the average 2017 pileup was in line with that used during the CMS resource planning.
2018 was another excellent year for LHC operations and luminosity delivered to the experiments. CMS recorded 64 fb$^{-1}$ of pp collisions during 2018, in addition to the 84 fb$^{-1}$ collected during 2016 and 2017. This brings the total luminosity delivered in RunII to more than 150 fb$^{-1}$, and the total RunI + RunII dataset to about 180 fb$^{-1}$.
\section{Run II computing operations}
During Run~II, the 2004 computing model designed for Run~I has greatly evolved. The MONARC hierarchical division of sites into Tier 0, Tier 1s and Tier 2s is still present, but less relevant during operations. All simulation, analysis and processing workflows can now be executed at virtually any site, with a full transfer mesh allowing for point-to-point data movement, outside the rigid hierarchy.
Remote access to data, using WAN-aware protocols like XrootD and data federations, is used more and more instead of planned data movement, allowing for an easier exploitation of CPU resources.
Opportunistic computing is becoming a key component, with CMS having explored access to HPC systems and commercial clouds, and being capable of running its workflows on virtually any (sizeable) resource it has access to.
In 2018 CMS deployed Singularity \cite{singu} to all sites supporting the CMS VO. Singularity is a container solution which allows CMS to select the OS on a per-job basis and decouples the OS of the worker nodes from that required by the experiments. Sites can set up worker nodes with a Singularity-supported OS and CMS will choose the appropriate OS image for each job.
CMS deployed a new version of the prompt reconstruction software in July 2018, during LHC Machine Development 2. This software is adapted to the detector upgrades and data taking conditions, and reaches production-level alignment and calibration algorithms. Data collected before this point have now been reprocessed into a fully consistent data set for analysis, in time for the Moriond 2019 conference. Production and distributed analysis activities continued at a very high level throughout 2018. The MC17 campaign, to be used for the Winter and Summer 18 conferences, continued throughout the year, with decreasing utilization of resources; overall, more than 15 billion events were available by the Summer. The equivalent simulation campaign for 2018 data, MC18, started in October 2018 and is now almost completed.
Developments to increase CMS throughput and disk usage efficiency continue. Of particular interest is the development of the NanoAOD data tier as a new alternative for analysis users.
The NanoAOD size per event is approximately 1 kB, 30-50 times smaller than the MiniAOD data tier, and relies only on simple data types rather than the hierarchical data format structure of the CMS MiniAOD (and AOD) data tiers. NanoAOD samples for the 2016, 2017 and 2018 data and the corresponding Monte Carlo simulation have been produced, and are being used in many analyses. NanoAOD is now automatically produced in all the central production campaigns, and fast reprocessing campaigns from MiniAOD to NanoAOD have been tested and are able to achieve more than 4 billion events per day using only a fraction of the CMS resources.
\section{CMS WLCG Resources and expected increase}
The CMS computing model has been used to request resources for the 2018-19 RunII data taking and reprocessing, with total requests (Tier 0 + Tier 1s + Tier 2s) exceeding 2073 kHS06, 172 PB on disk, and 320 PB on tape.
However, the actual pledged resources have been substantially lower than the requests due to budget restrictions from the funding agencies. To reduce the impact of this issue, CMS was able to achieve and deploy several technological advancements, including reducing the needed amount of AOD(SIM) on disk and the amount of simulated raw events on tape. In addition, some computing resource providers were able to provide more than their pledged level of resources to CMS during 2018.
Thanks to the optimizations and technological improvements described above, it has been possible to tune the CMS computing model accordingly. Year-by-year increases, which would have been large with the reference computing model, have been reduced substantially.
Italy contributes to CMS computing with 13\% of the Tier 1 and Tier 2 resources. The increase of the CNAF pledges for 2019 has been reduced by a factor of two with respect to the original request, due to INFN budget limitations, and the remaining increase has been postponed to 2021.
The 2019 total pledges are therefore 78 kHS06 of CPU, 8020 TB of disk, and 26 PB of tape.
CMS usage of CNAF is very intense: it is one of the largest Tier 1 sites in CMS in terms of processed hours, second only to the US Tier 1; the same holds for the total number of processed jobs, as shown in Fig.~\ref{cms-jobs}.
\begin{figure}
\begin{center}
\includegraphics[width=0.8\textwidth,bb=0 0 900 900]{tier1-jobs-2018.pdf}
\end{center}
\caption{\label{cms-jobs}Jobs processed at CMS Tier 1 sites during 2018}
\end{figure}
\section{The CNAF flood incident}
On November 9th, 2017 a major incident occurred when the CNAF computing center was flooded.
This caused an interruption of all CNAF services and damage to many disk arrays and servers, as well as to the tape library. About 40 damaged tapes (out of a total of 150) belonged to CMS. They contained the unique copy of MC and RECO data. Six tapes contained a second custodial copy of RAW data.
A special recovery procedure was adopted by the CNAF team through a specialized company and no data were permanently lost.
The impact of this incident on CMS, although serious, was mitigated thanks to the intrinsic redundancy of our distributed computing model. Other Tier 1 sites temporarily increased their share to compensate for the CPU loss, deploying the 2018 pledges as soon as possible.
A full recovery of the CMS services at CNAF was achieved by the beginning of March 2018.
It is important to point out that, despite the incident affecting the first months of 2018, the integrated site readiness of CNAF in 2018 was very good, at the same level as or better than the other CMS Tier 1 sites, see Fig.~\ref{tier1-cms-sr}.
\begin{figure}
\begin{center}
\includegraphics[width=0.8\textwidth,bb=0 0 900 900]{tier1-readiness-2018.pdf}
\end{center}
\caption{\label{tier1-cms-sr}Site readiness of CMS Tier 1s in 2018}
\end{figure}
\section{Conclusions}
CNAF is an important asset for the CMS Collaboration, being the second largest Tier 1 in terms of resource utilization, pledges and availability.
The unfortunate incident at the end of 2017 was managed professionally and efficiently by the CNAF staff, guaranteeing the fastest possible recovery, with minimal data losses, at the beginning of 2018.
\section*{References}
\begin{thebibliography}{9}
\bibitem{CMS-descr}CMS Collaboration, The CMS experiment at the CERN LHC, JINST 3 (2008) S08004,
doi:10.1088/1748-0221/3/08/S08004.
\bibitem{singu} http://singularity.lbl.gov/
\end{thebibliography}
\end{document}
......@@ -7,14 +7,14 @@
\title{CNAF Provisioning system: Puppet 5 upgrade}
\author{
Stefano Bovina$^1$,
Diego Michelotto$^1$,
Enrico Fattibene$^1$,
Antonio Falabella$^1$,
Andrea Chierici$^1$
S. Bovina$^1$,
D. Michelotto$^1$,
E. Fattibene$^1$,
A. Falabella$^1$,
A. Chierici$^1$
}
\address{$^1$ INFN CNAF, Viale Berti Pichat 6/2, 40126, Bologna, Italy}
\address{$^1$ INFN-CNAF, Bologna, IT}
\ead{
stefano.bovina@cnaf.infn.it,
......@@ -26,13 +26,13 @@
\begin{abstract}
Since 2015 CNAF departments can take advantage of a common provisioning system based on Foreman/Puppet to install and configure heterogeneous sets of physical and virtual machines.
During 2017 and 2018, the CNAF provisioning system, previously based on Puppet~\cite{ref:puppet} version 3, has been upgraded, since that version reached "End-of-life" on 31/12/2016.
During 2017 and 2018, the CNAF provisioning system, previously based on Puppet~\cite{ref:puppet} version 3, has been upgraded, since that version reached ``End-of-life'' on 31/12/2016.
Due to other higher priority tasks, the start of this activity was postponed to 2017.
In this report we describe the activities that have been carried out in order to finalize the migration from Puppet 3 to Puppet 5.
\end{abstract}
\section{Provisioning at CNAF}
The installation and configuration activity, in a big computing centre like CNAF, must take into account the size of the resources
The installation and configuration activity, in a big computing center like CNAF, must take into account the size of the resources
(roughly a thousand nodes to manage), the heterogeneity of the systems (virtual vs physical nodes, computing nodes and different types of servers)
and the different working groups in charge of their management.
To meet this challenge CNAF implemented a unique solution, adopted by all the departments,
......
File added
......@@ -5,13 +5,12 @@
\begin{document}
\title{The Cherenkov Telescope Array}
\author{L. Arrabito$^1$, C. Bigongiari$^2$, F. Di Pierro$^3$ and P. Vallania$^{3,4}$}
\author{L. Arrabito$^1$, C. Bigongiari$^2$, F. Di Pierro$^3$, P. Vallania$^{3,4}$}
\address{$^1$ Laboratoire Univers et Particules de Montpellier, Universit\'e de Montpellier II Place Eug\`ene Bataillon - CC 72, CNRS/IN2P3,
F-34095 Montpellier, France}
\address{$^2$ INAF Osservatorio Astronomico di Roma - Via Frascati 33, 00040, Monte Porzio Catone (RM), Italy}
\address{$^3$ INFN Sezione di Torino - Via Pietro Giuria 1, 10125, Torino (TO), Italy}
\address{$^4$ INAF Osservatorio Astrofisico di Torino - Via Pietro Giuria 1, 10125, Torino (TO), Italy}
\address{$^1$ Laboratoire Univers et Particules de Montpellier et Universit\'e de Montpellier II, Montpellier, FR}
\address{$^2$ INAF Osservatorio Astronomico di Roma, Monte Porzio Catone (RM), IT}
\address{$^3$ INFN Sezione di Torino, Torino, IT}
\address{$^4$ INAF Osservatorio Astrofisico di Torino, Torino, IT}
\ead{arrabito@in2p3.fr, ciro.bigongiari@oa-roma.inaf.it, federico.dipierro@to.infn.it, piero.vallania@to.infn.it}
......@@ -55,7 +54,7 @@ The northern hemisphere array instead will consist of telescopes of two differen
The CTA observatory with its two arrays will be operated by one single consortium and a significant and increasing fraction of the observation time will be open to the general astrophysical community to maximize CTA scientific return.
The CTA project has entered the pre-construction phase. The first Large Size Telescope has been inaugurated in October 2018, according to the schedule (see Fig. \ref{CtaTimeline}), at the La Palma CTA Northern Site. During 2019 the construction of 3 more LSTs will start. In December 2018 another telescope prototype, the Dual Mirror Medium Size Telescope, was also inaugurated at the Mount Whipple Observatory (Arizona, US).
The CTA project has entered the pre-construction phase. The first Large Size Telescope (LST) has been inaugurated in October 2018, according to the schedule (see Fig. \ref{CtaTimeline}), at the La Palma CTA Northern Site. During 2019 the construction of 3 more LSTs will start. In December 2018 another telescope prototype, the Dual Mirror Medium Size Telescope, was also inaugurated at the Mount Whipple Observatory (Arizona, US).
Meanwhile, detailed geophysical characterization of the southern site is ongoing and the agreement between the hosting country and the CTA Observatory has been signed.
First commissioning data from LST1 started to be acquired at the end of 2018; the first gamma-ray observations are expected in 2019.
The CTA Observatory is expected to become fully operational by 2025, but precursor mini-arrays are expected to operate already in 2020.
......@@ -79,7 +78,7 @@ The CTA production system currently in use \cite{Arrabito2015} is based on the D
A massive production of simulated data was carried out in 2018 to estimate the expected performance with improved telescope models and with different night-sky background levels. A simulation dedicated to the detailed comparison of different Small Size Telescope versions was also carried out. Simulated data have been analyzed with two different analysis chains to cross-check the results and have also been used for the development of the new official CTA reconstruction and analysis pipeline.
\begin{figure}[ht]
\includegraphics[width=\textwidth]{cpu-days-used-2018-bysite.eps}
\includegraphics[width=0.8\textwidth]{cpu-days-used-2018-bysite.eps}
\caption{\label{CPU} CPU power provided in 2018 by Grid sites in the CTA Virtual Organization.}
\end{figure}
......
%% This BibTeX bibliography file was created using BibDesk.
%% http://bibdesk.sourceforge.net/
%% Created for Fabio Bellini at 2017-02-28 14:54:59 +0100
%% Saved with string encoding Unicode (UTF-8)
@article{Alduino:2017ehq,
author = "Alduino, C. and others",
title = "{First Results from CUORE: A Search for Lepton Number
Violation via $0\nu\beta\beta$ Decay of $^{130}$Te}",
collaboration = "CUORE",
journal = "Phys. Rev. Lett.",
volume = "120",
year = "2018",
number = "13",
pages = "132501",
doi = "10.1103/PhysRevLett.120.132501",
eprint = "1710.07988",
archivePrefix = "arXiv",
primaryClass = "nucl-ex",
SLACcitation = "%%CITATION = ARXIV:1710.07988;%%"
}
@article{Alduino:2016vtd,
Archiveprefix = {arXiv},
Author = {Alduino, C. and others},
Collaboration = {CUORE},
Date-Added = {2017-02-28 13:49:12 +0000},
Date-Modified = {2017-02-28 13:49:12 +0000},
Doi = {10.1140/epjc/s10052-016-4498-6},
Eprint = {1609.01666},
Journal = {Eur. Phys. J.},
Number = {1},
Pages = {13},
Primaryclass = {nucl-ex},
Slaccitation = {%%CITATION = ARXIV:1609.01666;%%},
Title = {{Measurement of the two-neutrino double-beta decay half-life of$^{130}$ Te with the CUORE-0 experiment}},
Volume = {C77},
Year = {2017},
Bdsk-Url-1 = {http://dx.doi.org/10.1140/epjc/s10052-016-4498-6}}
@article{Artusa:2014lgv,
Archiveprefix = {arXiv},
Author = {Artusa, D.R. and others},
Collaboration = {CUORE},
Doi = {10.1155/2015/879871},
Eprint = {1402.6072},
Journal = {Adv.High Energy Phys.},
Pages = {879871},
Primaryclass = {physics.ins-det},
Slaccitation = {%%CITATION = ARXIV:1402.6072;%%},
Title = {{Searching for neutrinoless double-beta decay of $^{130}$Te with CUORE}},
Volume = {2015},
Year = {2015},
Bdsk-Url-1 = {http://dx.doi.org/10.1155/2015/879871}}
@inproceedings{Adams:2018nek,
author = "Adams, D. Q. and others",
title = "{Update on the recent progress of the CUORE experiment}",
booktitle = "{28th International Conference on Neutrino Physics and
Astrophysics (Neutrino 2018) Heidelberg, Germany, June
4-9, 2018}",
collaboration = "CUORE",
url = "https://doi.org/10.5281/zenodo.1286904",
year = "2018",
eprint = "1808.10342",
archivePrefix = "arXiv",
primaryClass = "nucl-ex",
SLACcitation = "%%CITATION = ARXIV:1808.10342;%%"
}
\ No newline at end of file
File deleted